82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177 | def read_pdb(file):
"""Read PDB files."""
fileobj = open(file, 'r')
orig = np.identity(3)
trans = np.zeros(3)
serials = []
names = []
altLocs = []
resNames = []
chainIDs = []
resSeqs = []
iCodes = []
positions = []
occupancies = []
tempFactors = []
segId = []
elements = []
charges = []
cell = None
pbc = None
cellpar = []
conects = {}
# make sure that only one frame is read
continue_read_atoms_flag = True
# serial starts at 1 and we need to discard it and just keep align with positions
id = 0
for line in fileobj.readlines():
if line.startswith('CRYST1'):
cellpar = [float(line[6:15]), # a
float(line[15:24]), # b
float(line[24:33]), # c
float(line[33:40]), # alpha
float(line[40:47]), # beta
float(line[47:54])] # gamma
for c in range(3):
if line.startswith('ORIGX' + '123'[c]):
orig[c] = [float(line[10:20]),
float(line[20:30]),
float(line[30:40])]
trans[c] = float(line[45:55])
if (
line.startswith("ATOM")
or line.startswith("HETATM")
and continue_read_atoms_flag
):
# Atom name is arbitrary and does not necessarily
# contain the element symbol. The specification
# requires the element symbol to be in columns 77+78.
# Fall back to Atom name for files that do not follow
# the spec, e.g. packmol.
# line_info = type_atm, serial, name, altLoc, resName, chainID, resSeq, iCode, coord, occupancy, tempFactor, segid, element, charge
line_info = read_atom_line(line)
# serials.append(int(line_info[1]))
serials.append(id)
id += 1
names.append(line_info[2])
resNames.append(line_info[4])
resSeqs.append(line_info[6])
position = np.dot(orig, line_info[8]) + trans
positions.append(position)
if line_info[9] is not None:
occupancies.append(line_info[9])
tempFactors.append(line_info[10])
elements.append(line_info[-2])
charges.append(line_info[-1] or 0)
if line.startswith("END"):
# End of configuration reached
# According to the latest PDB file format (v3.30),
# this line should start with 'ENDMDL' (not 'END'),
# but in this way PDB trajectories from e.g. CP2K
# are supported (also VMD supports this format).
continue_read_atoms_flag = False
pass
if line.startswith("CONECT"):
l = line.split()
center_atom_idx = int(l[1])
bonded_atom_idx = [int(i) for i in l[2:]]
conects[center_atom_idx] = bonded_atom_idx
fileobj.close()
return {'serials': serials,
'names': names,
'resNames': resNames,
'resSeqs': resSeqs,
'positions': np.vstack(positions),
'charges': charges,
'connects': conects,
'box': cellpar}
|