read_atom_line(line_full)

Read an atom line (an ATOM or HETATM record) in PDB format, for example: `HETATM 1 H14 ORTE 0 6.301 0.693 1.919 1.00 0.00 H`

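PDB columns (1-based, inclusive) and the field each one holds: 1-6 record type (ATOM/HETATM); 7-11 serial; 13-16 name; 17 altLoc; 18-20 resName; 22 chainID; 23-26 resSeq; 27 iCode; 28-30 unused; 31-38 x; 39-46 y; 47-54 z; 55-60 occupancy; 61-66 tempFactor; 67-72 unused; 73-76 segID; 77-78 element; 79-80 charge.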

Source code in dmff/admp/parser.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def read_atom_line(line_full):
    """
    Parse one ATOM/HETATM record of a PDB file into its individual fields.

    Example record:
    HETATM    1  H14 ORTE    0       6.301   0.693   1.919  1.00  0.00        H

    Column layout (1-based, inclusive) and the field stored in each range:
    1-6 7-11 13-16 17 18-20 22 23-26 27 28-30 31-38 39-46 47-54 55-60 61-66 67-72 73-76 77-78 79-80
    ATOM serial name altLoc resName chainID resSeq iCode _ x y z occupancy tempFactor _ segID element charge

    Parameters
    ----------
    line_full : str
        A single line of a PDB file; a trailing newline is ignored.

    Returns
    -------
    tuple
        ``(type_atm, serial, name, altLoc, resName, chainID, resSeq, iCode,
        coord, occupancy, bfactor, segid, element, charge)`` where

        * ``serial``, ``name`` and ``resName`` are whitespace-stripped strings,
        * ``resSeq`` is an ``int``,
        * ``coord`` is a ``float64`` array ``[x, y, z]``,
        * ``occupancy`` is a ``float`` or ``None`` when the field is not a number,
        * ``bfactor`` is a ``float`` (``0.0`` when the field is not a number),
        * ``element`` is stripped and upper-cased,
        * ``altLoc``, ``chainID``, ``iCode``, ``segid`` and ``charge`` are the
          raw column contents.

    Raises
    ------
    ValueError
        If the record is neither ATOM nor HETATM, if a coordinate is missing
        or not a number, or if the resSeq field is not an integer.
    IndexError
        If the line is too short to contain the single-character fields, or
        if the resSeq field is blank.
    """

    line = line_full.rstrip("\n")
    type_atm = line[0:6]
    if type_atm not in ("ATOM  ", "HETATM"):
        raise ValueError("Only ATOM and HETATM supported")

    # Columns 7-11.  The previous slice [7:12] was shifted by one column and
    # dropped the leading digit of five-digit serial numbers.
    serial = line[6:11].strip()

    name = line[12:16].strip()

    altLoc = line[16]
    # One column wider than the standard 18-20 range so that four-character
    # residue names (such as ORTE in the example above) are kept whole.
    resName = line[17:21]
    chainID = line[21]  # Not used

    resSeq = int(line[22:26].split()[0])  # sequence identifier
    iCode = line[26]  # insertion code, not used

    # atomic coordinates
    try:
        coord = np.array(
            [float(line[30:38]), float(line[38:46]), float(line[46:54])],
            dtype=np.float64,
        )
    except ValueError as err:
        raise ValueError("Invalid or missing coordinate(s)") from err

    # occupancy & B factor
    try:
        occupancy = float(line[54:60])
    except ValueError:
        occupancy = None  # Rather than arbitrary zero or one

    if occupancy is not None and occupancy < 0:
        warnings.warn("Negative occupancy in one or more atoms")

    try:
        bfactor = float(line[60:66])
    except ValueError:
        # The PDB use a default of zero if the data is missing
        bfactor = 0.0

    segid = line[72:76]  # not used
    element = line[76:78].strip().upper()
    # Columns 79-80.  The previous slice [79:81] was shifted by one column
    # and lost the first character of the charge field.
    charge = line[78:80]

    return (
        type_atm,
        serial,
        name,
        altLoc,
        resName.strip(),
        chainID,
        resSeq,
        iCode,
        coord,
        occupancy,
        bfactor,
        segid,
        element,
        charge,
    )

read_pdb(file)

Read PDB files.

Source code in dmff/admp/parser.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def read_pdb(file):
    """Read the first frame of a PDB file.

    Parameters
    ----------
    file : str or path-like
        Path of the PDB file; it is opened in text mode.

    Returns
    -------
    dict
        * ``'serials'``: 0-based running index of each atom read (the serial
          written in the file is discarded so indices align with positions),
        * ``'names'``: atom names,
        * ``'resNames'``: residue names,
        * ``'resSeqs'``: residue sequence numbers,
        * ``'positions'``: array with one row per atom, the coordinates
          after applying the ORIGXn matrix and translation,
        * ``'charges'``: content of the charge field, or ``0`` when empty,
        * ``'connects'``: mapping from the first integer of each CONECT
          record to the list of the remaining integers, as written in the
          file (i.e. not converted to the 0-based ``'serials'``),
        * ``'box'``: ``[a, b, c, alpha, beta, gamma]`` from the CRYST1
          record, or an empty list when there is none.

    Raises
    ------
    ValueError
        If an atom record cannot be parsed, or if no atom record was read
        (``np.vstack`` rejects an empty list).
    """
    orig = np.identity(3)
    trans = np.zeros(3)
    serials = []
    names = []
    resNames = []
    resSeqs = []
    positions = []
    charges = []
    cellpar = []
    conects = {}
    # make sure that only one frame is read
    continue_read_atoms_flag = True

    # The context manager closes the file even when a record fails to parse.
    with open(file, 'r') as fileobj:
        for line in fileobj:
            if line.startswith('CRYST1'):
                cellpar = [float(line[6:15]),  # a
                           float(line[15:24]),  # b
                           float(line[24:33]),  # c
                           float(line[33:40]),  # alpha
                           float(line[40:47]),  # beta
                           float(line[47:54])]  # gamma

            for c in range(3):
                if line.startswith('ORIGX' + '123'[c]):
                    orig[c] = [float(line[10:20]),
                               float(line[20:30]),
                               float(line[30:40])]
                    trans[c] = float(line[45:55])

            # The flag must guard both record types.  Written as
            # `A or B and flag`, `and` binds tighter than `or`, so ATOM
            # records were still read after the first frame had ended.
            if continue_read_atoms_flag and line.startswith(("ATOM", "HETATM")):
                # line_info = type_atm, serial, name, altLoc, resName, chainID, resSeq, iCode, coord, occupancy, tempFactor, segid, element, charge
                line_info = read_atom_line(line)

                # serial starts at 1 in the file; discard it and keep a
                # 0-based index that stays aligned with positions
                serials.append(len(serials))
                names.append(line_info[2])
                resNames.append(line_info[4])
                resSeqs.append(line_info[6])
                positions.append(np.dot(orig, line_info[8]) + trans)
                charges.append(line_info[-1] or 0)

            if line.startswith("END"):
                # End of configuration reached
                # According to the latest PDB file format (v3.30),
                # this line should start with 'ENDMDL' (not 'END'),
                # but in this way PDB trajectories from e.g. CP2K
                # are supported (also VMD supports this format).
                continue_read_atoms_flag = False

            if line.startswith("CONECT"):
                fields = line.split()
                center_atom_idx = int(fields[1])
                bonded_atom_idx = [int(i) for i in fields[2:]]

                # NOTE(review): a repeated CONECT record for the same atom
                # replaces the earlier entry instead of being merged with it.
                conects[center_atom_idx] = bonded_atom_idx

    return {'serials': serials,
           'names': names,
           'resNames': resNames,
           'resSeqs': resSeqs,
           'positions': np.vstack(positions),
           'charges': charges,
           'connects': conects,
           'box': cellpar}