parser

`read_atom_line(line_full)`

Read one atom record (ATOM or HETATM) from a PDB-format line, for example: `HETATM 1 H14 ORTE 0 6.301 0.693 1.919 1.00 0.00 H` (fixed-width column spacing is collapsed here).

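Fixed-width column layout (1-based, inclusive) and the field stored in each range: 1-6 record type (ATOM/HETATM), 7-11 serial, 13-16 name, 17 altLoc, 18-20 resName, 22 chainID, 23-26 resSeq, 27 iCode, 28-30 unused, 31-38 x, 39-46 y, 47-54 z, 55-60 occupancy, 61-66 tempFactor, 67-72 unused, 73-76 segID, 77-78 element, 79-80 charge.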

Source code in dmff/admp/parser.py

def read_atom_line(line_full):
    """
    Parse one ATOM/HETATM record of a PDB file into its individual fields.

    Example record:
    HETATM    1  H14 ORTE    0       6.301   0.693   1.919  1.00  0.00           H

    Column layout (1-based, inclusive) and the field held in each range:
    1-6 7-11 13-16 17 18-20 22 23-26 27 28-30 31-38 39-46 47-54 55-60 61-66 67-72 73-76 77-78 79-80
    ATOM serial name altLoc resName chainID resSeq iCode _ x y z occupancy tempFactor _ segID element charge

    The residue name is read from columns 18-21, i.e. one column wider than
    the layout above, which keeps four-letter names such as ``ORTE`` intact.

    Parameters
    ----------
    line_full : str
        A single line of a PDB file; a trailing newline is ignored.

    Returns
    -------
    tuple
        ``(type_atm, serial, name, altLoc, resName, chainID, resSeq, iCode,
        coord, occupancy, bfactor, segid, element, charge)`` where

        * ``serial``, ``name``, ``resName``, ``element`` and ``charge`` are
          whitespace-stripped strings (``element`` is upper-cased),
        * ``altLoc``, ``chainID``, ``iCode`` and ``segid`` are the raw column
          contents,
        * ``resSeq`` is an ``int``,
        * ``coord`` is a float64 array ``[x, y, z]``,
        * ``occupancy`` is a ``float`` or ``None`` when it cannot be parsed,
        * ``bfactor`` is a ``float`` (``0.0`` when it cannot be parsed).

    Raises
    ------
    ValueError
        If the record type is neither ``ATOM`` nor ``HETATM``, if a
        coordinate is missing or not a number, or if the residue sequence
        number is not an integer.
    IndexError
        If the line is too short to contain the single-column fields
        (altLoc, chainID, iCode) or the residue sequence number is blank.
    """
    line = line_full.rstrip("\n")
    type_atm = line[0:6]
    if type_atm not in ("ATOM  ", "HETATM"):
        raise ValueError("Only ATOM and HETATM supported")

    # Columns 7-11 -> slice [6:11]. Starting at index 7 would drop the
    # leading digit of five-digit serial numbers.
    serial = line[6:11].strip()
    name = line[12:16].strip()

    altLoc = line[16]
    resName = line[17:21]
    chainID = line[21]

    resSeq = int(line[22:26].split()[0])  # sequence identifier
    iCode = line[26]  # insertion code

    # Atomic coordinates are mandatory.
    try:
        coord = np.array(
            [float(line[30:38]), float(line[38:46]), float(line[46:54])],
            dtype=np.float64,
        )
    except ValueError as err:
        raise ValueError("Invalid or missing coordinate(s)") from err

    # Occupancy is optional: report None rather than an arbitrary 0 or 1.
    try:
        occupancy = float(line[54:60])
    except ValueError:
        occupancy = None

    if occupancy is not None and occupancy < 0:
        warnings.warn("Negative occupancy in one or more atoms")

    # The PDB uses a default of zero when the temperature factor is missing.
    try:
        bfactor = float(line[60:66])
    except ValueError:
        bfactor = 0.0

    segid = line[72:76]
    element = line[76:78].strip().upper()
    # Columns 79-80 -> slice [78:80]. Stripped so that a blank charge field
    # is reported as an empty (falsy) string.
    charge = line[78:80].strip()

    return (
        type_atm,
        serial,
        name,
        altLoc,
        resName.strip(),
        chainID,
        resSeq,
        iCode,
        coord,
        occupancy,
        bfactor,
        segid,
        element,
        charge,
    )

`read_pdb(file)`

Read PDB files.

Source code in dmff/admp/parser.py

def read_pdb(file):
    """Read the first frame of a PDB file.

    Handled records:

    * ``CRYST1``  - cell lengths and angles.
    * ``ORIGXn``  - rows of the rotation matrix and translation vector that
      are applied to every atom position.
    * ``ATOM`` / ``HETATM`` - parsed with ``read_atom_line``; ignored once a
      record starting with ``END`` has been seen, so only one frame is read.
    * ``CONECT``  - bonded atoms; several records for the same center atom
      are accumulated.

    Parameters
    ----------
    file : str or path-like
        Path of the PDB file to read.

    Returns
    -------
    dict
        * ``'serials'``   - zero-based running index of each atom (the serial
          number written in the file is discarded so the index lines up with
          ``'positions'``).
        * ``'names'``     - atom names.
        * ``'resNames'``  - residue names.
        * ``'resSeqs'``   - residue sequence numbers.
        * ``'positions'`` - array of shape ``(n_atoms, 3)``; ``(0, 3)`` when
          the file contains no atom records.
        * ``'charges'``   - charge field text of each atom, or ``0`` when the
          field is blank.
        * ``'connects'``  - dict mapping the first integer of each CONECT
          record to the list of the remaining integers, exactly as written
          in the file (these are not shifted to match ``'serials'``).
        * ``'box'``       - ``[a, b, c, alpha, beta, gamma]`` from CRYST1, or
          an empty list when there is no such record.

    Raises
    ------
    ValueError
        Propagated from ``read_atom_line`` or from numeric conversion of a
        malformed CRYST1/ORIGX/CONECT record.
    """
    orig = np.identity(3)
    trans = np.zeros(3)
    serials = []
    names = []
    resNames = []
    resSeqs = []
    positions = []
    charges = []
    cellpar = []
    conects = {}
    # Make sure that only one frame is read.
    continue_read_atoms_flag = True

    with open(file, 'r') as fileobj:
        for line in fileobj:
            if line.startswith('CRYST1'):
                cellpar = [float(line[6:15]),  # a
                           float(line[15:24]),  # b
                           float(line[24:33]),  # c
                           float(line[33:40]),  # alpha
                           float(line[40:47]),  # beta
                           float(line[47:54])]  # gamma

            for row, digit in enumerate('123'):
                if line.startswith('ORIGX' + digit):
                    orig[row] = [float(line[10:20]),
                                 float(line[20:30]),
                                 float(line[30:40])]
                    trans[row] = float(line[45:55])

            # The flag must gate both record types; without grouping, `and`
            # binds tighter than `or` and ATOM records after END leak in.
            if continue_read_atoms_flag and line.startswith(("ATOM", "HETATM")):
                # line_info = type_atm, serial, name, altLoc, resName,
                # chainID, resSeq, iCode, coord, occupancy, tempFactor,
                # segid, element, charge
                line_info = read_atom_line(line)

                # Serial numbers in the file start at 1; keep a zero-based
                # index instead so it stays aligned with `positions`.
                serials.append(len(serials))
                names.append(line_info[2])
                resNames.append(line_info[4])
                resSeqs.append(line_info[6])
                positions.append(np.dot(orig, line_info[8]) + trans)
                # A blank (whitespace-only) charge field counts as 0.
                charges.append(line_info[-1].strip() or 0)

            if line.startswith("END"):
                # End of configuration reached.
                # According to the latest PDB file format (v3.30), this line
                # should start with 'ENDMDL' (not 'END'), but in this way PDB
                # trajectories from e.g. CP2K are supported (also VMD
                # supports this format).
                continue_read_atoms_flag = False

            if line.startswith("CONECT"):
                fields = line.split()
                center_atom_idx = int(fields[1])
                bonded_atom_idx = [int(i) for i in fields[2:]]
                # An atom with many bonds is spread over several CONECT
                # records; accumulate them instead of overwriting.
                conects.setdefault(center_atom_idx, []).extend(bonded_atom_idx)

    return {'serials': serials,
            'names': names,
            'resNames': resNames,
            'resSeqs': resSeqs,
            # np.vstack rejects an empty list, so handle atom-less files.
            'positions': np.vstack(positions) if positions else np.empty((0, 3)),
            'charges': charges,
            'connects': conects,
            'box': cellpar}