Source code for pylift.reader

"""
pylift.reader module

License: The MIT License (MIT)

Copyright (c) 2024 Brandon C. Tapia

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

import os
import re
import json
import copy
from typing import Optional


[docs] def read_mol2(mol2_in: str, out_json: Optional[str] = None) -> dict: """ pylift.reader.read_mol2 Extracts all information from a provided mol2 file Arguments: mol2_in (str): mol2 input file out_json (str): JSON file with mol2 information in PyLIFT-parsable dictionary Returns: dict: mol2 information in PyLIFT-parsable dictionary """ with open(mol2_in, "r", encoding="utf-8") as file: lines = file.readlines() molecule_section = False atom_section = False bond_section = False substructure_section = False atom_dict = {} bond_dict = {} auxinfo_dict = {} molecule_iter = 0 res = None atom_num = None bond_num = None ident_1 = None ident_2 = None mol_type = None charge_method = None substructure = None for _, line in enumerate(lines): if not line: continue line = line.strip() columns = line.split() if line.startswith("@<TRIPOS>MOLECULE"): molecule_section = True atom_section = False bond_section = False substructure_section = False molecule_init = line elif line.startswith("@<TRIPOS>ATOM"): molecule_section = False atom_section = True bond_section = False substructure_section = False atom_init = line elif line.startswith("@<TRIPOS>BOND"): molecule_section = False atom_section = False bond_section = True substructure_section = False bond_init = line elif line.startswith("@<TRIPOS>SUBSTRUCTURE"): molecule_section = False atom_section = False bond_section = False substructure_section = True substructure_init = line elif molecule_section: if molecule_iter == 0: res = line if len(columns) > 0: if columns[0].isdigit() and columns[1].isdigit(): atom_num = int(columns[0]) bond_num = int(columns[1]) ident_1 = int(columns[3]) ident_2 = int(columns[4]) if molecule_iter == 2: mol_type = line if molecule_iter == 3: charge_method = line molecule_iter += 1 elif atom_section: atom_dict[columns[0]] = { "atom_name": columns[1], "x": float(columns[2]), "y": float(columns[3]), "z": float(columns[4]), "atom_type": columns[5], "molecule_num": int(columns[6]), "res": columns[7], "charge": float(columns[8]), } elif bond_section: bond_dict[columns[0]] = { "atom_1": int(columns[1]), "atom_2": int(columns[2]), "bond_type": columns[3], } elif substructure_section: substructure = line auxinfo_dict["header_info"] = { "molecule_init": molecule_init, "atom_init": atom_init, "bond_init": bond_init, "substructure_init": substructure_init, } auxinfo_dict["molecule_info"] = { "res": res, "atom_num": atom_num, "bond_num": bond_num, "ident_1": ident_1, "ident_2": ident_2, "mol_type": mol_type, "charge_method": charge_method, } auxinfo_dict["substructure_info"] = {"substructure": substructure} mol2_dict = { "auxinfo_dict": auxinfo_dict, "atom_dict": atom_dict, "bond_dict": bond_dict, } overall_charge = sum(float(atom_info["charge"]) for atom_info in atom_dict.values()) mol2_dict["auxinfo_dict"]["molecule_info"].update({"total_charge": overall_charge}) if out_json: with open(out_json, "w", encoding="utf-8") as file: json.dump(mol2_dict, file, indent=4) print(f"printed .json file ({out_json}) containing information from {mol2_in}") print(f"[read_mol2] read {mol2_in}") return mol2_dict
[docs] def read_topo( topo_in: str = "topo.tmp.lmps", linker_identifier: Optional[str] = "L", pseudoatoms: str = None, out_json: Optional[str] = None, ) -> dict: """ pylift.reader.read_topo Extracts information from a skeleton LAMMPS output file provided from the TopoTools program Arguments: topo_in (str): input LAMMPS file from TopoTools linker_identifier: same as linker_identifier previously defined in builder.assign_linkers() Otherwise, set linker_identifier = None Pseudoatoms: Determines wether there is the hydrogen count appended to the atoms type. If builder.convert_to_pseudo() was used, pseudoatoms = True Otherwise, pseudoatoms = False out_json (str): JSON file with information from TopoTools file in PyLIFT-parsable dictionary Returns: dict: TopoTools information in PyLIFT-parsable dictionary """ if pseudoatoms is None: print("Variable psuedoatoms must be set as True or False") return with open(topo_in, "r", encoding="utf-8") as file: lines = file.readlines() # necessary for looping through sections pair_section = False bond_section = False angle_section = False dihedral_section = False improper_section = False mass_section = False atom_section = False # intiially we are in the header section header_section = True # initializing dictionaries to add sections to mass_dict = {} pair_dict = {} bond_dict = {} angle_dict = {} atom_dict = {} dihedral_dict = {} improper_dict = {} header = {} footer = {} # TopoTools always sets the box to be cubic -0.5 - 0.5 xlo = None xhi = None ylo = None yhi = None zlo = None zhi = None for i, line in enumerate(lines): line = line.strip() pattern = r"[ \-]+" # splitting based on space or '-' columns = re.split(pattern, line) if not line: continue if columns[0] == "Masses": pair_section = False bond_section = False angle_section = False dihedral_section = False improper_section = False header_section = False mass_section = True atom_section = False elif columns[0] == "Atoms": pair_section = False bond_section = False angle_section = False dihedral_section = False improper_section = False header_section = False mass_section = False atom_section = True elif columns[0] == "Bonds": pair_section = False bond_section = False angle_section = False dihedral_section = False improper_section = False header_section = False mass_section = False atom_section = False elif mass_section: mass_dict[columns[0]] = {"atom": columns[3], "mass": columns[1]} elif atom_section: columns = line.split() # '-' are important here (as negative signs) atom_dict[columns[0]] = { "molecule": int(columns[1]), "atom_type": int(columns[2]), "charge": float(columns[3]), "x": float(columns[4]), "y": float(columns[5]), "z": float(columns[6]), "comment": columns[8], } elif len(columns) > 1: # to avoid errors on columns < len(2) if columns[1] == "Pair": pair_section = True bond_section = False angle_section = False dihedral_section = False improper_section = False elif columns[1] == "Bond": pair_section = False bond_section = True angle_section = False dihedral_section = False improper_section = False elif columns[1] == "Angle": pair_section = False bond_section = False angle_section = True dihedral_section = False improper_section = False elif columns[1] == "Dihedral": pair_section = False bond_section = False angle_section = False dihedral_section = True improper_section = False elif columns[1] == "Improper": pair_section = False bond_section = False angle_section = False dihedral_section = False improper_section = True elif pair_section: pair_dict[columns[1]] = {"atom": columns[2]} elif bond_section: bond_dict[columns[1]] = {"atom_1": columns[2], "atom_2": columns[3]} elif angle_section: angle_dict[columns[1]] = { "atom_1": columns[2], "atom_2": columns[3], "atom_3": columns[4], } elif dihedral_section: dihedral_dict[columns[1]] = { "atom_1": columns[2], "atom_2": columns[3], "atom_3": columns[4], "atom_4": columns[5], } elif improper_section: improper_dict[columns[1]] = { "atom_1": columns[2], "atom_2": columns[3], "atom_3": columns[4], "atom_4": columns[5], } # updating the box size in for the output LAMMPS file # TopoTools gives an autommatci (but wrong) cube of -0.5 - 0.5 xyz lengths if atom_dict and all("x" in atom for atom in atom_dict.values()): xhi = max( atom["x"] for atom in atom_dict.values() if "x" in atom and atom["x"] is not None ) xlo = min( atom["x"] for atom in atom_dict.values() if "x" in atom and atom["x"] is not None ) if atom_dict and all("y" in atom for atom in atom_dict.values()): yhi = max( atom["y"] for atom in atom_dict.values() if "y" in atom and atom["y"] is not None ) ylo = min( atom["y"] for atom in atom_dict.values() if "y" in atom and atom["y"] is not None ) if atom_dict and all("z" in atom for atom in atom_dict.values()): zhi = max( atom["z"] for atom in atom_dict.values() if "z" in atom and atom["z"] is not None ) zlo = min( atom["z"] for atom in atom_dict.values() if "z" in atom and atom["z"] is not None ) if not any( [ pair_section, bond_section, angle_section, dihedral_section, improper_section, mass_section, atom_section, ] ): line_strip = line.strip() line_split = line_strip.split() if header_section: if i == 0: header["info"] = line elif len(line_split) == 2: header[str("num_" + line_split[1])] = int(line_split[0]) elif len(line_split) == 3: header[str("num_type_" + line_split[1])] = int(line_split[0]) else: footer[i + 1] = {"info": line} header.update( {"xlo": round(xlo-25,2), "xhi": round(xhi+25,2), "ylo": round(ylo-25,2), "yhi": round(yhi+25,2), "zlo": round(zlo-25,2), "zhi": round(zhi+25,2)} ) lammps_dict = { "mass_dict": mass_dict, "pair_dict": pair_dict, "bond_dict": bond_dict, "angle_dict": angle_dict, "dihedral_dict": dihedral_dict, "improper_dict": improper_dict, "atom_dict": atom_dict, } # creating a dictionary without the additional number of H atom appender if pseudoatoms: lammps_dict_ff_form = {} for key, value in lammps_dict.items(): if isinstance(value, dict): lammps_dict_ff_form[key] = {} for sub_key, sub_value in value.items(): if isinstance(sub_value, dict): lammps_dict_ff_form[key][sub_key] = {} for item_key, item_value in sub_value.items(): if isinstance(item_value, str): lammps_dict_ff_form[key][sub_key][item_key] = ( item_value[:-1] if item_value else item_value ) else: lammps_dict_ff_form[key][sub_key][item_key] = item_value else: lammps_dict_ff_form[key][sub_key] = ( sub_value[:-1] if isinstance(sub_value, str) else sub_value ) else: lammps_dict_ff_form[key] = value # otherwise, create a deepcopy of the identical dictionary else: lammps_dict_ff_form = copy.deepcopy(lammps_dict) lammps_dict_full = { "lammps_dict": lammps_dict, "lammps_dict_ff_form": lammps_dict_ff_form, "header": header, "footer": footer, } # removing the linker_identifier from lammps_dict_ff_form if present if linker_identifier is not None: for key, value in lammps_dict_full["lammps_dict_ff_form"].items(): if isinstance(value, dict): for sub_key, sub_value in value.items(): if isinstance(value, dict): for item_key, item_value in sub_value.items(): if str(item_value).startswith(str(linker_identifier)): lammps_dict_full["lammps_dict_ff_form"][key][sub_key][ item_key ] = item_value[1:] if out_json: with open(out_json, "w", encoding="utf-8") as file: json.dump(lammps_dict_full, file, indent=4) print(f"printed .json file ({out_json}) containing information from {topo_in}") print(f"[read_topo] read {topo_in}") return lammps_dict_full
[docs] def read_gaff2( gaff2_in: Optional[str] = "gaff2.dat", default_loc=True, out_json: Optional[str] = None, verbose: Optional[bool] = True, ) -> dict: """ pylift.reader.read_gaff2 Read in a GAFF2 molecule file. Arguments: gaff2_in (str): gaff2 filename if default_loc=True, or gaff2 filepath otherwise default_loc (bool): if GAFF2 file can be found in default Amber location: ~/amber24/dat/leap/parm/ out_json (str): name of the GAFF2 information translated to a PyLIFT-readable JSON file verbose (bool): Print out optional information Returns: dict: GAFF2 information translated to a PyLIFT-readable dictionary """ gaff2 = {} Information = [] Pairs = {} Atoms = {} Bonds = {} Angles = {} Dihedrals = {} Impropers = {} problem = False please_check = False section_counter = 1 # regex identifiers for these sections because of the wildcard X containing a space before it bond_pattern = re.compile(r"([a-zA-Z0-9+]+)\s*[-]\s*([a-zA-Z0-9+]+)\s*(.*)") angle_pattern = re.compile( r"([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*(.*)" ) dihedral_pattern = re.compile( r"([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*(.*)" ) improper_pattern = re.compile( r"([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*(.*)" ) if default_loc: home_directory = os.path.expanduser("~") gaff2_loc = home_directory + "/amber24/dat/leap/parm/" + gaff2_in else: gaff2_loc = gaff2_in with open(gaff2_loc, "r", encoding="utf-8") as file: lines = file.readlines() for i, line in enumerate(lines): line = line.strip() columns = line.split() # iterating through each section recognizing that each section has a line break between them # if we find that these breaks change between versions, we can switch to a keyword search if len(columns) == 0: section_counter += 1 continue if i == 0: # header info Information = { "Header": line, "Forcefield": "gaff2", "Version": columns[columns.index("(Version") + 1], "Date": " ".join(map(str, columns[columns.index("(Version") + 2 :])), "Pair_Type": "lj/charmmfsw/coul/long", # this is meant to replace lj/charmm/coul/long which could also be used if desired "Bond_Type": "Harmonic", "Angle_Type": "Harmonic", "Dihedral_Type": "Fourier", "Improper_Type": "cvff", } if section_counter == 1 and i != 0: # atom info Atoms[int(i + 1)] = { "Atom": columns[0], "Mass": float(columns[1]), "Unknown Param": float( columns[2] ), # unsure what this parameter is -> have not found a use for it "Information": " ".join(map(str, columns[3:])), } elif section_counter == 2: # bond info match = bond_pattern.match(line) if match: bond_data = match.group(3).split() Atom_1 = match.group(1) Atom_2 = match.group(2) Bonds[int(i + 1)] = { "Bond": Atom_1 + "-" + Atom_2, "r_Bond": Atom_2 + "-" + Atom_1, "Atom_1": Atom_1, "Atom_2": Atom_2, "K": float(bond_data[0]), "r": float(bond_data[1]), "Information": " ".join(map(str, bond_data[2:])), } else: if problem is True: print(problem) print(f"Matching problem with {line}") problem = True elif section_counter == 3: # angle info match = angle_pattern.match(line) if match: angle_data = match.group(4).split() Atom_1 = match.group(1) Atom_2 = match.group(2) Atom_3 = match.group(3) Angles[int(i + 1)] = { "Angle": Atom_1 + "-" + Atom_2 + "-" + Atom_3, "r_Angle": Atom_3 + "-" + Atom_2 + "-" + Atom_1, "Atom_1": Atom_1, "Atom_2": Atom_2, "Atom_3": Atom_3, "K": float(angle_data[0]), "theta": float(angle_data[1]), "Information": " ".join(map(str, angle_data[2:])), } else: print(f"Matching problem with {line}") elif section_counter == 4: # dihedral info match = dihedral_pattern.match(line) if match: dihedral_data = match.group(5).split() # The method we have employed to convert Amber style dihedrals into LAMMPS is through the Fourier style # m (the number of summations is always 1) # the first two parameters in gaff2.dat correspond to K=param_1/param_2 K = float(dihedral_data[1]) / float(dihedral_data[0]) Atom_1 = match.group(1) Atom_2 = match.group(2) Atom_3 = match.group(3) Atom_4 = match.group(4) Dihedrals[int(i + 1)] = { "Dihedral": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4, "r_Dihedral": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1, "Atom_1": Atom_1, "Atom_2": Atom_2, "Atom_3": Atom_3, "Atom_4": Atom_4, "m": 1, "K": K, "n": int(float(dihedral_data[3])), "d": float(dihedral_data[2]), "Information": " ".join(map(str, dihedral_data[4:])), } else: print(f"[read_gaff2] Matching problem with {line}") elif section_counter == 5: # improper info match = improper_pattern.match(line) if match: improper_data = match.group(5).split() # The method we have employed to convert Amber style impropers into LAMMPS is through the cvff style # Amber provides the d-angle as either 0 or 180 which we must convert into cvff_style via # where d = cos(d-angle) (e.g., -1 for 180 degrees and 1 for 0 degrees) if float(improper_data[1]) == float(180.0): d = int(-1) elif float(improper_data[1]) == float(0): d = int(1) else: print("unknown d-value, writing as None") d = None Atom_1 = match.group(1) Atom_2 = match.group(2) Atom_3 = match.group(3) Atom_4 = match.group(4) Impropers[int(i + 1)] = { "Improper": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4, "r_Improper": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1, "Atom_1": Atom_1, "Atom_2": Atom_2, "Atom_3": Atom_3, "Atom_4": Atom_4, "K": float(improper_data[0]), "n": int(float(improper_data[2])), "d": d, "Information": " ".join(map(str, dihedral_data[3:])), } else: print(f"Matching problem with {line}") elif section_counter == 8: # pair info if len(columns) > 2: # To convert Amber style pairs into LAMMPS, the sigma definition must be adjusted # Amber defined Lennard-Jones potentials as U(r) = eps*((sigma/r)^12-2*(sigma/r)^6) # while LAMMPS defines them as 4*eps*((sigma/r)^12-(sigma/r)^6 # Amber also provides the radius instead of the diameter for sigma # To address these two issues we must multiple the Amber value by 2*2**(-1/6) # Credit for this descovery goes to Andrew Jewett of MolTemplate # https://github.com/jewettaij/moltemplate/blob/master/moltemplate/amber2lt.py Atom_1 = columns[0] Atom_2 = columns[0] sigma = float(columns[1]) * 2 * 2 ** (-1 / 6) Pairs[int(i + 1)] = { "Pair": Atom_1 + "-" + Atom_2, "r_Pair": Atom_2 + "-" + Atom_1, "Atom_1": Atom_1, "Atom_2": Atom_2, "eps": float(columns[2]), "sigma": sigma, } gaff2 = { "Information": Information, "Atoms": Atoms, "Pairs": Pairs, "Bonds": Bonds, "Angles": Angles, "Dihedrals": Dihedrals, "Impropers": Impropers, } if verbose: for index, dicts in enumerate(gaff2.keys()): keys = list(gaff2[dicts].keys()) if index != 0: for j in range(1, len(keys)): expected_key = int(keys[j - 1]) + 1 while expected_key < int(keys[j]): expected_key += 1 print( f"Missing {int(keys[j])-1} from {gaff2_in} in {dicts} Section" ) please_check = True if please_check: print("----------------------------------------------------------") print(f"Please check {gaff2_in} file for errors in these line") print("----------------------------------------------------------") else: print(f"No errors found while reading {gaff2_in}!") for i, dicts in enumerate(gaff2.keys()): if i != 0: keys = list(gaff2[dicts].keys()) print(f"Section {dicts} in lines [{keys[0]}-{keys[-1]}]") if out_json: with open(out_json, "w", encoding="utf-8") as file: json.dump(gaff2, file, indent=4) print( f"printed .json file ({out_json}) containing information from {gaff2_in}" ) return gaff2
[docs] def read_frcmod(frcmod_file: str, out_json: Optional[str] = None) -> dict: """ pylift.reader.read_frcmod Read in a FRCMOD file from the AmberTools prmchk or prmchk2 utilities. Arguments: frcmod_file (str): name of the FRCMOD file out_json (str): name of the FRCMOD information translated to a PyLIFT-readable JSON file Returns: dict: FRCMOD file contents in a PyLIFT-readable dictionary """ with open(frcmod_file, "r", encoding="utf-8") as file: lines = file.readlines() mass_section = False bond_section = False angle_section = False dihedral_section = False improper_section = False nonbon_section = False mass_check = False bond_check = False angle_check = False nonbon_check = False mass_dict = {} bond_dict = {} angle_dict = {} dihedral_dict = {} improper_dict = {} nonbon_dict = {} frcmod_dict = {} for i, line in enumerate(lines): if not line: continue line = line.strip() columns = line.split() # regex to split based upon space or '-' bond_pattern = re.compile(r"[\s-]+") angle_pattern = re.compile(r"[\s-]+") # these require special regex patterns because of the wildcard X causing problems with line splitting dihedral_pattern = re.compile( r"([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s+(\d+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+(same)\s+(as)\s+([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*,\s*(penalty)\s+(score=)\s*([\d.]+)" ) improper_pattern = re.compile( r"([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*" r"(.*?)(?:\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+),\s*penalty score=\s*([\d.]+))?$" ) if line.startswith("MASS"): mass_section = True bond_section = False angle_section = False dihedral_section = False improper_section = False nonbon_section = False elif line.startswith("BOND"): mass_section = False bond_section = True angle_section = False dihedral_section = False improper_section = False nonbon_section = False elif line.startswith("ANGLE"): mass_section = False bond_section = False angle_section = True dihedral_section = False improper_section = False nonbon_section = False elif line.startswith("DIHE"): mass_section = False bond_section = False angle_section = False dihedral_section = True improper_section = False nonbon_section = False elif line.startswith("IMPROPER"): mass_section = False bond_section = False angle_section = False dihedral_section = False improper_section = True nonbon_section = False elif line.startswith("NONBON"): mass_section = False bond_section = False angle_section = False dihedral_section = False improper_section = True nonbon_section = False elif mass_section: if not line: continue mass_dict[i + 1] = { "atom": columns[0], "rep_atom": columns[-1], "mass": columns[1], "unknown_param": columns[2], "information": columns[3:], } elif bond_section: if not line: continue match = bond_pattern.match(line) if match: Atom_1 = match.group(1) Atom_2 = match.group(2) K = float(match.group(3)) r = float(match.group(4)) rep_Atom_1 = match.group(7) rep_Atom_2 = match.group(8) penalty = float(match.group(match.lastindex)) bond_dict[i + 1] = { "Bond": Atom_1 + "-" + Atom_2, "r_Bond": Atom_2 + "-" + Atom_1, "atom_1": Atom_1, "atom_2": Atom_2, "K": K, "r": r, "rep_Bond": rep_Atom_1 + "-" + rep_Atom_2, "rep_r_Bond": rep_Atom_2 + "-" + rep_Atom_1, "rep_atom_1": rep_Atom_1, "rep_atom_2": rep_Atom_2, "penalty_score": penalty, } elif angle_section: if not line: continue match = bond_pattern.match(line) if match: Atom_1 = match.group(1) Atom_2 = match.group(2) Atom_3 = match.group(3) K = float(match.group(4)) theta = float(match.group(5)) rep_Atom_1 = match.group(8) rep_Atom_2 = match.group(9) rep_Atom_3 = match.group(10) penalty = float(match.group(match.lastindex)) bond_dict[i + 1] = { "Angle": Atom_1 + "-" + Atom_2 + "-" + Atom_3, "r_Bond": Atom_3 + "-" + Atom_2 + "-" + Atom_1, "atom_1": Atom_1, "atom_2": Atom_2, "atom_3": Atom_3, "K": K, "theta": theta, "rep_Angle": rep_Atom_1 + "-" + rep_Atom_2 + "-" + rep_Atom_3, "rep_r_Angle": rep_Atom_3 + "-" + rep_Atom_2 + "-" + rep_Atom_1, "rep_atom_1": rep_Atom_1, "rep_atom_2": rep_Atom_2, "rep_Atom_3": rep_Atom_3, "penalty_score": penalty, } elif dihedral_section: if not line: continue match = dihedral_pattern.match(line) if match: # see read_gaff2 for derivation of K K = float(match.group(6)) / float(match.group(5)) Atom_1 = match.group(1) Atom_2 = match.group(2) Atom_3 = match.group(3) Atom_4 = match.group(4) rep_Atom_1 = match.group(11) rep_Atom_2 = match.group(12) rep_Atom_3 = match.group(13) rep_Atom_4 = match.group(14) penalty = float(match.group(match.lastindex)) dihedral_dict[i + 1] = { "Dihedral": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4, "r_Dihedral": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1, "atom_1": Atom_1, "atom_2": Atom_2, "atom_3": Atom_3, "atom_4": Atom_4, "m": 1, "K": K, "n": float(match.group(8)), "d": float(match.group(7)), "rep_Dihedral": rep_Atom_1 + "-" + rep_Atom_2 + "-" + rep_Atom_3 + "-" + rep_Atom_4, "rep_r_Dihedral": rep_Atom_4 + "-" + rep_Atom_3 + "-" + rep_Atom_2 + "-" + rep_Atom_1, "rep_atom_1": rep_Atom_1, "rep_atom_2": rep_Atom_2, "rep_atom_3": rep_Atom_3, "rep_atom_4": rep_Atom_4, "penalty_score": penalty, } else: print(f"Matching problem with {line}") elif improper_section: if not line: continue match = improper_pattern.match(line) if match: # see read_gaff2 for derivation of d if float(match.group(6)) == float(180.0): d = int(-1) elif float(match.group(6)) == float(0): d = int(1) else: print("unknown d-value, writing as None") d = None Atom_1 = match.group(1) Atom_2 = match.group(2) Atom_3 = match.group(3) Atom_4 = match.group(4) improper_dict[i + 1] = { "Improper": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4, "r_Improper": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1, "atom_1": Atom_1, "atom_2": Atom_2, "atom_3": Atom_3, "atom_4": Atom_4, "K": float(match.group(5)), "d": d, "n": int(float(match.group(7))), "rep_Improper": None, "rep_r_Improper": None, "rep_atom_1": None, "rep_atom_2": None, "rep_atom_3": None, "rep_atom_4": None, "penalty_score": None, "Information": match.group(8), } default_check = ["Using the default value", None] if match.group(8) not in default_check: text_part = match.group(8).strip().rstrip(")") text_parts = re.split(r"[-,\s]", text_part) text_parts = [ part for part in text_parts if part ] # Remove empty strings first = True for index, string in enumerate(text_parts): if len(string) < 3 and first: first_index = index first = False rep_Atom_1 = text_parts[first_index] rep_Atom_2 = text_parts[first_index + 1] rep_Atom_3 = text_parts[first_index + 2] rep_Atom_4 = text_parts[first_index + 3] improper_dict[i + 1].update( { "rep_Improper": rep_Atom_1 + "-" + rep_Atom_2 + "-" + rep_Atom_3 + "-" + rep_Atom_4, "rep_r_Improper": rep_Atom_4 + "-" + rep_Atom_3 + "-" + rep_Atom_2 + "-" + rep_Atom_1, "rep_atom_1": rep_Atom_1, "rep_atom_2": rep_Atom_2, "rep_atom_3": rep_Atom_3, "rep_atom_4": rep_Atom_4, "penalty_score": float(text_parts[-1]), "Information": " ".join(text_parts[0 : (first_index - 1)]), } ) else: print(f"Matching problem with dihedral line: {line}") elif nonbon_section: if not line: continue # see read_gaff2 for derivation of sigma sigma = float(columns[1]) * 2 * 2 ** (-1 / 6) eps = float(columns[2]) nonbon_dict[i + 1] = { "atom": columns[0], "rep_atom": columns[-1], "eps": eps, "sigma": sigma, } frcmod_dict = { "mass_dict": mass_dict, "bond_dict": bond_dict, "angle_dict": angle_dict, "dihedral_dict": dihedral_dict, "improper_dict": improper_dict, "nonbon_dict": nonbon_dict, } if out_json: with open(out_json, "w", encoding="utf-8") as file: json.dump(frcmod_dict, file, indent=4) print( f"printed .json file ({out_json}) containing information from {frcmod_file}" ) print("[read_frcmod] completed") return frcmod_dict