"""
pylift.reader module
License: The MIT License (MIT)
Copyright (c) 2024 Brandon C. Tapia
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import os
import re
import json
import copy
from typing import Optional
[docs]
def read_mol2(mol2_in: str, out_json: Optional[str] = None) -> dict:
"""
pylift.reader.read_mol2
Extracts all information from a provided mol2 file
Arguments:
mol2_in (str): mol2 input file
out_json (str): JSON file with mol2 information in PyLIFT-parsable dictionary
Returns:
dict: mol2 information in PyLIFT-parsable dictionary
"""
with open(mol2_in, "r", encoding="utf-8") as file:
lines = file.readlines()
molecule_section = False
atom_section = False
bond_section = False
substructure_section = False
atom_dict = {}
bond_dict = {}
auxinfo_dict = {}
molecule_iter = 0
res = None
atom_num = None
bond_num = None
ident_1 = None
ident_2 = None
mol_type = None
charge_method = None
substructure = None
for _, line in enumerate(lines):
if not line:
continue
line = line.strip()
columns = line.split()
if line.startswith("@<TRIPOS>MOLECULE"):
molecule_section = True
atom_section = False
bond_section = False
substructure_section = False
molecule_init = line
elif line.startswith("@<TRIPOS>ATOM"):
molecule_section = False
atom_section = True
bond_section = False
substructure_section = False
atom_init = line
elif line.startswith("@<TRIPOS>BOND"):
molecule_section = False
atom_section = False
bond_section = True
substructure_section = False
bond_init = line
elif line.startswith("@<TRIPOS>SUBSTRUCTURE"):
molecule_section = False
atom_section = False
bond_section = False
substructure_section = True
substructure_init = line
elif molecule_section:
if molecule_iter == 0:
res = line
if len(columns) > 0:
if columns[0].isdigit() and columns[1].isdigit():
atom_num = int(columns[0])
bond_num = int(columns[1])
ident_1 = int(columns[3])
ident_2 = int(columns[4])
if molecule_iter == 2:
mol_type = line
if molecule_iter == 3:
charge_method = line
molecule_iter += 1
elif atom_section:
atom_dict[columns[0]] = {
"atom_name": columns[1],
"x": float(columns[2]),
"y": float(columns[3]),
"z": float(columns[4]),
"atom_type": columns[5],
"molecule_num": int(columns[6]),
"res": columns[7],
"charge": float(columns[8]),
}
elif bond_section:
bond_dict[columns[0]] = {
"atom_1": int(columns[1]),
"atom_2": int(columns[2]),
"bond_type": columns[3],
}
elif substructure_section:
substructure = line
auxinfo_dict["header_info"] = {
"molecule_init": molecule_init,
"atom_init": atom_init,
"bond_init": bond_init,
"substructure_init": substructure_init,
}
auxinfo_dict["molecule_info"] = {
"res": res,
"atom_num": atom_num,
"bond_num": bond_num,
"ident_1": ident_1,
"ident_2": ident_2,
"mol_type": mol_type,
"charge_method": charge_method,
}
auxinfo_dict["substructure_info"] = {"substructure": substructure}
mol2_dict = {
"auxinfo_dict": auxinfo_dict,
"atom_dict": atom_dict,
"bond_dict": bond_dict,
}
overall_charge = sum(float(atom_info["charge"]) for atom_info in atom_dict.values())
mol2_dict["auxinfo_dict"]["molecule_info"].update({"total_charge": overall_charge})
if out_json:
with open(out_json, "w", encoding="utf-8") as file:
json.dump(mol2_dict, file, indent=4)
print(f"printed .json file ({out_json}) containing information from {mol2_in}")
print(f"[read_mol2] read {mol2_in}")
return mol2_dict
[docs]
def read_topo(
topo_in: str = "topo.tmp.lmps",
linker_identifier: Optional[str] = "L",
pseudoatoms: str = None,
out_json: Optional[str] = None,
) -> dict:
"""
pylift.reader.read_topo
Extracts information from a skeleton LAMMPS output file provided from the TopoTools program
Arguments:
topo_in (str): input LAMMPS file from TopoTools
linker_identifier: same as linker_identifier previously defined in builder.assign_linkers()
Otherwise, set linker_identifier = None
Pseudoatoms: Determines wether there is the hydrogen count appended to the atoms type.
If builder.convert_to_pseudo() was used, pseudoatoms = True
Otherwise, pseudoatoms = False
out_json (str): JSON file with information from TopoTools file in PyLIFT-parsable dictionary
Returns:
dict: TopoTools information in PyLIFT-parsable dictionary
"""
if pseudoatoms is None:
print("Variable psuedoatoms must be set as True or False")
return
with open(topo_in, "r", encoding="utf-8") as file:
lines = file.readlines()
# necessary for looping through sections
pair_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = False
mass_section = False
atom_section = False
# intiially we are in the header section
header_section = True
# initializing dictionaries to add sections to
mass_dict = {}
pair_dict = {}
bond_dict = {}
angle_dict = {}
atom_dict = {}
dihedral_dict = {}
improper_dict = {}
header = {}
footer = {}
# TopoTools always sets the box to be cubic -0.5 - 0.5
xlo = None
xhi = None
ylo = None
yhi = None
zlo = None
zhi = None
for i, line in enumerate(lines):
line = line.strip()
pattern = r"[ \-]+" # splitting based on space or '-'
columns = re.split(pattern, line)
if not line:
continue
if columns[0] == "Masses":
pair_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = False
header_section = False
mass_section = True
atom_section = False
elif columns[0] == "Atoms":
pair_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = False
header_section = False
mass_section = False
atom_section = True
elif columns[0] == "Bonds":
pair_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = False
header_section = False
mass_section = False
atom_section = False
elif mass_section:
mass_dict[columns[0]] = {"atom": columns[3], "mass": columns[1]}
elif atom_section:
columns = line.split() # '-' are important here (as negative signs)
atom_dict[columns[0]] = {
"molecule": int(columns[1]),
"atom_type": int(columns[2]),
"charge": float(columns[3]),
"x": float(columns[4]),
"y": float(columns[5]),
"z": float(columns[6]),
"comment": columns[8],
}
elif len(columns) > 1: # to avoid errors on columns < len(2)
if columns[1] == "Pair":
pair_section = True
bond_section = False
angle_section = False
dihedral_section = False
improper_section = False
elif columns[1] == "Bond":
pair_section = False
bond_section = True
angle_section = False
dihedral_section = False
improper_section = False
elif columns[1] == "Angle":
pair_section = False
bond_section = False
angle_section = True
dihedral_section = False
improper_section = False
elif columns[1] == "Dihedral":
pair_section = False
bond_section = False
angle_section = False
dihedral_section = True
improper_section = False
elif columns[1] == "Improper":
pair_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = True
elif pair_section:
pair_dict[columns[1]] = {"atom": columns[2]}
elif bond_section:
bond_dict[columns[1]] = {"atom_1": columns[2], "atom_2": columns[3]}
elif angle_section:
angle_dict[columns[1]] = {
"atom_1": columns[2],
"atom_2": columns[3],
"atom_3": columns[4],
}
elif dihedral_section:
dihedral_dict[columns[1]] = {
"atom_1": columns[2],
"atom_2": columns[3],
"atom_3": columns[4],
"atom_4": columns[5],
}
elif improper_section:
improper_dict[columns[1]] = {
"atom_1": columns[2],
"atom_2": columns[3],
"atom_3": columns[4],
"atom_4": columns[5],
}
# updating the box size in for the output LAMMPS file
# TopoTools gives an autommatci (but wrong) cube of -0.5 - 0.5 xyz lengths
if atom_dict and all("x" in atom for atom in atom_dict.values()):
xhi = max(
atom["x"]
for atom in atom_dict.values()
if "x" in atom and atom["x"] is not None
)
xlo = min(
atom["x"]
for atom in atom_dict.values()
if "x" in atom and atom["x"] is not None
)
if atom_dict and all("y" in atom for atom in atom_dict.values()):
yhi = max(
atom["y"]
for atom in atom_dict.values()
if "y" in atom and atom["y"] is not None
)
ylo = min(
atom["y"]
for atom in atom_dict.values()
if "y" in atom and atom["y"] is not None
)
if atom_dict and all("z" in atom for atom in atom_dict.values()):
zhi = max(
atom["z"]
for atom in atom_dict.values()
if "z" in atom and atom["z"] is not None
)
zlo = min(
atom["z"]
for atom in atom_dict.values()
if "z" in atom and atom["z"] is not None
)
if not any(
[
pair_section,
bond_section,
angle_section,
dihedral_section,
improper_section,
mass_section,
atom_section,
]
):
line_strip = line.strip()
line_split = line_strip.split()
if header_section:
if i == 0:
header["info"] = line
elif len(line_split) == 2:
header[str("num_" + line_split[1])] = int(line_split[0])
elif len(line_split) == 3:
header[str("num_type_" + line_split[1])] = int(line_split[0])
else:
footer[i + 1] = {"info": line}
header.update(
{"xlo": round(xlo-25,2), "xhi": round(xhi+25,2), "ylo": round(ylo-25,2), "yhi": round(yhi+25,2), "zlo": round(zlo-25,2), "zhi": round(zhi+25,2)}
)
lammps_dict = {
"mass_dict": mass_dict,
"pair_dict": pair_dict,
"bond_dict": bond_dict,
"angle_dict": angle_dict,
"dihedral_dict": dihedral_dict,
"improper_dict": improper_dict,
"atom_dict": atom_dict,
}
# creating a dictionary without the additional number of H atom appender
if pseudoatoms:
lammps_dict_ff_form = {}
for key, value in lammps_dict.items():
if isinstance(value, dict):
lammps_dict_ff_form[key] = {}
for sub_key, sub_value in value.items():
if isinstance(sub_value, dict):
lammps_dict_ff_form[key][sub_key] = {}
for item_key, item_value in sub_value.items():
if isinstance(item_value, str):
lammps_dict_ff_form[key][sub_key][item_key] = (
item_value[:-1] if item_value else item_value
)
else:
lammps_dict_ff_form[key][sub_key][item_key] = item_value
else:
lammps_dict_ff_form[key][sub_key] = (
sub_value[:-1] if isinstance(sub_value, str) else sub_value
)
else:
lammps_dict_ff_form[key] = value
# otherwise, create a deepcopy of the identical dictionary
else:
lammps_dict_ff_form = copy.deepcopy(lammps_dict)
lammps_dict_full = {
"lammps_dict": lammps_dict,
"lammps_dict_ff_form": lammps_dict_ff_form,
"header": header,
"footer": footer,
}
# removing the linker_identifier from lammps_dict_ff_form if present
if linker_identifier is not None:
for key, value in lammps_dict_full["lammps_dict_ff_form"].items():
if isinstance(value, dict):
for sub_key, sub_value in value.items():
if isinstance(value, dict):
for item_key, item_value in sub_value.items():
if str(item_value).startswith(str(linker_identifier)):
lammps_dict_full["lammps_dict_ff_form"][key][sub_key][
item_key
] = item_value[1:]
if out_json:
with open(out_json, "w", encoding="utf-8") as file:
json.dump(lammps_dict_full, file, indent=4)
print(f"printed .json file ({out_json}) containing information from {topo_in}")
print(f"[read_topo] read {topo_in}")
return lammps_dict_full
[docs]
def read_gaff2(
gaff2_in: Optional[str] = "gaff2.dat",
default_loc=True,
out_json: Optional[str] = None,
verbose: Optional[bool] = True,
) -> dict:
"""
pylift.reader.read_gaff2
Read in a GAFF2 molecule file.
Arguments:
gaff2_in (str): gaff2 filename if default_loc=True, or gaff2 filepath otherwise
default_loc (bool): if GAFF2 file can be found in default Amber location:
~/amber24/dat/leap/parm/
out_json (str): name of the GAFF2 information translated to a PyLIFT-readable JSON file
verbose (bool): Print out optional information
Returns:
dict: GAFF2 information translated to a PyLIFT-readable dictionary
"""
gaff2 = {}
Information = []
Pairs = {}
Atoms = {}
Bonds = {}
Angles = {}
Dihedrals = {}
Impropers = {}
problem = False
please_check = False
section_counter = 1
# regex identifiers for these sections because of the wildcard X containing a space before it
bond_pattern = re.compile(r"([a-zA-Z0-9+]+)\s*[-]\s*([a-zA-Z0-9+]+)\s*(.*)")
angle_pattern = re.compile(
r"([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*(.*)"
)
dihedral_pattern = re.compile(
r"([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*(.*)"
)
improper_pattern = re.compile(
r"([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*[- ]\s*([a-zA-Z0-9+]+)\s*(.*)"
)
if default_loc:
home_directory = os.path.expanduser("~")
gaff2_loc = home_directory + "/amber24/dat/leap/parm/" + gaff2_in
else:
gaff2_loc = gaff2_in
with open(gaff2_loc, "r", encoding="utf-8") as file:
lines = file.readlines()
for i, line in enumerate(lines):
line = line.strip()
columns = line.split()
# iterating through each section recognizing that each section has a line break between them
# if we find that these breaks change between versions, we can switch to a keyword search
if len(columns) == 0:
section_counter += 1
continue
if i == 0: # header info
Information = {
"Header": line,
"Forcefield": "gaff2",
"Version": columns[columns.index("(Version") + 1],
"Date": " ".join(map(str, columns[columns.index("(Version") + 2 :])),
"Pair_Type": "lj/charmmfsw/coul/long", # this is meant to replace lj/charmm/coul/long which could also be used if desired
"Bond_Type": "Harmonic",
"Angle_Type": "Harmonic",
"Dihedral_Type": "Fourier",
"Improper_Type": "cvff",
}
if section_counter == 1 and i != 0: # atom info
Atoms[int(i + 1)] = {
"Atom": columns[0],
"Mass": float(columns[1]),
"Unknown Param": float(
columns[2]
), # unsure what this parameter is -> have not found a use for it
"Information": " ".join(map(str, columns[3:])),
}
elif section_counter == 2: # bond info
match = bond_pattern.match(line)
if match:
bond_data = match.group(3).split()
Atom_1 = match.group(1)
Atom_2 = match.group(2)
Bonds[int(i + 1)] = {
"Bond": Atom_1 + "-" + Atom_2,
"r_Bond": Atom_2 + "-" + Atom_1,
"Atom_1": Atom_1,
"Atom_2": Atom_2,
"K": float(bond_data[0]),
"r": float(bond_data[1]),
"Information": " ".join(map(str, bond_data[2:])),
}
else:
if problem is True:
print(problem)
print(f"Matching problem with {line}")
problem = True
elif section_counter == 3: # angle info
match = angle_pattern.match(line)
if match:
angle_data = match.group(4).split()
Atom_1 = match.group(1)
Atom_2 = match.group(2)
Atom_3 = match.group(3)
Angles[int(i + 1)] = {
"Angle": Atom_1 + "-" + Atom_2 + "-" + Atom_3,
"r_Angle": Atom_3 + "-" + Atom_2 + "-" + Atom_1,
"Atom_1": Atom_1,
"Atom_2": Atom_2,
"Atom_3": Atom_3,
"K": float(angle_data[0]),
"theta": float(angle_data[1]),
"Information": " ".join(map(str, angle_data[2:])),
}
else:
print(f"Matching problem with {line}")
elif section_counter == 4: # dihedral info
match = dihedral_pattern.match(line)
if match:
dihedral_data = match.group(5).split()
# The method we have employed to convert Amber style dihedrals into LAMMPS is through the Fourier style
# m (the number of summations is always 1)
# the first two parameters in gaff2.dat correspond to K=param_1/param_2
K = float(dihedral_data[1]) / float(dihedral_data[0])
Atom_1 = match.group(1)
Atom_2 = match.group(2)
Atom_3 = match.group(3)
Atom_4 = match.group(4)
Dihedrals[int(i + 1)] = {
"Dihedral": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4,
"r_Dihedral": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1,
"Atom_1": Atom_1,
"Atom_2": Atom_2,
"Atom_3": Atom_3,
"Atom_4": Atom_4,
"m": 1,
"K": K,
"n": int(float(dihedral_data[3])),
"d": float(dihedral_data[2]),
"Information": " ".join(map(str, dihedral_data[4:])),
}
else:
print(f"[read_gaff2] Matching problem with {line}")
elif section_counter == 5: # improper info
match = improper_pattern.match(line)
if match:
improper_data = match.group(5).split()
# The method we have employed to convert Amber style impropers into LAMMPS is through the cvff style
# Amber provides the d-angle as either 0 or 180 which we must convert into cvff_style via
# where d = cos(d-angle) (e.g., -1 for 180 degrees and 1 for 0 degrees)
if float(improper_data[1]) == float(180.0):
d = int(-1)
elif float(improper_data[1]) == float(0):
d = int(1)
else:
print("unknown d-value, writing as None")
d = None
Atom_1 = match.group(1)
Atom_2 = match.group(2)
Atom_3 = match.group(3)
Atom_4 = match.group(4)
Impropers[int(i + 1)] = {
"Improper": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4,
"r_Improper": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1,
"Atom_1": Atom_1,
"Atom_2": Atom_2,
"Atom_3": Atom_3,
"Atom_4": Atom_4,
"K": float(improper_data[0]),
"n": int(float(improper_data[2])),
"d": d,
"Information": " ".join(map(str, dihedral_data[3:])),
}
else:
print(f"Matching problem with {line}")
elif section_counter == 8: # pair info
if len(columns) > 2:
# To convert Amber style pairs into LAMMPS, the sigma definition must be adjusted
# Amber defined Lennard-Jones potentials as U(r) = eps*((sigma/r)^12-2*(sigma/r)^6)
# while LAMMPS defines them as 4*eps*((sigma/r)^12-(sigma/r)^6
# Amber also provides the radius instead of the diameter for sigma
# To address these two issues we must multiple the Amber value by 2*2**(-1/6)
# Credit for this descovery goes to Andrew Jewett of MolTemplate
# https://github.com/jewettaij/moltemplate/blob/master/moltemplate/amber2lt.py
Atom_1 = columns[0]
Atom_2 = columns[0]
sigma = float(columns[1]) * 2 * 2 ** (-1 / 6)
Pairs[int(i + 1)] = {
"Pair": Atom_1 + "-" + Atom_2,
"r_Pair": Atom_2 + "-" + Atom_1,
"Atom_1": Atom_1,
"Atom_2": Atom_2,
"eps": float(columns[2]),
"sigma": sigma,
}
gaff2 = {
"Information": Information,
"Atoms": Atoms,
"Pairs": Pairs,
"Bonds": Bonds,
"Angles": Angles,
"Dihedrals": Dihedrals,
"Impropers": Impropers,
}
if verbose:
for index, dicts in enumerate(gaff2.keys()):
keys = list(gaff2[dicts].keys())
if index != 0:
for j in range(1, len(keys)):
expected_key = int(keys[j - 1]) + 1
while expected_key < int(keys[j]):
expected_key += 1
print(
f"Missing {int(keys[j])-1} from {gaff2_in} in {dicts} Section"
)
please_check = True
if please_check:
print("----------------------------------------------------------")
print(f"Please check {gaff2_in} file for errors in these line")
print("----------------------------------------------------------")
else:
print(f"No errors found while reading {gaff2_in}!")
for i, dicts in enumerate(gaff2.keys()):
if i != 0:
keys = list(gaff2[dicts].keys())
print(f"Section {dicts} in lines [{keys[0]}-{keys[-1]}]")
if out_json:
with open(out_json, "w", encoding="utf-8") as file:
json.dump(gaff2, file, indent=4)
print(
f"printed .json file ({out_json}) containing information from {gaff2_in}"
)
return gaff2
[docs]
def read_frcmod(frcmod_file: str, out_json: Optional[str] = None) -> dict:
"""
pylift.reader.read_frcmod
Read in a FRCMOD file from the AmberTools prmchk or prmchk2 utilities.
Arguments:
frcmod_file (str): name of the FRCMOD file
out_json (str): name of the FRCMOD information translated to a PyLIFT-readable JSON file
Returns:
dict: FRCMOD file contents in a PyLIFT-readable dictionary
"""
with open(frcmod_file, "r", encoding="utf-8") as file:
lines = file.readlines()
mass_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = False
nonbon_section = False
mass_check = False
bond_check = False
angle_check = False
nonbon_check = False
mass_dict = {}
bond_dict = {}
angle_dict = {}
dihedral_dict = {}
improper_dict = {}
nonbon_dict = {}
frcmod_dict = {}
for i, line in enumerate(lines):
if not line:
continue
line = line.strip()
columns = line.split()
# regex to split based upon space or '-'
bond_pattern = re.compile(r"[\s-]+")
angle_pattern = re.compile(r"[\s-]+")
# these require special regex patterns because of the wildcard X causing problems with line splitting
dihedral_pattern = re.compile(
r"([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s+(\d+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+(same)\s+(as)\s+([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*,\s*(penalty)\s+(score=)\s*([\d.]+)"
)
improper_pattern = re.compile(
r"([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*"
r"(.*?)(?:\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+)\s*[-\s]\s*([a-zA-Z0-9+]+),\s*penalty score=\s*([\d.]+))?$"
)
if line.startswith("MASS"):
mass_section = True
bond_section = False
angle_section = False
dihedral_section = False
improper_section = False
nonbon_section = False
elif line.startswith("BOND"):
mass_section = False
bond_section = True
angle_section = False
dihedral_section = False
improper_section = False
nonbon_section = False
elif line.startswith("ANGLE"):
mass_section = False
bond_section = False
angle_section = True
dihedral_section = False
improper_section = False
nonbon_section = False
elif line.startswith("DIHE"):
mass_section = False
bond_section = False
angle_section = False
dihedral_section = True
improper_section = False
nonbon_section = False
elif line.startswith("IMPROPER"):
mass_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = True
nonbon_section = False
elif line.startswith("NONBON"):
mass_section = False
bond_section = False
angle_section = False
dihedral_section = False
improper_section = True
nonbon_section = False
elif mass_section:
if not line:
continue
mass_dict[i + 1] = {
"atom": columns[0],
"rep_atom": columns[-1],
"mass": columns[1],
"unknown_param": columns[2],
"information": columns[3:],
}
elif bond_section:
if not line:
continue
match = bond_pattern.match(line)
if match:
Atom_1 = match.group(1)
Atom_2 = match.group(2)
K = float(match.group(3))
r = float(match.group(4))
rep_Atom_1 = match.group(7)
rep_Atom_2 = match.group(8)
penalty = float(match.group(match.lastindex))
bond_dict[i + 1] = {
"Bond": Atom_1 + "-" + Atom_2,
"r_Bond": Atom_2 + "-" + Atom_1,
"atom_1": Atom_1,
"atom_2": Atom_2,
"K": K,
"r": r,
"rep_Bond": rep_Atom_1 + "-" + rep_Atom_2,
"rep_r_Bond": rep_Atom_2 + "-" + rep_Atom_1,
"rep_atom_1": rep_Atom_1,
"rep_atom_2": rep_Atom_2,
"penalty_score": penalty,
}
elif angle_section:
if not line:
continue
match = bond_pattern.match(line)
if match:
Atom_1 = match.group(1)
Atom_2 = match.group(2)
Atom_3 = match.group(3)
K = float(match.group(4))
theta = float(match.group(5))
rep_Atom_1 = match.group(8)
rep_Atom_2 = match.group(9)
rep_Atom_3 = match.group(10)
penalty = float(match.group(match.lastindex))
bond_dict[i + 1] = {
"Angle": Atom_1 + "-" + Atom_2 + "-" + Atom_3,
"r_Bond": Atom_3 + "-" + Atom_2 + "-" + Atom_1,
"atom_1": Atom_1,
"atom_2": Atom_2,
"atom_3": Atom_3,
"K": K,
"theta": theta,
"rep_Angle": rep_Atom_1 + "-" + rep_Atom_2 + "-" + rep_Atom_3,
"rep_r_Angle": rep_Atom_3 + "-" + rep_Atom_2 + "-" + rep_Atom_1,
"rep_atom_1": rep_Atom_1,
"rep_atom_2": rep_Atom_2,
"rep_Atom_3": rep_Atom_3,
"penalty_score": penalty,
}
elif dihedral_section:
if not line:
continue
match = dihedral_pattern.match(line)
if match:
# see read_gaff2 for derivation of K
K = float(match.group(6)) / float(match.group(5))
Atom_1 = match.group(1)
Atom_2 = match.group(2)
Atom_3 = match.group(3)
Atom_4 = match.group(4)
rep_Atom_1 = match.group(11)
rep_Atom_2 = match.group(12)
rep_Atom_3 = match.group(13)
rep_Atom_4 = match.group(14)
penalty = float(match.group(match.lastindex))
dihedral_dict[i + 1] = {
"Dihedral": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4,
"r_Dihedral": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1,
"atom_1": Atom_1,
"atom_2": Atom_2,
"atom_3": Atom_3,
"atom_4": Atom_4,
"m": 1,
"K": K,
"n": float(match.group(8)),
"d": float(match.group(7)),
"rep_Dihedral": rep_Atom_1
+ "-"
+ rep_Atom_2
+ "-"
+ rep_Atom_3
+ "-"
+ rep_Atom_4,
"rep_r_Dihedral": rep_Atom_4
+ "-"
+ rep_Atom_3
+ "-"
+ rep_Atom_2
+ "-"
+ rep_Atom_1,
"rep_atom_1": rep_Atom_1,
"rep_atom_2": rep_Atom_2,
"rep_atom_3": rep_Atom_3,
"rep_atom_4": rep_Atom_4,
"penalty_score": penalty,
}
else:
print(f"Matching problem with {line}")
elif improper_section:
if not line:
continue
match = improper_pattern.match(line)
if match:
# see read_gaff2 for derivation of d
if float(match.group(6)) == float(180.0):
d = int(-1)
elif float(match.group(6)) == float(0):
d = int(1)
else:
print("unknown d-value, writing as None")
d = None
Atom_1 = match.group(1)
Atom_2 = match.group(2)
Atom_3 = match.group(3)
Atom_4 = match.group(4)
improper_dict[i + 1] = {
"Improper": Atom_1 + "-" + Atom_2 + "-" + Atom_3 + "-" + Atom_4,
"r_Improper": Atom_4 + "-" + Atom_3 + "-" + Atom_2 + "-" + Atom_1,
"atom_1": Atom_1,
"atom_2": Atom_2,
"atom_3": Atom_3,
"atom_4": Atom_4,
"K": float(match.group(5)),
"d": d,
"n": int(float(match.group(7))),
"rep_Improper": None,
"rep_r_Improper": None,
"rep_atom_1": None,
"rep_atom_2": None,
"rep_atom_3": None,
"rep_atom_4": None,
"penalty_score": None,
"Information": match.group(8),
}
default_check = ["Using the default value", None]
if match.group(8) not in default_check:
text_part = match.group(8).strip().rstrip(")")
text_parts = re.split(r"[-,\s]", text_part)
text_parts = [
part for part in text_parts if part
] # Remove empty strings
first = True
for index, string in enumerate(text_parts):
if len(string) < 3 and first:
first_index = index
first = False
rep_Atom_1 = text_parts[first_index]
rep_Atom_2 = text_parts[first_index + 1]
rep_Atom_3 = text_parts[first_index + 2]
rep_Atom_4 = text_parts[first_index + 3]
improper_dict[i + 1].update(
{
"rep_Improper": rep_Atom_1
+ "-"
+ rep_Atom_2
+ "-"
+ rep_Atom_3
+ "-"
+ rep_Atom_4,
"rep_r_Improper": rep_Atom_4
+ "-"
+ rep_Atom_3
+ "-"
+ rep_Atom_2
+ "-"
+ rep_Atom_1,
"rep_atom_1": rep_Atom_1,
"rep_atom_2": rep_Atom_2,
"rep_atom_3": rep_Atom_3,
"rep_atom_4": rep_Atom_4,
"penalty_score": float(text_parts[-1]),
"Information": " ".join(text_parts[0 : (first_index - 1)]),
}
)
else:
print(f"Matching problem with dihedral line: {line}")
elif nonbon_section:
if not line:
continue
# see read_gaff2 for derivation of sigma
sigma = float(columns[1]) * 2 * 2 ** (-1 / 6)
eps = float(columns[2])
nonbon_dict[i + 1] = {
"atom": columns[0],
"rep_atom": columns[-1],
"eps": eps,
"sigma": sigma,
}
frcmod_dict = {
"mass_dict": mass_dict,
"bond_dict": bond_dict,
"angle_dict": angle_dict,
"dihedral_dict": dihedral_dict,
"improper_dict": improper_dict,
"nonbon_dict": nonbon_dict,
}
if out_json:
with open(out_json, "w", encoding="utf-8") as file:
json.dump(frcmod_dict, file, indent=4)
print(
f"printed .json file ({out_json}) containing information from {frcmod_file}"
)
print("[read_frcmod] completed")
return frcmod_dict