diff --git a/calc_utils/.gitignore b/calc_utils/.gitignore new file mode 100644 index 0000000..b1e1f4e --- /dev/null +++ b/calc_utils/.gitignore @@ -0,0 +1,4 @@ +dist +__pycache__ +*.egg-info +.venv diff --git a/calc_utils/README.md b/calc_utils/README.md new file mode 100644 index 0000000..e69de29 diff --git a/calc_utils/futils/__init__.py b/calc_utils/futils/__init__.py new file mode 100644 index 0000000..ebe2ab9 --- /dev/null +++ b/calc_utils/futils/__init__.py @@ -0,0 +1,19 @@ +# from .patch_gaussian import GSUB +from .rdkit2ase import MolToAtoms,AtomsToMol +from .rdkit_utils import draw2D,draw3D +from rdkit.Chem import AllChem +from rdkit import Chem +from ase import atoms +# import patch_gaussian as gaussian + +__all__ = [ + # 'GSUB', + 'MolToAtoms', + 'AtomsToMol', + 'draw2D', + 'draw3D', + 'Chem', + 'AllChem', + 'atoms', + 'gaussian' +] \ No newline at end of file diff --git a/calc_utils/futils/gaussian.py b/calc_utils/futils/gaussian.py new file mode 100644 index 0000000..a41b754 --- /dev/null +++ b/calc_utils/futils/gaussian.py @@ -0,0 +1,367 @@ +from ase.calculators import gaussian +from ase.calculators.calculator import FileIOCalculator +from ase.io import read, write +from typing import TYPE_CHECKING, Optional, Literal +from copy import deepcopy + +GSUB = "/home/fanhj/calcs/lele/tools/gsub_wait" +gau_src= gaussian.Gaussian +gau_dyn_src = gaussian.GaussianDynamics + +methodType = Optional[ + str + | Literal[ + "HF", + "MP2", + "MP3", + "MP4", + "MP4(DQ)", + "MP4(SDQ)", + "MP5", + "CCSD", + "CCSDT", + "QCISD", + "CID", + "CISD", + "CIS", + "B3LYP", + "B3PW91", + "BLYP", + "PBE", + "PBE0", + "M06", + "M062X", + "M06L", + "M06HF", + "CAM-B3LYP", + "wb97xd", + "wb97xd3", + "LC-wPBE", + "HSE06", + "LSDA", + "SVWN", + "PW91", + "mPW1PW91", + "HCTH", + "HCTH147", + "HCTH407", + "TPSSh", + "TPSS", + "revPBE", + "PBEPBE", + "B2PLYP", + "mPW2PLYP", + "B2PLYPD3", + "PBE0DH", + "PBEQIDH", + ] +] +basisType = Optional[ + str + | Literal[ + "STO-3G", + "3-21G", + "6-31G", + "6-31G(d)", + "6-31G(d,p)", + "6-31+G(d)", + "6-31+G(d,p)", + "6-31++G(d,p)", + "6-311G", + "6-311G(d)", + "6-311G(d,p)", + "6-311+G(d)", + "6-311+G(d,p)", + "6-311++G(d,p)", + "cc-pVDZ", + "cc-pVTZ", + "cc-pVQZ", + "cc-pV5Z", + "cc-pV6Z", + "aug-cc-pVDZ", + "aug-cc-pVTZ", + "aug-cc-pVQZ", + "aug-cc-pV5Z", + "def2SVP", + "def2SVPD", + "def2TZVP", + "def2TZVPD", + "def2QZVP", + "def2QZVPP", + "LANL2DZ", + "LANL2MB", + "SDD", + "CEP-4G", + "CEP-31G", + "CEP-121G", + "DGDZVP", + "DGDZVP2", + "Gen", + "GenECP", + ] +] + +scrfSolventType = Optional[ + str + | Literal['Water', 'Acetone', 'Acetonitrile', 'Aniline', 'Benzene', 'Bromoform', 'Butanol', + 'CarbonDisulfide', 'CarbonTetrachloride', 'Chlorobenzene', 'Chloroform', 'Cyclohexane', + 'Dichloroethane', 'Dichloromethane', 'Diethylether', 'Dimethylformamide', 'Dimethylsulfoxide', + 'Ethanol', 'Ethylacetate', 'Heptane', 'Hexane', 'Methanol', 'Nitromethane', 'Octanol', + 'Pyridine', 'Tetrahydrofuran', 'Toluene', 'Xylene' + ] +] + + + +class Gaussian(gau_src): + mem:int + nprocshared:Literal[20,32] + charge: int + mult: Optional[int] + + def __init__(self, + proc:Literal[20,32]=32, + charge:int=0, + mult:Optional[int]=None, + mem="30GB", + label='Gaussian', + method:methodType=None, + basis:basisType=None, + fitting_basis:Optional[str]=None, + output_type:Literal['N','P']='P', + basisfile:Optional[str]=None, + basis_set:Optional[str]=None, + xc:Optional[str]=None, + extra:Optional[str]=None, + ioplist:Optional[list[str]]=None, + addsec=None, + spinlist=None, + zefflist=None, + qmomlist=None, + nmagmlist=None, + znuclist=None, + radnuclearlist=None, + chk:Optional[str]=None, + oldchk:Optional[str]=None, + nprocshared:Optional[int]=None, + scrfSolvent:scrfSolventType=None, + scrf:Optional[str]=None, + em:Optional[Literal["GD2","GD3","GD4","GD3BJ"]|str]=None, + **kwargs): + ''' + Parameters + ----------- + proc: int + A short name for nprocshared + method: str + Level of theory to use, e.g. ``hf``, ``ccsd``, ``mp2``, or ``b3lyp``. + Overrides ``xc`` (see below). + xc: str + Level of theory to use. Translates several XC functionals from + their common name (e.g. ``PBE``) to their internal Gaussian name + (e.g. ``PBEPBE``). + basis: str + The basis set to use. If not provided, no basis set will be requested, + which usually results in ``STO-3G``. Maybe omitted if basisfile is set + (see below). + fitting_basis: str + The name of the fitting basis set to use. + output_type: str + Level of output to record in the Gaussian + output file - this may be ``N``- normal or ``P`` - + additional. + basisfile: str + The name of the basis file to use. If a value is provided, basis may + be omitted (it will be automatically set to 'gen') + basis_set: str + The basis set definition to use. This is an alternative + to basisfile, and would be the same as the contents + of such a file. + charge: int + The system charge. If not provided, it will be automatically + determined from the ``Atoms`` object’s initial_charges. + mult: int + The system multiplicity (``spin + 1``). If not provided, it will be + automatically determined from the ``Atoms`` object’s + ``initial_magnetic_moments``. + extra: str + Extra lines to be included in the route section verbatim. + It should not be necessary to use this, but it is included for + backwards compatibility. + ioplist: list + A collection of IOPs definitions to be included in the route line. + addsec: str + Text to be added after the molecular geometry specification, e.g. for + defining masses with ``freq=ReadIso``. + spinlist: list + A list of nuclear spins to be added into the nuclear + propeties section of the molecule specification. + zefflist: list + A list of effective charges to be added into the nuclear + propeties section of the molecule specification. + qmomlist: list + A list of nuclear quadropole moments to be added into + the nuclear propeties section of the molecule + specification. + nmagmlist: list + A list of nuclear magnetic moments to be added into + the nuclear propeties section of the molecule + specification. + znuclist: list + A list of nuclear charges to be added into the nuclear + propeties section of the molecule specification. + radnuclearlist: list + A list of nuclear radii to be added into the nuclear + propeties section of the molecule specification. + params: dict + Contains any extra keywords and values that will be included in either + the link0 section or route section of the gaussian input file. + To be included in the link0 section, the keyword must be one of the + following: ``mem``, ``chk``, ``oldchk``, ``schk``, ``rwf``, + ``oldmatrix``, ``oldrawmatrix``, ``int``, ``d2e``, ``save``, + ``nosave``, ``errorsave``, ``cpu``, ``nprocshared``, ``gpucpu``, + ``lindaworkers``, ``usessh``, ``ssh``, ``debuglinda``. + Any other keywords will be placed (along with their values) in the + route section. + ''' + if nprocshared is not None and proc is not None: + if nprocshared == proc:print("Providing both nprocshared and proc is not recomanded") + else: + raise ValueError("both nprocshared and proc provided, and inequal.") + + if scrfSolvent is not None and scrf is not None: + raise ValueError("scrfSolvent and scrf both not None") + if scrfSolvent is not None: + scrf = "Solvent="+scrfSolvent + + + optional_keys = ['chk','oldchk','scrf', 'geom', 'integral', 'density', 'nosymm', 'symmetry', 'units', + 'temperature', 'pressure', 'counterpoise', 'gfinput', 'gfprint', 'test', + 'output', 'punch', 'prop', 'pseudo', 'restart', 'scale', 'sparse', 'window', 'em'] + for key in optional_keys: + val = locals().get(key,None) + if val is not None: + kwargs[key] = val + + super().__init__( + nprocshared = proc, + mem=mem, + label=label, + command=GSUB+" "+label, + + charge = charge, + mult = mult, + + method = method, + basis = basis, + fitting_basis = fitting_basis, + output_type = output_type, + + basisfile = basisfile, + basis_set = basis_set, + xc = xc, + + extra = extra, + ioplist = ioplist, + addsec = addsec, + + spinlist = spinlist, + zefflist = zefflist, + qmomlist = qmomlist, + nmagmlist = nmagmlist, + znuclist = znuclist, + radnuclearlist = radnuclearlist, + + + **kwargs + ) + assert self.fileio_rules + self.fileio_rules.stdin_name = '{prefix}.gin' + + def mod(self,charge:int=0, mult:int=1) -> "Gaussian": + new = deepcopy(self) + new.charge = charge + new.mult = mult + return new + + + def write_input(self, atoms, properties=None, system_changes=None): + FileIOCalculator.write_input(self, atoms, properties, system_changes) + if TYPE_CHECKING: + assert isinstance(self.label, str) + assert self.parameters + write(self.label + '.gin', atoms, properties=properties, + format='gaussian-in', parallel=False, **self.parameters) + + def read_results(self): + if TYPE_CHECKING: + assert isinstance(self.label, str) + output = read(self.label + '.out', format='gaussian-out') + assert output + self.calc = output.calc + self.results = output.calc.results + + +class GaussianDynamics(gau_dyn_src): + def __init__(self, atoms, calc=None): + super().__init__(atoms, calc=calc) + + def run(self, **kwargs): + calc_old = self.atoms.calc + params_old = deepcopy(self.calc.parameters) + + self.delete_keywords(kwargs) + self.delete_keywords(self.calc.parameters) + self.set_keywords(kwargs) + + self.calc.set(**kwargs) + self.atoms.calc = self.calc + + try: + self.atoms.get_potential_energy() + except OSError: + converged = False + else: + converged = True + + atoms = read(self.calc.label + '.out') + self.atoms.cell = atoms.cell + self.atoms.positions = atoms.positions + self.atoms.calc = atoms.calc + + self.calc.parameters = params_old + self.calc.reset() + if calc_old is not None: + self.atoms.calc = calc_old + + return converged + + +class GaussianOptimizer(GaussianDynamics): + keyword = 'opt' + special_keywords = { + 'fmax': '{}', + 'steps': 'maxcycle={}', + } + + +class GaussianIRC(GaussianDynamics): + keyword = 'irc' + special_keywords = { + 'direction': '{}', + 'steps': 'maxpoints={}', + } + + +gaussian.Gaussian = Gaussian +gaussian.GaussianDynamics = GaussianDynamics +gaussian.GaussianOptimizer = GaussianOptimizer +gaussian.GaussianIRC = GaussianIRC + +__all__ = [ + 'Gaussian', + 'GaussianDynamics', + 'GaussianOptimizer', + 'GaussianIRC', + 'GSUB' +] \ No newline at end of file diff --git a/calc_utils/futils/rdkit2ase.py b/calc_utils/futils/rdkit2ase.py new file mode 100644 index 0000000..aca954a --- /dev/null +++ b/calc_utils/futils/rdkit2ase.py @@ -0,0 +1,145 @@ +from rdkit import Chem +from ase import atoms +from typing import Optional +import numpy as np +from rdkit.Geometry import Point3D + + +def MolToAtoms(mol: Chem.Mol, confID=-1) -> atoms.Atoms: + conf = mol.GetConformer(confID) + symbols = [atom.GetSymbol() for atom in mol.GetAtoms()] + positions = [ + (conf.GetAtomPosition(i).x, + conf.GetAtomPosition(i).y, + conf.GetAtomPosition(i).z) + for i in range(mol.GetNumAtoms()) + ] + aseAtoms = atoms.Atoms(symbols=symbols, positions=positions) + charges = [atom.GetFormalCharge() for atom in mol.GetAtoms()] + aseAtoms.set_initial_charges(charges) + + return aseAtoms + + +def AtomsToMol( + atoms: atoms.Atoms, + mol: Optional[Chem.Mol] = None, + conf_id: int = 0, + charge: Optional[int] = None, + allow_reorder: bool = False, + inplace = False +) -> Chem.Mol: + """ + Convert ASE Atoms -> RDKit Mol. + + If mol is provided: + - verify natoms and element symbols match (unless allow_reorder=True) + - update (or add) conformer coordinates so mol matches atoms + + If mol is None: + - create a new Mol with atoms only, set 3D coords + - Determine bonds from geometry using rdDetermineBonds.DetermineBonds() + + Parameters + ---------- + atoms : ase.Atoms + Must have positions (Å). + mol : rdkit.Chem.Mol | None + Optional template mol. + conf_id : int + Conformer id to update/use. If mol has no conformer, one will be added. + charge : int | None + Total molecular charge used by DetermineBonds (recommended if known). + If None, will try to infer from formal charges when mol is provided; + otherwise defaults to 0 for new mol. + allow_reorder : bool + If True, will not enforce symbol-by-index equality (only checks counts). + Most of the time you want False to guarantee consistency. + inplace : bool + If True, will modify input mol instead of make a copy. + + Returns + ------- + rdkit.Chem.Mol + """ + positions = np.asarray(atoms.get_positions(), dtype=float) + symbols = list(atoms.get_chemical_symbols()) + n = len(symbols) + + # -------- case 1: update existing mol -------- + if mol is not None: + if mol.GetNumAtoms() != n: + raise ValueError(f"mol has {mol.GetNumAtoms()} atoms but ASE atoms has {n}.") + + mol_symbols = [a.GetSymbol() for a in mol.GetAtoms()] + if not allow_reorder: + if mol_symbols != symbols: + raise ValueError( + "Element symbols mismatch by index between mol and atoms.\n" + f"mol: {mol_symbols}\n" + f"atoms: {symbols}\n" + "If you REALLY know what you're doing, set allow_reorder=True " + "and handle mapping yourself." + ) + else: + # only check multiset counts + if sorted(mol_symbols) != sorted(symbols): + raise ValueError("Element symbol counts differ between mol and atoms.") + + if not inplace: + mol = Chem.Mol(mol) # copy + if mol.GetNumConformers() == 0: + conf = Chem.Conformer(n) + conf.Set3D(True) + mol.AddConformer(conf, assignId=True) + + # pick conformer + try: + conf = mol.GetConformer(conf_id) + except ValueError: + # create new conformer if requested id doesn't exist + conf = Chem.Conformer(n) + conf.Set3D(True) + mol.AddConformer(conf, assignId=True) + conf = mol.GetConformer(mol.GetNumConformers() - 1) + if conf_id!=0: + print("Warning: Failed to pick conformer.") + + for i in range(n): + x, y, z = map(float, positions[i]) + conf.SetAtomPosition(i, Point3D(x, y, z)) + + # charge inference if not given + if charge is None: + charge = int(sum(a.GetFormalCharge() for a in mol.GetAtoms())) + + return mol + + # -------- case 2: build mol + determine bonds -------- + rw = Chem.RWMol() + for sym in symbols: + rw.AddAtom(Chem.Atom(sym)) + + new_mol = rw.GetMol() + conf = Chem.Conformer(n) + conf.Set3D(True) + for i in range(n): + x, y, z = map(float, positions[i]) + conf.SetAtomPosition(i, Point3D(x, y, z)) + new_mol.AddConformer(conf, assignId=True) + + # Determine bonds from geometry + if charge is None: + charge = 0 + + try: + from rdkit.Chem import rdDetermineBonds + rdDetermineBonds.DetermineBonds(new_mol, charge=charge) + except Exception as e: + raise RuntimeError( + "DetermineBonds failed. This can happen for metals/ions/odd geometries.\n" + "Consider providing a template mol, or implement custom distance-based bonding.\n" + f"Original error: {e}" + ) + + return new_mol diff --git a/calc_utils/futils/rdkit_utils.py b/calc_utils/futils/rdkit_utils.py new file mode 100644 index 0000000..49df15d --- /dev/null +++ b/calc_utils/futils/rdkit_utils.py @@ -0,0 +1,14 @@ +from rdkit import Chem +from rdkit.Chem import Draw +from IPython.display import SVG +from rdkit.Chem.Draw import IPythonConsole + + +def draw2D(mol:Chem.Mol,confId:int=-1): + d = Draw.MolDraw2DSVG(250, 200) + d.drawOptions().addAtomIndices = True + d.DrawMolecule(mol,confId=confId) + d.FinishDrawing() + return SVG(d.GetDrawingText()) + +draw3D = lambda m3d,confId=-1: IPythonConsole.drawMol3D(m3d,confId=confId) \ No newline at end of file diff --git a/calc_utils/pyproject.toml b/calc_utils/pyproject.toml new file mode 100644 index 0000000..abe82e8 --- /dev/null +++ b/calc_utils/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "calc-utils" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "ase>=3.27.0", + "rdkit>=2025.9.3", +]