calc utils

This commit is contained in:
2026-01-11 11:37:11 +08:00
parent 5fca3520f6
commit 5e94b202b5
7 changed files with 559 additions and 0 deletions

4
calc_utils/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
dist
__pycache__
*.egg-info
.venv

0
calc_utils/README.md Normal file
View File

View File

@ -0,0 +1,19 @@
# from .patch_gaussian import GSUB
from .rdkit2ase import MolToAtoms,AtomsToMol
from .rdkit_utils import draw2D,draw3D
from rdkit.Chem import AllChem
from rdkit import Chem
from ase import atoms
# import patch_gaussian as gaussian
__all__ = [
# 'GSUB',
'MolToAtoms',
'AtomsToMol',
'draw2D',
'draw3D',
'Chem',
'AllChem',
'atoms',
'gaussian'
]

View File

@ -0,0 +1,367 @@
from ase.calculators import gaussian
from ase.calculators.calculator import FileIOCalculator
from ase.io import read, write
from typing import TYPE_CHECKING, Optional, Literal
from copy import deepcopy
GSUB = "/home/fanhj/calcs/lele/tools/gsub_wait"
gau_src= gaussian.Gaussian
gau_dyn_src = gaussian.GaussianDynamics
methodType = Optional[
str
| Literal[
"HF",
"MP2",
"MP3",
"MP4",
"MP4(DQ)",
"MP4(SDQ)",
"MP5",
"CCSD",
"CCSDT",
"QCISD",
"CID",
"CISD",
"CIS",
"B3LYP",
"B3PW91",
"BLYP",
"PBE",
"PBE0",
"M06",
"M062X",
"M06L",
"M06HF",
"CAM-B3LYP",
"wb97xd",
"wb97xd3",
"LC-wPBE",
"HSE06",
"LSDA",
"SVWN",
"PW91",
"mPW1PW91",
"HCTH",
"HCTH147",
"HCTH407",
"TPSSh",
"TPSS",
"revPBE",
"PBEPBE",
"B2PLYP",
"mPW2PLYP",
"B2PLYPD3",
"PBE0DH",
"PBEQIDH",
]
]
basisType = Optional[
str
| Literal[
"STO-3G",
"3-21G",
"6-31G",
"6-31G(d)",
"6-31G(d,p)",
"6-31+G(d)",
"6-31+G(d,p)",
"6-31++G(d,p)",
"6-311G",
"6-311G(d)",
"6-311G(d,p)",
"6-311+G(d)",
"6-311+G(d,p)",
"6-311++G(d,p)",
"cc-pVDZ",
"cc-pVTZ",
"cc-pVQZ",
"cc-pV5Z",
"cc-pV6Z",
"aug-cc-pVDZ",
"aug-cc-pVTZ",
"aug-cc-pVQZ",
"aug-cc-pV5Z",
"def2SVP",
"def2SVPD",
"def2TZVP",
"def2TZVPD",
"def2QZVP",
"def2QZVPP",
"LANL2DZ",
"LANL2MB",
"SDD",
"CEP-4G",
"CEP-31G",
"CEP-121G",
"DGDZVP",
"DGDZVP2",
"Gen",
"GenECP",
]
]
scrfSolventType = Optional[
str
| Literal['Water', 'Acetone', 'Acetonitrile', 'Aniline', 'Benzene', 'Bromoform', 'Butanol',
'CarbonDisulfide', 'CarbonTetrachloride', 'Chlorobenzene', 'Chloroform', 'Cyclohexane',
'Dichloroethane', 'Dichloromethane', 'Diethylether', 'Dimethylformamide', 'Dimethylsulfoxide',
'Ethanol', 'Ethylacetate', 'Heptane', 'Hexane', 'Methanol', 'Nitromethane', 'Octanol',
'Pyridine', 'Tetrahydrofuran', 'Toluene', 'Xylene'
]
]
class Gaussian(gau_src):
mem:int
nprocshared:Literal[20,32]
charge: int
mult: Optional[int]
def __init__(self,
proc:Literal[20,32]=32,
charge:int=0,
mult:Optional[int]=None,
mem="30GB",
label='Gaussian',
method:methodType=None,
basis:basisType=None,
fitting_basis:Optional[str]=None,
output_type:Literal['N','P']='P',
basisfile:Optional[str]=None,
basis_set:Optional[str]=None,
xc:Optional[str]=None,
extra:Optional[str]=None,
ioplist:Optional[list[str]]=None,
addsec=None,
spinlist=None,
zefflist=None,
qmomlist=None,
nmagmlist=None,
znuclist=None,
radnuclearlist=None,
chk:Optional[str]=None,
oldchk:Optional[str]=None,
nprocshared:Optional[int]=None,
scrfSolvent:scrfSolventType=None,
scrf:Optional[str]=None,
em:Optional[Literal["GD2","GD3","GD4","GD3BJ"]|str]=None,
**kwargs):
'''
Parameters
-----------
proc: int
A short name for nprocshared
method: str
Level of theory to use, e.g. ``hf``, ``ccsd``, ``mp2``, or ``b3lyp``.
Overrides ``xc`` (see below).
xc: str
Level of theory to use. Translates several XC functionals from
their common name (e.g. ``PBE``) to their internal Gaussian name
(e.g. ``PBEPBE``).
basis: str
The basis set to use. If not provided, no basis set will be requested,
which usually results in ``STO-3G``. Maybe omitted if basisfile is set
(see below).
fitting_basis: str
The name of the fitting basis set to use.
output_type: str
Level of output to record in the Gaussian
output file - this may be ``N``- normal or ``P`` -
additional.
basisfile: str
The name of the basis file to use. If a value is provided, basis may
be omitted (it will be automatically set to 'gen')
basis_set: str
The basis set definition to use. This is an alternative
to basisfile, and would be the same as the contents
of such a file.
charge: int
The system charge. If not provided, it will be automatically
determined from the ``Atoms`` objects initial_charges.
mult: int
The system multiplicity (``spin + 1``). If not provided, it will be
automatically determined from the ``Atoms`` objects
``initial_magnetic_moments``.
extra: str
Extra lines to be included in the route section verbatim.
It should not be necessary to use this, but it is included for
backwards compatibility.
ioplist: list
A collection of IOPs definitions to be included in the route line.
addsec: str
Text to be added after the molecular geometry specification, e.g. for
defining masses with ``freq=ReadIso``.
spinlist: list
A list of nuclear spins to be added into the nuclear
propeties section of the molecule specification.
zefflist: list
A list of effective charges to be added into the nuclear
propeties section of the molecule specification.
qmomlist: list
A list of nuclear quadropole moments to be added into
the nuclear propeties section of the molecule
specification.
nmagmlist: list
A list of nuclear magnetic moments to be added into
the nuclear propeties section of the molecule
specification.
znuclist: list
A list of nuclear charges to be added into the nuclear
propeties section of the molecule specification.
radnuclearlist: list
A list of nuclear radii to be added into the nuclear
propeties section of the molecule specification.
params: dict
Contains any extra keywords and values that will be included in either
the link0 section or route section of the gaussian input file.
To be included in the link0 section, the keyword must be one of the
following: ``mem``, ``chk``, ``oldchk``, ``schk``, ``rwf``,
``oldmatrix``, ``oldrawmatrix``, ``int``, ``d2e``, ``save``,
``nosave``, ``errorsave``, ``cpu``, ``nprocshared``, ``gpucpu``,
``lindaworkers``, ``usessh``, ``ssh``, ``debuglinda``.
Any other keywords will be placed (along with their values) in the
route section.
'''
if nprocshared is not None and proc is not None:
if nprocshared == proc:print("Providing both nprocshared and proc is not recomanded")
else:
raise ValueError("both nprocshared and proc provided, and inequal.")
if scrfSolvent is not None and scrf is not None:
raise ValueError("scrfSolvent and scrf both not None")
if scrfSolvent is not None:
scrf = "Solvent="+scrfSolvent
optional_keys = ['chk','oldchk','scrf', 'geom', 'integral', 'density', 'nosymm', 'symmetry', 'units',
'temperature', 'pressure', 'counterpoise', 'gfinput', 'gfprint', 'test',
'output', 'punch', 'prop', 'pseudo', 'restart', 'scale', 'sparse', 'window', 'em']
for key in optional_keys:
val = locals().get(key,None)
if val is not None:
kwargs[key] = val
super().__init__(
nprocshared = proc,
mem=mem,
label=label,
command=GSUB+" "+label,
charge = charge,
mult = mult,
method = method,
basis = basis,
fitting_basis = fitting_basis,
output_type = output_type,
basisfile = basisfile,
basis_set = basis_set,
xc = xc,
extra = extra,
ioplist = ioplist,
addsec = addsec,
spinlist = spinlist,
zefflist = zefflist,
qmomlist = qmomlist,
nmagmlist = nmagmlist,
znuclist = znuclist,
radnuclearlist = radnuclearlist,
**kwargs
)
assert self.fileio_rules
self.fileio_rules.stdin_name = '{prefix}.gin'
def mod(self,charge:int=0, mult:int=1) -> "Gaussian":
new = deepcopy(self)
new.charge = charge
new.mult = mult
return new
def write_input(self, atoms, properties=None, system_changes=None):
FileIOCalculator.write_input(self, atoms, properties, system_changes)
if TYPE_CHECKING:
assert isinstance(self.label, str)
assert self.parameters
write(self.label + '.gin', atoms, properties=properties,
format='gaussian-in', parallel=False, **self.parameters)
def read_results(self):
if TYPE_CHECKING:
assert isinstance(self.label, str)
output = read(self.label + '.out', format='gaussian-out')
assert output
self.calc = output.calc
self.results = output.calc.results
class GaussianDynamics(gau_dyn_src):
def __init__(self, atoms, calc=None):
super().__init__(atoms, calc=calc)
def run(self, **kwargs):
calc_old = self.atoms.calc
params_old = deepcopy(self.calc.parameters)
self.delete_keywords(kwargs)
self.delete_keywords(self.calc.parameters)
self.set_keywords(kwargs)
self.calc.set(**kwargs)
self.atoms.calc = self.calc
try:
self.atoms.get_potential_energy()
except OSError:
converged = False
else:
converged = True
atoms = read(self.calc.label + '.out')
self.atoms.cell = atoms.cell
self.atoms.positions = atoms.positions
self.atoms.calc = atoms.calc
self.calc.parameters = params_old
self.calc.reset()
if calc_old is not None:
self.atoms.calc = calc_old
return converged
class GaussianOptimizer(GaussianDynamics):
keyword = 'opt'
special_keywords = {
'fmax': '{}',
'steps': 'maxcycle={}',
}
class GaussianIRC(GaussianDynamics):
keyword = 'irc'
special_keywords = {
'direction': '{}',
'steps': 'maxpoints={}',
}
gaussian.Gaussian = Gaussian
gaussian.GaussianDynamics = GaussianDynamics
gaussian.GaussianOptimizer = GaussianOptimizer
gaussian.GaussianIRC = GaussianIRC
__all__ = [
'Gaussian',
'GaussianDynamics',
'GaussianOptimizer',
'GaussianIRC',
'GSUB'
]

View File

@ -0,0 +1,145 @@
from rdkit import Chem
from ase import atoms
from typing import Optional
import numpy as np
from rdkit.Geometry import Point3D
def MolToAtoms(mol: Chem.Mol, confID=-1) -> atoms.Atoms:
conf = mol.GetConformer(confID)
symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
positions = [
(conf.GetAtomPosition(i).x,
conf.GetAtomPosition(i).y,
conf.GetAtomPosition(i).z)
for i in range(mol.GetNumAtoms())
]
aseAtoms = atoms.Atoms(symbols=symbols, positions=positions)
charges = [atom.GetFormalCharge() for atom in mol.GetAtoms()]
aseAtoms.set_initial_charges(charges)
return aseAtoms
def AtomsToMol(
atoms: atoms.Atoms,
mol: Optional[Chem.Mol] = None,
conf_id: int = 0,
charge: Optional[int] = None,
allow_reorder: bool = False,
inplace = False
) -> Chem.Mol:
"""
Convert ASE Atoms -> RDKit Mol.
If mol is provided:
- verify natoms and element symbols match (unless allow_reorder=True)
- update (or add) conformer coordinates so mol matches atoms
If mol is None:
- create a new Mol with atoms only, set 3D coords
- Determine bonds from geometry using rdDetermineBonds.DetermineBonds()
Parameters
----------
atoms : ase.Atoms
Must have positions (Å).
mol : rdkit.Chem.Mol | None
Optional template mol.
conf_id : int
Conformer id to update/use. If mol has no conformer, one will be added.
charge : int | None
Total molecular charge used by DetermineBonds (recommended if known).
If None, will try to infer from formal charges when mol is provided;
otherwise defaults to 0 for new mol.
allow_reorder : bool
If True, will not enforce symbol-by-index equality (only checks counts).
Most of the time you want False to guarantee consistency.
inplace : bool
If True, will modify input mol instead of make a copy.
Returns
-------
rdkit.Chem.Mol
"""
positions = np.asarray(atoms.get_positions(), dtype=float)
symbols = list(atoms.get_chemical_symbols())
n = len(symbols)
# -------- case 1: update existing mol --------
if mol is not None:
if mol.GetNumAtoms() != n:
raise ValueError(f"mol has {mol.GetNumAtoms()} atoms but ASE atoms has {n}.")
mol_symbols = [a.GetSymbol() for a in mol.GetAtoms()]
if not allow_reorder:
if mol_symbols != symbols:
raise ValueError(
"Element symbols mismatch by index between mol and atoms.\n"
f"mol: {mol_symbols}\n"
f"atoms: {symbols}\n"
"If you REALLY know what you're doing, set allow_reorder=True "
"and handle mapping yourself."
)
else:
# only check multiset counts
if sorted(mol_symbols) != sorted(symbols):
raise ValueError("Element symbol counts differ between mol and atoms.")
if not inplace:
mol = Chem.Mol(mol) # copy
if mol.GetNumConformers() == 0:
conf = Chem.Conformer(n)
conf.Set3D(True)
mol.AddConformer(conf, assignId=True)
# pick conformer
try:
conf = mol.GetConformer(conf_id)
except ValueError:
# create new conformer if requested id doesn't exist
conf = Chem.Conformer(n)
conf.Set3D(True)
mol.AddConformer(conf, assignId=True)
conf = mol.GetConformer(mol.GetNumConformers() - 1)
if conf_id!=0:
print("Warning: Failed to pick conformer.")
for i in range(n):
x, y, z = map(float, positions[i])
conf.SetAtomPosition(i, Point3D(x, y, z))
# charge inference if not given
if charge is None:
charge = int(sum(a.GetFormalCharge() for a in mol.GetAtoms()))
return mol
# -------- case 2: build mol + determine bonds --------
rw = Chem.RWMol()
for sym in symbols:
rw.AddAtom(Chem.Atom(sym))
new_mol = rw.GetMol()
conf = Chem.Conformer(n)
conf.Set3D(True)
for i in range(n):
x, y, z = map(float, positions[i])
conf.SetAtomPosition(i, Point3D(x, y, z))
new_mol.AddConformer(conf, assignId=True)
# Determine bonds from geometry
if charge is None:
charge = 0
try:
from rdkit.Chem import rdDetermineBonds
rdDetermineBonds.DetermineBonds(new_mol, charge=charge)
except Exception as e:
raise RuntimeError(
"DetermineBonds failed. This can happen for metals/ions/odd geometries.\n"
"Consider providing a template mol, or implement custom distance-based bonding.\n"
f"Original error: {e}"
)
return new_mol

View File

@ -0,0 +1,14 @@
from rdkit import Chem
from rdkit.Chem import Draw
from IPython.display import SVG
from rdkit.Chem.Draw import IPythonConsole
def draw2D(mol:Chem.Mol,confId:int=-1):
d = Draw.MolDraw2DSVG(250, 200)
d.drawOptions().addAtomIndices = True
d.DrawMolecule(mol,confId=confId)
d.FinishDrawing()
return SVG(d.GetDrawingText())
draw3D = lambda m3d,confId=-1: IPythonConsole.drawMol3D(m3d,confId=confId)

10
calc_utils/pyproject.toml Normal file
View File

@ -0,0 +1,10 @@
[project]
name = "calc-utils"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"ase>=3.27.0",
"rdkit>=2025.9.3",
]