"""Base class defining the interfaces for common simulation operations"""
import json
from dataclasses import dataclass, field, asdict
from hashlib import sha512
from pathlib import Path
from typing import Any
import ase
import numpy as np
from examol.utils.conversions import read_from_string, write_to_string
@dataclass
class SimResult:
    """Code-agnostic record of the outputs of a single calculation

    The structure passed as ``xyz`` is parsed, run through the ``center()``
    method of the resulting atoms object, and re-serialized when the record
    is created, so the stored string may differ from the one supplied.
    """

    # Provenance of the result
    config_name: str
    """Name of the configuration used to compute the energy"""
    charge: int
    """Charge of the molecule"""
    solvent: str | None
    """Solvent around the molecule, if any"""

    # Computed quantities
    xyz: str = field(repr=False)
    """XYZ-format structure, re-centered with ``center()`` when the record is created"""
    energy: float | None = None
    """Energy of the molecule (units: eV)"""
    forces: np.ndarray | None = None
    """Forces acting on each atom (units: eV/Ang)"""

    def __post_init__(self):
        # Round-trip the geometry through an atoms object so it can be re-centered
        centered = read_from_string(self.xyz, 'xyz')
        centered.center()
        self.xyz = write_to_string(centered, 'xyz')

    @property
    def atoms(self) -> ase.Atoms:
        """Structure parsed from the stored XYZ string (re-parsed on every access)"""
        return read_from_string(self.xyz, 'xyz')

    def json(self, **kwargs) -> str:
        """Serialize the record to a JSON string

        Args:
            **kwargs: Passed through unchanged to :func:`json.dumps`
        Returns:
            JSON document containing every field of the record
        """
        record = asdict(self)
        forces = record['forces']
        # NumPy arrays are not JSON serializable; store them as nested lists
        if isinstance(forces, np.ndarray):
            record['forces'] = forces.tolist()
        return json.dumps(record, **kwargs)
class BaseSimulator:
    """Common interface shared by every simulation backend

    **Creating a New Simulator**

    :meth:`create_configuration`, :meth:`optimize_structure` and :meth:`compute_energy`
    only raise ``NotImplementedError`` here and are meant to be provided by subclasses.
    Keep the following in mind when implementing them:

    - Use underscores in the name of method configurations.

    Args:
        scratch_dir: Path in which to create temporary directories.
            ``None`` selects the relative directory ``tmp``
        retain_failed: Whether to retain failed computations
    """

    def __init__(self, scratch_dir: Path | str | None, retain_failed: bool = True):
        if scratch_dir is None:
            scratch_dir = 'tmp'
        self.scratch_dir: Path | None = Path(scratch_dir)
        self.retain_failed = retain_failed

    def _make_run_hash(self, xyz: str, config_name: str, charge: int, solvent: str | None) -> str:
        """Build a short identifier for a calculation

        Args:
            xyz: XYZ coordinates for the atoms
            config_name: Name of the configuration
            charge: Charge of the cell
            solvent: Name of the solvent, if any
        Returns:
            First 8 hexadecimal characters of the SHA-512 digest of the simulator
            class name followed by the inputs listed above
        """
        # NOTE(review): the pieces are concatenated without a delimiter, so different
        #  inputs that concatenate to the same text share a hash
        pieces = [self.__class__.__name__, xyz, config_name, str(charge)]
        if solvent is not None:
            pieces.append(solvent)
        payload = b''.join(piece.encode() for piece in pieces)
        return sha512(payload).hexdigest()[:8]

    def create_configuration(self, name: str, xyz: str, charge: int, solvent: str | None, **kwargs) -> Any:
        """Build the configuration required to run a particular computation

        Args:
            name: Name of the computational method
            xyz: Structure being evaluated in XYZ format
            charge: Charge on the system
            solvent: Name of any solvent
        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError()

    def optimize_structure(self, mol_key: str, xyz: str, config_name: str, charge: int = 0, solvent: str | None = None, **kwargs) \
            -> tuple[SimResult, list[SimResult], str | None]:
        """Relax a structure to an energy minimum

        Args:
            mol_key: InChI key of the molecule being evaluated
            xyz: 3D geometry of the molecule
            config_name: Name of the method
            charge: Charge on the molecule
            solvent: Name of the solvent
            **kwargs: Any other arguments for the method
        Returns:
            - The minimized structure
            - Any intermediate structures
            - Other metadata produced by the computation
        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError()

    def compute_energy(self, mol_key: str, xyz: str, config_name: str, charge: int = 0, solvent: str | None = None, forces: bool = True,
                       **kwargs) -> tuple[SimResult, str | None]:
        """Evaluate the energy, and optionally the forces, of a structure

        Args:
            mol_key: InChI key of the molecule being evaluated
            xyz: 3D geometry of the molecule
            config_name: Name of the method
            charge: Charge on the molecule
            solvent: Name of the solvent
            forces: Whether to compute forces
            **kwargs: Any other arguments for the method
        Returns:
            - Energy result
            - Other metadata produced by the computation
        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError()

    def _make_run_directory(self, run_type: str, mol_key: str, xyz: str, charge: int, config_name: str, solvent: str | None) -> Path:
        """Prepare the directory in which a calculation will run

        The directory is ``<scratch_dir>/<mol_key>/<run_type>_<run hash>``. It is created
        if missing, and a ``summary.json`` describing the inputs is (re)written inside it.

        Args:
            run_type: Type of the run to perform (e.g., "opt", "single")
            mol_key: InChI key of the molecule being evaluated
            xyz: XYZ coordinates for the atoms
            charge: Charge of the cell
            config_name: Name of the configuration
            solvent: Name of the solvent, if any
        Returns:
            Path in which to run the computation
        """
        # Locate and create the directory
        run_hash = self._make_run_hash(xyz, config_name, charge, solvent)
        run_dir = self.scratch_dir / mol_key / f'{run_type}_{run_hash}'
        run_dir.mkdir(parents=True, exist_ok=True)

        # Every value is stored as a string because json.dump does not work with Proxy objects
        summary = {
            'xyz': str(xyz),
            'config_name': str(config_name),
            'charge': str(charge),
            'solvent': str(solvent),
        }
        with open(run_dir / 'summary.json', 'w') as fp:
            json.dump(summary, fp, indent=2)
        return run_dir