"""Base classes for storage utilities"""importgzipimportloggingfromabcimportABCfrompathlibimportPathfromtypingimportIterablefromcontextlibimportAbstractContextManagerfromexamol.store.modelsimportMoleculeRecordfromexamol.utils.chemistryimportget_inchi_key_from_molecule_stringlogger=logging.getLogger(__name__)
class MoleculeStore(AbstractContextManager, ABC):
    """Base class defining how to interface with a dataset of molecule records.

    Data stores provide the ability to persist the data collected by ExaMol to disk during a run.
    The :meth:`update_record` call need not immediately persist the data but should ensure
    that the data is stored on disk eventually.
    In fact, it is actually better for the update operation to not block
    until the resulting write has completed.

    Stores do not need to support concurrent access from multiple clients,
    which is why this documentation avoids the word "database."
    """

    def __getitem__(self, mol_key: str) -> MoleculeRecord:
        """Retrieve a molecule record

        Args:
            mol_key: Key of the record; :meth:`get_or_make_record` looks records up by InChI key
        Returns:
            Record stored under that key
        """
        raise NotImplementedError()

    def __len__(self):
        """Number of records in the store"""
        raise NotImplementedError()

    def __contains__(self, item: str | MoleculeRecord):
        """Whether the store holds a given molecule

        Args:
            item: Either the key of a record or the record itself
        """
        raise NotImplementedError()

    def get_or_make_record(self, mol_string: str) -> MoleculeRecord:
        """Either the existing record for a molecule or make a new one

        A newly-made record is passed to :meth:`update_record` before being returned.

        Args:
            mol_string: String describing a molecule (e.g., SMILES string)
        Returns:
            Record
        """
        key = get_inchi_key_from_molecule_string(mol_string)
        if key not in self:
            record = MoleculeRecord.from_identifier(mol_string)
            self.update_record(record)
            return record
        else:
            return self[key]

    def iterate_over_records(self) -> Iterable[MoleculeRecord]:
        """Iterate over all records in data

        Yields:
            A single record
        """
        raise NotImplementedError()

    def update_record(self, record: MoleculeRecord):
        """Update a single record

        Args:
            record: Record to be updated
        """
        raise NotImplementedError()

    def update_records(self, records: Iterable[MoleculeRecord]):
        """Update many records at once

        The default implementation calls :meth:`update_record` once per record, in order.

        Args:
            records: Iterator over records to be stored
        """
        for record in records:
            self.update_record(record)

    def export_records(self, path: Path | str):
        """Save a current copy of the database to disk as line-delimited JSON

        Args:
            path: Path in which to save all data.
                Use a name ending in ".gz" (e.g., ".json.gz") to write gzip-compressed output
        """
        # Accept plain strings as well as Path objects; `.name` below requires a Path
        path = Path(path)

        # Both openers are used in text mode, and JSON is written as UTF-8
        #  regardless of the platform's default encoding
        opener = gzip.open if path.name.endswith('.gz') else open
        with opener(path, 'wt', encoding='utf-8') as fp:
            for record in self.iterate_over_records():
                print(record.json(), file=fp)