From c5f9d7cb925e9395cfa7649cf2a945abe041896d Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Mon, 16 Dec 2024 12:33:36 -0500 Subject: [PATCH] Csse layout opt/td (#363) * standardize convert_v, address review comments * Opt<> all working * fix Annotated for py38 * TD v2 models ready --- .github/workflows/Lint.yml | 2 +- docs/changelog.rst | 22 ++ pyproject.toml | 1 + qcelemental/models/v1/common_models.py | 8 +- qcelemental/models/v1/procedures.py | 219 ++++++++++-- qcelemental/models/v1/results.py | 13 +- qcelemental/models/v2/__init__.py | 3 +- qcelemental/models/v2/common_models.py | 14 +- qcelemental/models/v2/procedures.py | 455 +++++++++++++++++------- qcelemental/models/v2/results.py | 92 ++--- qcelemental/tests/test_model_general.py | 13 +- qcelemental/tests/test_model_results.py | 280 ++++++++++++--- qcelemental/tests/test_utils.py | 1 + 13 files changed, 847 insertions(+), 276 deletions(-) diff --git a/.github/workflows/Lint.yml b/.github/workflows/Lint.yml index 61b300eb..05adce9a 100644 --- a/.github/workflows/Lint.yml +++ b/.github/workflows/Lint.yml @@ -17,7 +17,7 @@ jobs: python-version: "3.8" - name: Install black run: pip install "black>=22.1.0,<23.0a0" - - name: Print code formatting with black + - name: Print code formatting with black (hints here if next step errors) run: black --diff . - name: Check code formatting with black run: black --check . diff --git a/docs/changelog.rst b/docs/changelog.rst index e6258692..136eafe7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -37,6 +37,28 @@ New Features Enhancements ++++++++++++ +- (:pr:`363`) +- (:pr:`363`) +- (:pr:`363`) ``v2.TorsionDriveResult`` no longer inherits from Input and now has indep id and extras and new native_files. +- (:pr:`363`) ``v2.TorsionDriveInput.initial_molecule`` now ``initial_molecules`` as it's a list of >=1 molecules. keep change? +- (:pr:`363`) ``v2. TorsionDriveSpecification`` is a new model. 
instead of ``v2.TorsionDriveInput`` having ``input_specification`` and ``optimization_spec`` fields, it has a ``specification`` field that is a ``TorsionDriveSpecification`` which in turn holds opt info and in turn gradient/atomic info. +- (:pr:`363`) ``v2.TDKeywords`` got a ``schema_name`` field. +- (:pr:`363`) ``v2.OptimizationResult`` and ``v2.TorsionDriveResult`` gained a ``native_files`` field, though no protocols for user control yet. +- (:pr:`363`) ``v2.AtomicResult.convert_v()`` learned external_protocols option to inject that field if known from OptIn +- (:pr:`363`) OptimizationSpecification learned a ``convert_v`` function to interconvert. +- (:pr:`363`) all the v2 models of ptcl/kw/spec/in/prop/res type have ``schema_name``. ``qcschema_input`` and ``qcschema_output`` now are ``qcschema_atomic_input`` and ``qcschema_atomic_output`` +- (:pr:`363`) whereas ``v1.AtomicInput`` and ``v1.QCInputSpecification`` shared the same schema_name, ``v2.AtomicInput`` and ``v2.AtomicSpecification`` do not. This is a step towards more explicit schema names. +- (:pr:`363`) ``v2.AtomicResult`` gets a literal schema_name and it no longer accepts the qc_schema* +- (:pr:`363`) ``v2.OptimizationResult.energies`` becomes ``v2.OptimizationResult.trajectory_properties`` and ManyBody allowed as well as atomic. Greatly expands the information returned. +- (:pr:`363`) ``v2.OptimizationResult.trajectory`` becomes ``v2.OptimizationResult.trajectory_results`` and ManyBody allowed as well as atomic. +- (:pr:`363`) a new basic ``v2.OptimizationProperties`` for expansion later. for now has number of opt iter. held by ``OptimizationResult.properties`` +- (:pr:`363`) ``v2.OptimizationResult`` gained an ``input_data`` field for the corresponding ``OptimizationInput`` and independent ``id`` and ``extras``. No longer inherits from ``OptimizationInput``. + Literal schema_name. Added ``native_files`` field. +- (:pr:`363`) ``v2.OptimizationInput`` got a Literal schema_name now. 
field ``specification`` now takes an ``OptimizationSpecification`` that itself takes an ``AtomicSpecification``; this replaces field ``input_specification`` that took a ``QCInputSpecification``. ``v2.OptimizationInput`` gained a ``protocols`` field. + fields ``keywords``, ``extras``, and ``protocols`` from Input are now in ``OptimizationSpecification`` +- (:pr:`363`) ``v2.OptimizationSpecification`` is now used in every optimization as ``v2.OptimizationInput.specification`` = ``OptimizationSpecification`` rather than only in torsion drives. No longer has schema_name and schema_version. + Its ``procedure`` field is now ``program``. Gains new field ``specification`` that is most commonly ``AtomicSpecification`` but could be ``ManyBodySpecification`` or any other E/G/H producer. +- (:pr:`363`) ``v2.OptimizationInput`` now takes consolidated ``AtomicSpecification`` rather than ``QCInputSpecification`` (now deleted) - (:pr:`359`) ``v2.AtomicInput`` lost extras so extras belong unambiguously to the specification. - (:pr:`359`) ``v2.AtomicSpecification``, unlike ``v1.QCInputSpecification``, doesn't have schema_name and schema version. 
- (:pr:`359`) misc -- ``isort`` version bumped to 5.13 and imports and syntax take advantage of python 3.8+ diff --git a/pyproject.toml b/pyproject.toml index 012fde74..493fc3dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "pint >=0.10; python_version=='3.8'", "pint >=0.24; python_version>='3.9'", "pydantic >=2.0", + "typing_extensions; python_version<'3.9'", ] [project.optional-dependencies] diff --git a/qcelemental/models/v1/common_models.py b/qcelemental/models/v1/common_models.py index c20900dc..55de2f18 100644 --- a/qcelemental/models/v1/common_models.py +++ b/qcelemental/models/v1/common_models.py @@ -129,18 +129,20 @@ def __repr_args__(self) -> "ReprArgs": return [("error", self.error)] def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.FailedOperation", "qcelemental.models.v2.FailedOperation"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="FailedOperation") == "self": + if check_convertible_version(target_version, error="FailedOperation") == "self": return self dself = self.dict() - if version == 2: + if target_version == 2: # TODO if FailedOp gets a schema_version, add a validator self_vN = qcel.models.v2.FailedOperation(**dself) + else: + assert False, target_version return self_vN diff --git a/qcelemental/models/v1/procedures.py b/qcelemental/models/v1/procedures.py index 9a044c3f..93decc42 100644 --- a/qcelemental/models/v1/procedures.py +++ b/qcelemental/models/v1/procedures.py @@ -70,20 +70,22 @@ def _version_stamp(cls, v): return 1 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.QCInputSpecification", "qcelemental.models.v2.AtomicSpecification"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="QCInputSpecification") == "self": + if 
check_convertible_version(target_version, error="QCInputSpecification") == "self": return self dself = self.dict() - if version == 2: + if target_version == 2: dself.pop("schema_name") dself.pop("schema_version") self_vN = qcel.models.v2.AtomicSpecification(**dself) + else: + assert False, target_version return self_vN @@ -116,18 +118,32 @@ def _version_stamp(cls, v): return 1 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.OptimizationInput", "qcelemental.models.v2.OptimizationInput"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="OptimizationInput") == "self": + if check_convertible_version(target_version, error="OptimizationInput") == "self": return self dself = self.dict() - if version == 2: - dself["input_specification"].pop("schema_version", None) + if target_version == 2: + dself.pop("hash_index", None) # no longer used, so dropped in v2 + + spec = {} + spec["extras"] = dself.pop("extras") + spec["protocols"] = dself.pop("protocols") + spec["specification"] = self.input_specification.convert_v(target_version).model_dump() + dself.pop("input_specification") + spec["specification"]["program"] = dself["keywords"].pop( + "program", "" + ) # "" is when there's an implcit program, like nwchemopt + spec["keywords"] = dself.pop("keywords") + dself["specification"] = spec + self_vN = qcel.models.v2.OptimizationInput(**dself) + else: + assert False, target_version return self_vN @@ -180,25 +196,98 @@ def _version_stamp(cls, v): return 1 def convert_v( - self, version: int + self, + target_version: int, + /, + *, + external_input_data: Optional[Union[Dict[str, Any], "OptimizationInput"]] = None, ) -> Union["qcelemental.models.v1.OptimizationResult", "qcelemental.models.v2.OptimizationResult"]: - """Convert to instance of particular QCSchema version.""" + """Convert to instance of particular QCSchema version. 
+ + Parameters + ---------- + target_version + The version to convert to. + external_input_data + Since self contains data merged from input, this allows passing in the original input, particularly for `extras` fields. + Can be model or dictionary and should be *already* converted to target_version. + Replaces ``input_data`` field entirely (not merges with extracts from self) and w/o consistency checking. + + Returns + ------- + OptimizationResult + Returns self (not a copy) if ``target_version`` already satisfied. + Returns a new OptimizationResult of ``target_version`` otherwise. + + """ import qcelemental as qcel - if check_convertible_version(version, error="OptimizationResult") == "self": + if check_convertible_version(target_version, error="OptimizationResult") == "self": return self trajectory_class = self.trajectory[0].__class__ dself = self.dict() - if version == 2: + if target_version == 2: # remove harmless empty error field that v2 won't accept. if populated, pydantic will catch it. if not dself.get("error", True): dself.pop("error") - dself["trajectory"] = [trajectory_class(**atres).convert_v(version) for atres in dself["trajectory"]] - dself["input_specification"].pop("schema_version", None) + dself.pop("hash_index", None) # no longer used, so dropped in v2 + + v1_input_data = { + k: dself.pop(k) + for k in list(dself.keys()) + if k in ["initial_molecule", "protocols", "keywords", "input_specification"] + } + # sep any merged extras known to belong to input + v1_input_data["extras"] = {k: dself["extras"].pop(k) for k in list(dself["extras"].keys()) if k in []} + v2_input_data = qcel.models.v1.OptimizationInput(**v1_input_data).convert_v(target_version) + + # any input provenance has been overwritten + # if dself["id"]: + # input_data["id"] = dself["id"] # in/out should likely match + + if external_input_data: + # Note: overwriting with external, not updating. reconsider? 
+ if isinstance(external_input_data, dict): + if isinstance(external_input_data["specification"], dict): + in_extras = external_input_data["specification"].get("extras", {}) + else: + in_extras = external_input_data["specification"].extras + else: + in_extras = external_input_data.specification.extras + optsubptcl = external_input_data.specification.specification.protocols + dself["extras"] = {k: v for k, v in dself["extras"].items() if (k, v) not in in_extras.items()} + dself["input_data"] = external_input_data + else: + dself["input_data"] = v2_input_data + optsubptcl = None + + dself["properties"] = { + "return_energy": dself["energies"][-1], + "optimization_iterations": len(dself["energies"]), + } + if dself.get("trajectory", []): + if ( + last_grad := dself["trajectory"][-1].get("properties", {}).get("return_gradient", None) + ) is not None: + dself["properties"]["return_gradient"] = last_grad + if len(dself.get("trajectory", [])) == len(dself["energies"]): + dself["trajectory_properties"] = [ + res["properties"] for res in dself["trajectory"] + ] # TODO filter to key keys + dself["trajectory_properties"] = [{"return_energy": ene} for ene in dself["energies"]] + dself.pop("energies") + + dself["trajectory_results"] = [ + trajectory_class(**atres).convert_v(target_version, external_protocols=optsubptcl) + for atres in dself["trajectory"] + ] + dself.pop("trajectory") self_vN = qcel.models.v2.OptimizationResult(**dself) + else: + assert False, target_version return self_vN @@ -228,6 +317,9 @@ def _version_stamp(cls, v): def _check_procedure(cls, v): return v.lower() + # NOTE: def convert_v() is missing deliberately. Because the v1 schema has a minor and different role only for + # TorsionDrive, it doesn't have nearly enough info to create a v2 schema. 
+ class TDKeywords(ProtoModel): """ @@ -301,21 +393,48 @@ def _version_stamp(cls, v): return 1 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.TorsionDriveInput", "qcelemental.models.v2.TorsionDriveInput"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="TorsionDriveInput") == "self": + if check_convertible_version(target_version, error="TorsionDriveInput") == "self": return self dself = self.dict() # dself = self.model_dump(exclude_unset=True, exclude_none=True) - if version == 2: - dself["input_specification"].pop("schema_version", None) - dself["optimization_spec"].pop("schema_version", None) + if target_version == 2: + gradspec = self.input_specification.convert_v(target_version).model_dump() + gradspec["program"] = dself["optimization_spec"]["keywords"].pop("program", "") + dself.pop("input_specification") + + optspec = {} + optspec["program"] = dself["optimization_spec"].pop("procedure") + optspec["protocols"] = dself["optimization_spec"].pop("protocols") + optspec["keywords"] = dself["optimization_spec"].pop("keywords") + optspec["specification"] = gradspec + dself["optimization_spec"].pop("schema_name") + dself["optimization_spec"].pop("schema_version") + assert not dself["optimization_spec"], dself["optimization_spec"] + dself.pop("optimization_spec") + + tdspec = {} + tdspec["program"] = "torsiondrive" + tdspec["extras"] = dself.pop("extras") + tdspec["keywords"] = dself.pop("keywords") + tdspec["specification"] = optspec + + dtop = {} + dtop["provenance"] = dself.pop("provenance") + dtop["initial_molecules"] = dself.pop("initial_molecule") + dtop["specification"] = tdspec + dself.pop("schema_name") + dself.pop("schema_version") + assert not dself, dself - self_vN = qcel.models.v2.TorsionDriveInput(**dself) + self_vN = qcel.models.v2.TorsionDriveInput(**dtop) + else: + assert False, target_version return self_vN @@ 
-357,31 +476,71 @@ def _version_stamp(cls, v): return 1 def convert_v( - self, version: int + self, target_version: int, /, *, external_input_data: "TorsionDriveInput" = None ) -> Union["qcelemental.models.v1.TorsionDriveResult", "qcelemental.models.v2.TorsionDriveResult"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="TorsionDriveResult") == "self": + if check_convertible_version(target_version, error="TorsionDriveResult") == "self": return self - opthist_class = next(iter(self.optimization_history.values()))[0].__class__ dself = self.dict() - if version == 2: + if target_version == 2: + opthist_class = next(iter(self.optimization_history.values()))[0].__class__ + dtop = {} + # remove harmless empty error field that v2 won't accept. if populated, pydantic will catch it. if not dself.get("error", True): dself.pop("error") - dself["input_specification"].pop("schema_version", None) - dself["optimization_spec"].pop("schema_version", None) - dself["optimization_history"] = { - k: [opthist_class(**res).convert_v(version) for res in lst] + v1_input_data = { + k: dself.pop(k) + for k in list(dself.keys()) + if k in ["initial_molecule", "keywords", "optimization_spec", "input_specification"] # protocols + } + # any input provenance has been overwritten + # sep any merged extras known to belong to input + v1_input_data["extras"] = {k: dself["extras"].pop(k) for k in list(dself["extras"].keys()) if k in []} + v2_input_data = qcel.models.v1.TorsionDriveInput(**v1_input_data).convert_v(target_version) + + # if dself["id"]: + # input_data["id"] = dself["id"] # in/out should likely match + + if external_input_data: + # Note: overwriting with external, not updating. reconsider? 
+ if isinstance(external_input_data, dict): + if isinstance(external_input_data["specification"], dict): + in_extras = external_input_data["specification"].get("extras", {}) + else: + in_extras = external_input_data["specification"].extras + else: + in_extras = external_input_data.specification.extras + dtop["extras"] = {k: v for k, v in dself["extras"].items() if (k, v) not in in_extras.items()} + dtop["input_data"] = external_input_data + else: + dtop["input_data"] = v2_input_data + dtop["extras"] = dself.pop("extras") + + dtop["provenance"] = dself.pop("provenance") + dtop["stdout"] = dself.pop("stdout") + dtop["stderr"] = dself.pop("stderr") + dtop["success"] = dself.pop("success") + dtop["final_energies"] = dself.pop("final_energies") + dtop["final_molecules"] = dself.pop("final_molecules") + dtop["optimization_history"] = { + k: [opthist_class(**res).convert_v(target_version) for res in lst] for k, lst in dself["optimization_history"].items() } - # if dself["optimization_spec"].pop("extras", None): - # pass + dself.pop("optimization_history") + dself.pop("schema_name") + dself.pop("schema_version") + if "error" in dself: + dtop["error"] = dself.pop("error") # guaranteed to be fatal + assert not dself, dself - self_vN = qcel.models.v2.TorsionDriveResult(**dself) + self_vN = qcel.models.v2.TorsionDriveResult(**dtop) + else: + assert False, target_version return self_vN diff --git a/qcelemental/models/v1/results.py b/qcelemental/models/v1/results.py index 1edb2960..8f2bdfd0 100644 --- a/qcelemental/models/v1/results.py +++ b/qcelemental/models/v1/results.py @@ -622,10 +622,10 @@ def convert_v( return self dself = self.dict() - spec = {} if target_version == 2: dself.pop("schema_name") # changes in v2 + spec = {} spec["driver"] = dself.pop("driver") spec["model"] = dself.pop("model") spec["keywords"] = dself.pop("keywords", None) @@ -633,6 +633,8 @@ def convert_v( spec["extras"] = dself.pop("extras", None) dself["specification"] = spec self_vN = 
qcel.models.v2.AtomicInput(**dself) + else: + assert False, target_version return self_vN @@ -805,6 +807,7 @@ def convert_v( /, *, external_input_data: Optional[Any] = None, + external_protocols: Optional[AtomicResultProtocols] = None, ) -> Union["qcelemental.models.v1.AtomicResult", "qcelemental.models.v2.AtomicResult"]: """Convert to instance of particular QCSchema version. @@ -816,6 +819,8 @@ def convert_v( Since self contains data merged from input, this allows passing in the original input, particularly for `molecule` and `extras` fields. Can be model or dictionary and should be *already* converted to target_version. Replaces ``input_data`` field entirely (not merges with extracts from self) and w/o consistency checking. + external_protocols + Allows overriding the AtomicProtocols field. Used for trajectory in user-v2+harness-v1 optimizers to correctly form the gradient schema. Returns ------- @@ -831,6 +836,8 @@ def convert_v( dself = self.dict() if target_version == 2: + dself.pop("schema_name") # changes in v2 + # remove harmless empty error field that v2 won't accept. if populated, pydantic will catch it. 
if not dself.get("error", True): dself.pop("error") @@ -863,8 +870,12 @@ def convert_v( dself["input_data"] = external_input_data else: dself["input_data"] = input_data + if external_protocols: + dself["input_data"]["specification"]["protocols"] = external_protocols self_vN = qcel.models.v2.AtomicResult(**dself) + else: + assert False, target_version return self_vN diff --git a/qcelemental/models/v2/__init__.py b/qcelemental/models/v2/__init__.py index 16096b13..57dee12d 100644 --- a/qcelemental/models/v2/__init__.py +++ b/qcelemental/models/v2/__init__.py @@ -6,13 +6,14 @@ from .molecule import Molecule from .procedures import ( OptimizationInput, + OptimizationProperties, OptimizationProtocols, OptimizationResult, OptimizationSpecification, - QCInputSpecification, TDKeywords, TorsionDriveInput, TorsionDriveResult, + TorsionDriveSpecification, ) from .results import ( AtomicInput, diff --git a/qcelemental/models/v2/common_models.py b/qcelemental/models/v2/common_models.py index 58d97e35..81863171 100644 --- a/qcelemental/models/v2/common_models.py +++ b/qcelemental/models/v2/common_models.py @@ -141,20 +141,22 @@ def _version_stamp(cls, v): return 2 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.FailedOperation", "qcelemental.models.v2.FailedOperation"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="FailedOperation") == "self": + if check_convertible_version(target_version, error="FailedOperation") == "self": return self dself = self.model_dump() - if version == 1: + if target_version == 1: dself.pop("schema_name") dself.pop("schema_version") self_vN = qcel.models.v1.FailedOperation(**dself) + else: + assert False, target_version return self_vN @@ -168,10 +170,4 @@ def check_convertible_version(ver: int, error: str): raise ValueError(f"QCSchema {error} version={version} does not exist for conversion.") -qcschema_input_default 
= "qcschema_input" -qcschema_output_default = "qcschema_output" -qcschema_optimization_input_default = "qcschema_optimization_input" -qcschema_optimization_output_default = "qcschema_optimization_output" -qcschema_torsion_drive_input_default = "qcschema_torsion_drive_input" -qcschema_torsion_drive_output_default = "qcschema_torsion_drive_output" qcschema_molecule_default = "qcschema_molecule" diff --git a/qcelemental/models/v2/procedures.py b/qcelemental/models/v2/procedures.py index 8dbb4031..979e0a6d 100644 --- a/qcelemental/models/v2/procedures.py +++ b/qcelemental/models/v2/procedures.py @@ -1,29 +1,28 @@ from enum import Enum from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union +try: + from typing import Annotated +except ImportError: + # remove when minimum py39 + from typing_extensions import Annotated + from pydantic import Field, conlist, constr, field_validator from ...util import provenance_stamp from .basemodels import ExtendedConfigDict, ProtoModel -from .common_models import ( - ComputeError, - DriverEnum, - Model, - Provenance, - check_convertible_version, - qcschema_input_default, - qcschema_optimization_input_default, - qcschema_optimization_output_default, - qcschema_torsion_drive_input_default, - qcschema_torsion_drive_output_default, -) +from .common_models import ComputeError, DriverEnum, Model, Provenance, check_convertible_version from .molecule import Molecule -from .results import AtomicResult +from .results import AtomicResult, AtomicResultProperties, AtomicSpecification +from .types import Array if TYPE_CHECKING: from .common_models import ReprArgs +# ==== Protocols ============================================================== + + class TrajectoryProtocolEnum(str, Enum): """ Which gradient evaluations to keep in an optimization trajectory. @@ -40,6 +39,7 @@ class OptimizationProtocols(ProtoModel): Protocols regarding the manipulation of a Optimization output data. 
""" + schema_name: Literal["qcschema_optimization_protocols"] = "qcschema_optimization_protocols" trajectory: TrajectoryProtocolEnum = Field( TrajectoryProtocolEnum.all, description=str(TrajectoryProtocolEnum.__doc__) ) @@ -47,46 +47,107 @@ class OptimizationProtocols(ProtoModel): model_config = ExtendedConfigDict(force_skip_defaults=True) -class QCInputSpecification(ProtoModel): - """ - A compute description for energy, gradient, and Hessian computations used in a geometry optimization. - """ +# ==== Inputs (Kw/Spec/In) ==================================================== + +OptSubSpecs = Annotated[ + Union[AtomicSpecification], # , ManyBodySpecification], + Field( + discriminator="schema_name", + description="A directive to compute a gradient. Either an ordinary atomic/single-point or a many-body spec.", + ), +] + +OptSubProps = Annotated[ + Union[AtomicResultProperties], # , ManyBodyProperties], + Field( + discriminator="schema_name", + description="An abridged single-geometry property set. Either an ordinary atomic/single-point or a many-body properties.", + ), +] + +OptSubRes = Annotated[ + Union[AtomicResult], # ManyBodyResult], + Field( + discriminator="schema_name", + description="A single-geometry result. 
Either an ordinary atomic/single-point or a many-body result.", + ), +] + - schema_name: constr(strip_whitespace=True, pattern=qcschema_input_default) = qcschema_input_default # type: ignore - # TRIAL schema_version: int = 1 # TODO +class OptimizationSpecification(ProtoModel): + """Specification for how to run a geometry optimization.""" - driver: DriverEnum = Field(DriverEnum.gradient, description=str(DriverEnum.__doc__)) - model: Model = Field(..., description=str(Model.__doc__)) - keywords: Dict[str, Any] = Field({}, description="The program specific keywords to be used.") + schema_name: Literal["qcschema_optimization_specification"] = "qcschema_optimization_specification" + # schema_version: Literal[2] = Field( + # 2, + # description="The version number of ``schema_name`` to which this model conforms.", + # ) + # right default for program? + program: str = Field( + "", description="Optimizer CMS code / QCEngine procedure to run the geometry optimization with." + ) + keywords: Dict[str, Any] = Field({}, description="The optimization specific keywords to be used.") + protocols: OptimizationProtocols = Field(OptimizationProtocols(), description=str(OptimizationProtocols.__doc__)) extras: Dict[str, Any] = Field( {}, description="Additional information to bundle with the computation. Use for schema development and scratch space.", ) + specification: AtomicSpecification = Field( + ..., description="The specification for how to run gradients for the optimization." 
+ ) + + @field_validator("program") + @classmethod + def _check_procedure(cls, v): + return v.lower() + + def convert_v( + self, target_version: int, / + ) -> Union["qcelemental.models.v1.OptimizationSpecification", "qcelemental.models.v2.OptimizationSpecification"]: + """Convert to instance of particular QCSchema version.""" + import qcelemental as qcel + + if check_convertible_version(target_version, error="OptimizationSpecification") == "self": + return self + + loss_store = {} + dself = self.model_dump() + if target_version == 1: + dself["procedure"] = dself.pop("program") + dself["keywords"]["program"] = dself["specification"].pop("program") + + loss_store["extras"] = dself.pop("extras") + loss_store["specification"] = dself.pop("specification") + + # if loss_store: + # dself["extras"]["_qcsk_conversion_loss"] = loss_store + + self_vN = qcel.models.v1.OptimizationSpecification(**dself) + else: + assert False, target_version + + return self_vN class OptimizationInput(ProtoModel): """QCSchema input directive for geometry optimization.""" id: Optional[str] = None - hash_index: Optional[str] = None - schema_name: constr( # type: ignore - strip_whitespace=True, pattern=qcschema_optimization_input_default - ) = qcschema_optimization_input_default - schema_version: Literal[2] = 2 - - keywords: Dict[str, Any] = Field({}, description="The optimization specific keywords to be used.") - extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.") - protocols: OptimizationProtocols = Field(OptimizationProtocols(), description=str(OptimizationProtocols.__doc__)) + schema_name: Literal["qcschema_optimization_input"] = "qcschema_optimization_input" + schema_version: Literal[2] = Field( + 2, + description="The version number of ``schema_name`` to which this model conforms.", + ) - input_specification: QCInputSpecification = Field(..., description=str(QCInputSpecification.__doc__)) + specification: OptimizationSpecification = Field(..., 
description=str(OptimizationSpecification.__doc__)) initial_molecule: Molecule = Field(..., description="The starting molecule for the geometry optimization.") provenance: Provenance = Field(Provenance(**provenance_stamp(__name__)), description=str(Provenance.__doc__)) def __repr_args__(self) -> "ReprArgs": return [ - ("model", self.input_specification.model.model_dump()), + ("model", self.specification.specification.model.model_dump()), ("molecule_hash", self.initial_molecule.get_hash()[:7]), ] @@ -95,35 +156,98 @@ def _version_stamp(cls, v): return 2 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.OptimizationInput", "qcelemental.models.v2.OptimizationInput"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="OptimizationInput") == "self": + if check_convertible_version(target_version, error="OptimizationInput") == "self": return self dself = self.model_dump() - if version == 1: - dself["input_specification"].pop("schema_version", None) + if target_version == 1: + dself["extras"] = dself["specification"].pop("extras") + dself["protocols"] = dself["specification"].pop("protocols") + dself["keywords"] = dself["specification"].pop("keywords") + + dself["input_specification"] = self.specification.specification.convert_v(target_version) + dself["keywords"]["program"] = dself["specification"]["specification"].pop("program") + dself["specification"].pop("specification") + dself["specification"].pop("schema_name") + + opt_program = dself["specification"].pop("program") + assert not dself["specification"], dself["specification"] + dself.pop("specification") # now empty + self_vN = qcel.models.v1.OptimizationInput(**dself) + else: + assert False, target_version return self_vN -class OptimizationResult(OptimizationInput): +# ==== Properties ============================================================= + + +class 
OptimizationProperties(ProtoModel): + r""" + Named properties of geometry optimization computations following the MolSSI QCSchema. + """ + + schema_name: Literal["qcschema_optimization_properties"] = Field( + "qcschema_optimization_properties", + description=f"The QCSchema specification to which this model conforms.", + ) + # schema_version: Literal[2] = Field( + # 2, + # description="The version number of :attr:`~qcelemental.models.OptimizationProperties.schema_name` to which this model conforms.", + # ) + + # ======== Calcinfo ======================================================= + # ======== Canonical ====================================================== + + nuclear_repulsion_energy: Optional[float] = Field(None, description="The nuclear repulsion energy.") + + return_energy: Optional[float] = Field( + None, + description=f"The energy of the final optimized molecule. Always available. Identical to the final :attr:`~qcelemental.models.OptimizationResult.trajectory_properties.return_energy`.", + json_schema_extra={"units": "E_h"}, + ) + + return_gradient: Optional[Array[float]] = Field( + None, + description=f"The gradient of the final optimized molecule. Always available. Identical to :attr:`~qcelemental.models.OptimizationResult.trajectory_properties.return_gradient`.", + json_schema_extra={"units": "E_h/a0", "shape": ["nat", 3]}, + ) + + optimization_iterations: Optional[int] = Field( + None, description="The number of geometry iterations taken before convergence." 
+ ) + + model_config = ProtoModel._merge_config_with(force_skip_defaults=True) + + +# ==== Results ================================================================ + + +class OptimizationResult(ProtoModel): """QCSchema results model for geometry optimization.""" - schema_name: constr( # type: ignore - strip_whitespace=True, pattern=qcschema_optimization_output_default - ) = qcschema_optimization_output_default - schema_version: Literal[2] = 2 + schema_name: Literal["qcschema_optimization_output"] = "qcschema_optimization_output" # TODO _result? + schema_version: Literal[2] = Field( + 2, + description="The version number of ``schema_name`` to which this model conforms.", + ) + id: Optional[str] = Field(None, description="The optional ID for the computation.") + input_data: OptimizationInput = Field(..., description=str(OptimizationInput.__doc__)) final_molecule: Optional[Molecule] = Field(..., description="The final molecule of the geometry optimization.") - trajectory: List[AtomicResult] = Field( + trajectory_results: List[AtomicResult] = Field( ..., description="A list of ordered Result objects for each step in the optimization." ) - energies: List[float] = Field(..., description="A list of ordered energies for each step in the optimization.") + trajectory_properties: List[AtomicResultProperties] = Field( + ..., description="A list of ordered energies and other properties for each step in the optimization." + ) stdout: Optional[str] = Field(None, description="The standard output of the program.") stderr: Optional[str] = Field(None, description="The standard error of the program.") @@ -133,14 +257,24 @@ class OptimizationResult(OptimizationInput): ) provenance: Provenance = Field(..., description=str(Provenance.__doc__)) - @field_validator("trajectory") + # native_files placeholder for when any opt programs supply extra files or need an input file. 
no protocol at present + native_files: Dict[str, Any] = Field({}, description="DSL files.") + + properties: OptimizationProperties = Field(..., description=str(OptimizationProperties.__doc__)) + + extras: Dict[str, Any] = Field( + {}, + description="Additional information to bundle with the computation. Use for schema development and scratch space.", + ) + + @field_validator("trajectory_results") @classmethod def _trajectory_protocol(cls, v, info): # Do not propogate validation errors - if "protocols" not in info.data: - raise ValueError("Protocols was not properly formed.") + if "input_data" not in info.data: + raise ValueError("Input_data was not properly formed.") - keep_enum = info.data["protocols"].trajectory + keep_enum = info.data["input_data"].specification.protocols.trajectory if keep_enum == "all": pass elif keep_enum == "initial_and_final": @@ -161,53 +295,44 @@ def _version_stamp(cls, v): return 2 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.OptimizationResult", "qcelemental.models.v2.OptimizationResult"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="OptimizationResult") == "self": + if check_convertible_version(target_version, error="OptimizationResult") == "self": return self dself = self.model_dump() - if version == 1: - trajectory_class = self.trajectory[0].__class__ + if target_version == 1: + trajectory_class = self.trajectory_results[0].__class__ - dself["trajectory"] = [trajectory_class(**atres).convert_v(version) for atres in dself["trajectory"]] - dself["input_specification"].pop("schema_version", None) + # for input_data, work from model, not dict, to use convert_v + dself.pop("input_data") + input_data = self.input_data.convert_v(1).model_dump() # exclude_unset=True, exclude_none=True - self_vN = qcel.models.v1.OptimizationResult(**dself) - - return self_vN + dself.pop("properties") # new in v2 + 
dself.pop("native_files") # new in v2 + dself["trajectory"] = [ + trajectory_class(**atres).convert_v(target_version) for atres in dself["trajectory_results"] + ] + dself.pop("trajectory_results") + dself["energies"] = [atprop.pop("return_energy", None) for atprop in dself["trajectory_properties"]] + dself.pop("trajectory_properties") -class OptimizationSpecification(ProtoModel): - """ - A specification for how a geometry optimization should be performed **inside** of - another procedure. + dself["extras"] = {**input_data.pop("extras", {}), **dself.pop("extras", {})} # merge + dself = {**input_data, **dself} - Notes - ----- - * This class is still provisional and may be subject to removal and re-design. - """ + self_vN = qcel.models.v1.OptimizationResult(**dself) + else: + assert False, target_version - schema_name: constr( - strip_whitespace=True, pattern="qcschema_optimization_specification" - ) = "qcschema_optimization_specification" # type: ignore - # TRIAL schema_version: int = 1 # TODO + return self_vN - procedure: str = Field(..., description="Optimization procedure to run the optimization with.") - keywords: Dict[str, Any] = Field({}, description="The optimization specific keywords to be used.") - protocols: OptimizationProtocols = Field(OptimizationProtocols(), description=str(OptimizationProtocols.__doc__)) - extras: Dict[str, Any] = Field( - {}, - description="Additional information to bundle with the computation. Use for schema development and scratch space.", - ) - @field_validator("procedure") - @classmethod - def _check_procedure(cls, v): - return v.lower() +# ==== Protocols ============================================================== +# ==== Inputs (Kw/Spec/In) ==================================================== class TDKeywords(ProtoModel): @@ -219,6 +344,11 @@ class TDKeywords(ProtoModel): * This class is still provisional and may be subject to removal and re-design. 
""" + schema_name: Literal["qcschema_torsion_drive_keywords"] = Field( + "qcschema_torsion_drive_keywords", + description=f"The QCSchema specification to which this model conforms.", + ) + dihedrals: List[Tuple[int, int, int, int]] = Field( ..., description="The list of dihedrals to select for the TorsionDrive operation. Each entry is a tuple of integers " @@ -247,37 +377,61 @@ class TDKeywords(ProtoModel): ) -class TorsionDriveInput(ProtoModel): - """Inputs for running a torsion drive. +class TorsionDriveSpecification(ProtoModel): + """Specification for how to run a torsion drive scan.""" - Notes - ----- - * This class is still provisional and may be subject to removal and re-design. - """ - - schema_name: constr( - strip_whitespace=True, pattern=qcschema_torsion_drive_input_default - ) = qcschema_torsion_drive_input_default # type: ignore - schema_version: Literal[2] = 2 + schema_name: Literal["qcschema_torsion_drive_specification"] = "qcschema_torsion_drive_specification" + # schema_version: Literal[2] = Field( + # 2, + # description="The version number of ``schema_name`` to which this model conforms.", + # ) + program: str = Field( + "", description="Torsion Drive CMS code / QCEngine procedure with which to run the torsion scan." + ) keywords: TDKeywords = Field(..., description="The torsion drive specific keywords to be used.") - extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.") + # protocols: TorsionDriveProtocols = Field(TorsionDriveProtocols(), description=str(TorsionDriveProtocols.__doc__)) + extras: Dict[str, Any] = Field( + {}, + description="Additional information to bundle with the computation. 
Use for schema development and scratch space.", + ) + specification: OptimizationSpecification = Field( + ..., + description="The specification for how to run optimizations for the torsion scan (within this is spec for gradients for the optimization.", + ) - input_specification: QCInputSpecification = Field(..., description=str(QCInputSpecification.__doc__)) - initial_molecule: conlist(item_type=Molecule, min_length=1) = Field( - ..., description="The starting molecule(s) for the torsion drive." + @field_validator("program") + @classmethod + def _check_procedure(cls, v): + return v.lower() + + # Note: no convert_v() method as TDSpec doesn't have a v1 equivalent + + +class TorsionDriveInput(ProtoModel): + """Inputs for running a torsion drive.""" + + schema_name: Literal["qcschema_torsion_drive_input"] = "qcschema_torsion_drive_input" + schema_version: Literal[2] = Field( + 2, + description="The version number of ``schema_name`` to which this model conforms.", ) - optimization_spec: OptimizationSpecification = Field( - ..., description="Settings to use for optimizations at each grid angle." + id: Optional[str] = None + initial_molecules: conlist(item_type=Molecule, min_length=1) = Field( + ..., description="The starting molecule(s) for the torsion drive." 
) + specification: TorsionDriveSpecification = Field(..., description=str(TorsionDriveSpecification.__doc__)) + provenance: Provenance = Field(Provenance(**provenance_stamp(__name__)), description=str(Provenance.__doc__)) - @field_validator("input_specification") + @field_validator("specification") @classmethod - def _check_input_specification(cls, value): - assert value.driver == DriverEnum.gradient, "driver must be set to gradient" + def _check_input_specification(cls, value, info): + driver = value.specification.specification.driver + + assert driver == DriverEnum.gradient, "driver must be set to gradient" return value @field_validator("schema_version", mode="before") @@ -285,44 +439,64 @@ def _version_stamp(cls, v): return 2 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.TorsionDriveInput", "qcelemental.models.v2.TorsionDriveInput"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="TorsionDriveInput") == "self": + if check_convertible_version(target_version, error="TorsionDriveInput") == "self": return self dself = self.model_dump() - if version == 1: - if dself["optimization_spec"].pop("extras", None): - pass + if target_version == 1: + dself.pop("id") # unused in v1 + dself["extras"] = dself["specification"].pop("extras") + dself["initial_molecule"] = dself.pop("initial_molecules") + dself["keywords"] = dself["specification"].pop("keywords") + dself["keywords"].pop("schema_name") # unused in v1 + + dself["optimization_spec"] = self.specification.specification.convert_v(target_version) + dself["input_specification"] = self.specification.specification.specification.convert_v(target_version) + dself["specification"].pop("specification") + dself["specification"].pop("schema_name") + + td_program = dself["specification"].pop("program") + assert not dself["specification"], dself["specification"] + dself.pop("specification") # now 
empty self_vN = qcel.models.v1.TorsionDriveInput(**dself) + else: + assert False, target_version return self_vN -class TorsionDriveResult(TorsionDriveInput): - """Results from running a torsion drive. +# ==== Properties ============================================================= +# ======== Calcinfo ======================================================= +# ======== Canonical ====================================================== + + +# ==== Results ================================================================ - Notes - ----- - * This class is still provisional and may be subject to removal and re-design. - """ - schema_name: constr( - strip_whitespace=True, pattern=qcschema_torsion_drive_output_default - ) = qcschema_torsion_drive_output_default # type: ignore - schema_version: Literal[2] = 2 +class TorsionDriveResult(ProtoModel): + """Results from running a torsion drive.""" + schema_name: Literal["qcschema_torsion_drive_output"] = "qcschema_torsion_drive_output" + schema_version: Literal[2] = Field( + 2, + description="The version number of ``schema_name`` to which this model conforms.", + ) + id: Optional[str] = Field(None, description="The optional ID for the computation.") + input_data: TorsionDriveInput = Field(..., description=str(TorsionDriveInput.__doc__)) + + # final_energies, final_molecules, optimization_history I'm hoping to refactor into scan_properties and scan_results but need to talk to OpenFF folks final_energies: Dict[str, float] = Field( ..., description="The final energy at each angle of the TorsionDrive scan." ) final_molecules: Dict[str, Molecule] = Field( ..., description="The final molecule at each angle of the TorsionDrive scan." 
) - optimization_history: Dict[str, List[OptimizationResult]] = Field( ..., description="The map of each angle of the TorsionDrive scan to each optimization computations.", @@ -331,6 +505,17 @@ class TorsionDriveResult(TorsionDriveInput): stdout: Optional[str] = Field(None, description="The standard output of the program.") stderr: Optional[str] = Field(None, description="The standard error of the program.") + # native_files placeholder for when any td programs supply extra files or need an input file. no protocol at present + native_files: Dict[str, Any] = Field({}, description="DSL files.") + + # TODO add properties if a set can be collected + # properties: TorsionDriveProperties = Field(..., description=str(TorsionDriveProperties.__doc__)) + + extras: Dict[str, Any] = Field( + {}, + description="Additional information to bundle with the computation. Use for schema development and scratch space.", + ) + success: Literal[True] = Field( True, description="The success of a given programs execution. If False, other fields may be blank." 
) @@ -341,26 +526,46 @@ def _version_stamp(cls, v): return 2 def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.TorsionDriveResult", "qcelemental.models.v2.TorsionDriveResult"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="TorsionDriveResult") == "self": + if check_convertible_version(target_version, error="TorsionDriveResult") == "self": return self dself = self.model_dump() - if version == 1: + if target_version == 1: opthist_class = next(iter(self.optimization_history.values()))[0].__class__ + dtop = {} - if dself["optimization_spec"].pop("extras", None): - pass + # for input_data, work from model, not dict, to use convert_v + dself.pop("input_data") + input_data = self.input_data.convert_v(target_version).model_dump() - dself["optimization_history"] = { - k: [opthist_class(**res).convert_v(version) for res in lst] + dtop["final_energies"] = dself.pop("final_energies") + dtop["final_molecules"] = dself.pop("final_molecules") + dtop["optimization_history"] = { + k: [opthist_class(**res).convert_v(target_version) for res in lst] for k, lst in dself["optimization_history"].items() } - - self_vN = qcel.models.v1.TorsionDriveResult(**dself) + dself.pop("optimization_history") + + dself.pop("id") # unused in v1 + dself.pop("native_files") # new in v2 + dtop["provenance"] = dself.pop("provenance") + dtop["stdout"] = dself.pop("stdout") + dtop["stderr"] = dself.pop("stderr") + dtop["success"] = dself.pop("success") + dtop["extras"] = {**input_data.pop("extras", {}), **dself.pop("extras", {})} # merge + dtop["schema_name"] = dself.pop("schema_name") # otherwise merge below uses TDIn schema_name + dself.pop("schema_version") + assert not dself, dself + + dtop = {**input_data, **dtop} + + self_vN = qcel.models.v1.TorsionDriveResult(**dtop) + else: + assert False, target_version return self_vN diff --git a/qcelemental/models/v2/results.py 
b/qcelemental/models/v2/results.py index 049cf9d2..7b21f84d 100644 --- a/qcelemental/models/v2/results.py +++ b/qcelemental/models/v2/results.py @@ -8,15 +8,7 @@ from ...util import provenance_stamp from .basemodels import ExtendedConfigDict, ProtoModel, qcschema_draft from .basis import BasisSet -from .common_models import ( - ComputeError, - DriverEnum, - Model, - Provenance, - check_convertible_version, - qcschema_input_default, - qcschema_output_default, -) +from .common_models import ComputeError, DriverEnum, Model, Provenance, check_convertible_version from .molecule import Molecule from .types import Array @@ -24,6 +16,9 @@ from .common_models import ReprArgs +# ==== Properties ============================================================= + + class AtomicResultProperties(ProtoModel): r""" Named properties of quantum chemistry computations following the MolSSI QCSchema. @@ -34,25 +29,24 @@ class AtomicResultProperties(ProtoModel): * nmo: number of molecular orbitals = :attr:`~qcelemental.models.AtomicResultProperties.calcinfo_nmo` """ - schema_name: Literal["qcschema_atomicproperties"] = Field( - "qcschema_atomicproperties", - description=( - f"The QCSchema specification this model conforms to. Explicitly fixed as qcschema_atomicproperties." 
- ), + schema_name: Literal["qcschema_atomic_properties"] = Field( + "qcschema_atomic_properties", description=(f"The QCSchema specification to which this model conforms.") ) # TRIAL schema_version: Literal[2] = Field( # TRIAL 2, # TRIAL description="The version number of :attr:`~qcelemental.models.AtomicResultProperties.schema_name` to which this model conforms.", # TRIAL ) - # Calcinfo + # ======== Calcinfo ======================================================= + calcinfo_nbasis: Optional[int] = Field(None, description="The number of basis functions for the computation.") calcinfo_nmo: Optional[int] = Field(None, description="The number of molecular orbitals for the computation.") calcinfo_nalpha: Optional[int] = Field(None, description="The number of alpha electrons in the computation.") calcinfo_nbeta: Optional[int] = Field(None, description="The number of beta electrons in the computation.") calcinfo_natom: Optional[int] = Field(None, description="The number of atoms in the computation.") - # Canonical + # ======== Canonical ====================================================== + nuclear_repulsion_energy: Optional[float] = Field(None, description="The nuclear repulsion energy.") return_energy: Optional[float] = Field( None, @@ -69,6 +63,8 @@ class AtomicResultProperties(ProtoModel): json_schema_extra={"units": "E_h/a0^2"}, ) + # ======== Method data ==================================================== + # SCF Keywords scf_one_electron_energy: Optional[float] = Field( None, @@ -335,6 +331,10 @@ class WavefunctionProperties(ProtoModel): "occupations_b", } + schema_name: Literal["qcschema_wavefunction_properties"] = Field( + "qcschema_wavefunction_properties", description=f"The QCSchema specification to which this model conforms." 
+ ) + # The full basis set description of the quantities basis: BasisSet = Field(..., description=str(BasisSet.__doc__)) restricted: bool = Field( @@ -589,6 +589,9 @@ def _assert_exists(cls, v, info): return v +# ==== Protocols ============================================================== + + class WavefunctionProtocolEnum(str, Enum): r"""Wavefunction to keep from a computation.""" @@ -632,6 +635,8 @@ class NativeFilesProtocolEnum(str, Enum): class AtomicResultProtocols(ProtoModel): r"""Protocols regarding the manipulation of computational result data.""" + schema_name: Literal["qcschema_atomic_protocols"] = "qcschema_atomic_protocols" + wavefunction: WavefunctionProtocolEnum = Field( WavefunctionProtocolEnum.none, description=str(WavefunctionProtocolEnum.__doc__) ) @@ -647,10 +652,13 @@ class AtomicResultProtocols(ProtoModel): model_config = ExtendedConfigDict(force_skip_defaults=True) +# ==== Inputs (Kw/Spec/In) ==================================================== + + class AtomicSpecification(ProtoModel): """Specification for a single point QC calculation""" - # schema_name: Literal["qcschema_atomicspecification"] = "qcschema_atomicspecification" + schema_name: Literal["qcschema_atomic_specification"] = "qcschema_atomic_specification" # schema_version: Literal[2] = Field( # 2, # description="The version number of ``schema_name`` to which this model conforms.", @@ -671,17 +679,19 @@ class AtomicSpecification(ProtoModel): ) def convert_v( - self, version: int + self, target_version: int, / ) -> Union["qcelemental.models.v1.QCInputSpecification", "qcelemental.models.v2.AtomicSpecification"]: """Convert to instance of particular QCSchema version.""" import qcelemental as qcel - if check_convertible_version(version, error="AtomicSpecification") == "self": + if check_convertible_version(target_version, error="AtomicSpecification") == "self": return self loss_store = {} dself = self.model_dump() - if version == 1: + if target_version == 1: + dself.pop("schema_name") + 
loss_store["protocols"] = dself.pop("protocols") loss_store["program"] = dself.pop("program") @@ -689,13 +699,12 @@ def convert_v( dself["extras"]["_qcsk_conversion_loss"] = loss_store self_vN = qcel.models.v1.QCInputSpecification(**dself) + else: + assert False, target_version return self_vN -### Primary models - - def atomic_input_json_schema_extra(schema, model): schema["$schema"] = qcschema_draft @@ -704,11 +713,8 @@ class AtomicInput(ProtoModel): r"""The MolSSI Quantum Chemistry Schema""" id: Optional[str] = Field(None, description="The optional ID for the computation.") - schema_name: Literal["qcschema_input"] = Field( - qcschema_input_default, - description=( - f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_input_default}." - ), + schema_name: Literal["qcschema_atomic_input"] = Field( + "qcschema_atomic_input", description=(f"The QCSchema specification to which this model conforms.") ) schema_version: Literal[2] = Field( 2, @@ -753,28 +759,33 @@ def convert_v( dself = self.model_dump() if target_version == 1: + dself.pop("schema_name") + dself["driver"] = dself["specification"].pop("driver") dself["model"] = dself["specification"].pop("model") dself["keywords"] = dself["specification"].pop("keywords", None) dself["protocols"] = dself["specification"].pop("protocols", None) dself["extras"] = dself["specification"].pop("extras", {}) dself["specification"].pop("program", None) # TODO store? 
+ dself["specification"].pop("schema_name", None) assert not dself["specification"], dself["specification"] dself.pop("specification") # now empty self_vN = qcel.models.v1.AtomicInput(**dself) + else: + assert False, target_version return self_vN +# ==== Results ================================================================ + + class AtomicResult(ProtoModel): r"""Results from a CMS program execution.""" - schema_name: constr(strip_whitespace=True, pattern=r"^(qc\_?schema_output)$") = Field( # type: ignore - qcschema_output_default, - description=( - f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_output_default}." - ), + schema_name: Literal["qcschema_atomic_output"] = Field( + "qcschema_atomic_output", description=(f"The QCSchema specification to which this model conforms.") ) schema_version: Literal[2] = Field( 2, @@ -807,17 +818,6 @@ class AtomicResult(ProtoModel): description="Additional information to bundle with the computation. Use for schema development and scratch space.", ) - @field_validator("schema_name", mode="before") - @classmethod - def _input_to_output(cls, v): - r"""If qcschema_input is passed in, cast it to output, otherwise no""" - if v.lower().strip() in [qcschema_input_default, qcschema_output_default]: - return qcschema_output_default - raise ValueError( - "Only {0} or {1} is allowed for schema_name, " - "which will be converted to {0}".format(qcschema_output_default, qcschema_input_default) - ) - @field_validator("schema_version", mode="before") def _version_stamp(cls, v): return 2 @@ -964,6 +964,8 @@ def convert_v( dself = self.model_dump() if target_version == 1: + dself.pop("schema_name") + # for input_data, work from model, not dict, to use convert_v dself.pop("input_data") input_data = self.input_data.convert_v(1).model_dump() # exclude_unset=True, exclude_none=True @@ -973,5 +975,7 @@ def convert_v( dself = {**input_data, **dself} self_vN = qcel.models.v1.AtomicResult(**dself) + else: + assert 
False, target_version return self_vN diff --git a/qcelemental/tests/test_model_general.py b/qcelemental/tests/test_model_general.py index e29731e9..9cfb013f 100644 --- a/qcelemental/tests/test_model_general.py +++ b/qcelemental/tests/test_model_general.py @@ -74,15 +74,20 @@ def test_repr_result(request, schema_versions): assert "'gradient'" in str(result) -def test_repr_optimization(schema_versions): +def test_repr_optimization(schema_versions, request): OptimizationInput = schema_versions.OptimizationInput - opt = OptimizationInput( - **{ + if "v2" in request.node.name: + optin = { + "specification": {"specification": {"driver": "gradient", "model": {"method": "UFF"}}}, + "initial_molecule": {"symbols": ["He"], "geometry": [0, 0, 0]}, + } + else: + optin = { "input_specification": {"driver": "gradient", "model": {"method": "UFF"}}, "initial_molecule": {"symbols": ["He"], "geometry": [0, 0, 0]}, } - ) + opt = OptimizationInput(**optin) assert "molecule_hash" in str(opt) assert "molecule_hash" in repr(opt) diff --git a/qcelemental/tests/test_model_results.py b/qcelemental/tests/test_model_results.py index 7174fdd1..5e2c08df 100644 --- a/qcelemental/tests/test_model_results.py +++ b/qcelemental/tests/test_model_results.py @@ -186,24 +186,44 @@ def native_data_fixture(result_data_fixture, request): @pytest.fixture(scope="function") -def optimization_data_fixture(result_data_fixture): +def optimization_data_fixture(result_data_fixture, request): trajectory = [] energies = [] + props = [] for x in range(5): result = result_data_fixture.copy() result["return_result"] = x trajectory.append(result) energies.append(x) + props.append({"return_energy": x}) - ret = { - "initial_molecule": result_data_fixture["molecule"], - "final_molecule": result_data_fixture["molecule"], - "trajectory": trajectory, - "energies": energies, - "success": True, - "provenance": {"creator": "qcel"}, - "input_specification": {"model": {"method": "UFF"}}, - } + if "v2" in request.node.name: + ret = 
{ + "final_molecule": result_data_fixture["molecule"], + "trajectory_results": trajectory, + "trajectory_properties": props, + "success": True, + "provenance": {"creator": "qcel"}, + "input_data": { + "initial_molecule": result_data_fixture["molecule"], + "specification": { + "program": "an opt program", + "specification": {"driver": "gradient", "model": {"method": "UFF"}, "program": "a qc program"}, + }, + }, + "properties": {"optimization_iterations": 14}, + } + else: + ret = { + "initial_molecule": result_data_fixture["molecule"], + "final_molecule": result_data_fixture["molecule"], + "trajectory": trajectory, + "energies": energies, + "success": True, + "provenance": {"creator": "qcel"}, + "keywords": {"program": "a qc program"}, + "input_specification": {"model": {"method": "UFF"}}, + } return ret @@ -228,32 +248,63 @@ def ethane_data_fixture(): @pytest.fixture(scope="function") -def torsiondrive_data_fixture(ethane_data_fixture, optimization_data_fixture): +def torsiondrive_data_fixture(ethane_data_fixture, optimization_data_fixture, request): ethane = ethane_data_fixture.copy() optres = optimization_data_fixture.copy() - input_data = { - "keywords": {"dihedrals": [(2, 0, 1, 5)], "grid_spacing": [180]}, - "input_specification": {"driver": "gradient", "model": {"method": "UFF", "basis": None}}, - "initial_molecule": [ethane] * 2, - "optimization_spec": { - "procedure": "geomeTRIC", - "keywords": { - "coordsys": "hdlc", - "maxiter": 500, - "program": "rdkit", + if "v2" in request.node.name: + input_data = { + "initial_molecules": [ethane] * 2, + "specification": { + "keywords": {"dihedrals": [(2, 0, 1, 5)], "grid_spacing": [180]}, + "specification": { + "program": "geomeTRIC", + "keywords": { + "coordsys": "hdlc", + "maxiter": 500, + "program": "rdkit", + }, + "specification": { + "driver": "gradient", + "model": {"method": "UFF", "basis": None}, + "program": "qcqc", + }, + }, }, - }, - } + } + else: + input_data = { + "keywords": {"dihedrals": [(2, 0, 1, 5)], 
"grid_spacing": [180]}, + "input_specification": {"driver": "gradient", "model": {"method": "UFF", "basis": None}}, + "initial_molecule": [ethane] * 2, + "optimization_spec": { + "procedure": "geomeTRIC", + "keywords": { + "coordsys": "hdlc", + "maxiter": 500, + "program": "rdkit", + }, + }, + } - ret = { - "success": True, - "provenance": {"creator": "qcel"}, - "final_energies": {"180": -2.3, "0": -4.5}, - "final_molecules": {"180": ethane, "0": ethane}, - "optimization_history": {"180": [optres, optres], "0": [optres]}, - **input_data, - } + if "v2" in request.node.name: + ret = { + "input_data": input_data, + "success": True, + "provenance": {"creator": "qcel"}, + "final_energies": {"180": -2.3, "0": -4.5}, + "final_molecules": {"180": ethane, "0": ethane}, + "optimization_history": {"180": [optres, optres], "0": [optres]}, + } + else: + ret = { + "success": True, + "provenance": {"creator": "qcel"}, + "final_energies": {"180": -2.3, "0": -4.5}, + "final_molecules": {"180": ethane, "0": ethane}, + "optimization_history": {"180": [optres, optres], "0": [optres]}, + **input_data, + } return ret @@ -540,15 +591,19 @@ def test_native_protocols(protocol, provided, expected, native_data_fixture, req "keep, indices", [(None, [0, 1, 2, 3, 4]), ("all", [0, 1, 2, 3, 4]), ("initial_and_final", [0, 4]), ("final", [4]), ("none", [])], ) -def test_optimization_trajectory_protocol(keep, indices, optimization_data_fixture, schema_versions): +def test_optimization_trajectory_protocol(keep, indices, optimization_data_fixture, schema_versions, request): OptimizationResult = schema_versions.OptimizationResult if keep is not None: - optimization_data_fixture["protocols"] = {"trajectory": keep} + if "v2" in request.node.name: + optimization_data_fixture["input_data"]["specification"]["protocols"] = {"trajectory": keep} + else: + optimization_data_fixture["protocols"] = {"trajectory": keep} opt = OptimizationResult(**optimization_data_fixture) - assert len(opt.trajectory) == 
len(indices) - for result, index in zip(opt.trajectory, indices): + trajs_target = opt.trajectory_results if "v2" in request.node.name else opt.trajectory + assert len(trajs_target) == len(indices) + for result, index in zip(trajs_target, indices): assert result.return_result == index @@ -745,11 +800,18 @@ def every_model_fixture(request): smodel = "OptimizationInput" data = request.getfixturevalue("optimization_data_fixture") - data = {k: data[k] for k in ["initial_molecule", "input_specification"]} + if "v2" in request.node.name: + data = data["input_data"] + else: + data = {k: data[k] for k in ["initial_molecule", "input_specification", "keywords"]} datas[smodel] = data smodel = "OptimizationSpecification" - data = {"procedure": "pyberny"} + if "v2" in request.node.name: + data = request.getfixturevalue("optimization_data_fixture") + data = data["input_data"]["specification"] + else: + data = {"procedure": "pyberny"} datas[smodel] = data smodel = "OptimizationProtocols" @@ -766,10 +828,19 @@ def every_model_fixture(request): smodel = "TorsionDriveInput" data = request.getfixturevalue("torsiondrive_data_fixture") - data = {k: data[k] for k in ["initial_molecule", "input_specification", "optimization_spec", "keywords"]} + if "v2" in request.node.name: + data = data["input_data"] + else: + data = {k: data[k] for k in ["initial_molecule", "input_specification", "optimization_spec", "keywords"]} datas[smodel] = data - # smodel = "TorsionDriveSpecification" # DNE + smodel = "TorsionDriveSpecification" + if "v2" in request.node.name: + data = request.getfixturevalue("torsiondrive_data_fixture") + data = data["input_data"]["specification"] + else: + data = {} # DNE + datas[smodel] = data smodel = "TDKeywords" # TODO "TorsionDriveKeywords" data = {"dihedrals": [(2, 0, 1, 5)], "grid_spacing": [180]} @@ -830,9 +901,9 @@ def every_model_fixture(request): pytest.param("OptimizationSpecification", "OptimizationSpecification", id="OptSpec"), 
pytest.param("OptimizationProtocols", "OptimizationProtocols", id="OptPtcl"), pytest.param("OptimizationResult", "OptimizationResult", id="OptRes"), - # pytest.param(None, "OptimizationProperties", id="OptProp"), + pytest.param(None, "OptimizationProperties", id="OptProp"), pytest.param("TorsionDriveInput", "TorsionDriveInput", id="TDIn"), - # pytest.param(None, "TorsionDriveSpecification", id="TDSpec"), + pytest.param(None, "TorsionDriveSpecification", id="TDSpec"), pytest.param("TDKeywords", "TDKeywords", id="TDKw"), # TODO TorsionDriveKeywords # pytest.param(None, "TorsionDriveProtocols", id="TDPtcl"), pytest.param("TorsionDriveResult", "TorsionDriveResult", id="TDRes"), @@ -877,7 +948,7 @@ def test_model_survey_success(smodel1, smodel2, every_model_fixture, request, sc "v1-MBSpec" : None, "v2-MBSpec" : None, # v2 DNE "v1-MBKw" : None, "v2-MBKw" : None, # v2 DNE "v1-MBPtcl" : None, "v2-MBPtcl" : None, # v2 DNE - "v1-MBRes" : True, "v2-MBRes" : True, # v2 DNE + "v1-MBRes" : True, "v2-MBRes" : None, # v2 DNE TODO v2 True "v1-MBProp" : None, "v2-MBProp" : None, # v2 DNE }[anskey] # fmt: on @@ -904,17 +975,34 @@ def test_model_survey_success(smodel1, smodel2, every_model_fixture, request, sc # check success override if ans is not None: - data["success"] = not ans + data2 = copy.deepcopy(data) + data2["success"] = not ans if "v2" in anskey: # v2 has enforced T/F with pytest.raises(pydantic.ValidationError) as e: - instance = model(**data) + instance = model(**data2) assert (cptd := getattr(instance, fld, "not found!")) == ans, f"[b] field {fld} = {cptd} != {ans}" else: # v1 can be reset to T/F - instance = model(**data) + instance = model(**data2) assert (cptd := getattr(instance, fld, "not found!")) == (not ans), f"[b] field {fld} = {cptd} != {not ans}" + # check inheritance + if smodel.endswith("Result"): + instance = model(**data) + smodelin = smodel.replace("Result", "Input") + if "ManyBody" in smodelin: + modelin = getattr(qcmanybody.models, smodelin) + else: + 
modelin = getattr(schema_versions, smodelin) + + if "v2" in anskey or "ManyBody" in smodel: + # for v2, <>Result has a field that contains <>Input. ManyBody v1 led the way here. + assert not isinstance(instance, modelin), f"[c] v2 {smodel} unexpectedly inherits {smodelin}" + else: + # for v1, <>Result inherits <>Input + assert isinstance(instance, modelin), f"[c] v1 {smodel} does not inherit {smodelin}" + @pytest.mark.parametrize("smodel1,smodel2", _model_classes_struct) def test_model_survey_schema_version(smodel1, smodel2, every_model_fixture, request, schema_versions): @@ -990,7 +1078,7 @@ def test_model_survey_extras(smodel1, smodel2, every_model_fixture, request, sch anskey = request.node.callspec.id.replace("None", "v1") # fmt: off ans = { - # v2: In/Ptcl/Prop/Kw + BasisSet, no! others, yes. In is questionable. + # v2: In/Ptcl/Prop/Kw + BasisSet, no! others, yes. is questionable. "v1-Mol-A" : {}, "v2-Mol-A" : {}, "v1-Mol-B" : {}, "v2-Mol-B" : {}, "v1-BasisSet" : None, "v2-BasisSet" : None, @@ -1001,17 +1089,17 @@ def test_model_survey_extras(smodel1, smodel2, every_model_fixture, request, sch "v1-AtRes" : {}, "v2-AtRes" : {}, "v1-AtProp" : None, "v2-AtProp" : None, "v1-WfnProp" : None, "v2-WfnProp" : None, - "v1-OptIn" : {}, "v2-OptIn" : {}, # TODO None + "v1-OptIn" : {}, "v2-OptIn" : None, "v1-OptSpec" : None, "v2-OptSpec" : {}, "v1-OptPtcl" : None, "v2-OptPtcl" : None, "v1-OptRes" : {}, "v2-OptRes" : {}, "v1-OptProp" : None, "v2-OptProp" : None, # v1 DNE - "v1-TDIn" : {}, "v2-TDIn" : {}, # TODO None + "v1-TDIn" : {}, "v2-TDIn" : None, "v1-TDSpec" : None, "v2-TDSpec" : {}, # v1 DNE "v1-TDKw" : None, "v2-TDKw" : None, - "v1-TDPtcl" : None, "v2-TDPtcl" : None, # v1 DNE + "v1-TDPtcl" : None, "v2-TDPtcl" : None, # v1/v2 DNE "v1-TDRes" : {}, "v2-TDRes" : {}, - "v1-TDProp" : None, "v2-TDProp" : None, # v1 DNE + "v1-TDProp" : None, "v2-TDProp" : None, # v1/v2 DNE "v1-MBIn" : {}, "v2-MBIn" : None, # v2 DNE "v1-MBSpec" : {}, "v2-MBSpec" : {}, # v2 DNE "v1-MBKw" : 
None, "v2-MBKw" : None, # v2 DNE @@ -1093,9 +1181,10 @@ def test_model_survey_convertable(smodel1, smodel2, every_model_fixture, request anskey = request.node.callspec.id.replace("None", "v1") # fmt: off ans = { - # "v1-Mol-A" , "v2-Mol-A" , - # "v1-Mol-B" , "v2-Mol-B" , - # "v1-BasisSet" , "v2-BasisSet", + # convert_v() for user-facing fns. uncomment lines if this expands + # "v1-Mol-A" , "v2-Mol-A" , # TODO + # "v1-Mol-B" , "v2-Mol-B" , # TODO + # "v1-BasisSet" , "v2-BasisSet", # TODO "v1-FailedOp" , "v2-FailedOp", "v1-AtIn" , "v2-AtIn" , "v1-AtSpec" , "v2-AtSpec" , @@ -1104,12 +1193,12 @@ def test_model_survey_convertable(smodel1, smodel2, every_model_fixture, request # "v1-AtProp" , "v2-AtProp" , # "v1-WfnProp" , "v2-WfnProp" , "v1-OptIn" , "v2-OptIn" , - # "v1-OptSpec" , "v2-OptSpec" , + "v1-OptSpec" , "v2-OptSpec" , # "v1-OptPtcl" , "v2-OptPtcl" , "v1-OptRes" , "v2-OptRes" , # "v1-OptProp" , "v2-OptProp" , "v1-TDIn" , "v2-TDIn" , - # "v1-TDSpec" , "v2-TDSpec" , + "v1-TDSpec" , "v2-TDSpec" , # "v1-TDKw" , "v2-TDKw" , # "v1-TDPtcl" , "v2-TDPtcl" , "v1-TDRes" , "v2-TDRes" , @@ -1141,9 +1230,84 @@ def test_model_survey_convertable(smodel1, smodel2, every_model_fixture, request data = every_model_fixture[smodel_fro] # check converts and converts to expected class - instance_fro = model_fro(**data) - instance_to = instance_fro.convert_v(1 if "v2" in anskey else 2) - assert isinstance(instance_to, model_to), f"instance {model_fro} failed to convert to {model_to}" + if anskey == "v1-OptSpec": + # v1 OptSpec has no convert_v + instance_fro = model_fro(**data) + with pytest.raises(AttributeError): + instance_to = instance_fro.convert_v(2) + else: + instance_fro = model_fro(**data) + instance_to = instance_fro.convert_v(1 if "v2" in anskey else 2) + assert isinstance(instance_to, model_to), f"instance {model_fro} failed to convert to {model_to}" + + +@pytest.mark.parametrize("smodel1,smodel2", _model_classes_struct) +def test_model_survey_schema_name(smodel1, smodel2, 
every_model_fixture, request, schema_versions): + anskey = request.node.callspec.id.replace("None", "v1") + # fmt: off + ans = { + # v2: In/Res + Mol/BasisSet/FailedOp, yes! Kw/Ptcl, no. Prop/Spec uncertain. + # note output not result + "v1-Mol-A" : "qcschema_molecule", "v2-Mol-A" : "qcschema_molecule", + "v1-Mol-B" : "qcschema_molecule", "v2-Mol-B" : "qcschema_molecule", + "v1-BasisSet" : "qcschema_basis", "v2-BasisSet" : "qcschema_basis", # TODO qcschema_basis_set? + "v1-FailedOp" : None, "v2-FailedOp" : "qcschema_failed_operation", + "v1-AtIn" : "qcschema_input", "v2-AtIn" : "qcschema_atomic_input", # TODO standardize! + "v1-AtSpec" : "qcschema_input", "v2-AtSpec" : "qcschema_atomic_specification", + "v1-AtPtcl" : None, "v2-AtPtcl" : "qcschema_atomic_protocols", + "v1-AtRes" : "qcschema_output", "v2-AtRes" : "qcschema_atomic_output", # TODO standardize! _result? + "v1-AtProp" : None, "v2-AtProp" : "qcschema_atomic_properties", + "v1-WfnProp" : None, "v2-WfnProp" : "qcschema_wavefunction_properties", + "v1-OptIn" : "qcschema_optimization_input", "v2-OptIn" : "qcschema_optimization_input", + "v1-OptSpec" : "qcschema_optimization_specification", "v2-OptSpec" : "qcschema_optimization_specification", + "v1-OptPtcl" : None, "v2-OptPtcl" : "qcschema_optimization_protocols", + "v1-OptRes" : "qcschema_optimization_output", "v2-OptRes" : "qcschema_optimization_output", # TODO change to _result? + "v1-OptProp" : None, "v2-OptProp" : "qcschema_optimization_properties", # v1 DNE + "v1-TDIn" : "qcschema_torsion_drive_input", "v2-TDIn" : "qcschema_torsion_drive_input", + "v1-TDSpec" : None, "v2-TDSpec" : "qcschema_torsion_drive_specification", # v1 DNE + "v1-TDKw" : None, "v2-TDKw" : "qcschema_torsion_drive_keywords", + "v1-TDPtcl" : None, "v2-TDPtcl" : None, # v1 DNE, v2 DNE + "v1-TDRes" : "qcschema_torsion_drive_output", "v2-TDRes" : "qcschema_torsion_drive_output", # TODO change to _result? 
+ "v1-TDProp" : None, "v2-TDProp" : None, # v1 DNE, v2 DNE + "v1-MBIn" : "qcschema_manybodyinput", "v2-MBIn" : "qcschema_many_body_input", # v2 DNE + "v1-MBSpec" : "qcschema_manybodyspecification", "v2-MBSpec" : "qcschema_many_body_specification", # v2 DNE + "v1-MBKw" : "qcschema_manybodykeywords", "v2-MBKw" : "qcschema_many_body_keywords", # v2 DNE + "v1-MBPtcl" : None, "v2-MBPtcl" : "qcschema_many_body_protocols", # v2 DNE + "v1-MBRes" : "qcschema_manybodyresult", "v2-MBRes" : "qcschema_many_body_result", # v2 DNE + "v1-MBProp" : "qcschema_manybodyproperties", "v2-MBProp" : "qcschema_many_body_properties", # v2 DNE + }[anskey] + # fmt: on + + fieldsattr = "model_fields" if "v2" in anskey else "__fields__" + smodel = smodel2 if "v2" in anskey else smodel1 + if smodel is None: + pytest.skip("model not available for this schema version") + if "ManyBody" in smodel: + import qcmanybody + + model = getattr(qcmanybody.models, smodel.split("-")[0]) + else: + model = getattr(schema_versions, smodel.split("-")[0]) + data = every_model_fixture[smodel] + + # check default name set + instance = model(**data) + fld = "schema_name" + if ans is None: + assert fld not in (cptd := getattr(instance, fieldsattr)), f"[a] field {fld} unexpectedly present: {cptd}" + else: + assert (cptd := getattr(instance, fld, "not found!")) == ans, f"[a] field {fld} = {cptd} != {ans}" + + # check wrong name fatal + if ans is not None: + data["schema_name"] = "Asdf" + if "Molecule-B" in smodel: + # TODO fix mol validated pathway when upgrade Mol + with pytest.raises(qcel.ValidationError) as e: + instance = model(**data) + else: + with pytest.raises((pydantic.ValidationError, pydantic.v1.ValidationError)) as e: + instance = model(**data) def test_result_model_deprecations(result_data_fixture, optimization_data_fixture, request): diff --git a/qcelemental/tests/test_utils.py b/qcelemental/tests/test_utils.py index 3b40f897..9e16762a 100644 --- a/qcelemental/tests/test_utils.py +++ 
b/qcelemental/tests/test_utils.py @@ -385,6 +385,7 @@ def atomic_result_data(request): "success": True, } if "v2" in request.node.name: + data["schema_name"] = "qcschema_atomic_output" data["input_data"] = { "molecule": data["molecule"], "specification": {