Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable atom_format and ghost_format #332

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,31 @@ Changelog
.. +++++


0.28.0 / 2023-12-DD (Unreleased)
--------------------------------

.. Breaking Changes
.. ++++++++++++++++
..
.. New Features
.. ++++++++++++

Enhancements
++++++++++++
- UNMERGED (:pr:`332`) ``Molecule.to_string()`` learned to honor the arguments ``atom_format`` and
``ghost_format`` for ``dtype`` values other than ``"xyz"``. For the most part, specifying these will
just break the string for its intended use. But if you're looking to adapt a format or want to
suppress ghost atoms with ``ghost_format=""``, it can be useful. The only way this can be breaking
is if users were explicitly passing ``atom_format`` or ``ghost_format`` as something other than
``None`` and relying on it doing nothing.

.. Bug Fixes
.. +++++++++
..
.. Misc.
.. +++++


0.27.1 / 2023-10-26
-------------------

Expand Down
47 changes: 24 additions & 23 deletions qcelemental/molparse/to_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def to_string(
Overall string format. Note that it's possible to request variations
that don't fit the dtype spec so may not be re-readable (e.g., ghost
and mass in nucleus label with ``'xyz'``).
'cfour' forces nucleus label, ignoring atom_format, ghost_format
units
Units in which to write string. Usually ``Angstrom`` or ``Bohr``
but may be any length unit. There is not an option to write in
Expand All @@ -42,9 +41,11 @@ def to_string(
arrangement. For example, if a format naturally uses element symbol
and you want atomic number instead with mass info, too, pass
``'{elez}@{mass}'``. See `ghost_format` for handling field 'real'.
Prior to v0.28.0, this argument only affected ``dtype='xyz'``/``'xyz+'``.
ghost_format
General format is ``'@{elem}'``. Like `atom_format`, but this formatter
is used when `real=False`. To suppress ghost atoms, use `ghost_format=''`.
Prior to v0.28.0, this argument only affected ``dtype='xyz'``/``'xyz+'``.
width
Field width for formatting coordinate float.
prec
Expand Down Expand Up @@ -142,8 +143,8 @@ def to_dict(self) -> Dict:
smol.extend(atoms)

elif dtype == "orca":
atom_format = "{elem}"
ghost_format = "{elem}:"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "{elem}:" if ghost_format is None else ghost_format
umap = {"bohr": "! Bohrs", "angstrom": "!"}

atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)
Expand All @@ -161,8 +162,8 @@ def to_dict(self) -> Dict:
# * casting 'molecular_charge' to int
# * no spaces at the beginning of 1st/comment line is important

atom_format = "{elem}"
ghost_format = "GH"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "GH" if ghost_format is None else ghost_format
# TODO handle which units valid
umap = {"bohr": "bohr", "angstrom": "angstrom"}

Expand All @@ -180,8 +181,8 @@ def to_dict(self) -> Dict:
}

elif dtype == "molpro":
atom_format = "{elem}"
ghost_format = "{elem}"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "{elem}" if ghost_format is None else ghost_format
umap = {"bohr": "bohr", "angstrom": "angstrom"}

atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)
Expand Down Expand Up @@ -218,8 +219,8 @@ def to_dict(self) -> Dict:
smol.append(f"set,spin={molrec['molecular_multiplicity']-1}")

elif dtype == "nwchem":
atom_format = "{elem}{elbl}"
ghost_format = "bq{elem}{elbl}"
atom_format = "{elem}{elbl}" if atom_format is None else atom_format
ghost_format = "bq{elem}{elbl}" if ghost_format is None else ghost_format
# TODO handle which units valid
umap = {"bohr": "bohr", "angstrom": "angstroms", "nm": "nanometers", "pm": "picometers"}

Expand All @@ -245,8 +246,8 @@ def to_dict(self) -> Dict:
data.keywords["mcscf__multiplicity"] = molrec["molecular_multiplicity"]

elif dtype == "madness":
atom_format = "{elem}"
ghost_format = "GH"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "GH" if ghost_format is None else ghost_format
# TODO handle which units valid
umap = {"bohr": "au", "angstrom": "angstrom"}

Expand All @@ -273,8 +274,8 @@ def to_dict(self) -> Dict:
# through ``fix_symmetry``. newline encoded here if needed.
# * coord=prinaxis, as set up here, can't handle ghost atoms

atom_format = " {elem}{elbl} {elez}"
ghost_format = " {elem} -{elez}"
atom_format = " {elem}{elbl} {elez}" if atom_format is None else atom_format
ghost_format = " {elem} -{elez}" if ghost_format is None else ghost_format
umap = {"bohr": "bohr", "angstrom": "angs"}

atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)
Expand All @@ -300,8 +301,8 @@ def to_dict(self) -> Dict:
}

elif dtype == "terachem":
atom_format = "{elem}"
ghost_format = "X{elem}"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "X{elem}" if ghost_format is None else ghost_format
umap = {"bohr": "au", "angstrom": ""}

atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)
Expand All @@ -311,8 +312,8 @@ def to_dict(self) -> Dict:
smol.extend(atoms)

elif dtype == "psi4":
atom_format = "{elem}{elbl}"
ghost_format = "Gh({elem}{elbl})"
atom_format = "{elem}{elbl}" if atom_format is None else atom_format
ghost_format = "Gh({elem}{elbl})" if ghost_format is None else ghost_format
umap = {"bohr": "bohr", "angstrom": "angstrom"}

atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)
Expand Down Expand Up @@ -350,8 +351,8 @@ def to_dict(self) -> Dict:
# symbol comes afterwards.
# Handling of ghost atoms is done in the basis section of the control
# file by setting the nuclear charge of certain atoms to zero.
atom_format = "{elem}"
ghost_format = "{elem}"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "{elem}" if ghost_format is None else ghost_format
umap = {"bohr": "bohr"}
umap[units.lower()] # trigger error if downstream can't handle

Expand Down Expand Up @@ -386,8 +387,8 @@ def to_dict(self) -> Dict:
smol.append(f" {(a1 + 1):2d} {(a2 + 1):2d} {int(b):1d} 0 0 0 0")

elif dtype == "qchem":
atom_format = "{elem}"
ghost_format = "@{elem}"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "@{elem}" if ghost_format is None else ghost_format
umap = {"bohr": "True", "angstrom": "False"}

atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)
Expand Down Expand Up @@ -429,8 +430,8 @@ def to_dict(self) -> Dict:
data.keywords["symmetry"] = False

elif dtype == "mrchem":
atom_format = "{elem}"
ghost_format = "{elem}"
atom_format = "{elem}" if atom_format is None else atom_format
ghost_format = "{elem}" if ghost_format is None else ghost_format
atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)

smol = (
Expand Down
16 changes: 16 additions & 0 deletions qcelemental/tests/test_molparse_to_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,23 @@
Co 0.000000000000 0.000000000000 0.000000000000
GH {au2:.12f} 0.000000000000 0.000000000000
H -{au2:.12f} 0.000000000000 0.000000000000
""",
"ans2_cfour_ang_format": f"""auto-generated by QCElemental from molecule CoH2
Co 27 0.000000000000 0.000000000000 0.000000000000
GH {au2:.12f} 0.000000000000 0.000000000000
H 1 -{au2:.12f} 0.000000000000 0.000000000000
""",
"ans2_nwchem_ang": f"""geometry units angstroms
Co 0.000000000000 0.000000000000 0.000000000000
bqH {au2:.12f} 0.000000000000 0.000000000000
H_other -{au2:.12f} 0.000000000000 0.000000000000

end
""",
"ans2_nwchem_ang_format": f"""geometry units angstroms
Co 0.000000000000 0.000000000000 0.000000000000
H_other -{au2:.12f} 0.000000000000 0.000000000000

end
""",
"ans2_madness_au": f"""geometry
Expand Down Expand Up @@ -267,6 +278,11 @@
(("subject2", {"dtype": "nglview-sdf"}), "ans2_ngslviewsdf"),
(("subject2", {"dtype": "qchem", "units": "bohr"}), "ans2_qchem_au"),
(("subject2", {"dtype": "gamess", "units": "angstrom"}), "ans2_gamess_ang"),
(
("subject2", {"dtype": "cfour", "units": "angstrom", "atom_format": "{elem} {elez}"}),
"ans2_cfour_ang_format",
),
(("subject2", {"dtype": "nwchem", "units": "angstrom", "ghost_format": ""}), "ans2_nwchem_ang_format"),
],
)
def test_to_string_xyz(inp, expected):
Expand Down