Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parsing of Mulliken charges from SIESTA stdout #691

Merged
merged 16 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ we hit release version 1.0.0.
- Creation of [n]-triangulenes (`sisl.geom.triangulene`)
- added `offset` argument in `Geometry.add_vacuum` to enable shifting atomic coordinates
- A new `AtomicMatrixPlot` to plot sparse matrices, #668
- Parsing of total Mulliken charges in `stdoutSileSiesta`, #691

### Fixed
- PEP-585 compliant
Expand Down
202 changes: 180 additions & 22 deletions src/sisl/io/siesta/stdout.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

def _parse_spin(attr, instance, match):
"""Parse 'redata: Spin configuration *= <value>'"""
opt = match.string.split("=")[-1]
opt = match.string.split("=")[-1].strip()

Check warning on line 45 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L45

Added line #L45 was not covered by tests

if opt.startswith("spin-orbit"):
return Spin("spin-orbit")
Expand Down Expand Up @@ -123,6 +123,12 @@
lambda attr, instance, match: int(match.string.split()[-2]),
not_found="warn",
),
_A(
"nspecies",
r"^redata: Number of Atomic Species",
lambda attr, instance, match: int(match.string.split("=")[-1]),
not_found="warn",
),
_A(
"completed",
r".*Job completed",
Expand Down Expand Up @@ -994,14 +1000,17 @@

@sile_fh_open(True)
def read_charge(
self, name, iscf=Opt.ANY, imd=Opt.ANY, key_scf="scf", as_dataframe=False
self,
name: Literal["voronoi", "hirshfeld", "mulliken"],
iscf=Opt.ANY,
imd=Opt.ANY,
key_scf: str = "scf",
as_dataframe: bool = False,
):
r"""Read charges calculated in SCF loop or MD loop (or both)

Siesta enables many different modes of writing out charges.

NOTE: currently Mulliken charges are not implemented.

The below table shows a list of different cases that
may be encountered, the letters are referred to in the
return section to indicate what is returned.
Expand All @@ -1025,12 +1034,12 @@
the SCF charges are not present. For `Opt.ANY` it will return
the most information, effectively SCF will be returned if present.

Currently Mulliken is not implemented, any help in reading this would be
very welcome.
Currently orbitally-resolved Mulliken is not implemented, any help in
reading this would be very welcome.

Parameters
----------
name: {"voronoi", "hirshfeld"}
name:
the name of the charges that you want to read
iscf: int or Opt, optional
index (0-based) of the scf iteration you want the charges for.
Expand All @@ -1044,9 +1053,9 @@
the returned quantities depend on what is present.
If ``None/Opt.NONE`` it will not return any MD charges.
If both `imd` and `iscf` are ``None`` then only the final charges will be returned.
key_scf : str, optional
key_scf :
the key lookup for the scf iterations (a ":" will automatically be appended)
as_dataframe: boolean, optional
as_dataframe:
whether charges should be returned as a pandas dataframe.

Returns
Expand All @@ -1055,7 +1064,7 @@
if a specific MD+SCF index is requested (or special cases where output is
not complete)
list of numpy.ndarray
if one both `iscf` or `imd` is different from ``None/Opt.NONE``.
if `iscf` or `imd` is different from ``None/Opt.NONE``.
pandas.DataFrame
if `as_dataframe` is requested. The dataframe will have multi-indices if multiple
SCF or MD steps are requested.
Expand Down Expand Up @@ -1111,16 +1120,15 @@

# We have found the header, prepare a list to read the charges
atom_charges = []
line = " "
while line != "":
while (line := self.readline()) != "":

Check warning on line 1123 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1123

Added line #L1123 was not covered by tests
try:
line = self.readline()
charge_vals = _parse_charge(line)
atom_charges.append(charge_vals)
except Exception:
# We already have the charge values and we reached a line that can't be parsed,
# this means we have reached the end.
break

if pd is None:
# not as_dataframe
return _a.arrayf(atom_charges)
Expand All @@ -1141,7 +1149,159 @@
# define helper function for reading voronoi+hirshfeld charges
def _mulliken_charges():
"""Read output from Mulliken charges"""
raise NotImplementedError("Mulliken charges are not implemented currently")
nonlocal pd

# Expecting something like this (NC/SOC)
# mulliken: Atomic and Orbital Populations:

# Species: Cl

# Atom Orb Charge Spin Svec
# ----------------------------------------------------------------
# 1 1 3s 1.75133 0.00566 0.004 0.004 -0.000
# 1 2 3s 0.09813 0.00658 -0.005 -0.005 0.000
# 1 3 3py 1.69790 0.21531 -0.161 -0.142 0.018
# 1 4 3pz 1.72632 0.15770 -0.086 -0.132 -0.008
# 1 5 3px 1.81369 0.01618 -0.004 0.015 -0.004
# 1 6 3py -0.04663 0.02356 -0.017 -0.016 -0.000
# 1 7 3pz -0.04167 0.01560 -0.011 -0.011 0.001
# 1 8 3px -0.02977 0.00920 -0.006 -0.007 0.000
# 1 9 3Pdxy 0.00595 0.00054 -0.000 -0.000 -0.000
# 1 10 3Pdyz 0.00483 0.00073 -0.001 -0.001 -0.000
# 1 11 3Pdz2 0.00515 0.00098 -0.001 -0.001 -0.000
# 1 12 3Pdxz 0.00604 0.00039 -0.000 -0.000 0.000
# 1 13 3Pdx2-y2 0.00607 0.00099 -0.001 -0.001 0.000
# 1 Total 6.99733 0.41305 -0.288 -0.296 0.007

# Define the function that parses the charges
def _parse_charge_total_nc(line): # non-colinear and soc
atom_idx, _, *vals = line.split()

Check warning on line 1178 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1177-L1178

Added lines #L1177 - L1178 were not covered by tests
# assert that this is a proper line
# this should catch cases where the following line of charge output
# is still parseable
# atom_idx = int(atom_idx)
return int(atom_idx), list(map(float, vals))

Check warning on line 1183 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1183

Added line #L1183 was not covered by tests

def _parse_charge_total(line): # unpolarized and colinear spin
atom_idx, val, *_ = line.split()
return int(atom_idx), float(val)

Check warning on line 1187 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1185-L1187

Added lines #L1185 - L1187 were not covered by tests

# Define the function that parses a single species
def _parse_species_nc(): # non-colinear and soc

Check warning on line 1190 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1190

Added line #L1190 was not covered by tests
nonlocal header, atom_idx, atom_charges

# The mulliken charges are organized per species where the charges
# for each species are enclosed by dashes (-----)

# Step to header
_, line = self.step_to("Atom", allow_reread=False)
if header is None:
header = (

Check warning on line 1199 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1197-L1199

Added lines #L1197 - L1199 were not covered by tests
line.replace("Charge", "e")
.replace("Spin", "S")
.replace(
"Svec", "Sx Sy Sz"
) # Split Svec into Cartesian components
.split()
)[2:]

# Skip over the starting ---- line
self.readline()

Check warning on line 1209 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1209

Added line #L1209 was not covered by tests

# Read until closing ---- line
while "----" not in (line := self.readline()):
if "Total" in line:
ia, charge_vals = _parse_charge_total_nc(line)
atom_idx.append(ia)
atom_charges.append(charge_vals)

Check warning on line 1216 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1212-L1216

Added lines #L1212 - L1216 were not covered by tests

def _parse_spin_pol(): # unpolarized and colinear spin

Check warning on line 1218 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1218

Added line #L1218 was not covered by tests
nonlocal atom_idx, atom_charges

# The mulliken charges are organized per spin
# polarization (UP/DOWN). The end of a spin
# block is marked by a Qtot

# Read until we encounter "mulliken: Qtot"
def try_parse_int(s):
try:
int(s)
except ValueError:
return False

Check warning on line 1230 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1226-L1230

Added lines #L1226 - L1230 were not covered by tests
else:
return True

Check warning on line 1232 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1232

Added line #L1232 was not covered by tests

while "mulliken: Qtot" not in (line := self.readline()):
words = line.split()
if len(words) > 0 and try_parse_int(words[0]):

Check warning on line 1236 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1234-L1236

Added lines #L1234 - L1236 were not covered by tests
# This should be a line containing the total charge for an atom
ia, charge = _parse_charge_total(line)
atom_idx.append(ia)
atom_charges.append([charge])

Check warning on line 1240 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1238-L1240

Added lines #L1238 - L1240 were not covered by tests

# Determine with which spin type we are dealing
if self.info.spin.is_unpolarized: # UNPOLARIZED

Check warning on line 1243 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1243

Added line #L1243 was not covered by tests
# No spin components so just parse charge
atom_charges = []
atom_idx = []
header = ["e"]
_parse_spin_pol()

Check warning on line 1248 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1245-L1248

Added lines #L1245 - L1248 were not covered by tests

elif self.info.spin.is_polarized:

Check warning on line 1250 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1250

Added line #L1250 was not covered by tests
# Parse both spin polarizations
atom_charges_pol = []
header = ["e", "Sz"]
for s in ("UP", "DOWN"):
atom_charges = []
atom_idx = []
self.step_to(f"mulliken: Spin {s}", allow_reread=False)
_parse_spin_pol()
atom_charges_pol.append(atom_charges)

Check warning on line 1259 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1252-L1259

Added lines #L1252 - L1259 were not covered by tests

# Compute the charge and spin of each atom
atom_charges_pol_array = _a.arrayf(atom_charges_pol)
atom_q = (

Check warning on line 1263 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1262-L1263

Added lines #L1262 - L1263 were not covered by tests
atom_charges_pol_array[0, :, 0] + atom_charges_pol_array[1, :, 0]
)
atom_s = (

Check warning on line 1266 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1266

Added line #L1266 was not covered by tests
atom_charges_pol_array[0, :, 0] - atom_charges_pol_array[1, :, 0]
)
atom_charges[:] = np.stack((atom_q, atom_s), axis=-1)

Check warning on line 1269 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1269

Added line #L1269 was not covered by tests

elif not self.info.spin.is_diagonal:

Check warning on line 1271 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1271

Added line #L1271 was not covered by tests
# Parse each species
atom_charges = []
atom_idx = []
header = None
for _ in range(self.info.nspecies):
found, _ = self.step_to("Species:", allow_reread=False)
_parse_species_nc()

Check warning on line 1278 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1273-L1278

Added lines #L1273 - L1278 were not covered by tests

else:
raise NotImplementedError(
"Something went wrong... Couldn't parse file."
)

# Convert to array and sort in the order of the atoms
sort_idx = np.argsort(atom_idx)
atom_charges_array = _a.arrayf(atom_charges)[sort_idx]

Check warning on line 1287 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1286-L1287

Added lines #L1286 - L1287 were not covered by tests

if pd is None:

Check warning on line 1289 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1289

Added line #L1289 was not covered by tests
# not as_dataframe
return atom_charges_array

Check warning on line 1291 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1291

Added line #L1291 was not covered by tests

# determine how many columns we have
# this will remove atom indices and species, so only inside
ncols = len(atom_charges[0])
assert ncols == len(header)

Check warning on line 1296 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1295-L1296

Added lines #L1295 - L1296 were not covered by tests

# the precision is limited, so no need for double precision
return pd.DataFrame(

Check warning on line 1299 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1299

Added line #L1299 was not covered by tests
atom_charges_array,
columns=header,
dtype=np.float32,
index=pd.RangeIndex(stop=len(atom_charges), name="atom"),
)

# Check that a known charge has been requested
if namel == "voronoi":
Expand Down Expand Up @@ -1178,6 +1338,7 @@
key_scf,
*charge_keys,
]

# adjust the below while loop to take into account any additional
# segments of search_keys
IDX_SCF_END = [0, 1]
Expand Down Expand Up @@ -1207,9 +1368,11 @@
FOUND_MD = False
FOUND_FINAL = False

# TODO whalrus
ret = self.step_to(search_keys, case=True, ret_index=True, allow_reread=False)
while ret[0]:
while (

Check warning on line 1371 in src/sisl/io/siesta/stdout.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/stdout.py#L1371

Added line #L1371 was not covered by tests
ret := self.step_to(
search_keys, case=True, ret_index=True, allow_reread=False
)
)[0]:
if ret[2] in IDX_SCF_END:
# we finished all SCF iterations
current_state = state.MD
Expand Down Expand Up @@ -1270,11 +1433,6 @@

current_state = state.CHARGE

# step to next entry
ret = self.step_to(
search_keys, case=True, ret_index=True, allow_reread=False
)

if not any((FOUND_SCF, FOUND_MD, FOUND_FINAL)):
raise SileError(f"{self!s} does not contain any charges ({name})")

Expand Down
13 changes: 9 additions & 4 deletions src/sisl/io/siesta/tests/test_stdout_charges.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

# tests here tests charge reads for output
# voronoi + hirshfeld: test_vh_*
# voronoi + hirshfeld + mulliken: test_vhm_*
# voronoi: test_v_*
# hirshfeld: test_h_*
# mulliken: test_m_*
Expand All @@ -30,8 +31,8 @@
return False


@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld"))
def test_vh_empty_file(name, sisl_files):
@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken"))
def test_vhm_empty_file(name, sisl_files):
f = sisl_files("siesta", "ancient", "voronoi_hirshfeld_4.1_none.out")
out = stdoutSileSiesta(f)

Expand Down Expand Up @@ -76,9 +77,13 @@


@pytest.mark.parametrize("fname", ("md", "pol_md", ("ancient", "pol_md"), "nc_md"))
@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld"))
def test_vh_md(name, fname, sisl_files):
@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken"))
def test_vhm_md(name, fname, sisl_files):
if isinstance(fname, tuple):
if name == "mulliken":

Check warning on line 83 in src/sisl/io/siesta/tests/test_stdout_charges.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/tests/test_stdout_charges.py#L83

Added line #L83 was not covered by tests
# we don't have this in the test, so simply return
# (this isn't a test after all)
return

Check warning on line 86 in src/sisl/io/siesta/tests/test_stdout_charges.py

View check run for this annotation

Codecov / codecov/patch

src/sisl/io/siesta/tests/test_stdout_charges.py#L86

Added line #L86 was not covered by tests
f = sisl_files("siesta", "ancient", f"voronoi_hirshfeld_4.1_{fname[1]}.out")
else:
f = sisl_files("siesta", "charges", fname, "RUN.out")
Expand Down
Loading