diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 40634973a1f..8a8428c57e3 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -24,6 +24,7 @@ from monty.os.path import zpath from monty.re import regrep from numpy.testing import assert_allclose +from tqdm import tqdm from pymatgen.core import Composition, Element, Lattice, Structure from pymatgen.core.trajectory import Trajectory @@ -3781,10 +3782,15 @@ def get_alpha(self) -> VolumetricData: return VolumetricData(self.structure, alpha_data) -class Procar: +class Procar(MSONable): """ PROCAR file reader. + Updated to use code from easyunfold (https://smtg-bham.github.io/easyunfold; band-structure + unfolding package) to allow SOC PROCAR parsing, and parsing multiple PROCAR files together. + easyunfold's PROCAR parser can be used if finer control over projections (k-point weighting, + normalisation per band, quick orbital sub-selection etc) is needed. + Attributes: data (dict): The PROCAR data of the form below. It should VASP uses 1-based indexing, but all indices are converted to 0-based here. @@ -3795,48 +3801,245 @@ class Procar: nbands (int): Number of bands. nkpoints (int): Number of k-points. nions (int): Number of ions. + nspins (int): Number of spins. + is_soc (bool): Whether the PROCAR contains spin-orbit coupling (LSORBIT = True) data. + kpoints (np.array): The k-points as an nd.array of shape (nkpoints, 3). + occupancies (dict): The occupancies of the bands as a dict of the form: + { spin: nd.array accessed with (k-point index, band index) } + eigenvalues (dict): The eigenvalues of the bands as a dict of the form: + { spin: nd.array accessed with (k-point index, band index) } + xyz_data (dict): The PROCAR projections data along the x,y and z magnetisation projection + directions, with is_soc = True (see VASP wiki for more info). + { 'x'/'y'/'z': nd.array accessed with (k-point index, band index, ion index, orbital index) } """ - def __init__(self, filename: PathLike) -> None: + def __init__(self, filename: PathLike | list[PathLike]): + """ + Args: + filename: The path to PROCAR(.gz) file to read, or list of paths. + """ + # get PROCAR filenames list to parse: + filenames = filename if isinstance(filename, list) else [filename] + self.nions: int | None = None # used to check for consistency in files later + self.nspins: int | None = None # used to check for consistency in files later + self.is_soc: bool | None = None # used to check for consistency in files later + self.orbitals = None # used to check for consistency in files later + self.read(filenames) + + def read(self, filenames: list[PathLike]): + """ + Read in PROCAR projections data, possibly from multiple files. + + Args: + filenames: List of PROCAR files to read. + """ + parsed_kpoints = None + occupancies_list, kpoints_list, weights_list = [], [], [] + eigenvalues_list, data_list, xyz_data_list = [], [], [] + phase_factors_list = [] + for filename in tqdm(filenames, desc="Reading PROCARs", unit="file", disable=len(filenames) == 1): + kpoints, weights, eigenvalues, occupancies, data, phase_factors, xyz_data = self._read( + filename, parsed_kpoints=parsed_kpoints + ) + + # Append to respective lists + occupancies_list.append(occupancies) + kpoints_list.append(kpoints) + weights_list.append(weights) + eigenvalues_list.append(eigenvalues) + data_list.append(data) + xyz_data_list.append(xyz_data) + phase_factors_list.append(phase_factors) + + # Combine arrays along the kpoints axis: + # nbands (axis = 2) could differ between arrays, so set missing values to zero: + max_nbands = max(eig_dict[Spin.up].shape[1] for eig_dict in eigenvalues_list) + for dict_array in itertools.chain( + occupancies_list, eigenvalues_list, data_list, xyz_data_list, phase_factors_list + ): + if dict_array: + for key, array in dict_array.items(): + if array.shape[1] < max_nbands: + if len(array.shape) == 2: # occupancies, eigenvalues + dict_array[key] = np.pad( + array, + ((0, 0), (0, max_nbands - array.shape[2])), + mode="constant", + ) + elif len(array.shape) == 4: # data, phase_factors + dict_array[key] = np.pad( + array, + ( + (0, 0), + (0, max_nbands - array.shape[2]), + (0, 0), + (0, 0), + ), + mode="constant", + ) + elif len(array.shape) == 5: # xyz_data + dict_array[key] = np.pad( + array, + ( + (0, 0), + (0, max_nbands - array.shape[2]), + (0, 0), + (0, 0), + (0, 0), + ), + mode="constant", + ) + else: + raise ValueError("Unexpected array shape encountered!") + + # set nbands, nkpoints, and other attributes: + self.nbands = max_nbands + self.kpoints = np.concatenate(kpoints_list, axis=0) + self.nkpoints = len(self.kpoints) + self.occupancies = { + spin: np.concatenate([occupancies[spin] for occupancies in occupancies_list], axis=0) + for spin in occupancies_list[0] + } + self.eigenvalues = { + spin: np.concatenate([eigenvalues[spin] for eigenvalues in eigenvalues_list], axis=0) + for spin in eigenvalues_list[0] + } + self.weights = np.concatenate(weights_list, axis=0) + self.data = {spin: np.concatenate([data[spin] for data in data_list], axis=0) for spin in data_list[0]} + self.phase_factors = { + spin: np.concatenate([phase_factors[spin] for phase_factors in phase_factors_list], axis=0) + for spin in phase_factors_list[0] + } + if self.is_soc: + self.xyz_data: dict | None = { + key: np.concatenate([xyz_data[key] for xyz_data in xyz_data_list], axis=0) for key in xyz_data_list[0] + } + else: + self.xyz_data = None + + def _parse_kpoint_line(self, line): + """ + Parse k-point vector from a PROCAR line. + + Sometimes VASP outputs the kpoints joined together like + '0.00000000-0.50000000-0.50000000' when there are negative signs, + so need to be able to recognise and handle this. """ + fields = line.split() + kpoint_fields = fields[3 : fields.index("weight")] + kpoint_fields = [" -".join(field.split("-")).split() for field in kpoint_fields] + kpoint_fields = [val for sublist in kpoint_fields for val in sublist] # flatten + + return tuple(round(float(val), 5) for val in kpoint_fields) # tuple to make it hashable, + # rounded to 5 decimal places to ensure proper kpoint matching + + def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = None): + """Main function for reading in the PROCAR projections data. + Args: - filename: The PROCAR to read. + filename (PathLike): Path to PROCAR file to read. + parsed_kpoints (set[tuple[Kpoint]]): Set of tuples of already-parsed kpoints (e.g. from multiple + zero-weighted bandstructure calculations), to ensure redundant/duplicate parsing. """ - headers = None + if parsed_kpoints is None: + parsed_kpoints = set() with zopen(filename, mode="rt") as file_handle: preamble_expr = re.compile(r"# of k-points:\s*(\d+)\s+# of bands:\s*(\d+)\s+# of ions:\s*(\d+)") kpoint_expr = re.compile(r"^k-point\s+(\d+).*weight = ([0-9\.]+)") band_expr = re.compile(r"^band\s+(\d+)") ion_expr = re.compile(r"^ion.*") + total_expr = re.compile(r"^tot.*") expr = re.compile(r"^([0-9]+)\s+") current_kpoint = 0 current_band = 0 - done = False - spin = Spin.down + spin = Spin.down # switched to Spin.up for first block n_kpoints = None + kpoints: list[tuple[float, float, float]] = [] n_bands = None n_ions = None - weights: list[float] = [] + weights: np.ndarray[float] | None = None headers = None - data: dict[Spin, np.ndarray] | None = None + data: dict[Spin, np.ndarray] = {} + eigenvalues: dict[Spin, np.ndarray] | None = None + occupancies: dict[Spin, np.ndarray] | None = None phase_factors: dict[Spin, np.ndarray] | None = None + xyz_data: dict[str, np.ndarray] | None = None # 'x'/'y'/'z' as keys for SOC projections dict + # keep track of parsed kpoints, to avoid redundant/duplicate parsing with multiple PROCARs: + this_procar_parsed_kpoints = ( + set() + ) # set of tuples of parsed (kvectors, 0/1 for Spin.up/down) for this PROCAR + + # first dynamically determine whether PROCAR is SOC or not; SOC PROCARs have 4 lists of projections ( + # total and x,y,z) for each band, while non-SOC have only 1 list of projections: + tot_count = 0 + band_count = 0 + for line in file_handle: + if total_expr.match(line): + tot_count += 1 + elif band_expr.match(line): + band_count += 1 + if band_count == 2: + break + + file_handle.seek(0) # reset file handle to beginning + if tot_count == 1: + is_soc = False + elif tot_count == 4: + is_soc = True + else: + raise ValueError( + "Number of lines starting with 'tot' in PROCAR does not match expected values (4x or 1x number of " + "lines with 'band'), indicating a corrupted file!" + ) + if self.is_soc is not None and self.is_soc != is_soc: + raise ValueError("Mismatch in SOC setting (LSORBIT) in supplied PROCARs!") + self.is_soc = is_soc + skipping_kpoint = False # true when skipping projections for a previously-parsed kpoint + ion_line_count = 0 # printed twice when phase factors present + proj_data_parsed_for_band = 0 # 0 for non-SOC, 1-4 for SOC/phase factors for line in file_handle: line = line.strip() - if band_expr.match(line): - match = band_expr.match(line) - current_band = int(match[1]) - 1 # type: ignore[index] - done = False + if ion_expr.match(line): + ion_line_count += 1 - elif kpoint_expr.match(line): + if kpoint_expr.match(line): + kvec = self._parse_kpoint_line(line) match = kpoint_expr.match(line) current_kpoint = int(match[1]) - 1 # type: ignore[index] - weights[current_kpoint] = float(match[2]) # type: ignore[index] if current_kpoint == 0: spin = Spin.up if spin == Spin.down else Spin.down - done = False + + if ( + kvec not in parsed_kpoints + and (kvec, {Spin.down: 0, Spin.up: 1}[spin]) not in this_procar_parsed_kpoints + ): + this_procar_parsed_kpoints.add((kvec, {Spin.down: 0, Spin.up: 1}[spin])) + skipping_kpoint = False + if spin == Spin.up: + kpoints.append(kvec) # only add once + else: # skip ahead to next kpoint: + skipping_kpoint = True + continue + + if spin == Spin.up: # record k-weight only once + weights[current_kpoint] = float(match[2]) # type: ignore[index] + proj_data_parsed_for_band = 0 + + elif skipping_kpoint: + continue + + elif band_expr.match(line): + ion_line_count = 0 # printed a second time when phase factors present + match = band_expr.match(line) + current_band = int(match[1]) - 1 # type: ignore[index] + tokens = line.split() + eigenvalues[spin][current_kpoint, current_band] = float(tokens[4]) # type: ignore[index] + occupancies[spin][current_kpoint, current_band] = float(tokens[-1]) # type: ignore[index] + # keep track of parsed projections for each band (1x w/non-SOC, 4x w/SOC): + proj_data_parsed_for_band = 0 elif headers is None and ion_expr.match(line): headers = line.split() @@ -3844,10 +4047,11 @@ def __init__(self, filename: PathLike) -> None: headers.pop(-1) data = defaultdict(lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers)))) - phase_factors = defaultdict( lambda: np.full((n_kpoints, n_bands, n_ions, len(headers)), np.nan, dtype=np.complex128) ) + if self.is_soc: # dict keys are now "x", "y", "z" rather than Spin.up/down + xyz_data = defaultdict(lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers)))) elif expr.match(line): tokens = line.split() @@ -3856,11 +4060,15 @@ def __init__(self, filename: PathLike) -> None: num_data = np.array([float(t) for t in tokens[: len(headers)]]) assert phase_factors is not None - if not done: - assert data is not None + if proj_data_parsed_for_band == 0: data[spin][current_kpoint, current_band, index, :] = num_data - elif len(tokens) > len(headers): + elif self.is_soc and proj_data_parsed_for_band < 4: + proj_direction = {1: "x", 2: "y", 3: "z"}[proj_data_parsed_for_band] + assert xyz_data is not None + xyz_data[proj_direction][current_kpoint, current_band, index, :] = num_data + + elif len(tokens) > len(headers): # note no xyz projected phase factors with SOC # New format of PROCAR (VASP 5.4.4) num_data = np.array([float(t) for t in tokens[: 2 * len(headers)]]) for orb in range(len(headers)): @@ -3873,24 +4081,45 @@ def __init__(self, filename: PathLike) -> None: else: phase_factors[spin][current_kpoint, current_band, index, :] += 1j * num_data - elif line.startswith("tot"): - done = True + elif total_expr.match(line): + proj_data_parsed_for_band += 1 elif preamble_expr.match(line): match = preamble_expr.match(line) assert match is not None n_kpoints = int(match[1]) n_bands = int(match[2]) + if eigenvalues is None: # first spin + weights = np.zeros(n_kpoints) + eigenvalues = defaultdict(lambda: np.zeros((n_kpoints, n_bands))) + occupancies = defaultdict(lambda: np.zeros((n_kpoints, n_bands))) n_ions = int(match[3]) - weights = np.zeros(n_kpoints) - self.nkpoints = n_kpoints - self.nbands = n_bands - self.nions = n_ions - self.weights = weights + if self.nions is not None and self.nions != n_ions: # parsing multiple PROCARs but nions mismatch! + raise ValueError(f"Mismatch in number of ions in supplied PROCARs: ({n_ions} vs {self.nions})!") + + self.nions = n_ions # attributes that should be consistent between multiple files are set here + if self.orbitals is not None and self.orbitals != headers: # multiple PROCARs but orbitals mismatch! + raise ValueError(f"Mismatch in orbital headers in supplied PROCARs: {headers} vs {self.orbitals}!") self.orbitals = headers - self.data = data - self.phase_factors = phase_factors + if self.nspins is not None and self.nspins != len(data): # parsing multiple PROCARs but nspins mismatch! + raise ValueError("Mismatch in number of spin channels in supplied PROCARs!") + self.nspins = len(data) + + # chop off empty kpoints in arrays and redetermine nkpoints as we may have skipped previously-parsed kpoints + nkpoints = current_kpoint + 1 + weights = np.array(weights[:nkpoints]) # type: ignore[index] + data = {spin: data[spin][:nkpoints] for spin in data} # type: ignore[index] + eigenvalues = {spin: eigenvalues[spin][:nkpoints] for spin in eigenvalues} # type: ignore[union-attr,index] + occupancies = {spin: occupancies[spin][:nkpoints] for spin in occupancies} # type: ignore[union-attr,index] + phase_factors = {spin: phase_factors[spin][:nkpoints] for spin in phase_factors} # type: ignore[union-attr,index] + if self.is_soc: + xyz_data = {spin: xyz_data[spin][:nkpoints] for spin in xyz_data} # type: ignore[union-attr,index] + + # Update the parsed kpoints + parsed_kpoints.update({kvec_spin_tuple[0] for kvec_spin_tuple in this_procar_parsed_kpoints}) + + return kpoints, weights, eigenvalues, occupancies, data, phase_factors, xyz_data def get_projection_on_elements(self, structure: Structure) -> dict[Spin, list[list[dict[str, float]]]]: """Get a dict of projections on elements. diff --git a/tests/files/io/vasp/outputs/PROCAR.SOC.gz b/tests/files/io/vasp/outputs/PROCAR.SOC.gz new file mode 100755 index 00000000000..42e13226291 Binary files /dev/null and b/tests/files/io/vasp/outputs/PROCAR.SOC.gz differ diff --git a/tests/files/io/vasp/outputs/PROCAR.split1.gz b/tests/files/io/vasp/outputs/PROCAR.split1.gz new file mode 100644 index 00000000000..eade5aff211 Binary files /dev/null and b/tests/files/io/vasp/outputs/PROCAR.split1.gz differ diff --git a/tests/files/io/vasp/outputs/PROCAR.split2.gz b/tests/files/io/vasp/outputs/PROCAR.split2.gz new file mode 100644 index 00000000000..ae8a2b6759c Binary files /dev/null and b/tests/files/io/vasp/outputs/PROCAR.split2.gz differ diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index 59a9305d642..7adfcc340c6 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -1613,6 +1613,51 @@ def test_init(self): assert procar.get_occupation(0, "dxy")[Spin.up] == approx(0.96214813853000025) assert procar.get_occupation(0, "dxy")[Spin.down] == approx(0.85796295426000124) + def test_soc_procar(self): + filepath = f"{VASP_OUT_DIR}/PROCAR.SOC.gz" + procar = Procar(filepath) + assert procar.nions == 4 + assert procar.nkpoints == 25 + assert procar.nspins == 1 + assert procar.is_soc # SOC PROCAR + nb = procar.nbands + nk = procar.nkpoints + assert procar.eigenvalues[Spin.up].shape == (nk, nb) + assert procar.kpoints.shape == (nk, 3) + assert len(procar.weights) == nk + assert np.all(procar.kpoints[0][0] == 0.0) + assert procar.occupancies[Spin.up].shape == (nk, nb) + + # spot check some values: + assert procar.data[Spin.up][0, 1, 1, 0] == approx(0.095) + assert procar.data[Spin.up][0, 1, 1, 1] == approx(0) + + assert procar.xyz_data["x"][0, 1, 1, 0] == approx(-0.063) + assert procar.xyz_data["z"][0, 1, 1, 1] == approx(0) + + def test_multiple_procars(self): + filepaths = [f"{VASP_OUT_DIR}/PROCAR.split1.gz", f"{VASP_OUT_DIR}/PROCAR.split2.gz"] + procar = Procar(filepaths) + assert procar.nions == 4 + assert procar.nkpoints == 96 # 96 overall, 48 in first PROCAR, 96 in second (48 duplicates) + assert procar.nspins == 1 # SOC PROCAR, also with LORBIT = 14 + assert procar.is_soc # SOC PROCAR + nb = procar.nbands + nk = procar.nkpoints + assert procar.eigenvalues[Spin.up].shape == (nk, nb) + assert procar.kpoints.shape == (nk, 3) + assert len(procar.weights) == nk + assert procar.occupancies[Spin.up].shape == (nk, nb) + + # spot check some values: + assert procar.data[Spin.up][0, 1, 1, 0] == approx(0.094) + assert procar.data[Spin.up][0, 1, 1, 1] == approx(0) + + assert procar.xyz_data["x"][0, 1, 1, 0] == approx(0) + assert procar.xyz_data["z"][0, 1, 1, 1] == approx(0) + + assert procar.phase_factors[Spin.up][0, 1, 0, 0] == approx(-0.159 + 0.295j) + def test_phase_factors(self): filepath = f"{VASP_OUT_DIR}/PROCAR.phase.gz" procar = Procar(filepath)