Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/sof 6598 clean up parsers implementation (DRAFT) #82

Open
wants to merge 31 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d0a641c
feat: add fortran parser and parsers settings
VsevolodX Jul 11, 2023
4c53512
update: rename and move formats enum
VsevolodX Jul 11, 2023
225485a
feat: add BaseParser and MaterialsParser classes
VsevolodX Jul 11, 2023
cc27cc7
feat: add EspressoParser class and settings
VsevolodX Jul 11, 2023
ac86137
feat: add tests for Espresso parser
VsevolodX Jul 11, 2023
a3cdc53
feat: add fixtures for Espresso parser
VsevolodX Jul 11, 2023
10f41bf
update: cleanup and correct methods order
VsevolodX Jul 11, 2023
ac715d4
feat: add test for fortran parser
VsevolodX Jul 11, 2023
e1f2c47
update: change test fixture to the correct one
VsevolodX Jul 11, 2023
931c808
fix: make tests pass
VsevolodX Jul 11, 2023
e7a4478
update: shorten function and address PR comments
VsevolodX Jul 11, 2023
65d4872
update: use mixin of FortranParser
VsevolodX Jul 11, 2023
e78589d
update: remove node v10 from github cicd
VsevolodX Jul 11, 2023
a4269bf
update: temporarily comment out fortranParserMixintest
VsevolodX Jul 11, 2023
7877ab4
update: fix the method called in fortranParserMixin test
VsevolodX Jul 11, 2023
9d87b70
update: move functions inside class
VsevolodX Jul 11, 2023
8acc823
update: change fortran tests directory
VsevolodX Jul 11, 2023
4030ede
update: address PR comments and move functions into methods calling them
VsevolodX Jul 11, 2023
f8515fb
update: simplify logic and variables addressing PR comments
VsevolodX Jul 11, 2023
1fb0809
feat: add handling for partially missing constraints and test for this
VsevolodX Jul 11, 2023
0dfbe64
update: add JSDocs and change comments FIXME -> TODO
VsevolodX Jul 11, 2023
e150e52
update: add and fix test to call ESPRESSO parser from outside
VsevolodX Jul 11, 2023
1c1e1af
update: simplify code and move comments on separate line
VsevolodX Jul 11, 2023
0cf4997
update: rename fortranParserMixin methods to have "fortran" in them
VsevolodX Jul 11, 2023
45b952e
update: change order of regexes to be in descending logic
VsevolodX Jul 11, 2023
45df941
update: rename KV parsing functions and add JSDoc example
VsevolodX Jul 11, 2023
f4c2322
feat: add more tests to cover more functions
VsevolodX Jul 12, 2023
22891df
update: fix newly found error of constraints not being set to [] if not …
VsevolodX Jul 12, 2023
2f271c9
update: run npm lint:fix
VsevolodX Jul 12, 2023
3353a73
update: add suppression for ESLint in an abstract class
VsevolodX Jul 12, 2023
3434005
update: add accidentally removed TODOs back in
VsevolodX Jul 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/parsers/enums.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Enum of structural information format identifiers.
 * Frozen so that the shared object cannot be modified by the modules importing it.
 */
export const STRUCTURAL_INFORMATION_FORMATS = Object.freeze({
    JSON: "json",
    POSCAR: "poscar",
    CIF: "cif",
    QE: "qe",
    XYZ: "xyz",
    UNKNOWN: "unknown",
});

/**
 * Enum of application identifiers.
 * Frozen so that the shared object cannot be modified by the modules importing it.
 */
export const APPLICATIONS = Object.freeze({
    ESPRESSO: "espresso",
    VASP: "vasp",
    UNKNOWN: "unknown",
});
259 changes: 259 additions & 0 deletions src/parsers/espresso/parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
import { ATOMIC_COORD_UNITS, coefficients } from "@exabyte-io/code.js/dist/constants";

import { ConstrainedBasis } from "../../basis/constrained_basis";
import { primitiveCell } from "../../cell/primitive_cell";
import { Lattice } from "../../lattice/lattice";
import math from "../../math";
import { MaterialParser } from "../init";
import { FortranParser } from "../utils/fortran";
import { IBRAV_TO_LATTICE_TYPE_MAP, regex } from "./settings";

export class ESPRESSOMaterialParser extends MaterialParser {
VsevolodX marked this conversation as resolved.
Show resolved Hide resolved
parseMaterial(content) {
VsevolodX marked this conversation as resolved.
Show resolved Hide resolved
this.content = content;
const fortranParser = new FortranParser();
this.data = fortranParser.parse(this.content);
const cell = this.getCellConfig(this.data.cards);
const { elements, coordinates, units, constraints } = this.getAtomicPositions(
this.data.cards,
);

if (this.data.system === undefined)
throw new Error("No &SYSTEM section found in input this.data.");
if (this.data.system.ibrav === undefined) throw new Error("ibrav is required in &SYSTEM.");

const lattice = Lattice.fromVectors({
a: cell.cell[0],
b: cell.cell[1],
c: cell.cell[2],
type: cell.type,
});

const basis = new ConstrainedBasis({
elements,
coordinates,
units,
type: cell.type,
cell: lattice.vectorArrays,
constraints,
});
// basis.toStandardRepresentation(); // To get the format obtained from the Mat3ra platform

return {
lattice: lattice.toJSON(),
basis: basis.toJSON(),
name: this.data.control.title,
isNonPeriodic: false,
};
}

/**
* @summary Return unit cell parameters from CELL_PARAMETERS card
* @param {String} text - cards data
* @return {{cell: Number[][], units: String}}
*/
getCellConfig(text) {
let cell = {};
if (this.data.system.ibrav === 0) {
const match = regex.cellParameters.exec(text);
if (match) {
const units = match[1];
const values = match.slice(2, 11);
// creating matrix 3 by 3 of numbers from 9 strings
const vectors = Array.from({ length: 3 }, (_, i) =>
values.slice(i * 3, i * 3 + 3).map(Number),
);
cell = { cell: vectors, units };
cell.type = "TRI"; // TODO: implement type detection, now defaults to TRI
return cell;
}
} else {
cell = this.ibravToCellConfig(this.data.system);
return cell;
}
throw new Error("Couldn't read cell parameters");
}

/**
* @summary Returns cell config from ibrav and celldm(i) parameters
*
* QE docs: https://www.quantum-espresso.org/Doc/INPUT_PW.html#ibrav
* "If ibrav /= 0, specify EITHER [ celldm(1)-celldm(6) ]
* OR [ A, B, C, cosAB, cosAC, cosBC ]
* but NOT both. The lattice parameter "alat" is set to
* alat = celldm(1) (in a.u.) or alat = A (in Angstrom);"
*
* @param {Object} system - The system parameters from &SYSTEM namelist
* @param {Number} system.ibrav - ibrav parameter
* @param {Number[]} [system.celldm] - celldm parameters
* @param {Number} [system.a] - A parameter in angstroms
* @param {Number} [system.b] - B parameter in angstroms
* @param {Number} [system.c] - C parameter in angstroms
* @param {Number} [system.cosab] - cosAB parameter
* @param {Number} [system.cosac] - cosAC parameter
* @param {Number} [system.cosbc] - cosBC parameter
* @returns {{cell: Number[][], type: String}}
*/
ibravToCellConfig(system) {
const { ibrav, celldm, a, b, c, cosab, cosac, cosbc } = system;
if (celldm && a) {
throw new Error("Both celldm and A are given");
} else if (!celldm && !a) {
throw new Error("Missing celldm(1)");
}

const type = this.ibravToCellType(ibrav);
const [_a, _b, _c] = this.getLatticeConstants(celldm, a, b, c);
const [alpha, beta, gamma] = this.getLatticeAngles(celldm, cosbc, cosac, cosab);

const lattice = new Lattice({
a: _a,
VsevolodX marked this conversation as resolved.
Show resolved Hide resolved
b: _b,
c: _c,
alpha,
beta,
gamma,
type,
});
const cell = primitiveCell(lattice);
return { cell, type };
}

/**
* @summary Converts ibrav value to cell type according to Quantum ESPRESSO docs
* https://www.quantum-espresso.org/Doc/INPUT_PW.html#ibrav
* @param {Number} ibrav - ibrav parameter
* @returns {String}
*/
ibravToCellType(ibrav) {
const type = IBRAV_TO_LATTICE_TYPE_MAP[ibrav];
if (type === undefined) {
throw new Error(`Invalid ibrav value: ${ibrav}`);
}
return type;
}

/**
* @summary Calculates cell parameters from celldm(i) or A, B, C parameters depending on which are present. Specific to Quantum ESPRESSO.
* @param {Number[]} [celldm] - celldm(i) parameters
* @param {Number} [a] - A parameter
* @param {Number} [b] - B parameter
* @param {Number} [c] - C parameter
* @returns {Number[]}
*/
getLatticeConstants(celldm, a, b, c) {
// celldm indices shifted -1 from fortran list representation. In QE input file celldm(1) list starts with 1, but parsed starting with 0.
let _a = celldm ? celldm[0] : a; // celldm(1) is a in bohr
let _b = celldm ? celldm[1] * celldm[0] : b; // celldm(2) is b/a
let _c = celldm ? celldm[2] * celldm[0] : c; // celldm(3) is c/a
if (celldm) {
[_a, _b, _c] = [_a, _b, _c].map((x) => x * coefficients.BOHR_TO_ANGSTROM);
}
return [_a, _b, _c];
}

/**
* @summary Calculates cell angles from celldm(i) or cosAB, cosAC, cosBC parameters. Specific to Quantum ESPRESSO.
* @param {Number[]} [celldm] - celldm(i) parameters
* @param {Number} [cosbc] - cosBC parameter
* @param {Number} [cosac] - cosAC parameter
* @param {Number} [cosab] - cosAB parameter
* @returns {Number[]}
*/
getLatticeAngles(celldm, cosbc, cosac, cosab) {
let alpha, beta, gamma;
if (cosbc) alpha = math.acos(cosbc);
if (cosac) beta = math.acos(cosac);
if (cosab) gamma = math.acos(cosab);

// Case for some of the cell types in QE docs
// celldm indices shifted -1 from fortran list representation. In QE input file celdm(1) array starts with 1, but parsed starting with 0.
if (celldm && celldm[3]) {
gamma = math.acos(celldm[3]);
}

// Specific case for hexagonal cell in QE docs
// celldm indices shifted -1 from fortran list representation. In QE input file celdm(1) array starts with 1, but parsed starting with 0.
if (celldm && celldm[3] && celldm[4] && celldm[5]) {
alpha = math.acos(celldm[3]);
beta = math.acos(celldm[4]);
gamma = math.acos(celldm[5]);
}

// Convert radians to degrees which are used in lattice definitions
[alpha, beta, gamma] = [alpha, beta, gamma].map((x) =>
x === undefined ? x : (x * 180) / math.PI,
);
return [alpha, beta, gamma];
}

/**
* @summary Read atomic positions from ATOMIC_POSITIONS card
* @param {String} text - cards data
* @returns {{elements: Object[], coordinates: Object[], constraints: Object[], units: String}}
*/
getAtomicPositions(text) {
const atomicSpeciesMatches = Array.from(text.matchAll(regex.atomicSpecies));
// eslint-disable-next-line no-unused-vars
const atomicSpecies = atomicSpeciesMatches.map((match) => ({
element: match[1],
mass: parseFloat(match[2]),
potential: match[3],
}));
const atomicPositionsMatches = Array.from(text.matchAll(regex.atomicPositions));
const units = text.match(regex.atomicPositionsUnits)[1];
const { _units, scalingFactor } = this.getScalingFactor(units);

const elements = atomicPositionsMatches.map((match, index) => ({
id: index,
value: match[1],
}));
const coordinates = atomicPositionsMatches.map((match, index) => ({
id: index,
value: [
parseFloat(match[2]) * scalingFactor,
parseFloat(match[3]) * scalingFactor,
parseFloat(match[4]) * scalingFactor,
],
}));
const constraints = atomicPositionsMatches
.filter((match) => match[5] && match[6] && match[7]) // Check if all three constraints exist
.map((match, index) => ({
id: index,
value: [match[5] === "1", match[6] === "1", match[7] === "1"],
}));
return { elements, coordinates, constraints, units: _units };
}

/**
* @summary Return units and scaling factor according to Quantum ESPRESSO docs
* @param {String} units - units from ATOMIC_POSITIONS card
* @returns {{_units: String, scalingFactor: Number}}
*/
getScalingFactor(units) {
VsevolodX marked this conversation as resolved.
Show resolved Hide resolved
let _units, scalingFactor;
switch (units) {
case "alat":
scalingFactor = 1.0;
VsevolodX marked this conversation as resolved.
Show resolved Hide resolved
_units = ATOMIC_COORD_UNITS.crystal;
break;
case "bohr":
scalingFactor = coefficients.BOHR_TO_ANGSTROM;
_units = ATOMIC_COORD_UNITS.cartesian;
break;
case "angstrom":
scalingFactor = 1.0;
_units = ATOMIC_COORD_UNITS.cartesian;
break;
case "crystal":
scalingFactor = 1.0;
_units = ATOMIC_COORD_UNITS.crystal;
break;
case "crystal_sg":
throw new Error("crystal_sg not supported yet");
default:
throw new Error(`Units ${units} not supported`);
}
return { _units, scalingFactor };
}
}
55 changes: 55 additions & 0 deletions src/parsers/espresso/settings.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { LATTICE_TYPE } from "../../lattice/types";
import { regex as commonRegex } from "../utils/settings";

const { double } = commonRegex.general;
/**
 * Regular expressions used to read a Quantum ESPRESSO input file.
 *
 * NOTE: the patterns created with the "g" flag are shared objects; exec()/test() on them is
 * stateful through lastIndex, so reset lastIndex when using those methods.
 */
export const regex = {
    espressoFingerprint: /&CONTROL|&SYSTEM|ATOMIC_SPECIES/i,
    // One line of the ATOMIC_SPECIES card: symbol, mass, pseudopotential file name
    atomicSpecies: new RegExp(
        "([A-Z][a-z]?)\\s+" + // element symbol Aa
            `(${double})[ \\t]+` + // mass, followed by one or more spaces/tabs (columns may be aligned)
            "(\\S*)\\s*" + // potential source file name
            "(?=\\n|$)", // end of line, or end of input when there is no trailing newline
        "gm",
    ),
    // Units of the ATOMIC_POSITIONS card written as `units`, `(units)` or `{units}`
    atomicPositionsUnits: new RegExp(
        "ATOMIC_POSITIONS\\s+" + // start of card
            "[({]?" + // optional opening parenthesis or brace
            "(\\w+)" + // units
            "[)}]?", // optional closing parenthesis or brace
    ),
    // One line of the ATOMIC_POSITIONS card: symbol, three coordinates, optional three 0/1 flags
    atomicPositions: new RegExp(
        `^\\s*([A-Z][a-z]*)\\s+` + // atomic element symbol
            `(${double})\\s+(${double})\\s+(${double})` + // atomic coordinates
            `(?:\\s+(0|1)\\s+(0|1)\\s+(0|1))?` + // atomic constraints
            `(?=\\s*(?:\\n|$))`, // end of line, or end of input so that the last atom is not lost
        "gm",
    ),
    // CELL_PARAMETERS card: optional units as `units`, `(units)` or `{units}`, then 3 rows of 3 numbers
    cellParameters: new RegExp(
        `CELL_PARAMETERS\\s*(?:[({]?(\\w+)[)}]?)?[ \\t]*\\n` +
            `^\\s*(${double})\\s+(${double})\\s+(${double})\\s*\\n` +
            `^\\s*(${double})\\s+(${double})\\s+(${double})\\s*\\n` +
            `^\\s*(${double})\\s+(${double})\\s+(${double})\\s*(?:\\n|$)`, // last row may end the input
        "gm",
    ),
};

/**
 * Lookup table from the ibrav parameter of the &SYSTEM namelist to the lattice type.
 * Negative ibrav values share the lattice type of their positive counterpart.
 * An ibrav that has no entry here (including 0) yields undefined on lookup.
 * Frozen so that the shared table cannot be modified by the modules importing it.
 */
export const IBRAV_TO_LATTICE_TYPE_MAP = Object.freeze({
    1: LATTICE_TYPE.CUB,
    2: LATTICE_TYPE.FCC,
    3: LATTICE_TYPE.BCC,
    "-3": LATTICE_TYPE.BCC,
    4: LATTICE_TYPE.HEX,
    5: LATTICE_TYPE.RHL,
    "-5": LATTICE_TYPE.RHL,
    6: LATTICE_TYPE.TET,
    7: LATTICE_TYPE.BCT,
    8: LATTICE_TYPE.ORC,
    9: LATTICE_TYPE.ORCC,
    "-9": LATTICE_TYPE.ORCC,
    10: LATTICE_TYPE.ORCF,
    11: LATTICE_TYPE.ORCI,
    12: LATTICE_TYPE.MCL,
    "-12": LATTICE_TYPE.MCL,
    13: LATTICE_TYPE.MCLC,
    "-13": LATTICE_TYPE.MCLC,
    14: LATTICE_TYPE.TRI,
});
39 changes: 39 additions & 0 deletions src/parsers/init.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
 * Common ancestor of the parsers: keeps the options it is created with and
 * leaves the actual parsing to the child classes.
 */
// eslint-disable-next-line max-classes-per-file
export class BaseParser {
    /**
     * @param {*} options - stored as-is on the instance as `options`
     */
    constructor(options) {
        Object.assign(this, { options });
    }

    /**
     * Placeholder to be overridden in children.
     * @throws {Error} always
     */
    // eslint-disable-next-line class-methods-use-this
    parse() {
        const message = "parse() is implemented in children";
        throw new Error(message);
    }
}

/**
 * Base class of the parsers that produce a material: parse() delegates to
 * parseMaterial(), which children have to implement together with the getters below.
 */
export class MaterialParser extends BaseParser {
    /**
     * @param {String} content - content to parse, handed over to parseMaterial()
     * @param {String} [property_name="material"] - only "material" is accepted
     * @returns {*} whatever parseMaterial() of the child class returns
     * @throws {Error} if property_name is anything other than "material"
     */
    parse(content, property_name = "material") {
        if (property_name !== "material") {
            throw new Error("Implemented for material only");
        }
        return this.parseMaterial(content);
    }

    /**
     * Stores the content on the instance and throws; to be overridden in children.
     * @param {String} content
     * @throws {Error} always
     */
    parseMaterial(content) {
        this.content = content;
        throw new Error("parseMaterial() is implemented in children");
    }

    /** @throws {Error} always; to be overridden in children */
    getCell() {
        throw new Error("Implement in children");
    }

    /** @throws {Error} always; to be overridden in children */
    getElements() {
        throw new Error("Implement in children");
    }

    /** @throws {Error} always; to be overridden in children */
    getCoordinates() {
        throw new Error("Implement in children");
    }

    /** @throws {Error} always; to be overridden in children */
    getConstraints() {
        throw new Error("Implement in children");
    }
}
Loading