Merge devel into master (#464)

deepmodeling · May 4, 2023 · 85a3b5e · 85a3b5e
2 parents 9507165 + b73b239
commit 85a3b5e
Show file tree

Hide file tree

Showing 155 changed files with 7,087 additions and 555 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -18,14 +18,15 @@ repos:
     -   id: check-toml
 # Python
 -   repo: https://github.com/psf/black
-    rev: 22.12.0
+    rev: 23.3.0
     hooks:
     -   id: black-jupyter
--   repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+-   repo: https://github.com/charliermarsh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.0.263
     hooks:
-    - id: isort
-    files: \.py$
+    - id: ruff
+      args: ["--fix"]
 # numpydoc
 -   repo: https://github.com/Carreau/velin
     rev: 0.0.12

diff --git a/docs/conf.py b/docs/conf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 #
 # Configuration file for the Sphinx documentation builder.
 #

diff --git a/docs/make_format.py b/docs/make_format.py
@@ -1,12 +1,9 @@
 import csv
 from collections import defaultdict
-from typing import Any
 
 # ensure all plugins are loaded!
-import dpdata.plugins
 from dpdata.driver import Driver, Minimizer
 from dpdata.format import Format
-from dpdata.system import get_cls_name
 
 
 def get_formats() -> dict:
@@ -31,7 +28,7 @@ def get_minimizer() -> dict:
 
 
 def detect_overridden(cls: Format, method: str) -> bool:
-    """Check whether a method is override
+    """Check whether a method is overridden.
 
     Parameters
     ----------
@@ -61,7 +58,9 @@ def get_cls_link(cls: object) -> str:
     str
         the link of a class
     """
-    return ":class:`%s <%s>`" % (cls.__name__, ".".join([cls.__module__, cls.__name__]))
+    return ":class:`{} <{}>`".format(
+        cls.__name__, ".".join([cls.__module__, cls.__name__])
+    )
 
 
 def check_supported(fmt: Format):
@@ -112,7 +111,7 @@ def check_supported(fmt: Format):
             writer.writerow(
                 {
                     "Class": get_cls_link(kk),
-                    "Alias": "\n".join(("``%s``" % vvv for vvv in vv)),
+                    "Alias": "\n".join("``%s``" % vvv for vvv in vv),
                     "Supported Functions": "\n".join(
                         method_links[mtd] for mtd in check_supported(kk)
                     ),
@@ -132,7 +131,7 @@ def check_supported(fmt: Format):
             writer.writerow(
                 {
                     "Class": get_cls_link(kk),
-                    "Alias": "\n".join(("``%s``" % vvv for vvv in vv)),
+                    "Alias": "\n".join("``%s``" % vvv for vvv in vv),
                 }
             )
 
@@ -149,6 +148,6 @@ def check_supported(fmt: Format):
             writer.writerow(
                 {
                     "Class": get_cls_link(kk),
-                    "Alias": "\n".join(("``%s``" % vvv for vvv in vv)),
+                    "Alias": "\n".join("``%s``" % vvv for vvv in vv),
                 }
             )
diff --git a/dpdata/__init__.py b/dpdata/__init__.py
@@ -9,11 +9,22 @@
 # BondOrder System has dependency on rdkit
 try:
     # prevent conflict with dpdata.rdkit
-    import rdkit as _
+    import rdkit as _  # noqa: F401
 
     USE_RDKIT = True
 except ModuleNotFoundError:
     USE_RDKIT = False
 
 if USE_RDKIT:
     from .bond_order_system import BondOrderSystem
+
+__all__ = [
+    "__version__",
+    "lammps",
+    "md",
+    "vasp",
+    "System",
+    "LabeledSystem",
+    "MultiSystems",
+    "BondOrderSystem",
+]
diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py
@@ -1,19 +1,14 @@
 import os
-import re
-import sys
 import warnings
-from ast import dump
 
 import numpy as np
 
 from .scf import (
     bohr2ang,
-    get_block,
     get_cell,
     get_coords,
     get_geometry_in,
     kbar2evperang3,
-    ry2ev,
 )
 
 # Read in geometries from an ABACUS MD trajectory.
@@ -72,69 +67,51 @@ def get_coords_from_dump(dumplines, natoms):
     for iline in range(nlines):
         if "MDSTEP" in dumplines[iline]:
             # read in LATTICE_CONSTANT
-            celldm = float(dumplines[iline + 1].split(" ")[-1])
+            # for abacus version >= v3.1.4, the unit is angstrom, and "Angstrom" is added at the end
+            # for abacus version <  v3.1.4, the unit is bohr
+            celldm = float(dumplines[iline + 1].split()[1])
+            newversion = True
+            if "Angstrom" not in dumplines[iline + 1]:
+                celldm *= bohr2ang  # transfer unit to ANGSTROM
+                newversion = False
+
             # read in LATTICE_VECTORS
             for ix in range(3):
                 cells[iframe, ix] = (
-                    np.array(
-                        [
-                            float(i)
-                            for i in re.split("\s+", dumplines[iline + 3 + ix])[-3:]
-                        ]
-                    )
+                    np.array([float(i) for i in dumplines[iline + 3 + ix].split()[0:3]])
                     * celldm
                 )
                 if calc_stress:
                     stresses[iframe, ix] = np.array(
-                        [
-                            float(i)
-                            for i in re.split("\s+", dumplines[iline + 7 + ix])[-3:]
-                        ]
+                        [float(i) for i in dumplines[iline + 7 + ix].split()[0:3]]
                     )
+
+            if calc_stress:
+                skipline = 11
+            else:
+                skipline = 7
+
             for iat in range(total_natoms):
-                if calc_stress:
-                    coords[iframe, iat] = (
-                        np.array(
-                            [
-                                float(i)
-                                for i in re.split("\s+", dumplines[iline + 11 + iat])[
-                                    -6:-3
-                                ]
-                            ]
-                        )
-                        * celldm
-                    )
-                    forces[iframe, iat] = np.array(
-                        [
-                            float(i)
-                            for i in re.split("\s+", dumplines[iline + 11 + iat])[-3:]
-                        ]
-                    )
-                else:
-                    coords[iframe, iat] = (
-                        np.array(
-                            [
-                                float(i)
-                                for i in re.split("\s+", dumplines[iline + 7 + iat])[
-                                    -6:-3
-                                ]
-                            ]
-                        )
-                        * celldm
-                    )
-                    forces[iframe, iat] = np.array(
-                        [
-                            float(i)
-                            for i in re.split("\s+", dumplines[iline + 7 + iat])[-3:]
-                        ]
-                    )
+                # INDEX    LABEL    POSITION (Angstrom)    FORCE (eV/Angstrom)    VELOCITY (Angstrom/fs)
+                # 0  Sn  0.000000000000  0.000000000000  0.000000000000  -0.000000000000  -0.000000000001  -0.000000000001  0.001244557166  -0.000346684288  0.000768457739
+                # 1  Sn  0.000000000000  3.102800034079  3.102800034079  -0.000186795145  -0.000453823768  -0.000453823768  0.000550996187  -0.000886442775  0.001579501983
+                # for abacus version >= v3.1.4, the value of POSITION is the real Cartesian position, and the unit is angstrom, and if cal_force the VELOCITY is added at the end.
+                # for abacus version < v3.1.4, the real position = POSITION * celldm
+                coords[iframe, iat] = np.array(
+                    [float(i) for i in dumplines[iline + skipline + iat].split()[2:5]]
+                )
+
+                if not newversion:
+                    coords[iframe, iat] *= celldm
+
+                forces[iframe, iat] = np.array(
+                    [float(i) for i in dumplines[iline + skipline + iat].split()[5:8]]
+                )
             iframe += 1
     assert iframe == nframes_dump, (
         "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump."
         % (iframe, nframes_dump)
     )
-    cells *= bohr2ang
-    coords *= bohr2ang
     stresses *= kbar2evperang3
     return coords, cells, forces, stresses
 
@@ -166,12 +143,12 @@ def get_frame(fname):
         path_in = os.path.join(fname, "INPUT")
     else:
         raise RuntimeError("invalid input")
-    with open(path_in, "r") as fp:
+    with open(path_in) as fp:
         inlines = fp.read().split("\n")
     geometry_path_in = get_geometry_in(fname, inlines)  # base dir of STRU
     path_out = get_path_out(fname, inlines)
 
-    with open(geometry_path_in, "r") as fp:
+    with open(geometry_path_in) as fp:
         geometry_inlines = fp.read().split("\n")
     celldm, cell = get_cell(geometry_inlines)
     atom_names, natoms, types, coords = get_coords(
@@ -182,11 +159,11 @@ def get_frame(fname):
     # ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0])
     # number of dumped geometry files
     # coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell)
-    with open(os.path.join(path_out, "MD_dump"), "r") as fp:
+    with open(os.path.join(path_out, "MD_dump")) as fp:
         dumplines = fp.read().split("\n")
     coords, cells, force, stress = get_coords_from_dump(dumplines, natoms)
     ndump = np.shape(coords)[0]
-    with open(os.path.join(path_out, "running_md.log"), "r") as fp:
+    with open(os.path.join(path_out, "running_md.log")) as fp:
         outlines = fp.read().split("\n")
     energy = get_energy(outlines, ndump, dump_freq)
 
@@ -201,7 +178,7 @@ def get_frame(fname):
             unconv_stru += "%d " % i
     ndump = len(energy)
     if unconv_stru != "":
-        warnings.warn(f"Structure %s are unconverged and not collected!" % unconv_stru)
+        warnings.warn("Structure %s are unconverged and not collected!" % unconv_stru)
 
     for iframe in range(ndump):
         stress[iframe] *= np.linalg.det(cells[iframe, :, :].reshape([3, 3]))

diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py
@@ -1,5 +1,4 @@
 import os
-import sys
 
 import numpy as np
 
@@ -16,19 +15,18 @@ def get_log_file(fname, inlines):
             suffix = line.split()[1]
         elif "calculation" in line and "calculation" == line.split()[0]:
             calculation = line.split()[1]
-    logf = os.path.join(fname, "OUT.%s/running_%s.log" % (suffix, calculation))
+    logf = os.path.join(fname, f"OUT.{suffix}/running_{calculation}.log")
     return logf
 
 
 def get_coords_from_log(loglines, natoms):
-    """
-    NOTICE: unit of coords and cells is Angstrom
+    """NOTICE: unit of coords and cells is Angstrom
     order:
         coordinate
         cell (no output if cell is not changed)
         energy (no output, if SCF is not converged)
         force (no output, if cal_force is not setted or abnormal ending)
-        stress (no output, if set cal_stress is not setted or abnormal ending)
+        stress (no output, if set cal_stress is not setted or abnormal ending).
     """
     natoms_log = 0
     for line in loglines:
@@ -175,10 +173,10 @@ def get_frame(fname):
         path_in = os.path.join(fname, "INPUT")
     else:
         raise RuntimeError("invalid input")
-    with open(path_in, "r") as fp:
+    with open(path_in) as fp:
         inlines = fp.read().split("\n")
     geometry_path_in = get_geometry_in(fname, inlines)  # base dir of STRU
-    with open(geometry_path_in, "r") as fp:
+    with open(geometry_path_in) as fp:
         geometry_inlines = fp.read().split("\n")
     celldm, cell = get_cell(geometry_inlines)
     atom_names, natoms, types, coord_tmp = get_coords(

diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py
@@ -1,6 +1,5 @@
 import os
 import re
-import sys
 
 import numpy as np
 
@@ -172,17 +171,17 @@ def get_frame(fname):
     if not CheckFile(path_in):
         return data
 
-    with open(path_in, "r") as fp:
+    with open(path_in) as fp:
         inlines = fp.read().split("\n")
 
     geometry_path_in = get_geometry_in(fname, inlines)
     path_out = get_path_out(fname, inlines)
     if not (CheckFile(geometry_path_in) and CheckFile(path_out)):
         return data
 
-    with open(geometry_path_in, "r") as fp:
+    with open(geometry_path_in) as fp:
         geometry_inlines = fp.read().split("\n")
-    with open(path_out, "r") as fp:
+    with open(path_out) as fp:
         outlines = fp.read().split("\n")
 
     celldm, cell = get_cell(geometry_inlines)
@@ -257,7 +256,7 @@ def get_nele_from_stru(geometry_inlines):
 
 def get_frame_from_stru(fname):
     assert type(fname) == str
-    with open(fname, "r") as fp:
+    with open(fname) as fp:
         geometry_inlines = fp.read().split("\n")
     nele = get_nele_from_stru(geometry_inlines)
     inlines = ["ntype %d" % nele]

diff --git a/dpdata/amber/mask.py b/dpdata/amber/mask.py
@@ -1,12 +1,12 @@
-"""Amber mask"""
+"""Amber mask."""
 try:
     import parmed
 except ImportError:
     pass
 
 
 def pick_by_amber_mask(param, maskstr, coords=None):
-    """Pick atoms by amber masks
+    """Pick atoms by amber masks.
 
     Parameters
     ----------

diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py
@@ -30,7 +30,7 @@ def read_amber_traj(
     * mdfrc, NetCDF format, stores forces
     * mden (optional), text format, stores energies
     * mdout (optional), text format, may store energies if there is no mden_file
-    * parm7, text format, stores types
+    * parm7, text format, stores types.
 
     Parameters
     ----------
@@ -41,8 +41,9 @@ def read_amber_traj(
         instead of amber types. For example, a ligand will use C, H, O, N, and so on
         instead of h1, hc, o, os, and so on.
         IF use_element_symbols is str, it will be considered as Amber mask.
+    labeled : bool
+        Whether to return labeled data
     """
-
     flag_atom_type = False
     flag_atom_numb = False
     amber_types = []

diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py
@@ -13,9 +13,7 @@
 
 
 def parse_sqm_out(fname):
-    """
-    Read atom symbols, charges and coordinates from ambertools sqm.out file
-    """
+    """Read atom symbols, charges and coordinates from ambertools sqm.out file."""
     atom_symbols = []
     coords = []
     charges = []