Merge pull request #287 from MC-kit/devel

fix saving nuclides
MC-kit · Dec 26, 2023 · 2a26d06 · 2a26d06
2 parents 5b45c28 + d926905
commit 2a26d06
Show file tree

Hide file tree

Showing 7 changed files with 93 additions and 35 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "xpypact"
-version = "0.5.4"
+version = "0.5.5"
 description = "\"Python workflow framework for FISPACT.\""
 authors = ["dvp <[email protected]>"]
 license = "MIT"

diff --git a/src/xpypact/dao/api.py b/src/xpypact/dao/api.py
@@ -40,7 +40,7 @@ def drop_schema(self) -> None:
         """Drop our DB objects."""
 
     @abstractmethod
-    def save(self, inventory: Inventory, material_id: int = 1, case_id: str = "") -> None:
+    def save(self, inventory: Inventory, material_id: int = 1, case_id: int = 1) -> None:
         """Save xpypact inventory to database.
 
         Args:
@@ -49,6 +49,13 @@ def save(self, inventory: Inventory, material_id: int = 1, case_id: str = "") ->
             case_id: second additional key
         """
 
+    @abstractmethod
+    def on_save_complete(self) -> None:
+        """Execute after all the inventories have been saved.
+
+        Save information that must be stored once multithreaded processing is done.
+        """
+
     @abstractmethod
     def load_rundata(self) -> pd.DataFrame:
         """Load FISPACT run data as table.

diff --git a/src/xpypact/dao/duckdb/create_schema.sql b/src/xpypact/dao/duckdb/create_schema.sql
@@ -39,14 +39,15 @@ create table timestep (
 
 
 create table nuclide (
+    zai uinteger not null check (10010 <= zai) primary key,
     element varchar(2) not null,
     mass_number usmallint not null check (0 < mass_number),
     state varchar(1) not null,
-    zai uinteger not null check (10010 <= zai) unique,
-    half_life real not null check (0 <= half_life),
-    primary key (element, mass_number, state)
+    half_life real not null check (0 <= half_life)
 );
 
+create unique index nuclide_ems on nuclide (element, mass_number, state);
+
 create table timestep_nuclide (
     material_id uinteger not null,
     case_id uinteger not null,
@@ -70,8 +71,7 @@ create table timestep_nuclide (
     inhalation real not null,
 
     primary key (material_id, case_id, time_step_number, zai),
-    foreign key (material_id, case_id, time_step_number) references timestep (material_id, case_id, time_step_number),
-    foreign key (zai) references nuclide (zai)
+    foreign key (material_id, case_id, time_step_number) references timestep (material_id, case_id, time_step_number)
 );
 
 create table gbins (

diff --git a/src/xpypact/dao/duckdb/implementation.py b/src/xpypact/dao/duckdb/implementation.py
@@ -3,7 +3,9 @@
 
 from typing import TYPE_CHECKING
 
-from dataclasses import dataclass
+import threading
+
+from dataclasses import dataclass, field
 from pathlib import Path
 
 import numpy as np
@@ -16,7 +18,7 @@
     import duckdb as db
     import pandas as pd
 
-    from xpypact.inventory import Inventory
+    from xpypact.inventory import Inventory, NuclideInfo
 
 HERE = Path(__file__).parent
 
@@ -34,6 +36,8 @@ class DuckDBDAO(DataAccessInterface):
     """Implementation of DataAccessInterface for DuckDB."""
 
     con: db.DuckDBPyConnection
+    nuclides: set[NuclideInfo] = field(default_factory=set)
+    nuclides_lock: threading.RLock = field(default_factory=threading.RLock)
 
     def get_tables_info(self) -> pd.DataFrame:
         """Get information on tables in schema."""
@@ -81,9 +85,11 @@ def drop_schema(self) -> None:
         for table in tables:
             self.con.execute(f"drop table if exists {table}")
 
-    def save(self, inventory: Inventory, material_id=1, case_id=1) -> None:
+    def save(self, inventory: Inventory, material_id: int = 1, case_id: int = 1) -> None:
         """Save xpypact dataset to database.
 
+        This can be used in multithreading mode.
+
         Args:
             inventory: xpypact dataset to save
             material_id: additional key to distinguish multiple FISPACT run
@@ -93,10 +99,29 @@ def save(self, inventory: Inventory, material_id=1, case_id=1) -> None:
         # https://duckdb.org/docs/api/python/dbapi
         cursor = self.con.cursor()
         _save_run_data(cursor, inventory, material_id, case_id)
-        _save_nuclides(cursor, inventory)
         _save_time_steps(cursor, inventory, material_id, case_id)
         _save_time_step_nuclides(cursor, inventory, material_id, case_id)
         _save_gamma(cursor, inventory, material_id, case_id)
+        # accumulate nuclides for saving when multithreading is done
+        with self.nuclides_lock:
+            self.nuclides.update(inventory.extract_nuclides())
+
+    def on_save_complete(self) -> None:
+        """Save information accumulated during multithreaded processing of all the inventories."""
+        self.save_nuclides()
+
+    def save_nuclides(self) -> None:
+        """Save nuclides once multithreaded saving is complete.
+
+        Call this when all the inventories are saved.
+        """
+        sql = """
+            insert or ignore
+            into nuclide
+            values (?,?,?,?,?)
+            ;
+        """
+        self.con.executemany(sql, (ms.structs.astuple(x) for x in self.nuclides))
 
     def load_rundata(self) -> db.DuckDBPyRelation:
         """Load FISPACT run data as table.
@@ -177,14 +202,6 @@ def _save_run_data(
 
 
 # noinspection SqlNoDataSourceInspection
-def _save_nuclides(cursor: db.DuckDBPyConnection, inventory: Inventory):
-    nuclides = inventory.extract_nuclides()
-    sql = """
-        insert or ignore
-        into nuclide
-        values (?,?,?,?,?)
-    """
-    cursor.executemany(sql, (ms.structs.astuple(x) for x in nuclides))
 
 
 # noinspection SqlNoDataSourceInspection

diff --git a/src/xpypact/nuclide.py b/src/xpypact/nuclide.py
@@ -16,7 +16,13 @@
 FLOAT_ZERO = 0.0
 
 
-class NuclideInfo(ms.Struct, frozen=True, gc=False):
+class _NuclideID(ms.Struct, order=True, frozen=True, gc=False):
+    """The class organizes NuclideInfo equality and ordering on zai."""
+
+    zai: int
+
+
+class NuclideInfo(_NuclideID, frozen=True, gc=False):
     """Basic information on a nuclide.
 
     This is extracted as a separate database entity to improve normalization.
@@ -25,7 +31,6 @@ class NuclideInfo(ms.Struct, frozen=True, gc=False):
     element: str
     isotope: int
     state: str = ""
-    zai: int = 0
     half_life: float = 0.0
 
 
@@ -53,13 +58,16 @@ class Nuclide(ms.Struct):  # pylint: disable=too-many-instance-attributes
 
     def __post_init__(self) -> None:
         """Make the values consistent in data from old FISPACT."""
-        _z = z(self.element)
-        if self.zai == 0:
-            self.zai = _z * 10000 + self.isotope * 10
-            if self.state:
-                self.zai += 1
-        if self.atoms == FLOAT_ZERO and self.grams > FLOAT_ZERO:
-            self.atoms = Avogadro * self.grams / get_nuclide_mass(_z, self.isotope)
+        if (
+            self.zai == 0 or self.atoms == FLOAT_ZERO and self.grams > FLOAT_ZERO
+        ):  # pragma: no cover
+            _z = z(self.element)
+            if self.zai == 0:
+                self.zai = _z * 10000 + self.isotope * 10
+                if self.state:
+                    self.zai += 1
+            if self.atoms == FLOAT_ZERO and self.grams > FLOAT_ZERO:
+                self.atoms = Avogadro * self.grams / get_nuclide_mass(_z, self.isotope)
 
     @property
     def a(self) -> int:
@@ -77,4 +85,4 @@ def info(self) -> NuclideInfo:
         Returns:
             element, a, state, zai, half_life
         """
-        return NuclideInfo(self.element, self.a, self.state, self.zai, self.half_life)
+        return NuclideInfo(self.zai, self.element, self.a, self.state, self.half_life)
diff --git a/tests/test_duckdb_dao.py b/tests/test_duckdb_dao.py
@@ -39,18 +39,20 @@ def test_save(inventory_with_gamma) -> None:
     with closing(connect()) as con:
         dao = DataAccessObject(con)
         dao.create_schema()
-        dao.save(inventory_with_gamma)
-        run_data = dao.load_rundata().df()
-        assert run_data["timestamp"].item() == pd.Timestamp("2022-02-21 01:52:45")
-        assert run_data["run_name"].item() == "* Material Cu, fluxes 104_2_1_1"
+        dao.save(inventory_with_gamma, material_id=1, case_id=1)
+        dao.save(inventory_with_gamma, material_id=2, case_id=1)
+        dao.on_save_complete()
+        run_data = dao.load_rundata().df().loc[0]
+        assert run_data["timestamp"] == pd.Timestamp("2022-02-21 01:52:45")
+        assert run_data["run_name"] == "* Material Cu, fluxes 104_2_1_1"
         nuclides = dao.load_nuclides().df()
         nuclides = nuclides.set_index(["element", "mass_number", "state"])
         assert not nuclides.loc["Cu"].empty
         time_steps = dao.load_time_steps().df()
         assert not time_steps.empty
         time_steps = time_steps.set_index("time_step_number")
         assert not time_steps.loc[2].empty
-        time_step_nuclides = dao.load_time_step_nuclides().df()
+        time_step_nuclides = dao.load_time_step_nuclides().filter("material_id=1").df()
         assert not time_step_nuclides.empty
         time_step_nuclides = time_step_nuclides.set_index(
             [
@@ -61,7 +63,7 @@ def test_save(inventory_with_gamma) -> None:
         assert not time_step_nuclides.loc[2, 290630].empty
         gbins = dao.load_gbins().df().set_index("g")
         assert gbins.loc[0].boundary == pytest.approx(1e-11)
-        gamma = dao.load_gamma().df()
+        gamma = dao.load_gamma().filter("material_id=1").df()
         assert not gamma.empty
         gamma = gamma.set_index(["time_step_number", "g"])
         assert not gamma.loc[2, 1].empty

diff --git a/tests/test_nuclide.py b/tests/test_nuclide.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+import pytest
+
+from xpypact.nuclide import Nuclide
+
+
+@pytest.mark.parametrize(
+    "a,b,eq,order",
+    [
+        (("H", 1, "", 10010), ("H", 1, "", 10010), True, False),
+        (("H", 1, "", 10010), ("H", 2, "", 10020), False, True),
+    ],
+)
+def test_equality_and_comparison(
+    a: tuple[str, int, str, int],
+    b: tuple[str, int, str, int],
+    eq: bool,  # noqa: FBT001
+    order: bool,  # noqa: FBT001
+) -> None:
+    _a = Nuclide(*a).info
+    _b = Nuclide(*b).info
+    assert eq == (_a == _b)
+    assert order == (_a < _b)