first concentration calculation implementation

FAIRChemistry · Mar 15, 2024 · 01e47ed · 01e47ed
1 parent 1bee8e8
commit 01e47ed
Show file tree

Hide file tree

Showing 6 changed files with 2,273 additions and 114 deletions.
diff --git a/chromatopy/core/analyte.py b/chromatopy/core/analyte.py
@@ -162,3 +162,15 @@ def add_to_peaks(
             params["id"] = id
         self.peaks.append(Peak(**params))
         return self.peaks[-1]
+
+    def get_peak_by_injection_time(self, injection_time: Datetime) -> Peak:
+        """
+        Look up the peak that belongs to a given injection time.
+
+        ``self.peaks`` and ``self.injection_times`` are paired by position;
+        the peak of the first pair whose time equals ``injection_time`` is
+        returned.
+
+        Args:
+            injection_time (Datetime): Injection time to search for.
+
+        Returns:
+            Peak: The matching peak, or None if no injection time matches.
+        """
+        matching_peaks = (
+            candidate
+            for candidate, recorded_at in zip(self.peaks, self.injection_times)
+            if injection_time == recorded_at
+        )
+        # next() with a default yields the first match or falls back to None
+        return next(matching_peaks, None)
diff --git a/chromatopy/core/chromhandler.py b/chromatopy/core/chromhandler.py
@@ -1,6 +1,6 @@
+import numpy as np
 import sdRDM
 
-import numpy as np
 import warnings
 import pandas as pd
 import plotly.graph_objects as go
@@ -15,13 +15,13 @@
 from sdRDM.base.datatypes import Unit
 from sdRDM.tools.utils import elem2dict
 from datetime import datetime as Datetime
-from .standard import Standard
-from .chromatogram import Chromatogram
+from .signaltype import SignalType
 from .analyte import Analyte
+from .standard import Standard
 from .peak import Peak
-from .role import Role
 from .measurement import Measurement
-from .signaltype import SignalType
+from .role import Role
+from .chromatogram import Chromatogram
 from ..readers.abstractreader import AbstractReader
 
 
@@ -49,6 +49,12 @@ class ChromHandler(sdRDM.DataModel):
         tag="measurements",
         json_schema_extra=dict(multiple=True),
     )
+    _repo: Optional[str] = PrivateAttr(
+        default="https://github.com/FAIRChemistry/chromatopy"
+    )
+    _commit: Optional[str] = PrivateAttr(
+        default="10cacc0f6eea0feefa9a3bc7a4b4e90ee75bd03f"
+    )
     _raw_xml_data: Dict = PrivateAttr(default_factory=dict)
 
     @model_validator(mode="after")
@@ -69,7 +75,6 @@ def add_to_analytes(
         molecular_weight: Optional[float] = None,
         retention_time: Optional[float] = None,
         peaks: List[Peak] = ListPlus(),
-        injection_times: List[Datetime] = ListPlus(),
         concentrations: List[float] = ListPlus(),
         standard: Optional[Standard] = None,
         role: Optional[Role] = None,
@@ -85,7 +90,6 @@ def add_to_analytes(
             molecular_weight (): Molar weight of the molecule in g/mol. Defaults to None
             retention_time (): Approximated retention time of the molecule. Defaults to None
             peaks (): All peaks of the dataset, which are within the same retention time interval related to the molecule. Defaults to ListPlus()
-            injection_times (): Injection times of the molecule measured peaks. Defaults to ListPlus()
             concentrations (): Concentration of the molecule. Defaults to ListPlus()
             standard (): Standard, describing the signal-to-concentration relationship. Defaults to None
             role (): Role of the molecule in the experiment. Defaults to None
@@ -96,7 +100,6 @@ def add_to_analytes(
             "molecular_weight": molecular_weight,
             "retention_time": retention_time,
             "peaks": peaks,
-            "injection_times": injection_times,
             "concentrations": concentrations,
             "standard": standard,
             "role": role,
@@ -230,6 +233,7 @@ def _set_analyte(
             retention_time=retention_time,
             molecular_weight=molecular_weight,
             peaks=peaks,
+            injection_times=times,
             role=role,
         )
 
@@ -432,7 +436,6 @@ def calculate_concentrations(
                 for analyte in self.analytes
                 if analyte.role == Role.STANDARD.value
             ][0]
-            standard_areas = np.array([peak.area for peak in internal_standard.peaks])
 
         if not analytes:
             analytes = [
@@ -441,16 +444,63 @@ def calculate_concentrations(
                 if analyte.role == Role.ANALYTE.value
             ]
 
-        for analyte in analytes:
-            analyte_areas = np.array([peak.area for peak in analyte.peaks])
-            analyte_concs = (
-                analyte_areas
-                / standard_areas
-                / analyte.standard.factor
-                * internal_standard.molecular_weight
-            )
+        entries = []
+
+        for peak, injection_time in zip(
+            internal_standard.peaks, internal_standard.injection_times
+        ):
+            standard_area = peak.area
+
+            for analyte in analytes:
+                analyte_peak = analyte.get_peak_by_injection_time(injection_time)
+                if not analyte_peak:
+                    continue
+
+                # print(
+                #     analyte.name,
+                #     analyte_area,
+                #     standard_area,
+                #     analyte.standard.factor,
+                #     internal_standard.molecular_weight,
+                # )
+                analyte_conc = (
+                    analyte_peak.area
+                    / standard_area
+                    / analyte.standard.factor
+                    * internal_standard.molecular_weight
+                )
+                analyte.concentrations.append(analyte_conc)
+
+                entries.append(
+                    {
+                        "analyte": analyte.name,
+                        "injection_time": injection_time,
+                        "concentration": analyte_conc,
+                    }
+                )
+                # print(
+                #     f"Concentration of {analyte.name} at {injection_time} is {analyte_conc:.2f}"
+                # )
+
+        df = pd.DataFrame(entries)
+        df = df.pivot_table(
+            index="injection_time",
+            columns="analyte",
+            values="concentration",
+            aggfunc="first",
+        )
+        df.reset_index(inplace=True)
+        df.columns.name = None
+
+        # df.drop("analyte", axis=1, inplace=True)
+
+        df["injection_time"] = pd.to_datetime(df["injection_time"])
 
-        return analyte_concs
+        df["relative_time"] = (
+            df["injection_time"] - df["injection_time"].iloc[0]
+        ).dt.total_seconds()
+
+        return df
 
     @staticmethod
     def _sample_colorscale(size: int, plotly_scale: str) -> List[str]:
@@ -467,3 +517,89 @@ def injection_times(self):
             for measurement in self.measurements
         ]
         return relative_times
+
+    def visualize_concentrations(self, analytes: Optional[List[Analyte]] = None):
+        """
+        Plot analyte concentrations against their injection times.
+
+        Args:
+            analytes (List[Analyte], optional): Analytes to plot. Defaults to
+                None, in which case every entry of ``self.analytes`` whose
+                role equals ``Role.ANALYTE.value`` is plotted.
+
+        Returns:
+            go.Figure: Figure holding one "lines+markers" scatter trace per
+                analyte (x: ``injection_times``, y: ``concentrations``).
+        """
+        if analytes is None:
+            analytes = [
+                analyte
+                for analyte in self.analytes
+                if analyte.role == Role.ANALYTE.value
+            ]
+
+        fig = go.Figure()
+
+        for analyte in analytes:
+            fig.add_trace(
+                go.Scatter(
+                    x=analyte.injection_times,
+                    y=analyte.concentrations,
+                    mode="lines+markers",
+                    name=analyte.name,
+                    hovertemplate=(
+                        "<br>Time: %{x}<br>Concentration:"
+                        " %{y:.2f} mmol/l<extra></extra>"
+                    ),
+                )
+            )
+
+        fig.update_xaxes(title_text="Time")
+
+        # The superscript has to be closed with </sup>; the previous title
+        # opened a second <sup> instead of closing the first one.
+        fig.update_yaxes(title_text="Concentration / mmol l<sup>-1</sup>")
+
+        return fig
+
+    def concentration_to_df(self, analytes: Optional[List[Analyte]] = None):
+        """
+        Collect analyte concentrations into a wide DataFrame.
+
+        Each analyte's ``injection_times`` and ``concentrations`` are paired
+        by position, pivoted to one column per analyte name, and indexed by
+        the seconds elapsed since the earliest injection time.
+
+        Args:
+            analytes (List[Analyte], optional): Analytes to include. Defaults
+                to None, in which case every entry of ``self.analytes`` whose
+                role equals ``Role.ANALYTE.value`` is used.
+
+        Returns:
+            pd.DataFrame: Index named "relative time [s]"; one column per
+                analyte, named "<analyte name> [mmol/l]". An empty frame with
+                that index name is returned when there is nothing to tabulate.
+        """
+        if analytes is None:
+            analytes = [
+                analyte
+                for analyte in self.analytes
+                if analyte.role == Role.ANALYTE.value
+            ]
+
+        data = []
+        for analyte in analytes:
+            for injection_time, concentration in zip(
+                analyte.injection_times, analyte.concentrations
+            ):
+                data.append(
+                    {
+                        "analyte": analyte.name,
+                        "injection_time": injection_time,
+                        "concentration": concentration,
+                    }
+                )
+
+        if not data:
+            # Without any rows the frame has no "injection_time" column and the
+            # lookups below would raise KeyError; return an empty table instead.
+            return pd.DataFrame(
+                index=pd.Index([], dtype="float64", name="relative time [s]")
+            )
+
+        df = pd.DataFrame(data)
+
+        df["injection_time"] = pd.to_datetime(df["injection_time"])
+
+        # One row per injection time, one column per analyte
+        df = df.pivot_table(
+            index="injection_time",
+            columns="analyte",
+            values="concentration",
+            aggfunc="first",
+        )
+
+        earliest_time = df.index.min()
+
+        # Seconds elapsed since the earliest injection, in index order
+        df["relative_time"] = (
+            pd.Series(df.index)
+            .apply(lambda x: (x - earliest_time).total_seconds())
+            .values
+        )
+
+        df.set_index("relative_time", inplace=True)
+        df.columns.name = None
+        df.rename_axis("relative time [s]", inplace=True)
+        df.columns = [col + " [mmol/l]" for col in df.columns]
+
+        return df
+
+    def concentrations_to_csv(self, path: str, analytes: List[Analyte] = None):
+        """
+        Write the concentration table to a CSV file.
+
+        Args:
+            path (str): Destination passed to ``DataFrame.to_csv``.
+            analytes (List[Analyte], optional): Forwarded unchanged to
+                ``concentration_to_df``. Defaults to None.
+        """
+        self.concentration_to_df(analytes).to_csv(path)
diff --git a/examples/chemstation-example/concentrations.csv b/examples/chemstation-example/concentrations.csv
@@ -0,0 +1,39 @@
+relative time [s],product1 [mmol/l],product2 [mmol/l],starting material [mmol/l]
+0.0,202.66416972673377,,243.12639735883374
+902.0,225.40554677457018,,
+1802.0,250.34284593982497,,
+2705.0,270.0864313522914,,
+3604.0,274.6892101577222,,
+4506.0,277.4322620817434,,
+5406.0,287.6810927758205,,
+6310.0,502.55777821027186,93.62544256282386,
+7210.0,481.56827369579895,85.91052677980255,
+8112.0,496.7627275943982,86.57414800482213,
+9012.0,513.1432875614132,88.93505150296286,
+9914.0,485.21996267030784,81.14543213194105,
+10816.0,495.7140925116171,82.89146784435722,0.19171335727087865
+11717.0,508.7693527228321,85.68341346912987,
+12619.0,492.67524677626915,81.54878387806144,
+13521.0,448.77797745326046,57.93355182988204,0.16965208072463986
+14423.0,481.8871666043568,66.52228610447898,0.15481880299315753
+15325.0,486.5425865964661,68.01299979822002,0.14331223868166726
+16227.0,522.3918175888001,88.51281346291348,
+17131.0,387.0493217696806,45.94911495081903,0.12052367683395629
+18032.0,487.547261381932,80.87368174312695,
+18935.0,516.076930321944,87.63130520478262,
+19836.0,506.08023741243346,83.51928778256533,
+20736.0,520.3077096665074,88.92584660380776,
+21638.0,519.6296225248918,89.17701050793264,
+22540.0,519.120261656428,88.92877261593614,
+23443.0,514.5684633693365,88.20260091628352,
+24346.0,512.9346514299281,87.80444266651183,
+25247.0,505.9977199057837,86.47485849766315,
+26149.0,488.6843727039107,83.17891080601265,
+27051.0,502.023964199595,86.04283341183167,
+27954.0,492.6501740156666,84.94656830991927,
+28858.0,488.56008552505597,83.47319074265826,5.365613280345353
+29759.0,374.119817928048,65.67764866610909,149.9021443710986
+30663.0,212.56040674444355,43.911049493478686,331.36628307400656
+31564.0,143.49156835512696,25.374150729523315,422.24351812367195
+32468.0,80.47194779336878,20.303541578534723,505.1638201790611
+33367.0,66.11367115851941,17.72073447068817,539.391165949943