Merge pull request #145 from Fxe/dev

bumped to version 0.4.0
ModelSEED · Jul 9, 2024 · d7fce5a · d7fce5a
2 parents 06462e1 + 311563e
commit d7fce5a
Show file tree

Hide file tree

Showing 10 changed files with 346 additions and 27 deletions.
diff --git a/README.rst b/README.rst
@@ -51,8 +51,3 @@ The associated ModelSEED Database, which is required for a few packages, is simp
  git clone https://github.com/ModelSEED/ModelSEEDDatabase.git
 
 and the path to this repository is passed as an argument to the corresponding packages.
-
-**Windows users** must separately install the ``pyeda`` module: 1) download the appropriate wheel for your Python version from `this website <https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda>`_ ; and 2) install the wheel through the following commands in a command prompt/powershell console::
-
- cd path/to/pyeda/wheel
- pip install pyeda_wheel_name.whl
diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py
@@ -14,7 +14,7 @@
 
 __author__ = "Christopher Henry"
 __email__ = "[email protected]"
-__version__ = "0.3.3"
+__version__ = "0.4.0"
 
 logger = logging.getLogger(__name__)
 

diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py
@@ -250,6 +250,56 @@ def _load_metabolites(
     return metabolites
 
 
+def build_modelseed_reaction(
+    o, names, aliases, ec_numbers, metabolites_indexed, metabolites
+):
+    """
+    Build a ModelSEEDReaction2 from a single reaction record.
+
+    :param o: mapping describing the reaction; it must carry a non-empty "id".
+        The keys "name", "reversibility", "stoichiometry", "deltag",
+        "deltagerr", "is_obsolete", "status" and "source" are read when present.
+    :param names: mapping of reaction id -> iterable of names for that reaction
+    :param aliases: mapping of reaction id -> dict merged into rxn.annotation
+    :param ec_numbers: mapping of reaction id -> value stored under the
+        "ec-code" annotation key
+    :param metabolites_indexed: cache of compartment-specific compounds keyed
+        by "<compound id>_<compartment>"; missing entries are created and
+        stored here, so this mapping is mutated in place
+    :param metabolites: mapping of compound id -> base compound object
+    :return: the populated ModelSEEDReaction2
+    :raises ValueError: if the record has no usable "id"
+    :raises KeyError: if a stoichiometry entry references a compound that is
+        not present in metabolites
+    """
+    rxn_id = o.get("id")
+    if not rxn_id:
+        raise ValueError("unable to build reaction")
+
+    rxn_names = set(names[rxn_id]) if rxn_id in names else set()
+    lower_bound, upper_bound = get_reaction_constraints_from_direction(
+        o.get("reversibility")
+    )
+
+    reaction_metabolites = {}
+    # A record without "stoichiometry" produces a reaction with no metabolites
+    # instead of failing with a TypeError while iterating over None.
+    for s in o.get("stoichiometry") or []:
+        cmp_token = s["compartment"]
+        value = s["coefficient"]
+        cpd = metabolites[s["compound"]]
+        cpd_index_id = f"{cpd.id}_{cmp_token}"
+        if cpd_index_id not in metabolites_indexed:
+            # first use of this compound in this compartment: derive a
+            # compartment-specific copy and cache it for later reactions
+            cpd_token = cpd.copy()
+            cpd_token.id = cpd_index_id
+            cpd_token.base_id = cpd.id
+            cpd_token.compartment = cmp_token
+            metabolites_indexed[cpd_index_id] = cpd_token
+        reaction_metabolites[metabolites_indexed[cpd_index_id]] = value
+
+    # NOTE(review): the positional order below has to match the
+    # ModelSEEDReaction2 constructor, which is defined outside this block.
+    rxn = ModelSEEDReaction2(
+        rxn_id,
+        o.get("name"),
+        "",
+        lower_bound,
+        upper_bound,
+        "",
+        rxn_names,
+        o.get("deltag"),
+        o.get("deltagerr"),
+        o.get("is_obsolete"),
+        None,
+        o.get("status"),
+        o.get("source"),
+    )
+    rxn.add_metabolites(reaction_metabolites)
+    if rxn.id in aliases:
+        rxn.annotation.update(aliases[rxn.id])
+    if rxn.id in ec_numbers:
+        rxn.annotation["ec-code"] = ec_numbers[rxn.id]
+    return rxn
+
+
 def _load_reactions(
     database_path: str, metabolites: dict, aliases=None, names=None, ec_numbers=None
 ) -> (dict, dict):

diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py
@@ -57,7 +57,9 @@ def __init__(
             self.flags |= set(flags)
 
     def to_template_compartment_compound(self, compartment):
-        cpd_id = f"{self.seed_id}_{compartment}"
+        cpd_id = f"{self.seed_id}"
+        if compartment:
+            cpd_id += f"_{compartment}"
         # build Template Compound
         metabolite = MSTemplateMetabolite(
             self.seed_id,
@@ -71,6 +73,8 @@ def to_template_compartment_compound(self, compartment):
             self.abbr,
         )
         # build Template Compartment Compound
+        if compartment is None:
+            compartment = "x"
         res = MSTemplateSpecies(cpd_id, self.charge, compartment, metabolite.id)
 
         # assign Compound to Compartment Compound

diff --git a/modelseedpy/biochem/modelseed_reaction.py b/modelseedpy/biochem/modelseed_reaction.py
@@ -174,8 +174,11 @@ def to_template_reaction(self, compartment_setup=None):
             raise ValueError("invalid compartment setup")
         from modelseedpy.core.msmodel import get_cmp_token
 
+        rxn_id = f"{self.id}"
         reaction_compartment = get_cmp_token(compartment_setup.values())
-        rxn_id = f"{self.id}_{reaction_compartment}"
+        if reaction_compartment:
+            rxn_id += f"_{reaction_compartment}"
+
         name = f"{self.name}"
         metabolites = {}
         for m, v in self.metabolites.items():

diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py
@@ -8,6 +8,21 @@
 DEFAULT_SPLIT = " "
 
 
+def to_fasta(features, filename, l=80, fn_header=None):
+    """
+    Write features to a FASTA file.
+
+    :param features: iterable of objects exposing ``id`` and ``seq``
+    :param filename: path of the file to write (opened in "w" mode)
+    :param l: maximum number of sequence characters per output line
+    :param fn_header: optional callable taking a feature and returning the
+        header text; its result is written verbatim, no newline is appended
+    :return: filename, unchanged
+    """
+    with open(filename, "w") as out:
+        for feat in features:
+            header = f">{feat.id}\n"
+            if fn_header:
+                header = fn_header(feat)
+            out.write(header)
+            # emit the sequence in consecutive chunks of at most `l` characters
+            for start in range(0, len(feat.seq), l):
+                out.write(feat.seq[start : start + l] + "\n")
+    return filename
+
+
 def normalize_role(s):
     s = s.strip().lower()
     s = re.sub(r"[\W_]+", "", s)
@@ -25,6 +40,17 @@ def read_fasta(f, split=DEFAULT_SPLIT, h_func=None):
             return parse_fasta_str(fh.read(), split, h_func)
 
 
+def read_fasta2(f, split=DEFAULT_SPLIT, h_func=None):
+    """
+    Read a FASTA file, optionally gzip-compressed, and parse it with
+    extract_features.
+
+    :param f: path to the file; a name ending in ".gz" is read through gzip
+    :param split: header separator forwarded to extract_features
+    :param h_func: optional header parser forwarded to extract_features
+    :return: whatever extract_features returns for the file content
+    """
+    if f.endswith(".gz"):
+        import gzip
+
+        # Text mode applies the same newline translation as the plain-file
+        # branch below, so CRLF files do not leave a stray "\r" in headers.
+        with gzip.open(f, "rt", encoding="utf-8") as fh:
+            return extract_features(fh.read(), split, h_func)
+    with open(f, "r") as fh:
+        return extract_features(fh.read(), split, h_func)
+
+
 def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None):
     features = []
     seq = None
@@ -53,6 +79,37 @@ def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None):
     return features
 
 
+def extract_features(faa_str, split=DEFAULT_SPLIT, h_func=None):
+    """
+    Parse FASTA-formatted text into a list of MSFeature objects.
+
+    :param faa_str: FASTA text; a record starts at each line beginning with ">"
+    :param split: separator used to cut the header into id and description
+        (first occurrence only); not used when h_func is given
+    :param h_func: optional callable taking the header text (without ">")
+        and returning an (id, description) pair
+    :return: list of MSFeature in input order; empty when the text contains
+        no header line
+    """
+    features = []
+    active_seq = None
+    seq_lines = []
+    for line in faa_str.split("\n"):
+        if line.startswith(">"):
+            if active_seq is not None:
+                active_seq.seq = "".join(seq_lines)
+                features.append(active_seq)
+            seq_lines = []
+            # drop a carriage return left over from CRLF line endings
+            header = line[1:].rstrip("\r")
+            seq_id = header
+            desc = None
+            if h_func:
+                seq_id, desc = h_func(header)
+            elif split:
+                header_data = header.split(split, 1)
+                seq_id = header_data[0]
+                if len(header_data) > 1:
+                    desc = header_data[1]
+            active_seq = MSFeature(seq_id, "", desc)
+        elif active_seq is not None:
+            # text before the first header belongs to no record and is ignored
+            seq_lines.append(line.strip())
+
+    # Add the last record. Testing active_seq rather than seq_lines keeps a
+    # trailing header that has no sequence and avoids touching None when the
+    # input holds no header at all.
+    if active_seq is not None:
+        active_seq.seq = "".join(seq_lines)
+        features.append(active_seq)
+
+    return features
+
+
 class MSFeature:
     def __init__(self, feature_id, sequence, description=None, aliases=None):
         """
@@ -110,18 +167,14 @@ def from_fasta(
         genome.features += read_fasta(filename, split, h_func)
         return genome
 
+    @staticmethod
+    def from_fasta2(filename, split=" ", h_func=None):
+        """
+        Create an MSGenome and fill its features from a FASTA file using
+        read_fasta2.
+
+        :param filename: path handed to read_fasta2
+        :param split: header separator handed to read_fasta2
+        :param h_func: optional header parser handed to read_fasta2
+        :return: the new MSGenome
+        """
+        fasta_genome = MSGenome()
+        parsed = read_fasta2(filename, split, h_func)
+        fasta_genome.features += parsed
+        return fasta_genome
+
     def to_fasta(self, filename, l=80, fn_header=None):
-        with open(filename, "w") as fh:
-            for feature in self.features:
-                h = f">{feature.id}\n"
-                if fn_header:
-                    h = fn_header(feature)
-                fh.write(h)
-                lines = [
-                    feature.seq[i : i + l] + "\n" for i in range(0, len(feature.seq), l)
-                ]
-                for line in lines:
-                    fh.write(line)
+        to_fasta(self.features, filename, l, fn_header)
         return filename
 
     @staticmethod

diff --git a/modelseedpy/core/msmodel.py b/modelseedpy/core/msmodel.py
@@ -1,10 +1,12 @@
 # -*- coding: utf-8 -*-
 import logging
 import re
-from cobra.core import Model
-from pyeda.inter import (
-    expr,
-)  # wheels must be specially downloaded and installed for Windows https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda
+from sympy.logic.inference import satisfiable
+from sympy import Symbol
+import sympy.logic.boolalg as spl
+from cobra.core import Model, GPR
+
+# from pyeda.inter import expr
 
 logger = logging.getLogger(__name__)
 
@@ -103,18 +105,37 @@ def get_cmp_token(compartments):
     return None
 
 
-def get_set_set(expr_str):  # !!! this currently returns dictionaries, not sets??
+def get_set_set_pyeda(expr_str: str, pyeda_expr):
     if len(expr_str.strip()) == 0:
         return {}
     expr_str = expr_str.replace(" or ", " | ")
     expr_str = expr_str.replace(" and ", " & ")
-    dnf = expr(expr_str).to_dnf()
+    dnf = pyeda_expr(expr_str).to_dnf()
     if len(dnf.inputs) == 1 or dnf.NAME == "And":
         return {frozenset({str(x) for x in dnf.inputs})}
     else:
         return {frozenset({str(x) for x in o.inputs}) for o in dnf.xs}
 
 
+def get_set_set(expr_str: str):
+    """
+    Convert a GPR expression string into a set of sets of names.
+
+    The expression is parsed with cobra's GPR, rewritten as a sum of products
+    with sympy, and each product term becomes one inner frozenset.
+
+    :param expr_str: GPR expression such as "A and (B or C)"; None or a
+        blank string is accepted
+    :return: frozenset of frozensets of names; an empty frozenset for None or
+        blank input, so the return type is the same on every path
+    :raises ValueError: if the reduced expression is neither a symbol, a
+        conjunction nor a disjunction
+    """
+    if expr_str is None or not expr_str.strip():
+        return frozenset()
+    gpr = GPR.from_string(expr_str)
+    expr = gpr.as_symbolic()
+    expr_model = list(satisfiable(expr, all_models=True))
+    dnf = spl.SOPform(tuple(gpr.genes), expr_model)
+    if isinstance(dnf, (spl.And, Symbol)):
+        # a single product term (or a lone symbol): one group holding all atoms
+        return frozenset({frozenset(atom.name for atom in dnf.atoms())})
+    if isinstance(dnf, spl.Or):
+        # one group per product term of the disjunction
+        return frozenset(
+            frozenset(atom.name for atom in term.atoms()) for term in dnf.args
+        )
+    raise ValueError(f"unable to decode {expr_str} found token of type {type(dnf)}")
+
+
 class MSModel(Model):
     def __init__(self, id_or_model=None, genome=None, template=None):
         """

diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name="ModelSEEDpy",
-    version="0.3.3",
+    version="0.4.0",
     description="Python package for building and analyzing models using ModelSEED",
     long_description_content_type="text/x-rst",
     long_description=readme,
@@ -40,7 +40,8 @@
         "chemicals >= 1.0.13",
         "chemw >= 0.3.2",
         "matplotlib >= 3.0.0",
-        "pyeda",
+        "Jinja2 >= 3.1.4",
+        "sympy >=1.12.0",
     ],
     tests_require=[
         "pytest",

diff --git a/tests/core/test_msmodel.py b/tests/core/test_msmodel.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+from modelseedpy.core.msmodel import *
+
+
+def test_get_direction_from_constraints1():
+    """Bounds (0, 1000) must map to the ">" token."""
+    direction = get_direction_from_constraints(0, 1000)
+    assert direction == ">"
+
+
+def test_get_direction_from_constraints2():
+    """Bounds (-1000, 0) must map to the "<" token."""
+    direction = get_direction_from_constraints(-1000, 0)
+    assert direction == "<"
+
+
+def test_get_direction_from_constraints3():
+    """Bounds (-1000, 1000) must map to the "=" token."""
+    direction = get_direction_from_constraints(-1000, 1000)
+    assert direction == "="
+
+
+def test_get_set_set1():
+    """A single name `A` yields exactly one group, {A}."""
+    groups = get_set_set("A")
+    assert len(groups) == 1
+    assert {"A"} in groups
+
+
+def test_get_set_set2():
+    """`A and B` yields exactly one group, {A, B}."""
+    groups = get_set_set("A and B")
+    assert len(groups) == 1
+    assert {"A", "B"} in groups
+
+
+def test_get_set_set3():
+    """`A or B` yields two groups, {A} and {B}."""
+    groups = get_set_set("A or B")
+    assert len(groups) == 2
+    for expected in ({"A"}, {"B"}):
+        assert expected in groups
+
+
+def test_get_set_set4():
+    """`A or B or C` yields three groups, {A}, {B} and {C}."""
+    groups = get_set_set("A or B or C")
+    assert len(groups) == 3
+    for expected in ({"A"}, {"B"}, {"C"}):
+        assert expected in groups
+
+
+def test_get_set_set5():
+    """`A or B and C` yields two groups, {A} and {B, C}."""
+    groups = get_set_set("A or B and C")
+    assert len(groups) == 2
+    for expected in ({"A"}, {"B", "C"}):
+        assert expected in groups
+
+
+def test_get_set_set6():
+    """`A and B or C` yields two groups, {A, B} and {C}."""
+    groups = get_set_set("A and B or C")
+    assert len(groups) == 2
+    for expected in ({"A", "B"}, {"C"}):
+        assert expected in groups
+
+
+def test_get_set_set7():
+    """`(A or B) and C` yields two groups, {A, C} and {B, C}."""
+    groups = get_set_set("(A or B) and C")
+    assert len(groups) == 2
+    for expected in ({"A", "C"}, {"B", "C"}):
+        assert expected in groups
+
+
+def test_get_set_set8():
+    """`A and (B or C)` yields two groups, {A, B} and {A, C}."""
+    groups = get_set_set("A and (B or C)")
+    assert len(groups) == 2
+    for expected in ({"A", "B"}, {"A", "C"}):
+        assert expected in groups