Skip to content

Commit

Permalink
Merge pull request #145 from Fxe/dev
Browse files Browse the repository at this point in the history
bumped to version 0.4.0
  • Loading branch information
Fxe authored Jul 9, 2024
2 parents 06462e1 + 311563e commit d7fce5a
Show file tree
Hide file tree
Showing 10 changed files with 346 additions and 27 deletions.
5 changes: 0 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,3 @@ The associated ModelSEED Database, which is required for a few packages, is simp
git clone https://github.com/ModelSEED/ModelSEEDDatabase.git

and the path to this repository is passed as an argument to the corresponding packages.

**Windows users** must separately install the ``pyeda`` module: 1) download the appropriate wheel for your Python version from `this website <https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda>`_ ; and 2) install the wheel through the following commands in a command prompt/powershell console::

cd path/to/pyeda/wheel
pip install pyeda_wheel_name.whl
2 changes: 1 addition & 1 deletion modelseedpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

__author__ = "Christopher Henry"
__email__ = "[email protected]"
__version__ = "0.3.3"
__version__ = "0.4.0"

logger = logging.getLogger(__name__)

Expand Down
50 changes: 50 additions & 0 deletions modelseedpy/biochem/modelseed_biochem.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,56 @@ def _load_metabolites(
return metabolites


def build_modelseed_reaction(
    o, names, aliases, ec_numbers, metabolites_indexed, metabolites
):
    """
    Build a ModelSEEDReaction2 from a single reaction record.

    :param o: reaction record; needs a non-empty "id" and a "stoichiometry"
        iterable whose entries provide "compartment", "coefficient" and
        "compound"
    :param names: mapping of reaction id to a set of alternative names
    :param aliases: mapping of reaction id to annotations merged into the
        reaction annotation
    :param ec_numbers: mapping of reaction id to the value stored under the
        "ec-code" annotation key
    :param metabolites_indexed: cache of compartment-specific compounds keyed by
        "<compound id>_<compartment>"; missing entries are added in place
    :param metabolites: mapping of compound id to the base compound object
    :return: the assembled reaction
    :raises ValueError: if the record has no usable "id"
    """
    # Guard clause: a record without a truthy id cannot be turned into a reaction.
    if "id" not in o or not o["id"]:
        raise ValueError("unable to build reaction")

    reaction_id = o["id"]
    rxn_names = set()
    if reaction_id in names:
        rxn_names |= names[reaction_id]

    bounds = get_reaction_constraints_from_direction(o.get("reversibility"))
    lower_bound, upper_bound = bounds

    reaction_metabolites = {}
    for entry in o.get("stoichiometry"):
        cmp_token = entry["compartment"]
        coefficient = entry["coefficient"]
        cpd = metabolites[entry["compound"]]
        cpd_index_id = f"{cpd.id}_{cmp_token}"
        if cpd_index_id not in metabolites_indexed:
            # First use of this compound in this compartment: derive a
            # compartment-specific copy and remember it for later reactions.
            cpd_token = cpd.copy()
            cpd_token.id = cpd_index_id
            cpd_token.base_id = cpd.id
            cpd_token.compartment = cmp_token
            metabolites_indexed[cpd_index_id] = cpd_token
        reaction_metabolites[metabolites_indexed[cpd_index_id]] = coefficient

    rxn = ModelSEEDReaction2(
        reaction_id,
        o.get("name"),
        "",
        lower_bound,
        upper_bound,
        "",
        rxn_names,
        o.get("deltag"),
        o.get("deltagerr"),
        o.get("is_obsolete"),
        None,
        o.get("status"),
        o.get("source"),
    )
    rxn.add_metabolites(reaction_metabolites)

    if rxn.id in aliases:
        rxn.annotation.update(aliases[rxn.id])
    if rxn.id in ec_numbers:
        rxn.annotation["ec-code"] = ec_numbers[rxn.id]
    return rxn


def _load_reactions(
database_path: str, metabolites: dict, aliases=None, names=None, ec_numbers=None
) -> (dict, dict):
Expand Down
6 changes: 5 additions & 1 deletion modelseedpy/biochem/modelseed_compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ def __init__(
self.flags |= set(flags)

def to_template_compartment_compound(self, compartment):
cpd_id = f"{self.seed_id}_{compartment}"
cpd_id = f"{self.seed_id}"
if compartment:
cpd_id += f"_{compartment}"
# build Template Compound
metabolite = MSTemplateMetabolite(
self.seed_id,
Expand All @@ -71,6 +73,8 @@ def to_template_compartment_compound(self, compartment):
self.abbr,
)
# build Template Compartment Compound
if compartment is None:
compartment = "x"
res = MSTemplateSpecies(cpd_id, self.charge, compartment, metabolite.id)

# assign Compound to Compartment Compound
Expand Down
5 changes: 4 additions & 1 deletion modelseedpy/biochem/modelseed_reaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,11 @@ def to_template_reaction(self, compartment_setup=None):
raise ValueError("invalid compartment setup")
from modelseedpy.core.msmodel import get_cmp_token

rxn_id = f"{self.id}"
reaction_compartment = get_cmp_token(compartment_setup.values())
rxn_id = f"{self.id}_{reaction_compartment}"
if reaction_compartment:
rxn_id += f"_{reaction_compartment}"

name = f"{self.name}"
metabolites = {}
for m, v in self.metabolites.items():
Expand Down
75 changes: 64 additions & 11 deletions modelseedpy/core/msgenome.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@
DEFAULT_SPLIT = " "


def to_fasta(features, filename, l=80, fn_header=None):
    """
    Write features to a FASTA file.

    :param features: iterable of objects exposing ``id`` and ``seq``
    :param filename: path of the file to (over)write
    :param l: maximum number of sequence characters per line
    :param fn_header: optional callable receiving the feature and returning the
        complete header line (it is written as-is, so it must include the
        leading ">" and the trailing newline)
    :return: ``filename``
    """
    with open(filename, "w") as fh:
        for feature in features:
            header = f">{feature.id}\n"
            if fn_header:
                header = fn_header(feature)
            fh.write(header)

            # Wrap the sequence into fixed-width chunks, one per output line.
            sequence = feature.seq
            wrapped = [
                sequence[start : start + l] + "\n"
                for start in range(0, len(sequence), l)
            ]
            fh.writelines(wrapped)
    return filename


def normalize_role(s):
s = s.strip().lower()
s = re.sub(r"[\W_]+", "", s)
Expand All @@ -25,6 +40,17 @@ def read_fasta(f, split=DEFAULT_SPLIT, h_func=None):
return parse_fasta_str(fh.read(), split, h_func)


def read_fasta2(f, split=DEFAULT_SPLIT, h_func=None):
    """
    Read a FASTA file (optionally gzip-compressed) into a list of features.

    :param f: path to the FASTA file; a name ending in ".gz" is read through gzip
    :param split: separator forwarded to ``extract_features``
    :param h_func: optional header parser forwarded to ``extract_features``
    :return: whatever ``extract_features`` returns for the file content
    """
    if f.endswith(".gz"):
        import gzip

        # Text mode applies the same universal-newline translation as the
        # plain-file branch, so CRLF files no longer leave a stray "\r" at the
        # end of header lines only when they happen to be compressed.
        with gzip.open(f, "rt", encoding="utf-8") as fh:
            return extract_features(fh.read(), split, h_func)

    with open(f, "r") as fh:
        return extract_features(fh.read(), split, h_func)


def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None):
features = []
seq = None
Expand Down Expand Up @@ -53,6 +79,37 @@ def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None):
return features


def extract_features(faa_str, split=DEFAULT_SPLIT, h_func=None):
    """
    Parse FASTA-formatted text into a list of MSFeature objects.

    Each line starting with ">" opens a new record; the following lines, with
    surrounding whitespace stripped, are concatenated into its sequence.

    :param faa_str: FASTA content as a single string
    :param split: when ``h_func`` is not given and ``split`` is truthy, the
        header (without ">") is split once on it: the first part is the
        feature id and the remainder, if any, the description
    :param h_func: optional callable that receives the header (without ">")
        and returns an ``(id, description)`` pair; takes precedence over
        ``split``
    :return: list of features in input order; empty when the text contains no
        header line
    """
    features = []
    active_seq = None
    seq_lines = []

    for line in faa_str.split("\n"):
        if line.startswith(">"):
            # A new header closes the record collected so far.
            if active_seq is not None:
                active_seq.seq = "".join(seq_lines)
                features.append(active_seq)
            seq_lines = []

            header = line[1:]
            seq_id = header
            desc = None
            if h_func:
                seq_id, desc = h_func(header)
            elif split:
                header_data = header.split(split, 1)
                seq_id = header_data[0]
                if len(header_data) > 1:
                    desc = header_data[1]
            active_seq = MSFeature(seq_id, "", desc)
        elif active_seq is not None:
            # Lines before the first header belong to no record and are
            # skipped, so they cannot leak into the first sequence.
            seq_lines.append(line.strip())

    # Close the last record. Checking the record rather than the collected
    # lines avoids dereferencing None for header-less input (e.g. "") and
    # keeps a trailing header that has no sequence lines.
    if active_seq is not None:
        active_seq.seq = "".join(seq_lines)
        features.append(active_seq)

    return features


class MSFeature:
def __init__(self, feature_id, sequence, description=None, aliases=None):
"""
Expand Down Expand Up @@ -110,18 +167,14 @@ def from_fasta(
genome.features += read_fasta(filename, split, h_func)
return genome

@staticmethod
def from_fasta2(filename, split=" ", h_func=None):
    """
    Create a genome whose features are read from a FASTA file by ``read_fasta2``.

    :param filename: path forwarded to ``read_fasta2``
    :param split: header separator forwarded to ``read_fasta2``
    :param h_func: optional header parser forwarded to ``read_fasta2``
    :return: a new MSGenome holding the parsed features
    """
    new_genome = MSGenome()
    parsed_features = read_fasta2(filename, split, h_func)
    new_genome.features += parsed_features
    return new_genome

def to_fasta(self, filename, l=80, fn_header=None):
with open(filename, "w") as fh:
for feature in self.features:
h = f">{feature.id}\n"
if fn_header:
h = fn_header(feature)
fh.write(h)
lines = [
feature.seq[i : i + l] + "\n" for i in range(0, len(feature.seq), l)
]
for line in lines:
fh.write(line)
to_fasta(self.features, filename, l, fn_header)
return filename

@staticmethod
Expand Down
33 changes: 27 additions & 6 deletions modelseedpy/core/msmodel.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# -*- coding: utf-8 -*-
import logging
import re
from cobra.core import Model
from pyeda.inter import (
expr,
) # wheels must be specially downloaded and installed for Windows https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda
from sympy.logic.inference import satisfiable
from sympy import Symbol
import sympy.logic.boolalg as spl
from cobra.core import Model, GPR

# from pyeda.inter import expr

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -103,18 +105,37 @@ def get_cmp_token(compartments):
return None


def get_set_set(expr_str): # !!! this currently returns dictionaries, not sets??
def get_set_set_pyeda(expr_str: str, pyeda_expr):
if len(expr_str.strip()) == 0:
return {}
expr_str = expr_str.replace(" or ", " | ")
expr_str = expr_str.replace(" and ", " & ")
dnf = expr(expr_str).to_dnf()
dnf = pyeda_expr(expr_str).to_dnf()
if len(dnf.inputs) == 1 or dnf.NAME == "And":
return {frozenset({str(x) for x in dnf.inputs})}
else:
return {frozenset({str(x) for x in o.inputs}) for o in dnf.xs}


def get_set_set(expr_str: str):
    """
    Convert a gene-protein-reaction rule into its sum-of-products gene sets.

    The rule is parsed with ``GPR.from_string``, reduced with sympy's
    ``SOPform`` and returned as a set of sets: each inner frozenset holds the
    symbol names of one product term, and the outer frozenset collects the
    alternative terms.

    :param expr_str: GPR rule such as "A and (B or C)"; ``None`` or a blank
        string means "no rule"
    :return: frozenset of frozensets of names; an empty frozenset for a
        missing or blank rule, so every path returns the same container type
    :raises ValueError: if the reduced expression is neither a single symbol,
        a conjunction, nor a disjunction
    """
    if expr_str is None or not expr_str.strip():
        return frozenset()

    gpr = GPR.from_string(expr_str)
    expr = gpr.as_symbolic()
    expr_model = list(satisfiable(expr, all_models=True))
    dnf = spl.SOPform(tuple(gpr.genes), expr_model)

    # A lone symbol or a single conjunction is exactly one product term.
    if isinstance(dnf, (spl.And, Symbol)):
        return frozenset({frozenset({atom.name for atom in dnf.atoms()})})
    # A disjunction contributes one product term per argument.
    if isinstance(dnf, spl.Or):
        return frozenset(
            {frozenset({atom.name for atom in x.atoms()}) for x in dnf.args}
        )
    raise ValueError(f"unable to decode {expr_str} found token of type {type(dnf)}")


class MSModel(Model):
def __init__(self, id_or_model=None, genome=None, template=None):
"""
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name="ModelSEEDpy",
version="0.3.3",
version="0.4.0",
description="Python package for building and analyzing models using ModelSEED",
long_description_content_type="text/x-rst",
long_description=readme,
Expand Down Expand Up @@ -40,7 +40,8 @@
"chemicals >= 1.0.13",
"chemw >= 0.3.2",
"matplotlib >= 3.0.0",
"pyeda",
"Jinja2 >= 3.1.4",
"sympy >=1.12.0",
],
tests_require=[
"pytest",
Expand Down
83 changes: 83 additions & 0 deletions tests/core/test_msmodel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
"""Tests for the bound-direction and GPR helpers of modelseedpy.core.msmodel."""
from modelseedpy.core.msmodel import get_direction_from_constraints, get_set_set


def test_get_direction_from_constraints1():
    """Bounds (0, 1000) map to ">"."""
    res = get_direction_from_constraints(0, 1000)

    assert res == ">"


def test_get_direction_from_constraints2():
    """Bounds (-1000, 0) map to "<"."""
    res = get_direction_from_constraints(-1000, 0)

    assert res == "<"


def test_get_direction_from_constraints3():
    """Bounds (-1000, 1000) map to "="."""
    res = get_direction_from_constraints(-1000, 1000)

    assert res == "="


def test_get_set_set_empty():
    """A blank or missing rule yields an empty collection."""
    assert len(get_set_set("")) == 0
    assert len(get_set_set("   ")) == 0
    assert len(get_set_set(None)) == 0


def test_get_set_set1():
    """A single gene gives one singleton set."""
    res = get_set_set("A")

    assert len(res) == 1
    assert {"A"} in res


def test_get_set_set2():
    """A conjunction gives one set holding both genes."""
    res = get_set_set("A and B")

    assert len(res) == 1
    assert {"A", "B"} in res


def test_get_set_set3():
    """A disjunction gives one singleton set per gene."""
    res = get_set_set("A or B")

    assert len(res) == 2
    assert {"A"} in res
    assert {"B"} in res


def test_get_set_set4():
    """A three-way disjunction gives three singleton sets."""
    res = get_set_set("A or B or C")

    assert len(res) == 3
    assert {"A"} in res
    assert {"B"} in res
    assert {"C"} in res


def test_get_set_set5():
    """"and" binds tighter than "or" (conjunction on the right)."""
    res = get_set_set("A or B and C")

    assert len(res) == 2
    assert {"A"} in res
    assert {"B", "C"} in res


def test_get_set_set6():
    """"and" binds tighter than "or" (conjunction on the left)."""
    res = get_set_set("A and B or C")

    assert len(res) == 2
    assert {"A", "B"} in res
    assert {"C"} in res


def test_get_set_set7():
    """A parenthesised disjunction is distributed over the conjunction."""
    res = get_set_set("(A or B) and C")

    assert len(res) == 2
    assert {"A", "C"} in res
    assert {"B", "C"} in res


def test_get_set_set8():
    """Distribution also works when the disjunction is the right operand."""
    res = get_set_set("A and (B or C)")

    assert len(res) == 2
    assert {"A", "B"} in res
    assert {"A", "C"} in res
Loading

0 comments on commit d7fce5a

Please sign in to comment.