Metanetx dictionaries update (#57)
* chore: update metanetx dictionaries

* fix: back compatibility for python2 - pickle protocol

* Do not filter chem_prop by database
sgalkina authored and phantomas1234 committed Aug 1, 2016
1 parent 6a9beb5 commit 393d1bc
Showing 17 changed files with 69 additions and 40,364 deletions.
Binary file modified cameo/data/metanetx.pickle
Binary file modified cameo/data/metanetx_chem_prop.pklz
40,313 changes: 1 addition & 40,312 deletions cameo/models/universal_models/metanetx_universal_model_bigg.json

Large diffs are not rendered by default.
@@ -351,7 +351,7 @@ def _extend_model(self, original_exchanges):
 
         logger.info("Adding reactions from universal model to host model.")
         new_reactions = list()
-        original_model_metabolites = [self.mapping.get(m.id[0:-2], m.id) for
+        original_model_metabolites = [self.mapping.get('bigg:' + m.id[0:-2], m.id) for
                                       r in original_exchanges for m, coeff in six.iteritems(r.metabolites)
                                       if len(r.metabolites) == 1 and coeff < 0 < r.upper_bound]
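The one-line change above moves the lookup to namespace-prefixed keys while keeping the raw metabolite ID as a fallback. A minimal sketch of the pattern, with a hypothetical slice of the mapping dictionary:

# Hypothetical all2mnx-style mapping: keys carry their source namespace,
# so 'atp' from BIGG cannot collide with an 'atp' from another database.
mapping = {'bigg:atp': 'MNXM3', 'kegg:C00002': 'MNXM3'}

met_id = 'atp_c'  # BIGG-style ID; the trailing '_c' is the compartment

# dict.get falls back to the original ID when no MetaNetX entry exists.
mapped = mapping.get('bigg:' + met_id[0:-2], met_id)
print(mapped)  # 'MNXM3'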
2 changes: 1 addition & 1 deletion cameo/strain_design/pathway_prediction/util.py
@@ -34,7 +34,7 @@ def create_adapter_reactions(original_metabolites, database, mapping, compartmen

        name = metabolite.id[0:-2]
         try:
-            mapped_name = mapping[name]
+            mapped_name = mapping['bigg:' + name]  # assuming that model uses BIGG ids
         except KeyError:
             continue
             # print name, 'N/A'
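The same namespacing appears in create_adapter_reactions, where metabolites without a MetaNetX entry are skipped via the KeyError branch. A self-contained sketch of that loop, using hypothetical IDs and mapping entries:

mapping = {'bigg:glc__D': 'MNXM41', 'bigg:atp': 'MNXM3'}
metabolite_ids = ['glc__D_e', 'atp_c', 'foo_c']

mapped = {}
for met_id in metabolite_ids:
    name = met_id[0:-2]  # strip the compartment suffix, e.g. '_c'
    try:
        mapped[met_id] = mapping['bigg:' + name]
    except KeyError:
        continue  # no MetaNetX cross-reference for this metabolite
print(mapped)  # {'glc__D_e': 'MNXM41', 'atp_c': 'MNXM3'}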
Binary file modified data/metanetx/chem_prop.tsv.gz
Binary file modified data/metanetx/chem_xref.tsv.gz
Binary file modified data/metanetx/comp_prop.tsv.gz
Binary file modified data/metanetx/comp_xref.tsv.gz
Binary file modified data/metanetx/reac_prop.tsv.gz
Binary file modified data/metanetx/reac_xref.tsv.gz
1 change: 1 addition & 0 deletions requirements.txt
@@ -14,3 +14,4 @@ future>=0.15.2
 lazy-object-proxy==1.2.0
 IProgress==0.2
 palettable>=2.1.1
+requests>=2.10.0
107 changes: 61 additions & 46 deletions scripts/parse_metanetx.py
@@ -14,7 +14,11 @@

 import logging
 import re
+import gzip
+import pickle
+import sys
 
+import requests
 import optlang
 from cobra.core.Formula import Formula
 from cobra.io.json import save_json_model
@@ -119,20 +123,61 @@ def construct_universal_model(list_of_db_prefixes):
     return model


+def load_metanetx_files():
+    BASE_URL = 'http://www.metanetx.org/cgi-bin/mnxget/mnxref/{}.tsv'
+    for filename in ['chem_prop', 'chem_xref', 'reac_prop', 'reac_xref', 'comp_prop', 'comp_xref']:
+        response = requests.get(BASE_URL.format(filename))
+        filepath = '../data/metanetx/{}.tsv.gz'.format(filename)
+        compress_by_lines(response, filepath)
+
+
+def compress_by_lines(response, filepath):
+    prev_line = next(response.iter_lines())
+    with gzip.open(filepath, 'wb') as f:
+        for line in response.iter_lines(decode_unicode=response.encoding):
+            if line.startswith('#'):
+                prev_line = line
+                continue
+            if prev_line:
+                f.write(str.encode(prev_line + '\n'))
+                prev_line = None
+            f.write(str.encode(line + '\n'))
+
+
+def add_to_all_mapping(dataframe, mapping):
+    for other_id, mnx_id in dataframe[['XREF', 'MNX_ID']].values:
+        cleaned_key = _apply_sanitize_rules(
+            _apply_sanitize_rules(other_id, REVERSE_ID_SANITIZE_RULES_SIMPHENY),
+            ID_SANITIZE_RULES_TAB_COMPLETION)
+        mapping[cleaned_key] = mnx_id
+
+
+def add_to_bigg_mapping(xref, bigg2mnx, mnx2bigg):
+    bigg_selection = xref[['bigg' in blub for blub in xref.XREF]]
+    sanitized_XREF = [
+        _apply_sanitize_rules(_apply_sanitize_rules(id, REVERSE_ID_SANITIZE_RULES_SIMPHENY),
+                              ID_SANITIZE_RULES_TAB_COMPLETION) for id in bigg_selection.XREF]
+    bigg2mnx.update(dict(zip(sanitized_XREF, bigg_selection.MNX_ID)))
+    mnx2bigg.update(dict(zip(bigg_selection.MNX_ID, sanitized_XREF)))

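compress_by_lines drops the long comment preamble of each MNXref dump but keeps the last '#'-prefixed line, which holds the column names: prev_line buffers each comment line, and only the one seen last is flushed when the first data row arrives. A minimal sketch of that buffering over a toy list of lines, leaving requests and gzip out:

# Toy stand-in for response.iter_lines(); real files are tab-separated.
lines = ['# MetaNetX 2.0', '# generated 2016', '#XREF MNX_ID', 'bigg:atp MNXM3']

prev_line = None
kept = []
for line in lines:
    if line.startswith('#'):
        prev_line = line  # remember only the latest comment line
        continue
    if prev_line:
        kept.append(prev_line)  # the first data row flushes the header
        prev_line = None
    kept.append(line)
print(kept)  # ['#XREF MNX_ID', 'bigg:atp MNXM3']

This is also why the read_table calls below lose their skiprows arguments: the downloaded files now start directly at the header row.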

 if __name__ == '__main__':
 
     import logging
 
     logging.basicConfig(level='INFO')
 
+    if len(sys.argv) > 1 and sys.argv[1] == '--load':
+        load_metanetx_files()
+
     # load metanetx data
-    chem_xref = read_table('../data/metanetx/chem_xref.tsv.gz', skiprows=124, compression='gzip')
+    chem_xref = read_table('../data/metanetx/chem_xref.tsv.gz', compression='gzip')
     chem_xref.columns = [name.replace('#', '') for name in chem_xref.columns]
-    reac_xref = read_table('../data/metanetx/reac_xref.tsv.gz', skiprows=107, compression='gzip')
+    reac_xref = read_table('../data/metanetx/reac_xref.tsv.gz', compression='gzip')
     reac_xref.columns = [name.replace('#', '') for name in reac_xref.columns]
-    reac_prop = read_table('../data/metanetx/reac_prop.tsv.gz', skiprows=107, compression='gzip', index_col=0)
+    reac_prop = read_table('../data/metanetx/reac_prop.tsv.gz', compression='gzip', index_col=0)
     reac_prop.columns = [name.replace('#', '') for name in reac_prop.columns]
-    chem_prop = read_table('../data/metanetx/chem_prop.tsv.gz', skiprows=125, compression='gzip', index_col=0,
+    chem_prop = read_table('../data/metanetx/chem_prop.tsv.gz', compression='gzip', index_col=0,
                            names=['name', 'formula', 'charge', 'mass', 'InChI', 'SMILES', 'source'])
 
     # replace NaN with None
@@ -141,41 +186,16 @@ def construct_universal_model(list_of_db_prefixes):
    REVERSE_ID_SANITIZE_RULES_SIMPHENY = [(value, key) for key, value in ID_SANITIZE_RULES_SIMPHENY]
 
     metanetx = dict()
+    metanetx['all2mnx'] = dict()
+    metanetx['bigg2mnx'] = dict()
+    metanetx['mnx2bigg'] = dict()
-    # Metabolites
-    bigg_selection = chem_xref[['bigg' in blub for blub in chem_xref.XREF]]
-    sanitized_XREF = [
-        _apply_sanitize_rules(_apply_sanitize_rules(id.replace('bigg:', ''), REVERSE_ID_SANITIZE_RULES_SIMPHENY),
-                              ID_SANITIZE_RULES_TAB_COMPLETION) for id in bigg_selection.XREF]
-    bigg2mnx = dict(zip(sanitized_XREF, bigg_selection.MNX_ID))
-    mnx2bigg = dict(zip(bigg_selection.MNX_ID, sanitized_XREF))
-
-    # Reactions
-    bigg_selection = reac_xref[['bigg' in blub for blub in reac_xref.XREF]]
-    sanitized_XREF = [
-        _apply_sanitize_rules(_apply_sanitize_rules(id.replace('bigg:', ''), REVERSE_ID_SANITIZE_RULES_SIMPHENY),
-                              ID_SANITIZE_RULES_TAB_COMPLETION) for id in bigg_selection.XREF]
-    bigg2mnx.update(dict(zip(sanitized_XREF, bigg_selection.MNX_ID)))
-    mnx2bigg.update(dict(zip(bigg_selection.MNX_ID, sanitized_XREF)))
+    for xref in [chem_xref, reac_xref]:
+        add_to_bigg_mapping(xref, metanetx['bigg2mnx'], metanetx['mnx2bigg'])
+        add_to_all_mapping(xref, metanetx['all2mnx'])
 
-    # put into final result dict
-    metanetx['bigg2mnx'] = bigg2mnx
-    metanetx['mnx2bigg'] = mnx2bigg
-
-    all2mnx = dict()
-    for other_id, mnx_id in chem_xref[['XREF', 'MNX_ID']].values:
-        cleaned_key = _apply_sanitize_rules(
-            _apply_sanitize_rules(other_id.split(':')[1], REVERSE_ID_SANITIZE_RULES_SIMPHENY),
-            ID_SANITIZE_RULES_TAB_COMPLETION)
-        all2mnx[cleaned_key] = mnx_id
-    for other_id, mnx_id in reac_xref[['XREF', 'MNX_ID']].values:
-        cleaned_key = _apply_sanitize_rules(
-            _apply_sanitize_rules(other_id.split(':')[1], REVERSE_ID_SANITIZE_RULES_SIMPHENY),
-            ID_SANITIZE_RULES_TAB_COMPLETION)
-        all2mnx[cleaned_key] = mnx_id
-
-    metanetx['all2mnx'] = all2mnx
-    # with open('../cameo/data/metanetx.pickle', 'wb') as f:
-    #     pickle.dump(metanetx, f)
+    with open('../cameo/data/metanetx.pickle', 'wb') as f:
+        pickle.dump(metanetx, f, protocol=2)
 
     # generate universal reaction models
     db_combinations = [('bigg',), ('rhea',), ('bigg', 'rhea'), ('bigg', 'rhea', 'kegg'),
@@ -186,14 +206,9 @@
        from cobra.io.json import _REQUIRED_REACTION_ATTRIBUTES
 
         _REQUIRED_REACTION_ATTRIBUTES.add('annotation')
-        # d_model = _to_dict(universal_model)
         with open('../cameo/models/universal_models/{model_name}.json'.format(model_name=universal_model.id), 'w') as f:
             save_json_model(universal_model, f)
-            # json.dump(d_model, f)
-        # save_json_model(universal_model, '../cameo/models/universal_models/{model_name}.json'.format(model_name=universal_model.id))
-    chem_prop_filtered = chem_prop[
-        [any([source.startswith(db) for db in ('bigg', 'rhea', 'kegg', 'brenda', 'chebi')]) for source in
-         chem_prop.source]]
-    chem_prop_filtered = chem_prop_filtered.dropna(subset=['name'])
-    # with gzip.open('../cameo/data/metanetx_chem_prop.pklz', 'wb') as f:
-    #     pickle.dump(chem_prop_filtered, f)
+    chem_prop_filtered = chem_prop.dropna(subset=['name'])
+    with gzip.open('../cameo/data/metanetx_chem_prop.pklz', 'wb') as f:
+        pickle.dump(chem_prop_filtered, f, protocol=2)
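The refactored __main__ builds all three dictionaries by passing both xref tables through the helpers added above. A toy run of the same selection logic on a two-row frame, with the _apply_sanitize_rules calls omitted; note that all2mnx keys now keep their namespace prefix (the old code stripped it with other_id.split(':')[1]), which is exactly what the 'bigg:' + ... lookups earlier in this commit rely on:

import pandas as pd

# Hypothetical two-row xref table with the columns the helpers use.
xref = pd.DataFrame({'XREF': ['bigg:atp', 'kegg:C00002'],
                     'MNX_ID': ['MNXM3', 'MNXM3']})

bigg2mnx, mnx2bigg, all2mnx = {}, {}, {}

# Same selection as add_to_bigg_mapping, minus the sanitize rules.
bigg_rows = xref[['bigg' in x for x in xref.XREF]]
bigg2mnx.update(dict(zip(bigg_rows.XREF, bigg_rows.MNX_ID)))
mnx2bigg.update(dict(zip(bigg_rows.MNX_ID, bigg_rows.XREF)))

# Same as add_to_all_mapping: every cross-reference, keyed by its full ID.
for other_id, mnx_id in xref[['XREF', 'MNX_ID']].values:
    all2mnx[other_id] = mnx_id

print(bigg2mnx)  # {'bigg:atp': 'MNXM3'}
print(all2mnx)   # {'bigg:atp': 'MNXM3', 'kegg:C00002': 'MNXM3'}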

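Both pickle.dump calls now pass protocol=2, matching the "back compatibility for python2" message: Python 3 defaults to a pickle protocol that Python 2 cannot read, while protocol 2 loads under both interpreters. A minimal round-trip sketch with a hypothetical file name:

import pickle

mapping = {'bigg:atp': 'MNXM3'}

# Protocol 2 is the highest protocol Python 2 understands, so data dumped
# from Python 3 this way can still be unpickled by a Python 2 client.
with open('metanetx_demo.pickle', 'wb') as f:
    pickle.dump(mapping, f, protocol=2)

with open('metanetx_demo.pickle', 'rb') as f:
    print(pickle.load(f))  # {'bigg:atp': 'MNXM3'}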