diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml new file mode 100644 index 00000000..ca85595f --- /dev/null +++ b/.github/workflows/publish-to-test-pypi.yml @@ -0,0 +1,33 @@ +name: Publish new refineGEMs release to PyPI and TestPyPI + +on: workflow_dispatch + +jobs: + build-n-publish: + name: Build and publish new refineGEMs release to TestPyPI + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: >- + python3 -m + build + --sdist + --wheel + --outdir dist/ + . + - name: Publish distribution 📦 to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository-url: https://test.pypi.org/legacy/ \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..b830052d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include LICENSE +include refinegems/database/current_bigg_db_version.txt +include refinegems/database/sbo_media_db.sql +include refinegems/database/data.db \ No newline at end of file diff --git a/Pipfile.lock b/Pipfile.lock index 11c84474..75422e0b 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1694,7 +1694,7 @@ }, "refinegems": { "path": ".", - "version": "==1.2.2" + "version": "==1.3.0" }, "requests": { "hashes": [ diff --git a/docs/requirements.txt b/docs/requirements.txt index 9105e34a..5bbd7931 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,5 @@ nbsphinx ipython sphinxcontrib-bibtex +sphinx_copybutton accessible-pygments \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index ba5fd774..f81ff0d9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,7 +23,7 @@ author = 'Famke Bäuerle and 
Gwendolyn O. Gusak' # The full version, including alpha/beta/rc tags -release = '1.2.2' +release = '1.3.0' # -- General configuration --------------------------------------------------- @@ -35,12 +35,16 @@ 'sphinx.ext.autodoc', 'sphinx.ext.autosectionlabel', 'sphinx.ext.mathjax', + 'sphinx_copybutton', 'nbsphinx', 'sphinx_rtd_theme', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib.bibtex' ] +# For copy buttons in code blocks +copybutton_selector = "div.copyable pre" + # For citations bibtex_bibfiles = ['library.bib'] diff --git a/docs/source/development.rst b/docs/source/development.rst index 878cabc6..0748e801 100644 --- a/docs/source/development.rst +++ b/docs/source/development.rst @@ -13,12 +13,13 @@ Development installation * `pandoc` * `ipython` * `sphinxcontrib-bibtex` + * `sphinx_copybutton` You can install the packages via pip to your local environment: .. code:: bash - pip install sphinx nbsphinx sphinx_rtd_theme pandoc ipython sphinxcontrib-bibtex + pip install sphinx nbsphinx sphinx_rtd_theme pandoc ipython sphinxcontrib-bibtex sphinx_copybutton If you run into an error with jinja2, just switch to version 3.0.3: @@ -36,7 +37,7 @@ If you want your print message to show in the log file, replace the ```print()`` Documentation notes ------------------- -We use the autoDocstring extension (njpwerner.autodocstring) for vscode with the google format to generate function docstrings. To ensure a nice looking sphinx documentation, we add ``-`` to all variables that are passed as Args. And tuple returns are written as follows: +We use the autoDocstring extension (njpwerner.autodocstring) for VSCode with the google format to generate function docstrings. To ensure a nice looking sphinx documentation, we add ``-`` to all variables that are passed as Args. And tuple returns are written as follows: .. code:: python :linenos: @@ -57,4 +58,6 @@ We are also trying to make input and return types explicit by declaring those in .. 
code:: python :linenos: - def my_func(input1: int, input2: str, input3: Model) -> tuple[str, int]: \ No newline at end of file + def my_func(input1: int, input2: str, input3: Model) -> tuple[str, int]: + +More details for certain specifics can also be found `here `__. \ No newline at end of file diff --git a/docs/source/in_silico_media_generation.rst b/docs/source/in_silico_media_generation.rst index 65988527..4e31a287 100644 --- a/docs/source/in_silico_media_generation.rst +++ b/docs/source/in_silico_media_generation.rst @@ -3,11 +3,11 @@ From laboratory to *in silico* medium .. hint:: If you want to use the medium with ``refineGEMs.growth`` add the definition to the database schema ``sbo_media_db.sql`` - in the folder *data/database* in the downloaded repository. To update the database with the newly added table just + in the folder *refinegems/database* in the downloaded repository. To update the database with the newly added table just delete the file ``data.db`` in the same folder and run refineGEMs. -1. Search papers containing medium definitions/ Search paper or provider information for a medium that could be - interesting for your organism +1. Search papers containing medium definitions./ Search paper or provider information for a medium that could be + interesting for your organism. 2. | If the paper contains already an *in silico* defintion: Go to step 3. | If not: diff --git a/docs/source/index.rst b/docs/source/index.rst index 4e5ad64b..2c277bd0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,16 +1,16 @@ Welcome to refineGEMs! ====================================== -``refineGEMs`` is a python package intended +``refineGEMs`` is a Python package intended to help with the curation of genome-scale metabolic models (GEMS). -.. hint:: For bug reports please write issues on the `GitHub page `__. +.. hint:: For bug reports please write issues on the `GitHub page `__ or open a discussion `here `__. 
Overview -------- Currently ``refineGEMs`` can be used for the investigation of a GEM, it can complete the following tasks: -* loading GEMS with ``cobrapy`` and ``libSBML`` +* loading GEMS with ``COBRApy`` and ``libSBML`` * report number of metabolites, reactions and genes * report orphaned, deadends and disconnected metabolites * report mass and charge unbalanced reactions @@ -18,15 +18,15 @@ Currently ``refineGEMs`` can be used for the investigation of a GEM, it can comp * compare the genes present in the model to the genes found in: * the `KEGG `__ Database (Note: This requires a GFF file of your organism and the KEGG identifier of your organism.) * Or the `BioCyc `__ Database (Note: This requires that a database entry for your organism exists in BioCyc.) -* compare the charges and masses of the metabolites present in the model to the charges and masses denoted in the `ModelSEED `__ Database +* compare the charges and masses of the metabolites present in the model to the charges and masses denoted in the `ModelSEED `__ Database. 
Other applications of ``refineGEMs`` to curate a given model include: -* The correction of a model created with `CarveMe `__ v.1.5.1 (for example moving all relevant information from the notes to the annotation field) this includes automated annotation of NCBI genes to the GeneProtein section of the model -* The addition of `KEGG `__ Pathways as Groups (using the `libSBML `__ Groups Plugin) -* Updating the SBO-Term annotations based on a SBOannotator -* Updating the annotation of metabolites and extending the model with reactions (for the purpose of filling gaps) based on a table filled by the user ``data/manual_annotations.xlsx``, note that this only works when the structure of the given table is used -* And extending the model with all information surrounding reactions including the corresponding GeneProducts and metabolites by filling in the table ``data/modelName_gapfill_analysis_date_example.xlsx``, note this also only works when the structure of the given Excel file is used +* The correction of a model created with `CarveMe `__ v.1.5.1 (for example moving all relevant information from the notes to the annotation field) this includes automated annotation of NCBI genes to the GeneProduct section of the model, +* The addition of `KEGG `__ Pathways as Groups (using the `libSBML `__ Groups Plugin), +* Updating the SBO-Term annotations based on SBOannotator\ :footcite:p:`Leonidou2023_sboann`, +* Updating the annotation of metabolites and extending the model with reactions (for the purpose of filling gaps) based on a table filled by the user ``data/manual_annotations.xlsx`` (Note: This only works when the structure of the given table is used.), +* And extending the model with all information surrounding reactions including the corresponding GeneProducts and metabolites by filling in the table ``data/modelName_gapfill_analysis_date_example.xlsx`` (Note: This also only works when the structure of the given Excel file is used). .. 
toctree:: @@ -43,3 +43,5 @@ Other applications of ``refineGEMs`` to curate a given model include: * :ref:`genindex` * :ref:`search` + +.. footbibliography:: diff --git a/docs/source/installation.rst b/docs/source/installation.rst index ba576bd6..d3ac524a 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -3,11 +3,12 @@ Installation Installation via pip -------------------- -To install refineGEMs as Python package, simply install it via ``pip``: +To install refineGEMs as Python package from `PyPI `__, simply install it via ``pip``: -.. code:: bash +.. code:: console + :class: copyable - pip install refinegems + pip install refineGEMs The corresponding project site can be found `here `__. @@ -60,6 +61,10 @@ If `which pip` does not show pip in the conda environment you can also create a **Pipenv** +.. warning:: + | Since version 1.1.0 the Pipfile and Pipfile.lock files are not up to date anymore. + | This installation method might not work. + You can use `pipenv `__ to keep all dependencies together. You will need to install ``pipenv`` first. To install ``refineGEMs`` locally complete the @@ -96,7 +101,7 @@ Troubleshooting ``pipenv install``. - If you run into a problem with ``pipenv`` not locking after f.ex. moving the repository try uninstalling ``pipenv`` and reinstalling it via pip. Then run ``pipenv install`` and it should work again. -- If you use vscode terminals and have trouble accessing the python from within your conda environment, deactivate base and reactivate again: +- If you use VSCode terminals and have trouble accessing the python from within your conda environment, deactivate base and reactivate again: .. 
code:: bash @@ -104,5 +109,3 @@ Troubleshooting conda deactivate conda activate base conda activate - - diff --git a/docs/source/modules/examples.ipynb b/docs/source/modules/examples.ipynb index a59278c5..fafbffbd 100644 --- a/docs/source/modules/examples.ipynb +++ b/docs/source/modules/examples.ipynb @@ -32,7 +32,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = rg.ioload_model_cobra('../../data/e_coli_core.xml')" + "model = rg.io.load_model_cobra('../../data/e_coli_core.xml')" ] }, { diff --git a/docs/source/modules/gapfill.rst b/docs/source/modules/gapfill.rst index e27c95c7..29bd48a4 100644 --- a/docs/source/modules/gapfill.rst +++ b/docs/source/modules/gapfill.rst @@ -65,7 +65,7 @@ To perform the gap analysis the following parameters are relevant for the config To add genes, metabolites and reactions from an Excel table to a model the following parameters need to be set: (The Excel file is either obtained by running gapfill_analysis or created by hand with the same structure as the result file from gapfill_analysis. -An example Excel file to fill in by hand can be found in the cloned repository under 'data/modelName_gapfill_analysis_date_example.xlsx') +An example Excel file to fill in by hand can be found in the cloned repository under ``data/modelName_gapfill_analysis_date_example.xlsx``) .. code:: yaml diff --git a/docs/source/modules/growth.rst b/docs/source/modules/growth.rst index ded01267..d394fdda 100644 --- a/docs/source/modules/growth.rst +++ b/docs/source/modules/growth.rst @@ -12,14 +12,14 @@ Outputs a table with the column headers: Implementation -------------- -Growth rates and thus doubling times can be determined with Flux Balance Analysis (FBA). RefineGEMs uses a COBRApy based implementation that adds metabolites one-by-one to custom media definitions until growth is obtained. The pseudocode is shown below. +Growth rates and, thus, doubling times can be determined with Flux Balance Analysis (FBA). 
RefineGEMs uses a COBRApy based implementation that adds metabolites one-by-one to custom media definitions until growth is obtained. The pseudocode is shown below. .. image:: ../images/growth_algorithm.png :align: center :width: 400 :alt: Pseudocode representation of the algorithm implemented for growth simulation. -There is a flag called basis which can be set to either default_uptake or minimal_uptake. You can decide from which uptake you want to fill your medium of interest when looking for missing metabolites. Either the default_uptake which is the uptake that the model has when no specific medium is set or the minimal_uptake which is the uptake resulting from cobrapys minimal_medium optimization. +There is a flag called basis which can be set to either ``default_uptake`` or ``minimal_uptake``. You can decide from which uptake you want to fill your medium of interest when looking for missing metabolites. Either the ``default_uptake`` which is the uptake that the model has when no specific medium is set or the ``minimal_uptake`` which is the uptake resulting from COBRApy's minimal_medium optimization. Available media --------------- diff --git a/docs/source/modules/pathways.rst b/docs/source/modules/pathways.rst index 409d6be1..30d85aa7 100644 --- a/docs/source/modules/pathways.rst +++ b/docs/source/modules/pathways.rst @@ -1,13 +1,13 @@ Addition of KEGG Pathways ========================= -The KEGG database holds information on metabolic pathways. If your organism occurs in the KEGG database, you can use this module to add KEGG pathways with the libSBML Groups plugin. +The KEGG database holds information on metabolic pathways. You can use this module to add KEGG pathways with the libSBML Groups plugin. The workflow of the script is as follows: -1. Extraction of the KEGG reaction ID from the annotations of your reactions -2. Identification, in which KEGG pathways this reaction occurs -3. 
Addition of all KEGG pathways for a reaction then as annotations with the biological qualifier ‘OCCURS_IN’ to the respective reaction. -4. Addition of all KEGG pathways as groups with references to the contained reactions as groups:member +1. Extraction of the KEGG reaction IDs from the annotations of your reactions +2. Identification, in which KEGG pathways these reactions occur +3. Addition of all KEGG pathways for a reaction with the biological qualifier ``OCCURS_IN`` to the annotations +4. Addition of all KEGG pathways as groups with references to the contained reactions as ``groups:member`` The only function that you will need to access is ``kegg_pathways``: diff --git a/docs/source/modules/polish.rst b/docs/source/modules/polish.rst index 10db0c90..b8db234a 100644 --- a/docs/source/modules/polish.rst +++ b/docs/source/modules/polish.rst @@ -1,7 +1,7 @@ Polishing a CarveMe model ========================= -The newer version of CarveMe leads to some irritations in the model, the scripts in ``polish`` enable for example the addition of BiGG Ids to the annotations as well as a correct formatting of the annotations. +CarveMe version 1.5.1 leads to some irritations in the model, the scripts in ``polish`` enable for example the addition of BiGG IDs to the annotations as well as a correct formatting of the annotations. .. warning:: Using ``lab_strain=True`` has the following two requirements: diff --git a/docs/source/modules/sboann.rst b/docs/source/modules/sboann.rst index b1cc8578..bc4d571e 100644 --- a/docs/source/modules/sboann.rst +++ b/docs/source/modules/sboann.rst @@ -3,7 +3,7 @@ SBOannotator with refineGEMs RefineGEMs offers access to the functionalities of `SBOannotator `__\ :footcite:p:`Leonidou2023_sboann`. 
-The ``sboann`` module is splitted into a lot of small functions which are all annotated, however when using it for SBO-Term annotation it only makes sense to run the "main" function: +The ``sboann`` module is split into a lot of small functions which are all annotated, however when using it for SBO-Term annotation it only makes sense to run the function ``sbo_annotation``: .. autofunction:: refinegems.sboann.sbo_annotation :noindex: @@ -15,6 +15,6 @@ The ``sboann`` module is splitted into a lot of small functions which are all an model_sboann = rg.sboann.sbo_annotation() rg.io.write_to_file(model_sboann, ) -If you use it from the refineGEMs toolbox with the config you can get visualizations of SBO-Term distribution before and after SBO-Term updates. +If you use it from the refineGEMs toolbox with the config you can get a visualization of the SBO-Term distribution before and after the SBO-Term update. .. footbibliography:: diff --git a/docs/source/pipeline.rst b/docs/source/pipeline.rst index b45165fc..dd830ea6 100644 --- a/docs/source/pipeline.rst +++ b/docs/source/pipeline.rst @@ -5,16 +5,16 @@ Generating a model for an organism where no information on genes and proteins is causes the problem that the model will not contain valid database identifiers for any GeneProduct. To resolve this issue the workflow in Figure :numref:`workflow` can be used. -1. First annotate the genome with NCBI's Prokaryotic Genome Annotation Pipeline (PGAP) to obtain the same FASTA format as used in NCBI. -2. Then use diamond with the ``nr`` database from NCBI and the obtained annotated FASTA file as input. Restrict the search to your organism's taxon if known and use the flag for taxonomy checking. +1. First annotate the genome with NCBI's Prokaryotic Genome Annotation Pipeline (PGAP) to obtain the same FASTA format as used in NCBI and use the flag for taxonomy checking. +2. Then use DIAMOND with the ``nr`` database from NCBI and the obtained annotated FASTA file as input. 
Restrict the search to your organism's taxon if known. 3. Check if any protein in the annotation FASTA file still has no database identifier. - | -> YES: Rerun diamond without the taxonomy check and without the restriction for the organism's taxon. + | -> YES: Rerun DIAMOND without the taxonomy check and without the restriction for the organism's taxon. | | -> NO: Continue with step 4. -4. Add the diamond result to the annotated FASTA file. -5. Run e.g. ``CarveME`` to obtain a draft model. +4. Add the DIAMOND result to the annotated FASTA file. +5. Run e.g. ``CarveMe`` to obtain a draft model. 6. Check if in the model any GeneProducts without NCBI Protein or RefSeq identifiers occur. | -> YES: diff --git a/docs/source/usage.rst b/docs/source/usage.rst index fdbc7b22..8ab1230f 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -5,7 +5,7 @@ Usage as standalone application ------------------------------- The script ``main.py`` can be used directly in the command line after -entering the virtual environment with ``pipenv shell``. +entering the virtual environment with ``pipenv shell`` or ``conda activate ``. The ``config.yaml`` file contains defaults for all variables that need to be set by the user. @@ -130,11 +130,11 @@ to be set by the user. The repository structure has the following intention: -* ``refineGEMs/`` contains all the functions needed in ``main.py`` -* ``data/`` contains all tables that are used by different parts of the script as well as a toy model ``e_coli_core.xml`` +* ``refinegems/`` contains all the functions needed in ``main.py`` +* ``data/`` contains all example tables that can be used as input for the curation scripts as well as the ``media_db.csv`` and a toy model ``e_coli_core.xml`` * Instead of using the files given in ``data/``, you can use your own files and just change the paths in ``config.yaml``. 
Please be aware that some functions rely on input in a certain format so make sure to check the files given in the ``data/`` folder and use the same formatting. -* ``databases/`` contains the ``sql`` file as well as the ``db`` file necessary for the SBOAnn script by Elisabeth Fritze as well as the modules ``gapfill``, ``growth`` and ``modelseed``. -* The ``setup.py`` and ``pyproject.toml`` enable creating a PyPi package called ``refineGEMs``. +* ``refinegems/database/`` contains the SQL Schema file for the media and ``sboann``-related tables as well as the ready-to-use database file necessary for the SBOAnn script by Elisabeth Fritze as well as the modules ``gapfill``, ``growth`` and ``modelseed``. +* The ``setup.py`` and ``pyproject.toml`` enable creating a PyPI package called ``refineGEMs``. Usage as python module diff --git a/refinegems/analysis_db.py b/refinegems/analysis_db.py index 4a05f512..e10118bc 100644 --- a/refinegems/analysis_db.py +++ b/refinegems/analysis_db.py @@ -22,23 +22,23 @@ COMPARTMENTS = ('c', 'e', 'p') -def get_search_regex(other_db: Literal['KEGG', 'BioCyc'], metabolites: bool) -> str: - """Retrieves the search regex for BioCyc/KEGG to be used in the BiGG mapping +def get_search_regex(other_db: Literal['KEGG', 'BioCyc', 'SEED'], metabolites: bool) -> str: + """Retrieves the search regex for BioCyc/KEGG/SEED to be used in the BiGG mapping Args: - - other_db (Literal): Specifies if the search regex should be for BioCyc or KEGG - - metabolites (bool): Is required if one wants to search for KEGG Compound IDs in the bigg_models_metabolites.txt + - other_db (Literal): Specifies if the search regex should be for BioCyc/KEGG/SEED + - metabolites (bool): Is required if one wants to search for KEGG/SEED Compound IDs in the bigg_models_metabolites.txt Returns: str: Search regex """ if other_db == 'BioCyc': return 'BioCyc: http://identifiers.org/biocyc/META:(.*?);' - elif other_db == 'KEGG': + elif other_db == 'KEGG' or other_db == 'SEED': if 
metabolites: - return 'KEGG Compound: http://identifiers.org/kegg.compound/(.*?);' + return f'{other_db} Compound: http://identifiers.org/{other_db.lower()}.compound/(.*?);' else: - return 'KEGG Reaction: http://identifiers.org/kegg.reaction/(.*?);' + return f'{other_db} Reaction: http://identifiers.org/{other_db.lower()}.reaction/(.*?);' def compare_ids(id1: str, id2: str) -> bool: @@ -159,15 +159,15 @@ def get_reaction_compartment(bigg_id: str) -> str: # Function originally from refineGEMs.genecomp/refineGEMs.KEGG_analysis --- Modified -def get_bigg2other_db(other_db: Literal['KEGG', 'BioCyc'], metabolites: bool=False) -> pd.DataFrame: +def get_bigg2other_db(other_db: Literal['KEGG', 'BioCyc', 'SEED'], metabolites: bool=False) -> pd.DataFrame: """Uses list of BiGG reactions/metabolites to get a mapping from BiGG to KEGG/BioCyc Id Args: - - other_db (Literal): Set to 'KEGG'/'BioCyc' to map KEGG/BioCyc IDs to BiGG IDs + - other_db (Literal): Set to 'KEGG'/'BioCyc'/'SEED' to map KEGG/BioCyc/SEED IDs to BiGG IDs - metabolites (bool): Set to True to map other_db IDs to BiGG IDs for metabolites Returns: - pd.DataFrame: Table containing BiGG Ids with corresponding KEGG/BioCyc Ids + pd.DataFrame: Table containing BiGG Ids with corresponding KEGG/BioCyc/SEED Ids """ # Get only rows with BioCyc/KEGG entries @@ -206,7 +206,7 @@ def compare_bigg_model(complete_df: pd.DataFrame, model_entities: pd.DataFrame, Needed to back check previous comparisons. 
Args: - - complete_df (pd.DataFrame): Table that contains BioCyc Id, BiGG Id & more + - complete_df (pd.DataFrame): Table that contains KEGG/BioCyc Id, BiGG Id & more - model_entities (pd.DataFrame): BiGG Ids of entities in the model - metabolites (bool): True if names of metabolites should be added, otherwise false @@ -223,12 +223,12 @@ def compare_bigg_model(complete_df: pd.DataFrame, model_entities: pd.DataFrame, db_ids = entities_missing_in_model.groupby('bigg_id')[db].agg(set) # Get a set of all BioCyc/KEGG IDs belonging to one BiGG ID - # Add set of BioCyc IDs belonging to one BiGG ID to the dataframe + # Add set of BioCyc/KEGG IDs belonging to one BiGG ID to the dataframe entities_missing_in_model.set_index('bigg_id', inplace=True) entities_missing_in_model.loc[:, db] = db_ids entities_missing_in_model.reset_index(inplace=True) - if 'id_group' in entities_missing_in_model.columns: # Remove reaction ID duplicates but keep all realted BiGG & BioCyc IDs in a list + if 'id_group' in entities_missing_in_model.columns: # Remove reaction ID duplicates but keep all related BiGG & BioCyc/KEGG IDs in a list aliases = entities_missing_in_model.groupby(['compartment', 'id_group'])['bigg_id'].agg(set) # Get a set of the 'duplicated' BiGG reaction IDs -> aliases entities_missing_in_model.drop_duplicates(['compartment', 'id_group'], inplace=True, ignore_index=True) # Drop duplicates where compartments & id_group same @@ -252,7 +252,7 @@ def get_name_from_bigg(bigg_id: str): entities_missing_in_model['name'] = entities_missing_in_model['bigg_id'].map(get_name_from_bigg) con.close() - # Add compartment ID to all BiGG metabolites that were added due to filtering for BiGG metabolites in BiGG reactions + # Add compartment ID to all BiGG metabolites if metabolites: def get_compartment_from_id(bigg_id: str): compartment = bigg_id[-1] diff --git a/data/database/current_bigg_db_version.txt b/refinegems/database/current_bigg_db_version.txt similarity index 100% rename from 
data/database/current_bigg_db_version.txt rename to refinegems/database/current_bigg_db_version.txt diff --git a/data/database/data.db b/refinegems/database/data.db similarity index 100% rename from data/database/data.db rename to refinegems/database/data.db diff --git a/data/database/sbo_media_db.sql b/refinegems/database/sbo_media_db.sql similarity index 100% rename from data/database/sbo_media_db.sql rename to refinegems/database/sbo_media_db.sql diff --git a/refinegems/databases.py b/refinegems/databases.py index 93805cf8..084c5c12 100644 --- a/refinegems/databases.py +++ b/refinegems/databases.py @@ -11,7 +11,7 @@ __author__ = 'Gwendolyn O. Gusak' -PATH_TO_DB_DATA = path.join(path.abspath(path.dirname(path.dirname(__file__))), 'data/database') +PATH_TO_DB_DATA = path.join(path.dirname(path.realpath(__file__)), 'database') PATH_TO_DB = path.join(PATH_TO_DB_DATA, 'data.db') VERSION_FILE = path.join(PATH_TO_DB_DATA, 'current_bigg_db_version.txt') VERSION_URL = 'http://bigg.ucsd.edu/api/v2/database_version' diff --git a/refinegems/entities.py b/refinegems/entities.py index 3ce8f07b..d583b30f 100644 --- a/refinegems/entities.py +++ b/refinegems/entities.py @@ -59,15 +59,16 @@ def compare_gene_lists(gps_in_model: pd.DataFrame, db_genes: pd.DataFrame, kegg: # Function originally from refineGEMs.genecomp/refineGEMs.KEGG_analysis --- Modified -def get_model_reacs_or_metabs(model_libsbml: libModel, metabolites: bool=False) -> pd.DataFrame: +def get_model_reacs_or_metabs(model_libsbml: libModel, metabolites: bool=False, col_name: str='bigg_id') -> pd.DataFrame: """Extracts table of reactions/metabolites with BiGG IDs from model Args: - model_libsbml (libModel): Model loaded with libSBML - metabolites (bool): Set to True if metabolites from model should be extracted + - col_name (str): Name to be used for column in Table, default: 'bigg_id' Returns: - pd.DataFrame: Table with BiGG IDs of reactions in the model + pd.DataFrame: Table with model identifiers for either 
metabolites or reactions """ reac_or_metab_list = model_libsbml.getListOfSpecies() if metabolites else model_libsbml.getListOfReactions() @@ -76,7 +77,7 @@ def get_model_reacs_or_metabs(model_libsbml: libModel, metabolites: bool=False) list_of_reacs_or_metabs.append(reac_or_metab.id[2:]) reac_or_metab_list_df = pd.Series(list_of_reacs_or_metabs) - reac_or_metab_list_df = pd.DataFrame(reac_or_metab_list_df, columns=['bigg_id']) + reac_or_metab_list_df = pd.DataFrame(reac_or_metab_list_df, columns=[col_name]) return reac_or_metab_list_df diff --git a/refinegems/investigate.py b/refinegems/investigate.py index 68bda5ac..fd863997 100644 --- a/refinegems/investigate.py +++ b/refinegems/investigate.py @@ -13,7 +13,7 @@ from libsbml import Model as libModel from cobra import Model as cobraModel from memote.support import consistency -# needed by memote.support.consitency +# needed by memote.support.consistency from memote.support import consistency_helpers as con_helpers from refinegems.io import load_model_cobra, load_model_libsbml, search_sbo_label diff --git a/setup.py b/setup.py index 83b20559..2422d5db 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ readme = readme_file.read() setup(name='refineGEMs', - version='1.2.2', + version='1.3.0', description='refineGEMs: a python package intended to help with the curation of genome-scale metabolic models (GEMS)', long_description=readme, long_description_content_type='text/markdown', @@ -14,12 +14,13 @@ url='https://github.com/draeger-lab/refinegems', license='MIT', packages=['refinegems'], + python_requires ='>=3.8, <3.10', install_requires = [ "cobra==0.22.0", "biopython==1.79", "bioregistry", "bioservices", - "importlib_resources==5.13.0", + "importlib-resources<=5.13.0", "memote==0.13.0", "pandas==1.2.4", "numpy==1.20.3", @@ -33,7 +34,14 @@ "venn==0.1.3", "ols-client==0.1.3", "seaborn==0.12.2", - "sqlalchemy==1.4.43", "click==8.1.3" ], - zip_safe=False) + zip_safe=False, + include_package_data=True, + package_data={ 
+ 'refinegems': [ + 'database/current_bigg_db_version.txt', + 'database/sbo_media_db.sql', + 'database/data.db' + ] + })