diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7db3ee6..b804f26 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,39 +1,39 @@ -name: Publish - -on: - workflow_dispatch: - release: - types: [created] - -jobs: - publish: - runs-on: ubuntu-latest - environment: - name: pypi - url: https://pypi.org/p/im2deep - permissions: - id-token: write - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.8" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip build - - - name: Build - run: python -m build - - - name: Install - run: pip install dist/im2deep-*.whl - - - name: Test package - run: | - im2deep --help - - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 +name: Publish + +on: + workflow_dispatch: + release: + types: [created] + +jobs: + publish: + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/im2deep + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip build + + - name: Build + run: python -m build + + - name: Install + run: pip install dist/im2deep-*.whl + + - name: Test package + run: | + im2deep --help + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index f04d9ce..51d0f6b 100644 --- a/.gitignore +++ b/.gitignore @@ -108,3 +108,6 @@ ENV/ .vscode/ .pytest_cache/ IM2Deep.code-workspace + +# Testing +test_data/ \ No newline at end of file diff --git a/LICENSE b/LICENSE index 261eeb9..29f81d8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in index 8426040..804fb87 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ -include im2deep/models/* -include im2deep/models/**/* -include im2deep/reference_data/* +include im2deep/models/* +include im2deep/models/**/* +include im2deep/reference_data/* diff --git a/README.md b/README.md index 73173ef..b15ac2e 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,46 @@ -# IM2Deep -Collisional cross-section prediction for (modified) peptides. - ---- -## Introduction - -IM2Deep is a CCS predictor for (modified) peptides. -It is able to accurately predict CCS for modified peptides, even if the modification wasn't observed during training. - -## Installation -Install with pip: -`pip install im2deep` - -## Usage -### Basic CLI usage: -```sh -im2deep -``` -If you want to calibrate your predictions (HIGHLY recommended), please provide a calibration file: -```sh -im2deep --calibration_file -``` -For an overview of all CLI arguments, run `im2deep --help`. - -## Input files -Both peptide and calibration files are expected to be comma-separated values (CSV) with the following columns: - - `seq`: unmodified peptide sequence - - `modifications`: every modifications should be listed as `location|name`, separated by a pipe character (`|`) - between the location, the name, and other modifications. `location` is an integer counted starting at 1 for the - first AA. 0 is reserved for N-terminal modifications, -1 for C-terminal modifications. `name` has to correspond - to a Unimod (PSI-MS) name. - - `charge`: peptide precursor charge - - `CCS`: collisional cross-section (only for calibration file) - -For example: - -```csv -seq,modifications,charge,CCS -VVDDFADITTPLK,,2,422.9984309464991 -GVEVLSLTPSFMDIPEK,12|Oxidation,2,464.6568644356109 -SYSGREFDDLSPTEQK,,2,468.9863221739147 -SYSQSILLDLTDNR,,2,460.9340710819608 -DEELIHLDGK,,2,383.8693416055445 -IPQEKCILQTDVK,5|Butyryl|6|Carbamidomethyl,3,516.2079366048176 -``` - +# IM2Deep +Collisional cross-section prediction for (modified) peptides. + +--- +## Introduction + +IM2Deep is a CCS predictor for (modified) peptides. +It is able to accurately predict CCS for modified peptides, even if the modification wasn't observed during training. + +## Installation +Install with pip: +`pip install im2deep` + +## Usage +### Basic CLI usage: +```sh +im2deep +``` +If you want to calibrate your predictions (HIGHLY recommended), please provide a calibration file: +```sh +im2deep --calibration-file +``` +For an overview of all CLI arguments, run `im2deep --help`. + +## Input files +Both peptide and calibration files are expected to be comma-separated values (CSV) with the following columns: + - `seq`: unmodified peptide sequence + - `modifications`: every modifications should be listed as `location|name`, separated by a pipe character (`|`) + between the location, the name, and other modifications. `location` is an integer counted starting at 1 for the + first AA. 0 is reserved for N-terminal modifications, -1 for C-terminal modifications. `name` has to correspond + to a Unimod (PSI-MS) name. + - `charge`: peptide precursor charge + - `CCS`: collisional cross-section (only for calibration file) + +For example: + +```csv +seq,modifications,charge,CCS +VVDDFADITTPLK,,2,422.9984309464991 +GVEVLSLTPSFMDIPEK,12|Oxidation,2,464.6568644356109 +SYSGREFDDLSPTEQK,,2,468.9863221739147 +SYSQSILLDLTDNR,,2,460.9340710819608 +DEELIHLDGK,,2,383.8693416055445 +IPQEKCILQTDVK,5|Butyryl|6|Carbamidomethyl,3,516.2079366048176 +``` + diff --git a/im2deep/__init__.py b/im2deep/__init__.py index 63bc290..488b6cc 100644 --- a/im2deep/__init__.py +++ b/im2deep/__init__.py @@ -1,3 +1,3 @@ -"""IM2Deep: Deep learning framework for peptide collisional cross section prediction.""" - -__version__ = "0.1.7" +"""IM2Deep: Deep learning framework for peptide collisional cross section prediction.""" + +__version__ = "0.1.7" diff --git a/im2deep/__main__.py b/im2deep/__main__.py index 5f8299c..742ea00 100644 --- a/im2deep/__main__.py +++ b/im2deep/__main__.py @@ -1,210 +1,218 @@ -"""Command line interface to IM2Deep.""" - -from __future__ import annotations - -import logging -import sys -from pathlib import Path -from typing import Optional - -import click -import pandas as pd - -# from deeplc import DeepLC -from psm_utils.io import read_file -from psm_utils.io.exceptions import PSMUtilsIOException -from psm_utils.io.peptide_record import peprec_to_proforma -from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList -from rich.logging import RichHandler - -from im2deep._exceptions import IM2DeepError -from im2deep.im2deep import predict_ccs - -# from im2deep.calibrate import linear_calibration - -REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" - -LOGGER = logging.getLogger(__name__) - - -def setup_logging(passed_level): - log_mapping = { - "debug": logging.DEBUG, - "info": logging.INFO, - "warning": logging.WARNING, - "error": logging.ERROR, - "critical": logging.CRITICAL, - } - - if passed_level.lower() not in log_mapping: - raise ValueError( - f"""Invalid log level: {passed_level}. - Should be one of {log_mapping.keys()}""" - ) - - logging.basicConfig( - level=log_mapping[passed_level.lower()], - format="%(message)s", - datefmt="[%X]", - handlers=[RichHandler()], - ) - - -# Command line arguments TODO: Make config_parser script -@click.command() -@click.argument("psm_file", type=click.Path(exists=True, dir_okay=False)) -@click.option( - "-c", - "--calibration_file", - type=click.Path(exists=False), - default=None, - help="Calibration file name.", -) -@click.option( - "-o", - "--output_file", - type=click.Path(exists=False), - default=None, - help="Output file name.", -) -@click.option( - "-m", - "--model_name", - type=click.Choice(["tims"]), - default="tims", - help="Model name.", -) -@click.option( - "-l", - "--log_level", - type=click.Choice(["debug", "info", "warning", "error", "critical"]), - default="info", - help="Logging level.", -) -@click.option( - "-n", - "--n_jobs", - type=click.INT, - default=None, - help="Number of jobs to use for parallel processing.", -) -@click.option( - "--calibrate_per_charge", - type=click.BOOL, - default=True, - help="Calibrate CCS values per charge state.", -) -@click.option( - "--use_charge_state", - type=click.INT, - default=2, - help="Charge state to use for calibration. Only used if calibrate_per_charge is set to False.", -) -@click.option( - "--use_single_model", - type=click.BOOL, - default=True, - help="Use a single model for prediction.", -) -def main( - psm_file: str, - calibration_file: Optional[str] = None, - output_file: Optional[str] = None, - model_name: Optional[str] = "tims", - log_level: Optional[str] = "info", - n_jobs: Optional[int] = None, - use_single_model: Optional[bool] = True, - calibrate_per_charge: Optional[bool] = True, - use_charge_state: Optional[int] = 2, -): - """Command line interface to IM2Deep.""" - setup_logging(log_level) - - with open(psm_file) as f: - first_line_pred = f.readline().strip() - if calibration_file: - with open(calibration_file) as fc: - first_line_cal = fc.readline().strip() - - if "modifications" in first_line_pred.split(",") and "seq" in first_line_pred.split(","): - # Read input file - df_pred = pd.read_csv(psm_file) - df_pred.fillna("", inplace=True) - - list_of_psms = [] - for seq, mod, charge, ident in zip( - df_pred["seq"], df_pred["modifications"], df_pred["charge"], df_pred.index - ): - list_of_psms.append( - PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) - ) - psm_list_pred = PSMList(psm_list=list_of_psms) - - else: - # psm_list_pred = read_file(file_pred) - try: - psm_list_pred = read_file(psm_file) - except PSMUtilsIOException: - LOGGER.error("Invalid input file. Please check the format of the input file.") - sys.exit(1) - - psm_list_cal = [] - if ( - calibration_file - and "modifications" in first_line_cal.split(",") - and "seq" in first_line_cal.split(",") - ): - try: - df_cal = pd.read_csv(calibration_file) - df_cal.fillna("", inplace=True) - del calibration_file - - list_of_cal_psms = [] - for seq, mod, charge, ident, CCS in zip( - df_cal["seq"], - df_cal["modifications"], - df_cal["charge"], - df_cal.index, - df_cal["CCS"], - ): - list_of_cal_psms.append( - PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) - ) - psm_list_cal = PSMList(psm_list=list_of_cal_psms) - psm_list_cal_df = psm_list_cal.to_dataframe() - psm_list_cal_df["ccs_observed"] = df_cal["CCS"] - del df_cal - - except IOError: - LOGGER.error( - "Invalid calibration file. Please check the format of the calibration file." - ) - sys.exit(1) - - else: - LOGGER.warning( - "No calibration file found. Proceeding without calibration. Calibration is HIGHLY recommended for accurate CCS prediction." - ) - psm_list_cal_df = None - - if not output_file: - output_file = Path(psm_file).parent / (Path(psm_file).stem + "_IM2Deep-predictions.csv") - try: - predict_ccs( - psm_list_pred, - psm_list_cal_df, - output_file=output_file, - model_name=model_name, - calibrate_per_charge=calibrate_per_charge, - use_charge_state=use_charge_state, - n_jobs=n_jobs, - use_single_model=use_single_model, - ) - except IM2DeepError as e: - LOGGER.error(e) - sys.exit(1) - - -if __name__ == "__main__": - main() +"""Command line interface to IM2Deep.""" + +from __future__ import annotations + +import logging +import sys +from pathlib import Path +from typing import Optional + +import click +import pandas as pd + +# from deeplc import DeepLC +from psm_utils.io import read_file +from psm_utils.io.exceptions import PSMUtilsIOException +from psm_utils.io.peptide_record import peprec_to_proforma +from psm_utils.psm import PSM +from psm_utils.psm_list import PSMList +from rich.logging import RichHandler + +from im2deep._exceptions import IM2DeepError +from im2deep.im2deep import predict_ccs + +# from im2deep.calibrate import linear_calibration + +REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" + +LOGGER = logging.getLogger(__name__) + + +def setup_logging(passed_level): + log_mapping = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, + } + + if passed_level.lower() not in log_mapping: + raise ValueError( + f"""Invalid log level: {passed_level}. + Should be one of {log_mapping.keys()}""" + ) + + logging.basicConfig( + level=log_mapping[passed_level.lower()], + format="%(message)s", + datefmt="[%X]", + handlers=[RichHandler()], + ) + + +# Command line arguments TODO: Make config_parser script +@click.command() +@click.argument("psm-file", type=click.Path(exists=True, dir_okay=False)) +@click.option( + "-c", + "--calibration-file", + type=click.Path(exists=False), + default=None, + help="Calibration file name.", +) +@click.option( + "-o", + "--output-file", + type=click.Path(exists=False), + default=None, + help="Output file name.", +) +@click.option( + "-m", + "--model-name", + type=click.Choice(["tims"]), + default="tims", + help="Model name.", +) +@click.option( + "-l", + "--log-level", + type=click.Choice(["debug", "info", "warning", "error", "critical"]), + default="info", + help="Logging level.", +) +@click.option( + "-n", + "--n-jobs", + type=click.INT, + default=None, + help="Number of jobs to use for parallel processing.", +) +@click.option( + "--calibrate-per-charge", + type=click.BOOL, + default=True, + help="Calibrate CCS values per charge state. Default is True.", +) +@click.option( + "--use-charge-state", + type=click.INT, + default=2, + help="Charge state to use for calibration. Only used if calibrate_per_charge is set to False.", +) +@click.option( + "--use-single-model", + type=click.BOOL, + default=True, + help="Use a single model for prediction. If False, an ensemble of models will be used, which may slightly improve prediction accuracy but increase runtimes. Default is True.", +) +@click.option( + "--ion-mobility", + type=click.BOOL, + default=False, + help="Output predictions in ion mobility (1/K0) instead of CCS. Default is False.", +) +def main( + psm_file: str, + calibration_file: Optional[str] = None, + output_file: Optional[str] = None, + model_name: Optional[str] = "tims", + log_level: Optional[str] = "info", + n_jobs: Optional[int] = None, + use_single_model: Optional[bool] = True, + calibrate_per_charge: Optional[bool] = True, + use_charge_state: Optional[int] = 2, + ion_mobility: Optional[bool] = False, +): + """Command line interface to IM2Deep.""" + setup_logging(log_level) + + with open(psm_file) as f: + first_line_pred = f.readline().strip() + if calibration_file: + with open(calibration_file) as fc: + first_line_cal = fc.readline().strip() + + if "modifications" in first_line_pred.split(",") and "seq" in first_line_pred.split(","): + # Read input file + df_pred = pd.read_csv(psm_file) + df_pred.fillna("", inplace=True) + + list_of_psms = [] + for seq, mod, charge, ident in zip( + df_pred["seq"], df_pred["modifications"], df_pred["charge"], df_pred.index + ): + list_of_psms.append( + PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) + ) + psm_list_pred = PSMList(psm_list=list_of_psms) + + else: + # psm_list_pred = read_file(file_pred) + try: + psm_list_pred = read_file(psm_file) + except PSMUtilsIOException: + LOGGER.error("Invalid input file. Please check the format of the input file.") + sys.exit(1) + + psm_list_cal = [] + if ( + calibration_file + and "modifications" in first_line_cal.split(",") + and "seq" in first_line_cal.split(",") + ): + try: + df_cal = pd.read_csv(calibration_file) + df_cal.fillna("", inplace=True) + del calibration_file + + list_of_cal_psms = [] + for seq, mod, charge, ident, CCS in zip( + df_cal["seq"], + df_cal["modifications"], + df_cal["charge"], + df_cal.index, + df_cal["CCS"], + ): + list_of_cal_psms.append( + PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) + ) + psm_list_cal = PSMList(psm_list=list_of_cal_psms) + psm_list_cal_df = psm_list_cal.to_dataframe() + psm_list_cal_df["ccs_observed"] = df_cal["CCS"] + del df_cal + + except IOError: + LOGGER.error( + "Invalid calibration file. Please check the format of the calibration file." + ) + sys.exit(1) + + else: + LOGGER.warning( + "No calibration file found. Proceeding without calibration. Calibration is HIGHLY recommended for accurate CCS prediction." + ) + psm_list_cal_df = None + + if not output_file: + output_file = Path(psm_file).parent / (Path(psm_file).stem + "_IM2Deep-predictions.csv") + try: + predict_ccs( + psm_list_pred, + psm_list_cal_df, + output_file=output_file, + model_name=model_name, + calibrate_per_charge=calibrate_per_charge, + use_charge_state=use_charge_state, + n_jobs=n_jobs, + use_single_model=use_single_model, + ion_mobility=ion_mobility, + ) + except IM2DeepError as e: + LOGGER.error(e) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/im2deep/_exceptions.py b/im2deep/_exceptions.py index bd07533..caea6ac 100644 --- a/im2deep/_exceptions.py +++ b/im2deep/_exceptions.py @@ -1,8 +1,8 @@ -"""IM2Deep exceptions.""" - -class IM2DeepError(Exception): - pass - - -class CalibrationError(IM2DeepError): - pass +"""IM2Deep exceptions.""" + +class IM2DeepError(Exception): + pass + + +class CalibrationError(IM2DeepError): + pass diff --git a/im2deep/calibrate.py b/im2deep/calibrate.py index 3e1381a..66b037b 100644 --- a/im2deep/calibrate.py +++ b/im2deep/calibrate.py @@ -1,214 +1,190 @@ -import logging - -import numpy as np -import pandas as pd -from numpy import ndarray -from psm_utils.peptidoform import Peptidoform - -LOGGER = logging.getLogger(__name__) - - -def im2ccs(reverse_im, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): - """ - Convert ion mobility to collisional cross section. - - Parameters - ---------- - reverse_im - Reduced ion mobility. - mz - Precursor m/z. - charge - Precursor charge. - mass_gas - Mass of gas, default 28.013 - temp - Temperature in Celsius, default 31.85 - t_diff - Factor to convert Celsius to Kelvin, default 273.15 - - Notes - ----- - Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) - - """ - - SUMMARY_CONSTANT = 18509.8632163405 - reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) - return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / reverse_im) - - -def get_ccs_shift( - cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, use_charge_state: int = 2 -) -> float: - """ - Calculate CCS shift factor, i.e. a constant offset, - based on identical precursors as in reference dataset. - - Parameters - ---------- - cal_df - PSMs with CCS values. - reference_dataset - Reference dataset with CCS values. - use_charge_state - Charge state to use for CCS shift calculation, needs to be [2,4], by default 2. - return_shift_factor - CCS shift factor. - - """ - LOGGER.debug(f"Using charge state {use_charge_state} for CCS shift calculation.") - - reference_tmp = reference_dataset[reference_dataset["charge"] == use_charge_state] - df_tmp = cal_df[cal_df["charge"] == use_charge_state] - both = pd.merge( - left=reference_tmp, - right=df_tmp, - right_on=["sequence", "charge"], - left_on=["peptidoform", "charge"], - how="inner", - suffixes=("_ref", "_data"), - ) - LOGGER.debug( - """Calculating CCS shift based on {} overlapping peptide-charge pairs - between PSMs and reference dataset""".format( - both.shape[0] - ) - ) - - # How much CCS in calibration data is larger than reference CCS, so predictions - # need to be increased by this amount - return 0 if both.empty else np.mean(both["ccs_observed"] - both["CCS"]) - - -def get_ccs_shift_per_charge(cal_df: pd.DataFrame, reference_dataset: pd.DataFrame) -> ndarray: - """ - Calculate CCS shift factor per charge state, - i.e. a constant offset based on identical precursors as in reference. - - Parameters - ---------- - cal_df - PSMs with CCS values. - reference_dataset - Reference dataset with CCS values. - - Returns - ------- - ndarray - CCS shift factors per charge state. - - """ - both = pd.merge( - left=reference_dataset, - right=cal_df, - right_on=["sequence", "charge"], - left_on=["peptidoform", "charge"], - how="inner", - suffixes=("_ref", "_data"), - ) - return both.groupby("charge").apply(lambda x: np.mean(x["ccs_observed"] - x["CCS"])).to_dict() - - -def calculate_ccs_shift( - cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, per_charge=True, use_charge_state=None -) -> float: - """ - Apply CCS shift to CCS values. - - Parameters - ---------- - cal_df - PSMs with CCS values. - reference_dataset - Reference dataset with CCS values. - per_charge - Whether to calculate shift factor per charge state, default True. - use_charge_state - Charge state to use for CCS shift calculation, needs to be [2,4], by default None. - - Returns - ------- - float - CCS shift factor. - - """ - cal_df = cal_df[cal_df["charge"] < 7] # predictions do not go higher for IM2Deep - - if not per_charge: - shift_factor = get_ccs_shift( - cal_df, - reference_dataset, - use_charge_state=use_charge_state, - ) - LOGGER.debug(f"CCS shift factor: {shift_factor}") - return shift_factor - - else: - shift_factor_dict = get_ccs_shift_per_charge(cal_df, reference_dataset) - LOGGER.debug(f"CCS shift factor dict: {shift_factor_dict}") - return shift_factor_dict - - -def linear_calibration( - preds_df: pd.DataFrame, - calibration_dataset: pd.DataFrame, - reference_dataset: pd.DataFrame, - per_charge: bool = True, - use_charge_state: bool = None, -) -> pd.DataFrame: - """ - Calibrate PSM df using linear calibration. - - Parameters - ---------- - preds_df - PSMs with CCS values. - calibration_dataset - Calibration dataset with CCS values. - reference_dataset - Reference dataset with CCS values. - per_charge - Whether to calculate shift factor per charge state, default True. - use_charge_state - Charge state to use for CCS shift calculation, needs to be [2,4], by default None. - - Returns - ------- - pd.DataFrame - PSMs with calibrated CCS values. - - """ - LOGGER.info("Calibrating CCS values using linear calibration...") - calibration_dataset['sequence'] = calibration_dataset['peptidoform'].apply(lambda x: x.proforma.split("\\")[0]) - calibration_dataset['charge'] = calibration_dataset['peptidoform'].apply(lambda x: x.precursor_charge) - # reference_dataset['sequence'] = reference_dataset['peptidoform'].apply(lambda x: x.split('/')[0]) - reference_dataset['charge'] = reference_dataset['peptidoform'].apply(lambda x: int(x.split('/')[1])) - - if per_charge: - LOGGER.info('Getting general shift factor') - general_shift = calculate_ccs_shift( - calibration_dataset, - reference_dataset, - per_charge=False, - use_charge_state=use_charge_state, - ) - LOGGER.info('Getting shift factors per charge state') - shift_factor_dict = calculate_ccs_shift( - calibration_dataset, reference_dataset, per_charge=True - ) - - preds_df['shift'] = preds_df['charge'].map(shift_factor_dict).fillna(general_shift) - preds_df['predicted_ccs'] = preds_df['predicted_ccs'] + preds_df['shift'] - - else: - shift_factor = calculate_ccs_shift( - calibration_dataset, - reference_dataset, - per_charge=False, - use_charge_state=use_charge_state, - ) - preds_df['predicted_ccs'] += shift_factor - - LOGGER.info("CCS values calibrated.") - return preds_df +import logging + +import numpy as np +import pandas as pd +from numpy import ndarray +from psm_utils.peptidoform import Peptidoform + +LOGGER = logging.getLogger(__name__) + + +def get_ccs_shift( + cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, use_charge_state: int = 2 +) -> float: + """ + Calculate CCS shift factor, i.e. a constant offset, + based on identical precursors as in reference dataset. + + Parameters + ---------- + cal_df + PSMs with CCS values. + reference_dataset + Reference dataset with CCS values. + use_charge_state + Charge state to use for CCS shift calculation, needs to be [2,4], by default 2. + return_shift_factor + CCS shift factor. + + """ + LOGGER.debug(f"Using charge state {use_charge_state} for CCS shift calculation.") + + reference_tmp = reference_dataset[reference_dataset["charge"] == use_charge_state] + df_tmp = cal_df[cal_df["charge"] == use_charge_state] + both = pd.merge( + left=reference_tmp, + right=df_tmp, + right_on=["sequence", "charge"], + left_on=["peptidoform", "charge"], + how="inner", + suffixes=("_ref", "_data"), + ) + LOGGER.debug( + """Calculating CCS shift based on {} overlapping peptide-charge pairs + between PSMs and reference dataset""".format( + both.shape[0] + ) + ) + + # How much CCS in calibration data is larger than reference CCS, so predictions + # need to be increased by this amount + return 0 if both.empty else np.mean(both["ccs_observed"] - both["CCS"]) + + +def get_ccs_shift_per_charge(cal_df: pd.DataFrame, reference_dataset: pd.DataFrame) -> ndarray: + """ + Calculate CCS shift factor per charge state, + i.e. a constant offset based on identical precursors as in reference. + + Parameters + ---------- + cal_df + PSMs with CCS values. + reference_dataset + Reference dataset with CCS values. + + Returns + ------- + ndarray + CCS shift factors per charge state. + + """ + both = pd.merge( + left=reference_dataset, + right=cal_df, + right_on=["sequence", "charge"], + left_on=["peptidoform", "charge"], + how="inner", + suffixes=("_ref", "_data"), + ) + return both.groupby("charge").apply(lambda x: np.mean(x["ccs_observed"] - x["CCS"])).to_dict() + + +def calculate_ccs_shift( + cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, per_charge=True, use_charge_state=None +) -> float: + """ + Apply CCS shift to CCS values. + + Parameters + ---------- + cal_df + PSMs with CCS values. + reference_dataset + Reference dataset with CCS values. + per_charge + Whether to calculate shift factor per charge state, default True. + use_charge_state + Charge state to use for CCS shift calculation, needs to be [2,4], by default None. + + Returns + ------- + float + CCS shift factor. + + """ + cal_df = cal_df[cal_df["charge"] < 7] # predictions do not go higher for IM2Deep + + if not per_charge: + shift_factor = get_ccs_shift( + cal_df, + reference_dataset, + use_charge_state=use_charge_state, + ) + LOGGER.debug(f"CCS shift factor: {shift_factor}") + return shift_factor + + else: + shift_factor_dict = get_ccs_shift_per_charge(cal_df, reference_dataset) + LOGGER.debug(f"CCS shift factor dict: {shift_factor_dict}") + return shift_factor_dict + + +def linear_calibration( + preds_df: pd.DataFrame, + calibration_dataset: pd.DataFrame, + reference_dataset: pd.DataFrame, + per_charge: bool = True, + use_charge_state: bool = None, +) -> pd.DataFrame: + """ + Calibrate PSM df using linear calibration. + + Parameters + ---------- + preds_df + PSMs with CCS values. + calibration_dataset + Calibration dataset with CCS values. + reference_dataset + Reference dataset with CCS values. + per_charge + Whether to calculate shift factor per charge state, default True. + use_charge_state + Charge state to use for CCS shift calculation, needs to be [2,4], by default None. + + Returns + ------- + pd.DataFrame + PSMs with calibrated CCS values. + + """ + LOGGER.info("Calibrating CCS values using linear calibration...") + calibration_dataset["sequence"] = calibration_dataset["peptidoform"].apply( + lambda x: x.proforma.split("\\")[0] + ) + calibration_dataset["charge"] = calibration_dataset["peptidoform"].apply( + lambda x: x.precursor_charge + ) + # reference_dataset['sequence'] = reference_dataset['peptidoform'].apply(lambda x: x.split('/')[0]) + reference_dataset["charge"] = reference_dataset["peptidoform"].apply( + lambda x: int(x.split("/")[1]) + ) + + if per_charge: + LOGGER.info("Getting general shift factor") + general_shift = calculate_ccs_shift( + calibration_dataset, + reference_dataset, + per_charge=False, + use_charge_state=use_charge_state, + ) + LOGGER.info("Getting shift factors per charge state") + shift_factor_dict = calculate_ccs_shift( + calibration_dataset, reference_dataset, per_charge=True + ) + + preds_df["shift"] = preds_df["charge"].map(shift_factor_dict).fillna(general_shift) + preds_df["predicted_ccs"] = preds_df["predicted_ccs"] + preds_df["shift"] + + else: + shift_factor = calculate_ccs_shift( + calibration_dataset, + reference_dataset, + per_charge=False, + use_charge_state=use_charge_state, + ) + preds_df["predicted_ccs"] += shift_factor + + LOGGER.info("CCS values calibrated.") + return preds_df diff --git a/im2deep/im2deep.py b/im2deep/im2deep.py index 9363144..409868d 100644 --- a/im2deep/im2deep.py +++ b/im2deep/im2deep.py @@ -1,72 +1,92 @@ -import logging -from pathlib import Path - -import pandas as pd -from deeplc import DeepLC -from psm_utils.psm_list import PSMList - -from im2deep.calibrate import linear_calibration - -LOGGER = logging.getLogger(__name__) -REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" - - -# TODO: get file reading out of the function -def predict_ccs( - psm_list_pred: PSMList, - psm_list_cal_df=None, - file_reference=REFERENCE_DATASET_PATH, - output_file=None, - model_name="tims", - calibrate_per_charge=True, - use_charge_state=2, - use_single_model=True, - n_jobs=None, - write_output=True, -): - """Run IM2Deep.""" - LOGGER.info("IM2Deep started.") - reference_dataset = pd.read_csv(file_reference) - - if model_name == "tims": - path_model = Path(__file__).parent / "models" / "TIMS" - - path_model_list = list(path_model.glob("*.hdf5")) - if use_single_model: - LOGGER.debug("Using model {}".format(path_model_list[2])) - path_model_list = [path_model_list[2]] - - dlc = DeepLC(path_model=path_model_list, n_jobs=n_jobs, predict_ccs=True) - LOGGER.info("Predicting CCS values...") - preds = dlc.make_preds(psm_list=psm_list_pred, calibrate=False) - LOGGER.info("CCS values predicted.") - psm_list_pred_df = psm_list_pred.to_dataframe() - psm_list_pred_df["predicted_ccs"] = preds - psm_list_pred_df["charge"] = psm_list_pred_df["peptidoform"].apply( - lambda x: x.precursor_charge - ) - - if psm_list_cal_df is not None: - psm_list_pred_df = linear_calibration( - psm_list_pred_df, - calibration_dataset=psm_list_cal_df, - reference_dataset=reference_dataset, - per_charge=calibrate_per_charge, - use_charge_state=use_charge_state, - ) - - if write_output: - LOGGER.info("Writing output file...") - output_file = open(output_file, "w") - output_file.write("modified_seq,charge,predicted CCS\n") - for peptidoform, charge, CCS in zip( - psm_list_pred_df["peptidoform"], - psm_list_pred_df["charge"], - psm_list_pred_df["predicted_ccs"], - ): - output_file.write(f"{peptidoform},{charge},{CCS}\n") - output_file.close() - - LOGGER.info("IM2Deep finished!") - - return psm_list_pred_df["predicted_ccs"] +import logging +from pathlib import Path + +import pandas as pd +from deeplc import DeepLC +from psm_utils.psm_list import PSMList + +from im2deep.calibrate import linear_calibration +from im2deep.utils import ccs2im + +LOGGER = logging.getLogger(__name__) +REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" + + +# TODO: get file reading out of the function +def predict_ccs( + psm_list_pred: PSMList, + psm_list_cal_df=None, + file_reference=REFERENCE_DATASET_PATH, + output_file=None, + model_name="tims", + calibrate_per_charge=True, + use_charge_state=2, + use_single_model=True, + n_jobs=None, + write_output=True, + ion_mobility=False, +): + """Run IM2Deep.""" + LOGGER.info("IM2Deep started.") + reference_dataset = pd.read_csv(file_reference) + + if model_name == "tims": + path_model = Path(__file__).parent / "models" / "TIMS" + + path_model_list = list(path_model.glob("*.hdf5")) + if use_single_model: + LOGGER.debug("Using model {}".format(path_model_list[2])) + path_model_list = [path_model_list[2]] + + dlc = DeepLC(path_model=path_model_list, n_jobs=n_jobs, predict_ccs=True) + LOGGER.info("Predicting CCS values...") + preds = dlc.make_preds(psm_list=psm_list_pred, calibrate=False) + LOGGER.info("CCS values predicted.") + psm_list_pred_df = psm_list_pred.to_dataframe() + psm_list_pred_df["predicted_ccs"] = preds + psm_list_pred_df["charge"] = psm_list_pred_df["peptidoform"].apply( + lambda x: x.precursor_charge + ) + + if psm_list_cal_df is not None: + psm_list_pred_df = linear_calibration( + psm_list_pred_df, + calibration_dataset=psm_list_cal_df, + reference_dataset=reference_dataset, + per_charge=calibrate_per_charge, + use_charge_state=use_charge_state, + ) + + if write_output: + if not ion_mobility: + LOGGER.info("Writing output file...") + output_file = open(output_file, "w") + output_file.write("modified_seq,charge,predicted CCS\n") + for peptidoform, charge, CCS in zip( + psm_list_pred_df["peptidoform"], + psm_list_pred_df["charge"], + psm_list_pred_df["predicted_ccs"], + ): + output_file.write(f"{peptidoform},{charge},{CCS}\n") + output_file.close() + else: + LOGGER.info("Converting CCS to IM values...") + psm_list_pred_df["predicted_im"] = ccs2im( + psm_list_pred_df["predicted_ccs"], + psm_list_pred_df["peptidoform"].apply(lambda x: x.theoretical_mz), + psm_list_pred_df["charge"], + ) + LOGGER.info("Writing output file...") + output_file = open(output_file, "w") + output_file.write("modified_seq,charge,predicted IM\n") + for peptidoform, charge, IM in zip( + psm_list_pred_df["peptidoform"], + psm_list_pred_df["charge"], + psm_list_pred_df["predicted_im"], + ): + output_file.write(f"{peptidoform},{charge},{IM}\n") + output_file.close() + + LOGGER.info("IM2Deep finished!") + + return psm_list_pred_df["predicted_ccs"] diff --git a/im2deep/utils.py b/im2deep/utils.py new file mode 100644 index 0000000..dba11c0 --- /dev/null +++ b/im2deep/utils.py @@ -0,0 +1,61 @@ +import numpy as np + + +def im2ccs(reverse_im, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): + """ + Convert ion mobility to collisional cross section. + + Parameters + ---------- + reverse_im + Reduced ion mobility. + mz + Precursor m/z. + charge + Precursor charge. + mass_gas + Mass of gas, default 28.013 + temp + Temperature in Celsius, default 31.85 + t_diff + Factor to convert Celsius to Kelvin, default 273.15 + + Notes + ----- + Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) + + """ + + SUMMARY_CONSTANT = 18509.8632163405 + reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) + return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / reverse_im) + + +def ccs2im(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): + """ + Convert collisional cross section to ion mobility. + + Parameters + ---------- + ccs + Collisional cross section. + mz + Precursor m/z. + charge + Precursor charge. + mass_gas + Mass of gas, default 28.013 + temp + Temperature in Celsius, default 31.85 + t_diff + Factor to convert Celsius to Kelvin, default 273.15 + + Notes + ----- + Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) + + """ + + SUMMARY_CONSTANT = 18509.8632163405 + reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) + return ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (SUMMARY_CONSTANT * charge) diff --git a/pyproject.toml b/pyproject.toml index 4aa2f8f..b46e03e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,68 +1,68 @@ -[project] -name = "im2deep" -description = "Framework for prediction of collisional cross-section of peptides." -readme = "README.md" -license = { file = "LICENSE" } -keywords = ["proteomics", "peptide", "timsTOF", "CCS"] -authors = [ - { name = "Robbe Devreese", email = "robbe.devreese@ugent.be" }, - { name = "Robbin Bouwmeester", email = "robbin.bouwmeester@ugent.be" }, - { name = "Ralf Gabriels", email = "ralf@gabriels.dev" }, -] -classifiers = [ - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3 :: Only", - "Topic :: Scientific/Engineering :: Bio-Informatics", -] -dynamic = ["version"] -requires-python = ">=3.8" -dependencies = [ - "click", - "deeplc", - "deeplcretrainer", - "psm_utils", - "tensorflow>=2.2,<2.13.0", - "pandas", - "numpy", - "rich" -] - -[project.optional-dependencies] -dev = ["black", "isort>5", "pytest", "pytest-cov"] -docs = [ - "sphinx", - "numpydoc>=1,<2", - "recommonmark", - "sphinx-mdinclude", - "toml", - "semver>=2", - "sphinx_rtd_theme", - "sphinx-autobuild", -] - -[project.urls] -GitHub = "https://github.com/rodvrees/IM2Deep" -CompOmics = "https://www.compomics.com" - -[project.scripts] -im2deep = "im2deep.__main__:main" - -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - -[tool.setuptools.dynamic] -version = {attr = "im2deep.__version__"} - -[tool.isort] -profile = "black" - -[tool.black] -line-length = 99 -target-version = ['py38'] - -[tool.ruff] -line-length = 99 -target-version = "py38" +[project] +name = "im2deep" +description = "Framework for prediction of collisional cross-section of peptides." +readme = "README.md" +license = { file = "LICENSE" } +keywords = ["proteomics", "peptide", "timsTOF", "CCS"] +authors = [ + { name = "Robbe Devreese", email = "robbe.devreese@ugent.be" }, + { name = "Robbin Bouwmeester", email = "robbin.bouwmeester@ugent.be" }, + { name = "Ralf Gabriels", email = "ralf@gabriels.dev" }, +] +classifiers = [ + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +dynamic = ["version"] +requires-python = ">=3.8" +dependencies = [ + "click", + "deeplc==2.2.38", + "deeplcretrainer", + "psm_utils", + "tensorflow>=2.2,<2.13.0", + "pandas", + "numpy", + "rich" +] + +[project.optional-dependencies] +dev = ["black", "isort>5", "pytest", "pytest-cov"] +docs = [ + "sphinx", + "numpydoc>=1,<2", + "recommonmark", + "sphinx-mdinclude", + "toml", + "semver>=2", + "sphinx_rtd_theme", + "sphinx-autobuild", +] + +[project.urls] +GitHub = "https://github.com/rodvrees/IM2Deep" +CompOmics = "https://www.compomics.com" + +[project.scripts] +im2deep = "im2deep.__main__:main" + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.dynamic] +version = {attr = "im2deep.__version__"} + +[tool.isort] +profile = "black" + +[tool.black] +line-length = 99 +target-version = ['py38'] + +[tool.ruff] +line-length = 99 +target-version = "py38"