-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes and conda packaging for run_dbcan (#101)
* Edited hmmscan_parser and run_dbcan so that they can be imported as python modules and called from other python scripts without invoking subprocesses, etc. They retain the original functionality of being called via CLI through a shell. Additionally, prediction.py had a bug in get_validation_results that caused an empty folder structure to be created when the output was given as an absolute path instead of a relative path. This bug should now be fixed. * Missed some bugfix updates from the main branch of run_dbcan in last commit, which are included here. Also changed the call to hmmscan_parser in runHmmScan to a function call instead of subprocess. This will remove the overhead of creating a new subprocess, and makes more sense given that hmmscan_parser is now a module import. * Added build files for conda installation. Also moved around the arg input in run_dbcan so that the entry points in setup.py are correct. * Added note to meta.yaml about changing git_url when merging into main repo. * Fixed bug where including branch in git_url prevented conda build from executing successfully. * Added missing dependencies to conda build file. * Changed run_dbcan imports to solve module not found error in conda installation. * Removed bioconda as a package. It's a channel, not a package. * Added init file for proper module installation, and licence file name to meta file. * Made setuptools a build dependency instead of run dependency * Edited import to function properly * Edited import to function properly * Changed to noarch in meta.yaml * Change meta.yaml to point to main repo for merging into it. Also pull version info from setup.py, meaning there is a single source of truth for version number now.
- Loading branch information
1 parent
5264805
commit 3a93d08
Showing
8 changed files
with
271 additions
and
205 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
{% set name = "run_dbcan" %} | ||
{% set data = load_setup_py_data(setup_file='../setup.py', from_recipe_dir=True) %} | ||
|
||
package: | ||
name: "{{ name|lower }}" | ||
version: {{ data.get('version') }} | ||
# pulls version number from setup.py | ||
|
||
source: | ||
# may want to change this to pull from a stable git release branch, or look for stable release git commit labels, or from another source such as PyPI | ||
git_url: https://github.com/linnabrown/run_dbcan.git | ||
git_depth: 1 # only grab most recent commit on branch | ||
|
||
build: | ||
noarch: python | ||
number: 0 | ||
script: {{ PYTHON }} setup.py install | ||
|
||
requirements: | ||
build: | ||
- python | ||
- setuptools | ||
- git | ||
host: | ||
- python | ||
run: | ||
- diamond | ||
- hmmer | ||
- prodigal | ||
- python | ||
- natsort | ||
- scipy | ||
- psutil | ||
- numpy | ||
|
||
about: | ||
home: http://bcb.unl.edu/dbCAN2/ | ||
dev_url: https://github.com/linnabrown/run_dbcan | ||
license: GPLv3 | ||
license_file: LICENSE | ||
summary: A standalone tool of http://bcb.unl.edu/dbCAN2/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/usr/bin/env python3 | ||
########################################################## | ||
# hmmscan parser for dbCAN meta server | ||
# | ||
# Based off the hmmscan parser used in the dbCAN server, | ||
# written by Dr. Yin | ||
# | ||
# Written by Tanner Yohe under the supervision | ||
# of Dr. Yin in the YinLab at NIU. | ||
# | ||
# Updated by Le Huang following tips from the contributor WATSON Mick <[email protected]>, | ||
# Thank you! | ||
# | ||
# Modified by Alex Fraser to have a run() method that can be called and returns data for better integration with other | ||
# scripts. This script also retains the ability to be called from shell and output to pipe redirection. | ||
# This file had to be renamed from "hmmscan-parser.py" to "hmmscan_parser.py" because of python module import conventions. | ||
# Modified on 07/06/22 | ||
# | ||
# INPUT | ||
# python hmmscan_parser.py [inputFile] [eval] [coverage] | ||
# eval and coverage are optional (but must be given together), inputFile is required | ||
# -updating info: | ||
# -adds pid for every subprocess to make codes robust. | ||
# Last updated: 1/10/19 | ||
########################################################### | ||
|
||
from subprocess import call | ||
import sys | ||
import os | ||
|
||
|
||
def run(input_file, eval_num=1e-15, coverage=0.35, verbose=False): | ||
|
||
tmpfile = "temp." + str(os.getpid()) | ||
|
||
call("cat "+input_file+" | grep -v '^#' | awk '{print $1,$3,$4,$6,$13,$16,$17,$18,$19}' | sed 's/ /\t/g' | sort -k 3,3 -k 8n -k 9n | perl -e 'while(<>){chomp;@a=split;next if $a[-1]==$a[-2];push(@{$b{$a[2]}},$_);}foreach(sort keys %b){@a=@{$b{$_}};for($i=0;$i<$#a;$i++){@b=split(/\t/,$a[$i]);@c=split(/\t/,$a[$i+1]);$len1=$b[-1]-$b[-2];$len2=$c[-1]-$c[-2];$len3=$b[-1]-$c[-2];if($len3>0 and ($len3/$len1>0.5 or $len3/$len2>0.5)){if($b[4]<$c[4]){splice(@a,$i+1,1);}else{splice(@a,$i,1);}$i=$i-1;}}foreach(@a){print $_.\"\n\";}}' > " + tmpfile, shell=True) | ||
|
||
output = "" | ||
with open(tmpfile) as f: | ||
for line in f: | ||
row = line.rstrip().split('\t') | ||
row.append(float(int(row[6])-int(row[5]))/int(row[1])) | ||
if float(row[4]) <= eval_num and float(row[-1]) >= coverage: | ||
if verbose: | ||
print('\t'.join([str(x) for x in row])) | ||
output += '\t'.join([str(x) for x in row]) + '\n' | ||
call(['rm', tmpfile]) | ||
|
||
return output | ||
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: <script> inputFile [eval coverage]
    # verbose=True makes run() print every retained row to stdout.
    if len(sys.argv) > 3:
        # Input file plus explicit e-value and coverage cut-offs.
        file = sys.argv[1]
        eval_arg = float(sys.argv[2])
        coverage_arg = float(sys.argv[3])
        run(file, eval_arg, coverage_arg, verbose=True)
    elif len(sys.argv) > 1:
        # Must be "elif": with a plain "if" the three-argument form above
        # would fall through and parse the file a second time with the
        # default cut-offs, printing every result twice.
        file = sys.argv[1]
        run(file, verbose=True)
    else:
        print("Please give a hmmscan output file as the first command")
        # sys.exit() does not rely on the site-provided exit() helper.
        sys.exit()
Oops, something went wrong.