Update README

imgag · Mar 30, 2022 · ca745e7 · ca745e7
1 parent 3f2d9c2
commit ca745e7
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 19 deletions.
diff --git a/README.md b/README.md
@@ -59,7 +59,7 @@ Make sure to put the trained models in the data folder and make sure that the fi
 ### Running AIdiva on already annotated data:
 
 ```
-python run_AIdiva.py --config AIdiva_configuration_annotated.yaml --snp_vcf annotated_snp.vcf --indel_vcf annotated_indel.vcf --expanded_indel_vcf annotated_expanded_indel.vcf --out_prefix aidiva_result --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--family_file family.txt] [--threads 1]
+python run_AIdiva.py --config AIdiva_configuration_annotated.yaml --snp_vcf annotated_snp.vcf --indel_vcf annotated_indel.vcf --expanded_indel_vcf annotated_expanded_indel.vcf --out_prefix aidiva_result --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--gene_exclusion gene_exclusion.txt] [--family_file family.txt] [--family_type SINGLE] [--skip_db_check] [--only_top_results] [--threads 1] [--log_level INFO]
 ```
 
 + _config_ -- YAML configuration file (in the `data` folder there are example configuration files for each of the two modes)
@@ -69,13 +69,18 @@ python run_AIdiva.py --config AIdiva_configuration_annotated.yaml --snp_vcf anno
 + _out_prefix_ -- A prefix for the resulting output files
 + _workdir_ -- Working directory, where all temporary files are created and saved (the results will also be stored here)
 + _hpo_list_ -- TXT file containing all the HPO terms observed in the patient [optional]
++ _gene_exclusion_ -- TXT file containing genes that should be excluded during the analysis of the HPO relatedness [optional]
 + _family_file_ -- TXT file containing the sample information if run on multisample VCF files [optional]
++ _family_type_ -- Type of the family relation [SINGLE, TRIO, FAMILY] (default: SINGLE) [optional]
++ _skip_db_check_ -- Skip the database lookup for existing entries in ClinVar (and HGMD) [optional]
++ _only_top_results_ -- Restrict the results to only report the top 25 variants [optional]
 + _threads_ -- Number of threads that should be used (default: 1) [optional]
++ _log_level_ -- Define logging level [DEBUG, INFO, WARN, ERROR, CRITICAL] (default: INFO) [optional]
 
 ### Running AIdiva and performing the annotation:
 
 ```
-python run_annotation_and_AIdiva.py --config AIdiva_configuration_with_annotation.yaml --vcf input.vcf --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--gene_exclusion gene_exclusion.txt] [--family_file family.txt] [--threads 1]
+python run_annotation_and_AIdiva.py --config AIdiva_configuration_with_annotation.yaml --vcf input.vcf --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--gene_exclusion gene_exclusion.txt] [--family_file family.txt] [--family_type SINGLE] [--skip_db_check] [--only_top_results] [--threads 1] [--log_level INFO]
 ```
 
 

diff --git a/aidiva/run_AIdiva.py b/aidiva/run_AIdiva.py
@@ -24,7 +24,6 @@
     parser.add_argument("--family_file", type=str, dest="family_file", metavar="family.txt", required=False, help="TXT file showing the sample relations of the current data")
     parser.add_argument("--family_type", type=str, dest="family_type", metavar="SINGLE", required=False, help="In case of multisample data the kind of sample relation [SINGLE, TRIO, MULTI]")
     parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for AIdiva [required]")
-    parser.add_argument("--reference", type=str, dest="reference", metavar="GRCh37.fa", required=True, help="Reference sequence to use as FASTA [required]")
     parser.add_argument("--skip_db_check", dest="skip_db_check", action="store_true", required=False, help="Flag to skip database (ClinVar, HGMD) lookup")
     parser.add_argument("--only_top_results", dest="only_top_results", action="store_true", required=False, help="Report only the top 25 variants as result")
     parser.add_argument("--threads", type=int, dest="threads", metavar="1", required=False, help="Number of threads to use (default: 1)")
@@ -112,11 +111,12 @@
 
     skip_db_check = args.skip_db_check
 
-    ref_path = args.reference
+
 
     allele_frequency_list = configuration["Model-Features"]["allele-frequency-list"]
     feature_list = configuration["Model-Features"]["feature-list"]
     assembly_build = configuration["Assembly-Build"]
+    ref_path = configuration["Analysis-Input"]["ref-path"]
 
     # convert split input data to vcf and annotate
     input_data_snp = convert_vcf.convert_vcf_to_pandas_dataframe(snp_vcf, False, num_cores)

diff --git a/doc/install_additional_tools.md b/doc/install_additional_tools.md
@@ -36,21 +36,6 @@ rm 103.1.tar.gz
 mkdir -p $vep_cpan_dir
 cpanm -l $vep_cpan_dir -L $vep_cpan_dir Set::IntervalTree URI::Escape DB_File Carp::Assert JSON::XS PerlIO::gzip DBI
 
-# Install BigWig support (needed to annotate phyloP)
-cd $vep_install_dir
-export KENT_SRC=$vep_install_dir/kent-335_base/src
-export MACHTYPE=$(uname -m)
-export CFLAGS="-fPIC"
-wget https://github.com/ucscGenomeBrowser/kent/archive/v335_base.tar.gz
-tar xzf v335_base.tar.gz
-rm v335_base.tar.gz
-cd $KENT_SRC/lib
-echo 'CFLAGS="-fPIC"' > $KENT_SRC/inc/localEnvironment.mk
-make clean && make
-cd $KENT_SRC/jkOwnLib
-make clean && make
-cpanm -l $vep_cpan_dir -L $vep_cpan_dir Bio::DB::BigFile
-
 # Download VEP cache data
 mkdir -p $vep_data_dir
 cd $vep_data_dir