diff --git a/README.md b/README.md index 0a62b7a..f3e3c81 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Make sure to put the trained models in the data folder and make sure that the fi ### Running AIdiva on already annotated data: ``` -python run_AIdiva.py --config AIdiva_configuration_annotated.yaml --snp_vcf annotated_snp.vcf --indel_vcf annotated_indel.vcf --expanded_indel_vcf annotated_expanded_indel.vcf --out_prefix aidiva_result --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--family_file family.txt] [--threads 1] +python run_AIdiva.py --config AIdiva_configuration_annotated.yaml --snp_vcf annotated_snp.vcf --indel_vcf annotated_indel.vcf --expanded_indel_vcf annotated_expanded_indel.vcf --out_prefix aidiva_result --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--gene_exclusion gene_exclusion.txt] [--family_file family.txt] [--family_type SINGLE] [--skip_db_check] [--only_top_results] [--threads 1] [--log_level INFO] ``` + _config_ -- YAML configuration file (in the `data` folder there are example configuration files for each of the two modes) @@ -69,13 +69,18 @@ python run_AIdiva.py --config AIdiva_configuration_annotated.yaml --snp_vcf anno + _out_prefix_ -- A prefix for the resulting output files + _workdir_ -- Working directory, where all temporary files are created and saved (the results will also be stored here) + _hpo_list_ -- TXT file containing all the HPO terms observed with the patient [optional] ++ _gene_exclusion_ -- TXT file containing genes that should be excluded during the analysis of the HPO relatedness [optional] + _family_file_ -- TXT file containing the sample information if run on multisample VCF files [optional] ++ _family_type_ -- Type of the family relation [SINGLE, TRIO, MULTI] (default: SINGLE) [optional] ++ _skip_db_check_ -- Skip the database check for existing entries in ClinVar (and HGMD) [optional] ++ _only_top_results_ -- Restrict the results to only report the top 25 variants [optional] + _threads_ 
-- Number of threads that should be used (default: 1) [optional] ++ _log_level_ -- Define logging level [DEBUG, INFO, WARN, ERROR, CRITICAL] (default: INFO) [optional] ### Running AIdiva and perform the annotation: ``` -python run_annotation_and_AIdiva.py --config AIdiva_configuration_with_annotation.yaml --vcf input.vcf --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--gene_exclusion gene_exclusion.txt] [--family_file family.txt] [--threads 1] +python run_annotation_and_AIdiva.py --config AIdiva_configuration_with_annotation.yaml --vcf input.vcf --workdir aidiva_workdir/ [--hpo_list hpo_terms.txt] [--gene_exclusion gene_exclusion.txt] [--family_file family.txt] [--family_type SINGLE] [--skip_db_check] [--only_top_results] [--threads 1] [--log_level INFO] ``` diff --git a/aidiva/run_AIdiva.py b/aidiva/run_AIdiva.py index 892dd89..db5535f 100644 --- a/aidiva/run_AIdiva.py +++ b/aidiva/run_AIdiva.py @@ -24,7 +24,6 @@ parser.add_argument("--family_file", type=str, dest="family_file", metavar="family.txt", required=False, help="TXT file showing the sample relations of the current data") parser.add_argument("--family_type", type=str, dest="family_type", metavar="SINGLE", required=False, help="In case of multisample data the kind of sample relation [SINGLE, TRIO, MULTI]") parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for AIdiva [required]") - parser.add_argument("--reference", type=str, dest="reference", metavar="GRCh37.fa", required=True, help="Reference sequence to use as FASTA [required]") parser.add_argument("--skip_db_check", dest="skip_db_check", action="store_true", required=False, help="Flag to skip database (ClinVar, HGMD) lookup") parser.add_argument("--only_top_results", dest="only_top_results", action="store_true", required=False, help="Report only the top 25 variants as result") parser.add_argument("--threads", type=int, dest="threads", metavar="1", 
required=False, help="Number of threads to use (default: 1)") @@ -112,11 +111,12 @@ skip_db_check = args.skip_db_check - ref_path = args.reference + allele_frequency_list = configuration["Model-Features"]["allele-frequency-list"] feature_list = configuration["Model-Features"]["feature-list"] assembly_build = configuration["Assembly-Build"] + ref_path = configuration["Analysis-Input"]["ref-path"] # convert splitted input data to vcf and annotate input_data_snp = convert_vcf.convert_vcf_to_pandas_dataframe(snp_vcf, False, num_cores) diff --git a/doc/install_additional_tools.md b/doc/install_additional_tools.md index e2b7010..180aae3 100644 --- a/doc/install_additional_tools.md +++ b/doc/install_additional_tools.md @@ -36,21 +36,6 @@ rm 103.1.tar.gz mkdir -p $vep_cpan_dir cpanm -l $vep_cpan_dir -L $vep_cpan_dir Set::IntervalTree URI::Escape DB_File Carp::Assert JSON::XS PerlIO::gzip DBI -# Install BigWig support (needed to annotate phyloP) -cd $vep_install_dir -export KENT_SRC=$vep_install_dir/kent-335_base/src -export MACHTYPE=$(uname -m) -export CFLAGS="-fPIC" -wget https://github.com/ucscGenomeBrowser/kent/archive/v335_base.tar.gz -tar xzf v335_base.tar.gz -rm v335_base.tar.gz -cd $KENT_SRC/lib -echo 'CFLAGS="-fPIC"' > $KENT_SRC/inc/localEnvironment.mk -make clean && make -cd $KENT_SRC/jkOwnLib -make clean && make -cpanm -l $vep_cpan_dir -L $vep_cpan_dir Bio::DB::BigFile - # Download VEP cache data mkdir -p $vep_data_dir cd $vep_data_dir