diff --git a/README.md b/README.md index 5003a0a..01bc418 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,18 @@ indicate the fields (from the INFO) that will be available to the op, and the *o powerful. For an extensive example that demonstrates the utility of this type of approach, see [docs/examples/clinvar_exac.md](http://brentp.github.io/vcfanno/examples/clinvar_exac/). +A user can set the ID field of the VCF in a `[[postannotation]]` block by using `name="ID"`. For example: + +``` +[[postannotation]] +name="ID" +fields=["other_field", "ID"] +op="lua:other_field .. ';' .. ID" +type="String" +``` + +will take the value in `other_field`, concatenate it with the existing ID, and set the ID to that value. + Binaries ======== diff --git a/api/api.go b/api/api.go index 76608b7..52dc03b 100644 --- a/api/api.go +++ b/api/api.go @@ -423,12 +423,13 @@ func (s *Source) UpdateHeader(r HeaderUpdater, ends bool, htype string, number s } // PostAnnotate happens after everything is done. -func (a *Annotator) PostAnnotate(chrom string, start int, end int, info interfaces.Info, prefix string) error { +func (a *Annotator) PostAnnotate(chrom string, start int, end int, info interfaces.Info, prefix string, id string) (error, string) { var e, err error vals := make([]interface{}, 0, 2) fields := make([]string, 0, 2) missing := make([]string, 0, 2) var val interface{} + newid := "" for i := range a.PostAnnos { post := a.PostAnnos[i] vals = vals[:0] @@ -437,7 +438,11 @@ func (a *Annotator) PostAnnotate(chrom string, start int, end int, info interfac // lua code if post.code != "" { for _, field := range post.Fields { - val, e = info.Get(field) + if field == "ID" { + val = id + } else { + val, e = info.Get(field) + } if val != nil { vals = append(vals, val) fields = append(fields, field) @@ -516,7 +521,9 @@ func (a *Annotator) PostAnnotate(chrom string, start int, end int, info interfac } } else { - if e := info.Set(prefix+post.Name, val); e != nil { + if post.Name == "ID" && 
prefix == "" { + newid = val + } else if e := info.Set(prefix+post.Name, val); e != nil { err = e } } @@ -539,12 +546,15 @@ func (a *Annotator) PostAnnotate(chrom string, start int, end int, info interfac // run this as long as we found any of the values. if len(vals) != 0 { fn := Reducers[post.Op] - info.Set(prefix+post.Name, fn(vals)) + if post.Name == "ID" && prefix == "" { + newid = fmt.Sprintf("%s", fn(vals)) + } else { + info.Set(prefix+post.Name, fn(vals)) + } } } - } - return err + return err, newid } // Setup reads all the tabix indexes and setups up the Queryables @@ -624,11 +634,12 @@ func (a *Annotator) AnnotateEnds(v interfaces.Relatable, ends string) error { var err error // if Both, call the interval, left, and right version to annotate. + id := v.(*parsers.Variant).IVariant.(*vcfgo.Variant).Id() if ends == BOTH { if e := a.AnnotateOne(v, a.Strict); e != nil { err = e } - if e := a.PostAnnotate(v.Chrom(), int(v.Start()), int(v.End()), v.(interfaces.IVariant).Info(), ""); e != nil { + if e, _ := a.PostAnnotate(v.Chrom(), int(v.Start()), int(v.End()), v.(interfaces.IVariant).Info(), "", id); e != nil { err = e } if e := a.AnnotateEnds(v, LEFT); e != nil { @@ -640,10 +651,13 @@ func (a *Annotator) AnnotateEnds(v interfaces.Relatable, ends string) error { } if ends == INTERVAL { err := a.AnnotateOne(v, a.Strict) - err2 := a.PostAnnotate(v.Chrom(), int(v.Start()), int(v.End()), v.(interfaces.IVariant).Info(), "") + err2, newid := a.PostAnnotate(v.Chrom(), int(v.Start()), int(v.End()), v.(interfaces.IVariant).Info(), "", id) if err != nil { return err } + if err2 == nil && newid != "" { + v.(*parsers.Variant).IVariant.(*vcfgo.Variant).Id_ = newid + } return err2 } // hack: @@ -677,7 +691,7 @@ func (a *Annotator) AnnotateEnds(v interfaces.Relatable, ends string) error { val, err = v2.Info().Get(key) variant.Info().Set(key, val) } - err2 := a.PostAnnotate(v.Chrom(), int(l), int(r), variant.Info(), ends) + err2, _ := a.PostAnnotate(v.Chrom(), int(l), int(r), 
variant.Info(), ends, id) if err2 != nil { err = err2 } diff --git a/tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz b/tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz index 9d93998..1c269cd 100644 Binary files a/tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz and b/tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz differ diff --git a/tests/dbnsfp/conf.toml b/tests/dbnsfp/conf.toml index cf55a4d..283819b 100644 --- a/tests/dbnsfp/conf.toml +++ b/tests/dbnsfp/conf.toml @@ -3,3 +3,9 @@ file="dbNSFP_ex.txt.gz" columns=[1,2,4,5] names=["nschrom", "nspos", "nsref", "nsalt"] ops=["first", "max", "uniq", "uniq"] + +[[postannotation]] +name="ID" +fields=["culprit", "ID"] +op="lua:culprit .. ';' .. ID" +type="String" diff --git a/tests/dbnsfp/dbNSFP_example.txt.gz b/tests/dbnsfp/dbNSFP_example.txt.gz new file mode 100644 index 0000000..d8d27f5 Binary files /dev/null and b/tests/dbnsfp/dbNSFP_example.txt.gz differ diff --git a/tests/dbnsfp/dbNSFP_example.txt.gz.tbi b/tests/dbnsfp/dbNSFP_example.txt.gz.tbi new file mode 100644 index 0000000..d34e08e Binary files /dev/null and b/tests/dbnsfp/dbNSFP_example.txt.gz.tbi differ diff --git a/tests/functional-test.sh b/tests/functional-test.sh index 67b8eac..08656b5 100755 --- a/tests/functional-test.sh +++ b/tests/functional-test.sh @@ -104,7 +104,7 @@ assert_equal 3 $(grep -c ";right_ref_alt=A" $STDOUT_FILE) rm e.lua irefalt() { - vcfanno -permissive-overlap -base-path tests/dbnsfp/ tests/dbnsfp/conf.toml tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz | grep -v ^# + vcfanno -lua <(echo "") -permissive-overlap -base-path tests/dbnsfp/ tests/dbnsfp/conf.toml tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz | grep -v ^# } run check_iref_alt irefalt @@ -112,10 +112,12 @@ assert_in_stdout "nsalt=A,G,T" assert_exit_code 0 irefalt_strict() { - vcfanno -base-path tests/dbnsfp/ tests/dbnsfp/conf.toml tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz | grep -v ^# + vcfanno -lua <(echo "") -base-path tests/dbnsfp/ tests/dbnsfp/conf.toml 
tests/dbnsfp/Calls_for_dbNSFP_example.vcf.gz | grep -v ^# } run check_iref_alt_strict irefalt_strict assert_in_stdout $'nsalt=T\t' +# check that ID was set. +assert_in_stdout $'\tReadPosRankSum;ORIGID\t' assert_exit_code 0 diff --git a/vcfanno.go b/vcfanno.go index e104bdf..91b6cc7 100644 --- a/vcfanno.go +++ b/vcfanno.go @@ -24,7 +24,7 @@ import ( "github.com/brentp/xopen" ) -var VERSION = "0.1.1" +var VERSION = "0.1.2-dev" func envGet(name string, vdefault int) int { sval := os.Getenv(name)