Skip to content

Commit

Permalink
better warnings and handling of multiple alts
Browse files Browse the repository at this point in the history
re #83 and #87

if there are already values in the query info field for a variant with
multiple alternates, incoming values will only overwrite existing values
if they are non-nil (or non-zero values of the type).

thanks @RoanKanninga for reporting and providing test-cases.

when Number=1 in the annotation file (and therefore the input file)
and there are multiple alternates in the input file, the values
can be out of order. This now issues a warning indicating the file
and the field in question and noting that it can be mitigated by
decomposing the input file.
  • Loading branch information
brentp committed Jun 5, 2018
1 parent f97f91d commit 272800d
Show file tree
Hide file tree
Showing 13 changed files with 95 additions and 5 deletions.
8 changes: 4 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ os:
- osx

go:
- 1.7
- 1.8
- 1.9
- 1.10
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x

before_install:
- go get github.com/axw/gocov/gocov
Expand Down
47 changes: 47 additions & 0 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ func collect(v interfaces.IVariant, rels []interfaces.Relatable, src *Source, st
// special-case 'self' when the annotation has Number=A and either query or anno have multiple alts
// so that we get the alts matched up.
if src.NumberA && src.Op == "self" && src.Field != "ID" && src.Field != "FILTER" {
//if (src.NumberA) && src.Op == "self" && src.Field != "ID" && src.Field != "FILTER" {
var out []interface{}
if len(coll) > 0 {
out = coll[0].([]interface{})
Expand Down Expand Up @@ -466,6 +467,17 @@ func (a *Annotator) AnnotateOne(r interfaces.Relatable, strict bool, end ...stri
return e
}

func v0len(vals []interface{}) int {
if len(vals) == 0 {
return -1
}
if v, ok := vals[0].([]interface{}); ok {
return len(v)
}
return -1

}

// AnnotateOne annotates a single variant with the vals
func (s *Source) AnnotateOne(v interfaces.IVariant, vals []interface{}, prefix string) {
if len(vals) == 0 {
Expand All @@ -481,6 +493,37 @@ func (s *Source) AnnotateOne(v interfaces.IVariant, vals []interface{}, prefix s
v.Info().Set(prefix+s.Name, luaval)
}
} else {
if len(vals) > 0 && s.Op == "self" && len(v.Alt()) > 1 && (len(vals) > 1 || v0len(vals) > 1) {
// multiple annotations writing to a mult-allelic. grab the current and replace any empty
// incoming vals with the current ones.
// there is a lot of BS here to check that the current info values and in the incoming
// vals are the same length.
current, _ := v.Info().Get(prefix + s.Name)
if current != nil && reflect.ValueOf(current).Kind() == reflect.Slice {
cv := reflect.ValueOf(current)
rv := reflect.ValueOf(vals)
if rv.Kind() == reflect.Slice {
if rv.Len() == 1 {
vals = rv.Index(0).Interface().([]interface{})
rv = reflect.ValueOf(vals)
}
if rv.Kind() == reflect.Slice && rv.Len() == cv.Len() {
for i, v := range vals {
if v == nil {
newv := cv.Index(i)
// don't set vals if we just got the zero value out of necessity.
// this kinda works around a bug in vcfgo where it returns the empty value
// for missing which, e.g. for float is 0.
if reflect.Zero(newv.Type()) != newv {
vals[i] = cv.Index(i).Interface()
}
}
}
}
}

}
}
val := Reducers[s.Op](vals)
v.Info().Set(prefix+s.Name, val)
}
Expand Down Expand Up @@ -752,6 +795,10 @@ func (a *Annotator) Setup(query HeaderUpdater) ([]interfaces.Queryable, error) {
for _, src := range fmap[file] {
num := q.GetHeaderNumber(src.Field)
// must set this to accurately represent multi-allelics.
if num == "1" && src.Op == "self" {
log.Printf("WARNING: using op 'self' when with Number='1' for '%s' from '%s' can result in out-of-order values when the query is multi-allelic", src.Field, src.File)
log.Printf(" : this is not an issue if the query has been decomposed.")
}
/*
if num == "1" && src.Op == "self" {
num = "A"
Expand Down
11 changes: 10 additions & 1 deletion tests/functional-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ assert_exit_code 0
# there should be 0 non-header lines without 'max_maf' since we are annotating self.
assert_equal 0 $(grep -v max_maf $STDOUT_FILE | grep -cv ^#)
run check_overwrite_a vcfanno tests/overwrite-multiple-alts/a/conf.toml tests/overwrite-multiple-alts/a/input.vcf
assert_exit_code 0
assert_in_stderr "using op 'self' when with Number='1' for 'raw' from 'tests/overwrite-multiple-alts/a/whole.vcf.gz' can"
run check_overwrite_b vcfanno tests/overwrite-multiple-alts/b/conf.toml tests/overwrite-multiple-alts/b/input.vcf
assert_exit_code 0
assert_in_stdout "CADD_SCALED=1.9,0.6;CADD=-0.1,-0.3"
touch e.lua
run check_ends_overlap vcfanno -lua e.lua -base-path tests/citest/at/ -ends tests/citest/at/conf.toml tests/citest/at/test.vcf | grep -v ^#
assert_exit_code 0
Expand Down Expand Up @@ -139,7 +148,7 @@ vcfanno tests/astar/astar.conf tests/astar/astar.vcf
run check_astar astar
assert_exit_code 0
assert_in_stdout "ExAC_AF=0.021771,0"
assert_instdout "ExAC_AN=17546;ExAC_Hom=7,.;"
assert_in_stdout "ExAC_AN=17546;ExAC_Hom=7,.;"
multiallelics() {
Expand Down
5 changes: 5 additions & 0 deletions tests/overwrite-multiple-alts/a/conf.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[[annotation]]
file="tests/overwrite-multiple-alts/a/whole.vcf.gz"
fields=["raw"]
names=["CADD"]
ops=["self"]
9 changes: 9 additions & 0 deletions tests/overwrite-multiple-alts/a/input.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.2
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##contig=<ID=1,length=249250621,assembly=b37>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2
1 208063100 rs5780411 G GA,T 2896 . AC=9,1 GT:AD:DP:GQ:PL 0/1:16,21,12:52:85:681,0,494,644,85,1010 0/2:33,0,14:49:95:95,184,663,0,479,441
Binary file added tests/overwrite-multiple-alts/a/whole.vcf.gz
Binary file not shown.
Binary file added tests/overwrite-multiple-alts/a/whole.vcf.gz.tbi
Binary file not shown.
Binary file added tests/overwrite-multiple-alts/b/cadd_indels.vcf.gz
Binary file not shown.
Binary file not shown.
11 changes: 11 additions & 0 deletions tests/overwrite-multiple-alts/b/conf.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[[annotation]]
file="tests/overwrite-multiple-alts/b/cadd_indels.vcf.gz"
fields=["phred", "raw"]
names=["CADD_SCALED","CADD"]
ops=["self","self"]

[[annotation]]
file="tests/overwrite-multiple-alts/b/whole_genome_snv_slice.vcf.gz"
fields=["phred", "raw"]
names=["CADD_SCALED","CADD"]
ops=["self","self"]
9 changes: 9 additions & 0 deletions tests/overwrite-multiple-alts/b/input.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.2
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##contig=<ID=1,length=249250621,assembly=b37>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2
1 208063100 rs5780411 G GA,T 2896 . AC=9,1 GT:AD:DP:GQ:PL 0/1:16,21,12:52:85:681,0,494,644,85,1010 0/2:33,0,14:49:95:95,184,663,0,479,441
Binary file not shown.
Binary file not shown.

0 comments on commit 272800d

Please sign in to comment.