Skip to content

Commit

Permalink
add by_alt to address #68
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Jun 19, 2017
1 parent 6952a38 commit 7bda8bb
Show file tree
Hide file tree
Showing 12 changed files with 88 additions and 15 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ this case, the op determines how the many values are `reduced`. Valid operations
+ min // numbers only
+ sum // numbers only
+ uniq // comma-delimited list of unique values
+ by_alt // comma-delimited by alt, pipe-delimited (|) for multiple annos for the same alt.

There are some operations that are only for `postannotation`:

Expand Down
49 changes: 41 additions & 8 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,24 @@ func allEqual(a, b []string) bool {
return true
}

// byAlt accumulates per-alternate annotation values across successive calls.
//
// in is the output from handleA: the value at position i belongs to qAlts[i].
// existing holds the values gathered so far, one slice per alt; pass nil on
// the first call. Values that are nil, "" or "." are treated as missing and
// skipped; every other value is formatted with %v and appended to the slice
// for its alt. The (possibly newly allocated) accumulator is returned.
//
// Example, writing each alt's values joined by "|" and the alts joined by ",":
//
//	22,33 for alts A,G -> 22,33
//	then XX for G      -> 22,33|XX
//	then YY for A      -> 22|YY,33|XX
func byAlt(in []interface{}, qAlts []string, existing [][]string) [][]string {
	if existing == nil {
		existing = make([][]string, len(qAlts))
	}
	// pad a short accumulator so that every query alt has a slot.
	for len(existing) < len(qAlts) {
		existing = append(existing, nil)
	}
	for i, v := range in {
		if i >= len(existing) {
			// more values than alts: there is no alt to attach them to and
			// indexing would panic, so the surplus is dropped.
			break
		}
		if v == nil || v == "" || v == "." {
			continue
		}
		existing[i] = append(existing[i], fmt.Sprintf("%v", v))
	}
	return existing
}

// handleA converts the `val` to the correct slice of vals to match what's in
// qAlts and oAlts. The length of the returned value should always be equal
// to the len of qAlts.
Expand Down Expand Up @@ -257,6 +275,7 @@ func handleA(val interface{}, qAlts []string, oAlts []string, out []interface{})
func collect(v interfaces.IVariant, rels []interfaces.Relatable, src *Source, strict bool) ([]interface{}, error) {
coll := make([]interface{}, 0, len(rels))
var val interface{}
var valByAlt [][]string
var finalerr error
for _, other := range rels {
if int(other.Source())-1 != src.Index {
Expand Down Expand Up @@ -293,15 +312,17 @@ func collect(v interfaces.IVariant, rels []interfaces.Relatable, src *Source, st
continue
}
}
if src.Op == "by_alt" {
// by_alt uses the handleA machinery and then appends each value to the
// list for its alternate allele.
out := make([]interface{}, len(v.Alt()))
handleA(val, v.Alt(), o.Alt(), out)
valByAlt = byAlt(out, v.Alt(), valByAlt)
continue
}

/*
if src.Field == "ID" || src.Field == "FILTER" {
coll = append(coll, val)
continue
}
*/
// special-case 'self' when the annotation has Number=A and either query or anno have multiple alts
// note that if len(rels) > 1, we could miss some since we return here. however, that shouldn't happen as we are matching on ref and alt and we wouldn't know what to do anyway.
// so that we get the alts matched up.
if src.NumberA && src.Op == "self" && src.Field != "ID" && src.Field != "FILTER" {
var out []interface{}
if len(coll) > 0 {
Expand Down Expand Up @@ -390,6 +411,15 @@ func collect(v interfaces.IVariant, rels []interfaces.Relatable, src *Source, st
coll = []interface{}{msg}
}
}
if valByAlt != nil {
for _, v := range valByAlt {
if len(v) == 0 {
coll = append(coll, ".")
} else {
coll = append(coll, strings.Join(v, "|"))
}
}
}
return coll, finalerr
}

Expand Down Expand Up @@ -460,7 +490,10 @@ func (s *Source) AnnotateOne(v interfaces.IVariant, vals []interface{}, prefix s
func (s *Source) UpdateHeader(r HeaderUpdater, ends bool, htype string, number string, desc string) {
ntype := "String"
// for 'self' and 'first', we can get the type from the header of the annotation file.
if htype != "" && (s.Op == "self" || s.Op == "first") {
if s.Op == "by_alt" {
number = "A"
ntype = htype
} else if htype != "" && (s.Op == "self" || s.Op == "first") {
ntype = htype
} else {
if strings.HasSuffix(s.Name, "_float") {
Expand Down
30 changes: 30 additions & 0 deletions api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,33 @@ func TestHandlAMulti(t *testing.T) {
}

}

// given the output from handleA and the alts:
// append new values to the appropriate alt.
// 22,33 for alts A,G -> 22,33
// then XX for G -> 22,33|XX
// then YY for A -> 22|YY,33|XX
// func byAlt(in []interface{}, qAlts []string, existing [][]string) [][]string {

// byAltTests pairs an input slice of per-alt values with the accumulator
// expected back from byAlt when it starts from a nil accumulator and two
// query alts. A "." input leaves the slot for that alt nil.
var byAltTests = []struct {
	in  []interface{}
	out [][]string
}{
	{
		in:  []interface{}{"AAA", "."},
		out: [][]string{{"AAA"}, nil},
	},
	{
		in:  []interface{}{"AAA", "BBB"},
		out: [][]string{{"AAA"}, {"BBB"}},
	},
	{
		in:  []interface{}{".", "BBB"},
		out: [][]string{nil, {"BBB"}},
	},
}

// TestByAlt runs every case in byAltTests through byAlt with two query alts
// and a nil accumulator, and stops at the first result that does not
// deep-equal the expected output.
func TestByAlt(t *testing.T) {
	alts := []string{"C", "T"}
	for _, tc := range byAltTests {
		// each case starts from scratch: nothing accumulated yet.
		got := byAlt(tc.in, alts, nil)
		if reflect.DeepEqual(got, tc.out) {
			continue
		}
		t.Fatalf("got %v. expected %v", got, tc.out)
	}
}
1 change: 1 addition & 0 deletions api/reducers.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,4 +272,5 @@ var Reducers = map[string]Reducer{
"div2": Reducer(div2),
"DP2": Reducer(dp2),
"setid": Reducer(setid),
"by_alt": Reducer(concat),
}
9 changes: 7 additions & 2 deletions docs/CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
v0.2.7 (dev)
------------
+ restore multiple threads per annotation file after fix in biogo/hts (#64)
+ add `setid` builtin to set the ID field and remove need for lua and fix some bugs.
+ new op `setid` builtin to set the ID field and remove need for lua and fix some bugs.
+ fix bug in `self` with multiple alternates when there were multiple overlaps (thanks Matthew).
+ fix for #68, #69
+ fix for #68, #69 -- with Number=A, op="self", the output will always have a number of elements
equal to the number of alternates. If there are > 1 annos for a given site, later values will
overwrite previous.
+ new `op` by_alt that will have Number=A and will append multiple annotations for the same alternate
(from the same file) and output them as pipe-delimited. e.g with 2 alts, it might look like: `0.111,0.222|0.333`
when the 1st alternate has a single value and the 2nd alternate has 2 values.

v0.2.6
------
Expand Down
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ this case, the op determines how the many values are `reduced`. Valid operations
+ min // numbers only
+ sum // numbers only
+ uniq // comma-delimited list of unique values
+ by_alt // comma-delimited by alt, pipe-delimited (|) for multiple annos for the same alt.

There are some operations that are only for `postannotation`:

Expand Down
2 changes: 2 additions & 0 deletions tests/functional-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ multiallelics() {
}
run check_multiallelics multiallelics
assert_exit_code 0
assert_in_stdout "STR_by=hello,goodbye|goodbye-again"
assert_in_stdout "AF_by=0.000599042|0.99,0.00299521"
idtest() {
vcfanno -lua tests/id-test/some.lua tests/id-test/small.toml tests/id-test/small.vcf.gz
Expand Down
2 changes: 1 addition & 1 deletion tests/id-test/small.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
file="tests/id-test/dbsnp.small.vcf.gz"
fields=["ID", "CAF"]
names=["rs_ids", "CAF"]
ops=["self", "self"]
ops=["by_alt", "self"]

[[annotation]]
file="tests/id-test/cosmic.small.vcf.gz"
Expand Down
Binary file modified tests/multiple-alts/ma-db.vcf.gz
Binary file not shown.
Binary file modified tests/multiple-alts/ma-db.vcf.gz.tbi
Binary file not shown.
6 changes: 3 additions & 3 deletions tests/multiple-alts/ma.conf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[[annotation]]
file="tests/multiple-alts/ma-db.vcf.gz"
fields=["AF", "STR"]
names= ["AF_out", "STR_out"]
ops=["self", "self"]
fields=["AF", "STR", "AF", "STR"]
names= ["AF_out", "STR_out", "AF_by", "STR_by"]
ops=["self", "self", "by_alt", "by_alt"]
2 changes: 1 addition & 1 deletion vcfanno.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
"github.com/brentp/xopen"
)

var VERSION = "0.2.7-beta2"
var VERSION = "0.2.7-beta3"

func envGet(name string, vdefault int) int {
sval := os.Getenv(name)
Expand Down

0 comments on commit 7bda8bb

Please sign in to comment.