Skip to content

Commit

Permalink
more lenient handling of CSQ fields. See #122
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Feb 25, 2022
1 parent 6f116df commit 3fc3bb5
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ v0.2.8
======
+ [internal] use same zip library for make-gnotate as gnotate. this should
improve speed for reading many small chromosomes.
+ don't quit on CSQ/ANN/BCSQ fields that don't have enough information to parse (#122)

v0.2.7
======
Expand Down
4 changes: 2 additions & 2 deletions src/slivarpkg/comphet.nim
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ proc main*(dropfirst:bool=false) =
if f == "": continue
try:
var gf:GeneIndexes
ivcf.set_csq_fields(f, gf)
gene_fields.add(gf)
if ivcf.set_csq_fields(f, gf).len > 0:
gene_fields.add(gf)
# add this to the field names so we can clear it as needed
except KeyError:
continue
Expand Down
5 changes: 3 additions & 2 deletions src/slivarpkg/evaluator.nim
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,9 @@ proc newEvaluator*(ivcf:VCF, samples: seq[Sample], groups: seq[Group], float_exp
try:
var gf:GeneIndexes
var fields = ivcf.set_csq_fields(f, gf)
result.gene_fields.add(gf)
result.VCF[f] = fields
if fields.len > 0:
result.gene_fields.add(gf)
result.VCF[f] = fields
# add this to the field names so we can clear it as needed
except KeyError:
continue
Expand Down
14 changes: 10 additions & 4 deletions src/slivarpkg/tsv.nim
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,12 @@ proc set_csq_fields*(ivcf:VCF, field:string, gene_fields: var GeneIndexes, csq_c
var spl = (if "Format: '" in desc: "Format: '" else: "Format: ")
if spl notin desc:
spl = ": '"
var adesc = desc.split(spl)[1].split("'")[0].strip().strip(chars={'"', '\''}).multiReplace(("[", ""), ("]", ""), ("'", ""), ("*", "")).split("|")
var adesc:seq[string]
try:
adesc = desc.split(spl)[1].split("'")[0].strip().strip(chars={'"', '\''}).multiReplace(("[", ""), ("]", ""), ("'", ""), ("*", "")).split("|")
except IndexDefect:
# format field description not as expected. return empty result and don't fill gene fields
return result

for v in adesc.mitems: v = v.toUpperAscii.strip()
result = adesc
Expand All @@ -132,7 +137,8 @@ proc set_csq_fields*(ivcf:VCF, field:string, gene_fields: var GeneIndexes, csq_c
if gene_fields.transcript != -1: break

if gene_fields.gene == -1:
quit &"[slivar] unable to find gene field in {field}"
stderr.write_line &"[slivar] warning: found {field} but it did not contain a description that indicated gene field. skipping"
raise newException(KeyError, "&[slivar] gene field not found")
if gene_fields.consequence == -1:
quit &"[slivar] unable to find consequence field in {field}"
if gene_fields.transcript == -1:
Expand Down Expand Up @@ -268,8 +274,8 @@ or gene->pLI with:
impact_order = adjustOrder(opts.impact_order.readFile)

if opts.csq_field != "":
set_csq_fields(ivcf, opts.csq_field, gene_fields, opts.csq_column)
tsv_header.add(["gene", "highest_impact"])
if set_csq_fields(ivcf, opts.csq_field, gene_fields, opts.csq_column).len > 0:
tsv_header.add(["gene", "highest_impact"])

for f in opts.sample_field:
doAssert ivcf.header.get(f, BCF_HEADER_TYPE.BCF_HL_INFO)["Type"] == "String"
Expand Down
2 changes: 1 addition & 1 deletion src/slivarpkg/version.nim
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
const slivarVersion* = "0.2.7"
const slivarVersion* = "0.2.8"
const slivarGitCommit* = staticExec("git rev-parse --verify HEAD")

0 comments on commit 3fc3bb5

Please sign in to comment.