Abolish unfilled likelihoods and revamp VariantAnnotator (#6172)
davidbenjamin authored Nov 25, 2019
1 parent b922fef commit 47045a3
Showing 98 changed files with 426 additions and 4,430 deletions.
@@ -5,6 +5,7 @@
* is the standard long Option name, and the value of the constant is the standard shortName.
*/
public final class StandardArgumentDefinitions {

private StandardArgumentDefinitions(){}

public static final String INPUT_LONG_NAME = "input";
@@ -13,6 +14,8 @@ private StandardArgumentDefinitions(){}
public static final String VARIANT_LONG_NAME = "variant";
public static final String FEATURE_LONG_NAME = "feature";
public static final String INTERVALS_LONG_NAME = "intervals";
public static final String COMPARISON_LONG_NAME = "comparison";
public static final String RESOURCE_LONG_NAME = "resource";
public static final String READ_INDEX_LONG_NAME = "read-index";
public static final String USE_ORIGINAL_QUALITIES_LONG_NAME = "use-original-qualities";
public static final String LENIENT_LONG_NAME = "lenient";
@@ -49,6 +52,7 @@ private StandardArgumentDefinitions(){}
public static final String VARIANT_SHORT_NAME = "V";
public static final String FEATURE_SHORT_NAME = "F";
public static final String INTERVALS_SHORT_NAME = "L";
public static final String COMPARISON_SHORT_NAME = "comp";
public static final String READ_INDEX_SHORT_NAME = READ_INDEX_LONG_NAME;
public static final String LENIENT_SHORT_NAME = "LE";
public static final String READ_VALIDATION_STRINGENCY_SHORT_NAME = "VS";
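As an aside, a hypothetical usage sketch of the new constants (the class name, field name, and doc string below are illustrative, not part of this commit): a tool can reference the shared long/short names instead of repeating the literal strings "comparison" and "comp".

import java.util.ArrayList;
import java.util.List;

import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureInput;

// Illustrative only: an argument declaration that reuses the constants added above.
public final class ExampleComparisonArguments {
    @Argument(fullName = StandardArgumentDefinitions.COMPARISON_LONG_NAME,
              shortName = StandardArgumentDefinitions.COMPARISON_SHORT_NAME,
              doc = "Comparison VCF file(s)", optional = true)
    public List<FeatureInput<VariantContext>> comps = new ArrayList<>();
}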
@@ -9,11 +9,13 @@

public final class DbsnpArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;
public static final String DBSNP_LONG_NAME = "dbsnp";
public static final String DBSNP_SHORT_NAME = "D";

/**
* A dbSNP VCF file.
*/
@Argument(fullName="dbsnp", shortName = "D", doc="dbSNP file", optional=true)
@Argument(fullName= DBSNP_LONG_NAME, shortName = DBSNP_SHORT_NAME, doc="dbSNP file", optional=true)
public FeatureInput<VariantContext> dbsnp;

}
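A small sketch of how the extracted constants might be used on the caller side (assuming DbsnpArgumentCollection keeps its current package, org.broadinstitute.hellbender.cmdline.argumentcollections, and with a placeholder file name): composing a command line from the constant means a rename is caught at compile time instead of silently breaking a hard-coded string.

import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;

// Illustrative only: build a command-line flag from the constant rather than the literal "dbsnp".
final class DbsnpArgSketch {
    public static void main(String[] args) {
        final String dbsnpArg = "--" + DbsnpArgumentCollection.DBSNP_LONG_NAME + " my_dbsnp.vcf";
        System.out.println(dbsnpArg);  // prints: --dbsnp my_dbsnp.vcf
    }
}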
@@ -1,11 +1,11 @@
package org.broadinstitute.hellbender.engine.spark.datasources;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.util.FileExtensions;
import org.broadinstitute.hellbender.utils.SerializableFunction;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.FileExtensions;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.SerializableFunction;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
@@ -1,14 +1,11 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.Collections;
import java.util.List;
@@ -40,12 +37,9 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc

public static OptionalDouble getReadBaseQuality(final GATKRead read, final int refLoc) {
Utils.nonNull(read);
return OptionalDouble.of(read.getBaseQuality(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL)));
}

@Override
// When we have a pileup element we only need its underlying read in order to compute
protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
return OptionalDouble.of(p.getQual());
final int readCoordinate = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true);
return readCoordinate == ReadUtils.CLIPPING_GOAL_NOT_REACHED ? OptionalDouble.empty() : OptionalDouble.of(read.getBaseQuality(readCoordinate));
}

}
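The behavioral change above is that an unreachable reference coordinate now yields OptionalDouble.empty() instead of a quality looked up at a sentinel offset. A minimal, self-contained sketch of that pattern (plain Java, with an array lookup standing in for ReadUtils and -1 standing in for CLIPPING_GOAL_NOT_REACHED):

import java.util.OptionalDouble;

final class BaseQualityLookupSketch {
    // Returns empty when the reference position maps to no base on the read, mirroring the
    // empty-result handling introduced above (stand-in logic, not GATK code).
    static OptionalDouble baseQualityAt(final byte[] baseQualities, final int readCoordinate) {
        return (readCoordinate < 0 || readCoordinate >= baseQualities.length)
                ? OptionalDouble.empty()
                : OptionalDouble.of(baseQualities[readCoordinate]);
    }

    public static void main(String[] args) {
        final byte[] quals = {30, 31, 32};
        System.out.println(baseQualityAt(quals, 1));   // OptionalDouble[31.0]
        System.out.println(baseQualityAt(quals, -1));  // OptionalDouble.empty
    }
}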
@@ -1,14 +1,11 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.Collections;
import java.util.List;
@@ -41,10 +38,4 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc
return OptionalDouble.of(AlignmentUtils.getNumHardClippedBases(read));
}

@Override
protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
Utils.nonNull(p);
// default to returning the same value
return getElementForRead(p.getRead(),refLoc);
}
}
}
@@ -56,42 +56,7 @@ public void annotate(final ReferenceContext ref,
// make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a subset of AlleleLikelihoods alleles " + likelihoods.alleles());

int[] counts;
if (likelihoods.hasFilledLikelihoods()) {
// Compute based on the alignment map
counts = annotateWithLikelihoods(vc, g, alleles, likelihoods);
} else if (likelihoods.evidenceCount()==0) {
// No reads, thus cant compute the AD so do nothing
return;
} else if (vc.isSNP()) {
// Compute based on pileup bases at the snp site (won't match haplotype caller output)
counts = annotateWithPileup(vc, likelihoods.getStratifiedPileups(vc).get(g.getSampleName()));
} else {
// Otherwise return an empty AD field for an indel.
counts = new int[vc.getNAlleles()];
}

gb.AD(counts);
}

private int[] annotateWithPileup(final VariantContext vc, List<PileupElement> pileupElements) {

final HashMap<Byte, Integer> alleleCounts = new HashMap<>();
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele.getBases()[0], 0);
}
for ( final PileupElement p : pileupElements) {
// only count bases that support alleles in the variant context
alleleCounts.computeIfPresent(p.getBase(), (base, count) -> count+ 1);
}

// we need to add counts in the correct order
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
for (int i = 0; i < vc.getNAlleles() -1; i++) {
counts[i + 1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
}
return counts;
gb.AD(annotateWithLikelihoods(vc, g, alleles, likelihoods));
}

protected int[] annotateWithLikelihoods(VariantContext vc, Genotype g, Set<Allele> alleles, final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
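With the pileup fallback gone, allele depths come only from the filled likelihoods' per-read allele assignments. A rough stand-in sketch of that tally (not GATK code; alleles are plain strings here for brevity):

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class AlleleDepthSketch {
    // Tally per-allele read support from a best-allele assignment per read; reads assigned to
    // alleles outside the requested set are ignored, as in the likelihoods-based AD computation.
    static int[] countSupport(final List<String> bestAllelePerRead, final List<String> alleles) {
        final Map<String, Integer> index = new LinkedHashMap<>();
        for (int i = 0; i < alleles.size(); i++) {
            index.put(alleles.get(i), i);
        }
        final int[] counts = new int[alleles.size()];
        for (final String allele : bestAllelePerRead) {
            final Integer i = index.get(allele);
            if (i != null) {
                counts[i]++;
            }
        }
        return counts;
    }

    public static void main(String[] args) {
        System.out.println(java.util.Arrays.toString(
                countSupport(List.of("A", "A", "T", "A"), List.of("A", "T"))));  // [3, 1]
    }
}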
@@ -9,7 +9,6 @@
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

@@ -60,14 +59,6 @@ protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesCont
return ( tableFromPerSampleAnnotations != null )? annotationForOneTable(pValueForContingencyTable(tableFromPerSampleAnnotations)) : null;
}

@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, List<PileupElement>> stratifiedContexts,
final VariantContext vc){
final int[][] tableNoFiltering = getPileupContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), -1, MIN_COUNT);
final int[][] tableFiltering = getPileupContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
return annotationForOneTable(Math.max(pValueForContingencyTable(tableFiltering), pValueForContingencyTable(tableNoFiltering)));
}

@Override
protected Map<String, Object> calculateAnnotationFromLikelihoods(final AlleleLikelihoods<GATKRead, Allele> likelihoods,
final VariantContext vc){
@@ -5,7 +5,6 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

@@ -47,9 +46,4 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc
return OptionalDouble.empty();
}

@Override
protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
// todo it's possible this should throw, as this method should never have been called since getElementForRead(read,refloc,mostLikelyAllele) was overridden
return OptionalDouble.empty();
}
}
@@ -1,13 +1,10 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.Collections;
import java.util.List;
@@ -44,8 +41,4 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc
return OptionalDouble.of(read.getMappingQuality());
}

protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
// default to returning the same value
return OptionalDouble.of(p.getMappingQual());
}
}
@@ -39,25 +39,7 @@ public Map<String, Object> annotate(final ReferenceContext ref,
final int refLoc = vc.getStart();

if( likelihoods != null) {
if (likelihoods.hasFilledLikelihoods()) {
// Default to using the likelihoods to calculate the rank sum
fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals, refLoc);

// Use the pileup to stratify otherwise
} else {
for (final PileupElement p : likelihoods.getStratifiedPileups(vc).values().stream().flatMap(Collection::stream).collect(Collectors.toList())) {
if (PileupElement.isUsableBaseForAnnotation(p)) {
final OptionalDouble value = getElementForPileupElement(p, refLoc);
if (value.isPresent() && value.getAsDouble() != INVALID_ELEMENT_FROM_READ) {
if (vc.getReference().equals(Allele.create(p.getBase(), true))) {
refQuals.add(value.getAsDouble());
} else if (vc.hasAllele(Allele.create(p.getBase()))) {
altQuals.add(value.getAsDouble());
}
}
}
}
}
fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals, refLoc);
}


@@ -129,14 +111,4 @@ protected boolean isUsableRead(final GATKRead read, final int refLoc) {
return read.getMappingQuality() != 0 && read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
}

/**
* Get the element for the given read at the given reference position
*
* This can return an OptionalDouble.empty() if the annotation should not be computed based on the pileups.
*
* @param p the pileup element
* @return a Double representing the element to be used in the rank sum test, or null if it should not be used
*/
protected abstract OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc);

}
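For context, the ref/alt quality lists collected by fillQualsFromLikelihood feed a Mann-Whitney style rank-sum comparison (GATK has a dedicated MannWhitneyU utility for this; the toy version below only illustrates the statistic, not the project's implementation):

import java.util.List;

final class RankSumSketch {
    // U statistic: for every (ref, alt) pair, count whether the alt value outranks the ref value,
    // with ties counted as 0.5.
    static double mannWhitneyU(final List<Double> ref, final List<Double> alt) {
        double u = 0;
        for (final double a : alt) {
            for (final double r : ref) {
                u += a > r ? 1 : (a == r ? 0.5 : 0);
            }
        }
        return u;
    }

    public static void main(String[] args) {
        System.out.println(mannWhitneyU(List.of(30.0, 32.0), List.of(20.0, 31.0)));  // 1.0
    }
}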
@@ -6,7 +6,6 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
@@ -81,12 +80,6 @@ public static OptionalDouble getReadPosition(final GATKRead read, final int refL
return OptionalDouble.of(readPos);
}

@Override
protected OptionalDouble getElementForPileupElement(final PileupElement p, int refLoc) {
final int offset = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
return OptionalDouble.of(getFinalVariantReadPosition(p.getRead(), offset));
}

// Utility methods necessary for computing the rank sum using read pileups.
// TODO these methods contain bugs ported from GATK3, see issue https://github.com/broadinstitute/gatk/issues/4450 for progress towards fixing them

@@ -42,22 +42,13 @@ public Map<String, Object> annotate(final ReferenceContext ref,
}

if (likelihoods != null) {
if (vc.isSNP() && !likelihoods.hasFilledLikelihoods() && (likelihoods.evidenceCount() != 0)) {
return calculateAnnotationFromStratifiedContexts(likelihoods.getStratifiedPileups(vc), vc);
}

if (likelihoods.hasFilledLikelihoods()) {
return calculateAnnotationFromLikelihoods(likelihoods, vc);
}
return calculateAnnotationFromLikelihoods(likelihoods, vc);
}
return Collections.emptyMap();
}

protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);

protected abstract Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, List<PileupElement>> stratifiedContexts,
final VariantContext vc);

protected abstract Map<String, Object> calculateAnnotationFromLikelihoods(final AlleleLikelihoods<GATKRead, Allele> likelihoods,
final VariantContext vc);

@@ -249,34 +240,5 @@ public static int[][] decodeSBBS( final int[] array ) {
return table;
}

/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table over SNP sites
*/
protected static int[][] getPileupContingencyTable(final Map<String, List<PileupElement>> stratifiedContexts,
final Allele ref,
final List<Allele> allAlts,
final int minQScoreToConsider,
final int minCount ) {
int[][] table = new int[ARRAY_DIM][ARRAY_DIM];

for (final Map.Entry<String, List<PileupElement>> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[ARRAY_SIZE];
for (final PileupElement p : sample.getValue()) {
if (PileupElement.isUsableBaseForAnnotation(p) && Math.min(p.getQual(), p.getMappingQual()) >= minQScoreToConsider) {
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, allAlts);
}
}

if ( passesMinimumThreshold( myTable, minCount ) ) {
copyToMainTable(myTable, table);
}
}
return table;
}


}
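The removed javadoc described the 2x2 table both code paths produced; the surviving likelihoods-based path still fills the same shape (rows = ref/alt allele, columns = forward/reverse strand). A toy illustration of that layout (not GATK code):

import java.util.Arrays;

final class StrandTableSketch {
    // Increment the cell for a read's supported allele and strand orientation.
    static void addCount(final int[][] table, final boolean isAlt, final boolean isReverse) {
        table[isAlt ? 1 : 0][isReverse ? 1 : 0]++;
    }

    public static void main(String[] args) {
        final int[][] table = new int[2][2];
        addCount(table, false, false);  // ref, forward
        addCount(table, false, true);   // ref, reverse
        addCount(table, true, false);   // alt, forward
        System.out.println(Arrays.deepToString(table));  // [[1, 1], [1, 0]]
    }
}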
@@ -7,7 +7,6 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

@@ -130,14 +129,6 @@ protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesCont
return tableFromPerSampleAnnotations != null ? annotationForOneTable(calculateSOR(tableFromPerSampleAnnotations)) : null;
}

@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(Map<String, List<PileupElement>> stratifiedContexts,
final VariantContext vc){
final int[][] tableNoFiltering = getPileupContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), -1, MIN_COUNT);
final double ratio = calculateSOR(tableNoFiltering);
return annotationForOneTable(ratio);
}


@Override
protected Map<String, Object> calculateAnnotationFromLikelihoods(final AlleleLikelihoods<GATKRead, Allele> likelihoods, final VariantContext vc){