Skip to content

Commit

Permalink
do not sort VCF samples alphabetically
Browse files Browse the repository at this point in the history
  • Loading branch information
jonbaber committed Mar 28, 2019
1 parent 7052b8a commit 4bfc93d
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@

<amber.version>2.2</amber.version>
<cobalt.version>1.5</cobalt.version>
<purple.version>2.22</purple.version>
<purple.version>2.23</purple.version>

<immutables.version>2.4.4</immutables.version>
<htsjdk.version>2.12.0</htsjdk.version>
Expand Down
6 changes: 4 additions & 2 deletions purity-ploidy-estimator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ Argument | Default | Description
run_dir | None | If provided, parameters `amber`, `cobalt` and `output_dir` are no longer mandatory and will have default values of <run_dir>/amber, <run_dir>/cobalt and <run_dir>/purple respectively unless overridden.
threads | 2 | Number of threads to use.
somatic_vcf | None | Optional location of somatic variants vcf. Sample name must match tumor parameter. GZ files supported.
structural_vcf | None | Optional location of structural variants vcf. Sample name must match tumor parameter. GZ files supported.
sv_recovery_vcf | None | Optional location of structural variants recovery vcf. Sample name must match tumor parameter. GZ files supported.
structural_vcf | None | Optional location of structural variants vcf. GZ files supported.
sv_recovery_vcf | None | Optional location of structural variants recovery vcf. GZ files supported.
circos | None | Optional path to circos binary. When supplied, circos graphs will be written to <output_dir>/plot
db_enabled | None | This parameter has no arguments. Optionally include if you wish to persist results to a database. Database initialization script can be found [here](https://github.com/hartwigmedical/hmftools/blob/master/patient-db/src/main/resources/generate_database.sql).
db_user | None | Database username. Mandatory if db_enabled.
Expand Down Expand Up @@ -502,6 +502,8 @@ Threads | Elapsed Time| CPU Time | Peak Mem


## Version History
- 2.23
- Fixed bug where SV VCF samples were being sorted into alphabetical order. They are now written in the same order as in the input VCF.
- 2.22
- Added new tool to annotate SNPs and INDELS in strelka output with AD field. Will **not** override existing AD values.
- Example Usage: `java -Xmx4G -cp purple.jar com.hartwig.hmftools.purple.tools.AnnotateStrelkaWithAllelicDepth -in strelka.vcf -out strelka.annotated.vcf`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.TreeSet;
import java.util.function.Predicate;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import com.hartwig.hmftools.common.chromosome.Chromosome;
Expand Down Expand Up @@ -82,7 +83,6 @@ class PurpleStructuralVariantSupplier {
private static final Allele INCREASING_ALLELE = Allele.create(".N", false);
private static final Allele DECREASING_ALLELE = Allele.create("N.", false);

private final String purpleVersion;
private final String outputVCF;
private final Optional<VCFHeader> header;
private final TreeSet<VariantContext> variantContexts;
Expand All @@ -95,14 +95,12 @@ class PurpleStructuralVariantSupplier {
header = Optional.empty();
variantContexts = new TreeSet<>();
outputVCF = Strings.EMPTY;
purpleVersion = "0";
}

PurpleStructuralVariantSupplier(@NotNull final String version, @NotNull final String templateVCF, @NotNull final String outputVCF) {
final VCFFileReader vcfReader = new VCFFileReader(new File(templateVCF), false);
this.outputVCF = outputVCF;
purpleVersion = version;
header = Optional.of(generateOutputHeader(vcfReader.getFileHeader()));
header = Optional.of(generateOutputHeader(version, vcfReader.getFileHeader()));
variantContexts = new TreeSet<>(new VCComparator(header.get().getSequenceDictionary()));
for (VariantContext context : vcfReader) {
variantContexts.add(context);
Expand Down Expand Up @@ -294,8 +292,9 @@ public List<StructuralVariant> variants() {
}

@NotNull
private VCFHeader generateOutputHeader(@NotNull final VCFHeader template) {
final VCFHeader outputVCFHeader = new VCFHeader(template.getMetaDataInInputOrder(), template.getSampleNamesInOrder());
@VisibleForTesting
static VCFHeader generateOutputHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) {
final VCFHeader outputVCFHeader = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());
outputVCFHeader.addMetaDataLine(new VCFHeaderLine("purpleVersion", purpleVersion));

outputVCFHeader.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("GT"));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.hartwig.hmftools.purple;

import static org.junit.Assert.assertEquals;

import java.util.Collections;

import com.google.common.collect.Lists;

import org.junit.Test;

import htsjdk.variant.vcf.VCFHeader;

/**
 * Tests for the VCF header produced by {@code PurpleStructuralVariantSupplier.generateOutputHeader}.
 */
public class PurpleStructuralVariantSupplierTest {

    /**
     * Builds a template header whose genotype samples are deliberately listed in
     * reverse alphabetical order and checks that the generated output header
     * reports them in exactly that order rather than sorted.
     */
    @Test
    public void testHeaderSamplesAreNotSorted() {
        // "BBBBB" precedes "AAAAA" on purpose: an alphabetical sort would swap them.
        final String firstSample = "BBBBB";
        final String secondSample = "AAAAA";

        final VCFHeader template = new VCFHeader(Collections.emptySet(), Lists.newArrayList(firstSample, secondSample));
        final VCFHeader generated = PurpleStructuralVariantSupplier.generateOutputHeader("2.23", template);

        assertEquals(2, generated.getGenotypeSamples().size());
        assertEquals(firstSample, generated.getGenotypeSamples().get(0));
        assertEquals(secondSample, generated.getGenotypeSamples().get(1));
    }

}

0 comments on commit 4bfc93d

Please sign in to comment.