Skip to content

Commit 6596ea8

Browse files
authored
VariantsToTable: Include all fields when none are specified (#7911)
VariantsToTable now outputs all fields declared in the VCF header when no fields are selected. Added integration tests to cover this new functionality. Fixes #7677.
1 parent c40187a commit 6596ea8

File tree

7 files changed

+373
-6
lines changed

7 files changed

+373
-6
lines changed

src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java

+41-6
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,14 @@
22

33
import htsjdk.variant.variantcontext.Allele;
44
import htsjdk.variant.variantcontext.VariantContext;
5-
import htsjdk.variant.vcf.VCFConstants;
6-
import htsjdk.variant.vcf.VCFHeader;
7-
import htsjdk.variant.vcf.VCFHeaderLineCount;
5+
import htsjdk.variant.vcf.*;
86
import org.apache.logging.log4j.LogManager;
97
import org.apache.logging.log4j.Logger;
108
import org.broadinstitute.barclay.argparser.Advanced;
119
import org.broadinstitute.barclay.argparser.Argument;
1210
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
1311
import org.broadinstitute.barclay.help.DocumentedFeature;
1412
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
15-
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
1613
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;
1714
import org.broadinstitute.hellbender.engine.FeatureContext;
1815
import org.broadinstitute.hellbender.engine.ReadsContext;
@@ -38,7 +35,8 @@
3835
* This tool extracts specified fields for each variant in a VCF file to a tab-delimited table, which may be easier
3936
* to work with than a VCF. By default, the tool only extracts PASS or . (unfiltered) variants in the VCF file. Filtered variants may be
4037
* included in the output by adding the --show-filtered flag. The tool can extract both INFO (i.e. site-level) fields and
41-
* FORMAT (i.e. sample-level) fields.
38+
* FORMAT (i.e. sample-level) fields. If the tool is run without specifying any fields, it defaults to including all fields
39+
* declared in the VCF header.
4240
* </p>
4341
*
4442
* <h4>INFO/site-level fields</h4>
@@ -100,6 +98,12 @@
10098
* 1 65068538 SNP 49,0 35,4
10199
* 1 111146235 SNP 69,1 77,4
102100
* </pre>
101+
* <pre>
102+
* gatk VariantsToTable \
103+
* -V input.vcf \
104+
* -O output.table
105+
* </pre>
106+
* <p>would produce a file that includes all fields declared in the VCF header.</p>
103107
*
104108
* <h3>Notes</h3>
105109
* <ul>
@@ -212,9 +216,39 @@ public void onTraversalStart() {
212216
inputHeader = getHeaderForVariants();
213217
outputStream = createPrintStream();
214218

219+
// if no fields are specified, default to including every field declared in the header in the output table
220+
if(fieldsToTake.isEmpty() && genotypeFieldsToTake.isEmpty() && asFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()){
221+
logger.warn("No fields were specified. All fields declared in the VCF header will be included in the output table.");
222+
223+
// add all mandatory VCF fields (except INFO)
224+
for(VCFHeader.HEADER_FIELDS headerField : VCFHeader.HEADER_FIELDS.values()){
225+
if(!headerField.name().equals(VCFHeader.HEADER_FIELDS.INFO.name())) {
226+
fieldsToTake.add(headerField.name());
227+
}
228+
}
229+
230+
// add all INFO fields present in VCF header
231+
for (final VCFInfoHeaderLine infoLine : inputHeader.getInfoHeaderLines()) {
232+
fieldsToTake.add(infoLine.getID());
233+
}
234+
235+
// add all FORMAT fields present in VCF header
236+
for (final VCFFormatHeaderLine formatLine : inputHeader.getFormatHeaderLines()) {
237+
// ensure GT field listed as first FORMAT field
238+
if(formatLine.getID().equals(VCFConstants.GENOTYPE_KEY)) {
239+
genotypeFieldsToTake.add(0, formatLine.getID());
240+
}
241+
else {
242+
genotypeFieldsToTake.add(formatLine.getID());
243+
}
244+
}
245+
}
246+
247+
// if fields specified, but none are genotype fields, set samples to empty
215248
if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()) {
216249
samples = Collections.emptySortedSet();
217-
} else {
250+
}
251+
else {
218252
final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
219253
samples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
220254

@@ -238,6 +272,7 @@ public void onTraversalStart() {
238272
outputStream.println("RecordID\tSample\tVariable\tValue");
239273
} else {
240274
final List<String> fields = new ArrayList<>();
275+
241276
fields.addAll(fieldsToTake);
242277
fields.addAll(asFieldsToTake);
243278
fields.addAll(createGenotypeFields());

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java

+41
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
66
import org.testng.annotations.Test;
77

8+
import java.io.File;
89
import java.io.IOException;
910
import java.util.Arrays;
1011

@@ -236,4 +237,44 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException {
236237
spec.setTrimWhiteSpace(false);
237238
spec.executeTest("testMoltenOutputWithMultipleAlleles", this);
238239
}
240+
241+
@Test
242+
public void testNoFieldsSpecifiedNoSamples() throws IOException {
243+
final File inputFile = new File(getToolTestDataDir(), "VCFWithoutGenotypes_dbsnp_138.snippet.vcf");
244+
final File outputFile = createTempFile("noFieldsSpecifiedOutput", ".table");
245+
final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedNoSamples.table");
246+
247+
final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(),
248+
"-O", outputFile.getAbsolutePath()};
249+
runCommandLine(args);
250+
251+
IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile);
252+
}
253+
254+
@Test
255+
public void testNoFieldsSpecifiedWithSamples() throws IOException {
256+
final File inputFile = new File(getToolTestDataDir(), "VCFWithGenotypes_1000G.phase3.snippet.vcf");
257+
final File outputFile = createTempFile("noFieldsSpecifiedWithSamplesOutput", ".table");
258+
final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedWithSamples.table");
259+
260+
final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(),
261+
"-O", outputFile.getAbsolutePath()};
262+
runCommandLine(args);
263+
264+
IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile);
265+
}
266+
267+
@Test
268+
public void testNoFieldsSpecifiedFormatFieldInHeaderNoSamples() throws IOException {
269+
final File inputFile = new File(getToolTestDataDir(), "VCFWithoutGenotypesWithFormatField_dbsnp_138.snippet.vcf");
270+
final File outputFile = createTempFile("noFieldsSpecifiedNoSamplesOutput", ".table");
271+
final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedNoSamples.table");
272+
273+
final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(),
274+
"-O", outputFile.getAbsolutePath()};
275+
runCommandLine(args);
276+
277+
IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile);
278+
}
279+
239280
}

0 commit comments

Comments
 (0)