2
2
3
3
import htsjdk .variant .variantcontext .Allele ;
4
4
import htsjdk .variant .variantcontext .VariantContext ;
5
- import htsjdk .variant .vcf .VCFConstants ;
6
- import htsjdk .variant .vcf .VCFHeader ;
7
- import htsjdk .variant .vcf .VCFHeaderLineCount ;
5
+ import htsjdk .variant .vcf .*;
8
6
import org .apache .logging .log4j .LogManager ;
9
7
import org .apache .logging .log4j .Logger ;
10
8
import org .broadinstitute .barclay .argparser .Advanced ;
11
9
import org .broadinstitute .barclay .argparser .Argument ;
12
10
import org .broadinstitute .barclay .argparser .CommandLineProgramProperties ;
13
11
import org .broadinstitute .barclay .help .DocumentedFeature ;
14
12
import org .broadinstitute .hellbender .cmdline .StandardArgumentDefinitions ;
15
- import org .broadinstitute .hellbender .utils .variant .GATKVCFConstants ;
16
13
import picard .cmdline .programgroups .VariantEvaluationProgramGroup ;
17
14
import org .broadinstitute .hellbender .engine .FeatureContext ;
18
15
import org .broadinstitute .hellbender .engine .ReadsContext ;
38
35
* This tool extracts specified fields for each variant in a VCF file to a tab-delimited table, which may be easier
39
36
* to work with than a VCF. By default, the tool only extracts PASS or . (unfiltered) variants in the VCF file. Filtered variants may be
40
37
* included in the output by adding the --show-filtered flag. The tool can extract both INFO (i.e. site-level) fields and
41
- * FORMAT (i.e. sample-level) fields.
38
+ * FORMAT (i.e. sample-level) fields. If the tool is run without specifying any fields, it defaults to include all fields
39
+ * declared in the VCF header.
42
40
* </p>
43
41
*
44
42
* <h4>INFO/site-level fields</h4>
100
98
* 1 65068538 SNP 49,0 35,4
101
99
* 1 111146235 SNP 69,1 77,4
102
100
* </pre>
101
+ * <pre>
102
+ * gatk VariantsToTable \
103
+ * -V input.vcf \
104
+ * -O output.table
105
+ * </pre>
106
+ * <p>would produce a file that includes all fields declared in the VCF header.</p>
103
107
*
104
108
* <h3>Notes</h3>
105
109
* <ul>
@@ -212,9 +216,39 @@ public void onTraversalStart() {
212
216
inputHeader = getHeaderForVariants ();
213
217
outputStream = createPrintStream ();
214
218
219
+ // if no fields were specified, default to including every field declared in the VCF header in the output table
220
+ if (fieldsToTake .isEmpty () && genotypeFieldsToTake .isEmpty () && asFieldsToTake .isEmpty () && asGenotypeFieldsToTake .isEmpty ()){
221
+ logger .warn ("No fields were specified. All fields declared in the VCF header will be included in the output table." );
222
+
223
+ // add all mandatory VCF fields (except INFO)
224
+ for (VCFHeader .HEADER_FIELDS headerField : VCFHeader .HEADER_FIELDS .values ()){
225
+ if (!headerField .name ().equals (VCFHeader .HEADER_FIELDS .INFO .name ())) {
226
+ fieldsToTake .add (headerField .name ());
227
+ }
228
+ }
229
+
230
+ // add all INFO fields present in VCF header
231
+ for (final VCFInfoHeaderLine infoLine : inputHeader .getInfoHeaderLines ()) {
232
+ fieldsToTake .add (infoLine .getID ());
233
+ }
234
+
235
+ // add all FORMAT fields present in VCF header
236
+ for (final VCFFormatHeaderLine formatLine : inputHeader .getFormatHeaderLines ()) {
237
+ // ensure GT field listed as first FORMAT field
238
+ if (formatLine .getID ().equals (VCFConstants .GENOTYPE_KEY )) {
239
+ genotypeFieldsToTake .add (0 , formatLine .getID ());
240
+ }
241
+ else {
242
+ genotypeFieldsToTake .add (formatLine .getID ());
243
+ }
244
+ }
245
+ }
246
+
247
+ // if no genotype fields are to be output (none were requested, or none are declared in the header), set samples to empty
215
248
if (genotypeFieldsToTake .isEmpty () && asGenotypeFieldsToTake .isEmpty ()) {
216
249
samples = Collections .emptySortedSet ();
217
- } else {
250
+ }
251
+ else {
218
252
final Map <String , VCFHeader > vcfHeaders = Collections .singletonMap (getDrivingVariantsFeatureInput ().getName (), getHeaderForVariants ());
219
253
samples = VcfUtils .getSortedSampleSet (vcfHeaders , GATKVariantContextUtils .GenotypeMergeType .REQUIRE_UNIQUE );
220
254
@@ -238,6 +272,7 @@ public void onTraversalStart() {
238
272
outputStream .println ("RecordID\t Sample\t Variable\t Value" );
239
273
} else {
240
274
final List <String > fields = new ArrayList <>();
275
+
241
276
fields .addAll (fieldsToTake );
242
277
fields .addAll (asFieldsToTake );
243
278
fields .addAll (createGenotypeFields ());
0 commit comments