Skip to content

Commit 3040848

Browse files
committed
Add option to keep combined AS annotations, as requested in #5698
1 parent 771ea2e commit 3040848

15 files changed

+55
-54
lines changed

src/main/java/org/broadinstitute/hellbender/tools/HaplotypeCallerSpark.java

+3-11
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,15 @@
1616
import org.broadinstitute.barclay.argparser.ArgumentCollection;
1717
import org.broadinstitute.barclay.argparser.BetaFeature;
1818
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
19-
import org.broadinstitute.barclay.argparser.*;
2019
import org.broadinstitute.barclay.help.DocumentedFeature;
2120
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
2221
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
2322
import org.broadinstitute.hellbender.engine.AssemblyRegion;
2423
import org.broadinstitute.hellbender.engine.AssemblyRegionEvaluator;
2524
import org.broadinstitute.hellbender.engine.FeatureContext;
2625
import org.broadinstitute.hellbender.engine.ShardBoundary;
27-
import org.broadinstitute.hellbender.engine.*;
2826
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
2927
import org.broadinstitute.hellbender.engine.spark.*;
30-
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
31-
import org.broadinstitute.hellbender.engine.spark.SparkSharder;
3228
import org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSink;
3329
import org.broadinstitute.hellbender.exceptions.UserException;
3430
import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
@@ -41,7 +37,6 @@
4137
import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile;
4238
import org.broadinstitute.hellbender.utils.io.IOUtils;
4339
import org.broadinstitute.hellbender.utils.read.GATKRead;
44-
import scala.Tuple2;
4540

4641
import java.io.IOException;
4742
import java.nio.file.Path;
@@ -51,9 +46,6 @@
5146
import java.util.HashSet;
5247
import java.util.Iterator;
5348
import java.util.List;
54-
import java.util.function.Function;
55-
import java.util.stream.Collectors;
56-
import java.util.stream.Stream;
5749

5850
/**
5951
* ********************************************************************************
@@ -181,7 +173,7 @@ private static void processAssemblyRegions(
181173
final Logger logger,
182174
final boolean createOutputVariantIndex) {
183175

184-
final VariantAnnotatorEngine variantannotatorEngine = new VariantAnnotatorEngine(annotations, hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE);
176+
final VariantAnnotatorEngine variantannotatorEngine = new VariantAnnotatorEngine(annotations, hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE, false);
185177

186178
final Path referencePath = IOUtils.getPath(reference);
187179
final ReferenceSequenceFile driverReferenceSequenceFile = new CachingIndexedFastaSequenceFile(referencePath);
@@ -237,7 +229,7 @@ protected Broadcast<Supplier<AssemblyRegionEvaluator>> assemblyRegionEvaluatorSu
237229
final String pathOnExecutor = SparkFiles.get(referenceFileName);
238230
final ReferenceSequenceFile taskReferenceSequenceFile = new CachingIndexedFastaSequenceFile(IOUtils.getPath(pathOnExecutor));
239231
final Collection<Annotation> annotations = makeVariantAnnotations();
240-
final VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(annotations, hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE);
232+
final VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(annotations, hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE, false);
241233
return assemblyRegionEvaluatorSupplierBroadcastFunction(ctx, hcArgs, getHeaderForReads(), taskReferenceSequenceFile, annotatorEngine);
242234
}
243235

@@ -250,7 +242,7 @@ private static Broadcast<Supplier<AssemblyRegionEvaluator>> assemblyRegionEvalua
250242
final Path referencePath = IOUtils.getPath(reference);
251243
final String referenceFileName = referencePath.getFileName().toString();
252244
final ReferenceSequenceFile taskReferenceSequenceFile = taskReferenceSequenceFile(referenceFileName);
253-
final VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(annotations, hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE);
245+
final VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(annotations, hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE, false);
254246
return assemblyRegionEvaluatorSupplierBroadcastFunction(ctx, hcArgs, header, taskReferenceSequenceFile, annotatorEngine);
255247
}
256248

src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ public void onTraversalStart() {
257257
}
258258

259259
// create the annotation engine
260-
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false);
260+
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false, false);
261261

262262
vcfWriter = getVCFWriter();
263263

src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
1717
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider;
1818
import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection;
19-
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.M2FiltersArgumentCollection;
2019
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
2120
import org.broadinstitute.hellbender.utils.SimpleInterval;
2221
import org.broadinstitute.hellbender.utils.Utils;
@@ -94,6 +93,8 @@ public final class GenotypeGVCFs extends VariantLocusWalker {
9493
public static final String ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME = "only-output-calls-starting-in-intervals";
9594
public static final String ALL_SITES_LONG_NAME = "include-non-variant-sites";
9695
public static final String ALL_SITES_SHORT_NAME = "all-sites";
96+
public static final String KEEP_COMBINED_LONG_NAME = "keep-combined-raw-annotations";
97+
public static final String KEEP_COMBINED_SHORT_NAME = "keep-combined";
9798
private static final String GVCF_BLOCK = "GVCFBlock";
9899
private VCFHeader outputHeader;
99100

@@ -127,6 +128,12 @@ public final class GenotypeGVCFs extends VariantLocusWalker {
127128
@Argument(fullName=CombineGVCFs.ALLELE_FRACTION_DELTA_LONG_NAME, doc = "Margin of error in allele fraction to consider a somatic variant homoplasmic")
128129
protected double afTolerance = 1e-3; //based on Q30 as a "good" base quality score
129130

131+
/**
132+
* If specified, keep the combined raw annotations (e.g. AS_SB_TABLE) after genotyping. This is applicable to Allele-Specific annotations.
133+
*/
134+
@Argument(fullName=KEEP_COMBINED_LONG_NAME, shortName = KEEP_COMBINED_SHORT_NAME, doc = "If specified, keep the combined raw annotations")
135+
protected boolean keepCombined = false;
136+
130137
@ArgumentCollection
131138
private GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
132139

@@ -199,7 +206,7 @@ public void onTraversalStart() {
199206

200207
final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples()); //todo should this be getSampleNamesInOrder?
201208

202-
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false);
209+
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false, keepCombined);
203210

204211
// Request INFO field annotations inheriting from RankSumTest and RMSAnnotation added to remove list
205212
for ( final InfoFieldAnnotation annotation : annotationEngine.getInfoAnnotations() ) {

src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ public void onTraversalStart() {
178178
final List<String> samples = getHeaderForVariants().getGenotypeSamples();
179179
variantSamples = new IndexedSampleList(samples);
180180

181-
annotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, comps, false);
181+
annotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, comps, false, false);
182182
annotatorEngine.addExpressions(expressionsToUse, resources, expressionAlleleConcordance );
183183

184184
// setup the header fields

src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotatorEngine.java

+15-10
Original file line numberDiff line numberDiff line change
@@ -35,26 +35,28 @@ public final class VariantAnnotatorEngine {
3535
private final VariantOverlapAnnotator variantOverlapAnnotator;
3636
private boolean expressionAlleleConcordance;
3737
private final boolean useRawAnnotations;
38+
private final boolean keepRawCombinedAnnotations;
3839

3940
private final static Logger logger = LogManager.getLogger(VariantAnnotatorEngine.class);
4041

4142
/**
4243
* Creates an annotation engine from a list of selected annotations output from command line parsing
4344
* @param annotationList list of annotation objects (with any parameters already filled) to include
4445
* @param dbSNPInput input for variants from a known set from DbSNP or null if not provided.
45-
* The annotation engine will mark variants overlapping anything in this set using {@link htsjdk.variant.vcf.VCFConstants#DBSNP_KEY}.
46+
* The annotation engine will mark variants overlapping anything in this set using {@link VCFConstants#DBSNP_KEY}.
4647
* @param featureInputs list of inputs with known variants.
47-
* The annotation engine will mark variants overlapping anything in those sets using the name given by {@link FeatureInput#getName()}.
48-
* Note: the DBSNP FeatureInput should be passed in separately, and not as part of this List - an GATKException will be thrown otherwise.
49-
* Note: there are no non-DBSNP comparison FeatureInputs an empty List should be passed in here, rather than null.
48+
* The annotation engine will mark variants overlapping anything in those sets using the name given by {@link FeatureInput#getName()}.
49+
* Note: the DBSNP FeatureInput should be passed in separately, and not as part of this List - a GATKException will be thrown otherwise.
50+
* Note: if there are no non-DBSNP comparison FeatureInputs, an empty List should be passed in here, rather than null.
5051
* @param useRaw When this is set to true, the annotation engine will call {@link ReducibleAnnotation#annotateRawData(ReferenceContext, VariantContext, ReadLikelihoods)}
51-
* on annotations that extend {@link ReducibleAnnotation}, instead of {@link InfoFieldAnnotation#annotate(ReferenceContext, VariantContext, ReadLikelihoods)},
52-
* which is the default for all annotations.
52+
* on annotations that extend {@link ReducibleAnnotation}, instead of {@link InfoFieldAnnotation#annotate(ReferenceContext, VariantContext, ReadLikelihoods)}, which is the default for all annotations.
53+
* @param keepCombined If true, retain the combined raw annotation values instead of removing them after finalizing
5354
*/
5455
public VariantAnnotatorEngine(final Collection<Annotation> annotationList,
55-
final FeatureInput<VariantContext> dbSNPInput,
56-
final List<FeatureInput<VariantContext>> featureInputs,
57-
final boolean useRaw){
56+
final FeatureInput<VariantContext> dbSNPInput,
57+
final List<FeatureInput<VariantContext>> featureInputs,
58+
final boolean useRaw,
59+
boolean keepCombined){
5860
Utils.nonNull(featureInputs, "comparisonFeatureInputs is null");
5961
infoAnnotations = new ArrayList<>();
6062
genotypeAnnotations = new ArrayList<>();
@@ -69,6 +71,7 @@ public VariantAnnotatorEngine(final Collection<Annotation> annotationList,
6971
variantOverlapAnnotator = initializeOverlapAnnotator(dbSNPInput, featureInputs);
7072
reducibleKeys = new HashSet<>();
7173
useRawAnnotations = useRaw;
74+
keepRawCombinedAnnotations = keepCombined;
7275
for (InfoFieldAnnotation annot : infoAnnotations) {
7376
if (annot instanceof ReducibleAnnotation) {
7477
reducibleKeys.add(((ReducibleAnnotation) annot).getRawKeyName());
@@ -221,7 +224,9 @@ public VariantContext finalizeAnnotations(VariantContext vc, VariantContext orig
221224
variantAnnotations.putAll(annotationsFromCurrentType);
222225
}
223226
//clean up raw annotation data after annotations are finalized
224-
variantAnnotations.remove(currentASannotation.getRawKeyName());
227+
if (!keepRawCombinedAnnotations) {
228+
variantAnnotations.remove(currentASannotation.getRawKeyName());
229+
}
225230
}
226231
}
227232

src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
55
import org.broadinstitute.barclay.argparser.Argument;
66
import org.broadinstitute.barclay.argparser.ArgumentCollection;
7-
import org.broadinstitute.barclay.argparser.CommandLineException;
87
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
98
import org.broadinstitute.barclay.help.DocumentedFeature;
109
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
@@ -221,7 +220,7 @@ public void onTraversalStart() {
221220
}
222221

223222
final VariantAnnotatorEngine variantAnnotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(),
224-
hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE);
223+
hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE, false);
225224
hcEngine = new HaplotypeCallerEngine(hcArgs, createOutputBamIndex, createOutputBamMD5, getHeaderForReads(), getReferenceReader(referenceArguments), variantAnnotatorEngine);
226225

227226
// The HC engine will make the right kind (VCF or GVCF) of writer for us

src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
1313
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
1414
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode;
15-
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls;
1615
import org.broadinstitute.hellbender.transformers.ReadTransformer;
1716
import org.broadinstitute.hellbender.utils.downsampling.MutectDownsampler;
1817
import org.broadinstitute.hellbender.utils.downsampling.ReadsDownsampler;
@@ -269,7 +268,7 @@ protected ReadsDownsampler createDownsampler() {
269268

270269
@Override
271270
public void onTraversalStart() {
272-
VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), null, Collections.emptyList(), false);
271+
VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), null, Collections.emptyList(), false, false);
273272
m2Engine = new Mutect2Engine(MTAC, createOutputBamIndex, createOutputBamMD5, getHeaderForReads(), referenceArguments.getReferenceFileName(), annotatorEngine);
274273
vcfWriter = createVCFWriter(outputVCF);
275274
if (m2Engine.emitReferenceConfidence()) {

src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
1111
import org.broadinstitute.hellbender.engine.*;
1212
import org.broadinstitute.hellbender.exceptions.UserException;
13-
import org.broadinstitute.hellbender.tools.walkers.GenotypeGVCFs;
1413
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
1514
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
1615
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
@@ -184,7 +183,7 @@ private HaplotypeCallerGenotypingEngine createGenotypingEngine(SampleList sample
184183

185184
@VisibleForTesting
186185
protected void createAnnotationEngine() {
187-
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false);
186+
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false, false);
188187
}
189188

190189
// get VariantContexts from input gVCFs and regenotype

src/test/java/org/broadinstitute/hellbender/cmdline/GATKPlugin/GATKAnnotationPluginDescriptorUnitTest.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ public void testMultipleOptionalArguments(final List<Annotation> toolDefaultAnno
331331
Collections.emptySet());
332332

333333
clp.parseArguments(nullMessageStream, arguments);
334-
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(instantiateAnnotations(clp), null, Collections.emptyList(), false);
334+
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(instantiateAnnotations(clp), null, Collections.emptyList(), false, false);
335335
VariantContext vc = inbreedingCoefficientVC;
336336
vc = vae.annotateContext(vc, new FeatureContext(), null, null, a->true);
337337

@@ -355,7 +355,7 @@ public void testHierarchicalAnnotationDiscovery() throws IllegalAccessException,
355355
});
356356
pluginDescriptor.validateAndResolvePlugins();
357357

358-
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(Arrays.asList(pluginDescriptor.getResolvedInstances().toArray(new Annotation[0])), null, Collections.emptyList(), false);
358+
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(Arrays.asList(pluginDescriptor.getResolvedInstances().toArray(new Annotation[0])), null, Collections.emptyList(), false, false);
359359
VariantContext vc = inbreedingCoefficientVC;
360360
vc = vae.annotateContext(vc, new FeatureContext(), null, null, a->true);
361361

@@ -412,7 +412,7 @@ public void testDisableDefaultsAndReplaceOnCommandLine() {
412412
List<String> args = Stream.of(StandardArgumentDefinitions.ANNOTATION_GROUP_SHORT_NAME, StandardAnnotation.class.getSimpleName(), "--"+ StandardArgumentDefinitions.DISABLE_TOOL_DEFAULT_ANNOTATIONS).collect(Collectors.toList());
413413

414414
clp.parseArguments(nullMessageStream, args.toArray(new String[args.size()]));
415-
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(instantiateAnnotations(clp), null, Collections.emptyList(), false);
415+
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(instantiateAnnotations(clp), null, Collections.emptyList(), false, false);
416416

417417
Assert.assertFalse(vae.getInfoAnnotations().isEmpty());
418418
Assert.assertTrue(vae.getInfoAnnotations().stream().noneMatch(a -> a.getClass().getSimpleName().equals(DepthPerSampleHC.class.getSimpleName())));
@@ -613,7 +613,7 @@ public void testOverridingInstancesWithGetInstance() throws InstantiationExcepti
613613
Collection<Annotation> finalAnnotations = pluginDescriptor.getResolvedInstances();
614614
Assert.assertEquals(finalAnnotations.size(), 1);
615615

616-
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(Arrays.asList(finalAnnotations.toArray(new Annotation[0])), null, Collections.emptyList(), false);
616+
VariantAnnotatorEngine vae = new VariantAnnotatorEngine(Arrays.asList(finalAnnotations.toArray(new Annotation[0])), null, Collections.emptyList(), false, false);
617617
VariantContext vc = inbreedingCoefficientVC;
618618
vc = vae.annotateContext(vc, new FeatureContext(), null, null, a->true);
619619

src/test/java/org/broadinstitute/hellbender/engine/AssemblyRegionIteratorUnitTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public void testRegionsHaveCorrectReadsAndSize( final String reads, final String
6363
final MultiIntervalLocalReadShard readShard = new MultiIntervalLocalReadShard(shardIntervals, assemblyRegionPadding, readsSource);
6464
final HaplotypeCallerArgumentCollection hcArgs = new HaplotypeCallerArgumentCollection();
6565
final AssemblyRegionEvaluator evaluator = new HaplotypeCallerEngine(hcArgs, false, false, readsSource.getHeader(),
66-
referenceReader, new VariantAnnotatorEngine(new ArrayList<>(), hcArgs.dbsnp.dbsnp, hcArgs.comps, false));
66+
referenceReader, new VariantAnnotatorEngine(new ArrayList<>(), hcArgs.dbsnp.dbsnp, hcArgs.comps, false, false));
6767
final ReadCoordinateComparator readComparator = new ReadCoordinateComparator(readsSource.getHeader());
6868

6969
final List<ReadFilter> readFilters = new ArrayList<>(2);

0 commit comments

Comments
 (0)