Skip to content

Commit 45d9ecb

Browse files
authored
Fixed a rare bug in the genotyping engine where it could emit untrimmed alleles for SNP sites (#6044)
1 parent 2d3a6fe commit 45d9ecb

File tree

2 files changed

+50
-8
lines changed

2 files changed

+50
-8
lines changed

src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java

+16-8
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.broadinstitute.hellbender.engine.ReferenceContext;
1010
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
1111
import org.broadinstitute.hellbender.engine.ReferenceMemorySource;
12+
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
1213
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
1314
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
1415
import org.broadinstitute.hellbender.utils.SimpleInterval;
@@ -172,6 +173,8 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List<Haplotype> haplotyp
172173
continue;
173174
}
174175

176+
int mergedAllelesListSizeBeforePossibleTrimming = mergedVC.getAlleles().size();
177+
175178
final Map<Allele, List<Haplotype>> alleleMapper = AssemblyBasedCallerUtils.createAlleleMapper(mergedVC, loc, haplotypes, activeAllelesToGenotype);
176179

177180
if( hcArgs.assemblerArgs.debugAssembly && logger != null ) {
@@ -188,6 +191,7 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List<Haplotype> haplotyp
188191
if (emitReferenceConfidence) {
189192
mergedVC = ReferenceConfidenceUtils.addNonRefSymbolicAllele(mergedVC);
190193
readAlleleLikelihoods.addNonReferenceAllele(Allele.NON_REF_ALLELE);
194+
mergedAllelesListSizeBeforePossibleTrimming++;
191195
}
192196

193197
final GenotypesContext genotypes = calculateGLsForThisEvent(readAlleleLikelihoods, mergedVC, noCallAlleles);
@@ -197,7 +201,7 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List<Haplotype> haplotyp
197201
readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
198202
emitReferenceConfidence, alleleMapper, readAlleleLikelihoods, call);
199203

200-
final VariantContext annotatedCall = makeAnnotatedCall(ref, refLoc, tracker, header, mergedVC, readAlleleLikelihoods, call);
204+
final VariantContext annotatedCall = makeAnnotatedCall(ref, refLoc, tracker, header, mergedVC, mergedAllelesListSizeBeforePossibleTrimming, readAlleleLikelihoods, call, annotationEngine);
201205
returnCalls.add( annotatedCall );
202206

203207
if (withBamOut) {
@@ -365,14 +369,18 @@ static VariantContext removeExcessAltAllelesFromVC(final VariantContext inputVC,
365369
return vcb.make();
366370
}
367371

368-
protected VariantContext makeAnnotatedCall(byte[] ref, SimpleInterval refLoc, FeatureContext tracker, SAMFileHeader header, VariantContext mergedVC, ReadLikelihoods<Allele> readAlleleLikelihoods, VariantContext call) {
369-
final SimpleInterval locus = new SimpleInterval(mergedVC.getContig(), mergedVC.getStart(), mergedVC.getEnd());
372+
@VisibleForTesting
373+
static protected VariantContext makeAnnotatedCall(byte[] ref, SimpleInterval refLoc, FeatureContext tracker, SAMFileHeader header, VariantContext mergedVC, int mergedAllelesListSizeBeforePossibleTrimming, ReadLikelihoods<Allele> readAlleleLikelihoods, VariantContext call, VariantAnnotatorEngine annotationEngine) {
374+
final SimpleInterval locus = new SimpleInterval(mergedVC);
370375
final SimpleInterval refLocInterval= new SimpleInterval(refLoc);
371376
final ReferenceDataSource refData = new ReferenceMemorySource(new ReferenceBases(ref, refLocInterval), header.getSequenceDictionary());
372377
final ReferenceContext referenceContext = new ReferenceContext(refData, locus, refLocInterval);
373378

374379
final VariantContext untrimmedResult = annotationEngine.annotateContext(call, tracker, referenceContext, readAlleleLikelihoods, a -> true);
375-
return call.getAlleles().size() == mergedVC.getAlleles().size() ? untrimmedResult
380+
381+
// NOTE: We choose to reverseTrimAlleles() here as opposed to when we actually do the trimming because otherwise we would have to resolve
382+
// the mismatching readAlleleLikelihoods object which is keyed to the old, possibly incorrectly trimmed alleles.
383+
return untrimmedResult.getAlleles().size() == mergedAllelesListSizeBeforePossibleTrimming ? untrimmedResult
376384
: GATKVariantContextUtils.reverseTrimAlleles(untrimmedResult);
377385
}
378386

@@ -426,10 +434,10 @@ protected static GenotypeLikelihoodsCalculationModel getGLModel(final VariantCon
426434
* @return never {@code null} but perhaps an empty list if there is no variants to report.
427435
*/
428436
private TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
429-
final byte[] ref,
430-
final SimpleInterval refLoc,
431-
final List<VariantContext> activeAllelesToGenotype,
432-
final int maxMnpDistance) {
437+
final byte[] ref,
438+
final SimpleInterval refLoc,
439+
final List<VariantContext> activeAllelesToGenotype,
440+
final int maxMnpDistance) {
433441
final boolean inGGAMode = ! activeAllelesToGenotype.isEmpty();
434442

435443
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file

src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java

+34
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
11
package org.broadinstitute.hellbender.tools.walkers.haplotypecaller;
22

33
import com.google.common.base.Strings;
4+
import htsjdk.samtools.SAMTestUtil;
45
import htsjdk.samtools.util.Locatable;
56
import htsjdk.variant.variantcontext.*;
67
import org.apache.commons.lang3.tuple.Pair;
78
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy;
89
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters;
10+
import org.broadinstitute.hellbender.engine.FeatureContext;
11+
import org.broadinstitute.hellbender.engine.FeatureInput;
912
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
13+
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
1014
import org.broadinstitute.hellbender.utils.QualityUtils;
1115
import org.broadinstitute.hellbender.utils.SimpleInterval;
1216
import org.broadinstitute.hellbender.utils.Utils;
17+
import org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList;
18+
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
19+
import org.broadinstitute.hellbender.utils.genotyper.SampleList;
1320
import org.broadinstitute.hellbender.utils.haplotype.EventMap;
1421
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
1522
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
@@ -301,6 +308,33 @@ public void testRemoveExcessiveAltAlleleFromVC(){
301308
Assert.assertTrue(reducedVC.getAlleles().containsAll(Arrays.asList(Allele.create("A", true), Allele.create("T", false), Allele.create("C", false))));
302309
}
303310

311+
@Test
312+
@SuppressWarnings("unchecked")
313+
public void testMakeAnnotatedCallTrimmingAlleles(){
314+
List<Allele> alleles = Arrays.asList(Allele.create("AGGGGGGGGG", true), Allele.create("TGGGGGGGGG", false));
315+
List<Allele> mergedAlleles = Arrays.asList(Allele.create("AGGGGGGGGG", true), Allele.create("TGGGGGGGGG", false), Allele.create("A", false));
316+
ReadLikelihoods<Allele> likelihoods = new ReadLikelihoods<Allele>(SampleList.EMPTY_LIST, new IndexedAlleleList<Allele>(alleles), new HashMap<>());
317+
318+
// Both a deletion and SNPs are present at this site
319+
final VariantContext originalVC = new VariantContextBuilder("source", "1", 1000000, 1000009, alleles).make();
320+
321+
final List<FeatureInput<VariantContext>> features = Collections.emptyList();
322+
323+
VariantContext reducedVC = HaplotypeCallerGenotypingEngine.makeAnnotatedCall("AGGGGGGGGG".getBytes(),
324+
new SimpleInterval(originalVC),
325+
new FeatureContext(),
326+
ArtificialReadUtils.createArtificialSamHeader(),
327+
originalVC,
328+
mergedAlleles.size(),
329+
likelihoods,
330+
originalVC,
331+
new VariantAnnotatorEngine(Collections.emptyList(), null, features, false, true));
332+
333+
// Asserting that the two alleles were trimmed after calling removeExcessAltAlleles
334+
Assert.assertEquals(reducedVC.getNAlleles(), 2);
335+
Assert.assertTrue(reducedVC.getAlleles().containsAll(Arrays.asList(Allele.create("A", true), Allele.create("T", false))));
336+
}
337+
304338
@Test
305339
public void testReplaceWithSpanDelVC() {
306340
final VariantContext snp = new VariantContextBuilder("source", "1", 1000000, 1000000,

0 commit comments

Comments
 (0)