Skip to content

Commit ad7beff

Browse files
committed
add column for std nomenclature
1 parent b11abd1 commit ad7beff

File tree

2 files changed

+160
-7
lines changed

2 files changed

+160
-7
lines changed

src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java

+159-7
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ public final class AnalyzeSaturationMutagenesis extends GATKTool {
103103
@Argument(doc = "minimum number of wt calls flanking variant", fullName = "min-flanking-length")
104104
@VisibleForTesting static int minFlankingLength = 2;
105105

106+
@Argument(doc = "minimum map quality for read alignment", fullName = "min-mapq")
107+
private static int minMapQ = 4;
108+
106109
@Argument(doc = "reference interval(s) of the ORF (1-based, inclusive), for example, '134-180,214-238' (no spaces)",
107110
fullName = "orf")
108111
private static String orfCoords;
@@ -286,14 +289,20 @@ private static void writeVariationCounts( final List<SNVCollectionCount> variati
286289
}
287290
}
288291

292+
// interpret the effects of the SNVs on the ORF
289293
private static void describeVariantsAsCodons( final BufferedWriter writer, final List<SNV> snvs )
290294
throws IOException {
291295
final List<CodonVariation> codonVariations = codonTracker.encodeSNVsAsCodons(snvs);
292-
final int[] refCodonValues = codonTracker.getRefCodonValues();
296+
if ( codonVariations.size() == 0 ) {
297+
writer.write("\t0");
298+
return;
299+
}
293300

294301
writer.write('\t');
295-
writer.write(Long.toString(codonVariations.size()));
302+
writer.write(Integer.toString(codonVariations.size()));
303+
final int[] refCodonValues = codonTracker.getRefCodonValues();
296304

305+
// write a column describing each altered codon as DNA
297306
String sep = "\t";
298307
for ( final CodonVariation variation : codonVariations ) {
299308
writer.write(sep);
@@ -310,6 +319,7 @@ private static void describeVariantsAsCodons( final BufferedWriter writer, final
310319
}
311320
}
312321

322+
// write a column describing each altered codon as an amino acid
313323
sep = "\t";
314324
for ( final CodonVariation variation : codonVariations ) {
315325
writer.write(sep);
@@ -323,7 +333,7 @@ private static void describeVariantsAsCodons( final BufferedWriter writer, final
323333
} else if ( variation.isDeletion() ) {
324334
writer.write("D:");
325335
writer.write(codonTranslation.charAt(refCodonValues[codonId]));
326-
writer.write(":-");
336+
writer.write(">-");
327337
} else {
328338
final char fromAA = codonTranslation.charAt(refCodonValues[codonId]);
329339
final char toAA = codonTranslation.charAt(variation.getCodonValue());
@@ -335,6 +345,36 @@ private static void describeVariantsAsCodons( final BufferedWriter writer, final
335345
writer.write(toAA);
336346
}
337347
}
348+
349+
// write one final column describing the total effect in HGVS standard nomenclature
350+
// this involves grouping together codon variations that can be described together
351+
sep = "\t";
352+
CodonVariationGroup codonVariationGroup = null;
353+
for ( final CodonVariation variation : codonVariations ) {
354+
if ( codonVariationGroup == null ) {
355+
if ( !isSynonymous(variation, refCodonValues) ) {
356+
codonVariationGroup = new CodonVariationGroup(refCodonValues, variation);
357+
}
358+
} else if ( !codonVariationGroup.addVariation(variation) ) {
359+
writer.write(sep);
360+
sep = ";";
361+
writer.write(codonVariationGroup.toString());
362+
codonVariationGroup = null;
363+
if ( !isSynonymous(variation, refCodonValues) ) {
364+
codonVariationGroup = new CodonVariationGroup(refCodonValues, variation);
365+
}
366+
}
367+
}
368+
if ( codonVariationGroup != null && !codonVariationGroup.isEmpty() ) {
369+
writer.write(sep);
370+
writer.write(codonVariationGroup.toString());
371+
}
372+
}
373+
374+
private static boolean isSynonymous( final CodonVariation variation, final int[] refCodonValues ) {
375+
return variation.getVariationType() == CodonVariationType.MODIFICATION &&
376+
codonTranslation.charAt(variation.getCodonValue()) ==
377+
codonTranslation.charAt(refCodonValues[variation.getCodonId()]);
338378
}
339379

340380
private static void writeRefCoverage() {
@@ -577,6 +617,7 @@ private static void writeCoverageSizeHistogram() {
577617
}
578618
}
579619

620+
// categories for interpretation of reads
580621
enum ReportType {
581622
UNMAPPED("unmapped", "Unmapped Reads"),
582623
LOW_QUALITY("lowQ", "LowQ Reads"),
@@ -591,16 +632,17 @@ enum ReportType {
591632
public final String attributeValue; // value for tagging rejected reads. when null, don't tag the read.
592633
public final String label;
593634

594-
private ReportType( final String attributeValue, final String label ) {
635+
ReportType( final String attributeValue, final String label ) {
595636
this.attributeValue = attributeValue;
596637
this.label = label;
597638
}
598639

599640
public final static String REPORT_TYPE_ATTRIBUTE_KEY = "XX";
600641
}
601642

643+
// counts of the reporting categories
602644
public final static class ReportTypeCounts {
603-
private long[] counts = new long[ReportType.values().length];
645+
private final long[] counts = new long[ReportType.values().length];
604646

605647
public void bumpCount( final ReportType reportType ) {
606648
counts[reportType.ordinal()] += 1;
@@ -613,6 +655,7 @@ public long totalCounts() {
613655
}
614656
}
615657

658+
// a description of the wild type amplicon and its coverage
616659
@VisibleForTesting final static class Reference {
617660
// the amplicon -- all bytes are converted to upper-case 'A', 'C', 'G', or 'T', no nonsense
618661
private final byte[] refSeq;
@@ -885,6 +928,7 @@ public int compareTo( final SNVCollectionCount that ) {
885928
MODIFICATION
886929
}
887930

931+
// a description of a codon that varies from wild type
888932
@VisibleForTesting static final class CodonVariation {
889933
private final int codonId;
890934
private final int codonValue; // ignored for FRAMESHIFT and DELETION
@@ -934,6 +978,112 @@ public static CodonVariation createModification( final int codonId, final int co
934978
}
935979
}
936980

981+
// one or more codon variations that can be described together in standard nomenclature
982+
@VisibleForTesting static final class CodonVariationGroup {
983+
private final int[] refCodonValues;
984+
private final StringBuilder altCalls;
985+
private int startingCodon;
986+
private int endingCodon;
987+
private boolean isFrameShift;
988+
private int insCount;
989+
private int delCount;
990+
private int subCount;
991+
992+
// start a new group with a first variation
993+
public CodonVariationGroup( final int[] refCodonValues, final CodonVariation codonVariation ) {
994+
this.refCodonValues = refCodonValues;
995+
altCalls = new StringBuilder();
996+
switch ( codonVariation.getVariationType() ) {
997+
case FRAMESHIFT:
998+
isFrameShift = true;
999+
break;
1000+
case INSERTION:
1001+
insCount += 1;
1002+
altCalls.append(codonTranslation.charAt(codonVariation.getCodonValue()));
1003+
break;
1004+
case DELETION:
1005+
delCount += 1;
1006+
break;
1007+
case MODIFICATION:
1008+
subCount += 1;
1009+
altCalls.append(codonTranslation.charAt(codonVariation.getCodonValue()));
1010+
break;
1011+
}
1012+
startingCodon = endingCodon = codonVariation.getCodonId();
1013+
}
1014+
1015+
// attempt to add more variations
1016+
// returns false if the presented variation cannot be added to the group
1017+
public boolean addVariation( final CodonVariation codonVariation ) {
1018+
final int codonId = codonVariation.getCodonId();
1019+
if ( codonId > endingCodon + 1 && !isFrameShift ) return false;
1020+
switch ( codonVariation.getVariationType() ) {
1021+
case FRAMESHIFT:
1022+
return false;
1023+
case INSERTION:
1024+
insCount += 1;
1025+
altCalls.append(codonTranslation.charAt(codonVariation.getCodonValue()));
1026+
if ( isFrameShift && isEmpty() ) startingCodon = codonId;
1027+
break;
1028+
case DELETION:
1029+
delCount += 1;
1030+
break;
1031+
case MODIFICATION:
1032+
final char aa = codonTranslation.charAt(codonVariation.getCodonValue());
1033+
if ( aa == codonTranslation.charAt(refCodonValues[codonId]) ) {
1034+
if ( isFrameShift ) {
1035+
if ( !isEmpty() ) altCalls.append(aa);
1036+
break;
1037+
}
1038+
return false;
1039+
}
1040+
if ( isFrameShift && isEmpty() ) startingCodon = codonId;
1041+
subCount += 1;
1042+
altCalls.append(aa);
1043+
break;
1044+
}
1045+
endingCodon = codonId;
1046+
return true;
1047+
}
1048+
1049+
// sometimes when starting with a frame shift you end up with nothing because the variants are all synonymous
1050+
public boolean isEmpty() { return subCount + insCount + delCount == 0; }
1051+
1052+
// translate the group into standard nomenclature
1053+
@Override public String toString() {
1054+
final String alts = altCalls.toString();
1055+
final StringBuilder sb = new StringBuilder();
1056+
sb.append(codonTranslation.charAt(refCodonValues[startingCodon])).append(startingCodon + 1);
1057+
if ( isFrameShift && !alts.isEmpty() ) {
1058+
sb.append(alts.charAt(0));
1059+
final int len = endingCodon - startingCodon + 1;
1060+
if ( len > 1 ) {
1061+
sb.append("fs*");
1062+
if ( alts.charAt(alts.length() - 1) == 'X' ) sb.append(len);
1063+
else sb.append('?');
1064+
}
1065+
} else {
1066+
// pure inserts need to have the ending codon fixed up
1067+
if ( insCount != 0 && delCount == 0 && subCount == 0 ) {
1068+
endingCodon = startingCodon + 1;
1069+
}
1070+
// suppress printing range and type when there is a single variation
1071+
if ( startingCodon != endingCodon ) {
1072+
sb.append('_').append(codonTranslation.charAt(refCodonValues[endingCodon])).append(endingCodon + 1);
1073+
}
1074+
if ( subCount == 0 && insCount == 0 ) {
1075+
sb.append("del");
1076+
} else if ( subCount == 0 && delCount == 0 ) {
1077+
sb.append("ins");
1078+
} else if ( subCount + delCount + insCount > 1 ) {
1079+
sb.append("insdel");
1080+
}
1081+
sb.append(alts);
1082+
}
1083+
return sb.toString();
1084+
}
1085+
}
1086+
9371087
// A class to translate a sequence of base-call variants into a sequence of codon variants.
9381088
// also has some counters to track the number of observations of different codon values at each codon
9391089
@VisibleForTesting static final class CodonTracker {
@@ -1270,6 +1420,7 @@ static boolean isStop( final int codonValue ) {
12701420
}
12711421
}
12721422

1423+
// describes the SNVs found in a read, and the reference covered by the read
12731424
@VisibleForTesting static final class ReadReport {
12741425
final List<Interval> refCoverage;
12751426
final List<SNV> snvList;
@@ -1546,11 +1697,12 @@ private static List<SNV> combineVariations( final ReadReport report1, final Read
15461697
@VisibleForTesting static ReadReport getReadReport( final GATKRead read ) {
15471698
totalBaseCalls += read.getLength();
15481699

1549-
if ( read.isUnmapped() || read.isDuplicate() || read.failsVendorQualityCheck() ) {
1700+
if ( read.isUnmapped() || read.isDuplicate() || read.failsVendorQualityCheck() ||
1701+
read.getMappingQuality() < minMapQ ) {
15501702
return rejectRead(read, ReportType.UNMAPPED);
15511703
}
15521704

1553-
Interval trim = calculateTrim(read);
1705+
final Interval trim = calculateTrim(read);
15541706
if ( trim.size() < minLength ) {
15551707
return rejectRead(read, ReportType.LOW_QUALITY);
15561708
}

src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ public void testGetReadReport() {
548548
Arrays.fill(quals, QUAL_30);
549549
final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1,
550550
bases, quals, "17M");
551+
read.setMappingQuality(60);
551552
reference = new Reference(refSeq);
552553
final ReadReport readReport = getReadReport(read);
553554
Assert.assertEquals(readReport.getRefCoverage(), Collections.singletonList(new Interval(0, 17)));

0 commit comments

Comments
 (0)