@@ -103,6 +103,9 @@ public final class AnalyzeSaturationMutagenesis extends GATKTool {
103
103
@ Argument (doc = "minimum number of wt calls flanking variant" , fullName = "min-flanking-length" )
104
104
@ VisibleForTesting static int minFlankingLength = 2 ;
105
105
106
+ @ Argument (doc = "minimum map quality for read alignment" , fullName = "min-mapq" )
107
+ private static int minMapQ = 4 ;
108
+
106
109
@ Argument (doc = "reference interval(s) of the ORF (1-based, inclusive), for example, '134-180,214-238' (no spaces)" ,
107
110
fullName = "orf" )
108
111
private static String orfCoords ;
@@ -286,14 +289,20 @@ private static void writeVariationCounts( final List<SNVCollectionCount> variati
286
289
}
287
290
}
288
291
292
+ // interpret the effects of the SNVs on the ORF
289
293
private static void describeVariantsAsCodons ( final BufferedWriter writer , final List <SNV > snvs )
290
294
throws IOException {
291
295
final List <CodonVariation > codonVariations = codonTracker .encodeSNVsAsCodons (snvs );
292
- final int [] refCodonValues = codonTracker .getRefCodonValues ();
296
+ if ( codonVariations .size () == 0 ) {
297
+ writer .write ("\t 0" );
298
+ return ;
299
+ }
293
300
294
301
writer .write ('\t' );
295
- writer .write (Long .toString (codonVariations .size ()));
302
+ writer .write (Integer .toString (codonVariations .size ()));
303
+ final int [] refCodonValues = codonTracker .getRefCodonValues ();
296
304
305
+ // write a column describing each altered codon as DNA
297
306
String sep = "\t " ;
298
307
for ( final CodonVariation variation : codonVariations ) {
299
308
writer .write (sep );
@@ -310,6 +319,7 @@ private static void describeVariantsAsCodons( final BufferedWriter writer, final
310
319
}
311
320
}
312
321
322
+ // write a column describing each altered codon as an amino acid
313
323
sep = "\t " ;
314
324
for ( final CodonVariation variation : codonVariations ) {
315
325
writer .write (sep );
@@ -323,7 +333,7 @@ private static void describeVariantsAsCodons( final BufferedWriter writer, final
323
333
} else if ( variation .isDeletion () ) {
324
334
writer .write ("D:" );
325
335
writer .write (codonTranslation .charAt (refCodonValues [codonId ]));
326
- writer .write (": -" );
336
+ writer .write ("> -" );
327
337
} else {
328
338
final char fromAA = codonTranslation .charAt (refCodonValues [codonId ]);
329
339
final char toAA = codonTranslation .charAt (variation .getCodonValue ());
@@ -335,6 +345,36 @@ private static void describeVariantsAsCodons( final BufferedWriter writer, final
335
345
writer .write (toAA );
336
346
}
337
347
}
348
+
349
+ // write one final column describing the total effect in HGVS standard nomenclature
350
+ // this involves grouping together codon variations that can be described together
351
+ sep = "\t " ;
352
+ CodonVariationGroup codonVariationGroup = null ;
353
+ for ( final CodonVariation variation : codonVariations ) {
354
+ if ( codonVariationGroup == null ) {
355
+ if ( !isSynonymous (variation , refCodonValues ) ) {
356
+ codonVariationGroup = new CodonVariationGroup (refCodonValues , variation );
357
+ }
358
+ } else if ( !codonVariationGroup .addVariation (variation ) ) {
359
+ writer .write (sep );
360
+ sep = ";" ;
361
+ writer .write (codonVariationGroup .toString ());
362
+ codonVariationGroup = null ;
363
+ if ( !isSynonymous (variation , refCodonValues ) ) {
364
+ codonVariationGroup = new CodonVariationGroup (refCodonValues , variation );
365
+ }
366
+ }
367
+ }
368
+ if ( codonVariationGroup != null && !codonVariationGroup .isEmpty () ) {
369
+ writer .write (sep );
370
+ writer .write (codonVariationGroup .toString ());
371
+ }
372
+ }
373
+
374
+ private static boolean isSynonymous ( final CodonVariation variation , final int [] refCodonValues ) {
375
+ return variation .getVariationType () == CodonVariationType .MODIFICATION &&
376
+ codonTranslation .charAt (variation .getCodonValue ()) ==
377
+ codonTranslation .charAt (refCodonValues [variation .getCodonId ()]);
338
378
}
339
379
340
380
private static void writeRefCoverage () {
@@ -577,6 +617,7 @@ private static void writeCoverageSizeHistogram() {
577
617
}
578
618
}
579
619
620
+ // categories for interpretation of reads
580
621
enum ReportType {
581
622
UNMAPPED ("unmapped" , "Unmapped Reads" ),
582
623
LOW_QUALITY ("lowQ" , "LowQ Reads" ),
@@ -591,16 +632,17 @@ enum ReportType {
591
632
public final String attributeValue ; // value for tagging rejected reads. when null, don't tag the read.
592
633
public final String label ;
593
634
594
- private ReportType ( final String attributeValue , final String label ) {
635
+ ReportType ( final String attributeValue , final String label ) {
595
636
this .attributeValue = attributeValue ;
596
637
this .label = label ;
597
638
}
598
639
599
640
public final static String REPORT_TYPE_ATTRIBUTE_KEY = "XX" ;
600
641
}
601
642
643
+ // counts of the reporting categories
602
644
public final static class ReportTypeCounts {
603
- private long [] counts = new long [ReportType .values ().length ];
645
+ private final long [] counts = new long [ReportType .values ().length ];
604
646
605
647
public void bumpCount ( final ReportType reportType ) {
606
648
counts [reportType .ordinal ()] += 1 ;
@@ -613,6 +655,7 @@ public long totalCounts() {
613
655
}
614
656
}
615
657
658
+ // a description of the wild type amplicon and its coverage
616
659
@ VisibleForTesting final static class Reference {
617
660
// the amplicon -- all bytes are converted to upper-case 'A', 'C', 'G', or 'T', no nonsense
618
661
private final byte [] refSeq ;
@@ -885,6 +928,7 @@ public int compareTo( final SNVCollectionCount that ) {
885
928
MODIFICATION
886
929
}
887
930
931
+ // a description of a codon that varies from wild type
888
932
@ VisibleForTesting static final class CodonVariation {
889
933
private final int codonId ;
890
934
private final int codonValue ; // ignored for FRAMESHIFT and DELETION
@@ -934,6 +978,112 @@ public static CodonVariation createModification( final int codonId, final int co
934
978
}
935
979
}
936
980
981
+ // one or more codon variations that can be described together in standard nomenclature
982
+ @ VisibleForTesting static final class CodonVariationGroup {
983
+ private final int [] refCodonValues ;
984
+ private final StringBuilder altCalls ;
985
+ private int startingCodon ;
986
+ private int endingCodon ;
987
+ private boolean isFrameShift ;
988
+ private int insCount ;
989
+ private int delCount ;
990
+ private int subCount ;
991
+
992
+ // start a new group with a first variation
993
+ public CodonVariationGroup ( final int [] refCodonValues , final CodonVariation codonVariation ) {
994
+ this .refCodonValues = refCodonValues ;
995
+ altCalls = new StringBuilder ();
996
+ switch ( codonVariation .getVariationType () ) {
997
+ case FRAMESHIFT :
998
+ isFrameShift = true ;
999
+ break ;
1000
+ case INSERTION :
1001
+ insCount += 1 ;
1002
+ altCalls .append (codonTranslation .charAt (codonVariation .getCodonValue ()));
1003
+ break ;
1004
+ case DELETION :
1005
+ delCount += 1 ;
1006
+ break ;
1007
+ case MODIFICATION :
1008
+ subCount += 1 ;
1009
+ altCalls .append (codonTranslation .charAt (codonVariation .getCodonValue ()));
1010
+ break ;
1011
+ }
1012
+ startingCodon = endingCodon = codonVariation .getCodonId ();
1013
+ }
1014
+
1015
+ // attempt to add more variations
1016
+ // returns false if the presented variation cannot be added to the group
1017
+ public boolean addVariation ( final CodonVariation codonVariation ) {
1018
+ final int codonId = codonVariation .getCodonId ();
1019
+ if ( codonId > endingCodon + 1 && !isFrameShift ) return false ;
1020
+ switch ( codonVariation .getVariationType () ) {
1021
+ case FRAMESHIFT :
1022
+ return false ;
1023
+ case INSERTION :
1024
+ insCount += 1 ;
1025
+ altCalls .append (codonTranslation .charAt (codonVariation .getCodonValue ()));
1026
+ if ( isFrameShift && isEmpty () ) startingCodon = codonId ;
1027
+ break ;
1028
+ case DELETION :
1029
+ delCount += 1 ;
1030
+ break ;
1031
+ case MODIFICATION :
1032
+ final char aa = codonTranslation .charAt (codonVariation .getCodonValue ());
1033
+ if ( aa == codonTranslation .charAt (refCodonValues [codonId ]) ) {
1034
+ if ( isFrameShift ) {
1035
+ if ( !isEmpty () ) altCalls .append (aa );
1036
+ break ;
1037
+ }
1038
+ return false ;
1039
+ }
1040
+ if ( isFrameShift && isEmpty () ) startingCodon = codonId ;
1041
+ subCount += 1 ;
1042
+ altCalls .append (aa );
1043
+ break ;
1044
+ }
1045
+ endingCodon = codonId ;
1046
+ return true ;
1047
+ }
1048
+
1049
+ // sometimes when starting with a frame shift you end up with nothing because the variants are all synonymous
1050
+ public boolean isEmpty () { return subCount + insCount + delCount == 0 ; }
1051
+
1052
+ // translate the group into standard nomenclature
1053
+ @ Override public String toString () {
1054
+ final String alts = altCalls .toString ();
1055
+ final StringBuilder sb = new StringBuilder ();
1056
+ sb .append (codonTranslation .charAt (refCodonValues [startingCodon ])).append (startingCodon + 1 );
1057
+ if ( isFrameShift && !alts .isEmpty () ) {
1058
+ sb .append (alts .charAt (0 ));
1059
+ final int len = endingCodon - startingCodon + 1 ;
1060
+ if ( len > 1 ) {
1061
+ sb .append ("fs*" );
1062
+ if ( alts .charAt (alts .length () - 1 ) == 'X' ) sb .append (len );
1063
+ else sb .append ('?' );
1064
+ }
1065
+ } else {
1066
+ // pure inserts need to have the ending codon fixed up
1067
+ if ( insCount != 0 && delCount == 0 && subCount == 0 ) {
1068
+ endingCodon = startingCodon + 1 ;
1069
+ }
1070
+ // suppress printing range and type when there is a single variation
1071
+ if ( startingCodon != endingCodon ) {
1072
+ sb .append ('_' ).append (codonTranslation .charAt (refCodonValues [endingCodon ])).append (endingCodon + 1 );
1073
+ }
1074
+ if ( subCount == 0 && insCount == 0 ) {
1075
+ sb .append ("del" );
1076
+ } else if ( subCount == 0 && delCount == 0 ) {
1077
+ sb .append ("ins" );
1078
+ } else if ( subCount + delCount + insCount > 1 ) {
1079
+ sb .append ("insdel" );
1080
+ }
1081
+ sb .append (alts );
1082
+ }
1083
+ return sb .toString ();
1084
+ }
1085
+ }
1086
+
937
1087
// A class to translate a sequence of base-call variants into a sequence of codon variants.
938
1088
// also has some counters to track the number of observations of different codon values at each codon
939
1089
@ VisibleForTesting static final class CodonTracker {
@@ -1270,6 +1420,7 @@ static boolean isStop( final int codonValue ) {
1270
1420
}
1271
1421
}
1272
1422
1423
+ // describes the SNVs found in a read, and the reference covered by the read
1273
1424
@ VisibleForTesting static final class ReadReport {
1274
1425
final List <Interval > refCoverage ;
1275
1426
final List <SNV > snvList ;
@@ -1546,11 +1697,12 @@ private static List<SNV> combineVariations( final ReadReport report1, final Read
1546
1697
@ VisibleForTesting static ReadReport getReadReport ( final GATKRead read ) {
1547
1698
totalBaseCalls += read .getLength ();
1548
1699
1549
- if ( read .isUnmapped () || read .isDuplicate () || read .failsVendorQualityCheck () ) {
1700
+ if ( read .isUnmapped () || read .isDuplicate () || read .failsVendorQualityCheck () ||
1701
+ read .getMappingQuality () < minMapQ ) {
1550
1702
return rejectRead (read , ReportType .UNMAPPED );
1551
1703
}
1552
1704
1553
- Interval trim = calculateTrim (read );
1705
+ final Interval trim = calculateTrim (read );
1554
1706
if ( trim .size () < minLength ) {
1555
1707
return rejectRead (read , ReportType .LOW_QUALITY );
1556
1708
}
0 commit comments