-
Notifications
You must be signed in to change notification settings - Fork 602
[BIOIN-1570] Fixed edge case in variant annotation #8810
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,6 @@ | |
* are accumulated as well. | ||
*/ | ||
public abstract class FlowAnnotatorBase implements InfoFieldAnnotation { | ||
private final static Logger logger = LogManager.getLogger(FlowAnnotatorBase.class); | ||
protected final OneShotLogger flowMissingOneShotLogger = new OneShotLogger(FlowAnnotatorBase.class); | ||
|
||
|
||
|
@@ -202,7 +201,7 @@ protected void variantType(final VariantContext vc, final LocalContext localCont | |
if (isSpecial(alleles.get(i))){ | ||
continue; | ||
} | ||
if ((localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ | ||
if ((localContext.hmerIndelLength==null) || (localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ | ||
isHmer=false; | ||
} | ||
} | ||
|
@@ -250,7 +249,11 @@ protected void isHmerIndel(final VariantContext vc, final LocalContext localCont | |
// get byte before and after | ||
final byte before = getReferenceNucleotide(localContext, vc.getStart() - 1); | ||
final byte[] after = getReferenceHmerPlus(localContext, vc.getEnd() + 1, MOTIF_SIZE); | ||
|
||
if (after.length==0){ | ||
flowMissingOneShotLogger.warn("Failed to get hmer from reference, isHmerIndel and RightMotif annotations will not be calculated. " + | ||
"Start index: " + vc.getEnd() + 1 + " Reference length: " + localContext.ref.getBases().length); | ||
return; | ||
} | ||
// build two haplotypes. add byte before and after | ||
final byte[] refHap = buildHaplotype(before, ref.getBases(), after); | ||
final byte[] altHap = buildHaplotype(before, alt.getBases(), after); | ||
|
@@ -338,6 +341,11 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon | |
} | ||
|
||
String motif = getRefMotif(localContext, vc.getStart() - MOTIF_SIZE, MOTIF_SIZE); | ||
if (motif.length() != MOTIF_SIZE){ | ||
flowMissingOneShotLogger.warn("Failed to get motif from reference, getLeftMotif annotation will not be calculated. " + | ||
"Start index: " + vc.getStart() + " Reference length: " + localContext.ref.getBases().length); | ||
return; | ||
} | ||
if ( a.length() != refLength ) { | ||
motif = motif.substring(1) + vc.getReference().getBaseString().substring(0, 1); | ||
} | ||
|
@@ -350,8 +358,12 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon | |
protected void getRightMotif(final VariantContext vc, final LocalContext localContext) { | ||
|
||
final int refLength = vc.getReference().length(); | ||
final String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); | ||
|
||
String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); | ||
if (motif.length() != MOTIF_SIZE){ | ||
flowMissingOneShotLogger.warn("Failed to get motif from reference, getRightMotif annotation will not be calculated. " + | ||
"Start index: " + vc.getStart() + refLength + " Reference length: " + localContext.ref.getBases().length); | ||
return; | ||
} | ||
// fill empty entries (non indel alelles) | ||
for ( int i = 0 ; i < localContext.rightMotif.size() ; i++ ) { | ||
if ( localContext.rightMotif.get(i) == null ) { | ||
|
@@ -366,6 +378,11 @@ protected void gcContent(final VariantContext vc, final LocalContext localContex | |
|
||
final int begin = vc.getStart() - (GC_CONTENT_SIZE / 2); | ||
final String seq = getRefMotif(localContext, begin + 1, GC_CONTENT_SIZE); | ||
if ( seq.length() != GC_CONTENT_SIZE ) { | ||
flowMissingOneShotLogger.warn("gcContent will not be calculated at position " + vc.getContig() + ":" + vc.getStart() + | ||
" too close to the edge of the reference"); | ||
return; | ||
} | ||
int gcCount = 0; | ||
for ( byte b : seq.getBytes() ) { | ||
if ( b == 'G' || b == 'C' ) { | ||
|
@@ -424,19 +441,24 @@ protected void cycleSkip(final VariantContext vc, final LocalContext localContex | |
localContext.attributes.put(GATKVCFConstants.FLOW_CYCLESKIP_STATUS, css); | ||
} | ||
|
||
// get a single nucleoid from reference | ||
// get a single nucleotid from reference | ||
private byte getReferenceNucleotide(final LocalContext localContext, final int start) { | ||
final int index = start - localContext.ref.getWindow().getStart(); | ||
final byte[] bases = localContext.ref.getBases(); | ||
Utils.validIndex(index, bases.length); | ||
Utils.validIndex(index, bases.length); // do not catch, if here the location is outside of the reference, there is a problem! | ||
return bases[index]; | ||
} | ||
|
||
// get an hmer from reference plus a number of additional bases | ||
private byte[] getReferenceHmerPlus(final LocalContext localContext, final int start, final int additional) { | ||
int index = start - localContext.ref.getWindow().getStart(); | ||
final byte[] bases = localContext.ref.getBases(); | ||
Utils.validIndex(index, bases.length); | ||
try { | ||
Utils.validIndex(index, bases.length); | ||
} catch (IllegalArgumentException e) { | ||
flowMissingOneShotLogger.warn("Failed to get hmer from reference. Start index: " + index + " Reference length: " + bases.length); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this also tied to being toward the end of the reference? You aren't asking the reference context for its actual length but the lenght of the bases view which is arbitrary and going to be deeply confusing to users. This error message should either try to give the actual reference name and index or give a clue as to why this could happen since that bases.length view is confusing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed, could not find an easy way to get the length of the reference, |
||
return new byte[0]; | ||
} | ||
|
||
// get hmer | ||
final StringBuilder sb = new StringBuilder(); | ||
|
@@ -458,8 +480,12 @@ private String getRefMotif(final LocalContext localContext, final int start, fin | |
final byte[] bases = localContext.ref.getBases(); | ||
final int startIndex = start - localContext.ref.getWindow().getStart(); | ||
final int endIndex = startIndex + length; | ||
Utils.validIndex(startIndex, bases.length); | ||
Utils.validIndex(endIndex-1, bases.length); | ||
try { | ||
Utils.validIndex(startIndex, bases.length); | ||
Utils.validIndex(endIndex-1, bases.length); | ||
} catch (IllegalArgumentException e) { | ||
return ""; | ||
} | ||
return new String(Arrays.copyOfRange(bases, startIndex, endIndex)); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,7 +69,25 @@ public Object[][] getTestData() { | |
// ins hmer indel | ||
"TATCT C ATTGACCAA", "CA", | ||
"ins", "1", "2", "A", "ATCTC", "TTGAC", "0.3", "NA", "h-indel" | ||
}, | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add to these comments a little more context for the nature of this error and what those are testing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added comments |
||
// Close to 5' edge | ||
"ATCT C ATTGACCAA", "CA", | ||
"ins", "1", "2", "A", null, "TTGAC", "0.3", "NA", "h-indel" | ||
}, | ||
{ | ||
// close to 3' edge | ||
"TATCT C ATT", "CA", | ||
"ins", "1", "2", "A", "ATCTC", null, null, "NA", "h-indel" | ||
}, | ||
{ | ||
// very close to 3' edge | ||
"TATCT C ", "CA", | ||
"ins", "1", null, null, "ATCTC", null, null, "NA", "non-h-indel" | ||
} | ||
|
||
|
||
|
||
}; | ||
|
||
return testData; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm looking inside of establishReadGroupFlowOrder() and it looks like it tries to disable annotation if it can't establish what the flow order is for the local context (which is explicitly the case for the user that brought up this issue)... Can you disentangle what is supposed to happen for this annotation class in the event that no-flow order can be established? The annotation requires a flow order in order to determine what kind of indel it is no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there is a method called isActualFlowOrderRequired in this abstract class that returns false, the annotations that the user enabled all inherit from that abstract class and do not require flow order. The only annotation that does require it is
CycleSkipStatus
.