Skip to content

Commit 1ac0038

Browse files
committed
ScatterIntervals produces interval_list instead of intervals
* This matches what Picard expects an interval list to be named.
* Added a new --extension argument to allow changing the extension.
* Fixes #5390
1 parent 1db9cd5 commit 1ac0038

File tree

5 files changed

+51
-26
lines changed

5 files changed

+51
-26
lines changed

scripts/cnn_variant_wdl/cnn_variant_common_tasks.wdl

+1-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ task SplitIntervals {
234234
}
235235

236236
output {
237-
Array[File] interval_files = glob("*.intervals")
237+
Array[File] interval_files = glob("*.interval_list")
238238
}
239239
}
240240

scripts/mutect2_wdl/mutect2.wdl

+2-2
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ task SplitIntervals {
488488
-scatter ${scatter_count} \
489489
-O interval-files \
490490
${split_intervals_extra_args}
491-
cp interval-files/*.intervals .
491+
cp interval-files/*.interval_list .
492492
}
493493

494494
runtime {
@@ -502,7 +502,7 @@ task SplitIntervals {
502502
}
503503

504504
output {
505-
Array[File] interval_files = glob("*.intervals")
505+
Array[File] interval_files = glob("*.interval_list")
506506
}
507507
}
508508

scripts/mutect2_wdl/mutect2_nio.wdl

+2-2
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ task SplitIntervals {
433433
-scatter ${scatter_count} \
434434
-O interval-files \
435435
${split_intervals_extra_args}
436-
cp interval-files/*.intervals .
436+
cp interval-files/*.interval_list .
437437
}
438438

439439
runtime {
@@ -447,7 +447,7 @@ task SplitIntervals {
447447
}
448448

449449
output {
450-
Array[File] interval_files = glob("*.intervals")
450+
Array[File] interval_files = glob("*.interval_list")
451451
}
452452
}
453453

src/main/java/org/broadinstitute/hellbender/tools/walkers/SplitIntervals.java

+11-4
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
* </pre>
4242
*
4343
* <p>
44-
* The -O argument specifies a directory name for the scatter intervals files. Each file will be named, e.g 0000-scattered.intervals,
45-
* 0001-scattered.intervals, 0002-scattered.intervals and so on.
46-
* The default --scatter_count is 1 and so this value should be changed to utilize the tool's functionality.
44+
* The -O argument specifies a directory name for the scatter intervals files. Each file will be named, e.g. 0000-scattered.interval_list,
45+
* 0001-scattered.interval_list, 0002-scattered.interval_list and so on.
46+
* The default --scatter-count is 1 and so this value should be changed to utilize the tool's functionality.
4747
* Specify --subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION to avoid splitting input intervals -- that is, the set
4848
* of input intervals is split, but individual intervals are left intact. This may affect results when using assembly-based callers downstream.
4949
* </p>
@@ -63,6 +63,10 @@ public class SplitIntervals extends GATKTool {
6363
public static final String SUBDIVISION_MODE_SHORT_NAME = "mode";
6464
public static final String SUBDIVISION_MODE_lONG_NAME = "subdivision-mode";
6565

66+
public static final String INTERVAL_FILE_EXTENSION_FULL_NAME = "extension";
67+
68+
public static final String PICARD_INTERVAL_FILE_EXTENSION = "interval_list";
69+
public static final String DEFAULT_EXTENSION = "-scattered." + PICARD_INTERVAL_FILE_EXTENSION;
6670

6771
@Argument(fullName = SCATTER_COUNT_LONG_NAME, shortName = SCATTER_COUNT_SHORT_NAME,
6872
doc = "scatter count: number of output interval files to split into", optional = true)
@@ -76,6 +80,9 @@ public class SplitIntervals extends GATKTool {
7680
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME)
7781
public File outputDir;
7882

83+
@Argument(doc = "Extension to use when writing interval files", fullName = INTERVAL_FILE_EXTENSION_FULL_NAME, optional = true)
84+
public String extension = DEFAULT_EXTENSION;
85+
7986
@Override
8087
public void onTraversalStart() {
8188
ParamUtils.isPositive(scatterCount, "scatter-count must be > 0.");
@@ -97,7 +104,7 @@ public void onTraversalStart() {
97104
final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
98105

99106
final DecimalFormat formatter = new DecimalFormat("0000");
100-
IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
107+
IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + extension)));
101108
}
102109

103110
@Override

src/test/java/org/broadinstitute/hellbender/tools/walkers/SplitIntervalsIntegrationTest.java

+35-17
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package org.broadinstitute.hellbender.tools.walkers;
22

33
import htsjdk.samtools.SAMSequenceRecord;
4-
import java.nio.file.Path;
5-
import java.nio.file.Paths;
64
import org.broadinstitute.hellbender.CommandLineProgramTest;
75
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
86
import org.broadinstitute.hellbender.utils.GenomeLocParser;
@@ -12,6 +10,8 @@
1210
import org.testng.annotations.Test;
1311

1412
import java.io.File;
13+
import java.nio.file.Path;
14+
import java.nio.file.Paths;
1515
import java.util.List;
1616
import java.util.stream.Collectors;
1717
import java.util.stream.IntStream;
@@ -26,6 +26,7 @@ public class SplitIntervalsIntegrationTest extends CommandLineProgramTest {
2626
private static final Path REFERENCE = Paths.get(b37_reference_20_21);
2727
private static final GenomeLocParser GLP = new GenomeLocParser(ReferenceDataSource.of(REFERENCE).getSequenceDictionary());
2828

29+
2930
@Test
3031
public void testOneInterval() {
3132
final int scatterCount = 5;
@@ -37,8 +38,25 @@ public void testOneInterval() {
3738
"-O", outputDir.getAbsolutePath()
3839
};
3940
runCommandLine(args);
40-
verifyScatteredFilesExist(scatterCount, outputDir);
41-
checkIntervalSizes(scatterCount, outputDir, 1000000);
41+
verifyScatteredFilesExist(scatterCount, outputDir, SplitIntervals.DEFAULT_EXTENSION);
42+
checkIntervalSizes(scatterCount, outputDir, 1000000, SplitIntervals.DEFAULT_EXTENSION);
43+
}
44+
45+
@Test
46+
public void testOneIntervalAlternateExtension() {
47+
final int scatterCount = 5;
48+
final File outputDir = createTempDir("output");
49+
final String extension = "-scattered.with.a.wierd.extension";
50+
final String[] args = {
51+
"-L", "20:1000000-2000000",
52+
"-R", REFERENCE.toAbsolutePath().toString(),
53+
"-" + SplitIntervals.SCATTER_COUNT_SHORT_NAME, Integer.toString(scatterCount),
54+
"-O", outputDir.getAbsolutePath(),
55+
"--extension", extension
56+
};
57+
runCommandLine(args);
58+
verifyScatteredFilesExist(scatterCount, outputDir, extension);
59+
checkIntervalSizes(scatterCount, outputDir, 1000000, extension);
4260
}
4361

4462
@Test
@@ -52,8 +70,8 @@ public void testSingleScatter() {
5270
"-O", outputDir.getAbsolutePath()
5371
};
5472
runCommandLine(args);
55-
verifyScatteredFilesExist(scatterCount, outputDir);
56-
checkIntervalSizes(scatterCount, outputDir, 1000000);
73+
verifyScatteredFilesExist(scatterCount, outputDir, SplitIntervals.DEFAULT_EXTENSION);
74+
checkIntervalSizes(scatterCount, outputDir, 1000000, SplitIntervals.DEFAULT_EXTENSION);
5775

5876
}
5977

@@ -69,8 +87,8 @@ public void testTwoIntervals() {
6987
"-O", outputDir.getAbsolutePath()
7088
};
7189
runCommandLine(args);
72-
verifyScatteredFilesExist(scatterCount, outputDir);
73-
checkIntervalSizes(scatterCount, outputDir, 2000000);
90+
verifyScatteredFilesExist(scatterCount, outputDir, SplitIntervals.DEFAULT_EXTENSION);
91+
checkIntervalSizes(scatterCount, outputDir, 2000000, SplitIntervals.DEFAULT_EXTENSION);
7492

7593
}
7694

@@ -84,28 +102,28 @@ public void testNoIntervals() {
84102
"-O", outputDir.getAbsolutePath()
85103
};
86104
runCommandLine(args);
87-
verifyScatteredFilesExist(scatterCount, outputDir);
105+
verifyScatteredFilesExist(scatterCount, outputDir, SplitIntervals.DEFAULT_EXTENSION);
88106
final int totalLengthInRef = GLP.getSequenceDictionary().getSequences().stream().mapToInt(SAMSequenceRecord::getSequenceLength).sum();
89-
checkIntervalSizes(scatterCount, outputDir, totalLengthInRef);
107+
checkIntervalSizes(scatterCount, outputDir, totalLengthInRef, SplitIntervals.DEFAULT_EXTENSION);
90108

91109
}
92110

93-
private static Stream<File> getScatteredFiles(final int scatterCount, final File outputDir) {
94-
return IntStream.range(0, scatterCount).mapToObj(n -> new File(outputDir, "000" + n + "-scattered.intervals"));
111+
private static Stream<File> getScatteredFiles(final int scatterCount, final File outputDir, String extension) {
112+
return IntStream.range(0, scatterCount).mapToObj(n -> new File(outputDir, "000" + n + extension));
95113
}
96114

97-
private static void verifyScatteredFilesExist(final int scatterCount, final File outputDir) {
98-
getScatteredFiles(scatterCount, outputDir).forEach(f -> Assert.assertTrue(f.exists()));
99-
Assert.assertFalse(new File(outputDir, "000" + scatterCount + "-scattered.intervals").exists());
115+
private static void verifyScatteredFilesExist(final int scatterCount, final File outputDir, String extension) {
116+
getScatteredFiles(scatterCount, outputDir, extension).forEach(f -> Assert.assertTrue(f.exists()));
117+
Assert.assertFalse(new File(outputDir, "000" + scatterCount + extension).exists());
100118
}
101119

102120
private static List<SimpleInterval> readIntervals(final File intervalsFile) {
103121
return IntervalUtils.intervalFileToList(GLP, intervalsFile.getAbsolutePath()).stream().map(SimpleInterval::new).collect(Collectors.toList());
104122
}
105123

106-
private static void checkIntervalSizes(final int scatterCount, final File outputDir, final int expectedTotalLength) {
124+
private static void checkIntervalSizes(final int scatterCount, final File outputDir, final int expectedTotalLength, String extension) {
107125
final int splitLength = expectedTotalLength / scatterCount;
108-
getScatteredFiles(scatterCount, outputDir).forEach(f -> Assert.assertEquals(readIntervals(f).stream().mapToInt(SimpleInterval::size).sum(), splitLength, 100));
126+
getScatteredFiles(scatterCount, outputDir, extension).forEach(f -> Assert.assertEquals(readIntervals(f).stream().mapToInt(SimpleInterval::size).sum(), splitLength, 100));
109127
}
110128

111129
}

0 commit comments

Comments
 (0)