Skip to content

Commit b8e168e

Browse files
committed
responding to review comments
1 parent 40c723f commit b8e168e

15 files changed

+1082
-3304
lines changed

src/main/java/org/broadinstitute/hellbender/tools/reference/CompareReferences.java

+131-94
Large diffs are not rendered by default.

src/main/java/org/broadinstitute/hellbender/utils/alignment/MummerExecutor.java

+3-29
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
import java.util.*;
1919

2020
/**
21-
* Class for executing MUMmer pipeline.
21+
* Class for executing MUMmer alignment pipeline to detect SNPs and INDELs in mismatching sequences.
22+
* The MUMmer pipeline runs nucmer, delta-filter, and show-snps programs, sequentially.
23+
* Used in CompareReferences tool when running in FULL_ALIGNMENT mode.
2224
*/
2325
public final class MummerExecutor {
2426

@@ -83,7 +85,6 @@ public File executeMummer(File fasta1, File fasta2, File outputDirectory){
8385
String[] showSNPsArgs = {mummerExecutableDirectory.getAbsolutePath() + "/show-snps", "-rlTH", deltaFilterOutput.getAbsolutePath()};
8486
ProcessOutput showSNPs = runShellCommand(showSNPsArgs, null, showSNPSOutput, false);
8587

86-
8788
return showSNPSOutput;
8889
}
8990

@@ -116,33 +117,6 @@ public static ProcessOutput runShellCommand(String[] command, Map<String, String
116117
return output;
117118
}
118119

119-
/**
120-
* Method to run python commands from GATK.
121-
*
122-
* @param script the name of the executable as a String
123-
* @param scriptArguments a List of Strings containing the remainder of the commandline for execution
124-
* @param additionalEnvironmentVars a Map of environment variables to their values; may be left null if no environment variables needed
125-
* @param stdoutCaptureFile File for output
126-
* @param printStdout boolean to trigger displaying to stdout
127-
* @return the ProcessOutput of the run
128-
*/
129-
public static ProcessOutput runPythonCommand(String script, List<String> scriptArguments, Map<String, String> additionalEnvironmentVars, File stdoutCaptureFile, boolean printStdout){
130-
Map<String, String> environment = new HashMap<>();
131-
environment.putAll(System.getenv());
132-
if(additionalEnvironmentVars != null){
133-
environment.putAll(additionalEnvironmentVars);
134-
}
135-
List<String> args = new ArrayList<>();
136-
args.add("python");
137-
args.add(script);
138-
args.addAll(scriptArguments);
139-
ProcessOutput output = runShellCommand(args.toArray(new String[]{}), environment, stdoutCaptureFile, printStdout);
140-
if(output.getExitValue() != 0){
141-
throw new UserException("Error running " + script + ". Exit with code " + output.getExitValue());
142-
}
143-
return output;
144-
}
145-
146120
// method to unzip and locate the MUMmer binaries packaged within GATK
147121
private File prepareMUMmerExecutionDirectory(){
148122
try{

src/main/java/org/broadinstitute/hellbender/utils/python/PythonScriptExecutor.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
* between command/script/module execution. Using -i doesn't buy you anything (for this version of the executor, at
3636
* least) since the process is terminated after each command completes.
3737
*/
38-
public class PythonScriptExecutor extends PythonExecutorBase {
38+
public class PythonScriptExecutor extends PythonExecutorBase {
3939
private static final Logger logger = LogManager.getLogger(PythonScriptExecutor.class);
4040

4141
private final List<String> curatedCommandLineArgs = new ArrayList<>();

src/main/java/org/broadinstitute/hellbender/utils/runtime/ScriptExecutor.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
* {@link #getApproximateCommandLine}
1717
* {@link #getScriptException}
1818
*/
19-
public abstract class
20-
ScriptExecutor {
19+
public abstract class ScriptExecutor {
2120
private static final Logger logger = LogManager.getLogger(org.broadinstitute.hellbender.utils.runtime.ScriptExecutor.class);
2221

2322
protected final String externalScriptExecutableName; // external program to run; e.g. "RScript" or "python"

src/test/java/org/broadinstitute/hellbender/tools/reference/CompareReferencesIntegrationTest.java

+92-67
Original file line numberDiff line numberDiff line change
@@ -125,103 +125,128 @@ public void testCompareReferencesMultipleReferencesStdOut() throws IOException{
125125
}
126126

127127
// FIND_SNPS_ONLY tests:
128-
@Test
129-
public void testFindSNPsMultipleSNPs() throws IOException{
130-
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
131-
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta");
132-
final File output = IOUtils.createTempDir("tempFindSNPs");
133-
134-
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FIND_SNPS_ONLY", "-base-comparison-output", output.getAbsolutePath()};
135-
runCommandLine(args);
136128

137-
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr2multiplesnps.fasta_snps.tsv");
138-
final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_chr2multiplesnps.fasta_snps.tsv");
139-
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
129+
@DataProvider(name = "findSNPsData")
130+
public Object[][] findSNPsData() {
131+
return new Object[][]{
132+
// ref1, ref2, output name, expected output
133+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
134+
new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta"),
135+
"hg19mini.fasta_hg19mini_chr2multiplesnps.fasta_snps.tsv",
136+
new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_chr2multiplesnps.fasta_snps.tsv"),
137+
},
138+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
139+
new File(getToolTestDataDir() + "hg19mini_chr2iupacsnps.fasta"),
140+
"hg19mini.fasta_hg19mini_chr2iupacsnps.fasta_snps.tsv",
141+
new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_chr2iupacsnps.fasta_snps.tsv"),
142+
},
143+
// produces empty output file bc FIND_SNPS_ONLY doesn't work on indels
144+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
145+
new File(getToolTestDataDir() + "hg19mini_chr1indel.fasta"),
146+
"hg19mini.fasta_hg19mini_chr1indel.fasta_snps.tsv",
147+
new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_chr1indel.fasta_snps.tsv"),
148+
},
149+
};
140150
}
141-
142-
@Test
143-
public void testFindSNPsIUPACBases() throws IOException{
144-
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
145-
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2iupacsnps.fasta");
151+
@Test(dataProvider = "findSNPsData")
152+
public void testFindSNPs(File ref1, File ref2, String actualOutputFileName, File expectedOutput) throws IOException{
146153
final File output = IOUtils.createTempDir("tempFindSNPs");
147154

148-
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FIND_SNPS_ONLY", "-base-comparison-output", output.toPath().toString()};
155+
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FIND_SNPS_ONLY", "-base-comparison-output", output.getAbsolutePath()};
149156
runCommandLine(args);
150157

151-
final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_chr2iupacsnps.fasta_snps.tsv");
152-
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr2iupacsnps.fasta_snps.tsv");
153-
158+
final File actualOutput = new File(output, actualOutputFileName);
154159
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
155160
}
156161

157-
// FULL_ALIGNMENT tests:
158-
@Test
159-
public void testFullAlignmentModeMultipleSNPs() throws IOException{
160-
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
161-
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta");
162-
final File output = IOUtils.createTempDir("tempFullAlignmentSNPs");
163-
164-
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
165-
runCommandLine(args);
166-
167-
final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_chr2multiplesnps.fasta.vcf");
168-
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr2multiplesnps.fasta.vcf");
169-
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
162+
@DataProvider(name = "baseComparisonIllegalArgumentsData")
163+
public Object[][] baseComparisonIllegalArgumentsData() {
164+
return new Object[][]{
165+
// base comparison mode
166+
new Object[]{"FULL_ALIGNMENT"},
167+
new Object[]{"FIND_SNPS_ONLY"},
168+
};
170169
}
171170

172-
@Test
173-
public void testFullAlignmentModeDeletion() throws IOException{
171+
// tool throws a UserException.CouldNotCreateOutputFile but gets wrapped in a NullPointerException
172+
@Test(dataProvider = "baseComparisonIllegalArgumentsData", expectedExceptions = NullPointerException.class)
173+
public void testFindSNPsNoOutputDir(String baseComparisonMode) throws IOException{
174174
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
175-
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr1indel.fasta");
176-
final File output = IOUtils.createTempDir("tempFullAlignmentIndel");
175+
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2iupacsnps.fasta");
177176

178-
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
177+
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", baseComparisonMode};
179178
runCommandLine(args);
180-
181-
final File expectedOutput = new File(getToolTestDataDir(), "expected.testDeletion.hg19mini.fasta_hg19mini_chr1indel.fasta.vcf");
182-
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr1indel.fasta.vcf");
183-
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
184179
}
185180

186-
@Test
187-
public void testFullAlignmentModeInsertion() throws IOException{
188-
final File ref1 = new File(getToolTestDataDir() + "hg19mini_chr1indel.fasta");
189-
final File ref2 = new File(getToolTestDataDir() + "hg19mini.fasta");
190-
final File output = IOUtils.createTempDir("tempFullAlignmentIndel");
181+
@Test(dataProvider = "baseComparisonIllegalArgumentsData", expectedExceptions = UserException.CouldNotCreateOutputFile.class)
182+
public void testFindSNPsNonexistentOutputDir(String baseComparisonMode) throws IOException{
183+
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
184+
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2iupacsnps.fasta");
185+
final File nonexistentOutputDir = new File("/tempreferences");
191186

192-
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
187+
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", baseComparisonMode, "-base-comparison-output", nonexistentOutputDir.getAbsolutePath()};
193188
runCommandLine(args);
194-
195-
final File expectedOutput = new File(getToolTestDataDir(), "expected.testInsertion.hg19mini_chr1indel.fasta_hg19mini.fasta.vcf");
196-
final File actualOutput = new File(output, "hg19mini_chr1indel.fasta_hg19mini.fasta.vcf");
197-
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
198189
}
199190

200-
@Test
201-
public void testFullAlignmentSNPsOnMultipleContigs() throws IOException{
191+
@Test(dataProvider = "baseComparisonIllegalArgumentsData", expectedExceptions = UserException.BadInput.class)
192+
public void testFindSNPsMoreThanTwoReferences(String baseComparisonMode) throws IOException{
202193
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
203-
final File ref2 = new File(getToolTestDataDir() + "hg19mini_snpsmultiplecontigs.fasta");
204-
final File output = IOUtils.createTempDir("tempFullAlignmentSNPsMultipleContigs");
194+
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2iupacsnps.fasta");
195+
final File ref3 = new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta");
196+
final File output = IOUtils.createTempDir("tempFindSNPs");
205197

206-
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
198+
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-refcomp", ref3.getAbsolutePath(), "-base-comparison", baseComparisonMode, "-base-comparison-output", output.getAbsolutePath()};
207199
runCommandLine(args);
200+
}
208201

209-
final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf");
210-
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf");
211-
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
202+
// FULL_ALIGNMENT tests:
203+
@DataProvider(name = "fullAlignmentData")
204+
public Object[][] fullAlignmentData() {
205+
return new Object[][]{
206+
// ref1, ref2, output name, expected output
207+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
208+
new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta"),
209+
"hg19mini.fasta_hg19mini_chr2multiplesnps.fasta.vcf",
210+
new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_chr2multiplesnps.fasta.vcf"),
211+
},
212+
// order that these 2 references are specified determines whether it is an insertion or deletion
213+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
214+
new File(getToolTestDataDir() + "hg19mini_chr1indel.fasta"),
215+
"hg19mini.fasta_hg19mini_chr1indel.fasta.vcf",
216+
new File(getToolTestDataDir(), "expected.testDeletion.hg19mini.fasta_hg19mini_chr1indel.fasta.vcf"),
217+
},
218+
// order that these 2 references are specified determines whether it is an insertion or deletion
219+
new Object[]{ new File(getToolTestDataDir() + "hg19mini_chr1indel.fasta"),
220+
new File(getToolTestDataDir() + "hg19mini.fasta"),
221+
"hg19mini_chr1indel.fasta_hg19mini.fasta.vcf",
222+
new File(getToolTestDataDir(), "expected.testInsertion.hg19mini_chr1indel.fasta_hg19mini.fasta.vcf"),
223+
},
224+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
225+
new File(getToolTestDataDir() + "hg19mini_snpsmultiplecontigs.fasta"),
226+
"hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf",
227+
new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf"),
228+
},
229+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
230+
new File(getToolTestDataDir() + "hg19mini_snpandindel.fasta"),
231+
"hg19mini.fasta_hg19mini_snpandindel.fasta.vcf",
232+
new File(getToolTestDataDir(), "expected.SNPandINDEL.hg19mini.fasta_hg19mini_snpandindel.fasta.vcf"),
233+
},
234+
new Object[]{ new File(getToolTestDataDir() + "hg19mini.fasta"),
235+
new File(getToolTestDataDir() + "hg19mini_manySNPsINDELs.fasta"),
236+
"hg19mini.fasta_hg19mini_manySNPsINDELs.fasta.vcf",
237+
new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_manySNPsINDELs.fasta.vcf"),
238+
},
239+
};
212240
}
213241

214-
@Test
215-
public void testFullAlignmentModeSNPAndINDEL() throws IOException{
216-
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
217-
final File ref2 = new File(getToolTestDataDir() + "hg19mini_snpandindel.fasta");
218-
final File output = IOUtils.createTempDir("tempFullAlignmentIndel");
242+
@Test(dataProvider = "fullAlignmentData")
243+
public void testFullAlignmentMode(File ref1, File ref2, String actualOutputFileName, File expectedOutput) throws IOException{
244+
final File output = IOUtils.createTempDir("tempDirFullAlignment");
219245

220246
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
221247
runCommandLine(args);
222248

223-
final File expectedOutput = new File(getToolTestDataDir(), "expected.SNPandINDEL.hg19mini.fasta_hg19mini_snpandindel.fasta.vcf");
224-
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpandindel.fasta.vcf");
249+
final File actualOutput = new File(output, actualOutputFileName);
225250
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
226251
}
227252
}

src/test/java/org/broadinstitute/hellbender/tools/reference/CompareReferencesUnitTest.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.broadinstitute.hellbender.tools.reference;
22

33
import org.broadinstitute.hellbender.CommandLineProgramTest;
4+
import org.broadinstitute.hellbender.GATKBaseTest;
45
import org.broadinstitute.hellbender.engine.GATKPath;
56
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
67
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
@@ -11,17 +12,16 @@
1112
import java.io.File;
1213
import java.io.IOException;
1314

14-
public class CompareReferencesUnitTest extends CommandLineProgramTest {
15+
public class CompareReferencesUnitTest extends GATKBaseTest {
1516

1617
@Test
1718
public void testGenerateFastaForSequence() throws IOException {
18-
File ref = new File(getToolTestDataDir() + "hg19mini.fasta");
19+
GATKPath ref = new GATKPath(getToolTestDataDir() + "hg19mini.fasta");
1920
File expectedOutput = new File(getToolTestDataDir() + "1.fasta");
2021
String sequenceName = "1";
2122
File output = createTempFile("example_chr1", ".fasta");
2223

23-
ReferenceDataSource source = ReferenceDataSource.of(ref.toPath());
24-
CompareReferences.generateFastaForSequence(source, sequenceName, new GATKPath(output.toString()));
24+
CompareReferences.generateFastaForSequence(ref, sequenceName, new GATKPath(output.toString()));
2525

2626
IntegrationTestSpec.assertEqualTextFiles(output, expectedOutput);
2727
}

0 commit comments

Comments
 (0)