Skip to content

Commit 7fa9c39

Browse files
committed
Added separate allele-count thresholds for the normal and tumor in ModelSegments.
1 parent 9562a48 commit 7fa9c39

File tree

2 files changed

+45
-22
lines changed

2 files changed

+45
-22
lines changed

scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl

+17-8
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ workflow CNVSomaticPairWorkflow {
9090
##############################################
9191
Int? max_num_segments_per_chromosome
9292
Int? min_total_allele_count
93+
Int? min_total_allele_count_normal
9394
Float? genotyping_homozygous_log_ratio_threshold
9495
Float? genotyping_base_error_rate
9596
Float? kernel_variance_copy_ratio
@@ -218,6 +219,7 @@ workflow CNVSomaticPairWorkflow {
218219
normal_allelic_counts = CollectAllelicCountsNormal.allelic_counts,
219220
max_num_segments_per_chromosome = max_num_segments_per_chromosome,
220221
min_total_allele_count = min_total_allele_count,
222+
min_total_allele_count_normal = min_total_allele_count_normal,
221223
genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
222224
genotyping_base_error_rate = genotyping_base_error_rate,
223225
kernel_variance_copy_ratio = kernel_variance_copy_ratio,
@@ -345,7 +347,7 @@ workflow CNVSomaticPairWorkflow {
345347
denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
346348
allelic_counts = CollectAllelicCountsNormal.allelic_counts,
347349
max_num_segments_per_chromosome = max_num_segments_per_chromosome,
348-
min_total_allele_count = min_total_allele_count,
350+
min_total_allele_count = min_total_allele_count_normal,
349351
genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
350352
genotyping_base_error_rate = genotyping_base_error_rate,
351353
kernel_variance_copy_ratio = kernel_variance_copy_ratio,
@@ -542,6 +544,7 @@ task ModelSegments {
542544
File? normal_allelic_counts
543545
Int? max_num_segments_per_chromosome
544546
Int? min_total_allele_count
547+
Int? min_total_allele_count_normal
545548
Float? genotyping_homozygous_log_ratio_threshold
546549
Float? genotyping_base_error_rate
547550
Float? kernel_variance_copy_ratio
@@ -577,6 +580,11 @@ task ModelSegments {
577580
# If optional output_dir not specified, use "out"
578581
String output_dir_ = select_first([output_dir, "out"])
579582

583+
# default values are min_total_allele_count_ = 0 in matched-normal mode
584+
# = 30 in case-only mode
585+
Int default_min_total_allele_count = if defined(normal_allelic_counts) then 0 else 30
586+
Int min_total_allele_count_ = select_first([min_total_allele_count, default_min_total_allele_count])
587+
580588
command <<<
581589
set -e
582590
mkdir ${output_dir_}
@@ -586,7 +594,8 @@ task ModelSegments {
586594
--denoised-copy-ratios ${denoised_copy_ratios} \
587595
--allelic-counts ${allelic_counts} \
588596
${"--normal-allelic-counts " + normal_allelic_counts} \
589-
--minimum-total-allele-count ${default="30" min_total_allele_count} \
597+
--minimum-total-allele-count ${min_total_allele_count_} \
598+
--minimum-total-allele-count-normal ${default="30" min_total_allele_count_normal} \
590599
--genotyping-homozygous-log-ratio-threshold ${default="-10.0" genotyping_homozygous_log_ratio_threshold} \
591600
--genotyping-base-error-rate ${default="0.05" genotyping_base_error_rate} \
592601
--maximum-number-of-segments-per-chromosome ${default="1000" max_num_segments_per_chromosome} \
@@ -597,14 +606,14 @@ task ModelSegments {
597606
--window-size ${sep=" --window-size " window_sizes} \
598607
--number-of-changepoints-penalty-factor ${default="1.0" num_changepoints_penalty_factor} \
599608
--minor-allele-fraction-prior-alpha ${default="25.0" minor_allele_fraction_prior_alpha} \
600-
--number-of-samples-copy-ratio ${default=100 num_samples_copy_ratio} \
601-
--number-of-burn-in-samples-copy-ratio ${default=50 num_burn_in_copy_ratio} \
602-
--number-of-samples-allele-fraction ${default=100 num_samples_allele_fraction} \
603-
--number-of-burn-in-samples-allele-fraction ${default=50 num_burn_in_allele_fraction} \
609+
--number-of-samples-copy-ratio ${default="100" num_samples_copy_ratio} \
610+
--number-of-burn-in-samples-copy-ratio ${default="50" num_burn_in_copy_ratio} \
611+
--number-of-samples-allele-fraction ${default="100" num_samples_allele_fraction} \
612+
--number-of-burn-in-samples-allele-fraction ${default="50" num_burn_in_allele_fraction} \
604613
--smoothing-credible-interval-threshold-copy-ratio ${default="2.0" smoothing_threshold_copy_ratio} \
605614
--smoothing-credible-interval-threshold-allele-fraction ${default="2.0" smoothing_threshold_allele_fraction} \
606-
--maximum-number-of-smoothing-iterations ${default=10 max_num_smoothing_iterations} \
607-
--number-of-smoothing-iterations-per-fit ${default=0 num_smoothing_iterations_per_fit} \
615+
--maximum-number-of-smoothing-iterations ${default="10" max_num_smoothing_iterations} \
616+
--number-of-smoothing-iterations-per-fit ${default="0" num_smoothing_iterations_per_fit} \
608617
--output ${output_dir_} \
609618
--output-prefix ${entity_id}
610619

src/main/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegments.java

+28-14
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,8 @@ public final class ModelSegments extends CommandLineProgram {
231231
public static final String ALLELE_FRACTION_LEGACY_SEGMENTS_FILE_SUFFIX = ".af.igv" + SEGMENTS_FILE_SUFFIX;
232232

233233
//het genotyping argument names
234-
public static final String MINIMUM_TOTAL_ALLELE_COUNT_LONG_NAME = "minimum-total-allele-count";
234+
public static final String MINIMUM_TOTAL_ALLELE_COUNT_CASE_LONG_NAME = "minimum-total-allele-count-case";
235+
public static final String MINIMUM_TOTAL_ALLELE_COUNT_NORMAL_LONG_NAME = "minimum-total-allele-count-normal";
235236
public static final String GENOTYPING_HOMOZYGOUS_LOG_RATIO_THRESHOLD_LONG_NAME = "genotyping-homozygous-log-ratio-threshold";
236237
public static final String GENOTYPING_BASE_ERROR_RATE_LONG_NAME = "genotyping-base-error-rate";
237238

@@ -248,8 +249,8 @@ public final class ModelSegments extends CommandLineProgram {
248249
public static final String MINOR_ALLELE_FRACTION_PRIOR_ALPHA_LONG_NAME = "minor-allele-fraction-prior-alpha";
249250
public static final String NUMBER_OF_SAMPLES_COPY_RATIO_LONG_NAME = "number-of-samples-copy-ratio";
250251
public static final String NUMBER_OF_BURN_IN_SAMPLES_COPY_RATIO_LONG_NAME = "number-of-burn-in-samples-copy-ratio";
251-
public static final String NUM_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-samples-allele-fraction";
252-
public static final String NUM_BURN_IN_ALLELE_FRACTION_LONG_NAME = "number-of-burn-in-samples-allele-fraction";
252+
public static final String NUMBER_OF_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-samples-allele-fraction";
253+
public static final String NUMBER_OF_BURN_IN_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-burn-in-samples-allele-fraction";
253254

254255
//smoothing argument names
255256
public static final String SMOOTHING_CREDIBLE_INTERVAL_THRESHOLD_COPY_RATIO_LONG_NAME = "smoothing-credible-interval-threshold-copy-ratio";
@@ -292,12 +293,22 @@ public final class ModelSegments extends CommandLineProgram {
292293
private String outputDir;
293294

294295
@Argument(
295-
doc = "Minimum total count for filtering allelic counts, if available.",
296-
fullName = MINIMUM_TOTAL_ALLELE_COUNT_LONG_NAME,
296+
doc = "Minimum total count for filtering allelic counts in the case sample, if available. " +
297+
"The default value of zero is appropriate for matched-normal mode; " +
298+
"increase to an appropriate value for case-only mode.",
299+
fullName = MINIMUM_TOTAL_ALLELE_COUNT_CASE_LONG_NAME,
297300
minValue = 0,
298301
optional = true
299302
)
300-
private int minTotalAlleleCount = 30;
303+
private int minTotalAlleleCountCase = 0;
304+
305+
@Argument(
306+
doc = "Minimum total count for filtering allelic counts in the matched-normal sample, if available.",
307+
fullName = MINIMUM_TOTAL_ALLELE_COUNT_NORMAL_LONG_NAME,
308+
minValue = 0,
309+
optional = true
310+
)
311+
private int minTotalAlleleCountNormal = 30;
301312

302313
@Argument(
303314
doc = "Log-ratio threshold for genotyping and filtering homozygous allelic counts, if available. " +
@@ -414,15 +425,15 @@ public final class ModelSegments extends CommandLineProgram {
414425

415426
@Argument(
416427
doc = "Total number of MCMC samples for allele-fraction model.",
417-
fullName = NUM_SAMPLES_ALLELE_FRACTION_LONG_NAME,
428+
fullName = NUMBER_OF_SAMPLES_ALLELE_FRACTION_LONG_NAME,
418429
optional = true,
419430
minValue = 1
420431
)
421432
private int numSamplesAlleleFraction = 100;
422433

423434
@Argument(
424435
doc = "Number of burn-in samples to discard for allele-fraction model.",
425-
fullName = NUM_BURN_IN_ALLELE_FRACTION_LONG_NAME,
436+
fullName = NUMBER_OF_BURN_IN_SAMPLES_ALLELE_FRACTION_LONG_NAME,
426437
optional = true,
427438
minValue = 0
428439
)
@@ -619,12 +630,14 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
619630

620631
logger.info("Genotyping heterozygous sites from available allelic counts...");
621632

633+
AllelicCountCollection filteredAllelicCounts = allelicCounts;
634+
622635
//filter on total count in case sample
623-
logger.info(String.format("Filtering allelic counts with total count less than %d...", minTotalAlleleCount));
624-
AllelicCountCollection filteredAllelicCounts = new AllelicCountCollection(
636+
logger.info(String.format("Filtering allelic counts with total count less than %d...", minTotalAlleleCountCase));
637+
filteredAllelicCounts = new AllelicCountCollection(
625638
metadata,
626-
allelicCounts.getRecords().stream()
627-
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCount)
639+
filteredAllelicCounts.getRecords().stream()
640+
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCountCase)
628641
.collect(Collectors.toList()));
629642
logger.info(String.format("Retained %d / %d sites after filtering on total count...",
630643
filteredAllelicCounts.size(), allelicCounts.size()));
@@ -645,6 +658,7 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
645658
if (normalAllelicCounts == null) {
646659
//filter on homozygosity in case sample
647660
logger.info("No matched normal was provided, not running in matched-normal mode...");
661+
648662
logger.info("Performing binomial testing and filtering homozygous allelic counts...");
649663
hetAllelicCounts = new AllelicCountCollection(
650664
metadata,
@@ -672,11 +686,11 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
672686
}
673687

674688
//filter on total count in matched normal
675-
logger.info(String.format("Filtering allelic counts in matched normal with total count less than %d...", minTotalAlleleCount));
689+
logger.info(String.format("Filtering allelic counts in matched normal with total count less than %d...", minTotalAlleleCountNormal));
676690
AllelicCountCollection filteredNormalAllelicCounts = new AllelicCountCollection(
677691
normalMetadata,
678692
normalAllelicCounts.getRecords().stream()
679-
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCount)
693+
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCountNormal)
680694
.collect(Collectors.toList()));
681695
logger.info(String.format("Retained %d / %d sites in matched normal after filtering on total count...",
682696
filteredNormalAllelicCounts.size(), normalAllelicCounts.size()));

0 commit comments

Comments
 (0)