Skip to content

Commit b1688d9

Browse files
authored
Germline CNV WDLs for WGS (#6607)
1 parent ef71145 commit b1688d9

4 files changed

+111
-14
lines changed

scripts/cnv_wdl/cnv_common_tasks.wdl

+72-1
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,7 @@ task CollectCounts {
188188
File ref_fasta
189189
File ref_fasta_fai
190190
File ref_fasta_dict
191+
Array[String]? disabled_read_filters
191192
Boolean? enable_indexing
192193
String? format
193194
File? gatk4_jar_override
@@ -201,11 +202,22 @@ task CollectCounts {
201202
Int? preemptible_attempts
202203
}
203204

205+
parameter_meta {
206+
bam: {
207+
localization_optional: true
208+
}
209+
bam_idx: {
210+
localization_optional: true
211+
}
212+
}
213+
204214
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
205215
Int command_mem_mb = machine_mem_mb - 1000
206216

207217
Boolean enable_indexing_ = select_first([enable_indexing, false])
208218

219+
Array[String] disabled_read_filters_arr = if defined(disabled_read_filters) then prefix("--disable-read-filter ", select_first([disabled_read_filters])) else []
220+
209221
# Sample name is derived from the bam filename
210222
String base_filename = basename(bam, ".bam")
211223
String format_ = select_first([format, "HDF5"])
@@ -257,7 +269,8 @@ task CollectCounts {
257269
--reference ~{ref_fasta} \
258270
--format ~{default="HDF5" hdf5_or_tsv_or_null_format} \
259271
--interval-merging-rule OVERLAPPING_ONLY \
260-
--output ~{counts_filename_for_collect_read_counts}
272+
--output ~{counts_filename_for_collect_read_counts} \
273+
~{sep=' ' disabled_read_filters_arr}
261274

262275
if [ ~{do_block_compression} = "true" ]; then
263276
bgzip ~{counts_filename_for_collect_read_counts}
@@ -303,6 +316,15 @@ task CollectAllelicCounts {
303316
Int? preemptible_attempts
304317
}
305318

319+
parameter_meta {
320+
bam: {
321+
localization_optional: true
322+
}
323+
bam_idx: {
324+
localization_optional: true
325+
}
326+
}
327+
306328
Int machine_mem_mb = select_first([mem_gb, 13]) * 1000
307329
Int command_mem_mb = machine_mem_mb - 1000
308330

@@ -605,3 +627,52 @@ task CollectModelQualityMetrics {
605627
String qc_status_string = read_string("qcStatus.txt")
606628
}
607629
}
630+
631+
# Splits the combined contig-ploidy calls archive into one archive per sample.
#
# Inputs:
#   contig_ploidy_calls_tar - gzipped tar that is extracted into calls/; the
#                             command reads one SAMPLE_<i> subdirectory per
#                             entry of `samples`, with i starting at 0.
#   samples                 - sample IDs; the i-th ID is paired with the
#                             calls/SAMPLE_<i> directory.
#   docker, mem_gb, disk_space_gb, use_ssd, cpu, preemptible_attempts
#                           - runtime settings (defaults: 2 GiB memory, 10 GB
#                             HDD, 1 cpu, 5 preemptible attempts).
#
# Output:
#   sample_contig_ploidy_calls_tar - files named
#       sample_<zero-padded index>.<sample id>.contig_ploidy_calls.tar.gz
#     collected with glob(); the index is zero-padded so that the glob'd order
#     follows the sample index.
task ScatterPloidyCallsBySample {
    input {
      File contig_ploidy_calls_tar
      Array[String] samples

      # Runtime parameters
      String docker
      Int? mem_gb
      Int? disk_space_gb
      Boolean use_ssd = false
      Int? cpu
      Int? preemptible_attempts
    }

    Int num_samples = length(samples)

    command <<<
      set -eu

      # Extract ploidy calls
      mkdir calls
      tar xzf "~{contig_ploidy_calls_tar}" -C calls/

      # Read the sample IDs one per line rather than relying on shell word
      # splitting, so an ID containing whitespace or glob characters cannot
      # shift the array and mislabel the per-sample archives.
      mapfile -t sample_ids < "~{write_lines(samples)}"

      # Archive call files by sample, renaming so they will be glob'd in order.
      # The index is zero-padded to the number of digits in the sample count.
      num_samples=~{num_samples}
      num_digits=${#num_samples}
      for (( i=0; i<num_samples; i++ ))
      do
        sample_id="${sample_ids[$i]}"
        padded_sample_index=$(printf "%0${num_digits}d" "$i")
        tar -czf "sample_${padded_sample_index}.${sample_id}.contig_ploidy_calls.tar.gz" -C "calls/SAMPLE_${i}" .
      done
    >>>

    runtime {
      docker: docker
      memory: select_first([mem_gb, 2]) + " GiB"
      disks: "local-disk " + select_first([disk_space_gb, 10]) + if use_ssd then " SSD" else " HDD"
      cpu: select_first([cpu, 1])
      preemptible: select_first([preemptible_attempts, 5])
    }

    output {
      Array[File] sample_contig_ploidy_calls_tar = glob("sample_*.contig_ploidy_calls.tar.gz")
    }
}

scripts/cnv_wdl/germline/cnv_germline_case_scattered_workflow.wdl

+10-8
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ workflow CNVGermlineCaseScatteredWorkflow {
4949
##############################################
5050
#### optional arguments for CollectCounts ####
5151
##############################################
52+
Array[String]? disabled_read_filters_for_collect_counts
5253
String? collect_counts_format
5354
Boolean? collect_counts_enable_indexing
5455
Int? mem_gb_for_collect_counts
@@ -149,6 +150,7 @@ workflow CNVGermlineCaseScatteredWorkflow {
149150
preemptible_attempts = preemptible_attempts,
150151
padding = padding,
151152
bin_length = bin_length,
153+
disabled_read_filters_for_collect_counts = disabled_read_filters_for_collect_counts,
152154
collect_counts_format = collect_counts_format,
153155
collect_counts_enable_indexing = collect_counts_enable_indexing,
154156
mem_gb_for_collect_counts = mem_gb_for_collect_counts,
@@ -196,16 +198,16 @@ workflow CNVGermlineCaseScatteredWorkflow {
196198

197199
output {
198200
Array[File] preprocessed_intervals = CNVGermlineCaseWorkflow.preprocessed_intervals
199-
Array[Array[File]] read_counts_entity_id = CNVGermlineCaseWorkflow.read_counts_entity_id
200-
Array[Array[File]] read_counts = CNVGermlineCaseWorkflow.read_counts
201-
Array[File] contig_ploidy_calls_tars = CNVGermlineCaseWorkflow.contig_ploidy_calls_tar
201+
Array[File] read_counts_entity_id = flatten(CNVGermlineCaseWorkflow.read_counts_entity_id)
202+
Array[File] read_counts = flatten(CNVGermlineCaseWorkflow.read_counts)
203+
Array[File] sample_contig_ploidy_calls_tars = flatten(CNVGermlineCaseWorkflow.sample_contig_ploidy_calls_tars)
202204
Array[Array[Array[File]]] gcnv_calls_tars = CNVGermlineCaseWorkflow.gcnv_calls_tars
203205
Array[Array[File]] gcnv_tracking_tars = CNVGermlineCaseWorkflow.gcnv_tracking_tars
204-
Array[Array[File]] genotyped_intervals_vcf = CNVGermlineCaseWorkflow.genotyped_intervals_vcf
205-
Array[Array[File]] genotyped_segments_vcf = CNVGermlineCaseWorkflow.genotyped_segments_vcf
206-
Array[Array[File]] qc_status_files = CNVGermlineCaseWorkflow.qc_status_files
207-
Array[Array[String]] qc_status_strings = CNVGermlineCaseWorkflow.qc_status_strings
208-
Array[Array[File]] denoised_copy_ratios = CNVGermlineCaseWorkflow.denoised_copy_ratios
206+
Array[File] genotyped_intervals_vcf = flatten(CNVGermlineCaseWorkflow.genotyped_intervals_vcf)
207+
Array[File] genotyped_segments_vcf = flatten(CNVGermlineCaseWorkflow.genotyped_segments_vcf)
208+
Array[File] denoised_copy_ratios = flatten(CNVGermlineCaseWorkflow.denoised_copy_ratios)
209+
Array[File] qc_status_files = flatten(CNVGermlineCaseWorkflow.qc_status_files)
210+
Array[String] qc_status_strings = flatten(CNVGermlineCaseWorkflow.qc_status_strings)
209211
}
210212
}
211213

scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl

+15-3
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ workflow CNVGermlineCaseWorkflow {
5959
##############################################
6060
#### optional arguments for CollectCounts ####
6161
##############################################
62+
Array[String]? disabled_read_filters_for_collect_counts
6263
String? collect_counts_format
6364
Boolean? collect_counts_enable_indexing
6465
Int? mem_gb_for_collect_counts
@@ -116,6 +117,8 @@ workflow CNVGermlineCaseWorkflow {
116117
###################################################
117118
Int ref_copy_number_autosomal_contigs
118119
Array[String]? allosomal_contigs
120+
Int? disk_space_gb_for_postprocess_germline_cnv_calls
121+
Int? mem_gb_for_postprocess_germline_cnv_calls
119122

120123
##########################
121124
#### arguments for QC ####
@@ -150,6 +153,7 @@ workflow CNVGermlineCaseWorkflow {
150153
ref_fasta_dict = ref_fasta_dict,
151154
format = collect_counts_format,
152155
enable_indexing = collect_counts_enable_indexing,
156+
disabled_read_filters = disabled_read_filters_for_collect_counts,
153157
gatk4_jar_override = gatk4_jar_override,
154158
gatk_docker = gatk_docker,
155159
mem_gb = mem_gb_for_collect_counts,
@@ -253,18 +257,26 @@ workflow CNVGermlineCaseWorkflow {
253257
}
254258
}
255259

260+
call CNVTasks.ScatterPloidyCallsBySample {
261+
input :
262+
contig_ploidy_calls_tar = DetermineGermlineContigPloidyCaseMode.contig_ploidy_calls_tar,
263+
samples = CollectCounts.entity_id,
264+
docker = gatk_docker,
265+
preemptible_attempts = preemptible_attempts
266+
}
267+
256268
output {
257269
File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals
258270
Array[File] read_counts_entity_id = CollectCounts.entity_id
259271
Array[File] read_counts = CollectCounts.counts
260-
File contig_ploidy_calls_tar = DetermineGermlineContigPloidyCaseMode.contig_ploidy_calls_tar
272+
Array[File] sample_contig_ploidy_calls_tars = ScatterPloidyCallsBySample.sample_contig_ploidy_calls_tar
261273
Array[Array[File]] gcnv_calls_tars = GermlineCNVCallerCaseMode.gcnv_call_tars
262274
Array[File] gcnv_tracking_tars = GermlineCNVCallerCaseMode.gcnv_tracking_tar
263275
Array[File] genotyped_intervals_vcf = PostprocessGermlineCNVCalls.genotyped_intervals_vcf
264276
Array[File] genotyped_segments_vcf = PostprocessGermlineCNVCalls.genotyped_segments_vcf
277+
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
265278
Array[File] qc_status_files = CollectSampleQualityMetrics.qc_status_file
266279
Array[String] qc_status_strings = CollectSampleQualityMetrics.qc_status_string
267-
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
268280
}
269281
}
270282

@@ -314,7 +326,7 @@ task DetermineGermlineContigPloidyCaseMode {
314326
--mapping-error-rate ~{default="0.01" mapping_error_rate} \
315327
--sample-psi-scale ~{default="0.0001" sample_psi_scale}
316328

317-
tar czf case-contig-ploidy-calls.tar.gz -C ~{output_dir_}/case-calls .
329+
tar c -C ~{output_dir_}/case-calls . | gzip -1 > case-contig-ploidy-calls.tar.gz
318330

319331
rm -rf contig-ploidy-model
320332
>>>

scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl

+14-2
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,7 @@ workflow CNVGermlineCohortWorkflow {
8787
##############################################
8888
#### optional arguments for CollectCounts ####
8989
##############################################
90+
Array[String]? disabled_read_filters_for_collect_counts
9091
String? collect_counts_format
9192
Boolean? collect_counts_enable_indexing
9293
Int? mem_gb_for_collect_counts
@@ -152,6 +153,8 @@ workflow CNVGermlineCohortWorkflow {
152153
#### arguments for PostprocessGermlineCNVCalls ####
153154
###################################################
154155
Int ref_copy_number_autosomal_contigs
156+
Int? mem_gb_for_postprocess_germline_cnv_calls
157+
Int? disk_space_gb_for_postprocess_germline_cnv_calls
155158
Array[String]? allosomal_contigs
156159

157160
##########################
@@ -206,6 +209,7 @@ workflow CNVGermlineCohortWorkflow {
206209
ref_fasta_dict = ref_fasta_dict,
207210
format = collect_counts_format,
208211
enable_indexing = collect_counts_enable_indexing,
212+
disabled_read_filters = disabled_read_filters_for_collect_counts,
209213
gatk4_jar_override = gatk4_jar_override,
210214
gatk_docker = gatk_docker,
211215
mem_gb = mem_gb_for_collect_counts,
@@ -353,24 +357,32 @@ workflow CNVGermlineCohortWorkflow {
353357
preemptible_attempts = preemptible_attempts
354358
}
355359

360+
call CNVTasks.ScatterPloidyCallsBySample {
361+
input :
362+
contig_ploidy_calls_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_calls_tar,
363+
samples = CollectCounts.entity_id,
364+
docker = gatk_docker,
365+
preemptible_attempts = preemptible_attempts
366+
}
367+
356368
output {
357369
File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals
358370
Array[File] read_counts_entity_ids = CollectCounts.entity_id
359371
Array[File] read_counts = CollectCounts.counts
360372
File? annotated_intervals = AnnotateIntervals.annotated_intervals
361373
File filtered_intervals = FilterIntervals.filtered_intervals
362374
File contig_ploidy_model_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_model_tar
363-
File contig_ploidy_calls_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_calls_tar
375+
Array[File] sample_contig_ploidy_calls_tars = ScatterPloidyCallsBySample.sample_contig_ploidy_calls_tar
364376
Array[File] gcnv_model_tars = GermlineCNVCallerCohortMode.gcnv_model_tar
365377
Array[Array[File]] gcnv_calls_tars = GermlineCNVCallerCohortMode.gcnv_call_tars
366378
Array[File] gcnv_tracking_tars = GermlineCNVCallerCohortMode.gcnv_tracking_tar
367379
Array[File] genotyped_intervals_vcfs = PostprocessGermlineCNVCalls.genotyped_intervals_vcf
368380
Array[File] genotyped_segments_vcfs = PostprocessGermlineCNVCalls.genotyped_segments_vcf
381+
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
369382
Array[File] sample_qc_status_files = CollectSampleQualityMetrics.qc_status_file
370383
Array[String] sample_qc_status_strings = CollectSampleQualityMetrics.qc_status_string
371384
File model_qc_status_file = CollectModelQualityMetrics.qc_status_file
372385
String model_qc_string = CollectModelQualityMetrics.qc_status_string
373-
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
374386
}
375387
}
376388

0 commit comments

Comments
 (0)