// Pipeline script: merges each individual's fastqs, writes a BCBio CSV per family and runs BCBio on it.
// Opt in to DSL2 syntax (the include/workflow/take/main constructs used in this file).
nextflow.enable.dsl = 2
// Components defined in sibling script files; both are invoked from the entry workflow in this file.
include {read_inputs} from './inputs.nf'
include {validation} from './validation.nf'
// Interpolated as the leading command of the run_bcbio script; defaults to null here.
params.bcbio = null
// ---- Processes ----
/*
 * Concatenate all of an individual's R1 fastqs, and all of their R2 fastqs, into one
 * gzipped file per read direction.
 *
 * Input:  tuple (indv_id, r1, r2); r1 and r2 are lists whose elements are joined with
 *         spaces to form the argument list passed to `cat`.
 * Output: tuple (indv_id, <indv_id>_merged_r1.fastq.gz, <indv_id>_merged_r2.fastq.gz),
 *         also copied to outputs/individuals/<indv_id>/merged_fastqs.
 */
process merge_fastqs {
    publishDir "outputs/individuals/$indv_id/merged_fastqs", mode: 'copy'

    input:
    tuple(val(indv_id), val(r1), val(r2))

    output:
    tuple(
        val(indv_id),
        path("${indv_id}_merged_r1.fastq.gz"),
        path("${indv_id}_merged_r2.fastq.gz")
    )

    script:
    // todo: pigz if gzip becomes a bottleneck
    """
    # R1 is merged in the background so both read directions are compressed concurrently.
    cat ${r1.join(' ')} | gzip -c > ${indv_id}_merged_r1.fastq.gz &
    r1_pid=\$!

    cat ${r2.join(' ')} | gzip -c > ${indv_id}_merged_r2.fastq.gz

    # Do not let the task finish (and its outputs be collected) while the R1 merge is
    # still writing; waiting on the PID also surfaces the background job's exit status.
    wait \$r1_pid
    """
}
/*
 * Write one CSV per family, consisting of a fixed header line followed by one line per
 * individual.
 *
 * Input:  tuple (family_id, individual_info); individual_info is interpolated into the
 *         Python script as text and parsed back from its '[[a, b], [c, d]]' string form.
 * Output: <family_id>.csv, also copied to outputs/families/<family_id>.
 *
 * NOTE(review): the header names 6 columns, while prepare_bcbio_config in this file
 * passes 7 fields per individual (sample_id, father, mother, sex, phenotype, merged_r1,
 * merged_r2) - confirm the intended column mapping.
 */
process write_bcbio_csv {
    publishDir "outputs/families/$family_id", mode: 'copy'

    input:
    tuple(val(family_id), val(individual_info))

    output:
    path("${family_id}.csv")

    script:
    """
    #!/usr/bin/env python

    individual_info = '$individual_info'

    # Strip the outer brackets, then split into one chunk per individual.
    lines = individual_info.lstrip('[').rstrip(']').split('], [')

    with open('${family_id}.csv', 'w') as f:
        f.write('samplename,description,batch,sex,phenotype,variant_regions\\n')
        for l in lines:
            l = l.lstrip('[').rstrip(']').split(', ')
            # Terminate every row; without the newline all individuals end up on one line.
            f.write(','.join(l) + '\\n')
    """
}
/*
 * Invoke the executable given by params.bcbio on a config file, passing `-n 16 -t local`.
 *
 * Input: path to the config file to run.
 */
process run_bcbio {
    input:
    path(bcbio_config)

    script:
    // Resolve the configured executable once, then build the command line from it.
    def bcbio_exe = params.bcbio
    """
    ${bcbio_exe} ${bcbio_config} -n 16 -t local
    """
}
workflow prepare_bcbio_config {
    /*
    - For each individual, merge all R1 and R2 fastqs
    - For each family:
      - Read the relevant information from the samplesheet, ped files and merged fastqs
      - Write a BCBio config file
      - Run BCBio on it
    */

    take:
    ch_samplesheet_info
    ch_individuals_by_family

    main:
    // One (indv_id, merged_r1, merged_r2) tuple per individual.
    ch_merged_fastqs = merge_fastqs(ch_samplesheet_info)

    // Previously ch_merged_fastqs was joined with itself inside its own definition and
    // ch_merged_data was never assigned. The closure below expects 10 elements, which
    // matches joining per-individual info with the samplesheet entry and merged fastqs.
    // NOTE(review): assumes ch_individuals_by_family emits
    // (sample_id, family_id, father, mother, sex, phenotype) keyed on sample_id, and
    // ch_samplesheet_info emits (sample_id, r1s, r2s) - confirm against inputs.nf.
    ch_merged_data = ch_individuals_by_family
        .join(ch_samplesheet_info)
        .join(ch_merged_fastqs)
        .map { sample_id, family_id, father, mother, sex, phenotype, r1s, r2s, merged_r1, merged_r2 ->
            // Re-key on family so that individuals can be grouped per family.
            [family_id, [sample_id, father, mother, sex, phenotype, merged_r1, merged_r2]]
        }
        .groupTuple()

    ch_bcbio_config = write_bcbio_csv(ch_merged_data)
    run_bcbio(ch_bcbio_config)
}
// Entry workflow: read the inputs, validate the samplesheet channel, then build and run
// the per-family BCBio configs.
workflow {
    read_inputs()

    // read_inputs exposes its outputs positionally.
    inputs = read_inputs.out
    samplesheet_ch = inputs[0]
    ped_file_ch = inputs[1]  // bound for parity with the other outputs; not consumed below
    family_individuals_ch = inputs[2]

    validation(samplesheet_ch)
    prepare_bcbio_config(samplesheet_ch, family_individuals_ch)
}