Skip to content
Snippets Groups Projects
Commit 839b6154 authored by mwham's avatar mwham
Browse files

Passing files into read_inputs instead of values, adding BCBio Yaml writing and execution

parent 64e14f18
No related branches found
No related tags found
1 merge request: !1 NextFlow
Pipeline #10017 failed
......@@ -46,8 +46,8 @@ workflow read_inputs {
[
[
indv1,
[lane_1_r1.fastq.gz, lane_2_r1.fastq.gz],
[lane_1_r2.fastq.gz, lane_2_r2.fastq.gz]
[/abs/path/to/lane_1_r1.fastq.gz, /abs/path/to/lane_2_r1.fastq.gz],
[/abs/path/to/lane_1_r2.fastq.gz, /abs/path/to/lane_2_r2.fastq.gz]
],
[
indv2,
......@@ -59,7 +59,7 @@ workflow read_inputs {
ch_samplesheet_info = Channel.fromPath(samplesheet)
.splitCsv(sep:'\t', header: true)
.map(
{ line -> [line.individual_id, line.read_1, line.read_2] }
{ line -> [line.individual_id, file(line.read_1), file(line.read_2)] }
)
.groupTuple()
......
......@@ -4,12 +4,13 @@ include {read_inputs} from './inputs.nf'
include {validation} from './validation.nf'
params.bcbio = null
params.bcbio_template = null
process merge_fastqs {
publishDir "outputs/individuals/$indv_id/merged_fastqs", mode: 'copy'
input:
tuple(val(indv_id), val(r1), val(r2))
tuple(val(indv_id), path(r1), path(r2))
output:
tuple(
......@@ -47,6 +48,26 @@ process write_bcbio_csv {
for l in lines:
l = l.lstrip('[').rstrip(']').split(', ')
f.write(','.join(l))
f.write('\\n')
"""
}
// Generates the per-family BCBio project YAML.
//
// Inputs:
//   - [family_id, merged_r1s, merged_r2s]: the family identifier plus the merged
//     R1 and merged R2 fastqs of every individual in that family. The calling
//     workflow maps to [family_id, merged_r1, merged_r2] and then applies
//     groupTuple(), so each item has three elements; the declaration below
//     mirrors that shape so the R2 files are not left out of the command line.
//   - family_csv: sample metadata CSV produced by write_bcbio_csv.
// Output:
//   - "${family_id}.yaml", published under outputs/families/<family_id>.
process prepare_bcbio_yaml {
    publishDir "outputs/families/$family_id", mode: 'copy'

    input:
    tuple(val(family_id), val(merged_r1s), val(merged_r2s))
    val(family_csv)

    output:
    path("${family_id}.yaml")

    script:
    // flatten() copes with both grouped lists and single values, and gives one
    // flat list of fastqs that can be joined into command-line arguments.
    def family_fastqs = [merged_r1s, merged_r2s].flatten()
    // NOTE(review): the second command assumes the prepare step leaves
    // "${family_id}-merged.csv" in the task directory - confirm against the
    // name of the CSV written by write_bcbio_csv.
    """
    ${params.bcbio_prepare_samples} --out . --csv $family_csv
    ${params.bcbio} -w template ${params.bcbio_template} ${family_id}-merged.csv ${family_fastqs.join(' ')}
    """
}
......@@ -76,15 +97,23 @@ workflow prepare_bcbio_config {
main:
ch_merged_fastqs = merge_fastqs(ch_samplesheet_info)
ch_merged_data = ch_individuals_by_family.map({ k, v -> v })
ch_joined_family_info = ch_individuals_by_family.map({ k, v -> v })
.join(ch_merged_fastqs)
.map(
ch_metadata = ch_joined_family_info.map(
{ sample_id, family_id, father, mother, sex, phenotype, r1s, r2s, merged_r1, merged_r2 ->
[family_id, [sample_id, father, mother, sex, phenotype, merged_r1, merged_r2]]
}).groupTuple()
ch_bcbio_config = write_bcbio_csv(ch_merged_data)
run_bcbio(ch_bcbio_config)
ch_family_fastqs = ch_joined_family_info.map(
{ sample_id, family_id, father, mother, sex, phenotype, r1s, r2s, merged_r1, merged_r2 ->
[family_id, merged_r1, merged_r2]
}
).groupTuple()
ch_bcbio_csv = write_bcbio_csv(ch_metadata)
ch_bcbio_yaml = prepare_bcbio_yaml(ch_family_fastqs, ch_bcbio_csv)
run_bcbio(ch_bcbio_yaml)
}
......
......@@ -35,7 +35,7 @@ workflow validation {
ch_md5_files = ch_fastqs.map(
{ fastq -> fastq.getParent().getParent() + '/md5sums.txt' }
)
).unique()
check_md5s(ch_md5_files)
}
# BCBio project template (presumably bcbio/bcbio_template.yaml, the file the test
# script passes as --bcbio_template - confirm).
# Nesting restored: `details` is a list with one entry holding the `algorithm`
# settings plus the analysis type and genome build; `upload` is top-level.
details:
  - algorithm:
      platform: illumina
      quality_format: standard
      aligner: bwa
      align_split_size: false
      trim_reads: fastp
      adapters: [nextera2, polyg]
      mark_duplicates: true
      realign: false
      recalibrate: true
      effects: vep
      effects_transcripts: all
      variantcaller: gatk-haplotype
      indelcaller: false
      remove_lcr: true
      tools_on:
        - vep_splicesite_annotations
    analysis: variant2
    genome_build: hg38
upload:
  dir: outputs/bcbio/results
......@@ -2,6 +2,9 @@
source scripts/test_config.sh
bcbio=$PWD/scripts/bcbio_nextgen.py
bcbio_prepare_samples=$PWD/scripts/bcbio_prepare_samples.py
common_args="--bcbio $bcbio --bcbio_prepare_samples $bcbio_prepare_samples --bcbio_template $PWD/bcbio/bcbio_template.yaml"
test_exit_status=0
......@@ -9,7 +12,7 @@ nextflow clean -f
rm -r ./outputs/* ./work/*
echo "Test case 1: simple trio"
run_nextflow ../pipeline/main.nf --ped_file assets/input_data/ped_files/batch_1.ped --sample_sheet assets/input_data/sample_sheets/batch_1.tsv --bcbio $bcbio
run_nextflow ../pipeline/main.nf --ped_file assets/input_data/ped_files/batch_1.ped --sample_sheet assets/input_data/sample_sheets/batch_1.tsv $common_args
test_exit_status=$(( $test_exit_status + $? ))
for f in "
outputs/individuals/000001/merged_fastqs/000001_merged_r1.fastq.gz
......@@ -26,7 +29,7 @@ do
done
echo "Test case 2: MD5 errors"
run_nextflow ../pipeline/main.nf --ped_file assets/input_data/ped_files/batch_2_md5_errors.ped --sample_sheet assets/input_data/sample_sheets/batch_2_md5_errors.tsv --bcbio $bcbio
run_nextflow ../pipeline/main.nf --ped_file assets/input_data/ped_files/batch_2_md5_errors.ped --sample_sheet assets/input_data/sample_sheets/batch_2_md5_errors.tsv $common_args
if [ $? == 0 ]
then
test_exit_status=$(( $test_exit_status + 1 ))
......
......@@ -2,3 +2,20 @@
"""
Fake BCBio for testing with the empty datasets from tests/assets/
"""
import argparse
def main():
    """
    Entry point of the fake BCBio command line.

    Accepts the options the pipeline passes (-w, -n, -t) plus one or more
    positional arguments. When -w is given, the second positional is taken to
    be the '<project>-merged.csv' sample sheet and an empty '<project>.yaml'
    is created; without -w nothing is written.
    """
    a = argparse.ArgumentParser()
    a.add_argument('positionals', nargs='+')
    a.add_argument('-w')
    a.add_argument('-n', type=int, default=16)
    a.add_argument('-t', default='local')
    args = a.parse_args()

    if args.w:
        if len(args.positionals) < 2:
            # Report a usage error instead of failing with a bare IndexError.
            a.error('-w needs a template and a sample CSV as positional arguments')

        # Strip only the trailing '-<suffix>' so that project/family IDs which
        # themselves contain hyphens (e.g. 'FAM-01-merged.csv') are kept whole.
        project = args.positionals[1].rsplit('-', 1)[0]
        with open(project + '.yaml', 'w'):
            pass
if __name__ == '__main__':
main()
#!/usr/bin/env python
"""
Fake BCBio prepare script for testing with the empty datasets from tests/assets/
"""
import os
import argparse
def main():
    """
    Entry point of the fake bcbio_prepare_samples script.

    Creates an empty '<csv stem>-merged.csv' inside the --out directory
    (current directory by default). The input CSV passed via --csv is only
    used for its name and is never opened for writing, so upstream data is
    left intact.
    """
    a = argparse.ArgumentParser()
    a.add_argument('--out', default='.')
    a.add_argument('--csv', required=True)
    args = a.parse_args()

    # NOTE(review): '<stem>-merged.csv' matches the file name the pipeline
    # hands to the next BCBio command, assuming the input CSV is named
    # '<family_id>.csv' - confirm against write_bcbio_csv.
    stem = os.path.splitext(os.path.basename(args.csv))[0]
    merged_csv = os.path.join(args.out, stem + '-merged.csv')
    with open(merged_csv, 'w'):
        pass
if __name__ == '__main__':
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment