nextflow.enable.dsl = 2

/*
 * Verify the md5 checksum of a single FASTQ file.
 *
 * The checksum manifest is looked up in three places, in this order, and the
 * first one found is used:
 *   1. "edgen" layout: <grandparent dir>/md5sums.txt, with entries written as
 *      <parent dir name>/<fastq name>
 *   2. "crf" layout: the first *md5.txt file (in ls order) in the FASTQ's own
 *      directory, with entries written as <fastq name>
 *   3. local layout: <fastq path>.md5 next to the FASTQ
 *
 * The task exits non-zero if no manifest is found or if `md5sum -c` fails.
 *
 * input:
 *   fastq_file - file object for the FASTQ, passed as a `val` rather than a
 *                `path` (see the note on the input declaration).
 */
process check_md5s {
    label 'small'

    input:
    val(fastq_file)  // getParent() gives null object errors if this is a path

    script:
    """
    fastq_dir="${fastq_file.getParent()}"
    fastq_name="${fastq_file.getName()}"

    edgen_dir="${fastq_file.getParent().getParent()}"
    edgen_md5_file="\$edgen_dir/md5sums.txt"
    edgen_fastq="${fastq_file.getParent().getName()}/\$fastq_name"

    crf_dir="\$fastq_dir"
    # A non-matching glob makes ls fail: silence it and tolerate the failure so
    # the variable is simply left empty (also safe when pipefail is enabled).
    crf_md5_file="\$(ls "\$crf_dir"/*md5.txt 2>/dev/null | head -n 1 || true)"
    crf_fastq="\$fastq_name"

    local_md5_file="${fastq_file}.md5"

    # Every expansion below is quoted on purpose: an unquoted empty variable
    # turns the test into '[ -f ]', which is TRUE and would wrongly select that
    # branch, making the later fallbacks unreachable.
    # grep -F matches the file name literally (a '.' is not a regex wildcard).
    if [ -f "\$edgen_md5_file" ]
    then
        cd "\$edgen_dir"
        md5sum -c <(grep -F -- "\$edgen_fastq" md5sums.txt)
    elif [ -f "\$crf_md5_file" ]
    then
        cd "\$crf_dir"
        md5sum -c <(grep -F -- "\$crf_fastq" "\$crf_md5_file")
    elif [ -f "\$local_md5_file" ]
    then
        cd "\$fastq_dir"
        md5sum -c "\$local_md5_file"
    else
        echo "Could not find md5 file for $fastq_file"
        exit 1
    fi
    """
}

workflow validation {
    /*
    Take a channel of parsed samplesheet rows, extract the r1/r2 FASTQ entries
    of each row, flatten them so that every FASTQ is emitted individually,
    convert each one to a file object and run check_md5s on it. A missing
    manifest or a checksum mismatch makes the corresponding check_md5s task
    fail.
    */

    take:
        ch_indv_info

    main:
        ch_fastqs = ch_indv_info
            .map { indv, family, father, mother, sex, affected, r1, r2 ->
                [r1, r2]
            }
            .flatten()
            .map { file(it) }

        check_md5s(ch_fastqs)
}