Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
nextflow.enable.dsl = 2
include {read_inputs} from './inputs.nf'
process observed_md5 {
    // Compute the checksum actually observed for a single staged file.
    // Emits a tuple of (file name, process stdout); stdout carries the first
    // space-delimited field of md5sum's output line.

    input:
    path downloaded_file

    output:
    tuple val("${downloaded_file.name}"), stdout

    script:
    """
    md5sum $downloaded_file | cut -d ' ' -f 1
    """
}
process expected_md5 {
    /*
    Look up downloaded_file's expected checksum in md5sum_file. Assumes the
    md5sum_file to be in the format '<checksum> path/to/downloaded.file', with
    the checksum as the first space-delimited field of the line.

    Inputs:
      downloaded_file - file whose name is searched for in md5sum_file
      md5sum_file     - checksum listing to search

    Output:
      tuple of (downloaded_file's name, process stdout), where stdout holds
      the first field of every matching line.

    The file name is matched as a fixed string (grep -F) rather than as a
    regular expression, so metacharacters in it - most commonly the '.' of
    file extensions - cannot match unrelated lines. The '--' ends option
    parsing so a file name beginning with '-' is not read as a grep option.

    Note: matching is still by substring, so a listing that contains another
    entry whose path includes this file name will yield more than one line.
    */
    input:
    path(downloaded_file)
    path(md5sum_file)

    output:
    tuple(val("${downloaded_file.getName()}"), stdout)

    script:
    """
    grep -F -- $downloaded_file $md5sum_file | cut -d ' ' -f 1
    """
}
process raise_errors {
    // Stop the run with exit code 1 and a fixed message when invoked.
    // The value received as `errors` is not read in the body; it only serves
    // as the input that triggers this process.

    input:
    val errors

    exec:
    exit(1, "MD5 mismatches found")
}
/*
Take a parsed samplesheet, flatten it and parse it into a channel of observed
vs. expected checksums. Calls raise_errors above to raise an exception upon any
mismatches.
*/
// Input channel of samplesheet rows. Each item is destructured below as
// (indv, r1, r2) - presumably a sample/individual identifier plus two FASTQ
// paths; confirm against read_inputs.
take:
ch_samplesheet_info
main:
// Discard indv, flatten the [r1, r2] pairs into a single stream of entries and
// convert each entry with file().
ch_fastqs = ch_samplesheet_info
.map(
{ indv, r1, r2 ->
[r1, r2]
}
).flatten()
.map({file(it)})
// For every FASTQ, derive the location of an 'md5sums.txt' from the parent of
// the FASTQ's parent directory.
// NOTE(review): relies on `Path + String` producing a usable path - confirm.
ch_md5_files = ch_fastqs.map(
{ fastq -> fastq.getParent().getParent() + '/md5sums.txt' }
)
// Run both checksum processes; each emits (file name, checksum text).
// ch_fastqs and ch_md5_files are supplied as parallel inputs to expected_md5.
ch_obs = observed_md5(ch_fastqs)
ch_exp = expected_md5(ch_fastqs, ch_md5_files)
// Append ch_exp after ch_obs, strip surrounding whitespace from each checksum,
// group the checksums by file name, keep only groups whose first two checksums
// differ, then gather one "<name>: <a> != <b>" string per group into a list.
// NOTE(review): it[1][0] is presumably the observed and it[1][1] the expected
// value because ch_obs is concatenated first - confirm. Files that share a
// name would fall into the same group.
ch_mismatches = ch_obs.concat(ch_exp)
.map({fastq, md5 -> [fastq, md5.strip()]})
.groupTuple()
.filter({it[1][0] != it[1][1]})
.collect({"${it[0]}: ${it[1][0]} != ${it[1][1]}"})
// Print the collected mismatch lines, then hand them to raise_errors.
// NOTE(review): presumably nothing is emitted when there are no mismatches, so
// raise_errors would not run in that case - confirm.
ch_mismatches.view({"\nChecksum mismatches:\n${it.join('\n')}"})
raise_errors(ch_mismatches)