Skip to content
Snippets Groups Projects
Commit acd6068e authored by mwham's avatar mwham
Browse files

Making min/max resources configurable, adding test stubs

parent abf8d102
No related branches found
No related tags found
2 merge requests: !10 Merge in master, !8 Bin dir, CI build fixes
Pipeline #14806 failed
Showing
with 196 additions and 23 deletions
test:
script:
- conda env create -p $PWD/conda
- if [ -d $PWD/conda ]; then conda env update -p $PWD/conda; else conda env create -p $PWD/conda; fi
- conda activate $PWD/conda
- cd tests/ && ./run_tests.sh
- cd tests/ && ./run_stubs.sh
- conda deactivate
......@@ -41,4 +41,3 @@ workflow {
exit 1, 'params.workflow required - variant-calling or variant-prioritisation'
}
}
// Pipeline-wide resource bounds. The get_cpus / get_mem / get_time helpers in
// this config read these values to clamp each requested resource.
params {
// Upper bounds applied to any requested cpus / memory / time.
max_cpus = 16
max_mem = 32.GB
max_time = 48.h
// Lower bounds applied to any requested cpus / memory / time.
min_cpus = 1
min_mem = 1.GB
min_time = 2.h
}
// Named configuration profiles; each one overrides the settings listed inside it.
profiles {
// Run processes with the 'local' executor.
standard {
process.executor = 'local'
}
// Turn on the process.echo setting for every process.
debug {
process.echo = true
}
// Local executor with the resource ceilings lowered to 1 CPU / 1 GB / 1 h and
// the input params pointed at relative paths (presumably for the stub test
// run - confirm against tests/run_stubs.sh).
// NOTE(review): max_time here (1.h) is below the default min_time (2.h).
// get_time applies the maximum last, so every request resolves to 1.h -
// confirm that is intended.
stubs {
process.executor = 'local'
params.max_cpus = 1
params.max_mem = 1.GB
params.max_time = 1.h
params.bcbio = 'scripts/bcbio_nextgen.py'
params.bcbio_template = 'assets/bcbio/bcbio_template.yaml'
params.target_bed = 'assets/input_data/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed'
params.reference_genome = 'assets/ref.fa'
params.output_dir = 'outputs'
}
// Submit processes through the 'slurm' executor.
slurm {
process.executor = 'slurm'
}
// Submit processes through the 'sge' executor.
sge {
process.executor = 'sge'
}
}
process {
executor = 'slurm'
cpus = 4
memory = 8.GB
time = '6h'
cpus = get_cpus(4)
memory = get_mem(8.GB)
time = get_time(6.h)
withLabel: small {
withLabel: local {
executor = 'local'
cpus = 2
memory = 2.GB
}
withLabel: small {
cpus = get_cpus(2)
memory = get_mem(2.GB)
}
withLabel: medium {
cpus = 4
memory = 8.GB
cpus = get_cpus(4)
memory = get_mem(8.GB)
}
withLabel: large {
cpus = 16
memory = 32.GB
cpus = get_cpus(16)
memory = get_mem(32.GB)
}
}
profiles {
debug {
process.echo = true
withLabel: long {
time = get_time(48.h)
}
}
// Clamp a requested CPU count between params.min_cpus and params.max_cpus.
// The upper bound is applied last, so it wins if the two bounds conflict.
def get_cpus(cpus) {
    def ceiling = params.max_cpus
    def raised = Math.max(params.min_cpus, cpus)
    return Math.min(ceiling, raised)
}
// Clamp a requested memory amount between params.min_mem and params.max_mem.
// The comparison is done on the raw `size` values and the result is coerced
// back to a nextflow.util.MemoryUnit. The upper bound is applied last.
def get_mem(mem) {
    def ceiling = params.max_mem.size
    def floor = params.min_mem.size
    def raised = Math.max(floor, mem.size)
    def clamped = Math.min(ceiling, raised)
    return clamped as nextflow.util.MemoryUnit
}
// Clamp a requested wall time between params.min_time and params.max_time.
// The comparison is done in milliseconds and the result is coerced back to a
// nextflow.util.Duration. The upper bound is applied last.
def get_time(time) {
    def ceiling = params.max_time.toMillis()
    def floor = params.min_time.toMillis()
    def raised = Math.max(floor, time.toMillis())
    def clamped = Math.min(ceiling, raised)
    return clamped as nextflow.util.Duration
}
......@@ -43,7 +43,7 @@ process write_bcbio_csv {
target_bed = os.path.realpath('${target_bed}')
individual_info = '$individual_info'
lines = individual_info.lstrip('[').rstrip(']').split('], [')
with open('${family_id}.csv', 'w') as f:
f.write('samplename,description,batch,sex,phenotype,variant_regions\\n')
for l in lines:
......@@ -71,6 +71,38 @@ process bcbio_family_processing {
cd ${family_id}-merged &&
../${bcbio}/anaconda/bin/bcbio_nextgen.py config/${family_id}-merged.yaml -n 16 -t local
"""
// Stub script: creates an empty placeholder tree under <family_id>-merged/
// (config files, a dated family results folder, and one folder per individual
// with qc sub-folders) without running bcbio.
stub:
"""
output_dir=${family_id}-merged/results
family_dir="${family_id}-merged/results/\$(date '+%Y-%m-%d')_${family_id}-merged"
mkdir -p \$family_dir
mkdir ${family_id}-merged/config
touch ${family_id}-merged/config/${family_id}-merged{.csv,.yaml,-template.yaml}
cd \$family_dir
# Family id with underscores removed, used as the VCF file name prefix.
vcf_prefix="\$(echo ${family_id} | sed 's/_//g')"
# The braces stay outside the quotes: bash does not brace-expand quoted text,
# so quoting the whole word would create one file literally named with braces.
touch "\$vcf_prefix"-gatk-haplotype-annotated.vcf.gz{,.tbi} bcbio-nextgen{,-commands}.log data_versions.csv
touch project-summary.yaml metadata.csv programs.txt
# NOTE(review): the files and folders below are created beside the multiqc
# folder, not inside it - confirm that is the intended layout.
mkdir multiqc
touch list_files_final.txt multiqc_config.yaml multiqc_report.html
mkdir multiqc_data report
cd ..
for i in ${individuals.collect().join(' ')}
do
mkdir -p \$i/qc
cd \$i
touch \$i-{callable.bed,ready.bam,ready.bam.bai}
cd qc
mkdir contamination coverage fastqc peddy samtools variants
touch contamination/\$i-verifybamid.{out,selfSM}
touch coverage/cleaned-Twist_Exome_RefSeq_targets_hg38.plus15bp-merged-padded.bed
touch fastqc/{\$i.zip,fastqc_data.txt,fastqc_report.html}
touch peddy/{\$i.ped_check.csv,\$i.peddy.ped,\$i.sex_check.csv}
touch samtools/{\$i-idxstats.txt,\$i.txt}
touch variants/\${i}_bcftools_stats.txt
cd ../..
done
"""
}
}
......@@ -96,6 +128,7 @@ process format_bcbio_individual_outputs {
ln -s \$indv_input/\${i}-callable.bed \$indv_output/\${i}-callable.bed &&
ln -s \$indv_input/qc \$indv_output/qc &&
# todo: make cram compression its own process
bam=\$indv_input/\$i-ready.bam
cram="\$indv_output/\$i-ready.cram" &&
\$samtools view -@ ${task.cpus} -T ${reference_genome} -C -o \$cram \$bam &&
......@@ -113,6 +146,29 @@ process format_bcbio_individual_outputs {
fi
done
"""
// Stub script: builds individual_outputs/<individual> for each individual by
// symlinking the callable BED and qc folder from the bcbio results, copying
// the ready BAM to the .cram path, and touching empty .crai and flagstat
// files, so no samtools call is made.
stub:
"""
mkdir individual_outputs
for i in ${individuals.join(' ')}
do
indv_input=\$PWD/${bcbio_output_dir}/results/\$i
indv_output=individual_outputs/\$i &&
mkdir -p \$indv_output &&
ln -s \$indv_input/\${i}-callable.bed \$indv_output/\${i}-callable.bed &&
ln -s \$indv_input/qc \$indv_output/qc &&
bam=\$indv_input/\$i-ready.bam
cram="\$indv_output/\$i-ready.cram" &&
cp \$bam \$cram &&
touch \$cram.crai &&
bam_flagstat=./\$i-ready.bam.flagstat.txt &&
cram_flagstat=\$cram.flagstat.txt &&
touch \$bam_flagstat &&
touch \$cram_flagstat
done
"""
}
}
......@@ -185,7 +241,7 @@ process collate_pipeline_outputs {
outputs=${params.pipeline_project_id}_${params.pipeline_project_version}
mkdir \$outputs &&
mkdir \$outputs/{config,families,params,prioritization,qc} &&
for d in ${bcbio_family_output_dirs.join(' ')}
do
cp -rL \$d \$outputs/families/\$(basename \$d)
......@@ -197,12 +253,14 @@ process collate_pipeline_outputs {
done &&
cd \$outputs/families &&
# todo: make multiqc its own process
../../${bcbio}/anaconda/bin/multiqc \
--title "Trio whole exome QC report: ${params.pipeline_project_id}_${params.pipeline_project_version}" \
--outdir ../qc \
--filename ${params.pipeline_project_id}_${params.pipeline_project_version}_qc_report.html \
. &&
peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
peddy_validation.pl \
--output \$peddy_validation_output \
......@@ -232,6 +290,49 @@ process collate_pipeline_outputs {
cp -L ${samplesheet} \$outputs/params/
done
"""
// Stub script: assembles the <project_id>_<project_version> output tree from
// the family output folders, per-family ped extracts, and bcbio config files.
// It writes a one-line placeholder in place of the QC report HTML rather than
// calling multiqc.
// NOTE(review): peddy_validation.pl is still invoked for real here, so the
// stub run needs that script available - confirm this is intended.
stub:
"""
outputs=${params.pipeline_project_id}_${params.pipeline_project_version}
mkdir \$outputs &&
mkdir \$outputs/{config,families,params,prioritization,qc} &&
for d in ${bcbio_family_output_dirs.join(' ')}
do
cp -rL \$d \$outputs/families/\$(basename \$d)
done &&
for f in ${family_ids.join(' ')}
do
grep \$f ${ped_file} > \$outputs/params/\$f.ped
done &&
cd \$outputs/families &&
# todo: make multiqc its own process
echo "Trio whole exome QC report: ${params.pipeline_project_id}_${params.pipeline_project_version}" > ../qc/${params.pipeline_project_id}_${params.pipeline_project_version}_qc_report.html &&
peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
peddy_validation.pl \
--output \$peddy_validation_output \
--project ${params.pipeline_project_id} \
--version ${params.pipeline_project_version} \
--ped ../../${ped_file} \
--families . &&
cd ../.. &&
for d in ${raw_bcbio_output_dirs.join(' ')}
do
family_id_merged=\$(basename \$d)
family_id=\$(echo \$family_id_merged | sed 's/-merged//') &&
dest_basename=${params.pipeline_project_id}_${params.pipeline_project_version}_\$family_id &&
cp -L \$d/config/\$family_id_merged.csv \$outputs/params/\$dest_basename.csv &&
cp -L \$d/config/\$family_id_merged.yaml \$outputs/config/\$dest_basename.yaml &&
cp -L ${ped_file} \$outputs/params/ &&
cp -L ${samplesheet} \$outputs/params/
done
"""
}
}
......@@ -250,7 +351,7 @@ workflow process_families {
ch_individuals
ch_ped_file
ch_samplesheet
main:
ch_bcbio = file(params.bcbio, checkIfExists: true)
ch_bcbio_template = file(params.bcbio_template, checkIfExists: true)
......@@ -301,7 +402,7 @@ workflow process_families {
ch_bcbio_family_outputs,
ch_bcbio,
ch_reference_genome
)
ch_formatted_bcbio_outputs = format_bcbio_family_outputs(
ch_bcbio_family_outputs.join(ch_individual_folders)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment