Skip to content
Snippets Groups Projects
Commit fca1deaf authored by 3mma-mack's avatar 3mma-mack
Browse files

commit functioning test pipeline commit

parent 607d6f79
No related branches found
No related tags found
No related merge requests found
nextflow.enable.dsl = 2
include {var_calling} from './pipeline/var_calling.nf'
include {hello} from './pipeline/cnv_calling.nf'
include {check_inputs} from './pipeline/cnv_calling.nf'
// which part of the pipeline to run - either 'variant-calling' or 'variant-prioritisation' or 'cnv-calling'
params.workflow = null
......@@ -43,7 +43,7 @@ workflow {
println "Variant prioritisation coming soon"
} else if (params.workflow == 'cnv-calling') {
println "CNV identification pipeline in development"
hello()
check_inputs()
} else {
exit 1, 'params.workflow required - variant-calling, cnv-calling or variant-prioritisation'
}
......
executor = 'slurm'
// executor = 'slurm'
process {
......@@ -18,7 +18,7 @@ process {
}
withLabel: large {
cpus = 16
cpus = 10
memory = 32.GB
}
}
......
......@@ -5,16 +5,97 @@ nextflow.enable.dsl = 2
include {read_inputs} from './inputs.nf'
include {validation} from './validation.nf'
process HELLO {
process check_argument_files {
// Intended purpose: look at the files given as arguments (i.e. provided in eddie.config)
// and check that they exist.
// As the defaults are set to null, a file that is not found is not necessarily a problem.
// Arguments to be checked: params.bcbio, params.bcbio_template, params.target_bed,
// params.reference_genome, params.parse_peddy_output.
// The sample sheet and ped file are checked separately.
// NOTE: not implemented yet - the script below is a placeholder that only echoes "hello world".
script:
"""
echo "hello world"
"""
}
process check_sample_sheet {
    // Sanity-check a sample sheet and append the findings to
    // <projectDir>/sample_sheet_check.txt.
    //
    // Checks performed:
    //   1. the header line must contain exactly 3 whitespace-separated fields
    //      (sample, reads_1, reads_2);
    //   2. every path listed in tab-separated columns 2 and 3 of the data rows
    //      must be a regular file relative to the project directory.
    //
    // Findings are only logged to the report file; the task does not fail on them.
    //
    // Fix: the column-count test used to be written as `if( "<count>" != 3 )`, which
    // bash treats as a subshell that tries to execute the count as a command. That
    // always failed, so the ERROR branch could never be reached. It is now a numeric
    // `[ ... -ne 3 ]` test.

    input:
    val sample_sheet

    script:
    """
    report="${workflow.projectDir}/sample_sheet_check.txt"
    touch "\${report}"

    # Count the words in the first line of the sample sheet.
    # This is the number of columns; 3 are expected: sample reads_1 reads_2
    columns=\$(head -n 1 "${sample_sheet}" | wc -w)
    if [ "\${columns}" -ne 3 ]; then
        echo "ERROR: ${sample_sheet} has an unexpected format" >> "\${report}"
    else
        echo "${sample_sheet} has expected number of columns" >> "\${report}"
    fi

    # Take columns 2 and 3 and check that every fastq listed exists.
    # tail skips the header row.
    fastqs=\$(cut -f2,3 "${sample_sheet}" | tail -n +2)
    for file in \${fastqs}; do
        # Paths in the sheet are relative to the project directory
        path_to_file="${workflow.projectDir}/\${file}"
        if [ -f "\${path_to_file}" ]; then
            echo "\"\${path_to_file}\" exist" >> "\${report}"
        else
            # TODO: a missing file should eventually raise an error and exit
            echo "\"\${path_to_file}\" does not exist" >> "\${report}"
        fi
    done
    """
}
process check_ped_file {
    // Dumps the contents of the supplied PED file to the task's standard output.
    // No validation of the file is performed here.

    input:
    val ped_file

    script:
    """
    cat ${ped_file}
    """
}
workflow hello {
HELLO()
workflow check_inputs {
    // Run read_inputs, then pass its sample sheet and PED file outputs
    // to their respective checking processes.
    read_inputs()

    samplesheet_ch = read_inputs.out.ch_samplesheet
    ped_file_ch = read_inputs.out.ch_ped_file

    check_sample_sheet(samplesheet_ch)
    check_ped_file(ped_file_ch)
}
00001_000001 000001_000001 000002_000002 000003_000002 1 2
00001_000001 000002_000001 0 0 1 1
00001_000001 000003_000001 0 0 2 1
00001_000002 000004_000002 000005_000002 000006_000002 1 2
00001_000002 000005_000002 0 0 1 1
00001_000002 000006_000002 0 0 2 1
# 00001_000002 000004_000002 000005_000002 000006_000002 1 2
# 00001_000002 000005_000002 0 0 1 1
# 00001_000002 000006_000002 0 0 2 1
individual_id read_1 read_2
000001_000001 assets/input_data/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG002_R2.fastq.gz
000003_000001 assets/input_data/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG004_R2.fastq.gz
000004_000002 assets/input_data/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/giab/ChineseTrio/HG005_R2.fastq.gz
000005_000003 assets/input_data/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG002_R2.fastq.gz
000006_000003 assets/input_data/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/giab/ChineseTrio/HG005_R2.fastq.gz
000007_000003 assets/input_data/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG003_R2.fastq.gz
000008_000003 assets/input_data/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG004_R2.fastq.gz
000001_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz
000003_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz
000004_000002 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz
000005_000003 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz
000006_000003 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz
000007_000003 assets/input_data/scripts/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG003_R2.fastq.gz
000008_000003 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz
......@@ -2,6 +2,7 @@ individual_id read_1 read_2
000001_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz
000002_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG003_R2.fastq.gz
000003_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz
000004_000002 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz
000005_000002 assets/input_data/scripts/giab/ChineseTrio/HG006_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG006_R2.fastq.gz
000006_000002 assets/input_data/scripts/giab/ChineseTrio/HG007_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG007_R2.fastq.gz
# 000004_000002 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz
# 000005_000002 assets/input_data/scripts/giab/ChineseTrio/HG006_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG006_R2.fastq.gz
# 000006_000002 assets/input_data/scripts/giab/ChineseTrio/HG007_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG007_R2.fastq.gz
individual_id read_1 read_2
0f00001_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz
000002_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG003_R2.fastq.gz
00003_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz
000004_000002 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz
000005_000002 assets/input_data/scripts/giab/ChineseTrio/HG006_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG006_R2.fastq.gz
......@@ -3,7 +3,7 @@
source scripts/nextflow_detached.sh
test_exit_status=0
nextflow -c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config clean -f
# nextflow -c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config clean -f
echo "Reduced GiaB data - trios"
run_nextflow ../main.nf \
......@@ -16,14 +16,14 @@ run_nextflow ../main.nf \
test_exit_status=$(( $test_exit_status + $? ))
echo "Reduced GiaB data - non-trios"
run_nextflow ../main.nf \
-c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config \
--workflow "variant-calling" \
--pipeline_project_id giab_test_non_trios \
--pipeline_project_version v1 \
--ped_file $PWD/assets/input_data/ped_files/giab_test_non_trios.ped \
--sample_sheet $PWD/assets/input_data/sample_sheets/giab_test_non_trios.tsv
#echo "Reduced GiaB data - non-trios"
#run_nextflow ../main.nf \
# -c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config \
# --workflow "variant-calling" \
# --pipeline_project_id giab_test_non_trios \
# --pipeline_project_version v1 \
# --ped_file $PWD/assets/input_data/ped_files/giab_test_non_trios.ped \
# --sample_sheet $PWD/assets/input_data/sample_sheets/giab_test_non_trios.tsv
test_exit_status=$(( $test_exit_status + $? ))
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment