diff --git a/main.nf b/main.nf index fabc8746b5b122784fc44ad06190569f7d1e157e..65f2e1b5121b46e280fd94a2ed0d54c8039e48d1 100644 --- a/main.nf +++ b/main.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl = 2 include {var_calling} from './pipeline/var_calling.nf' -include {hello} from './pipeline/cnv_calling.nf' +include {check_inputs} from './pipeline/cnv_calling.nf' // which part of the pipeline to run - either 'variant-calling' or 'variant-prioritisation' or 'cnv-calling' params.workflow = null @@ -43,7 +43,7 @@ workflow { println "Variant prioritisation coming soon" } else if (params.workflow == 'cnv-calling') { println "CNV identification pipeline in development" - hello() + check_inputs() } else { exit 1, 'params.workflow required - variant-calling, cnv-calling or variant-prioritisation' } diff --git a/nextflow.config b/nextflow.config index 585d5b2bf74d4c5b944474845291a0491c8b00fa..79bd9c5a66f09995bdb5b8bf66d9ada9d574bdd9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,4 +1,4 @@ -executor = 'slurm' +// executor = 'slurm' process { @@ -18,7 +18,7 @@ process { } withLabel: large { - cpus = 16 + cpus = 10 memory = 32.GB } } diff --git a/pipeline/cnv_calling.nf b/pipeline/cnv_calling.nf index a93fd384992dc17277ce27138c5cf200c3b38346..7ca4cefd8b462d930373c22daf66378b77253a14 100644 --- a/pipeline/cnv_calling.nf +++ b/pipeline/cnv_calling.nf @@ -5,16 +5,97 @@ nextflow.enable.dsl = 2 include {read_inputs} from './inputs.nf' include {validation} from './validation.nf' -process HELLO { +process check_argument_files { + +// a process to look at the files given as arguments and check they exist (ie provided in eddie.config) +// as defaults are set to Null there may not be a 'problem' if one of these files is not found +// arguments to be checked are: params.bcbio, params.bcbio_template, params.target_bed, params.reference_genome +// params.parse_peddy_output +// sample sheet and ped file are checked separately + script: + + """ + echo "hello world" + """ + +} + + +process 
check_sample_sheet { + input: + val sample_sheet + script: - "echo \"hello world\"" + + """ + touch "$workflow.projectDir"/sample_sheet_check.txt + + # echo $workflow.projectDir + # count the number of words in the first line of the sample sheet + # this is the number of columns. it is expected there will be 3 + # sample reads_1 reads_2 + + columns=\$(head -n 1 $sample_sheet| wc -w) + + if [ "\${columns}" -ne 3 ]; then + + echo "ERROR: $sample_sheet has an unexpected format" >> "$workflow.projectDir"/sample_sheet_check.txt + + else + + echo "$sample_sheet has expected number of columns" >> "$workflow.projectDir"/sample_sheet_check.txt + + + fi + + # Then take columns 2 and 3, check that all fastqs listed exist + # use tail to ignore the header + + reads_1=\$(cat $sample_sheet | cut -f2,3 | tail -n +2) + + #check that the file exists + + for file in \${reads_1}; + do + # build up path to specified file + + path_to_file=$workflow.projectDir + path_to_file+="/" + path_to_file+=\${file} + + # check if they exist + + if [ -f "\${path_to_file}" ]; then + + echo "\"\${path_to_file}\" exist" >> "$workflow.projectDir"/sample_sheet_check.txt + + else + + # TODO: a missing file should fail the process; for now it is only logged + + echo "\"\${path_to_file}\" does not exist" >> "$workflow.projectDir"/sample_sheet_check.txt + fi + done + """ } +process check_ped_file { + input: + val ped_file + + script: + + """ + cat $ped_file + """ +} -workflow hello { - HELLO() +workflow check_inputs { + read_inputs() + check_sample_sheet(read_inputs.out.ch_samplesheet) + check_ped_file(read_inputs.out.ch_ped_file) } diff --git a/tests/assets/input_data/ped_files/giab_test_trios.ped b/tests/assets/input_data/ped_files/giab_test_trios.ped index 566cce43692ccc031da29f5336c4a7130d093c25..3f3c4cd1905f2d8c0977695a90351906591b0692 100644 --- a/tests/assets/input_data/ped_files/giab_test_trios.ped +++ b/tests/assets/input_data/ped_files/giab_test_trios.ped @@ -1,6 +1,6 @@ 00001_000001 000001_000001 000002_000002 
000003_000002 1 2 00001_000001 000002_000001 0 0 1 1 00001_000001 000003_000001 0 0 2 1 -00001_000002 000004_000002 000005_000002 000006_000002 1 2 -00001_000002 000005_000002 0 0 1 1 -00001_000002 000006_000002 0 0 2 1 +# 00001_000002 000004_000002 000005_000002 000006_000002 1 2 +# 00001_000002 000005_000002 0 0 1 1 +# 00001_000002 000006_000002 0 0 2 1 diff --git a/tests/assets/input_data/sample_sheets/giab_test_non_trios.tsv b/tests/assets/input_data/sample_sheets/giab_test_non_trios.tsv index cf341e536ed8bc95cb4856bf622b54cf44bceb4c..9ffd458c56fec4ac842e06c090b385aae026622e 100644 --- a/tests/assets/input_data/sample_sheets/giab_test_non_trios.tsv +++ b/tests/assets/input_data/sample_sheets/giab_test_non_trios.tsv @@ -1,8 +1,8 @@ individual_id read_1 read_2 -000001_000001 assets/input_data/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG002_R2.fastq.gz -000003_000001 assets/input_data/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG004_R2.fastq.gz -000004_000002 assets/input_data/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/giab/ChineseTrio/HG005_R2.fastq.gz -000005_000003 assets/input_data/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG002_R2.fastq.gz -000006_000003 assets/input_data/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/giab/ChineseTrio/HG005_R2.fastq.gz -000007_000003 assets/input_data/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG003_R2.fastq.gz -000008_000003 assets/input_data/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/giab/AshkenazimTrio/HG004_R2.fastq.gz +000001_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz +000003_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz +000004_000002 
assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz +000005_000003 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz +000006_000003 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz +000007_000003 assets/input_data/scripts/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG003_R2.fastq.gz +000008_000003 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz diff --git a/tests/assets/input_data/sample_sheets/giab_test_trios.tsv b/tests/assets/input_data/sample_sheets/giab_test_trios.tsv index f7b152609625ee8cc5c6a8e5f7cc92376d57b894..0a45838d53b59b68d8a3520e2c392a34d5ad6a3d 100644 --- a/tests/assets/input_data/sample_sheets/giab_test_trios.tsv +++ b/tests/assets/input_data/sample_sheets/giab_test_trios.tsv @@ -2,6 +2,7 @@ individual_id read_1 read_2 000001_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz 000002_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG003_R2.fastq.gz 000003_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz -000004_000002 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz -000005_000002 assets/input_data/scripts/giab/ChineseTrio/HG006_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG006_R2.fastq.gz -000006_000002 assets/input_data/scripts/giab/ChineseTrio/HG007_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG007_R2.fastq.gz +# 000004_000002 
assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz +# 000005_000002 assets/input_data/scripts/giab/ChineseTrio/HG006_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG006_R2.fastq.gz +# 000006_000002 assets/input_data/scripts/giab/ChineseTrio/HG007_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG007_R2.fastq.gz + diff --git a/tests/assets/input_data/sample_sheets/giab_test_trios.tsv.save b/tests/assets/input_data/sample_sheets/giab_test_trios.tsv.save new file mode 100644 index 0000000000000000000000000000000000000000..5f0269375b4ba1480855f4507f46328a1f064988 --- /dev/null +++ b/tests/assets/input_data/sample_sheets/giab_test_trios.tsv.save @@ -0,0 +1,6 @@ +individual_id read_1 read_2 +0f00001_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG002_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG002_R2.fastq.gz +000002_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG003_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG003_R2.fastq.gz +00003_000001 assets/input_data/scripts/giab/AshkenazimTrio/HG004_R1.fastq.gz assets/input_data/scripts/giab/AshkenazimTrio/HG004_R2.fastq.gz +000004_000002 assets/input_data/scripts/giab/ChineseTrio/HG005_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG005_R2.fastq.gz +000005_000002 assets/input_data/scripts/giab/ChineseTrio/HG006_R1.fastq.gz assets/input_data/scripts/giab/ChineseTrio/HG006_R2.fastq.gz diff --git a/tests/run_giab_tests.sh b/tests/run_giab_tests.sh index 1a685561200b538e9e6b85bdfa5efe98f7d32dfc..3b965562743edb755162b0e643a20c77dc34b65d 100644 --- a/tests/run_giab_tests.sh +++ b/tests/run_giab_tests.sh @@ -3,7 +3,7 @@ source scripts/nextflow_detached.sh test_exit_status=0 -nextflow -c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config clean -f +# nextflow -c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config clean -f echo "Reduced GiaB data - trios" run_nextflow ../main.nf \ @@ 
-16,14 +16,14 @@ run_nextflow ../main.nf \ test_exit_status=$(( $test_exit_status + $? )) -echo "Reduced GiaB data - non-trios" -run_nextflow ../main.nf \ - -c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config \ - --workflow "variant-calling" \ - --pipeline_project_id giab_test_non_trios \ - --pipeline_project_version v1 \ - --ped_file $PWD/assets/input_data/ped_files/giab_test_non_trios.ped \ - --sample_sheet $PWD/assets/input_data/sample_sheets/giab_test_non_trios.tsv +#echo "Reduced GiaB data - non-trios" +#run_nextflow ../main.nf \ +# -c /exports/igmm/eddie/IGMM-VariantAnalysis/emma/eddie.config \ +# --workflow "variant-calling" \ +# --pipeline_project_id giab_test_non_trios \ +# --pipeline_project_version v1 \ +# --ped_file $PWD/assets/input_data/ped_files/giab_test_non_trios.ped \ +# --sample_sheet $PWD/assets/input_data/sample_sheets/giab_test_non_trios.tsv test_exit_status=$(( $test_exit_status + $? )) diff --git a/tests/scripts/bcbio_nextgen.py b/tests/scripts/bcbio_nextgen.py old mode 100755 new mode 100644 diff --git a/tests/scripts/bcbio_prepare_samples.py b/tests/scripts/bcbio_prepare_samples.py old mode 100755 new mode 100644