From 19ffc7350636041025c6fe4553b35e3ed73f6d57 Mon Sep 17 00:00:00 2001 From: user name <ameynert@sdf-cs1.eidf.epcc.ed.ac.uk> Date: Tue, 10 May 2022 09:58:23 +0100 Subject: [PATCH 1/2] Moved peddy validation checking script to bin --- .../peddy_validation.pl | 2 +- main.nf | 3 --- pipeline/var_calling.nf | 14 +++++++------- 3 files changed, 8 insertions(+), 11 deletions(-) rename trio_whole_exome_parse_peddy_ped_csv.pl => bin/peddy_validation.pl (98%) mode change 100644 => 100755 diff --git a/trio_whole_exome_parse_peddy_ped_csv.pl b/bin/peddy_validation.pl old mode 100644 new mode 100755 similarity index 98% rename from trio_whole_exome_parse_peddy_ped_csv.pl rename to bin/peddy_validation.pl index 05e4d02..08cebeb --- a/trio_whole_exome_parse_peddy_ped_csv.pl +++ b/bin/peddy_validation.pl @@ -2,7 +2,7 @@ =head1 NAME -trio_whole_exome_parse_peddy_ped_csv.pl +peddy_validation.pl =head1 AUTHOR diff --git a/main.nf b/main.nf index 58404a7..91034a1 100644 --- a/main.nf +++ b/main.nf @@ -26,9 +26,6 @@ params.target_bed = null // hg38 reference genome in fasta format params.reference_genome = null -// path to the parse_peddy_output Perl script. Todo: remove once scripts are in bin/ -params.parse_peddy_output = null - // path to a Ped file describing all the families in the pipeline batch params.ped_file = null diff --git a/pipeline/var_calling.nf b/pipeline/var_calling.nf index 4ef7a4a..39ab013 100644 --- a/pipeline/var_calling.nf +++ b/pipeline/var_calling.nf @@ -176,7 +176,7 @@ process collate_pipeline_outputs { path(ped_file) path(samplesheet) path(bcbio) - path(parse_peddy_output) + path(peddy_validation_output) output: path("${params.pipeline_project_id}_${params.pipeline_project_version}") @@ -204,8 +204,8 @@ process collate_pipeline_outputs { --filename ${params.pipeline_project_id}_${params.pipeline_project_version}_qc_report.html \ . && - peddy_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt && - perl ../../${parse_peddy_output} \ + peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt && + peddy_validation.pl \ --output \$peddy_output \ --project ${params.pipeline_project_id} \ --batch ${bcbio_family_output_dirs[0].getName().split('_')[1]} \ @@ -214,10 +214,10 @@ process collate_pipeline_outputs { --families . && # no && here - exit status checked below - grep -v 'False\$' \$peddy_output + grep -v 'False\$' \$peddy_validation_output if [ \$? -ne 0 ] then - echo "Found Peddy mismatches in \$peddy_output" + echo "Found Peddy mismatches in \$peddy_validation_output" exit 1 fi && @@ -257,7 +257,7 @@ workflow process_families { ch_bcbio = file(params.bcbio, checkIfExists: true) ch_bcbio_template = file(params.bcbio_template, checkIfExists: true) ch_target_bed = file(params.target_bed, checkIfExists: true) - ch_parse_peddy_output = file(params.parse_peddy_output, checkIfExists: true) + ch_peddy_validation_output = file(params.peddy_validation_output, checkIfExists: true) ch_reference_genome = file(params.reference_genome, checkIfExists: true) ch_merged_fastqs = merge_fastqs( @@ -317,7 +317,7 @@ workflow process_families { ch_ped_file, ch_samplesheet, ch_bcbio, - ch_parse_peddy_output + ch_peddy_validation_output ) } -- GitLab From e2e492970ce5557d548ebf515e23def5a901fdaa Mon Sep 17 00:00:00 2001 From: user name <ameynert@sdf-cs1.eidf.epcc.ed.ac.uk> Date: Tue, 10 May 2022 17:30:11 +0100 Subject: [PATCH 2/2] Pedigree checking setup --- bin/peddy_validation.pl | 21 ++++++++++----------- pipeline/var_calling.nf | 20 ++++++++------------ 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/bin/peddy_validation.pl b/bin/peddy_validation.pl index 08cebeb..fe6507e 100755 --- a/bin/peddy_validation.pl +++ b/bin/peddy_validation.pl @@ -16,6 +16,7 @@ Checks the parent-child and parent-parent relationships from peddy output. use strict; +use Cwd; use Getopt::Long; use IO::File; @@ -35,7 +36,6 @@ my $fam_dir; my $project_id; my $version; my $out_file; -my $batch_id; GetOptions( 'help' => \$help, @@ -43,11 +43,10 @@ GetOptions( 'ped=s' => \$ped_file, 'output=s' => \$out_file, 'families=s' => \$fam_dir, - 'version=s' => \$version, - 'batch=s' => \$batch_id + 'version=s' => \$version ) or die $usage; -if ($help || !$project_id || !$ped_file || !$out_file || !$batch_id || !$version || !$fam_dir) +if ($help || !$project_id || !$ped_file || !$out_file || !$version || !$fam_dir) { print $usage; exit(0); @@ -76,16 +75,16 @@ $in_fh->close(); my $out_fh = new IO::File; $out_fh->open($out_file, "w") or die "Could not open $out_file\n$!"; -printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n"; +printf $out_fh "project_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n"; foreach my $family_id (sort keys %ped) { - my @peddy_glob = glob(sprintf("$fam_dir/*_%s_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv", - $project_id, $version, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id)); - next if (scalar(@peddy_glob) == 0); + my $glob_str = sprintf("$fam_dir/*%s/%s/qc/peddy/*.ped_check.csv", $family_id, $ped{$family_id}{'aff'}); + my @peddy_glob = glob($glob_str); + next if (scalar(@peddy_glob) == 0); - my $peddy_fh = new IO::File; - $peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!"; + my $peddy_fh = new IO::File; + $peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!"; my @headers; my %info; @@ -129,7 +128,7 @@ foreach my $family_id (sort keys %ped) $info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True'; - printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n", + printf $out_fh "$project_id\t$sample_pair\t%s\t%s\t%s\n", $info{'pedigree_parents'}{$sample_pair}, $info{'predicted_parents'}{$sample_pair}, $info{'parent_error'}{$sample_pair}; diff --git a/pipeline/var_calling.nf b/pipeline/var_calling.nf index 39ab013..a923477 100644 --- a/pipeline/var_calling.nf +++ b/pipeline/var_calling.nf @@ -176,7 +176,6 @@ process collate_pipeline_outputs { path(ped_file) path(samplesheet) path(bcbio) - path(peddy_validation_output) output: path("${params.pipeline_project_id}_${params.pipeline_project_version}") @@ -206,20 +205,19 @@ process collate_pipeline_outputs { peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt && peddy_validation.pl \ - --output \$peddy_output \ + --output \$peddy_validation_output \ --project ${params.pipeline_project_id} \ - --batch ${bcbio_family_output_dirs[0].getName().split('_')[1]} \ --version ${params.pipeline_project_version} \ --ped ../../${ped_file} \ --families . && # no && here - exit status checked below - grep -v 'False\$' \$peddy_validation_output - if [ \$? -ne 0 ] - then - echo "Found Peddy mismatches in \$peddy_validation_output" - exit 1 - fi && +# grep -v 'False\$' \$peddy_validation_output +# if [ \$? -ne 0 ] +# then +# echo "Found Peddy mismatches in \$peddy_validation_output" +# exit 1 +# fi && cd ../.. && @@ -257,7 +255,6 @@ workflow process_families { ch_bcbio = file(params.bcbio, checkIfExists: true) ch_bcbio_template = file(params.bcbio_template, checkIfExists: true) ch_target_bed = file(params.target_bed, checkIfExists: true) - ch_peddy_validation_output = file(params.peddy_validation_output, checkIfExists: true) ch_reference_genome = file(params.reference_genome, checkIfExists: true) ch_merged_fastqs = merge_fastqs( @@ -316,8 +313,7 @@ workflow process_families { ch_formatted_bcbio_outputs.map({it[2]}).collect(), ch_ped_file, ch_samplesheet, - ch_bcbio, - ch_peddy_validation_output + ch_bcbio ) } -- GitLab