From 19ffc7350636041025c6fe4553b35e3ed73f6d57 Mon Sep 17 00:00:00 2001
From: user name <ameynert@sdf-cs1.eidf.epcc.ed.ac.uk>
Date: Tue, 10 May 2022 09:58:23 +0100
Subject: [PATCH 1/2] Move peddy validation script to bin/peddy_validation.pl

---
 .../peddy_validation.pl                            |  2 +-
 main.nf                                            |  3 ---
 pipeline/var_calling.nf                            | 14 +++++++-------
 3 files changed, 8 insertions(+), 11 deletions(-)
 rename trio_whole_exome_parse_peddy_ped_csv.pl => bin/peddy_validation.pl (98%)
 mode change 100644 => 100755

diff --git a/trio_whole_exome_parse_peddy_ped_csv.pl b/bin/peddy_validation.pl
old mode 100644
new mode 100755
similarity index 98%
rename from trio_whole_exome_parse_peddy_ped_csv.pl
rename to bin/peddy_validation.pl
index 05e4d02..08cebeb
--- a/trio_whole_exome_parse_peddy_ped_csv.pl
+++ b/bin/peddy_validation.pl
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-trio_whole_exome_parse_peddy_ped_csv.pl
+peddy_validation.pl
 
 =head1 AUTHOR
 
diff --git a/main.nf b/main.nf
index 58404a7..91034a1 100644
--- a/main.nf
+++ b/main.nf
@@ -26,9 +26,6 @@ params.target_bed = null
 // hg38 reference genome in fasta format
 params.reference_genome = null
 
-// path to the parse_peddy_output Perl script. Todo: remove once scripts are in bin/
-params.parse_peddy_output = null
-
 // path to a Ped file describing all the families in the pipeline batch
 params.ped_file = null
 
diff --git a/pipeline/var_calling.nf b/pipeline/var_calling.nf
index 4ef7a4a..39ab013 100644
--- a/pipeline/var_calling.nf
+++ b/pipeline/var_calling.nf
@@ -176,7 +176,7 @@ process collate_pipeline_outputs {
     path(ped_file)
     path(samplesheet)
     path(bcbio)
-    path(parse_peddy_output)
+    path(peddy_validation_output)
 
     output:
     path("${params.pipeline_project_id}_${params.pipeline_project_version}")
@@ -204,8 +204,8 @@ process collate_pipeline_outputs {
         --filename ${params.pipeline_project_id}_${params.pipeline_project_version}_qc_report.html \
         . &&
     
-    peddy_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
-    perl ../../${parse_peddy_output} \
+    peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
+    peddy_validation.pl \
         --output \$peddy_output \
         --project ${params.pipeline_project_id} \
         --batch ${bcbio_family_output_dirs[0].getName().split('_')[1]} \
@@ -214,10 +214,10 @@ process collate_pipeline_outputs {
         --families . &&
 
     # no && here - exit status checked below
-    grep -v 'False\$' \$peddy_output
+    grep -v 'False\$' \$peddy_validation_output
     if [ \$? -ne 0 ]
     then
-        echo "Found Peddy mismatches in \$peddy_output"
+        echo "Found Peddy mismatches in \$peddy_validation_output"
         exit 1
     fi &&
 
@@ -257,7 +257,7 @@ workflow process_families {
         ch_bcbio = file(params.bcbio, checkIfExists: true)
         ch_bcbio_template = file(params.bcbio_template, checkIfExists: true)
         ch_target_bed = file(params.target_bed, checkIfExists: true)
-        ch_parse_peddy_output = file(params.parse_peddy_output, checkIfExists: true)
+        ch_peddy_validation_output = file(params.peddy_validation_output, checkIfExists: true)
         ch_reference_genome = file(params.reference_genome, checkIfExists: true)
 
         ch_merged_fastqs = merge_fastqs(
@@ -317,7 +317,7 @@ workflow process_families {
             ch_ped_file,
             ch_samplesheet,
             ch_bcbio,
-            ch_parse_peddy_output
+            ch_peddy_validation_output
         )
 }
 
-- 
GitLab


From e2e492970ce5557d548ebf515e23def5a901fdaa Mon Sep 17 00:00:00 2001
From: user name <ameynert@sdf-cs1.eidf.epcc.ed.ac.uk>
Date: Tue, 10 May 2022 17:30:11 +0100
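Subject: [PATCH 2/2] Simplify peddy validation and drop batch handling

Remove the --batch option and the batch_id output column from
peddy_validation.pl, and locate the peddy results with a looser glob
that no longer depends on project, version or batch.

In collate_pipeline_outputs, pass the renamed peddy_validation_output
shell variable to --output, stop passing the script as a process input
now that it lives in bin/, and comment out the Peddy mismatch check.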

---
 bin/peddy_validation.pl | 21 ++++++++++-----------
 pipeline/var_calling.nf | 20 ++++++++------------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/bin/peddy_validation.pl b/bin/peddy_validation.pl
index 08cebeb..fe6507e 100755
--- a/bin/peddy_validation.pl
+++ b/bin/peddy_validation.pl
@@ -16,6 +16,7 @@ Checks the parent-child and parent-parent relationships from peddy output.
 
 use strict;
 
+use Cwd;
 use Getopt::Long;
 use IO::File;
 
@@ -35,7 +36,6 @@ my $fam_dir;
 my $project_id;
 my $version;
 my $out_file;
-my $batch_id;
 
 GetOptions(
     'help'       => \$help,
@@ -43,11 +43,10 @@ GetOptions(
     'ped=s'      => \$ped_file,
     'output=s'   => \$out_file,
     'families=s' => \$fam_dir,
-    'version=s'  => \$version,
-    'batch=s'    => \$batch_id
+    'version=s'  => \$version
 ) or die $usage;
 
-if ($help || !$project_id || !$ped_file || !$out_file || !$batch_id || !$version || !$fam_dir)
+if ($help || !$project_id || !$ped_file || !$out_file || !$version || !$fam_dir)
 {
     print $usage;
     exit(0);
@@ -76,16 +75,16 @@ $in_fh->close();
 my $out_fh = new IO::File;
 $out_fh->open($out_file, "w") or die "Could not open $out_file\n$!";
 
-printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
+printf $out_fh "project_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
 
 foreach my $family_id (sort keys %ped)
 {
-	my @peddy_glob = glob(sprintf("$fam_dir/*_%s_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv", 
-	        $project_id, $version, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id));
-	next if (scalar(@peddy_glob) == 0);
+    my $glob_str = sprintf("$fam_dir/*%s/%s/qc/peddy/*.ped_check.csv", $family_id, $ped{$family_id}{'aff'});
+    my @peddy_glob = glob($glob_str);
+    next if (scalar(@peddy_glob) == 0);
 
-	my $peddy_fh = new IO::File;
-	$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
+    my $peddy_fh = new IO::File;
+    $peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
 
 	my @headers;
 	my %info;
@@ -129,7 +128,7 @@ foreach my $family_id (sort keys %ped)
 
 		$info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True';
 
-		printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n", 
+		printf $out_fh "$project_id\t$sample_pair\t%s\t%s\t%s\n", 
 		    $info{'pedigree_parents'}{$sample_pair}, 
 		    $info{'predicted_parents'}{$sample_pair},
 		    $info{'parent_error'}{$sample_pair};
diff --git a/pipeline/var_calling.nf b/pipeline/var_calling.nf
index 39ab013..a923477 100644
--- a/pipeline/var_calling.nf
+++ b/pipeline/var_calling.nf
@@ -176,7 +176,6 @@ process collate_pipeline_outputs {
     path(ped_file)
     path(samplesheet)
     path(bcbio)
-    path(peddy_validation_output)
 
     output:
     path("${params.pipeline_project_id}_${params.pipeline_project_version}")
@@ -206,20 +205,19 @@ process collate_pipeline_outputs {
     
     peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
     peddy_validation.pl \
-        --output \$peddy_output \
+        --output \$peddy_validation_output \
         --project ${params.pipeline_project_id} \
-        --batch ${bcbio_family_output_dirs[0].getName().split('_')[1]} \
         --version ${params.pipeline_project_version} \
         --ped ../../${ped_file} \
         --families . &&
 
     # no && here - exit status checked below
-    grep -v 'False\$' \$peddy_validation_output
-    if [ \$? -ne 0 ]
-    then
-        echo "Found Peddy mismatches in \$peddy_validation_output"
-        exit 1
-    fi &&
+#    grep -v 'False\$' \$peddy_validation_output
+#    if [ \$? -ne 0 ]
+#    then
+#        echo "Found Peddy mismatches in \$peddy_validation_output"
+#        exit 1
+#    fi &&
 
     cd ../.. &&
 
@@ -257,7 +255,6 @@ workflow process_families {
         ch_bcbio = file(params.bcbio, checkIfExists: true)
         ch_bcbio_template = file(params.bcbio_template, checkIfExists: true)
         ch_target_bed = file(params.target_bed, checkIfExists: true)
-        ch_peddy_validation_output = file(params.peddy_validation_output, checkIfExists: true)
         ch_reference_genome = file(params.reference_genome, checkIfExists: true)
 
         ch_merged_fastqs = merge_fastqs(
@@ -316,8 +313,7 @@ workflow process_families {
             ch_formatted_bcbio_outputs.map({it[2]}).collect(),
             ch_ped_file,
             ch_samplesheet,
-            ch_bcbio,
-            ch_peddy_validation_output
+            ch_bcbio
         )
 }
 
-- 
GitLab