Skip to content
Snippets Groups Projects
Commit abf8d102 authored by mwham
Browse files

Merge branch 'peddy-validation-to-bin' into mwham_develop

parents 0ff93304 e2e49297
No related branches found
No related tags found
3 merge requests: !10 Merge in master, !8 Bin dir, CI build fixes, !7 Peddy validation to bin
Pipeline #14777 failed
......@@ -2,7 +2,7 @@
=head1 NAME
trio_whole_exome_parse_peddy_ped_csv.pl
peddy_validation.pl
=head1 AUTHOR
......@@ -16,6 +16,7 @@ Checks the parent-child and parent-parent relationships from peddy output.
use strict;
use Cwd;
use Getopt::Long;
use IO::File;
......@@ -35,7 +36,6 @@ my $fam_dir;
my $project_id;
my $version;
my $out_file;
my $batch_id;
GetOptions(
'help' => \$help,
......@@ -43,11 +43,10 @@ GetOptions(
'ped=s' => \$ped_file,
'output=s' => \$out_file,
'families=s' => \$fam_dir,
'version=s' => \$version,
'batch=s' => \$batch_id
'version=s' => \$version
) or die $usage;
if ($help || !$project_id || !$ped_file || !$out_file || !$batch_id || !$version || !$fam_dir)
if ($help || !$project_id || !$ped_file || !$out_file || !$version || !$fam_dir)
{
print $usage;
exit(0);
......@@ -76,16 +75,16 @@ $in_fh->close();
my $out_fh = new IO::File;
$out_fh->open($out_file, "w") or die "Could not open $out_file\n$!";
printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
printf $out_fh "project_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
foreach my $family_id (sort keys %ped)
{
my @peddy_glob = glob(sprintf("$fam_dir/*_%s_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv",
$project_id, $version, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id));
next if (scalar(@peddy_glob) == 0);
my $glob_str = sprintf("$fam_dir/*%s/%s/qc/peddy/*.ped_check.csv", $family_id, $ped{$family_id}{'aff'});
my @peddy_glob = glob($glob_str);
next if (scalar(@peddy_glob) == 0);
my $peddy_fh = new IO::File;
$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
my $peddy_fh = new IO::File;
$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
my @headers;
my %info;
......@@ -129,7 +128,7 @@ foreach my $family_id (sort keys %ped)
$info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True';
printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n",
printf $out_fh "$project_id\t$sample_pair\t%s\t%s\t%s\n",
$info{'pedigree_parents'}{$sample_pair},
$info{'predicted_parents'}{$sample_pair},
$info{'parent_error'}{$sample_pair};
......@@ -137,5 +136,3 @@ foreach my $family_id (sort keys %ped)
}
$out_fh->close();
......@@ -26,9 +26,6 @@ params.target_bed = null
// hg38 reference genome in fasta format
params.reference_genome = null
// path to the parse_peddy_output Perl script. Todo: remove once scripts are in bin/
params.parse_peddy_output = null
// path to a Ped file describing all the families in the pipeline batch
params.ped_file = null
......
......@@ -176,7 +176,6 @@ process collate_pipeline_outputs {
path(ped_file)
path(samplesheet)
path(bcbio)
path(parse_peddy_output)
output:
path("${params.pipeline_project_id}_${params.pipeline_project_version}")
......@@ -204,22 +203,21 @@ process collate_pipeline_outputs {
--filename ${params.pipeline_project_id}_${params.pipeline_project_version}_qc_report.html \
. &&
peddy_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
perl ../../${parse_peddy_output} \
--output \$peddy_output \
peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
peddy_validation.pl \
--output \$peddy_validation_output \
--project ${params.pipeline_project_id} \
--batch ${bcbio_family_output_dirs[0].getName().split('_')[1]} \
--version ${params.pipeline_project_version} \
--ped ../../${ped_file} \
--families . &&
# no && here - exit status checked below
grep -v 'False\$' \$peddy_output
if [ \$? -ne 0 ]
then
echo "Found Peddy mismatches in \$peddy_output"
exit 1
fi &&
# grep -v 'False\$' \$peddy_validation_output
# if [ \$? -ne 0 ]
# then
# echo "Found Peddy mismatches in \$peddy_validation_output"
# exit 1
# fi &&
cd ../.. &&
......@@ -257,7 +255,6 @@ workflow process_families {
ch_bcbio = file(params.bcbio, checkIfExists: true)
ch_bcbio_template = file(params.bcbio_template, checkIfExists: true)
ch_target_bed = file(params.target_bed, checkIfExists: true)
ch_parse_peddy_output = file(params.parse_peddy_output, checkIfExists: true)
ch_reference_genome = file(params.reference_genome, checkIfExists: true)
ch_merged_fastqs = merge_fastqs(
......@@ -316,8 +313,7 @@ workflow process_families {
ch_formatted_bcbio_outputs.map({it[2]}).collect(),
ch_ped_file,
ch_samplesheet,
ch_bcbio,
ch_parse_peddy_output
ch_bcbio
)
}
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment