Skip to content
Snippets Groups Projects
Commit 75652ec0 authored by ameyner2's avatar ameyner2
Browse files

Parses peddy ped check output from bcbio to flag up sample swaps

parent 2a1db33d
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/perl -w
=head1 NAME
trio_whole_exome_parse_peddy_ped_csv.pl
=head1 AUTHOR
Alison Meynert (alison.meynert@igmm.ed.ac.uk)
=head1 DESCRIPTION
Checks the parent-child and parent-parent relationships from peddy output.
=cut
use strict;
use Getopt::Long;
use IO::File;
my $usage = qq{USAGE:
$0 [--help]
--output Output directory
--ped Pedigree file for project
--project Project id
--batch Batch id
};
my $help = 0;
my $ped_file;
my $project_id;
my $out_dir;
my $batch_id;
GetOptions(
'help' => \$help,
'project=s' => \$project_id,
'ped=s' => \$ped_file,
'output=s' => \$out_dir,
'batch=s' => \$batch_id
) or die $usage;
if ($help || !$project_id || !$ped_file || !$out_dir || !$batch_id)
{
print $usage;
exit(0);
}
# Read in the pedigree file
my $in_fh = new IO::File;
$in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!";
my %ped;
while (my $line = <$in_fh>)
{
chomp $line;
my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line);
$ped{$family_id}{'count'}++;
$ped{$family_id}{$individual_id}{'father'} = $father_id;
$ped{$family_id}{$individual_id}{'mother'} = $mother_id;
if ($aff == 2)
{
$ped{$family_id}{'aff'} = $individual_id;
}
}
$in_fh->close();
my $out_file = "$out_dir/qc/$project_id.ped_check.txt";
my $out_fh = new IO::File;
$out_fh->open($out_file, "w") or die "Could not open $out_file\n$!";
printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
foreach my $family_id (sort keys %ped)
{
#my $peddy_file = "$out_dir/2020-01-15_11972_Diamond_Austin_17304_402965/103195_402965/qc/peddy/17304402965.ped_check.csv";
my @peddy_glob = glob(sprintf("$out_dir/*_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv",
$project_id, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id));
my $peddy_fh = new IO::File;
$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
my @headers;
my %info;
my @sample_pairs;
while (my $line = <$peddy_fh>)
{
chomp $line;
if ($line =~ /^sample_a/)
{
@headers = split(/,/, $line);
}
else
{
my @data = split(/,/, $line);
push(@sample_pairs, sprintf("%s\t%s", $data[0], $data[1]));
for (my $i = 2; $i < scalar(@headers); $i++)
{
$info{$headers[$i]}{sprintf("%s\t%s", $data[0], $data[1])} = $data[$i];
}
}
}
$peddy_fh->close();
foreach my $sample_pair (@sample_pairs)
{
my ($sample_a, $sample_b) = split(/\t/, $sample_pair);
$sample_a =~ /(.+)_$family_id/;
my $sample_a_nofam = $1;
$sample_b =~ /(.+)_$family_id/;
my $sample_b_nofam = $1;
if ($ped{$family_id}{$sample_a_nofam}{'father'} eq $sample_b_nofam ||
$ped{$family_id}{$sample_a_nofam}{'mother'} eq $sample_b_nofam ||
$ped{$family_id}{$sample_b_nofam}{'father'} eq $sample_a_nofam ||
$ped{$family_id}{$sample_b_nofam}{'mother'} eq $sample_a_nofam)
{
$info{'pedigree_parents'}{$sample_pair} = 'True';
}
$info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True';
printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n",
$info{'pedigree_parents'}{$sample_pair},
$info{'predicted_parents'}{$sample_pair},
$info{'parent_error'}{$sample_pair};
}
}
$out_fh->close();
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment