Newer
Older
#!/usr/bin/perl -w
=head1 NAME
trio_whole_exome_parse_peddy_ped_csv.pl
=head1 AUTHOR
Alison Meynert (alison.meynert@igmm.ed.ac.uk)
=head1 DESCRIPTION
Checks the parent-child and parent-parent relationships from peddy output.
=cut
use strict;
use Getopt::Long;
use IO::File;
my $usage = qq{USAGE:
$0 [--help]
--output Output file
--families Family directory
--ped Pedigree file for project
--project Project id
--batch Batch id
--version Analysis run version (v1, v2, etc)
};
my $help = 0;
my $ped_file;
my $project_id;
my $batch_id;
GetOptions(
'help' => \$help,
'project=s' => \$project_id,
'ped=s' => \$ped_file,
'output=s' => \$out_file,
'families=s' => \$fam_dir,
'version=s' => \$version,
'batch=s' => \$batch_id
) or die $usage;
if ($help || !$project_id || !$ped_file || !$out_file || !$batch_id || !$version || !$fam_dir)
{
print $usage;
exit(0);
}
# Read in the pedigree file
my $in_fh = new IO::File;
$in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!";
my %ped;
while (my $line = <$in_fh>)
{
chomp $line;
my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line);
$ped{$family_id}{'count'}++;
$ped{$family_id}{$individual_id}{'father'} = $father_id;
$ped{$family_id}{$individual_id}{'mother'} = $mother_id;
if ($aff == 2)
{
$ped{$family_id}{'aff'} = $individual_id;
}
}
$in_fh->close();
my $out_fh = new IO::File;
$out_fh->open($out_file, "w") or die "Could not open $out_file\n$!";
printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
foreach my $family_id (sort keys %ped)
{
my @peddy_glob = glob(sprintf("$fam_dir/*_%s_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv",
$project_id, $version, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id));
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
my $peddy_fh = new IO::File;
$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
my @headers;
my %info;
my @sample_pairs;
while (my $line = <$peddy_fh>)
{
chomp $line;
if ($line =~ /^sample_a/)
{
@headers = split(/,/, $line);
}
else
{
my @data = split(/,/, $line);
push(@sample_pairs, sprintf("%s\t%s", $data[0], $data[1]));
for (my $i = 2; $i < scalar(@headers); $i++)
{
$info{$headers[$i]}{sprintf("%s\t%s", $data[0], $data[1])} = $data[$i];
}
}
}
$peddy_fh->close();
foreach my $sample_pair (@sample_pairs)
{
my ($sample_a, $sample_b) = split(/\t/, $sample_pair);
$sample_a =~ /(.+)_$family_id/;
my $sample_a_nofam = $1;
$sample_b =~ /(.+)_$family_id/;
my $sample_b_nofam = $1;
if ($ped{$family_id}{$sample_a_nofam}{'father'} eq $sample_b_nofam ||
$ped{$family_id}{$sample_a_nofam}{'mother'} eq $sample_b_nofam ||
$ped{$family_id}{$sample_b_nofam}{'father'} eq $sample_a_nofam ||
$ped{$family_id}{$sample_b_nofam}{'mother'} eq $sample_a_nofam)
{
$info{'pedigree_parents'}{$sample_pair} = 'True';
}
$info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True';
printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n",
$info{'pedigree_parents'}{$sample_pair},
$info{'predicted_parents'}{$sample_pair},
$info{'parent_error'}{$sample_pair};
}
}
$out_fh->close();