#!/usr/bin/perl -w =head1 NAME trio_whole_exome_parse_peddy_ped_csv.pl =head1 AUTHOR Alison Meynert (alison.meynert@igmm.ed.ac.uk) =head1 DESCRIPTION Checks the parent-child and parent-parent relationships from peddy output. =cut use strict; use Getopt::Long; use IO::File; my $usage = qq{USAGE: $0 [--help] --output Output file --families Family directory --ped Pedigree file for project --project Project id --batch Batch id --version Analysis run version (v1, v2, etc) }; my $help = 0; my $ped_file; my $fam_dir; my $project_id; my $version; my $out_file; my $batch_id; GetOptions( 'help' => \$help, 'project=s' => \$project_id, 'ped=s' => \$ped_file, 'output=s' => \$out_file, 'families=s' => \$fam_dir, 'version=s' => \$version, 'batch=s' => \$batch_id ) or die $usage; if ($help || !$project_id || !$ped_file || !$out_file || !$batch_id || !$version || !$fam_dir) { print $usage; exit(0); } # Read in the pedigree file my $in_fh = new IO::File; $in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!"; my %ped; while (my $line = <$in_fh>) { chomp $line; my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line); $ped{$family_id}{'count'}++; $ped{$family_id}{$individual_id}{'father'} = $father_id; $ped{$family_id}{$individual_id}{'mother'} = $mother_id; if ($aff == 2) { $ped{$family_id}{'aff'} = $individual_id; } } $in_fh->close(); my $out_fh = new IO::File; $out_fh->open($out_file, "w") or die "Could not open $out_file\n$!"; printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n"; foreach my $family_id (sort keys %ped) { my @peddy_glob = glob(sprintf("$fam_dir/*_%s_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv", $project_id, $version, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id)); next if (scalar(@peddy_glob) == 0); my $peddy_fh = new IO::File; $peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!"; my @headers; my %info; my @sample_pairs; while (my $line = <$peddy_fh>) { chomp $line; if ($line =~ /^sample_a/) { @headers = split(/,/, $line); } else { my @data = split(/,/, $line); push(@sample_pairs, sprintf("%s\t%s", $data[0], $data[1])); for (my $i = 2; $i < scalar(@headers); $i++) { $info{$headers[$i]}{sprintf("%s\t%s", $data[0], $data[1])} = $data[$i]; } } } $peddy_fh->close(); foreach my $sample_pair (@sample_pairs) { my ($sample_a, $sample_b) = split(/\t/, $sample_pair); $sample_a =~ /(.+)_$family_id/; my $sample_a_nofam = $1; $sample_b =~ /(.+)_$family_id/; my $sample_b_nofam = $1; if ($ped{$family_id}{$sample_a_nofam}{'father'} eq $sample_b_nofam || $ped{$family_id}{$sample_a_nofam}{'mother'} eq $sample_b_nofam || $ped{$family_id}{$sample_b_nofam}{'father'} eq $sample_a_nofam || $ped{$family_id}{$sample_b_nofam}{'mother'} eq $sample_a_nofam) { $info{'pedigree_parents'}{$sample_pair} = 'True'; } $info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True'; printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n", $info{'pedigree_parents'}{$sample_pair}, $info{'predicted_parents'}{$sample_pair}, $info{'parent_error'}{$sample_pair}; } } $out_fh->close();