Skip to content
Snippets Groups Projects
trio_whole_exome_parse_peddy_ped_csv.pl 3.41 KiB
Newer Older
#!/usr/bin/perl -w

=head1 NAME

trio_whole_exome_parse_peddy_ped_csv.pl

=head1 AUTHOR

Alison Meynert (alison.meynert@igmm.ed.ac.uk)

=head1 DESCRIPTION

Checks the parent-child and parent-parent relationships from peddy output.

=cut

use strict;

use Getopt::Long;
use IO::File;

my $usage = qq{USAGE:
$0 [--help]
  --output   Output file
  --families Family directory
  --ped      Pedigree file for project
  --project  Project id
  --batch    Batch id
  --version  Analysis run version (v1, v2, etc)
ameyner2's avatar
ameyner2 committed
my $version;
    'help'       => \$help,
    'project=s'  => \$project_id,
    'ped=s'      => \$ped_file,
    'output=s'   => \$out_file,
    'families=s' => \$fam_dir,
    'version=s'  => \$version,
    'batch=s'    => \$batch_id
if ($help || !$project_id || !$ped_file || !$out_file || !$batch_id || !$version || !$fam_dir)
{
    print $usage;
    exit(0);
}

# Read in the pedigree file
my $in_fh = new IO::File;
$in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!";

my %ped;
while (my $line = <$in_fh>)
{
	chomp $line;
	my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line);
	$ped{$family_id}{'count'}++;
	$ped{$family_id}{$individual_id}{'father'} = $father_id;
	$ped{$family_id}{$individual_id}{'mother'} = $mother_id;
	if ($aff == 2)
	{
		$ped{$family_id}{'aff'} = $individual_id;
	}
}

$in_fh->close();

my $out_fh = new IO::File;
$out_fh->open($out_file, "w") or die "Could not open $out_file\n$!";

printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";

foreach my $family_id (sort keys %ped)
{
	my @peddy_glob = glob(sprintf("$fam_dir/*_%s_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv", 
	        $project_id, $version, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id));
	next if (scalar(@peddy_glob) == 0);

	my $peddy_fh = new IO::File;
	$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";

	my @headers;
	my %info;
	my @sample_pairs;
	while (my $line = <$peddy_fh>)
	{
		chomp $line;
		if ($line =~ /^sample_a/)
		{
			@headers = split(/,/, $line);
		}
		else
		{
			my @data = split(/,/, $line);
			push(@sample_pairs, sprintf("%s\t%s", $data[0], $data[1]));
			for (my $i = 2; $i < scalar(@headers); $i++)
			{
				$info{$headers[$i]}{sprintf("%s\t%s", $data[0], $data[1])} = $data[$i];
			}
		}
	}

	$peddy_fh->close();

	foreach my $sample_pair (@sample_pairs)
	{
		my ($sample_a, $sample_b) = split(/\t/, $sample_pair);

		$sample_a =~ /(.+)_$family_id/;
		my $sample_a_nofam = $1;
		$sample_b =~ /(.+)_$family_id/;
		my $sample_b_nofam = $1;

		if ($ped{$family_id}{$sample_a_nofam}{'father'} eq $sample_b_nofam ||
		    $ped{$family_id}{$sample_a_nofam}{'mother'} eq $sample_b_nofam ||
		    $ped{$family_id}{$sample_b_nofam}{'father'} eq $sample_a_nofam ||
		    $ped{$family_id}{$sample_b_nofam}{'mother'} eq $sample_a_nofam)
		{
			$info{'pedigree_parents'}{$sample_pair} = 'True';
		}

		$info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True';

		printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n", 
		    $info{'pedigree_parents'}{$sample_pair}, 
		    $info{'predicted_parents'}{$sample_pair},
		    $info{'parent_error'}{$sample_pair};
	}
}

$out_fh->close();