Skip to content
Snippets Groups Projects
trio_whole_exome_create_parameter_files.pl 3.39 KiB
Newer Older
ameyner2's avatar
ameyner2 committed
#!/usr/bin/perl -w

=head1 NAME

trio_whole_exome_create_parameter_files.pl

=head1 AUTHOR

Alison Meynert (alison.meynert@igmm.ed.ac.uk)

=head1 DESCRIPTION

Creates the files:
  $prefix.family_ids.txt - format <pcr_plate_id>_<family_id>
  $prefix_$family_id.ped - select only the individuals in a given family,
                           prefix <family_id> with <pcr_plate_id> and
                           add suffix <family_id> to <individual_id>

=cut

use strict;

use Getopt::Long;
use IO::File;

my $usage = qq{USAGE:
$0 [--help]
  --prefix Output prefix for files
  --ped    Pedigree file for project
  --suffix Sample suffix
};

my $help = 0;
my $ped_file;
my $output_prefix;
my $sample_suffix;

GetOptions(
    'help'     => \$help,
    'prefix=s' => \$output_prefix,
    'suffix=s' => \$sample_suffix,
    'ped=s'    => \$ped_file
) or die $usage;

if ($help || !$output_prefix || !$sample_suffix || !$ped_file)
{
    print $usage;
    exit(0);
}

# Read in the file_list.tsv file(s) from the Edinburgh Genomics project delivery folder
my %family;
while (my $line = <>)
{
	next if ($line =~ /^File/);
	next if ($line =~ /unassigned/);
ameyner2's avatar
ameyner2 committed
	chomp $line;

	my @tokens = split(/\t/, $line);
	my $sample_dir = $tokens[7];

	# remove the sample suffix
	$sample_dir =~ /(.+)$sample_suffix/;
	my $sample_full_id = $1;

	my ($pcr_plate_id, $individual_id, $family_id) = split('_', $sample_full_id);

	$family{$family_id}{'pcr_plate_id'} = $pcr_plate_id;
	$family{$family_id}{'individual_id'}{$individual_id}++;
}

my $in_fh = new IO::File;
$in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!";

my %ped;
while (my $line = <$in_fh>)
{
	chomp $line;
	my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line);
	$ped{$family_id}{$individual_id}{'father_id'} = $father_id;
	$ped{$family_id}{$individual_id}{'mother_id'} = $mother_id;
	$ped{$family_id}{$individual_id}{'sex'}       = $sex;
	$ped{$family_id}{$individual_id}{'aff'}       = $aff;
}

$in_fh->close();

my $family_id_out_fh = new IO::File;
$family_id_out_fh->open(sprintf("%s.family_ids.txt", $output_prefix), "w") or die "Could not open $output_prefix.family_ids.txt\n$!";

my $ped_out_fh = new IO::File;
foreach my $family_id (keys %ped)
ameyner2's avatar
ameyner2 committed
{
	my $pcr_plate_id = $family{$family_id}{'pcr_plate_id'};
	my $new_family_id = sprintf("%s_%s", $pcr_plate_id, $family_id);

	print $family_id_out_fh "$new_family_id\n";

	$ped_out_fh->open(sprintf("%s_%s.ped", $output_prefix, $new_family_id), "w") or die "Could not open $output_prefix.$new_family_id.ped\n$!";

	foreach my $individual_id (keys %{ $ped{$family_id} })
ameyner2's avatar
ameyner2 committed
	{
		my $father_id = $ped{$family_id}{$individual_id}{'father_id'};
		my $mother_id = $ped{$family_id}{$individual_id}{'mother_id'};
		my $sex       = $ped{$family_id}{$individual_id}{'sex'};
		my $aff       = $ped{$family_id}{$individual_id}{'aff'};

		if (!defined($father_id) || !defined($mother_id))
		{
                	print STDERR "$individual_id - no father/mother id found\n";
			next;
		}

ameyner2's avatar
ameyner2 committed
		my $new_individual_id = sprintf("%s_%s", $individual_id, $family_id);

		my $new_father_id = 0;
		my $new_mother_id = 0;
		if ($father_id != 0) { $new_father_id = sprintf("%s_%s", $father_id, $family_id); }
		if ($mother_id != 0) { $new_mother_id = sprintf("%s_%s", $mother_id, $family_id); }

		printf $ped_out_fh "$new_family_id\t$new_individual_id\t$new_father_id\t$new_mother_id\t$sex\t$aff\n";
	}	

	$ped_out_fh->close();
}

$family_id_out_fh->close();