#!/usr/bin/perl -w =head1 NAME trio_whole_exome_create_parameter_files.pl =head1 AUTHOR Alison Meynert (alison.meynert@igmm.ed.ac.uk) =head1 DESCRIPTION Creates the files: $prefix.family_ids.txt - format <pcr_plate_id>_<family_id> $prefix_$family_id.ped - select only the individuals in a given family, prefix <family_id> with <pcr_plate_id> and add suffix <family_id> to <individual_id> =cut use strict; use Getopt::Long; use IO::File; my $usage = qq{USAGE: $0 [--help] --prefix Output prefix for files --ped Pedigree file for project --suffix Sample suffix }; my $help = 0; my $ped_file; my $output_prefix; my $sample_suffix; GetOptions( 'help' => \$help, 'prefix=s' => \$output_prefix, 'suffix=s' => \$sample_suffix, 'ped=s' => \$ped_file ) or die $usage; if ($help || !$output_prefix || !$sample_suffix || !$ped_file) { print $usage; exit(0); } # Read in the file_list.tsv file(s) from the Edinburgh Genomics project delivery folder my %family; while (my $line = <>) { next if ($line =~ /^File/); chomp $line; my @tokens = split(/\t/, $line); my $sample_dir = $tokens[7]; # remove the sample suffix $sample_dir =~ /(.+)$sample_suffix/; my $sample_full_id = $1; my ($pcr_plate_id, $individual_id, $family_id) = split('_', $sample_full_id); $family{$family_id}{'pcr_plate_id'} = $pcr_plate_id; $family{$family_id}{'individual_id'}{$individual_id}++; } my $in_fh = new IO::File; $in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!"; my %ped; while (my $line = <$in_fh>) { chomp $line; my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line); $ped{$family_id}{$individual_id}{'father_id'} = $father_id; $ped{$family_id}{$individual_id}{'mother_id'} = $mother_id; $ped{$family_id}{$individual_id}{'sex'} = $sex; $ped{$family_id}{$individual_id}{'aff'} = $aff; } $in_fh->close(); my $family_id_out_fh = new IO::File; $family_id_out_fh->open(sprintf("%s.family_ids.txt", $output_prefix), "w") or die "Could not open $output_prefix.family_ids.txt\n$!"; my $ped_out_fh = new IO::File; foreach my $family_id (keys %family) { my $pcr_plate_id = $family{$family_id}{'pcr_plate_id'}; my $new_family_id = sprintf("%s_%s", $pcr_plate_id, $family_id); print $family_id_out_fh "$new_family_id\n"; $ped_out_fh->open(sprintf("%s_%s.ped", $output_prefix, $new_family_id), "w") or die "Could not open $output_prefix.$new_family_id.ped\n$!"; foreach my $individual_id (keys %{ $family{$family_id}{'individual_id'} }) { my $father_id = $ped{$family_id}{$individual_id}{'father_id'}; my $mother_id = $ped{$family_id}{$individual_id}{'mother_id'}; my $sex = $ped{$family_id}{$individual_id}{'sex'}; my $aff = $ped{$family_id}{$individual_id}{'aff'}; if (!defined($father_id) || !defined($mother_id)) { print STDERR "$individual_id - no father/mother id found\n"; next; } my $new_individual_id = sprintf("%s_%s", $individual_id, $family_id); my $new_father_id = 0; my $new_mother_id = 0; if ($father_id != 0) { $new_father_id = sprintf("%s_%s", $father_id, $family_id); } if ($mother_id != 0) { $new_mother_id = sprintf("%s_%s", $mother_id, $family_id); } printf $ped_out_fh "$new_family_id\t$new_individual_id\t$new_father_id\t$new_mother_id\t$sex\t$aff\n"; } $ped_out_fh->close(); } $family_id_out_fh->close();