Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/perl -w
=head1 NAME
trio_whole_exome_create_parameter_files.pl
=head1 AUTHOR
Alison Meynert (alison.meynert@igmm.ed.ac.uk)
=head1 DESCRIPTION
Creates the files:
$prefix.family_ids.txt - format <pcr_plate_id>_<family_id>
$prefix_$family_id.ped - select only the individuals in a given family,
prefix <family_id> with <pcr_plate_id> and
add suffix <family_id> to <individual_id>
=cut
use strict;
use Getopt::Long;
use IO::File;
my $usage = qq{USAGE:
$0 [--help]
--prefix Output prefix for files
--ped Pedigree file for project
--suffix Sample suffix
};
my $help = 0;
my $ped_file;
my $output_prefix;
my $sample_suffix;
GetOptions(
'help' => \$help,
'prefix=s' => \$output_prefix,
'suffix=s' => \$sample_suffix,
'ped=s' => \$ped_file
) or die $usage;
if ($help || !$output_prefix || !$sample_suffix || !$ped_file)
{
print $usage;
exit(0);
}
# Read in the file_list.tsv file(s) from the Edinburgh Genomics project delivery folder
my %family;
while (my $line = <>)
{
next if ($line =~ /^File/);
next if ($line =~ /unassigned/);
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
chomp $line;
my @tokens = split(/\t/, $line);
my $sample_dir = $tokens[7];
# remove the sample suffix
$sample_dir =~ /(.+)$sample_suffix/;
my $sample_full_id = $1;
my ($pcr_plate_id, $individual_id, $family_id) = split('_', $sample_full_id);
$family{$family_id}{'pcr_plate_id'} = $pcr_plate_id;
$family{$family_id}{'individual_id'}{$individual_id}++;
}
my $in_fh = new IO::File;
$in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!";
my %ped;
while (my $line = <$in_fh>)
{
chomp $line;
my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line);
$ped{$family_id}{$individual_id}{'father_id'} = $father_id;
$ped{$family_id}{$individual_id}{'mother_id'} = $mother_id;
$ped{$family_id}{$individual_id}{'sex'} = $sex;
$ped{$family_id}{$individual_id}{'aff'} = $aff;
}
$in_fh->close();
my $family_id_out_fh = new IO::File;
$family_id_out_fh->open(sprintf("%s.family_ids.txt", $output_prefix), "w") or die "Could not open $output_prefix.family_ids.txt\n$!";
my $ped_out_fh = new IO::File;
{
my $pcr_plate_id = $family{$family_id}{'pcr_plate_id'};
my $new_family_id = sprintf("%s_%s", $pcr_plate_id, $family_id);
print $family_id_out_fh "$new_family_id\n";
$ped_out_fh->open(sprintf("%s_%s.ped", $output_prefix, $new_family_id), "w") or die "Could not open $output_prefix.$new_family_id.ped\n$!";
foreach my $individual_id (keys %{ $ped{$family_id} })
{
my $father_id = $ped{$family_id}{$individual_id}{'father_id'};
my $mother_id = $ped{$family_id}{$individual_id}{'mother_id'};
my $sex = $ped{$family_id}{$individual_id}{'sex'};
my $aff = $ped{$family_id}{$individual_id}{'aff'};
if (!defined($father_id) || !defined($mother_id))
{
print STDERR "$individual_id - no father/mother id found\n";
next;
}
my $new_individual_id = sprintf("%s_%s", $individual_id, $family_id);
my $new_father_id = 0;
my $new_mother_id = 0;
if ($father_id != 0) { $new_father_id = sprintf("%s_%s", $father_id, $family_id); }
if ($mother_id != 0) { $new_mother_id = sprintf("%s_%s", $mother_id, $family_id); }
printf $ped_out_fh "$new_family_id\t$new_individual_id\t$new_father_id\t$new_mother_id\t$sex\t$aff\n";
}
$ped_out_fh->close();
}
$family_id_out_fh->close();