Skip to content
Snippets Groups Projects
Commit 04064420 authored by ameyner2's avatar ameyner2
Browse files

Versions of the scripts for Santosh samples

parent bafadff4
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
#
# prepare_bcbio_config_crf.sh <config.sh> <project_id> <version>
#
# Adaptation of prepare_bcbio_config.sh for data from the Santosh set
#
# Given a <project_id>.ped file for a set of trios (families) in the
# folder $PARAMS_DIR, creates the files <project_id>.family_ids.txt
# and <project>.sample_ids.txt in the same folder.
#
# Assumes that reads for the samples are in the path
# $READS_DIR/<project_id>/*.gz,
# and that no samples other than those with reads are listed in the
# PED file. $READS_DIR is specified in the <config.sh> file.
#
# Assumes that the sample names in the PED file match those
# specifying the read files.
#
# All samples must be annotated with sex (1=male, 2=female) in the
# 5th column and phenotype (1=unaffected, 2=affected) in the 6th
# column of the PED file.
#
# Runs bcbio sample preparation and configuration file generation,
# assuming the template configuration file is at $BCBIO_TEMPLATE,
# specified in the <config.sh> file.
#
# Assumes bcbio is on the PATH (set in <config.sh>).
#
CONFIG_SH=$1
PROJECT_ID=$2
VERSION=$3
source $CONFIG_SH
#
# Create the files:
# $PROJECT_ID.family_ids.txt - format <pcr_plate_id>_<family_id>
# $PROJECT_ID.$FAMILY_ID.ped - select only the individuals in a given family,
# prefix <family_id> with <pcr_plate_id> and
# add suffix <family_id> to <individual_id>
#
cd $PARAMS_DIR
# remove DOS newline characters if necessary
perl -pi -e 's/\r//' $PROJECT_ID.ped
# create reads directory for project
mkdir -p $READS_DIR/$PROJECT_ID
# generate the family_ids list - makes strong assumption about relative paths!
cut -f 1 $PROJECT_ID.ped | sort -u > $PROJECT_ID.family_ids.txt
for FAMILY_ID in `cat $PROJECT_ID.family_ids.txt`
do
grep ^$FAMILY_ID $PROJECT_ID.ped > ${PROJECT_ID}_${FAMILY_ID}.ped
echo "samplename,description,batch,sex,phenotype,variant_regions" > ${VERSION}_${PROJECT_ID}_${FAMILY_ID}.csv
COUNT=`wc -l ${PROJECT_ID}_${FAMILY_ID}.ped | awk '{ print $1 }'`
for ((i=1; i<=$COUNT; i=i+1))
do
SAMPLE=`head -n $i ${PROJECT_ID}_${FAMILY_ID}.ped | tail -n 1 | cut -f 2`
SEX=`head -n $i ${PROJECT_ID}_${FAMILY_ID}.ped | tail -n 1 | cut -f 5`
PHENOTYPE=`head -n $i ${PROJECT_ID}_${FAMILY_ID}.ped | tail -n 1 | cut -f 6`
for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*${SAMPLE}*.gz`
do
echo "$FILE,${SAMPLE}_${FAMILY_ID},$FAMILY_ID,$SEX,$PHENOTYPE,$TARGET" >> ${VERSION}_${PROJECT_ID}_${FAMILY_ID}.csv
done
done
bcbio_prepare_samples.py --out $READS_DIR/$PROJECT_ID --csv ${VERSION}_${PROJECT_ID}_${FAMILY_ID}.csv
mv ${VERSION}_${PROJECT_ID}_${FAMILY_ID}-merged.csv ${VERSION}_${PROJECT_ID}_${FAMILY_ID}.csv
BARE_FAMILY_ID=`echo $FAMILY_ID | cut -d '_' -f 2`
bcbio_nextgen.py -w template $BCBIO_TEMPLATE ${VERSION}_${PROJECT_ID}_${FAMILY_ID}.csv $READS_DIR/$PROJECT_ID/*_${BARE_FAMILY_ID}_R[12].fastq.gz
mv ${VERSION}_${PROJECT_ID}_${FAMILY_ID}/config/${VERSION}_${PROJECT_ID}_${FAMILY_ID}.yaml $CONFIG_DIR/
COMPRESSED_ID=`echo "$FAMILY_ID" | perl -pe "s/\_//"`
perl -i -pe "s/${COMPRESSED_ID}/${FAMILY_ID}/" $CONFIG_DIR/${VERSION}_${PROJECT_ID}_${FAMILY_ID}.yaml
rm -r ${VERSION}_${PROJECT_ID}_${FAMILY_ID}
done
#!/usr/bin/perl -w
=head1 NAME
trio_whole_exome_parse_peddy_ped_csv_no_batch.pl
=head1 AUTHOR
Alison Meynert (alison.meynert@igmm.ed.ac.uk)
=head1 DESCRIPTION
Checks the parent-child and parent-parent relationships from peddy output.
=cut
use strict;
use Getopt::Long;
use IO::File;
my $usage = qq{USAGE:
$0 [--help]
--output Output directory
--ped Pedigree file for project
--project Project id
--version Analysis run version (v1, v2, etc)
};
my $help = 0;
my $ped_file;
my $project_id;
my $version;
my $out_dir;
GetOptions(
'help' => \$help,
'project=s' => \$project_id,
'ped=s' => \$ped_file,
'output=s' => \$out_dir,
'version=s' => \$version
) or die $usage;
if ($help || !$project_id || !$ped_file || !$out_dir || !$version)
{
print $usage;
exit(0);
}
# Read in the pedigree file
my $in_fh = new IO::File;
$in_fh->open($ped_file, "r") or die "Could not open $ped_file\n$!";
my %ped;
while (my $line = <$in_fh>)
{
chomp $line;
my ($family_id, $individual_id, $father_id, $mother_id, $sex, $aff) = split(/\t/, $line);
$ped{$family_id}{'count'}++;
$ped{$family_id}{$individual_id}{'father'} = $father_id;
$ped{$family_id}{$individual_id}{'mother'} = $mother_id;
if ($aff == 2)
{
$ped{$family_id}{'aff'} = $individual_id;
}
}
$in_fh->close();
my $out_file = sprintf("$out_dir/qc/%s_%s.ped_check.txt", $version, $project_id);
my $out_fh = new IO::File;
$out_fh->open($out_file, "w") or die "Could not open $out_file\n$!";
printf $out_fh "project_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
foreach my $family_id (sort keys %ped)
{
my @peddy_glob = glob(sprintf("$out_dir/*_%s_%s_%s/%s_%s/qc/peddy/*.ped_check.csv",
$version, $project_id, $family_id, $ped{$family_id}{'aff'}, $family_id));
next if (scalar(@peddy_glob) == 0);
my $peddy_fh = new IO::File;
$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
my @headers;
my %info;
my @sample_pairs;
while (my $line = <$peddy_fh>)
{
chomp $line;
if ($line =~ /^sample_a/)
{
@headers = split(/,/, $line);
}
else
{
my @data = split(/,/, $line);
push(@sample_pairs, sprintf("%s\t%s", $data[0], $data[1]));
for (my $i = 2; $i < scalar(@headers); $i++)
{
$info{$headers[$i]}{sprintf("%s\t%s", $data[0], $data[1])} = $data[$i];
}
}
}
$peddy_fh->close();
foreach my $sample_pair (@sample_pairs)
{
my ($sample_a, $sample_b) = split(/\t/, $sample_pair);
$sample_a =~ /(.+)_$family_id/;
my $sample_a_nofam = $1;
$sample_b =~ /(.+)_$family_id/;
my $sample_b_nofam = $1;
if ($ped{$family_id}{$sample_a_nofam}{'father'} eq $sample_b_nofam ||
$ped{$family_id}{$sample_a_nofam}{'mother'} eq $sample_b_nofam ||
$ped{$family_id}{$sample_b_nofam}{'father'} eq $sample_a_nofam ||
$ped{$family_id}{$sample_b_nofam}{'mother'} eq $sample_a_nofam)
{
$info{'pedigree_parents'}{$sample_pair} = 'True';
}
$info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True';
printf $out_fh "$project_id\t$sample_pair\t%s\t%s\t%s\n",
$info{'pedigree_parents'}{$sample_pair},
$info{'predicted_parents'}{$sample_pair},
$info{'parent_error'}{$sample_pair};
}
}
$out_fh->close();
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment