#!/bin/bash
#
# Populate a reanalysis folder from previously processed projects.
#
# Before running:
#   * Create the reanalysis folder, named e.g. results/20220418_reanalysis.
#   * Create one list file per family type, in tab-delimited text format:
#       project_version <TAB> plate_id <TAB> family_id (excluding plate id)
#     This script reads params/quad.txt, params/shared_affected.txt,
#     params/singleton.txt, params/trio_affected_parent.txt, params/trio.txt
#     and ./singleton_from_duo.txt.
#   * Set the environment variable project_id to the same name as the
#     reanalysis folder.
#
# Run this script in the reanalysis folder.
#
# What it does:
#   1. Creates symlinks (family folder and PED file) for the families that
#      don't require any re-processing.
#   2. For the singletons from duos listed in singleton_from_duo.txt, writes
#      rewritten PED files into ../../analysis/params.
#   3. Writes ${project_id}.family_ids.txt in ../../analysis/params.

set -u -o pipefail
# Unmatched globs must expand to nothing so that match counts are reliable.
shopt -s nullglob

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a warning message to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Print field number $2 of the tab-delimited line $1.
list_field() {
  cut -f "$2" <<<"$1"
}

# Print the single path passed after the description in $1.
# Returns 1 (after a warning) unless exactly one path was passed, so an
# ambiguous or missing match is never silently turned into a wrong link.
only_match() {
  local what=$1
  shift
  if (( $# != 1 )); then
    warn "expected exactly one ${what}, found $#; skipping"
    return 1
  fi
  printf '%s\n' "$1"
}

# Create symlink $2 pointing at $1 (the target is stored verbatim, so a
# relative target is resolved relative to the directory containing the link).
# An existing entry is left untouched: running `ln -s` against an existing
# symlink to a directory would create a nested link inside the link's target.
make_link() {
  local target=$1 link=$2
  if [[ -e "$link" || -L "$link" ]]; then
    warn "${link} already exists; leaving it untouched"
    return 0
  fi
  ln -s -- "$target" "$link" || warn "could not create ${link}"
}

# Create symlinks for the families that don't require any re-processing.
# For every line of each list file, links families/<dir> and params/<ped> to
# the matching entries of ../<project> (relative to the reanalysis folder).
link_unchanged_families() {
  local list list_path line project family
  local family_path family_name ped_path ped_name

  [[ -d families ]] || die "no 'families' directory in $PWD"
  [[ -d params ]] || die "no 'params' directory in $PWD"

  for list in quad.txt shared_affected.txt singleton.txt \
    trio_affected_parent.txt trio.txt; do
    list_path="params/${list}"
    if [[ ! -r "$list_path" ]]; then
      warn "${list_path} is missing or unreadable; skipping"
      continue
    fi

    # Read on fd 3 so nothing in the loop body can consume the list.
    # The `|| [[ -n ... ]]` keeps a final line that lacks a newline.
    while IFS= read -r line <&3 || [[ -n "$line" ]]; do
      [[ -n "$line" ]] || continue
      project=$(list_field "$line" 1)
      family=$(list_field "$line" 3)
      if [[ -z "$project" || -z "$family" ]]; then
        warn "${list_path}: malformed line '${line}'; skipping"
        continue
      fi

      # Family folder: the entry whose name contains the family id.
      if family_path=$(only_match \
        "entry matching '${family}' in ../${project}/families" \
        "../${project}/families/"*"${family}"*); then
        family_name=${family_path##*/}
        make_link "../../${project}/families/${family_name}" \
          "families/${family_name}"
      fi

      # PED file: the .ped file whose name contains the family id.
      if ped_path=$(only_match \
        "PED file matching '${family}' in ../${project}/params" \
        "../${project}/params/"*"${family}"*.ped); then
        ped_name=${ped_path##*/}
        make_link "../../${project}/params/${ped_name}" "params/${ped_name}"
      fi
    done 3< "$list_path"
  done
}

# For the singletons from duos with unaffected parents that need to be
# re-generated, prepare appropriate PED files in the analysis/params folder
# to begin analysis. Changes the working directory to ../../analysis/params.
prepare_singleton_peds() {
  local list=singleton_from_duo.txt
  local line project family ped_path ped_name

  # NOTE(review): the other list files are read from params/, but this one is
  # read from the current directory - confirm that is the intended location.
  cp -- "$list" ../../analysis/params || die "cannot copy ${list}"
  cd ../../analysis/params || die "cannot cd to ../../analysis/params"

  while IFS= read -r line <&3 || [[ -n "$line" ]]; do
    [[ -n "$line" ]] || continue
    project=$(list_field "$line" 1)
    family=$(list_field "$line" 3)
    if [[ -z "$project" || -z "$family" ]]; then
      warn "${list}: malformed line '${line}'; skipping"
      continue
    fi

    ped_path=$(only_match \
      "PED file matching '${family}' in ../../results/${project}/params" \
      "../../results/${project}/params/"*"${family}"*.ped) || continue
    ped_name=${ped_path##*/}

    # Keep only the lines ending in "2" and write columns 1, 2, 5 and 6 with
    # columns 3 and 4 replaced by 0.
    awk '/2$/ { print $1 "\t" $2 "\t0\t0\t" $5 "\t" $6 }' "$ped_path" \
      > "$ped_name" || warn "could not write ${ped_name}"
  done 3< "$list"
}

# Create a family ids list: the sorted first column of every .ped file in the
# current directory, written to ${project_id}.family_ids.txt.
write_family_ids() {
  local out="${project_id}.family_ids.txt"
  local -a ped_files=(*.ped)

  if (( ${#ped_files[@]} == 0 )); then
    warn "no .ped files in $PWD; writing an empty ${out}"
    : > "$out"
    return 0
  fi
  cut -f 1 -- "${ped_files[@]}" | sort > "$out" || die "could not write ${out}"
}

main() {
  # Fail before touching anything if the output name cannot be built.
  : "${project_id:?project_id must be set to the name of the reanalysis folder}"

  link_unchanged_families
  prepare_singleton_peds
  write_family_ids
}

main "$@"