Skip to content
Snippets Groups Projects
reanalysis_preparation.sh 1.68 KiB
Newer Older
#!/bin/bash

#
# Before running: create the reanalysis folder (named e.g. results/20220418_reanalysis) and,
# for each of the family types, a tab-delimited text file with one family per line:
# project_version <TAB> plate_id <TAB> family_id (excluding plate id)
#
# Run this script in the reanalysis folder. Ensure that the environment variable project_id is set to
# the same name as the reanalysis folder.
#

# Create symlinks for the families that don't require any re-processing.
#
# For every row of the per-family-type lists in params/, two relative symlinks are
# created inside the current directory (the reanalysis folder):
#   families/<family_dir> -> ../../<project>/families/<family_dir>
#   params/<name>.ped     -> ../../<project>/params/<name>.ped
# A problem with a single row is reported on stderr and that row is skipped, so one
# bad entry neither aborts the run nor leaves a wrong link behind.

#######################################
# Symlink the single entry of a source directory whose name contains a family id.
# Arguments:
#   $1 - source directory, relative to the current directory (e.g. ../proj/families)
#   $2 - family id, matched as a literal substring of the entry name
#   $3 - suffix the entry name must end with ("" for any, or e.g. ".ped")
#   $4 - directory (relative, one level deep) in which the link is created
# Outputs: diagnostics on stderr
# Returns: 0 if the link was created or already exists, 1 otherwise
#######################################
link_matching_entry() {
    local src_dir=$1 family=$2 suffix=$3 dest_dir=$4
    local candidate name
    local -a matches=()

    if [[ ! -d "$dest_dir" ]]; then
        printf 'error: link directory %s does not exist\n' "$dest_dir" >&2
        return 1
    fi

    for candidate in "${src_dir}"/*"${family}"*"${suffix}"; do
        # An unmatched glob is left as the literal pattern, so keep real paths only.
        if [[ -e "$candidate" ]]; then
            matches+=("$candidate")
        fi
    done

    # Zero matches would otherwise link the whole source directory; several matches
    # mean the family id is ambiguous. Neither case can be linked safely.
    if (( ${#matches[@]} != 1 )); then
        printf 'warning: expected 1 entry matching "%s" in %s, found %d; skipping\n' \
            "$family" "$src_dir" "${#matches[@]}" >&2
        return 1
    fi

    name=${matches[0]##*/}
    if [[ -e "${dest_dir}/${name}" || -L "${dest_dir}/${name}" ]]; then
        return 0 # already linked by a previous run
    fi

    # The link lives one level below the current directory, hence the extra "../".
    ln -s -- "../${matches[0]}" "${dest_dir}/${name}"
}

#######################################
# Link the family directory and PED file of every family listed in the
# per-family-type files under params/.
# Outputs: diagnostics on stderr
#######################################
link_unchanged_families() {
    local list project family

    for list in quad.txt shared_affected.txt singleton.txt trio_affected_parent.txt trio.txt; do
        if [[ ! -f "params/${list}" ]]; then
            printf 'warning: params/%s not found; skipping\n' "$list" >&2
            continue
        fi

        # Columns 1 and 3 of each row are project_version and family_id.
        while IFS=$'\t' read -r project family || [[ -n "$project" ]]; do
            family=${family%$'\r'} # tolerate Windows line endings
            if [[ -z "$project" && -z "$family" ]]; then
                continue # blank line
            fi
            if [[ -z "$project" || -z "$family" ]]; then
                printf 'warning: malformed row in params/%s; skipping\n' "$list" >&2
                continue
            fi

            link_matching_entry "../${project}/families" "$family" "" families
            link_matching_entry "../${project}/params" "$family" ".ped" params
        done < <(cut -f 1,3 -- "params/${list}")
    done
}

link_unchanged_families

# For the singletons from duos with unaffected parents that need to be re-generated,
# prepare appropriate PED files in the analysis/params folder to begin analysis.
#
# Reads singleton_from_duo.txt from the current directory, copies it to
# ../../analysis/params and changes into that directory (the script stays there
# afterwards). For each listed family, the matching PED file under
# ../../results/<project>/params is reduced to the lines ending in "2", with columns
# 3 and 4 replaced by 0, and written here under the same file name.
# NOTE(review): lines ending in "2" are presumably the affected individuals of a
# 6-column PED file (column 6 = affected status) - confirm.
file=singleton_from_duo.txt
analysis_params=../../analysis/params

if [[ ! -f "$file" ]]; then
    printf 'error: %s not found in %s\n' "$file" "$PWD" >&2
    exit 1
fi
if [[ ! -d "$analysis_params" ]]; then
    # Without this check cp would create a plain file called "params".
    printf 'error: directory %s does not exist\n' "$analysis_params" >&2
    exit 1
fi

cp -- "$file" "${analysis_params}/" || exit 1
# Abort if the directory change fails; otherwise PED files would be written elsewhere.
cd "$analysis_params" || exit 1

# Columns 1 and 3 of each row are project_version and family_id.
while IFS=$'\t' read -r project family || [[ -n "$project" ]]; do
    family=${family%$'\r'} # tolerate Windows line endings
    if [[ -z "$project" && -z "$family" ]]; then
        continue # blank line
    fi
    if [[ -z "$project" || -z "$family" ]]; then
        printf 'warning: malformed row in %s; skipping\n' "$file" >&2
        continue
    fi

    # Collect the PED files whose name contains the family id (literal match).
    peds=()
    for candidate in "../../results/${project}/params/"*"${family}"*.ped; do
        # An unmatched glob is left as the literal pattern, so keep real paths only.
        if [[ -e "$candidate" ]]; then
            peds+=("$candidate")
        fi
    done

    if (( ${#peds[@]} != 1 )); then
        printf 'warning: expected 1 PED file for family "%s" in project "%s", found %d; skipping\n' \
            "$family" "$project" "${#peds[@]}" >&2
        continue
    fi

    ped=${peds[0]}
    # Keep only lines ending in 2 and set both parent columns to 0.
    awk '/2$/ { print $1 "\t" $2 "\t0\t0\t" $5 "\t" $6 }' "$ped" > "${ped##*/}"
done < <(cut -f 1,3 -- "$file")

# Create a family ids list
#
# Writes the sorted first column of every PED file in the current directory to
# <project_id>.family_ids.txt. The environment variable project_id must be set;
# otherwise the output would silently land in a hidden file named ".family_ids.txt".
if [[ -z "${project_id:-}" ]]; then
    printf 'error: environment variable project_id is not set\n' >&2
    exit 1
fi

ped_files=(*.ped)
# An unmatched glob is left as the literal pattern "*.ped".
if [[ ! -e "${ped_files[0]}" ]]; then
    printf 'error: no .ped files found in %s\n' "$PWD" >&2
    exit 1
fi

# cut reads the files itself, so a file lacking a final newline cannot be merged
# with the first line of the next one.
cut -f 1 -- "${ped_files[@]}" | sort > "${project_id}.family_ids.txt"