Newer
Older
user name
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/bin/bash
#
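# Prerequisites (done by hand before running this script): create the reanalysis folder,
# named e.g. results/20220418_reanalysis, and create one file per family type in
# tab-delimited text format with the columns:
# project_version plate_id family_id (excluding plate id)
# The lists used for symlinking (quad.txt, shared_affected.txt, singleton.txt,
# trio_affected_parent.txt, trio.txt) are read from the params subfolder;
# singleton_from_duo.txt is read from the reanalysis folder itself.
#
# Run this script in the reanalysis folder. Ensure that the environment variable project_id is set to
# the same name as the reanalysis folder.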
#
# Create symlinks for the families that don't require any re-processing

# Link the single entry of ../<project>/<subdir> whose name contains <family>
# into ./<subdir>. Must be called from the reanalysis folder.
# Arguments: $1 - project version (name of a sibling folder of the reanalysis folder)
#            $2 - subfolder to search and to link into (families or params)
#            $3 - family id, matched as a literal substring of the entry name
#            $4 - optional suffix every candidate name must end with (e.g. .ped)
# Returns:   0 if the link was created; non-zero if zero or several entries
#            match (nothing is linked, a warning goes to stderr) or ln fails.
link_family_entry() {
    local project=$1 subdir=$2 family=$3 suffix=${4-}
    local entry name
    local -a matches
    matches=()
    for entry in "../$project/$subdir"/*"$suffix"
    do
        # A glob that matches nothing is left as the literal pattern; skip it.
        [[ -e "$entry" || -L "$entry" ]] || continue
        name=${entry##*/}
        if [[ "$name" == *"$family"* ]]; then
            matches+=("$name")
        fi
    done
    # Refuse to guess: with no match ln would create a stray link, and with
    # several matches the family id is ambiguous.
    if (( ${#matches[@]} != 1 )); then
        printf 'warning: expected exactly one entry matching "%s" in ../%s/%s, found %d; not linked\n' \
            "$family" "$project" "$subdir" "${#matches[@]}" >&2
        return 1
    fi
    # The link target is resolved relative to the link's own directory,
    # hence one more "../" than the path used for the lookup above.
    ln -s -- "../../$project/$subdir/${matches[0]}" "$subdir/${matches[0]}"
}

# For every row of the per-family-type lists in ./params (tab-delimited:
# project_version, plate_id, family_id) link the family's folder into ./families
# and its PED file into ./params. The working directory is never changed, so a
# missing folder cannot make later steps run in the wrong place.
link_unchanged_families() {
    local file list line project family
    for file in quad.txt shared_affected.txt singleton.txt trio_affected_parent.txt trio.txt
    do
        list="params/$file"
        if [[ ! -r "$list" ]]; then
            printf 'warning: %s is missing or unreadable; skipped\n' "$list" >&2
            continue
        fi
        # Single pass over the list; "|| [[ -n ... ]]" keeps a final row that
        # has no trailing newline.
        while IFS= read -r line || [[ -n "$line" ]]
        do
            [[ -n "$line" ]] || continue
            project=$(cut -f 1 <<<"$line")
            # -s yields an empty value for rows without tabs instead of the whole row.
            family=$(cut -s -f 3 <<<"$line")
            if [[ -z "$project" || -z "$family" ]]; then
                printf 'warning: malformed row in %s: %s\n' "$list" "$line" >&2
                continue
            fi
            link_family_entry "$project" families "$family"
            link_family_entry "$project" params "$family" .ped
        done < "$list"
    done
}

link_unchanged_families
# For the singletons from duos with unaffected parents that need to be re-generated,
# prepare appropriate PED files in the analysis/params folder to begin analysis.
cp singleton_from_duo.txt ../../analysis/params
cd ../../analysis/params
count=`wc -l singleton_from_duo.txt | awk '{ print $1 }'`
file=singleton_from_duo.txt
for ((i = 1; i <= $count; i = i + 1))
do
project=`head -n $i $file | tail -n 1 | cut -f 1`
family=`head -n $i $file | tail -n 1 | cut -f 3`
ped=`ls ../../results/${project}/params/*.ped | grep $family`
grep 2$ $ped | awk '{ print $1 "\t" $2 "\t0\t0\t" $5 "\t" $6}' > `basename $ped`
done
# Create a family ids list

# Write the sorted first (tab-delimited) column of every *.ped file in the
# current directory to <project_id>.family_ids.txt.
# Globals: project_id (read) - must be set and non-empty.
# Returns: 1, writing nothing, if project_id is unset/empty (the output would
#          otherwise be the hidden file ".family_ids.txt") or if there are no
#          *.ped files; otherwise the status of the cut | sort pipeline.
write_family_ids() {
    local -a peds
    if [[ -z "${project_id:-}" ]]; then
        printf 'error: project_id is not set; family ids list not written\n' >&2
        return 1
    fi
    peds=(*.ped)
    # A glob that matches nothing is left as the literal pattern "*.ped".
    if [[ ! -e "${peds[0]}" ]]; then
        printf 'error: no *.ped files in %s; family ids list not written\n' "$PWD" >&2
        return 1
    fi
    cut -f 1 -- "${peds[@]}" | sort > "$project_id.family_ids.txt"
}

write_family_ids