Newer
Older
#!/bin/bash
#PBS -l walltime=01:00:00
#PBS -l ncpus=1,mem=2gb
#PBS -q uv2000
#PBS -N downstream_setup
#PBS -j oe

# Job header for the downstream-analysis setup step.
#
# PROJECT_ID, DATE and BATCH are read below but never assigned in this
# script, so they are expected to already be present in the job
# environment (presumably passed at submission time - confirm with the
# submitting wrapper).

# --- fixed locations -------------------------------------------------------
BASE="/scratch/u035/u035/shared/analysis/wes_pilot"
SOURCE_DIR="/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/final"
PED_DIR="/scratch/u035/u035/shared/analysis/wes_pilot/params"
SCRIPTS_DIR="/home/u035/u035/shared/scripts"
PYTHON2="/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7"

# --- per-run identifiers ---------------------------------------------------
# Combined "<DATE>_<BATCH>" label, only used for logging in this script.
DATE_BATCH="${DATE}_${BATCH}"

# --- per-project working tree (everything hangs off WORK_DIR) ---------------
WORK_DIR="${BASE}/${PROJECT_ID}"
G2P_DIR="${WORK_DIR}/G2P"
VASE_DIR="${WORK_DIR}/VASE"
COV_DIR="${WORK_DIR}/COV"
DEC_DIR="${WORK_DIR}/DECIPHER"
CNV_DIR="${WORK_DIR}/CNV"

# --- record the run parameters in the job log ------------------------------
printf '%s\n' "PROJECT_ID = ${PROJECT_ID}"
printf '%s\n' "DATE = ${DATE}"
printf '%s\n' "BATCH = ${BATCH}"
printf '%s\n' "DATE_BATCH = ${DATE_BATCH}"
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Create the per-project working tree:
#   ${WORK_DIR}/{PED,VCF,LOG}, ${G2P_DIR}, ${VASE_DIR}, ${COV_DIR},
#   ${DEC_DIR}/IGV and ${CNV_DIR}.
# Every folder is created through its absolute path, so a failed step can
# never cause later folders to be created in the wrong place. The script
# finishes this section with ${WORK_DIR} as the current directory.

# An empty PROJECT_ID would make WORK_DIR collapse onto the shared base
# directory and scatter PED/VCF/LOG there - refuse to continue instead.
if [[ -z "${PROJECT_ID:-}" || -z "${WORK_DIR:-}" ]]; then
  echo "ERROR: PROJECT_ID (and therefore WORK_DIR) is not set - refusing to create folders" >&2
  exit 1
fi

# make_dir DIR
#   Create DIR (and any missing parents); an already existing directory is
#   accepted so the setup can be re-run. Aborts the job on any real failure.
make_dir() {
  local dir=$1
  if ! mkdir -p -- "${dir}"; then
    echo "ERROR: cannot create directory '${dir}'" >&2
    exit 1
  fi
}

# create the working dir and the required subfolders
make_dir "${WORK_DIR}"
make_dir "${WORK_DIR}/PED"
make_dir "${WORK_DIR}/VCF"
make_dir "${WORK_DIR}/LOG"
echo "Created WORK_DIR = ${WORK_DIR} for this batch and the required subfolders"

# create the folders needed for G2P
make_dir "${G2P_DIR}"
echo "Created ${G2P_DIR} for this batch"

# create the folders needed for VASE
make_dir "${VASE_DIR}"
echo "Created ${VASE_DIR} for this batch"

# create the folders needed for the coverage analysis
make_dir "${COV_DIR}"
echo "Created ${COV_DIR} for this batch"

# create the DECIPHER folders
make_dir "${DEC_DIR}"
make_dir "${DEC_DIR}/IGV"
echo "Created ${DEC_DIR} for this batch and the required subfolders"

# create the CNV folders
make_dir "${CNV_DIR}"
echo "Created ${CNV_DIR} for this batch"

# Leave the shell in WORK_DIR for the steps that follow.
if ! cd -- "${WORK_DIR}"; then
  echo "ERROR: cannot change directory to '${WORK_DIR}'" >&2
  exit 1
fi
### Copy the PED file per each family into ${WORK_DIR}/PED ###
### format: <BATCH_ID>_<FAMILY_ID>.ped ###
### (the FAM_IDs file is no longer written here - that code is disabled below) ###
ped_count=0
for FILE in "${PED_DIR}/${BATCH}"_*.ped
do
  # When nothing matches, bash leaves the pattern as a literal string;
  # skip it rather than handing a non-existent path to cp.
  [[ -e "${FILE}" ]] || continue

  # copy the PED file (trailing slash: the target must be a directory)
  if ! cp -- "${FILE}" "${WORK_DIR}/PED/"; then
    echo "ERROR: failed to copy '${FILE}' to '${WORK_DIR}/PED/'" >&2
    exit 1
  fi
  ped_count=$((ped_count + 1))

  # # create the file with the family ids
  # filename="${FILE##*/}"
  # IFS='_|.' read -ra array_1 <<< "$filename"
  # echo "family id = ${array_1[1]}"
  # echo ${array_1[1]} >> ${WORK_DIR}/FAM_IDs.txt
done

# A batch without any PED file cannot be set up; stop with a clear message.
if [[ "${ped_count}" -eq 0 ]]; then
  echo "ERROR: no PED files found matching '${PED_DIR}/${BATCH}_*.ped'" >&2
  exit 1
fi
######################################################################################
### generate the FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt *only for trio* families ###
######################################################################################
# Run the extraction script on WORK_DIR (timed). $? after a `time`d command
# is the status of the command itself, so it can be checked directly.
time "${PYTHON2}" "${SCRIPTS_DIR}/extract_trio_FAM_PRO_ID.py" "${WORK_DIR}"
extract_status=$?

# Only report success when the extraction actually succeeded; otherwise
# propagate its exit status so the scheduler records the job as failed.
if [[ "${extract_status}" -ne 0 ]]; then
  echo "ERROR: extract_trio_FAM_PRO_ID.py failed with exit status ${extract_status} for PROJECT_ID = $PROJECT_ID (DATE_BATCH = ${DATE_BATCH})" >&2
  exit "${extract_status}"
fi

echo "OK: Setup for PROJECT_ID = $PROJECT_ID (DATE_BATCH = ${DATE_BATCH}) successful"
######################################################
# LOG_DIR=$BASE/${PROJECT_ID}/logs
#mkdir ${LOG_DIR}
#echo "Created LOG_DIR = ${LOG_DIR} for this batch"
#cd $LOG_DIR
#echo "Created LOG_DIR = ${LOG_DIR} for this batch"
#cd $BASE
#mkdir $BASE/$PROJECT_ID
#source $CONFIG
## Change to the working directory
#cd $WORK_DIR/gvcfs
## Copy the GVCF files
#for file in `cat ../params/$BATCH.gvcfs.txt`
#do
# cp $file* ./
#done
## Check the md5s
#for file in *.md5
#do
# md5sum --check $file
#done