#!/bin/bash
#PBS -l walltime=01:00:00
#PBS -l ncpus=1,mem=2gb
#PBS -q sgp
#PBS -N NHS_WES_rm_bam
#PBS -j oe
### Setup the folder structure for the downstream analysis ###
#
# Every per-project folder lives below ${BASE}/${PROJECT_ID}.
# PROJECT_ID, SOURCE_DIR, BATCH_ID, PLATE_ID and VERSION_N are read here but
# never assigned in the visible part of this script - presumably they are
# passed in through the job environment by whoever submits it (TODO confirm).
# NOTE(review): most of the *_DIR variables below are not referenced in the
# visible part of this script; they are kept so the layout stays documented.
BASE="/scratch/u035/u035/shared/trio_whole_exome/analysis"
WORK_DIR="${BASE}/${PROJECT_ID}"
VCF_DIR="${WORK_DIR}/VCF"
PED_DIR="${WORK_DIR}/PED"
LOG_DIR="${WORK_DIR}/LOG"
G2P_DIR="${WORK_DIR}/G2P"
VASE_DIR="${WORK_DIR}/VASE"
COV_DIR="${WORK_DIR}/COV"
DEC_DIR="${WORK_DIR}/DECIPHER"
IGV_DIR="${DEC_DIR}/IGV"
CNV_DIR="${WORK_DIR}/CNV"
BAMOUT_DIR="${WORK_DIR}/BAMOUT"
SCRIPTS_DIR="/home/u035/u035/shared/scripts"

# Record the job parameters in the job log so a run can be traced afterwards.
echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/${VERSION_N}_${PLATE_ID}
echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna
echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862
echo "PROJECT_ID = ${PROJECT_ID}" # this is the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done
echo "VERSION_N = ${VERSION_N}" # the version of the alignment and genotyping analysis
##############################################################
### Delete individual BAMs (and indexes) iff CRAM found ###
##############################################################

#######################################
# For every family folder of the current batch, delete each individual's
# <INDI_ID>_<FAM_ID>-ready.bam and its .bai index, but only when the matching
# -ready.cram exists and is non-empty.
# Family folders are matched by the exact version, batch & plate ID:
#   ${SOURCE_DIR}/????-??-??_${VERSION_N}_${BATCH_ID}_${PLATE_ID}_<FAM_ID>
# Individual folders are the sub-folders whose name contains an underscore.
# Globals:
#   SOURCE_DIR, VERSION_N, BATCH_ID, PLATE_ID (read)
# Arguments:
#   None
# Outputs:
#   Progress and ERROR lines on stdout (same stream the script already uses).
# Returns:
#   0 if every individual folder was handled without a problem,
#   1 if a required variable is empty, no family folder matched, a CRAM was
#     missing/empty, or an rm failed.
#######################################
delete_bams_with_cram() {
  local required fam_dir fam_id indi_dir indi_name bam cram
  local n_families=0 n_errors=0

  # An empty parameter would widen the glob below; never delete in that case.
  for required in SOURCE_DIR VERSION_N BATCH_ID PLATE_ID; do
    if [[ -z "${!required:-}" ]]; then
      echo " ERROR: ${required} is not set - have not deleted anything!"
      return 1
    fi
  done

  # Only the date and the family ID are wildcards; the variables are quoted so
  # spaces or glob characters inside them are taken literally.
  for fam_dir in "${SOURCE_DIR}"/????-??-??_"${VERSION_N}_${BATCH_ID}_${PLATE_ID}"_*; do
    # An unmatched glob stays as the literal pattern - skip it (and plain files).
    [[ -d "${fam_dir}" ]] || continue
    n_families=$((n_families + 1))

    # The family ID is whatever follows the last underscore of the folder name.
    fam_id="${fam_dir##*_}"
    echo " FAM_ID = ${fam_id}"

    # Glob the individual folders directly instead of parsing `ls -l` output;
    # the trailing slash restricts the match to directories.
    for indi_dir in "${fam_dir}"/*_*/; do
      [[ -d "${indi_dir}" ]] || continue
      indi_dir="${indi_dir%/}"
      indi_name="${indi_dir##*/}"
      echo " ${indi_name} is a CRAM/BAM folder..."

      # Paths relative to the family folder: used for the log messages only.
      # Every file operation uses the full path, so nothing depends on the
      # current working directory.
      bam="${indi_name}/${indi_name}-ready.bam"
      cram="${indi_name}/${indi_name}-ready.cram"

      if [[ -s "${fam_dir}/${cram}" ]]; then
        echo " Found ${cram}"
        echo " Removing ${bam}"
        # -f keeps a re-run quiet when the BAM is already gone; real failures
        # (e.g. permissions) still return non-zero and are counted.
        if ! rm -f -- "${fam_dir}/${bam}"; then
          echo " ERROR: could not remove ${bam}"
          n_errors=$((n_errors + 1))
        fi
        echo " Removing ${bam}.bai"
        if ! rm -f -- "${fam_dir}/${bam}.bai"; then
          echo " ERROR: could not remove ${bam}.bai"
          n_errors=$((n_errors + 1))
        fi
      elif [[ -f "${fam_dir}/${cram}" ]]; then
        # A zero-byte CRAM is no substitute for the BAM - keep the BAM.
        echo " ERROR: CRAM file ${cram} is empty - have not deleted BAM ${bam}!"
        n_errors=$((n_errors + 1))
      else
        echo " ERROR: CRAM file ${cram} not found - have not deleted BAM ${bam}!"
        n_errors=$((n_errors + 1))
      fi
    done
  done

  if (( n_families == 0 )); then
    echo " ERROR: no family folder found in ${SOURCE_DIR} for VERSION_N = ${VERSION_N}, BATCH_ID = ${BATCH_ID}, PLATE_ID = ${PLATE_ID}"
    return 1
  fi
  if (( n_errors > 0 )); then
    return 1
  fi
  return 0
}

# Report success only when nothing went wrong. The script's exit status is
# deliberately left as before; only the final log line now reflects failures.
if delete_bams_with_cram; then
  echo ""
  echo ""
  echo "OK: Deletion of BAM files and their indexes for PROJECT_ID = $PROJECT_ID successful"
else
  echo ""
  echo ""
  echo "ERROR: Deletion of BAM files and their indexes for PROJECT_ID = $PROJECT_ID NOT fully successful - see the messages above"
fi