#!/bin/bash
#PBS -l walltime=01:00:00
#PBS -l ncpus=1,mem=2gb
#PBS -q sgp
#PBS -N NHS_WES_rm_bam
#PBS -j oe

# Delete per-individual BAM files (and their .bai indexes) for every family
# folder of one batch/plate, but only where the matching CRAM file exists.
#
# Variables read from the environment:
#   SOURCE_DIR  (required) general path to the source VCF, BAM and PED files,
#               i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/${VERSION_N}_${PLATE_ID}
#   BATCH_ID    (required) ID of the batch being processed, e.g. 11870_Germain_Lorna
#   PLATE_ID    (required) PCR plate ID of the batch being processed, e.g. 16862
#   VERSION_N   (required) version of the alignment and genotyping analysis
#   PROJECT_ID  folder (${BASE}/${PROJECT_ID}) where the downstream analysis
#               is done; only used in paths/messages here
#
# Exit status: 0 if every candidate folder had a CRAM and its BAM/index were
# removed; non-zero if inputs are missing, no family folder matched, a CRAM
# was missing, or a removal failed.

### Set up the folder structure for the downstream analysis ###
# None of these are referenced by the deletion logic below; they are retained
# unchanged from the original script.
BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis
WORK_DIR=$BASE/${PROJECT_ID}
VCF_DIR=${WORK_DIR}/VCF
PED_DIR=${WORK_DIR}/PED
LOG_DIR=${WORK_DIR}/LOG
G2P_DIR=${WORK_DIR}/G2P
VASE_DIR=${WORK_DIR}/VASE
COV_DIR=${WORK_DIR}/COV
DEC_DIR=${WORK_DIR}/DECIPHER
IGV_DIR=${DEC_DIR}/IGV
CNV_DIR=${WORK_DIR}/CNV
BAMOUT_DIR=${WORK_DIR}/BAMOUT
SCRIPTS_DIR=/home/u035/u035/shared/scripts

#######################################
# Remove <INDI_ID>_<FAM_ID>-ready.bam and its .bai for every individual
# folder (name containing "_") directly inside one family folder, provided
# the corresponding -ready.cram file exists.
# Arguments: $1 - path of the family folder
# Outputs:   progress and error messages on stdout
# Returns:   0 if every individual folder was handled cleanly, 1 if any CRAM
#            was missing or any removal failed
#######################################
remove_family_bams() {
  local fam_dir=$1
  local indi_dir indi bam cram
  local status=0

  # Glob instead of parsing `ls -l`: only sub-directories whose name contains
  # an underscore (format: <INDI_ID>_<FAM_ID>) are candidates.
  for indi_dir in "${fam_dir}"/*_*/; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -d "${indi_dir}" ]] || continue
    indi_dir=${indi_dir%/}
    indi=${indi_dir##*/}
    echo " ${indi} is a CRAM/BAM folder..."

    # Paths relative to the family folder are used in messages; absolute
    # paths are used for every file operation so no `cd` is required.
    bam=${indi}/${indi}-ready.bam
    cram=${indi}/${indi}-ready.cram

    # Delete the BAM and its index only if the CRAM file exists.
    if [[ -f "${fam_dir}/${cram}" ]]; then
      echo " Found ${cram}"
      echo " Removing ${bam}"
      # -f: a BAM already removed by an earlier run is not an error.
      if ! rm -f -- "${fam_dir}/${bam}"; then
        echo " ERROR: could not remove ${bam}" >&2
        status=1
      fi
      echo " Removing ${bam}.bai"
      if ! rm -f -- "${fam_dir}/${bam}.bai"; then
        echo " ERROR: could not remove ${bam}.bai" >&2
        status=1
      fi
    else
      echo " ERROR: CRAM file ${cram} not found - have not deleted BAM ${bam}!"
      status=1
    fi
  done

  return "${status}"
}

#######################################
# Validate the inputs, then process every family folder that matches the
# exact version, batch and plate ID.
# Globals:   SOURCE_DIR, BATCH_ID, PLATE_ID, PROJECT_ID, VERSION_N (read)
# Outputs:   progress on stdout; validation/summary errors on stderr
# Returns:   0 on complete success, 1 otherwise
#######################################
main() {
  local var fam_dir fam_name fam_id
  local missing=0 n_families=0 n_failed=0

  echo "SOURCE_DIR = ${SOURCE_DIR}"
  echo "BATCH_ID = ${BATCH_ID}"
  echo "PLATE_ID = ${PLATE_ID}"
  echo "PROJECT_ID = ${PROJECT_ID}"
  echo "VERSION_N = ${VERSION_N}"

  # Every variable that forms the folder pattern must be non-empty, otherwise
  # the pattern would be rooted at "/" or match unrelated folders.
  for var in SOURCE_DIR BATCH_ID PLATE_ID VERSION_N; do
    if [[ -z "${!var:-}" ]]; then
      echo "ERROR: required variable ${var} is not set" >&2
      missing=1
    fi
  done
  if (( missing != 0 )); then
    return 1
  fi

  ##############################################################
  ### Delete individual BAMs (and indexes) iff CRAM found    ###
  ##############################################################

  # Make sure we are reading the data from the exact version, batch & plate
  # ID. The variables are quoted so only the date and family parts glob.
  for fam_dir in "${SOURCE_DIR}"/????-??-??_"${VERSION_N}_${BATCH_ID}_${PLATE_ID}"_*; do
    # Skips the literal pattern when nothing matched, and non-directories.
    [[ -d "${fam_dir}" ]] || continue
    n_families=$((n_families + 1))

    # The family ID is the last underscore-separated field of the folder name.
    fam_name=${fam_dir##*/}
    fam_id=${fam_name##*_}
    echo " FAM_ID = ${fam_id}"

    remove_family_bams "${fam_dir}" || n_failed=$((n_failed + 1))
  done

  echo ""
  echo ""

  if (( n_families == 0 )); then
    echo "ERROR: no family folders found in ${SOURCE_DIR} for VERSION_N = ${VERSION_N}, BATCH_ID = ${BATCH_ID}, PLATE_ID = ${PLATE_ID}" >&2
    return 1
  fi
  if (( n_failed != 0 )); then
    echo "ERROR: Deletion of BAM files and their indexes for PROJECT_ID = $PROJECT_ID incomplete - problems in ${n_failed} family folder(s), see messages above" >&2
    return 1
  fi

  echo "OK: Deletion of BAM files and their indexes for PROJECT_ID = $PROJECT_ID successful"
}

# Last command of the script: its return status becomes the job's exit status.
main "$@"