Skip to content
Snippets Groups Projects
NHS_WES_trio_delete_BAM.sh 2.55 KiB
Newer Older
#!/bin/bash
#PBS -l walltime=01:00:00
#PBS -l ncpus=1,mem=2gb
#PBS -q sgp
#PBS -N NHS_WES_rm_bam
#PBS -j oe


### Set up the folder structure for the downstream analysis ###

# Hard-coded base locations.
BASE="/scratch/u035/u035/shared/trio_whole_exome/analysis"
SCRIPTS_DIR="/home/u035/u035/shared/scripts"

# Per-project working folder; every analysis sub-folder hangs off it.
# PROJECT_ID is not set in this script - it is expected to arrive through the
# job environment (presumably via `qsub -v ...`; confirm against the submitter).
WORK_DIR="${BASE}/${PROJECT_ID}"
BAMOUT_DIR="${WORK_DIR}/BAMOUT"
CNV_DIR="${WORK_DIR}/CNV"
COV_DIR="${WORK_DIR}/COV"
DEC_DIR="${WORK_DIR}/DECIPHER"
G2P_DIR="${WORK_DIR}/G2P"
IGV_DIR="${DEC_DIR}/IGV"
LOG_DIR="${WORK_DIR}/LOG"
PED_DIR="${WORK_DIR}/PED"
VASE_DIR="${WORK_DIR}/VASE"
VCF_DIR="${WORK_DIR}/VCF"

# Log the job parameters, one "NAME = value" line each:
#   SOURCE_DIR - the general path to the source VCF, BAM and PED files,
#                i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/${VERSION_N}_${PLATE_ID}
#   BATCH_ID   - the ID of the batch being processed, e.g. 11870_Germain_Lorna
#   PLATE_ID   - the PCR plate ID of the batch currently being processed, e.g. 16862
#   PROJECT_ID - the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done
#   VERSION_N  - the version of the alignment and genotyping analysis
# printf re-applies the format to each successive NAME/value pair.
printf '%s = %s\n' \
  "SOURCE_DIR" "${SOURCE_DIR}" \
  "BATCH_ID" "${BATCH_ID}" \
  "PLATE_ID" "${PLATE_ID}" \
  "PROJECT_ID" "${PROJECT_ID}" \
  "VERSION_N" "${VERSION_N}"



##############################################################
###   Delete individual BAMs (and indexes) iff CRAM found  ###
##############################################################

#######################################
# For every family folder of one version/batch/plate, remove each
# individual's <INDI>-ready.bam and <INDI>-ready.bam.bai, but only when the
# matching <INDI>-ready.cram is present.
#
# All file operations use absolute paths built from the matched family
# folder, so nothing depends on (or changes) the current working directory:
# a failed `cd` can therefore never redirect the deletion elsewhere.
#
# Arguments:
#   $1 - directory containing the per-family result folders (SOURCE_DIR)
#   $2 - version of the alignment and genotyping analysis (VERSION_N)
#   $3 - batch ID (BATCH_ID); may itself contain underscores
#   $4 - plate ID (PLATE_ID)
# Outputs:
#   Progress and per-individual messages on stdout (paths shown relative to
#   the family folder); argument/no-match diagnostics on stderr.
# Returns:
#   0 if at least one family folder was found and every individual had a
#   CRAM and had its BAM and index removed (or already absent);
#   1 on missing arguments, no matching family folder, a missing CRAM,
#   or a failed removal.
#######################################
delete_bams_with_cram() {
  local source_dir=${1-} version_n=${2-} batch_id=${3-} plate_id=${4-}
  local fam_dir fam_name fam_id indi_dir indi bam cram target
  local n_families=0 n_problems=0

  # An empty component would widen the glob below (e.g. to "/????-??-??_*"),
  # so refuse to delete anything unless every part is known.
  if [[ -z "${source_dir}" || -z "${version_n}" \
        || -z "${batch_id}" || -z "${plate_id}" ]]; then
    echo "ERROR: SOURCE_DIR, VERSION_N, BATCH_ID and PLATE_ID must all be set - nothing deleted" >&2
    return 1
  fi

  # make sure we are reading the data from the exact version, batch & plate ID
  for fam_dir in "${source_dir}"/????-??-??_"${version_n}_${batch_id}_${plate_id}"_*; do
    # An unmatched glob stays as the literal pattern; accept real folders only.
    [[ -d "${fam_dir}" ]] || continue
    n_families=$(( n_families + 1 ))

    # The family ID is the last "_"-separated field of the folder name.
    fam_name=${fam_dir##*/}
    fam_id=${fam_name##*_}
    echo "  FAM_ID = ${fam_id}"

    # identify all folders (one for each individual) for this family
    # containing cram/bam files (format: <INDI_ID>_<FAM_ID>);
    # the trailing slash restricts the glob to directories.
    for indi_dir in "${fam_dir}"/*_*/; do
      [[ -d "${indi_dir}" ]] || continue
      indi=${indi_dir%/}
      indi=${indi##*/}
      echo "    ${indi} is a CRAM/BAM folder..."
      bam="${indi}/${indi}-ready.bam"
      cram="${indi}/${indi}-ready.cram"

      # check if the CRAM file exists; iff yes, delete the BAM file and its index
      if [[ -f "${fam_dir}/${cram}" ]]; then
        echo "      Found ${cram}"
        for target in "${bam}" "${bam}.bai"; do
          echo "      Removing ${target}"
          # -f: an already-deleted file is not an error, so the job can be
          # re-run; genuine failures (e.g. permissions) still return non-zero.
          if ! rm -f -- "${fam_dir}/${target}"; then
            n_problems=$(( n_problems + 1 ))
          fi
        done
      else
        echo "      ERROR: CRAM file ${cram} not found - have not deleted BAM ${bam}!"
        n_problems=$(( n_problems + 1 ))
      fi
    done
  done

  if (( n_families == 0 )); then
    echo "ERROR: no family folder matching ${source_dir}/????-??-??_${version_n}_${batch_id}_${plate_id}_* found" >&2
    return 1
  fi

  if (( n_problems > 0 )); then
    return 1
  fi
  return 0
}

delete_bams_with_cram "${SOURCE_DIR-}" "${VERSION_N-}" "${BATCH_ID-}" "${PLATE_ID-}"
DELETE_STATUS=$?

echo ""
echo ""
if (( DELETE_STATUS == 0 )); then
  echo "OK: Deletion of BAM files and their indexes for PROJECT_ID = $PROJECT_ID successful"
else
  echo "ERROR: Deletion of BAM files and their indexes for PROJECT_ID = $PROJECT_ID incomplete - see messages above"
fi

# Keep this as the LAST command of the script: its status becomes the job's
# exit status, so a failed or partial deletion is reported as non-zero.
(( DELETE_STATUS == 0 ))