#!/bin/bash #PBS -l walltime=24:00:00 #PBS -l ncpus=1,mem=16gb #PBS -q uv2000 #PBS -N process_trio #PBS -j oe # setup PATH export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED LOG_DIR=${WORK_DIR}/LOG G2P_DIR=${WORK_DIR}/G2P VASE_DIR=${WORK_DIR}/VASE COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done # enable running singletons if [ -z $PBS_ARRAY_INDEX ] then if [ -z $INDEX ] then export PBS_ARRAY_INDEX=1 else export PBS_ARRAY_INDEX=$INDEX fi fi # change to the LOG folder cd ${LOG_DIR} ################################ ##### for each family #### ################################ FAMILY_ID=`head -n ${PBS_ARRAY_INDEX} ${FAMILY_IDS} | tail -n 1` ################################################################################### ### for each proband generate the DECIPHER file ### ### ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz - the cleaned family VCF ### ### ${VASE_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.denovo.vcf - the VASE file ### ### ${TRANS_MAP} - the current transcript mapping file ### ################################################################################### echo "Generating the DECIPHER file for PROBAND_ID = ${PROBAND_ID}_${FAMILY_ID} ..." # # first, split the family VCF to individual VCFs # # -c1: minimum allele count (INFO/AC) of sites to be printed # # split multi-allelic sites (by -m -any) # # left-alignment and normalization (by adding the -f) # file=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz # echo "splitting $file" # for indi in `${BCFTOOLS} query -l $file`; do # ${BCFTOOLS} view -c1 -Oz -s $indi -o ${file/.vcf*/.$indi.rough.vcf.gz} $file # ${BCFTOOLS} norm -f ${REFERENCE_GENOME} -m -any -Oz -o ${file/.vcf*/.$indi.vcf.gz} ${file/.vcf*/.$indi.rough.vcf.gz} # rm ${file/.vcf*/.$indi.rough.vcf.gz} # done # # VASE file - already split, left-aligned and normalized # create the names of the needed files PED_FILE=${PED_DIR}/${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}.ped DEC_MAP=${WORK_DIR}/DECIPHER_INTERNAL_IDs.txt IN_G2P_FILE=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_LOG_DIR/${PLATE_ID}_${FAMILY_ID}.report.txt IN_VASE_FILE=${VASE_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.denovo.vcf FAM_IGV_DIR=${IGV_DIR}/${PLATE_ID}_${FAMILY_ID} FAM_BAM_DIR=${SOURCE_DIR}/????-??-??_${BATCH_ID}_${PLATE_ID}_${FAMILY_ID} ## call the python scrpit time ${PYTHON2} ${SCRIPTS_DIR}/NHS_WES_generate_DEC_IGV.py \ ${DEC_MAP} \ ${TRANS_MAP} \ ${PED_FILE} \ ${IN_G2P_FILE} \ ${IN_VASE_FILE} \ ${FAM_IGV_DIR} \ ${VCF_DIR} \ ${PLATE_ID} \ ${FAMILY_ID} \ ${DEC_DIR} \ ${FAM_BAM_DIR} echo "" echo "" echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" echo "DECIPHER analysis of PROBAND_ID = ${PROBAND_ID}_${FAMILY_ID}: done" echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" echo "" echo ""