#!/bin/bash
#PBS -l walltime=01:00:00
#PBS -l ncpus=1,mem=2gb
#PBS -q uv2000
#PBS -N down_setup
#PBS -j oe

###############################################################################
# Set up the folder structure for the downstream analysis of one batch and
# copy the per-family VCF and PED files into it.
#
# Required variables (read from the environment; presumably supplied at
# submission time, e.g. via `qsub -v` - confirm against the submitting wrapper):
#   PROJECT_ID  name of the folder created under ${BASE}
#   SOURCE_DIR  general path to the source VCF, BAM and PED files; it must
#               contain one folder per family named ????-??-??_<BATCH>_<FAM_ID>,
#               and the PED files must be in ${SOURCE_DIR}/../params
#
# Exit status: 0 on success, non-zero as soon as any step fails.
###############################################################################

set -u

# Print a message to stderr and abort with a non-zero status, so that the
# scheduler does not record a failed setup as a success.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Refuse to run without the two inputs: an empty PROJECT_ID would make
# WORK_DIR point at ${BASE} itself.
: "${PROJECT_ID:?PROJECT_ID must be set and non-empty}"
: "${SOURCE_DIR:?SOURCE_DIR must be set and non-empty}"

### Folder layout for the downstream analysis
BASE=/scratch/u035/u035/shared/analysis/wes_pilot
WORK_DIR=${BASE}/${PROJECT_ID}
VCF_DIR=${WORK_DIR}/VCF
PED_DIR=${WORK_DIR}/PED
LOG_DIR=${WORK_DIR}/LOG
G2P_DIR=${WORK_DIR}/G2P
VASE_DIR=${WORK_DIR}/VASE
COV_DIR=${WORK_DIR}/COV
DEC_DIR=${WORK_DIR}/DECIPHER
IGV_DIR=${DEC_DIR}/IGV
CNV_DIR=${WORK_DIR}/CNV

SCRIPTS_DIR=/home/u035/u035/shared/scripts

### Tools
PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7

# check if ${WORK_DIR} already exists - if so, exit - to prevent accidental overwriting
if [[ -d "${WORK_DIR}" ]]; then
  die "${WORK_DIR} already exists - EXIT! If really intended, delete manually!!!!"
fi

echo "PROJECT_ID = ${PROJECT_ID}"   # this is the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done
echo "SOURCE_DIR = ${SOURCE_DIR}"   # SOURCE_DIR is the general path to the source VCF, BAM and PED files

S_PED_DIR=${SOURCE_DIR}/../params   # requires that the PED files are in this folder

# Check the inputs before anything is created, so that a bad SOURCE_DIR does
# not leave behind a half-built WORK_DIR that blocks the next attempt.
[[ -d "${SOURCE_DIR}" ]] || die "SOURCE_DIR ${SOURCE_DIR} is not a directory"
[[ -d "${S_PED_DIR}" ]] || die "PED folder ${S_PED_DIR} is not a directory"

# create the working dir and the required subfolders
# (WORK_DIR first and DEC_DIR before IGV_DIR: plain mkdir needs the parent to exist)
for NEW_DIR in \
  "${WORK_DIR}" "${VCF_DIR}" "${PED_DIR}" "${LOG_DIR}" "${G2P_DIR}" \
  "${VASE_DIR}" "${COV_DIR}" "${DEC_DIR}" "${IGV_DIR}" "${CNV_DIR}"; do
  mkdir -- "${NEW_DIR}" || die "Could not create ${NEW_DIR}"
done

echo "Created ${WORK_DIR} for this batch and all the required subfolders"

######################################################
###   Copy the VCF and PED file per each family    ###
######################################################

echo "Found the following source VCF folders"

N_FAMILIES=0
for S_VCF_DIR in "${SOURCE_DIR}"/????-??-??_*; do
  # an unmatched glob is left as the literal pattern - skip it
  [[ -e "${S_VCF_DIR}" ]] || continue
  # echo " ${S_VCF_DIR}"

  VCF_DIR_NAME="${S_VCF_DIR##*/}"
  # echo " ${VCF_DIR_NAME}"

  # The folder name is <date>_<BATCH>_<FAM_ID>, where FAM_ID may itself
  # contain underscores: drop the date field, then split at the next '_'.
  NAME_REST="${VCF_DIR_NAME#*_}"
  [[ "${NAME_REST}" == *_* ]] \
    || die "Cannot derive BATCH and FAM_ID from folder name ${VCF_DIR_NAME}"
  BATCH="${NAME_REST%%_*}"
  FAM_ID="${NAME_REST#*_}"
  [[ -n "${BATCH}" && -n "${FAM_ID}" ]] \
    || die "Empty BATCH or FAM_ID in folder name ${VCF_DIR_NAME}"
  # echo " BATCH = ${BATCH}, FAM_ID = ${FAM_ID}"

  S_VCF_FILE="${S_VCF_DIR}/${FAM_ID}-gatk-haplotype-annotated.vcf.gz"
  S_PED_FILE="${S_PED_DIR}/${BATCH}_${FAM_ID}.ped"

  # report a copy only after it has actually succeeded
  cp -- "${S_VCF_FILE}" "${VCF_DIR}" || die "Could not copy ${S_VCF_FILE} to ${VCF_DIR}"
  cp -- "${S_PED_FILE}" "${PED_DIR}" || die "Could not copy ${S_PED_FILE} to ${PED_DIR}"

  echo " copied ${S_VCF_FILE} to ${VCF_DIR}"
  echo " copied ${S_PED_FILE} to ${PED_DIR}"

  N_FAMILIES=$((N_FAMILIES + 1))
done

((N_FAMILIES > 0)) \
  || die "No source VCF folders matching ${SOURCE_DIR}/????-??-??_* were found"

######################################################################################
### generate the FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt *only for trio* families ###
######################################################################################

time "${PYTHON2}" "${SCRIPTS_DIR}/extract_trio_FAM_PRO_ID.py" "${WORK_DIR}" \
  || die "extract_trio_FAM_PRO_ID.py failed for ${WORK_DIR}"

echo ""
echo ""
echo "OK: Setup for PROJECT_ID = $PROJECT_ID successful"