-
not populated not populated authorednot populated not populated authored
gather_trio_results.sh 2.83 KiB
#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem=2GB
#SBATCH --time=01:00:00
#SBATCH --job-name=gather_results
#SBATCH --output=gather_results.%A_%a.out
#SBATCH --error=gather_results.%A_%a.err
### folder structure for the downstream analysis - created by trio_setup.sh ###
#
# Inputs (taken from the job environment; all four are required):
#   BATCH_NUM  - the numerical part of the BATCH_ID
#   PLATE_ID   - the PCR plate ID of the batch being currently processed, e.g. 16862
#   PROJECT_ID - the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done
#   VERSION_N  - the version of the alignment and genotyping analysis

# log the inputs first, so that a failed run still shows what it was given
echo "BATCH_NUM = ${BATCH_NUM:-}"
echo "PLATE_ID = ${PLATE_ID:-}"
echo "PROJECT_ID = ${PROJECT_ID:-}"
echo "VERSION_N = ${VERSION_N:-}"

# abort early when an input is missing: every path below would otherwise be
# assembled from an empty string and point at the wrong location
: "${BATCH_NUM:?BATCH_NUM is not set}"
: "${PLATE_ID:?PLATE_ID is not set}"
: "${PROJECT_ID:?PROJECT_ID is not set}"
: "${VERSION_N:?VERSION_N is not set}"

BASE=/home/u035/u035/shared/analysis/work
WORK_DIR="${BASE}/${PROJECT_ID}"
NHS_DIR="${WORK_DIR}/${BATCH_NUM}_${VERSION_N}_results"

# other files to be used
FAMILY_IDS="${WORK_DIR}/FAM_IDs.txt" # created by trio_setup.sh
CHILD_IDS="${WORK_DIR}/PRO_IDs.txt"  # created by trio_setup.sh

# check if ${NHS_DIR} already exists - if not, exit and ask to be created
# (exit non-zero so that the scheduler records the task as failed)
if [[ ! -d "${NHS_DIR}" ]]; then
  echo "${NHS_DIR} does not exist - need to create it before running this script!!!!" >&2
  exit 1
fi
#~## enable running singletons
#~#if [ -z $PBS_ARRAY_INDEX ]
#~#then
#~# if [ -z $INDEX ]
#~# then
#~# export PBS_ARRAY_INDEX=1
#~# else
#~# export PBS_ARRAY_INDEX=$INDEX
#~# fi
#~#fi
# The array task ID is used as a 1-based line number into the ID lists,
# so it has to be a positive integer.
if [[ ! "${SLURM_ARRAY_TASK_ID:-}" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: SLURM_ARRAY_TASK_ID must be a positive integer (got '${SLURM_ARRAY_TASK_ID:-}')" >&2
  exit 1
fi

# Pick line number ${SLURM_ARRAY_TASK_ID} of each list. Unlike `head | tail`,
# sed prints nothing when the file has fewer lines, so an out-of-range task ID
# is detected below instead of silently re-using the last entry.
FAMILY_ID=$(sed -n "${SLURM_ARRAY_TASK_ID}{p;q;}" "${FAMILY_IDS}")  # contains only the family IDs (e.g.385295)
PROBAND_ID=$(sed -n "${SLURM_ARRAY_TASK_ID}{p;q;}" "${CHILD_IDS}")  # contains only the proband IDs (e.g. 107060)

if [[ -z "${FAMILY_ID}" || -z "${PROBAND_ID}" ]]; then
  echo "ERROR: no family/proband ID on line ${SLURM_ARRAY_TASK_ID} of ${FAMILY_IDS} / ${CHILD_IDS}" >&2
  exit 1
fi

# create the family folder for the results (-p: re-running a task is not an error)
FAM_DIR="${NHS_DIR}/${PLATE_ID}_${FAMILY_ID}"
if ! mkdir -p -- "${FAM_DIR}"; then
  # without the folder, cp would create a regular file called ${FAM_DIR}
  echo "ERROR: cannot create ${FAM_DIR}" >&2
  exit 1
fi

# Copy the given file(s) into ${FAM_DIR}. A failure is reported and counted,
# but the remaining copies are still attempted.
n_failed=0
copy_to_family_dir() {
  if ! cp -- "$@" "${FAM_DIR}/"; then
    echo "ERROR: failed to copy $* to ${FAM_DIR}" >&2
    n_failed=$((n_failed + 1))
  fi
}

# copy the VASE de novo variants in the proband VCF file
copy_to_family_dir "${WORK_DIR}/VASE/${PLATE_ID}_${FAMILY_ID}.ready.denovo.vcf"

# copy the DECIPHER-to-INTERNAL ID mapping
copy_to_family_dir "${WORK_DIR}/DECIPHER_INTERNAL_IDs.txt"

# copy the LOG files (the glob stands for the job ID part of the log name)
copy_to_family_dir "${WORK_DIR}/LOG/process_trio."*"_${SLURM_ARRAY_TASK_ID}.err"
copy_to_family_dir "${WORK_DIR}/LOG/process_trio."*"_${SLURM_ARRAY_TASK_ID}.out"

# copy the G2P family html report
copy_to_family_dir "${WORK_DIR}/G2P/${PLATE_ID}_${FAMILY_ID}_LOG_DIR/${PLATE_ID}_${FAMILY_ID}.report.html"

# copy the DECIPHER file for bulk upload
copy_to_family_dir "${WORK_DIR}/DECIPHER/${PROBAND_ID}_${FAMILY_ID}_DEC_FLT.csv"
copy_to_family_dir "${WORK_DIR}/DECIPHER/${PROBAND_ID}_${FAMILY_ID}_DECIPHER_v10.xlsx"

# copy the variant snapshots
# NOTE(review): a family without any snapshot is reported as a warning only,
# on the assumption that this can legitimately happen - confirm, and count it
# as a failure if snapshots are always expected.
snapshots=("${WORK_DIR}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}/"*.png)
if [[ -e "${snapshots[0]}" ]]; then
  copy_to_family_dir "${snapshots[@]}"
else
  echo "WARNING: no variant snapshots found in ${WORK_DIR}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}" >&2
fi

# copy proband coverage files
copy_to_family_dir "${WORK_DIR}/COV/${PROBAND_ID}_${FAMILY_ID}.DD15.COV.txt"
copy_to_family_dir "${WORK_DIR}/COV/${PROBAND_ID}_${FAMILY_ID}.REC_SNP_COV.txt"

# only claim success when every copy step worked
if (( n_failed > 0 )); then
  echo "ERROR: ${n_failed} copy step(s) failed for ${FAMILY_ID} - results in ${FAM_DIR} are incomplete" >&2
  exit 1
fi

echo "OK: Results for ${FAMILY_ID} are stored in ${FAM_DIR}"