#!/bin/bash
#SBATCH --cpus-per-task=16
#SBATCH --mem=8GB
#SBATCH --time=48:00:00
#SBATCH --job-name=trio_whole_exome_bcbio
#SBATCH --output=trio_whole_exome_bcbio.%A_%a.out
#SBATCH --error=trio_whole_exome_bcbio.%A_%a.err

# Runs bcbio_nextgen.py for one family of a project and then gathers that
# family's output under
#   $OUTPUT_DIR/<short project id>_<version>/families/<family dir>
# The family is selected by SLURM_ARRAY_TASK_ID: task N processes line N of
#   $PARAMS_DIR/$PROJECT_ID.family_ids.txt
#
# Expects environment variables to be set
# PROJECT_ID - e.g. 12345_LastnameFirstname
# CONFIG_SH - absolute path to configuration script setting environment variables
# VERSION - e.g. v1, v2
#
# After sourcing CONFIG_SH the script also reads PARAMS_DIR, CONFIG_DIR,
# WORK_DIR and OUTPUT_DIR (presumably exported by CONFIG_SH - confirm), plus
# SLURM_ARRAY_TASK_ID and SLURM_CPUS_PER_TASK from the environment.
#
# Exit status: 0 on success; non-zero if a required input is missing, bcbio
# fails, the expected output directory is absent, or any file move fails.

# Print a message to stderr and abort the job.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print a message to stderr and remember that the job did not fully succeed.
warn() {
  printf 'WARNING: %s\n' "$*" >&2
  status=1
}

status=0

: "${PROJECT_ID:?PROJECT_ID must be set}"
: "${CONFIG_SH:?CONFIG_SH must be set}"
: "${VERSION:?VERSION must be set}"
: "${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID must be set (submit as an array job)}"
: "${SLURM_CPUS_PER_TASK:?SLURM_CPUS_PER_TASK must be set}"

# shellcheck source=/dev/null
source "$CONFIG_SH" || die "failed to source $CONFIG_SH"

: "${PARAMS_DIR:?PARAMS_DIR must be set (normally by CONFIG_SH)}"
: "${CONFIG_DIR:?CONFIG_DIR must be set (normally by CONFIG_SH)}"
: "${WORK_DIR:?WORK_DIR must be set (normally by CONFIG_SH)}"
: "${OUTPUT_DIR:?OUTPUT_DIR must be set (normally by CONFIG_SH)}"

[[ "$SLURM_ARRAY_TASK_ID" =~ ^[1-9][0-9]*$ ]] \
  || die "SLURM_ARRAY_TASK_ID must be a positive integer, got '$SLURM_ARRAY_TASK_ID'"

family_ids_file=$PARAMS_DIR/$PROJECT_ID.family_ids.txt
[[ -r "$family_ids_file" ]] || die "cannot read $family_ids_file"

# Select exactly line N. Unlike `head -n N | tail -n 1`, this yields nothing
# (instead of the last line) when N is past the end of the file, so an
# oversized array index cannot silently re-process the last family.
FAMILY_ID=$(sed -n "${SLURM_ARRAY_TASK_ID}p" "$family_ids_file")
[[ -n "$FAMILY_ID" ]] \
  || die "no family id on line $SLURM_ARRAY_TASK_ID of $family_ids_file"

# Everything before the first underscore, e.g. 12345 for 12345_LastnameFirstname.
SHORT_PROJECT_ID=${PROJECT_ID%%_*}

mkdir -p -- "$WORK_DIR/$FAMILY_ID" || die "cannot create $WORK_DIR/$FAMILY_ID"
cd -- "$WORK_DIR/$FAMILY_ID" || die "cannot cd to $WORK_DIR/$FAMILY_ID"

# Resolve the per-family configuration file; exactly one match is required.
shopt -s nullglob
config_files=("$CONFIG_DIR"/*_"${FAMILY_ID}".yaml)
shopt -u nullglob
(( ${#config_files[@]} == 1 )) \
  || die "expected exactly one $CONFIG_DIR/*_${FAMILY_ID}.yaml, found ${#config_files[@]}"
CONFIG_FILE=${config_files[0]}

bcbio_nextgen.py "$CONFIG_FILE" -n "$SLURM_CPUS_PER_TASK" -t local \
  || die "bcbio_nextgen.py failed for family $FAMILY_ID"

# Take the 6th whitespace-separated field of the last 'Storing in local
# filesystem' line among the final lines of the bcbio log, cut it at the
# first "_<short project id>" and keep the basename as the date prefix.
stored_path=$(tail log/bcbio-nextgen.log \
  | grep 'Storing in local filesystem' \
  | tail -n 1 \
  | awk '{ print $6 }')
[[ -n "$stored_path" ]] \
  || die "no 'Storing in local filesystem' entry at the end of log/bcbio-nextgen.log"
date_path=${stored_path%%_"${SHORT_PROJECT_ID}"?*}
DATE=$(basename -- "$date_path")
[[ -n "$DATE" ]] || die "could not derive date prefix from '$stored_path'"

FAMILY_DIR=${DATE}_${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}
family_path=$OUTPUT_DIR/$FAMILY_DIR

if [[ ! -e "$family_path" ]]; then
  printf '%s does not exist.\n' "$family_path" >&2
  exit 1
fi

# Move each individual's output directory into the family directory.
# Individual ids are read from column 2 of the family PED file
# (assumes a tab-delimited PED - TODO confirm).
ped_file=$OUTPUT_DIR/${SHORT_PROJECT_ID}_${VERSION}/params/${PROJECT_ID}_${FAMILY_ID}.ped
if [[ -r "$ped_file" ]]; then
  while IFS= read -r indv; do
    [[ -n "$indv" ]] || continue
    mv -- "$OUTPUT_DIR/$indv" "$family_path/" \
      || warn "could not move $OUTPUT_DIR/$indv into $family_path"
  done < <(cut -f 2 -- "$ped_file")
else
  warn "cannot read $ped_file; individual directories were not moved"
fi

# fix VCF output file names: when the VCF named after the full family id is
# missing, rename the one named after the first two '_'-separated parts of
# the family id joined without the underscore.
vcf_name=${FAMILY_ID}-gatk-haplotype-annotated.vcf.gz
if [[ ! -e "$family_path/$vcf_name" ]]; then
  IFS=_ read -r prefix suffix _ <<< "$FAMILY_ID"
  joined_name=${prefix}${suffix}-gatk-haplotype-annotated.vcf.gz
  mv -- "$family_path/$joined_name" "$family_path/$vcf_name" \
    || warn "could not rename $joined_name to $vcf_name"
  mv -- "$family_path/$joined_name.tbi" "$family_path/$vcf_name.tbi" \
    || warn "could not rename $joined_name.tbi to $vcf_name.tbi"
fi

families_dir=$OUTPUT_DIR/${SHORT_PROJECT_ID}_${VERSION}/families
mkdir -p -- "$families_dir" || die "cannot create $families_dir"
mv -- "$family_path" "$families_dir/" \
  || die "could not move $family_path into $families_dir"

exit "$status"