#!/bin/bash
#SBATCH --cpus-per-task=16
#SBATCH --mem=8GB
#SBATCH --time=48:00:00
#SBATCH --job-name=trio_whole_exome_bcbio
#SBATCH --output=trio_whole_exome_bcbio.%A_%a.out
#SBATCH --error=trio_whole_exome_bcbio.%A_%a.err

# Expects environment variables to be set
# PROJECT_ID - e.g. 12345_LastnameFirstname
# CONFIG_SH - absolute path to configuration script setting environment variables
# VERSION - e.g. v1, v2

# --- Setup and pipeline launch ------------------------------------------------
# Resolves the family handled by this array task, locates its bcbio YAML
# configuration and runs bcbio in a per-family working directory.

# Load the configuration script. Fail early with a clear message rather than
# continuing with unset variables.
if [[ -z ${CONFIG_SH:-} || ! -r $CONFIG_SH ]]; then
  echo "ERROR: CONFIG_SH is unset or not readable: '${CONFIG_SH:-}'" >&2
  exit 1
fi
source "$CONFIG_SH"

# Every variable the script relies on, including those only needed by the
# post-processing after the pipeline run, is verified up front so that a
# misconfigured job stops before the long-running pipeline is started.
# NOTE(review): PARAMS_DIR, CONFIG_DIR, WORK_DIR and OUTPUT_DIR are presumably
# defined by $CONFIG_SH -- confirm.
for required_var in PROJECT_ID VERSION PARAMS_DIR CONFIG_DIR WORK_DIR \
  OUTPUT_DIR SLURM_ARRAY_TASK_ID SLURM_CPUS_PER_TASK; do
  if [[ -z ${!required_var:-} ]]; then
    echo "ERROR: required variable ${required_var} is not set" >&2
    exit 1
  fi
done

# The family for this task is line number SLURM_ARRAY_TASK_ID of the family
# list. Selecting by exact line number (rather than `head | tail`) yields an
# empty result when the index is past the end of the file, instead of silently
# returning the last family again.
family_ids_file=$PARAMS_DIR/$PROJECT_ID.family_ids.txt
FAMILY_ID=$(awk -v n="$SLURM_ARRAY_TASK_ID" 'NR == n { print; exit }' "$family_ids_file")
if [[ -z $FAMILY_ID ]]; then
  echo "ERROR: no family id on line ${SLURM_ARRAY_TASK_ID} of ${family_ids_file}" >&2
  exit 1
fi

# Project id up to (not including) the first underscore,
# e.g. 12345_LastnameFirstname -> 12345.
SHORT_PROJECT_ID=${PROJECT_ID%%_*}

# Resolve the configuration glob now and insist on exactly one existing file.
# Without nullglob an unmatched pattern stays literal, which the -f test rejects.
config_matches=("$CONFIG_DIR"/*_"${FAMILY_ID}".yaml)
if [[ ${#config_matches[@]} -ne 1 || ! -f ${config_matches[0]} ]]; then
  echo "ERROR: expected exactly one config matching ${CONFIG_DIR}/*_${FAMILY_ID}.yaml, found: ${config_matches[*]}" >&2
  exit 1
fi
CONFIG_FILE=${config_matches[0]}

# bcbio is run from a per-family working directory; the log file read after
# the run (log/bcbio-nextgen.log) is addressed relative to it.
run_dir=$WORK_DIR/$FAMILY_ID
if ! mkdir -p -- "$run_dir" || ! cd -- "$run_dir"; then
  echo "ERROR: cannot create or enter working directory ${run_dir}" >&2
  exit 1
fi

# Stop here, propagating the pipeline's exit status, if bcbio fails; the
# remaining steps only make sense for a completed run.
bcbio_nextgen.py "$CONFIG_FILE" -n "$SLURM_CPUS_PER_TASK" -t local
bcbio_status=$?
if [[ $bcbio_status -ne 0 ]]; then
  echo "ERROR: bcbio_nextgen.py exited with status ${bcbio_status} for family ${FAMILY_ID}" >&2
  exit "$bcbio_status"
fi

# --- Post-processing of the bcbio output ---------------------------------------
# Gathers the per-individual output directories into the family directory,
# normalises the joint VCF file names and files the family directory under
# <OUTPUT_DIR>/<SHORT_PROJECT_ID>_<VERSION>/families/.
# All paths are built from $OUTPUT_DIR instead of changing directory, so a
# failed `cd` can never redirect the moves below to the wrong location.

# Guard against empty path components: with any of these empty, the mv
# commands below would operate on unintended paths.
: "${OUTPUT_DIR:?OUTPUT_DIR must be set}"
: "${PROJECT_ID:?PROJECT_ID must be set}"
: "${SHORT_PROJECT_ID:?SHORT_PROJECT_ID must be set}"
: "${VERSION:?VERSION must be set}"
: "${FAMILY_ID:?FAMILY_ID must be set}"

# Field 6 of the most recent 'Storing in local filesystem' line among the last
# lines of the bcbio log is a path; everything from "_<SHORT_PROJECT_ID>"
# onwards is stripped and the basename of the remainder is used as DATE.
# The log path is relative to the current (per-family working) directory.
# NOTE(review): only the default `tail` window (last 10 lines) is searched,
# as in the original -- confirm this is enough for every run.
upload_path=$(tail log/bcbio-nextgen.log \
  | grep 'Storing in local filesystem' \
  | tail -n 1 \
  | awk '{ print $6 }' \
  | perl -pe "s/_${SHORT_PROJECT_ID}.+//")
if [[ -z $upload_path ]]; then
  echo "ERROR: no 'Storing in local filesystem' entry found at the end of log/bcbio-nextgen.log" >&2
  exit 1
fi
DATE=$(basename -- "$upload_path")

FAMILY_DIR=${DATE}_${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}
family_path=$OUTPUT_DIR/$FAMILY_DIR

# Without the family directory there is nothing to collect. Report on stderr
# and exit non-zero so Slurm records the task as failed.
if [[ ! -e $family_path ]]; then
  printf '%s does not exist.\n' "$family_path" >&2
  exit 1
fi

# Move each individual's output directory (named by column 2 of the family
# PED file) into the family directory. Empty entries are skipped so that
# $OUTPUT_DIR itself is never passed to mv. A failing mv reports its own error
# and the loop carries on with the remaining individuals.
ped_file=$OUTPUT_DIR/${SHORT_PROJECT_ID}_${VERSION}/params/${PROJECT_ID}_${FAMILY_ID}.ped
while IFS= read -r INDV || [[ -n $INDV ]]; do
  [[ -n $INDV ]] || continue
  mv -- "$OUTPUT_DIR/$INDV" "$family_path/"
done < <(cut -f 2 -- "$ped_file")

# fix VCF output file names: when no VCF named after the full family id is
# present, rename the one named after the first two underscore-separated
# parts of the family id joined without the underscore.
vcf_suffix=-gatk-haplotype-annotated.vcf.gz
if [[ ! -e $family_path/${FAMILY_ID}${vcf_suffix} ]]; then
  PREFIX=$(cut -d '_' -f 1 <<<"$FAMILY_ID")
  SUFFIX=$(cut -d '_' -f 2 <<<"$FAMILY_ID")
  mv -- "$family_path/${PREFIX}${SUFFIX}${vcf_suffix}" \
    "$family_path/${FAMILY_ID}${vcf_suffix}"
  mv -- "$family_path/${PREFIX}${SUFFIX}${vcf_suffix}.tbi" \
    "$family_path/${FAMILY_ID}${vcf_suffix}.tbi"
fi

# File the finished family directory under the project's families directory.
families_dir=$OUTPUT_DIR/${SHORT_PROJECT_ID}_${VERSION}/families
if ! mkdir -p -- "$families_dir" || ! mv -- "$family_path" "$families_dir/"; then
  echo "ERROR: could not move ${family_path} into ${families_dir}" >&2
  exit 1
fi