#!/bin/bash
#PBS -l walltime=24:00:00
#PBS -l ncpus=1,mem=16gb
#PBS -q uv2000
#PBS -N go_quad
#PBS -j oe


# setup PATH
export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin
export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2



### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh, run previously as part of the standard trio-based pipeline ###
BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis
WORK_DIR=$BASE/${PROJECT_ID}
VCF_DIR=${WORK_DIR}/VCF
PED_DIR=${WORK_DIR}/PED
LOG_DIR=${WORK_DIR}/LOG
G2P_DIR=${WORK_DIR}/G2P
VASE_DIR=${WORK_DIR}/VASE
COV_DIR=${WORK_DIR}/COV
DEC_DIR=${WORK_DIR}/DECIPHER
IGV_DIR=${DEC_DIR}/IGV
CNV_DIR=${WORK_DIR}/CNV
BAMOUT_DIR=${WORK_DIR}/BAMOUT
SCRIPTS_DIR=/home/u035/u035/shared/scripts



# other files to be used
TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20200601.plus15bp.merged.bed			# OK
CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20200601.clinvar.20200520.plus15bp.txt	# OK
BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt			# OK
TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt			# OK




### TOOLS ###
BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools
BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip
TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix
VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt
VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase
GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk							# points to ../share/gatk4-4.1.8.1-0/gatk
GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar
PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3							# points to python3.6
PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7
VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep"	# points to ../share/ensembl-vep-100.4-0/vep
REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa
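
# optional pre-flight check (a sketch, not part of the original pipeline): verify
# that every tool and resource defined above exists, so a bad path fails fast
# rather than mid-analysis (VEP is excluded, being a compound perl command)
#for ITEM in ${BCFTOOLS} ${BGZIP} ${TABIX} ${VT} ${VASE} ${GATK4} ${GATK3} ${PYTHON3} ${PYTHON2} ${REFERENCE_GENOME} ${TARGETS} ${CLINVAR} ${BLACKLIST} ${TRANS_MAP}; do
#    [ -e "${ITEM}" ] || { echo "ERROR: missing tool/resource: ${ITEM}"; exit 1; }
#done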





echo "SOURCE_DIR = ${SOURCE_DIR}"       # the general path to the source BAM files (VCF and PED already copied)		i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/
echo "BATCH_ID = ${BATCH_ID}"           # the ID of the batch being processed                                   	e.g. 11870_Germain_Lorna
echo "PLATE_ID = ${PLATE_ID}"           # the PCR plate ID of the batch being currently processed,              	e.g. 16862
echo "PROJECT_ID = ${PROJECT_ID}"       # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done
echo "VERSION_N = ${VERSION_N}"         # the version of the alignment and genotyping analysis
echo "FAMILY_ID = ${FAMILY_ID}"		# the family ID of this family with affected probands

echo "PRO_1_ID = ${PRO_1_ID}"		# the ID of the first affected proband
echo "PRO_2_ID = ${PRO_2_ID}"           # the ID of the second affected proband
echo "PAR_1_ID = ${PAR_1_ID}"           # the ID of the first unaffected parent
echo "PAR_2_ID = ${PAR_2_ID}"           # the ID of the second unaffected parent
 
echo "DECIPHER_ID = ${DECIPHER_ID}"	# the DECIPHER_ID for this family




# change to the LOG folder
cd ${LOG_DIR}




#########################################################################################################
###   check the PED file to make sure it contains exactly 2 affected probands and exactly 2 unaffected parents   ###
#########################################################################################################

echo ""
echo ""
echo "checking the ${PED_DIR}/${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}.ped file..."

time ${PYTHON2} ${SCRIPTS_DIR}/NHS_WES_check_PED_quad.py ${PED_DIR}/${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}.ped

# check if the PED file checks were successful (python exit code = 0); if not, exit the bash script
ret=$?
if [ $ret -ne 0 ]; then
     echo "...it appears that the PED file does not correspond to a quad family!"
     echo "ERROR: Aborting the analysis"
     exit 1
fi
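
# for reference, a quad PED file in the standard 6-column format (family ID,
# individual ID, father ID, mother ID, sex, affected status: 2 = affected,
# 1 = unaffected) should look roughly like this (hypothetical IDs):
#   FAM01  123456_FAM01  ABC001_FAM01  ABC002_FAM01  1  2
#   FAM01  123457_FAM01  ABC001_FAM01  ABC002_FAM01  2  2
#   FAM01  ABC001_FAM01  0             0             1  1
#   FAM01  ABC002_FAM01  0             0             2  1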
echo ""
echo ""





########################################################################
###     DNU (decompose, normalize, uniq) and clean the family VCF    ###
### format: ${PLATE_ID}_${FAMILY_ID}-gatk-haplotype-annotated.vcf.gz ###
########################################################################

echo ""
echo ""
echo "Performing DNU and cleaning of the ${PLATE_ID}_${FAMILY_ID}'s VCF file..."


time ${VT} decompose -s ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}-gatk-haplotype-annotated.vcf.gz -o ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.decomp.vcf.gz
time ${VT} normalize ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.decomp.vcf.gz -r ${REFERENCE_GENOME} -o ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.norm.vcf.gz
time ${VT} uniq ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.norm.vcf.gz -o ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.DNU.vcf.gz


# remove sites with AC=0
time ${BCFTOOLS} view --min-ac=1 --no-update ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.DNU.vcf.gz > ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.AC0.vcf

# reset GT to no-call if num_ALT < num_ALT_THRESH or VAF < VAF_THRESH and GT != 0/0
# exclude variants from the blacklist (matching on chr,pos,ref,alt)
time ${PYTHON2} ${SCRIPTS_DIR}/NHS_WES_filter_LQ_GT.py ${BLACKLIST} ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.AC0.vcf ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf

# bgzip and tabix it
time cat ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf | ${BGZIP} > ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz
time ${TABIX} -p vcf ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz
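
# optional sanity check (a sketch): confirm the final ready VCF is non-empty
# and readable through its index before the intermediate files are removed below
#N_READY=`${BCFTOOLS} view -H ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz | wc -l`
#echo "ready VCF contains ${N_READY} records"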

# delete intermediate files
rm ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}-gatk-haplotype-annotated.vcf.gz
rm ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.decomp.vcf.gz*
rm ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.norm.vcf.gz*
rm ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.DNU.vcf.gz*
rm ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.AC0.vcf


# to avoid bgzip's annoying (but harmless) broken-pipe message, skip the next step; the file will be used by G2P as IN_FILE and will be deleted last
# rm ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf


echo ""
echo ""
echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo "DNU, AC=0, num_ALT & VAF & blacklist cleaning and of the ${PLATE_ID}_${FAMILY_ID}'s VCF file: done"
echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo ""
echo ""



###########################################################
###     run G2P for each family VCF (DD genes)          ###
###     format: ${PLATE_ID}_${FAMILY_ID}.clean.vcf      ###
###########################################################


echo "Performing G2P analysis (DD genes)for FAMILY_ID = ${PLATE_ID}_${FAMILY_ID}..."
echo "Using DDG2P.01062020.csv"


IN_FILE=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf   
G2P_LOG_DIR=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_LOG_DIR
mkdir -p ${G2P_LOG_DIR}
TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt
HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html
#VCF_KEYS='gnomADe|gnomADg'     # old VEP version 97
VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38'


time ${VEP} \
    -i ${IN_FILE} \
    --output_file ${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_inter_out.txt \
    --force_overwrite \
    --assembly GRCh38 \
    --fasta ${REFERENCE_GENOME} \
    --offline \
    --merged \
    --use_given_ref \
    --cache --cache_version 100 \
    --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \
    --individual all \
    --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.01062020.txt" \
    --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \
    --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
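
# optional guard (a sketch): the script does not run with "set -e", so a VEP/G2P
# failure would otherwise go unnoticed until the DECIPHER step fails downstream
#if [ $? -ne 0 ]; then echo "ERROR: VEP/G2P failed"; exit 1; fi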


echo ""
echo ""
echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo "G2P analysis of FAMILY_ID = ${PLATE_ID}_${FAMILY_ID}: done"
echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo ""
echo ""




#################################################################################################################################################
###          run coverage for each proband (DD genes)                                                                                         ###
###   format: ${SOURCE_DIR}/????-??-??_${VERSION_N}_${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}/${PROBAND_ID}_${FAMILY_ID}/${PROBAND_ID}_${FAMILY_ID}-ready.bam   ###
#################################################################################################################################################


# from the VCF, get all IDs (in the format proband_family_id); they are all affected probands (already checked at the start)

file=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz

# INDI_ID is in format ${PROBAND_ID}_${FAMILY_ID}

for INDI_ID in `${BCFTOOLS} query -l $file`; do

    #################################
    #####    for each proband    ####
    #################################

    echo "Performing coverage analysis for PROBAND_ID = ${INDI_ID} ...."

    # make sure we are reading the data from the exact batch & plate ID
    BAM_FILE=${SOURCE_DIR}/????-??-??_${VERSION_N}_${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}/${INDI_ID}/${INDI_ID}-ready.bam
    OUT_FILE=${COV_DIR}/${INDI_ID}.DD15


    time java -Xmx8g -jar ${GATK3} -T DepthOfCoverage -R ${REFERENCE_GENOME} -o ${OUT_FILE} -I ${BAM_FILE} -L ${TARGETS} \
        --omitDepthOutputAtEachBase \
        --minBaseQuality 20 \
        --minMappingQuality 20 \
        -ct 20 \
        -jdk_deflater \
        -jdk_inflater \
        --allow_potentially_misencoded_quality_scores
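
    # note: DepthOfCoverage writes several files sharing the ${OUT_FILE} prefix;
    # the two consumed below are ${OUT_FILE}.sample_summary (per-sample totals)
    # and ${OUT_FILE}.sample_interval_summary (per-interval coverage)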

    echo ""
    echo ""
    echo "----------------------------------------------------------------------------------------------------"
    echo "percentage of DD exons (+/-15bp) covered at least 20x in PROBAND_ID = ${INDI_ID} ..."
    cat ${COV_DIR}/${INDI_ID}.DD15.sample_summary | awk '{print $7}'
    echo "----------------------------------------------------------------------------------------------------"

    # now compute the coverage per DD exon (+/-15bp) interval, adding the number of P/LP ClinVar variants (assertion criteria provided) in each interval
    time ${PYTHON2} ${SCRIPTS_DIR}/NHS_WES_generate_coverage_result_file.py ${COV_DIR}/${INDI_ID}.DD15.sample_interval_summary ${CLINVAR} ${COV_DIR}/${INDI_ID}.DD15.COV.txt

    echo ""
    echo ""
    echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
    echo "Coverage analysis of PROBAND_ID = ${INDI_ID}: done    "
    echo "    Coverage file = ${COV_DIR}/${INDI_ID}.DD15.COV.txt"
    echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
    echo ""
    echo ""

done




###################################################################################
###      for each proband generate the DECIPHER file                            ###
###  ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz - the cleaned family VCF  ###
###  ${TRANS_MAP} - the current transcript mapping file                         ###
###################################################################################


echo "Generating the DECIPHER file for all probands in ${FAMILY_ID} ..."


# first, split the family VCF to individual VCFs
# -c1:  minimum allele count (INFO/AC) of sites to be printed
# split multi-allelic sites (by -m -any)
# left-alignment and normalization (by adding the -f)

file=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz
echo "splitting $file"
for indi in `${BCFTOOLS} query -l $file`; do
    ${BCFTOOLS} view -c1 -Oz -s $indi -o ${file/.vcf*/.$indi.rough.vcf.gz} $file
    ${BCFTOOLS} norm -f ${REFERENCE_GENOME} -m -any -Oz -o ${file/.vcf*/.$indi.vcf.gz} ${file/.vcf*/.$indi.rough.vcf.gz}
    rm ${file/.vcf*/.$indi.rough.vcf.gz}
done
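# e.g. for a (hypothetical) individual 123456_FAM01 the substitution
# ${file/.vcf*/.$indi.vcf.gz} turns ${PLATE_ID}_FAM01.ready.vcf.gz into
# ${PLATE_ID}_FAM01.ready.123456_FAM01.vcf.gz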



# create the names of the needed files
PED_FILE=${PED_DIR}/${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}.ped
IN_G2P_FILE=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_LOG_DIR/${PLATE_ID}_${FAMILY_ID}.report.txt
FAM_IGV_DIR=${IGV_DIR}/${PLATE_ID}_${FAMILY_ID}
FAM_BAM_DIR=${SOURCE_DIR}/????-??-??_${VERSION_N}_${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}


## call the python script
time ${PYTHON2} ${SCRIPTS_DIR}/NHS_WES_generate_DEC_IGV_aff_probands.py \
${DECIPHER_ID} \
${TRANS_MAP} \
${PED_FILE} \
${IN_G2P_FILE} \
${FAM_IGV_DIR} \
${VCF_DIR} \
${PLATE_ID} \
${FAMILY_ID} \
${DEC_DIR} \
${FAM_BAM_DIR}
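
# per the bamout step further below, this script is expected to write one
# ${DEC_DIR}/<proband_id>_<fam_id>_DEC_FLT.csv per affected proband, plus the
# matching IGV material under ${FAM_IGV_DIR}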



#############################################################################################################################
## using the DECIPHER bulk upload file v9 for each proband --> generate the DECIPHER bulk upload file v10 for each proband ##
#############################################################################################################################
# from the VCF, get all IDs (in the format proband_family_id); they are all affected probands (already checked at the start)
# INDI_ID is in format ${PROBAND_ID}_${FAMILY_ID}

file=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz
for INDI_ID in `${BCFTOOLS} query -l $file`; do
    #################################
    #####    for each proband    ####
    #################################
    echo "...Generating v10 Decipher bulk upload file for proband = ${INDI_ID} ...."
    time ${PYTHON3} ${SCRIPTS_DIR}/convert_DEC_to_v10.py ${DEC_DIR} ${INDI_ID}
done


echo ""
echo ""
echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo "DECIPHER analysis of all probands in ${FAMILY_ID}: done"
echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo ""
echo ""


rm ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf




##############################################################################################
###  for each of the affected probands                                                     ###
###  for each variant in the DECIPHER upload file (only the shared variants)               ###
###  generate a IGV snapshot based on the realigned BAM used by GATK for calling variants  ###
###  first, generate BAMOUTs for each variant (to be stored in the BAMOUT folder)          ###
###  then, generate a batch script for IGV to produce the snapshots based on the BAMOUTs   ###
##############################################################################################

echo ""
echo ""
echo "Generating the BAMOUT files for the ${FAMILY_ID} family ...."


# get the ids of the affected probands to generate the part of the command line which refers to the BAM files to be analysed



file=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz
# INDI_ID is in format ${PROBAND_ID}_${FAMILY_ID}
aff_pro_arr=()
for INDI_ID in `${BCFTOOLS} query -l $file`; do
    aff_pro_arr+=(${INDI_ID})
done
echo "  Found ${#aff_pro_arr[@]} affected probands in ${FAMILY_ID} for which BAMOUTs to be generated"

for key in "${!aff_pro_arr[@]}"; do 
  echo "    ${aff_pro_arr[$key]}"; 
  INPUT_BAM_LINE=${INPUT_BAM_LINE}" --input ${SOURCE_DIR}/????-??-??_${VERSION_N}_${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}/${aff_pro_arr[$key]}/${aff_pro_arr[$key]}-ready.bam "
done
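
# INPUT_BAM_LINE now holds one " --input <path>-ready.bam" fragment per affected proband,
# e.g. (hypothetical IDs):
#   --input ${SOURCE_DIR}/<date>_${VERSION_N}_${BATCH_ID}_${PLATE_ID}_FAM01/123456_FAM01/123456_FAM01-ready.bam --input .../654321_FAM01/654321_FAM01-ready.bam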

#echo ${INPUT_BAM_LINE}
   



# now go over the shared G2P variants for which to generate bamouts
# the variants should be identical in the DECIPHER files for all affected individuals, so pick the first individual
# chr is the second column - need to add the 'chr' prefix
# pos is the third column
# the first line is a header line, starting with 'Internal reference number or ID'
# file called: ${DEC_DIR}/<proband_id>_<fam_id>_DEC_FLT.csv
# and for each run GATK to generate the bamout files
# to be stored in ${BAMOUT_DIR}/${FAMILY_ID}

mkdir -p ${BAMOUT_DIR}/${FAMILY_ID}

var_file=${DEC_DIR}/${aff_pro_arr[0]}_DEC_FLT.csv
echo "... reading the shared variants in ${var_file} to generate the bamouts for each ..."


grep -v '^Internal' ${var_file} |
while IFS= read -r line
do
  echo ""
  echo ""
  echo ""
  echo "$line"
  IFS=, read -ra ary <<<"$line"
#  for key in "${!ary[@]}"; do echo "$key ${ary[$key]}"; done
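  # bash arrays are 0-based, so ary[1] is the CSV's second column (chr),
  # ary[2] the third (pos), ary[4] the fifth (ref) and ary[5] the sixth (alt)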
  chr=${ary[1]}
  pos=${ary[2]}
  ref=${ary[4]}
  alt=${ary[5]}
  echo " --> chr = $chr, pos = $pos, ref = ${ref}, alt = ${alt}"

  # generate the bamout file
  echo "...doing the bamout"
  echo "   time ${GATK4} HaplotypeCaller --reference ${REFERENCE_GENOME}" ${INPUT_BAM_LINE}" -L chr${chr}:${pos} --interval-padding 500 --active-probability-threshold 0.000 -ploidy 2 --output ${BAMOUT_DIR}/${FAMILY_ID}/${FAMILY_ID}_chr${chr}_${pos}.bamout.vcf -bamout ${BAMOUT_DIR}/${FAMILY_ID}/${FAMILY_ID}_chr${chr}_${pos}.bamout.bam"

  time ${GATK4} HaplotypeCaller --reference ${REFERENCE_GENOME} ${INPUT_BAM_LINE} -L chr${chr}:${pos} --interval-padding 500 \
  --active-probability-threshold 0.000 -ploidy 2 \
  --output ${BAMOUT_DIR}/${FAMILY_ID}/${FAMILY_ID}_chr${chr}_${pos}.bamout.vcf -bamout ${BAMOUT_DIR}/${FAMILY_ID}/${FAMILY_ID}_chr${chr}_${pos}.bamout.bam

done





##############################################################################################
## write the IGV batch file for each affected individual in this family based on the bamouts #
## to be stored as ${IGV_DIR}/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt                 #
## where ${PROBAND_ID}_${FAMILY_ID} == ${aff_pro_arr[$key]}                                  #
##############################################################################################

for key in "${!aff_pro_arr[@]}"; do
  echo ""
  echo "" 
  echo "Generating the IGV batch file for ${aff_pro_arr[$key]}";

  snap_file=${IGV_DIR}/bamout_${aff_pro_arr[$key]}.snapshot.txt

  # check if previous version exist, if so - delete it
  if [ -f "${snap_file}" ]; then
    echo "previous version of ${snap_file} exist --> deleted"
    rm ${snap_file}
  fi

  # write the header for the IGV batch file
  echo "new" >> ${snap_file}
  echo "genome hg38" >> ${snap_file}
  echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file}
  echo "" >> ${snap_file}


  # now, go again over the variants in this individual's DECIPHER file and generate one snapshot file for all the variants
  var_file=${DEC_DIR}/${aff_pro_arr[$key]}_DEC_FLT.csv
  echo "... reading ${var_file} to generate the IGV batch file using the bamouts..."

  grep -v '^Internal' ${var_file} |
  while IFS= read -r line
  do
    IFS=, read -ra ary <<<"$line"
    chr=${ary[1]}
    pos=${ary[2]}
    ref=${ary[4]}
    alt=${ary[5]}
    left=$((${pos}-25))
    right=$((${pos}+25))

    echo "new" >> ${snap_file}
    echo "load ${BAMOUT_DIR}/${FAMILY_ID}/${FAMILY_ID}_chr${chr}_${pos}.bamout.bam" >> ${snap_file}
    echo "preference SAM.SHADE_BASE_QUALITY true" >> ${snap_file}

    echo "goto chr${chr}:${left}-${right}" >> ${snap_file}
    echo "group SAMPLE" >> ${snap_file}
    echo "sort base" >> ${snap_file}
    echo "squish" >> ${snap_file}
    echo "snapshot bamout_${aff_pro_arr[$key]}_${FAMILY_ID}_chr${chr}_${pos}_${ref}_${alt}.png" >> ${snap_file}
    echo "" >> ${snap_file}
    echo "" >> ${snap_file}

  done

  echo "Generating of the IGV batch file based on bamouts for ${aff_pro_arr[$key]}- done!"
  echo "snap_file = ${snap_file}"

done

echo ""
echo ""
echo ""
echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
echo "+++   Variant prioritization of family ${FAMILY_ID} completed   +++"
echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"