diff --git a/delete_BAM.sh b/delete_BAM.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c765ff4a338c9a576e5afca7397f4faf8b7d8800
--- /dev/null
+++ b/delete_BAM.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=2GB
+#SBATCH --time=01:00:00
+#SBATCH --job-name=delete_bam
+#SBATCH --output=delete_bam.%A_%a.out
+#SBATCH --error=delete_bam.%A_%a.err
+
+
+### Setup the folder structure for the downstream analysis###
+BASE=/home/u035/u035/shared/analysis/work
+WORK_DIR=$BASE/${PROJECT_ID}
+VCF_DIR=${WORK_DIR}/VCF
+PED_DIR=${WORK_DIR}/PED
+LOG_DIR=${WORK_DIR}/LOG
+G2P_DIR=${WORK_DIR}/G2P
+VASE_DIR=${WORK_DIR}/VASE
+COV_DIR=${WORK_DIR}/COV
+DEC_DIR=${WORK_DIR}/DECIPHER
+IGV_DIR=${DEC_DIR}/IGV
+CNV_DIR=${WORK_DIR}/CNV
+BAMOUT_DIR=${WORK_DIR}/BAMOUT
+SCRIPTS_DIR=/home/u035/u035/shared/scripts
+
+
+
+echo "SOURCE_DIR = ${SOURCE_DIR}"       # the general path to the source VCF, BAM and PED files                 i.e. /home/u035/u035/shared/results
+echo "BATCH_ID = ${BATCH_ID}"           # the ID of the batch being processed                                   e.g. 19650_Ansari_Morad
+echo "BATCH_NUM = ${BATCH_NUM}"         # the numerical part of the BATCH_ID                                    e.g. 19650
+echo "PLATE_ID = ${PLATE_ID}"           # the PCR plate ID of the batch being currently processed,              e.g. 19285
+echo "PROJECT_ID = ${PROJECT_ID}"       # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done
+echo "VERSION_N = ${VERSION_N}"         # the version of the alignment and genotyping analysis
+
+
+
+
+
+
+
+##############################################################
+###   Delete indivdual BAMs (and indexes) iff CRAM found   ###
+##############################################################
+
+# make sure we are reading the data from the exact version, batch & plate ID
+SOURCE_VCF_DIRS=${SOURCE_DIR}/${BATCH_NUM}_${VERSION_N}/families/????-??-??_${BATCH_NUM}_${VERSION_N}_${PLATE_ID}_*
+
+
+for S_VCF_DIR in ${SOURCE_VCF_DIRS}
+do
+  VCF_DIR_NAME="${S_VCF_DIR##*/}"
+  IFS=_ read -ra my_arr <<< "${VCF_DIR_NAME}"
+  FAM_ID=${my_arr[-1]}
+  echo "  FAM_ID = ${FAM_ID}"
+
+  # identify all folders (one for each individual) for this family containing cram/bam files (format: <INDI_ID>_<FAM_ID>)
+  cd ${SOURCE_DIR}/${BATCH_NUM}_${VERSION_N}/families/????-??-??_${BATCH_NUM}_${VERSION_N}_${PLATE_ID}_${FAM_ID}
+  for ITEM in `ls -l`
+  do
+    if test -d $ITEM && [[ "$ITEM" == *"_"* ]]
+    then
+      echo "    $ITEM is a CRAM/BAM folder..."
+      BAM=${ITEM}/${ITEM}-ready.bam
+      CRAM=${ITEM}/${ITEM}-ready.cram
+
+      #  check if the CRAM file exists, iff yes, delete the BAM file and its index
+      if [[ -f "$CRAM" ]]
+      then
+        echo "      Found ${CRAM}"
+        echo "      Removing ${BAM}"
+        rm ${BAM}
+        echo "      Removing ${BAM}.bai"
+        rm ${BAM}.bai  
+      else
+        echo "      ERROR: CRAM file ${CRAM} not found - have not deleted BAM ${BAM}!"
+      fi
+    fi
+  done
+done
+
+
+
+
+echo ""
+echo ""
+echo "OK: Deletion of BAM files and their indexes for PROJECT_ID = $PROJECT_ID successful"
+
+
+
+
+
+
+
+
+
+
diff --git a/generate_DEC_IGV_aff_sib_scripts_from_quad.py b/generate_DEC_IGV_aff_sib_scripts_from_quad.py
index 728882d76752cc7f30f61cf176ad316918804206..7fbf4bff32fd6943a669ee861b8ee7a03d280aa1 100755
--- a/generate_DEC_IGV_aff_sib_scripts_from_quad.py
+++ b/generate_DEC_IGV_aff_sib_scripts_from_quad.py
@@ -12,7 +12,7 @@
 #		all G2P variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: SEPT 16, 2020
+#       last modified: DEC 07, 2021
 
 
 
@@ -33,11 +33,13 @@ ACCESS = 'No'
 TRANS_DICT = {}				# key: transcriptID not found in DECIPHER; value: the chosen replacement transcriptID from those available in DECIPHER
 KIDS_SEX_DICT = {}			# key: <indi_fam_id>; value: sex (in the format 46XX/46XY)
 KIDS_G2P_DICT = defaultdict(dict)	# 1st level key: <indi_fam_id>; 2nd level key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
-KIDS_VCF_DICT = defaultdict(dict)	# 1st level key: <indi_fam_id>; 2nd level key: chr:pos:ref:alt; value: irrelevant
+KIDS_VCF_DICT = defaultdict(dict)	# 1st level key: <indi_fam_id>; 2nd level key: chr:pos:ref:alt; value: (FS,SOR)
 SHARED_DICT = {}			# key: chr:start:ref:alt; value: (ZYG,gene,trans)
 NUM_SHARED_G2P_VARS = 0
 SNAP_FLANK = 25
 
+FS_THRESH = float(60)
+SOR_THRESH = float(3)
 
 
 
@@ -118,9 +120,11 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,f
         # SHARED_DICT = {}                        # key: chr:start:ref:alt; value: (ZYG,gene,trans)
 
         pro_vcf_vars = KIDS_VCF_DICT[pro_id]
-        for pro_vcf_var,rub in pro_vcf_vars.iteritems():
+        for pro_vcf_var,fs_sor in pro_vcf_vars.iteritems():
             chr,pos,ref,alt = pro_vcf_var.split(':')
             pos = int(pos)
+            FS = fs_sor[0]
+            SOR = fs_sor[1]
 
             # adjust pro_vcf_var for indels to match G2P style of recording
             if len(ref) == len(alt):							# SNP
@@ -210,7 +214,16 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,f
             # write to the IGV file
             i_s = pos - SNAP_FLANK
             i_e = pos + SNAP_FLANK
-            i_name = '%s_shared_%s_%s_%s_%s.png' % (pro_id,chr,pos,ref,alt)
+
+            # check if above FS/SOR_THRESH to include in the snapshot name
+            if (FS == '') or (SOR == ''):
+                flag = 'NA'
+            elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+            else:
+                flag = 'OK'
+            i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
             out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
             out_igv_han.write('sort strand\n')
             out_igv_han.write('squish\n')
@@ -249,6 +262,18 @@ def read_all_VCF_vars(in_vcf_file,THIS_DICT,pro_id):
         ref = data[3]
         alt = data[4]
 
+        # extract FS and SOR
+        FS = ''
+        SOR = ''
+        infos = [y.strip() for y in data[7].strip().split(';')]
+        for info in infos:
+            if info.startswith('FS='):
+                tag,FS = info.split('=')
+                FS = float(FS)
+            elif info.startswith('SOR='):
+                tag,SOR = info.split('=')
+                SOR = float(SOR)
+
         # did the splitting and normalizing - should not have multiallelic variants
         if alt.find(',') != -1:
             print "ERROR: found multiallelic variant"
@@ -258,9 +283,9 @@ def read_all_VCF_vars(in_vcf_file,THIS_DICT,pro_id):
         key = '%s:%s:%s:%s' % (chr,pos,ref,alt)
 
         if pro_id not in THIS_DICT:
-            THIS_DICT[pro_id][key] = 1
+            THIS_DICT[pro_id][key] = (FS,SOR)
         elif key not in THIS_DICT[pro_id]:
-            THIS_DICT[pro_id][key] = 1
+            THIS_DICT[pro_id][key] = (FS,SOR)
         else:
             print "ERROR: duplicate key = %s in %s" % (key,in_vcf_file)
             raise SystemExit
diff --git a/generate_DEC_IGV_scripts.py b/generate_DEC_IGV_scripts.py
index b42a82f99595566df1ebb15a256c6ea76fac8ef8..3c3ced5a886a19895491486d6a2f2085b055d588 100755
--- a/generate_DEC_IGV_scripts.py
+++ b/generate_DEC_IGV_scripts.py
@@ -15,7 +15,7 @@
 #		all VASE denovo variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: DEC 05, 2019
+#       last modified: DEC 03, 2021
 
 
 
@@ -65,6 +65,8 @@ MAP_DICT = {}			# key: family_id (aka decipher_id); value: internal (decipher) I
 TRANS_DICT = {}			# key: transcriptID not found in DECIPHER; value: the chosen replacement transcriptID from those available in DECIPHER
 
 
+FS_THRESH = float(60)
+SOR_THRESH = float(3)
 
 
 
@@ -194,6 +196,18 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir
         ref = data[3]
         alt = data[4]
 
+        # extract FS and SOR
+        FS = ''
+        SOR = ''
+        infos = [y.strip() for y in data[7].strip().split(';')]
+        for info in infos:
+            if info.startswith('FS='):
+                tag,FS = info.split('=')
+                FS = float(FS)
+            elif info.startswith('SOR='):
+                tag,SOR = info.split('=')
+                SOR = float(SOR)
+
         VCF_VAR = data[9]
 
         key = '%s:%s:%s:%s' % (chr,pos,ref,alt)
@@ -283,7 +297,16 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir
                 # write to the IGV file
                 i_s = pos - SNAP_FLANK
                 i_e = pos + SNAP_FLANK
-                i_name = '%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt)
+
+                # check if above FS/SOR_THRESH to include in the snapshot name
+                if (FS == '') or (SOR == ''):
+                    flag = 'NA'
+                elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                    flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+                else:
+                    flag = 'OK'
+                i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
                 out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
                 out_igv_han.write('sort strand\n')
                 out_igv_han.write('squish\n')
@@ -370,7 +393,16 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir
                 # write to the IGV file
                 i_s = pos - SNAP_FLANK
                 i_e = pos + SNAP_FLANK
-                i_name = '%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt)
+
+                # check if above FS/SOR_THRESH to include in the snapshot name
+                if (FS == '') or (SOR == ''):
+                    flag = 'NA'
+                elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                    flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+                else:
+                    flag = 'OK'
+                i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
                 out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
                 out_igv_han.write('sort strand\n')
                 out_igv_han.write('squish\n')
@@ -458,7 +490,16 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir
                 # write to the IGV file
                 i_s = pos - SNAP_FLANK
                 i_e = pos + SNAP_FLANK
-                i_name = '%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt)
+
+                # check if above FS/SOR_THRESH to include in the snapshot name
+                if (FS == '') or (SOR == ''):
+                    flag = 'NA'
+                elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                    flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+                else:
+                    flag = 'OK'
+                i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
                 out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
                 out_igv_han.write('sort strand\n')
                 out_igv_han.write('squish\n')
diff --git a/generate_DEC_IGV_shared_scripts.py b/generate_DEC_IGV_shared_scripts.py
index eb2b1ece3e52e0b33293e964b67ee271cbe69cc4..96c9f5e4b4204e85a16e32c8d0f59290a517cf4b 100755
--- a/generate_DEC_IGV_shared_scripts.py
+++ b/generate_DEC_IGV_shared_scripts.py
@@ -12,7 +12,7 @@
 #		all G2P variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: MAR 23, 2020
+#       last modified: DEC 07, 2021
 
 
 
@@ -33,12 +33,15 @@ ACCESS = 'No'
 TRANS_DICT = {}				# key: transcriptID not found in DECIPHER; value: the chosen replacement transcriptID from those available in DECIPHER
 KIDS_SEX_DICT = {}			# key: <indi_fam_id>; value: sex (in the format 46XX/46XY)
 KIDS_G2P_DICT = defaultdict(dict)	# 1st level key: <indi_fam_id>; 2nd level key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
-KIDS_VCF_DICT = defaultdict(dict)	# 1st level key: <indi_fam_id>; 2nd level key: chr:pos:ref:alt; value: irrelevant
+KIDS_VCF_DICT = defaultdict(dict)	# 1st level key: <indi_fam_id>; 2nd level key: chr:pos:ref:alt; value: (FS,SOR)
 SHARED_DICT = {}			# key: chr:start:ref:alt; value: (ZYG,gene,trans)
 NUM_SHARED_G2P_VARS = 0
 SNAP_FLANK = 25
 
 
+FS_THRESH = float(60)
+SOR_THRESH = float(3)
+
 
 
 ### call the python scrpit
@@ -113,9 +116,11 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,f
         # SHARED_DICT = {}                        # key: chr:start:ref:alt; value: (ZYG,gene,trans)
 
         pro_vcf_vars = KIDS_VCF_DICT[pro_id]
-        for pro_vcf_var,rub in pro_vcf_vars.iteritems():
+        for pro_vcf_var,fs_sor in pro_vcf_vars.iteritems():
             chr,pos,ref,alt = pro_vcf_var.split(':')
             pos = int(pos)
+            FS = fs_sor[0]
+            SOR = fs_sor[1]
 
             # adjust pro_vcf_var for indels to match G2P style of recording
             if len(ref) == len(alt):							# SNP
@@ -205,7 +210,16 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,f
             # write to the IGV file
             i_s = pos - SNAP_FLANK
             i_e = pos + SNAP_FLANK
-            i_name = '%s_%s_%s_%s_%s.png' % (pro_id,chr,pos,ref,alt)
+
+            # check if above FS/SOR_THRESH to include in the snapshot name
+            if (FS == '') or (SOR == ''):
+                flag = 'NA'
+            elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+            else:
+                flag = 'OK'
+            i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
             out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
             out_igv_han.write('sort strand\n')
             out_igv_han.write('squish\n')
@@ -244,6 +258,18 @@ def read_all_VCF_vars(in_vcf_file,THIS_DICT,pro_id):
         ref = data[3]
         alt = data[4]
 
+        # extract FS and SOR
+        FS = ''
+        SOR = ''
+        infos = [y.strip() for y in data[7].strip().split(';')]
+        for info in infos:
+            if info.startswith('FS='):
+                tag,FS = info.split('=')
+                FS = float(FS)
+            elif info.startswith('SOR='):
+                tag,SOR = info.split('=')
+                SOR = float(SOR)
+
         # did the splitting and normalizing - should not have multiallelic variants
         if alt.find(',') != -1:
             print "ERROR: found multiallelic variant"
@@ -253,9 +279,9 @@ def read_all_VCF_vars(in_vcf_file,THIS_DICT,pro_id):
         key = '%s:%s:%s:%s' % (chr,pos,ref,alt)
 
         if pro_id not in THIS_DICT:
-            THIS_DICT[pro_id][key] = 1
+            THIS_DICT[pro_id][key] = (FS,SOR)
         elif key not in THIS_DICT[pro_id]:
-            THIS_DICT[pro_id][key] = 1
+            THIS_DICT[pro_id][key] = (FS,SOR)
         else:
             print "ERROR: duplicate key = %s in %s" % (key,in_vcf_file)
             raise SystemExit
diff --git a/generate_DEC_IGV_trio_scripts_from_quad.py b/generate_DEC_IGV_trio_scripts_from_quad.py
index 2537002b64c6c451c5ff4f692216cdbc7794934d..3afdd794f3e4a95a9abc32baa77d6a22f83133c0 100755
--- a/generate_DEC_IGV_trio_scripts_from_quad.py
+++ b/generate_DEC_IGV_trio_scripts_from_quad.py
@@ -15,7 +15,7 @@
 #		all VASE denovo variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: DEC 05, 2019
+#       last modified: DEC 07, 2021
 
 
 
@@ -65,6 +65,8 @@ MAP_DICT = {}			# key: family_id (aka decipher_id); value: internal (decipher) I
 TRANS_DICT = {}			# key: transcriptID not found in DECIPHER; value: the chosen replacement transcriptID from those available in DECIPHER
 
 
+FS_THRESH = float(60)
+SOR_THRESH = float(3)
 
 
 
@@ -217,6 +219,18 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_d
         ref = data[3]
         alt = data[4]
 
+        # extract FS and SOR
+        FS = ''
+        SOR = ''
+        infos = [y.strip() for y in data[7].strip().split(';')]
+        for info in infos:
+            if info.startswith('FS='):
+                tag,FS = info.split('=')
+                FS = float(FS)
+            elif info.startswith('SOR='):
+                tag,SOR = info.split('=')
+                SOR = float(SOR)
+
         VCF_VAR = data[9]
 
         key = '%s:%s:%s:%s' % (chr,pos,ref,alt)
@@ -306,7 +320,16 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_d
                 # write to the IGV file
                 i_s = pos - SNAP_FLANK
                 i_e = pos + SNAP_FLANK
-                i_name = '%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt)
+
+                # check if above FS/SOR_THRESH to include in the snapshot name
+                if (FS == '') or (SOR == ''):
+                    flag = 'NA'
+                elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                    flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+                else:
+                    flag = 'OK'
+                i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
                 out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
                 out_igv_han.write('sort strand\n')
                 out_igv_han.write('squish\n')
@@ -393,7 +416,16 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_d
                 # write to the IGV file
                 i_s = pos - SNAP_FLANK
                 i_e = pos + SNAP_FLANK
-                i_name = '%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt)
+
+                # check if above FS/SOR_THRESH to include in the snapshot name
+                if (FS == '') or (SOR == ''):
+                    flag = 'NA'
+                elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                    flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+                else:
+                    flag = 'OK'
+                i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
                 out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
                 out_igv_han.write('sort strand\n')
                 out_igv_han.write('squish\n')
@@ -481,7 +513,16 @@ def go(dec_id,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_d
                 # write to the IGV file
                 i_s = pos - SNAP_FLANK
                 i_e = pos + SNAP_FLANK
-                i_name = '%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt)
+
+                # check if above FS/SOR_THRESH to include in the snapshot name
+                if (FS == '') or (SOR == ''):
+                    flag = 'NA'
+                elif (FS >= FS_THRESH) and (SOR >= SOR_THRESH):
+                    flag = 'FS_%.1f_SOR_%.1f' % (FS,SOR)
+                else:
+                    flag = 'OK'
+                i_name = '%s_%s_%s_%s_%s_%s.png' % (CHILD_ID,chr,pos,ref,alt,flag)
+
                 out_igv_han.write('goto %s:%s-%s\n' % (chr,i_s,i_e))
                 out_igv_han.write('sort strand\n')
                 out_igv_han.write('squish\n')
diff --git a/process_trio.sh b/process_trio.sh
index cb18c41d6c2c0e625bc8658cadb5e31fd73dbdda..5cbe9b40d3f86bc2cdfbc07e8bcba1f9333797cf 100755
--- a/process_trio.sh
+++ b/process_trio.sh
@@ -367,6 +367,7 @@ FAM_BAM_DIR=${SOURCE_DIR}/${BATCH_NUM}_${VERSION_N}/families/????-??-??_${BATCH_
 
 
 ## call the python scrpit
+#~~#time ${PYTHON2} /home/u035/u035/shared/temp/generate_DEC_IGV_scripts.py \
 time ${PYTHON2} ${SCRIPTS_DIR}/generate_DEC_IGV_scripts.py \
 ${DEC_MAP} \
 ${TRANS_MAP} \
diff --git a/trio_cram_setup.sh b/trio_cram_setup.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0cad9bd38dc8e55e77950c31a7dbccc7a36a89da
--- /dev/null
+++ b/trio_cram_setup.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+#SBATCH --cpus-per-task=16
+#SBATCH --mem=2GB
+#SBATCH --time=24:00:00
+#SBATCH --job-name=trio_cram_setup
+#SBATCH --output=trio_cram_setup.%A_%a.out
+#SBATCH --error=trio_cram_setup.%A_%a.err
+
+
+### Setup the folder structure for the downstream analysis###
+BASE=/home/u035/u035/shared/analysis/work
+WORK_DIR=$BASE/${PROJECT_ID}
+VCF_DIR=${WORK_DIR}/VCF
+PED_DIR=${WORK_DIR}/PED
+LOG_DIR=${WORK_DIR}/LOG
+G2P_DIR=${WORK_DIR}/G2P
+VASE_DIR=${WORK_DIR}/VASE
+COV_DIR=${WORK_DIR}/COV
+DEC_DIR=${WORK_DIR}/DECIPHER
+IGV_DIR=${DEC_DIR}/IGV
+CNV_DIR=${WORK_DIR}/CNV
+BAMOUT_DIR=${WORK_DIR}/BAMOUT
+SCRIPTS_DIR=/home/u035/u035/shared/scripts
+
+
+
+### Tools
+PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7
+SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools
+PICARD=/home/u035/u035/shared/software/bcbio/anaconda/bin/picard
+REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa
+
+
+
+#~## check if ${WORK_DIR} already exists - if so, exit - to prevent accidental overwriting
+#~#if [ -d "${WORK_DIR}" ]; then
+#~#  echo "${WORK_DIR} already exists - EXIT! If really intended, delete manually!!!!"
+#~#  exit
+#~#fi
+
+
+
+
+echo "SOURCE_DIR = ${SOURCE_DIR}"       # the general path to the source VCF, BAM and PED files                 i.e. /home/u035/u035/shared/results
+echo "BATCH_ID = ${BATCH_ID}"           # the ID of the batch being processed                                   e.g. 19650_Ansari_Morad
+echo "BATCH_NUM = ${BATCH_NUM}"         # the numerical part of the BATCH_ID                                    e.g. 19650
+echo "PLATE_ID = ${PLATE_ID}"           # the PCR plate ID of the batch being currently processed,              e.g. 19285
+echo "PROJECT_ID = ${PROJECT_ID}"       # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done
+echo "VERSION_N = ${VERSION_N}"         # the version of the alignment and genotyping analysis
+
+#~#S_PED_DIR=${SOURCE_DIR}/../../params		# requires that the family PED files are in this folder
+
+
+
+
+# create the working dir and the required subfolders
+mkdir ${WORK_DIR}
+mkdir ${VCF_DIR}
+mkdir ${PED_DIR}
+mkdir ${LOG_DIR}
+mkdir ${G2P_DIR}
+mkdir ${VASE_DIR}
+mkdir ${COV_DIR}
+mkdir ${DEC_DIR}
+mkdir ${IGV_DIR}
+mkdir ${CNV_DIR}
+mkdir ${BAMOUT_DIR}
+echo "Created ${WORK_DIR} for this batch and all the required subfolders"
+
+
+
+######################################################
+###   Copy the VCF and PED file per each family    ###
+######################################################
+
+SOURCE_VCF_DIRS=${SOURCE_DIR}/${BATCH_NUM}_${VERSION_N}/families/????-??-??_${BATCH_NUM}_${VERSION_N}_${PLATE_ID}_*
+SOURCE_PED_DIR=${SOURCE_DIR}/${BATCH_NUM}_${VERSION_N}/params
+
+for S_VCF_DIR in ${SOURCE_VCF_DIRS}
+do
+#  echo "  ${S_VCF_DIR}"
+  VCF_DIR_NAME="${S_VCF_DIR##*/}"
+#  echo "    ${VCF_DIR_NAME}"
+  IFS=_ read -ra my_arr <<< "${VCF_DIR_NAME}"
+  FAM_ID=${my_arr[-1]}
+#  echo "      BATCH = ${BATCH_ID}, PLATE = ${PLATE_ID}, FAM_ID = ${FAM_ID}"
+  echo "  FAM_ID = ${FAM_ID}"
+
+  # construct the VCF and PED file names for this family
+  S_VCF_FILE=${S_VCF_DIR}/${PLATE_ID}_${FAM_ID}-gatk-haplotype-annotated.vcf.gz
+  S_PED_FILE=${SOURCE_PED_DIR}/${BATCH_ID}_${PLATE_ID}_${FAM_ID}.ped
+
+
+  # copy the trio VCF and PED files
+  cp ${S_VCF_FILE} ${VCF_DIR}
+  cp ${S_PED_FILE} ${PED_DIR}
+  echo "    copied ${S_VCF_FILE} --> ${VCF_DIR}"
+  echo "    copied ${S_PED_FILE} --> ${PED_DIR}"
+
+
+  # identify all folders (one for each individual) for this family containing cram/bam files (format: <INDI_ID>_<FAM_ID>)
+  cd ${SOURCE_DIR}/${BATCH_NUM}_${VERSION_N}/families/????-??-??_${BATCH_NUM}_${VERSION_N}_${PLATE_ID}_${FAM_ID}
+  for ITEM in `ls -l`
+  do
+      if test -d $ITEM && [[ "$ITEM" == *"_"* ]]
+      then
+        echo "    $ITEM is a CRAM/BAM folder..."
+        BAM=${ITEM}/${ITEM}-ready.bam
+        CRAM=${ITEM}/${ITEM}-ready.cram
+        ${SAMTOOLS} view -@ 16 -T ${REFERENCE_GENOME} -hb -o ${BAM} ${CRAM}
+        ${PICARD} BuildBamIndex -I ${BAM} -O ${BAM}.bai -USE_JDK_DEFLATER true -USE_JDK_INFLATER true -VERBOSITY ERROR -QUIET true
+        echo "      Generated ${BAM} and ${BAM}.bai from ${CRAM}"
+      fi
+  done
+
+done
+
+
+
+######################################################################################
+### generate the FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt *only for trio* families ###
+######################################################################################
+
+time ${PYTHON2} ${SCRIPTS_DIR}/extract_trio_FAM_PRO_ID.py ${WORK_DIR}
+
+echo ""
+echo ""
+echo "OK: Setup for PROJECT_ID = $PROJECT_ID successful"
+
+
+
+
+
+
+
+
+