diff --git a/bin/compute_DDG2P_coverage_illumina_exome_bam.sh b/bin/compute_DDG2P_coverage_illumina_exome_bam.sh index d344ee2fc8ca8682c4e0cf515771b009c5b5ba26..9cd6fc4c1984660774db2b17fe549facfe870f3b 100755 --- a/bin/compute_DDG2P_coverage_illumina_exome_bam.sh +++ b/bin/compute_DDG2P_coverage_illumina_exome_bam.sh @@ -8,7 +8,7 @@ # Author: MH -# last modified: FEB 27, 2023 +# last modified: SEPT 11, 2023 # setup PATH @@ -17,7 +17,8 @@ export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/sit # The current DDG2P gene panel BED file -TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20221207.plus15bp.merged.bed +#TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20221207.plus15bp.merged.bed +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20230817.plus15bp.merged.bed GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools @@ -37,7 +38,7 @@ BAM_FILE=`head -n ${SLURM_ARRAY_TASK_ID} ${BAM_LIST} | tail -n 1` # get the ID (Illumina Exome) to generate the out file name IFS='/' read -r -a array <<< "${BAM_FILE}" -ID="${array[8]}" +ID="${array[9]}" OUT_FILE=${ID}.DDG2P.COV diff --git a/bin/compute_DDG2P_default_mosdepth_coverage.sh b/bin/compute_DDG2P_default_mosdepth_coverage.sh new file mode 100755 index 0000000000000000000000000000000000000000..f444bde4c5f1f5a34af50017f236d60bf6f85767 --- /dev/null +++ b/bin/compute_DDG2P_default_mosdepth_coverage.sh @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --cpus-per-task=1 +#SBATCH --mem=16GB +#SBATCH --time=24:00:00 +#SBATCH --job-name=DDG2P_default_mosdepth_cov +#SBATCH --output=DDG2P_default_mosdepth_cov.%A_%a.out +#SBATCH --error=DDG2P_default_mosdepth_cov.%A_%a.err + + +# Author: MH +# last modified: SEPT 15, 2023 + + +# setup PATH +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 + + + +export MOSDEPTH_Q0=NO_COVERAGE +export MOSDEPTH_Q1=LOW_COVERAGE +export MOSDEPTH_Q2=CALLABLE + +MOSDEPTH=/mnt/e1000/home/u035/u035/shared/software/bcbio/galaxy/../anaconda/bin/mosdepth + + + +## The current TWIST gene panel BED file +#TARGETS=/home/u035/u035/shared/resources/exome_targets/hg38_Twist_ILMN_Exome_2.0_Plus_Panel_annotated_June2022.plus15bp.bed + +# The current DDG2P gene panel BED file +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20230817.plus15bp.merged.bed + + + + + + +################################# +###### for each BAM file #### +################################# +#BAM_FILE=`head -n ${SLURM_ARRAY_TASK_ID} ${BAM_LIST} | tail -n 1` +# +## get the ID (Illumina Exome) to generate the out file name +#IFS='/' read -r -a array <<< "${BAM_FILE}" +#ID="${array[9]}" + +#time ${MOSDEPTH} -t 16 -F 1804 -Q 1 --no-per-base --by ${TARGETS} --quantize 0:1:4: --thresholds 20,50 ${ID}.DDG2P ${BAM_FILE} + + +################################# +##### for each CRAM file #### +################################# +CRAM_FILE=`head -n ${SLURM_ARRAY_TASK_ID} ${CRAM_LIST} | tail -n 1` + +# get the ID (Illumina Exome) to generate the out file name +IFS='/' read -r -a array <<< "${CRAM_FILE}" +ID="${array[9]}" + +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa + +time ${MOSDEPTH} -t 16 -F 1804 -Q 1 --no-per-base --by ${TARGETS} --quantize 0:1:4: --thresholds 20,50 --fasta ${REFERENCE_GENOME} ${ID}.DDG2P ${CRAM_FILE} + + + + +
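Reviewer note on the new mosdepth script: the three MOSDEPTH_Q* exports are what give the --quantize 0:1:4: bins their labels, and -F 1804 filters out unmapped, mate-unmapped, secondary, QC-fail and duplicate reads. A minimal sketch of how the outputs could be inspected, assuming the ${ID}.DDG2P prefix used above (the awk one-liner is an illustrative summary, not part of the pipeline):

```bash
# quantize 0:1:4: labels depth bins [0,1) NO_COVERAGE, [1,4) LOW_COVERAGE, [4,inf) CALLABLE
zcat SAMPLE.DDG2P.quantized.bed.gz | head          # chrom, start, end, label
# --thresholds 20,50 reports bases per DDG2P target covered at >=20x and >=50x
zcat SAMPLE.DDG2P.thresholds.bed.gz | head         # chrom, start, end, region, 20X, 50X
# illustrative only: overall fraction of target bases at >=20x
zcat SAMPLE.DDG2P.thresholds.bed.gz | awk 'NR > 1 { covered += $5; total += $3 - $2 } END { printf "%.4f\n", covered / total }'
```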
diff --git a/bin/compute_full_coverage_illumina_exome_bam.sh b/bin/compute_full_coverage_illumina_exome_bam.sh new file mode 100755 index 0000000000000000000000000000000000000000..14f97717567397461135e5b6a1968ee59bca9d5e --- /dev/null +++ b/bin/compute_full_coverage_illumina_exome_bam.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --cpus-per-task=1 +#SBATCH --mem=16GB +#SBATCH --time=24:00:00 +#SBATCH --job-name=compute_full_cov +#SBATCH --output=compute_full_cov.%A_%a.out +#SBATCH --error=compute_full_cov.%A_%a.err + + +# Author: MH +# last modified: SEPT 12, 2023 + + +# setup PATH +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 + + +# The current DDG2P gene panel BED file +# TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20230817.plus15bp.merged.bed + +# The current TWIST gene panel BED file +TARGETS=/home/u035/u035/shared/resources/exome_targets/hg38_Twist_ILMN_Exome_2.0_Plus_Panel_annotated_June2022.plus15bp.bed + + + +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools +PICARD=/home/u035/u035/shared/software/bcbio/anaconda/bin/picard +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa + + + + +# echo "BAM_LIST = ${BAM_LIST}" + + +################################ +##### for each BAM file #### +################################ +BAM_FILE=`head -n ${SLURM_ARRAY_TASK_ID} ${BAM_LIST} | tail -n 1` + +# get the ID (Illumina Exome) to generate the out file name +IFS='/' read -r -a array <<< "${BAM_FILE}" +ID="${array[9]}" +#OUT_FILE=${ID}.DDG2P.COV +OUT_FILE=${ID}.full.COV + + +time java -Xmx8g -jar ${GATK3} -T DepthOfCoverage -R ${REFERENCE_GENOME} -o ${OUT_FILE} -I ${BAM_FILE} -L ${TARGETS} \ + --omitDepthOutputAtEachBase \ + --minBaseQuality 20 \ + --minMappingQuality 20 \ + -ct 1 \ + -ct 5 \ + -ct 10 \ + -ct 15 \ + -ct 20 \ + -ct 25 \ + -ct 30 \ + -ct 50 \ + -jdk_deflater \ + -jdk_inflater \ + --allow_potentially_misencoded_quality_scores + +
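For the GATK3 script above, DepthOfCoverage writes its per-sample table to ${OUT_FILE}.sample_summary, with one %_bases_above_N column per -ct flag. A quick sanity check might look like this (SAMPLE.full.COV is an invented prefix standing in for ${OUT_FILE}):

```bash
# header plus one data row per sample; columns include total, mean and the -ct percentages
head -n 2 SAMPLE.full.COV.sample_summary
# mean coverage is the third column of the data row
awk 'NR == 2 { print "mean coverage:", $3 }' SAMPLE.full.COV.sample_summary
```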
diff --git a/bin/compute_mosdepth_with_overlap_coverage_bam.sh b/bin/compute_mosdepth_with_overlap_coverage_bam.sh new file mode 100755 index 0000000000000000000000000000000000000000..4b49bf002491dbbda987afd088587ef8bcf6c096 --- /dev/null +++ b/bin/compute_mosdepth_with_overlap_coverage_bam.sh @@ -0,0 +1,44 @@ +#!/bin/bash +#SBATCH --cpus-per-task=1 +#SBATCH --mem=16GB +#SBATCH --time=24:00:00 +#SBATCH --job-name=compute_mosdepth_with_overlap_cov +#SBATCH --output=compute_mosdepth_with_overlap_cov.%A_%a.out +#SBATCH --error=compute_mosdepth_with_overlap_cov.%A_%a.err + + +# Author: MH +# last modified: SEPT 13, 2023 + + +# setup PATH +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 + + + +export MOSDEPTH_Q0=NO_COVERAGE +export MOSDEPTH_Q1=LOW_COVERAGE +export MOSDEPTH_Q2=CALLABLE + +MOSDEPTH=/mnt/e1000/home/u035/u035/shared/software/bcbio/galaxy/../anaconda/bin/mosdepth + + +# The current TWIST gene panel BED file +TARGETS=/home/u035/u035/shared/resources/exome_targets/hg38_Twist_ILMN_Exome_2.0_Plus_Panel_annotated_June2022.plus15bp.bed + + +################################ +##### for each BAM file #### +################################ +BAM_FILE=`head -n ${SLURM_ARRAY_TASK_ID} ${BAM_LIST} | tail -n 1` + +# get the ID (Illumina Exome) to generate the out file name +IFS='/' read -r -a array <<< "${BAM_FILE}" +ID="${array[9]}" + + +time ${MOSDEPTH} --fast-mode -t 16 -F 1804 -Q 1 --no-per-base --by ${TARGETS} --quantize 0:1:4: ${ID} ${BAM_FILE} + + + diff --git a/bin/convert_DEC_to_v10.py b/bin/convert_DEC_to_v10.py old mode 100755 new mode 100644 index f4b23df27aaaaff3fc01bb4d1a83d224d4ff01ab..67f26a78bb6cd0b061a1c298a7704d8b268c6217 --- a/bin/convert_DEC_to_v10.py +++ b/bin/convert_DEC_to_v10.py @@ -1,4 +1,3 @@ -#!/bin/env python # given a DECIPHER bulk upload file v9 # convert it to v10 # @@ -15,7 +14,10 @@ import xlsxwriter -def go(id, in_file, out_file): # the folder where the bulk upload files are strored; id is in the format <indiv_id>_<family_id> +def go(inout_dir,id): # the folder where the bulk upload files are stored; id is in the format <indiv_id>_<family_id> + + in_file = '%s/%s_DEC_FLT.csv' % (inout_dir,id) + out_file = '%s/%s_DECIPHER_v10.xlsx' % (inout_dir,id) # create the workbook workbook = xlsxwriter.Workbook(out_file) @@ -70,9 +72,9 @@ def go(id, in_file, out_file): # the folder where the bulk upload files are str if __name__ == '__main__': - if len(sys.argv) == 4: - go(*sys.argv[1:]) + if len(sys.argv) == 3: + go(sys.argv[1],sys.argv[2]) else: - print("Suggested use: convert_DEC_to_v10.py <indiv_id>_<family_id> <in_file> <out_file>") + print ("Suggested use: time python convert_DEC_to_v10.py decipher_dir <indiv_id>_<family_id>") raise SystemExit
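The convert_DEC_to_v10.py rewrite moves the file-name logic into the script, so the caller now passes the DECIPHER folder and the <indiv_id>_<family_id> stem instead of explicit in/out paths. A usage sketch (directory and stem invented for illustration):

```bash
time python convert_DEC_to_v10.py /path/to/decipher_dir 12345_FAM001
# reads:  /path/to/decipher_dir/12345_FAM001_DEC_FLT.csv
# writes: /path/to/decipher_dir/12345_FAM001_DECIPHER_v10.xlsx
```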
diff --git a/bin/count_decipher_entries.py b/bin/count_decipher_entries.py new file mode 100644 index 0000000000000000000000000000000000000000..82916ef065a7eb144c4c788c28c7febfbf707741 --- /dev/null +++ b/bin/count_decipher_entries.py @@ -0,0 +1,36 @@ +# Counts the number of variants in a DECIPHER Excel upload file (v10 or v11). +# +# Author: AM +# last modified: SEP 01, 2023 + + + + + + +import sys +import os +import shutil +import csv +import xlsxwriter +import xlrd + + +def go(DEC_file): + + if not os.path.exists(DEC_file): + print ('ERROR: cannot find ', DEC_file) + raise SystemExit + + wb = xlrd.open_workbook(DEC_file) + ws = wb.sheet_by_index(0) + print (DEC_file, ' ', ws.nrows) + +if __name__ == '__main__': + if len(sys.argv) == 2: + go(sys.argv[1]) + else: + print ('Suggested use: time python3 count_decipher_entries.py DECIPHER_v11.xlsx') + + + raise SystemExit
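As its suggested-use line says, count_decipher_entries.py takes a single Excel upload file. Note that it prints ws.nrows, which includes the header row, so the variant count is one less; it also assumes an xlrd build that can still open .xlsx (xlrd 2.x dropped xlsx support, so this presumably relies on xlrd 1.x in the python3 environment). Example with an invented file name:

```bash
time python3 count_decipher_entries.py 12345_FAM001_DECIPHER_v11.xlsx
# prints: 12345_FAM001_DECIPHER_v11.xlsx   <nrows>   (variants = nrows - 1)
```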
diff --git a/bin/extract_solo_FAM_PRO_ID.py b/bin/extract_solo_FAM_PRO_ID.py index 6306fa987fa369962c8b8fd67ecd50a03e2f613b..648dd3d6f254126da138109217a3dcc6e4c7b02a 100755 --- a/bin/extract_solo_FAM_PRO_ID.py +++ b/bin/extract_solo_FAM_PRO_ID.py @@ -1,17 +1,22 @@ -#!/usr/bin/env python3 # input: the work folder which contains a PED subfolder where all family PED files were copied # output: only for singletons # solo_FAM_IDs.txt, solo_PRO_IDs.txt and solo_FAM_PRO.txt # # Author: MH -# last modified: JUL 6, 2022 +# last modified: SEPT 05, 2023 -import argparse -def go(args): - out_fam_file = 'solo_FAM_IDs.txt' - out_pro_file = 'solo_PRO_IDs.txt' - out_f_p_file = 'solo_FAM_PRO.txt' + +import sys +import os + + + +def go(work_dir): + + out_fam_file = work_dir + '/solo_FAM_IDs.txt' + out_pro_file = work_dir + '/solo_PRO_IDs.txt' + out_f_p_file = work_dir + '/solo_FAM_PRO.txt' out_fam_han = open(out_fam_file,'w') out_pro_han = open(out_pro_file,'w') @@ -21,34 +26,58 @@ def go(args): cntr_pro = 0 cntr_f_p = 0 + # go over the PED folder in the working dir and process each PED file - print("") - print("Processing %i PED files to extract singleton FAM_ID, PRO_ID and FAM_PRO files" % (len(args.ped_files))) + ped_dir = work_dir + '/PED' + print "" + print "Processing the PED files (in %s) to extract singleton FAM_ID, PRO_ID and FAM_PRO files" % (ped_dir) + + - for ped_file in args.ped_files: # per each PED file - if ped_file.endswith(".ped"): + for file in os.listdir(ped_dir): # per each PED file + if file.endswith(".ped"): - print(" %s" % (ped_file)) + print " %s" % (file) + in_file = os.path.join(ped_dir, file) - # check how many lines in the PED file - if more than one cannot be a singleton, ignore - num_lines = 0 - in_han = open(ped_file,'r') + # check if this PED has a single record - most likely a singleton + first_cntr_check = 0 + in_han = open(in_file,'r') for line in in_han: - num_lines += 1 + first_cntr_check += 1 in_han.close() - if num_lines > 1: + if first_cntr_check != 1: + # print "WARNING: There are %s records in PED file = %s" % (first_cntr_check, file) + # print " This family will not be treated as a singleton family" continue - if num_lines == 0: - print("ERROR: empty PED file %s" % (ped_file)) - raise SystemExit - # if here, the PED file contains exactly one line - # check if all fine: parents IDs = 0, kid with known sex and is affected +# # check how many lines in the PED file - if more than one cannot be a singleton, ignore +# num_lines = 0 +# in_han = open(in_file,'r') +# for line in in_han: +# num_lines += 1 +# in_han.close() +# +# if num_lines > 1: +# #print "ERROR: found more than 1 individual in a singleton PED file = %s" % (in_file) +# #raise SystemExit +# print "WARNING: found more than 1 individual in PED file = %s, not a singleton, skipping...." % (in_file) +# continue +# if num_lines == 0: +# print "ERROR: empty PED file %s" % (file) +# raise SystemExit +# +# # if here, the PED file contains exactly one line +# # check if all fine: parents IDs = 0, kid with known sex and is affected + + + + # if here, there is exactly one record in the family PED file - keep checking and collect information CHILD_ID = 0 FAM_ID = 0 - in_han = open(ped_file,'r') + in_han = open(in_file,'r') for line in in_han: data = [x.strip() for x in line.strip().split('\t')] @@ -56,8 +85,8 @@ y_indi,y_fam = data[1].split('_') # in the internal PED file, indi_id is indiID_familyID, split if x_fam != y_fam: - print("ERROR: FAMILY_ID mismatch in %s" % (ped_file)) - print(line) + print "ERROR: FAMILY_ID mismatch in %s" % (file) + print line raise SystemExit FAM_ID = x_fam @@ -65,8 +94,8 @@ # check both parent IDs == 0 if (data[2] != '0') or (data[3] != '0'): - print("ERROR: found parent ID for a singleton child in %s" % (ped_file)) - print(line) + print "ERROR: found parent ID for a singleton child in %s" % (file) + print line raise SystemExit # make sure the sex of the child is known @@ -74,23 +103,23 @@ if (CHILD_SEX == 1) or (CHILD_SEX == 2): pass else: - print("ERROR: proband sex unknown in %s" % (ped_file)) - print(line) + print "ERROR: proband sex unknown in %s" % (file) + print line raise SystemExit # check kid is affected if int(data[5]) != 2: - print("ERROR: singleton child not affected") - print(line) + print "ERROR: singleton child not affected" + print line raise SystemExit if FAM_ID == 0: - print("ERROR: Cannot find the FAMILY_ID in %s" % (ped_file)) + print "ERROR: Cannot find the FAMILY_ID in %s" % (file) raise SystemExit if CHILD_ID == 0: - print("ERROR: Cannot find CHILD_ID in %s" % (ped_file)) + print "ERROR: Cannot find CHILD_ID in %s" % (file) raise SystemExit - elif FAM_ID not in args.exclude_families: + else: out_fam_han.write('%s\n' % (FAM_ID)) cntr_fam += 1 out_pro_han.write('%s\n' % (CHILD_ID)) @@ -102,23 +131,23 @@ out_pro_han.close() out_f_p_han.close() - print("") - print("Singleton Families:") - print(" %s FAM_IDs --> %s" % (cntr_fam, out_fam_file)) - print(" %s PRO_IDs --> %s" % (cntr_pro, out_pro_file)) - print(" %s FAM_PRO --> %s" % (cntr_f_p, out_f_p_file)) - print("") + + + print "" + print "Singleton Families:" + print " %s FAM_IDs --> %s" % (cntr_fam, out_fam_file) + print " %s PRO_IDs --> %s" % (cntr_pro, out_pro_file) + print " %s FAM_PRO --> %s" % (cntr_f_p, out_f_p_file) + print "" + + + if __name__ == '__main__': - a = argparse.ArgumentParser( - description=''' - Extract singleton FAM_ID, PRO_ID and FAM_PRO files. Suggested use: - time python /home/u035/u035/shared/scripts/extract_solo_FAM_PRO_ID.py - /home/u035/u035/shared/analysis/work/<PROJECT_ID> - ''' - ) - a.add_argument('ped_files', nargs='+') - args = a.parse_args() - a.add_argument('--exclude-families', nargs='*') - go(args) + if len(sys.argv) == 2: + go(sys.argv[1]) + else: + print "Suggested use: time python /home/u035/u035/shared/scripts/extract_solo_FAM_PRO_ID.py /home/u035/u035/shared/analysis/work/<PROJECT_ID>" + raise SystemExit + diff --git a/bin/extract_trio_FAM_PRO_ID.py b/bin/extract_trio_FAM_PRO_ID.py index 3bec2e218b0ac6ea5ce18fa9ee2c4d49580eaddc..bc10665a0881ce1f2b906b5cdc28706b6f91c1c9 100755 --- a/bin/extract_trio_FAM_PRO_ID.py +++ b/bin/extract_trio_FAM_PRO_ID.py @@ -1,18 +1,22 @@ -#!/usr/bin/env python3 # input: the work folder which contains a PED subfolder where all family PED files were copied # output: only for trio families # FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt # # Author: MH -# last modified: JUL 6, 2022 +# last modified: SEPT 05, 2023 -import argparse -def go(args): - out_fam_file = 'FAM_IDs.txt' - out_pro_file = 'PRO_IDs.txt' - out_f_p_file = 'FAM_PRO.txt' +import sys +import os + + + +def go(work_dir): + + out_fam_file = work_dir + '/FAM_IDs.txt' + out_pro_file = work_dir + '/PRO_IDs.txt' + out_f_p_file = work_dir + '/FAM_PRO.txt' out_fam_han = open(out_fam_file,'w') out_pro_han = open(out_pro_file,'w') @@ -22,18 +26,34 @@ def go(args): cntr_pro = 0 cntr_f_p = 0 + # go over the PED folder in the working dir and process each PED file - print("") - print("Processing %i PED files to extract FAM_ID, PRO_ID and FAM_PRO files" % (len(args.ped_files))) + ped_dir = work_dir + '/PED' + print "" + print "Processing the PED files (in %s) to extract FAM_ID, PRO_ID and FAM_PRO files" % (ped_dir) + + for file in os.listdir(ped_dir): # per each PED file + if file.endswith(".ped"): + print " %s" % (file) + in_file = os.path.join(ped_dir, file) + + # check if this PED has three records - most likely a trio + first_cntr_check = 0 + in_han = open(in_file,'r') + for line in in_han: + first_cntr_check += 1 + in_han.close() - for ped_file in args.ped_files: # per each PED file - if ped_file.endswith(".ped"): - print(" %s" % (ped_file)) + if first_cntr_check != 3: + # print "WARNING: There are %s records in PED file = %s" % (first_cntr_check, file) + # print " This family will not be treated as a trio family" + continue + # if here, there are exactly 3 records in the family PED file - keep checking and collect information CHILD_ID = 0 FAM_ID = 0 - in_han = open(ped_file,'r') + in_han = open(in_file,'r') for line in in_han: data = [x.strip() for x in line.strip().split('\t')] @@ -43,14 +63,14 @@ if FAM_ID == 0: FAM_ID = x_fam elif FAM_ID != x_fam: - print("ERROR: more than one FAMILY_ID in %s" % (ped_file)) + print "ERROR: more than one FAMILY_ID in %s" % (file) raise SystemExit if data[2] != '0' and data[3] != '0': # this is the child in the trio if CHILD_ID == 0: CHILD_ID = y_indi else: # seen another child - print("WARNING: already have seen a child (possibly a quad) in %s" % (ped_file)) + print "WARNING: already have seen a child (possibly a quad) in %s" % (file) CHILD_ID = 0 break @@ -58,21 +78,27 @@ if (CHILD_SEX == 1) or (CHILD_SEX == 2): pass else: - print("ERROR: proband sex unknown") - print(line) + print "ERROR: proband sex unknown" + print line raise SystemExit if int(data[5]) != 2: - print("ERROR: child in a trio not affected") - print(line) + print "ERROR: child in a trio not affected" + 
print line raise SystemExit + elif data[2] == '0' and data[3] == '0': # this is a parent in the trio + if int(data[5]) == 2: + print "WARNING: affected parent %s in a trio - still processed as a trio, but no de novo analysis" % (y_indi) + print line + + if FAM_ID == 0: - print("ERROR: Cannot find the FAMILY_ID in %s" % (ped_file)) + print "ERROR: Cannot find the FAMILY_ID in %s" % (file) raise SystemExit if CHILD_ID == 0: - print("WARNING: Cannot find exactly one CHILD_ID (with 2 available parents) in %s : not a trio --> will not be analyzed" % (ped_file)) - elif FAM_ID not in args.exclude_families: + print "WARNING: Cannot find exactly one CHILD_ID (with 2 available parents) in %s : not a trio --> will not be analyzed" % (file) + else: out_fam_han.write('%s\n' % (FAM_ID)) cntr_fam += 1 out_pro_han.write('%s\n' % (CHILD_ID)) @@ -80,27 +106,29 @@ def go(args): out_f_p_han.write('%s\t%s\n' % (FAM_ID,CHILD_ID)) cntr_f_p += 1 + in_han.close() + out_fam_han.close() out_pro_han.close() out_f_p_han.close() - print("") - print("Trio Families:") - print(" %s FAM_IDs --> %s" % (cntr_fam, out_fam_file)) - print(" %s PRO_IDs --> %s" % (cntr_pro, out_pro_file)) - print(" %s FAM_PRO --> %s" % (cntr_f_p, out_f_p_file)) - print("") + + + print "" + print "Trio Families:" + print " %s FAM_IDs --> %s" % (cntr_fam, out_fam_file) + print " %s PRO_IDs --> %s" % (cntr_pro, out_pro_file) + print " %s FAM_PRO --> %s" % (cntr_f_p, out_f_p_file) + print "" + + + if __name__ == '__main__': - a = argparse.ArgumentParser( - description=''' - Extract FAM_ID, PRO_ID and FAM_PRO files from Ped files. Suggested use: - time python /home/u035/u035/shared/scripts/extract_trio_FAM_PRO_ID.py - /home/u035/u035/shared/analysis/work/<PROJECT_ID> - ''' - ) - a.add_argument('ped_files', nargs='+') - a.add_argument('--exclude-families', nargs='*') - args = a.parse_args() - go(args) + if len(sys.argv) == 2: + go(sys.argv[1]) + else: + print "Suggested use: time python /home/u035/u035/shared/scripts/extract_trio_FAM_PRO_ID.py /home/u035/u035/shared/analysis/work/<PROJECT_ID>" + raise SystemExit + diff --git a/bin/filter_LQ_GT.py b/bin/filter_LQ_GT.py index 2e612f867280cfd82140c4599bc98c506c4fe9b5..b53a222a42fb3b4fe97121439c8ee24fee04c404 100755 --- a/bin/filter_LQ_GT.py +++ b/bin/filter_LQ_GT.py @@ -1,4 +1,3 @@ -#!/bin/env python2.7 # given a multi-sample VCF # reset the GT to ./. (no-call) for indi variant with num_ALT < num_ALT_THERSH or VAF < VAF_THRESH # @@ -51,7 +50,7 @@ def go(black_file,in_file,out_file): alt = data[4] key = '%s:%s:%s:%s' % (chr,pos,ref,alt) if key in BLACKLIST: - print " Warning: %s is a blacklisted variant, excluding it ..." % (key) + print " INFO: %s is a blacklisted variant, excluding it ..." 
% (key) continue @@ -180,6 +179,6 @@ if __name__ == '__main__': if len(sys.argv) == 4: go(sys.argv[1],sys.argv[2],sys.argv[3]) else: - print "Suggested use: time $PYTHON2 ${BLACKLIST} ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.AC0.vcf ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf" + print "Suggested use: time $PYTHON2 filter_LQ_GT.py ${BLACKLIST} ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.AC0.vcf ${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf" raise SystemExit diff --git a/bin/generate_DEC_IGV_scripts.py b/bin/generate_DEC_IGV_scripts.py index 0c347caa630e76cd198b8bef01c66110035f5545..48f9e1400ada94cb7716489dbe3236bccaf3eddd 100755 --- a/bin/generate_DEC_IGV_scripts.py +++ b/bin/generate_DEC_IGV_scripts.py @@ -1,4 +1,3 @@ -#!/bin/env python2.7 # input: # the family PED file [${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}.ped] # individual VCF file for the trio proband [${FAMILY_ID}-gatk-haplotype-annotated.${SAMPLE_ID}.vcf.gz] @@ -72,10 +71,12 @@ SOR_THRESH = float(3) -def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,child_vcf_file,mom_vcf_file,dad_vcf_file,plate_id,fam_id,child_bam,mom_bam,dad_bam, out_dec_file, out_igv_file): +def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir): + # read the decipher to internal ID mapping file read_map_file(dec_map_file) + # read the transcript mapping file read_trans_map(trans_map_file) @@ -105,6 +106,11 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir # now read the individual VCFs and record all the variants + child_vcf_file = '%s/%s_%s.ready.%s.vcf.gz' % (vcf_dir,plate_id,fam_id,CHILD_ID) + mom_vcf_file = '%s/%s_%s.ready.%s.vcf.gz' % (vcf_dir,plate_id,fam_id,MOM_ID) + dad_vcf_file = '%s/%s_%s.ready.%s.vcf.gz' % (vcf_dir,plate_id,fam_id,DAD_ID) + + read_all_VCF_vars(child_vcf_file,ALL_CHILD_DICT) print "Found %s unique VCF variants for CHILD (%s)" % (len(ALL_CHILD_DICT),CHILD_ID) sys.stdout.flush() @@ -144,11 +150,13 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir # setup the DECIPHER output file + out_dec_file = '%s/%s_DEC_FLT.csv' % (dec_dir,CHILD_ID) ################################ out_han = open(out_dec_file,'w') out_han.write('Internal reference number or ID,Chromosome,Start,Genome assembly,Reference allele,Alternate allele,Transcript,Gene name,Intergenic,Chromosomal sex,Other rearrangements/aneuploidy,Open-access consent,Age at last clinical assessment,Prenatal age in weeks,Note,Inheritance,Pathogenicity,Phenotypes,HGVS code,Genotype,Responsible contact\n') # setup the IGV snapshot file + out_igv_file = '%s/IGV/%s.snapshot.FLT.txt' % (dec_dir,CHILD_ID) ################################# out_igv_han = open(out_igv_file,'w') out_igv_han.write('new\n') out_igv_han.write('genome hg38\n') @@ -156,6 +164,9 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir out_igv_han.write('maxPanelHeight 10000\n') out_igv_han.write('new\n') + child_bam = '%s/%s/%s-ready.bam' % (fam_bam_dir,CHILD_ID,CHILD_ID) + mom_bam = '%s/%s/%s-ready.bam' % (fam_bam_dir,MOM_ID,MOM_ID) + dad_bam = '%s/%s/%s-ready.bam' % (fam_bam_dir,DAD_ID,DAD_ID) out_igv_han.write('load %s\n' % (child_bam)) out_igv_han.write('load %s\n' % (mom_bam)) out_igv_han.write('load %s\n' % (dad_bam)) @@ -171,6 +182,7 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir in_cntr = 0 out_cntr = 0 + child_vcf_file = '%s/%s_%s.ready.%s.vcf.gz' % (vcf_dir,plate_id,fam_id,CHILD_ID) in_han = 
gzip.open(child_vcf_file,'r') @@ -1331,10 +1343,10 @@ def read_trans_map(in_file): if __name__ == '__main__': - if len(sys.argv) == 17: - go(*sys.argv[1:]) + if len(sys.argv) == 12: + go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ - dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,child_vcf_file,mom_vcf_file,dad_vcf_file,plate_id,fam_id,child_bam,mom_bam,dad_bam,out_dec_file.txt,out_igv_file.txt" + dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/bin/merge_and_rename_NGI_fastq_files.py b/bin/merge_and_rename_NGI_fastq_files.py new file mode 100644 index 0000000000000000000000000000000000000000..b0ef918ce1c0bc9f04ccaf873ed8ee6760e44521 --- /dev/null +++ b/bin/merge_and_rename_NGI_fastq_files.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +import re +import os +import sys +import shutil +import argparse +import collections + +def merge_files(sample_name, read_nb, file_list, dest_dir, suffix): + outfile=os.path.join(dest_dir, "{}{}_R{}.fastq.gz".format(sample_name, suffix, read_nb)) + tomerge = [f for f in file_list.split(':') if f] # drop empty fields so the empty-input check below can fire + print("Merging the following files:") + if not tomerge: + print("No read {} files found".format(read_nb)) + return + for fq in tomerge: + print(fq) + print("as {}".format(outfile)) + with open(outfile, 'wb') as wfp: + for fn in tomerge: + with open(fn, 'rb') as rfp: + shutil.copyfileobj(rfp, wfp) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=""" Merges the given list of FASTQ files. Output as {dest_dir}/{sample_name}{suffix}_R{read_end}.fastq.gz.""") + parser.add_argument("files", nargs='?', default='.', help="Colon-delimited list of FASTQ files to merge.") + parser.add_argument("sample_name", nargs='?', default='.', help="Output sample name.") + parser.add_argument("read_end", nargs='?', default='.', help="Read end (1 or 2).") + parser.add_argument("dest_dir", nargs='?', default='.', help="Path to where the merged files should be output. ") + parser.add_argument("suffix", nargs='?', default='', help="Optional suffix for sample names in output file names, e.g. sample_R1.fastq.gz. ") + args = parser.parse_args() + merge_files(args.sample_name, args.read_end, args.files, args.dest_dir, args.suffix) + 
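merge_and_rename_NGI_fastq_files.py relies on gzip members being concatenable, so it simply byte-concatenates the inputs with shutil.copyfileobj. Positional order is files, sample_name, read_end, dest_dir, suffix; a usage sketch with invented paths:

```bash
python merge_and_rename_NGI_fastq_files.py \
    /reads/L001_R1.fastq.gz:/reads/L002_R1.fastq.gz \
    SAMPLE01 1 /merged ""
# -> /merged/SAMPLE01_R1.fastq.gz
```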
") + args = parser.parse_args() + merge_files(args.sample_name, args.read_end, args.files, args.dest_dir, args.suffix) + diff --git a/bin/old_convert_DEC_to_v10.py b/bin/old_convert_DEC_to_v10.py new file mode 100755 index 0000000000000000000000000000000000000000..f4b23df27aaaaff3fc01bb4d1a83d224d4ff01ab --- /dev/null +++ b/bin/old_convert_DEC_to_v10.py @@ -0,0 +1,78 @@ +#!/bin/env python +# given a DECIPHER bulk upload file v9 +# convert it to v10 +# +# Author: MH +# last modified: APR 25, 2022 + + + +import sys +import os +import csv +import xlsxwriter + + + + +def go(id, in_file, out_file): # the folder where the bulk upload files are strored; id is in the format <indiv_id>_<family_id> + + # create the workbook + workbook = xlsxwriter.Workbook(out_file) + + # create the worksheet + worksheet = workbook.add_worksheet('Sequence Variants') + + # write the header row + header = ('Patient internal reference number or ID','Shared','Assembly','HGVS code','Chromosome','Genomic start','Ref sequence','Alt sequence','Gene name','Transcript','Is intergenic','Genotype','Inheritance','Pathogenicity','Pathogenicity evidence','Contribution','Genotype groups') + worksheet.write_row(0,0,header) + + # now, open and read the old file, for each variant collecting the information required for v10 and writing it in the v10 file + cntr = 0 + + with open(in_file,'r') as tsvfile: + reader = csv.reader(tsvfile, delimiter=',', quotechar='"') + for row in reader: + if row[0] == 'Internal reference number or ID': # ignore the header line + continue + + cntr += 1 + id = str(row[0]) + shared = 'NHS-SCE' + assembly = 'GRCh38' + HGVS = '' + chr = str(row[1]) + start = str(row[2]) + ref = str(row[4]) + alt = str(row[5]) + gene = str(row[7]) + trans = str(row[6]) + inter = str(row[8]) + genotype = str(row[19]) + inher = str(row[15]) + if inher == 'Maternally inherited, constitutive in mother': + inher = 'Maternally inherited' + elif inher == 'Paternally inherited, constitutive in father': + inher = 'Paternally inherited' + patho = '' + evid = '' + cont = '' + gt_groups = '' + data = (id,shared,assembly,HGVS,chr,start,ref,alt,gene,trans,inter,genotype,inher,patho,evid,cont,gt_groups) + + # write it + worksheet.write_row(cntr,0,data) + + + # close the workbook + workbook.close() + + + +if __name__ == '__main__': + if len(sys.argv) == 4: + go(*sys.argv[1:]) + else: + print("Suggested use: convert_DEC_to_v10.py <indiv_id>_<family_id> <in_file> <out_file>") + raise SystemExit + diff --git a/bin/peddy_validation.pl b/bin/peddy_validation.pl index f8239522291223f9cc10bb2fb1fe1464239693fb..f25bf4831e33f8376a029203b51faf2f09ae593e 100755 --- a/bin/peddy_validation.pl +++ b/bin/peddy_validation.pl @@ -78,6 +78,7 @@ $out_fh->open($out_file, "w") or die "Could not open $out_file\n$!"; printf $out_fh "sample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n"; +my %out_lines; foreach my $family_id (sort keys %ped) { my $glob_str = sprintf("$fam_dir/*%s*/*%s*/qc/peddy/*.ped_check.csv", $family_id, $ped{$family_id}{'aff'}); @@ -88,8 +89,10 @@ foreach my $family_id (sort keys %ped) next; } - my $peddy_fh = new IO::File; - $peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!"; + foreach my $peddy_file (@peddy_glob) + { + my $peddy_fh = new IO::File; + $peddy_fh->open($peddy_file, "r") or die "Could not open $peddy_file\n$!"; my @headers; my %info; @@ -128,11 +131,16 @@ foreach my $family_id (sort keys %ped) $info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq 
$info{'predicted_parents'}{$sample_pair} ? 'False' : 'True'; - printf $out_fh "$sample_pair\t%s\t%s\t%s\n", + my $out_line = sprintf "$sample_pair\t%s\t%s\t%s", $info{'pedigree_parents'}{$sample_pair}, $info{'predicted_parents'}{$sample_pair}, $info{'parent_error'}{$sample_pair}; + + $out_lines{$out_line}++; } + } } +printf $out_fh join("\n", sort keys %out_lines); + $out_fh->close(); diff --git a/bin/qc_metrics.R b/bin/qc_metrics.R index 73e79d83321b74705cb00ebeaa1c63f9b1632e69..d5af02c82d427fc9ce338bc0e441e0b79533c858 100644 --- a/bin/qc_metrics.R +++ b/bin/qc_metrics.R @@ -1,6 +1,7 @@ x = read.table("multiqc_general_stats.txt", header=T, stringsAsFactors=F, sep="\t") -y = read.table("samples_not_on_run.txt", header=F, stringsAsFactors=F) -z = subset(x, !(x$Sample %in% y$V1)) +#y = read.table("samples_not_on_run.txt", header=F, stringsAsFactors=F) +#z = subset(x, !(x$Sample %in% y$V1)) +z = x print("Total reads < 50M") diff --git a/bin/extract_quad_FAM_PRO_ID.py b/bin/redo_extract_quad_FAM_PRO_ID.py similarity index 100% rename from bin/extract_quad_FAM_PRO_ID.py rename to bin/redo_extract_quad_FAM_PRO_ID.py diff --git a/bin/extract_shared_FAM_PRO_ID.py b/bin/redo_extract_shared_FAM_PRO_ID.py similarity index 100% rename from bin/extract_shared_FAM_PRO_ID.py rename to bin/redo_extract_shared_FAM_PRO_ID.py diff --git a/bin/redo_extract_solo_FAM_PRO_ID.py b/bin/redo_extract_solo_FAM_PRO_ID.py new file mode 100755 index 0000000000000000000000000000000000000000..c7a9451b59a53912c4180f39206c45fc17e54c61 --- /dev/null +++ b/bin/redo_extract_solo_FAM_PRO_ID.py @@ -0,0 +1,153 @@ +# input: the work folder which contains a PED subfolder where all family PED files were copied +# output: only for singletons +# solo_FAM_IDs.txt, solo_PRO_IDs.txt and solo_FAM_PRO.txt +# +# Author: MH +# last modified: SEPT 05, 2023 + + + +import sys +import os + + + +def go(work_dir): + + out_fam_file = work_dir + '/solo_FAM_IDs.txt' + out_pro_file = work_dir + '/solo_PRO_IDs.txt' + out_f_p_file = work_dir + '/solo_FAM_PRO.txt' + + out_fam_han = open(out_fam_file,'w') + out_pro_han = open(out_pro_file,'w') + out_f_p_han = open(out_f_p_file,'w') + + cntr_fam = 0 + cntr_pro = 0 + cntr_f_p = 0 + + + # go over the PED folder in the working dir and process each PED file + ped_dir = work_dir + '/PED' + print "" + print "Processing the PED files (in %s) to extract singleton FAM_ID, PRO_ID amd FAM_PRO files" % (ped_dir) + + + + for file in os.listdir(ped_dir): # per each PED file + if file.endswith(".ped"): + + print " %s" % (file) + in_file = os.path.join(ped_dir, file) + + # check if this PED has a single record - most likely a singleton + first_cntr_check = 0 + in_han = open(in_file,'r') + for line in in_han: + first_cntr_check += 1 + in_han.close() + + if first_cntr_check != 1: + print "WARNING: There are %s records in PED file = %s" % (first_cntr_check, file) + print " This family will not be treated as a singleton family!!!!" + continue + +# # check how many lines in the PED file - if more than one cannot be a singleton, ignore +# num_lines = 0 +# in_han = open(in_file,'r') +# for line in in_han: +# num_lines += 1 +# in_han.close() +# +# if num_lines > 1: +# #print "ERROR: found more than 1 individual in a singleton PED file = %s" % (in_file) +# #raise SystemExit +# print "WARNING: found more than 1 individual in PED file = %s, not a singleton, skipping...." 
% (in_file) +# continue +# if num_lines == 0: +# print "ERROR: empty PED file %s" % (file) +# raise SystemExit +# +# # if here, the PED file contains exactly one line +# # check if all fine: parents IDs = 0, kid with known sex and is affected + + + + # if here, there is exactly one record in the family PED file - keep checking and collect information + CHILD_ID = 0 + FAM_ID = 0 + + in_han = open(in_file,'r') + for line in in_han: + data = [x.strip() for x in line.strip().split('\t')] + + x_plate,x_fam = data[0].split('_') # in the internal PED file, family_id is plateID_familyID, will keep only clean family_id, which corresponds to DECIPHER ID + y_indi,y_fam = data[1].split('_') # in the internal PED file, indi_id is indiID_familyID, split + + if x_fam != y_fam: + print "ERROR: FAMILY_ID mismatch in %s" % (file) + print line + raise SystemExit + + FAM_ID = x_fam + CHILD_ID = y_indi + + # check both parent IDs == 0 + if (data[2] != '0') or (data[3] != '0'): + print "ERROR: found parent ID for a singleton child in %s" % (file) + print line + raise SystemExit + + # make sure the sex of the child is known + CHILD_SEX = int(data[4]) + if (CHILD_SEX == 1) or (CHILD_SEX == 2): + pass + else: + print "ERROR: proband sex unknown in %s" % (file) + print line + raise SystemExit + + # check kid is affected + if int(data[5]) != 2: + print "ERROR: singleton child not affected" + print line + raise SystemExit + + if FAM_ID == 0: + print "ERROR: Cannot find the FAMILY_ID in %s" % (file) + raise SystemExit + if CHILD_ID == 0: + print "ERROR: Cannot find CHILD_ID in %s" % (file) + raise SystemExit + else: + out_fam_han.write('%s\n' % (FAM_ID)) + cntr_fam += 1 + out_pro_han.write('%s\n' % (CHILD_ID)) + cntr_pro += 1 + out_f_p_han.write('%s\t%s\n' % (FAM_ID,CHILD_ID)) + cntr_f_p += 1 + + out_fam_han.close() + out_pro_han.close() + out_f_p_han.close() + + + + print "" + print "Singleton Families:" + print " %s FAM_IDs --> %s" % (cntr_fam, out_fam_file) + print " %s PRO_IDs --> %s" % (cntr_pro, out_pro_file) + print " %s FAM_PRO --> %s" % (cntr_f_p, out_f_p_file) + print "" + + + + + +if __name__ == '__main__': + if len(sys.argv) == 2: + go(sys.argv[1]) + else: + print "Suggested use: time python /home/u035/u035/shared/scripts/redo_extract_solo_FAM_PRO_ID.py /home/u035/u035/shared/analysis/work/<PROJECT_ID>" + raise SystemExit +
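Like the script it replaces, the redo_* variant takes the work folder, scans <work_dir>/PED/*.ped, and writes its three lists back into <work_dir>; the behavioural difference is that it now warns, rather than silently skipping, when a PED file does not hold exactly one record. Invocation sketch, with ${PYTHON2} as used elsewhere in the pipeline:

```bash
WORK_DIR=/home/u035/u035/shared/analysis/work/<PROJECT_ID>
time ${PYTHON2} redo_extract_solo_FAM_PRO_ID.py ${WORK_DIR}
wc -l ${WORK_DIR}/solo_FAM_IDs.txt ${WORK_DIR}/solo_PRO_IDs.txt ${WORK_DIR}/solo_FAM_PRO.txt
```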
diff --git a/bin/redo_extract_trio_FAM_PRO_ID.py b/bin/redo_extract_trio_FAM_PRO_ID.py new file mode 100755 index 0000000000000000000000000000000000000000..3d02a30731bfbcb57fa52141d77907a4c431087d --- /dev/null +++ b/bin/redo_extract_trio_FAM_PRO_ID.py @@ -0,0 +1,134 @@ +# input: the work folder which contains a PED subfolder where all family PED files were copied +# output: only for trio families +# FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt +# +# Author: MH +# last modified: SEPT 05, 2023 + + + +import sys +import os + + + +def go(work_dir): + + out_fam_file = work_dir + '/FAM_IDs.txt' + out_pro_file = work_dir + '/PRO_IDs.txt' + out_f_p_file = work_dir + '/FAM_PRO.txt' + + out_fam_han = open(out_fam_file,'w') + out_pro_han = open(out_pro_file,'w') + out_f_p_han = open(out_f_p_file,'w') + + cntr_fam = 0 + cntr_pro = 0 + cntr_f_p = 0 + + + # go over the PED folder in the working dir and process each PED file + ped_dir = work_dir + '/PED' + print "" + print "Processing the PED files (in %s) to extract FAM_ID, PRO_ID and FAM_PRO files" % (ped_dir) + + for file in os.listdir(ped_dir): # per each PED file + if file.endswith(".ped"): + print " %s" % (file) + in_file = os.path.join(ped_dir, file) + + # check if this PED has three records - most likely a trio + first_cntr_check = 0 + in_han = open(in_file,'r') + for line in in_han: + first_cntr_check += 1 + in_han.close() + + if first_cntr_check != 3: + print "WARNING: There are %s records in PED file = %s" % (first_cntr_check, file) + print " This family will not be treated as a trio family!!!" + continue + + # if here, there are exactly 3 records in the family PED file - keep checking and collect information + CHILD_ID = 0 + FAM_ID = 0 + + in_han = open(in_file,'r') + for line in in_han: + data = [x.strip() for x in line.strip().split('\t')] + + x_plate,x_fam = data[0].split('_') # in the internal PED file, family_id is plateID_familyID, will keep only clean family_id, which corresponds to DECIPHER ID + y_indi,y_fam = data[1].split('_') # in the internal PED file, indi_id is indiID_familyID, split + + if FAM_ID == 0: + FAM_ID = x_fam + elif FAM_ID != x_fam: + print "ERROR: more than one FAMILY_ID in %s" % (file) + raise SystemExit + + if data[2] != '0' and data[3] != '0': # this is the child in the trio + if CHILD_ID == 0: + CHILD_ID = y_indi + else: # seen another child + print "WARNING: already have seen a child (possibly a quad) in %s" % (file) + CHILD_ID = 0 + break + + CHILD_SEX = int(data[4]) + if (CHILD_SEX == 1) or (CHILD_SEX == 2): + pass + else: + print "ERROR: proband sex unknown" + print line + raise SystemExit + + if int(data[5]) != 2: + print "ERROR: child in a trio not affected" + print line + raise SystemExit + + elif data[2] == '0' and data[3] == '0': # this is a parent in the trio + if int(data[5]) == 2: + print "WARNING: affected parent %s in a trio - still processed as a trio, but no de novo analysis" % (y_indi) + print line + + + if FAM_ID == 0: + print "ERROR: Cannot find the FAMILY_ID in %s" % (file) + raise SystemExit + if CHILD_ID == 0: + print "WARNING: Cannot find exactly one CHILD_ID (with 2 available parents) in %s : not a trio --> will not be analyzed" % (file) + else: + out_fam_han.write('%s\n' % (FAM_ID)) + cntr_fam += 1 + out_pro_han.write('%s\n' % (CHILD_ID)) + cntr_pro += 1 + out_f_p_han.write('%s\t%s\n' % (FAM_ID,CHILD_ID)) + cntr_f_p += 1 + + in_han.close() + + out_fam_han.close() + out_pro_han.close() + out_f_p_han.close() + + + + print "" + print "Trio Families:" + print " %s FAM_IDs --> %s" % (cntr_fam, out_fam_file) + print " %s PRO_IDs --> %s" % (cntr_pro, out_pro_file) + print " %s FAM_PRO --> %s" % (cntr_f_p, out_f_p_file) + print "" + + + + + +if __name__ == '__main__': + if len(sys.argv) == 2: + go(sys.argv[1]) + else: + print "Suggested use: time python /home/u035/u035/shared/scripts/redo_extract_trio_FAM_PRO_ID.py /home/u035/u035/shared/analysis/work/<PROJECT_ID>" + raise SystemExit + diff --git a/bin/redo_generate_DEC_IGV_trio_scripts.py b/bin/redo_generate_DEC_IGV_trio_scripts.py index 96e12e72c7a200933ee23ade4ad408f7b13f9563..62cce17bbd21b06c11212571115e6f318bae82b3 100755 --- a/bin/redo_generate_DEC_IGV_trio_scripts.py +++ b/bin/redo_generate_DEC_IGV_trio_scripts.py @@ -56,7 +56,7 @@ ALL_MOM_DICT = {} # key: chr:pos:ref:alt; value: irrelevant ALL_DAD_DICT = {} # key: chr:pos:ref:alt; value: irrelevant -CHILD_INHER_DICT = {} # key: chr:pos:ref:alt; value: 'Paternally inherited, constitutive in father' | 'Maternally inherited, constitutive in mother' | 'Biparental' | 'De novo constitutive' | 'Unknown' +CHILD_INHER_DICT = {} # key: chr:pos:ref:alt; value: 'Paternally inherited, constitutive in father' | 'Maternally inherited, constitutive in mother' | 'Biparental' | 'De novo (parentage confirmed)' | 'Unknown' #+#SNAP_FLANK = 25 IGV_PAD = 25 @@ -240,7 +240,7 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir if is_denovo: if inher_stat == 'Unknown': - inher_stat = 'De novo constitutive' + inher_stat = 'De novo (parentage confirmed)' else: print "ERROR: %s is both VASE denovo and %s from VCF" % (key,inher_stat) raise SystemExit @@ -337,7 +337,7 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir if is_denovo: if inher_stat == 'Unknown': - inher_stat = 'De novo constitutive' + inher_stat = 'De novo (parentage confirmed)' else: print "ERROR: %s is both VASE denovo and %s from VCF" % (key,inher_stat) raise SystemExit @@ -433,7 +433,7 @@ def go(dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir if is_denovo: if inher_stat == 'Unknown': - inher_stat = 'De novo constitutive' + inher_stat = 'De novo (parentage confirmed)' else: print "ERROR: %s is both VASE denovo and %s from VCF" % (key,inher_stat) raise SystemExit diff --git a/bin/redo_hunt_novel_shared_vars.py b/bin/redo_hunt_novel_shared_vars.py index 59881955fc7bcbb45b555f89720406c9e96d1e51..2948028be221a41a83680d89de4430b2a1edf0f2 100755 --- a/bin/redo_hunt_novel_shared_vars.py +++ b/bin/redo_hunt_novel_shared_vars.py @@ -21,7 +21,7 @@ # # # Author: MH -# last modified: AUG 24, 2023 +# last modified: SEPT 06, 2023 @@ -45,12 +45,14 @@ def go(old_DEC_DIR,new_DEC_DIR,FAMILY_ID): for file in os.listdir(new_DEC_DIR): if file.startswith("IGV"): continue + if file.find(FAMILY_ID) == -1: # DECIPHER upload file *not* for this family + continue + # DECIPHER upload file for this FAMILY_ID data = [x.strip() for x in file.strip().split('_')] if str(data[0]) not in INDIS: INDIS.append(str(data[0])) - # now, process each indivdual - + # now, process each individual from this family for INDI_ID in INDIS: INDI_FAM = "%s_%s" % (INDI_ID,FAMILY_ID) diff --git a/bin/redo_setup_quad.sh b/bin/redo_setup_quad.sh index 9ccbdb3623a4283ffb75a3feda7232e4a5f3af53..312a0ad9bfa31fc708a740a038ef5edaa5d616fd 100755 --- a/bin/redo_setup_quad.sh +++ b/bin/redo_setup_quad.sh @@ -114,7 +114,7 @@ echo "" ### generate the FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt *only for quad* families ### ########################################################################################### -time ${PYTHON2} ${SCRIPTS_DIR}/extract_quad_FAM_PRO_ID.py ${WORK_DIR} +time ${PYTHON2} ${SCRIPTS_DIR}/redo_extract_quad_FAM_PRO_ID.py ${WORK_DIR} @@ -129,7 +129,7 @@ echo "Copying the the previous DECIPHER upload files ..." 
mkdir ${WORK_DIR}/previous_DECIPHER_upload while IFS=$'\t' read -r -a myArray do - old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/*${myArray[2]}* + old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/quad/*${myArray[2]}* cp ${old_DEC_FILE} ${WORK_DIR}/previous_DECIPHER_upload/ done < ${INPUT_DIR}/params/quad.txt #cp -r ${INPUT_DIR}/previous_DECIPHER_upload ${WORK_DIR} diff --git a/bin/redo_setup_shared.sh b/bin/redo_setup_shared.sh index fa8401e784f595f10f14da66bec8b0bef545fe1b..d36eda4234f0361080b041d0f15f63e852904dff 100755 --- a/bin/redo_setup_shared.sh +++ b/bin/redo_setup_shared.sh @@ -111,7 +111,7 @@ echo "" ### generate the FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt *only for shared* families ### ########################################################################################### -time ${PYTHON2} ${SCRIPTS_DIR}/extract_shared_FAM_PRO_ID.py ${WORK_DIR} +time ${PYTHON2} ${SCRIPTS_DIR}/redo_extract_shared_FAM_PRO_ID.py ${WORK_DIR} @@ -126,7 +126,7 @@ echo "Copying the the previous DECIPHER upload files ..." mkdir ${WORK_DIR}/previous_DECIPHER_upload while IFS=$'\t' read -r -a myArray do - old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/*${myArray[2]}* + old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/shared_affected/*${myArray[2]}* cp ${old_DEC_FILE} ${WORK_DIR}/previous_DECIPHER_upload/ done < ${INPUT_DIR}/params/shared_affected.txt #cp -r ${INPUT_DIR}/previous_DECIPHER_upload ${WORK_DIR} diff --git a/bin/redo_setup_solo.sh b/bin/redo_setup_solo.sh index 67ef05ccc5a1503f4751daaf7804e90703647e93..fb10f0ac0787685a84403c72d2b767acfeabb610 100755 --- a/bin/redo_setup_solo.sh +++ b/bin/redo_setup_solo.sh @@ -8,7 +8,7 @@ # Author: MH -# last modified: AUG 29, 2023 +# last modified: AUG 31, 2023 ################################################################################################### @@ -106,7 +106,7 @@ echo "" ### generate the FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt *only for singleton* families ### ########################################################################################### -time ${PYTHON2} ${SCRIPTS_DIR}/extract_solo_FAM_PRO_ID.py ${WORK_DIR} +time ${PYTHON2} ${SCRIPTS_DIR}/redo_extract_solo_FAM_PRO_ID.py ${WORK_DIR} @@ -119,7 +119,7 @@ echo "Copying the the previous DECIPHER upload files ..." mkdir ${WORK_DIR}/previous_DECIPHER_upload while IFS=$'\t' read -r -a myArray do - old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/*${myArray[2]}* + old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/singleton/*${myArray[2]}* cp ${old_DEC_FILE} ${WORK_DIR}/previous_DECIPHER_upload/ done < ${INPUT_DIR}/params/singleton.txt #cp -r ${INPUT_DIR}/previous_DECIPHER_upload ${WORK_DIR} diff --git a/bin/redo_setup_trio.sh b/bin/redo_setup_trio.sh index 587ae56a4bdd85e6988dbfa9ede738a1778e4c0f..ed290f3baf620f9ca1472ef7a879566d42df79a2 100755 --- a/bin/redo_setup_trio.sh +++ b/bin/redo_setup_trio.sh @@ -8,7 +8,7 @@ # Author: MH -# last modified: AUG 29, 2023 +# last modified: AUG 31, 2023 ########################################################################################### @@ -110,7 +110,7 @@ echo "" ### generate the FAM_IDs.txt, PRO_IDs.txt and FAM_PRO.txt *only for trio* families ### ###################################################################################### -time ${PYTHON2} ${SCRIPTS_DIR}/extract_trio_FAM_PRO_ID.py ${WORK_DIR} +time ${PYTHON2} ${SCRIPTS_DIR}/redo_extract_trio_FAM_PRO_ID.py ${WORK_DIR} @@ -126,7 +126,7 @@ echo "Copying the the previous DECIPHER upload files ..." 
mkdir ${WORK_DIR}/previous_DECIPHER_upload while IFS=$'\t' read -r -a myArray do - old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/*${myArray[2]}* + old_DEC_FILE=${INPUT_DIR}/previous_DECIPHER_upload/trio/*${myArray[2]}* cp ${old_DEC_FILE} ${WORK_DIR}/previous_DECIPHER_upload/ done < ${INPUT_DIR}/params/trio.txt #cp -r ${INPUT_DIR}/previous_DECIPHER_upload ${WORK_DIR} @@ -135,7 +135,6 @@ echo "" - echo "" echo "" echo "OK: Setup for *trio* PROJECT_ID = $PROJECT_ID successful" diff --git a/bin/select_decipher_upload_files.pl b/bin/select_decipher_upload_files.pl index b16a529a97c5b856cf20855f9cd6ea421a5d4ff5..0ff0b5a548833838564be9f7430812b910919b69 100644 --- a/bin/select_decipher_upload_files.pl +++ b/bin/select_decipher_upload_files.pl @@ -3,6 +3,20 @@ use IO::File; use strict; +my %all; +my $all_fh = new IO::File; +$all_fh->open("../params/all.txt", "r") or die "Could not open ../params/all.txt\n$!"; + +while (my $line = <$all_fh>) +{ + chomp $line; + my ($project, $batch, $family) = split('\t', $line); + + $all{$family} = $batch; +} + +$all_fh->close(); + my %files; while (my $line = <>) { @@ -11,6 +25,16 @@ while (my $line = <>) my $filename = $tokens[-1]; my $family = $tokens[-2]; + if ($line =~ /20220418_reanalysis/) + { + my $bare_family = $family; + $bare_family =~ s/_shared//; + + my $new_family = $all{$bare_family} . "_" . $family; +# print "Mapping $family to $new_family\n"; + $family = $new_family; + } + # get the last modified time of the file my $fh = new IO::File; $fh->open($line, "r") or die "Could not open $line\n$!"; @@ -52,12 +76,17 @@ foreach my $family (keys %files) } my @times = sort { $a <=> $b } keys %{ $files{$family}{$indv}{"DECIPHER_v11.xlsx"}{$filenames[0]} }; - my $file_to_use = $files{$family}{$indv}{"DECIPHER_v11.xlsx"}{$filenames[0]}{$times[-1]}; - `mkdir -p $family/$indv`; - `cp $file_to_use $family/$indv/`; + my $file_to_use = $files{$family}{$indv}{"DECIPHER_v11.xlsx"}{$filenames[0]}{$times[0]}; + if (scalar(@times) > 0) + { + $file_to_use = $files{$family}{$indv}{"DECIPHER_v11.xlsx"}{$filenames[0]}{$times[-1]}; + } + +# `mkdir -p $family/$indv`; +# `cp $file_to_use $family/$indv/`; -# printf "$family\t$indv\tDECIPHER_v11\t$filenames[0]\t$times[-1]\t%s\n", $files{$family}{$indv}{"DECIPHER_v11.xlsx"}{$filenames[0]}{$times[-1]}; + printf "$family\t$indv\tDECIPHER_v11.xlsx\t$filenames[0]\t$times[-1]\t$file_to_use\n"; } elsif (exists($files{$family}{$indv}{"DECIPHER_v10.xlsx"})) { @@ -67,14 +96,18 @@ foreach my $family (keys %files) printf STDERR "More than one file for $family $indv DECIPHER_v10.xlsx: %s\n", join(", ", keys %{ $files{$family}{$indv}{"DECIPHER_v10.xlsx"} }); exit; } - + my @times = sort { $a <=> $b } keys %{ $files{$family}{$indv}{"DECIPHER_v10.xlsx"}{$filenames[0]} }; - my $file_to_use = $files{$family}{$indv}{"DECIPHER_v11.xlsx"}{$filenames[0]}{$times[-1]}; + my $file_to_use = $files{$family}{$indv}{"DECIPHER_v10.xlsx"}{$filenames[0]}{$times[0]}; + if (scalar(@times) > 0) + { + $file_to_use = $files{$family}{$indv}{"DECIPHER_v10.xlsx"}{$filenames[0]}{$times[-1]}; + } - `mkdir -p $family/$indv`; - `cp $file_to_use $family/$indv/`; +# `mkdir -p $family/$indv`; +# `cp $file_to_use $family/$indv/`; -# printf "$family\t$indv\tDECIPHER_v10\t$filenames[0]\t$times[-1]\t%s\n", $files{$family}{$indv}{"DECIPHER_v10.xlsx"}{$filenames[0]}{$times[-1]}; + printf "$family\t$indv\tDECIPHER_v10.xlsx\t$filenames[0]\t$times[-1]\t$file_to_use\n"; } elsif (exists($files{$family}{$indv}{"DEC_FLT.csv"})) { @@ -86,12 +119,17 @@ foreach my $family (keys %files) 
} my @times = sort { $a <=> $b } keys %{ $files{$family}{$indv}{"DEC_FLT.csv"}{$filenames[0]} }; - my $file_to_use = $files{$family}{$indv}{"DECIPHER_v11.xlsx"}{$filenames[0]}{$times[-1]}; - `mkdir -p $family/$indv`; - `cp $file_to_use $family/$indv/`; + my $file_to_use = $files{$family}{$indv}{"DEC_FLT.csv"}{$filenames[0]}{$times[0]}; + if (scalar(@times) > 0) + { + $file_to_use = $files{$family}{$indv}{"DEC_FLT.csv"}{$filenames[0]}{$times[-1]}; + } + +# `mkdir -p $family/$indv`; +# `cp $file_to_use $family/$indv/`; -# printf "$family\t$indv\tDECIPHER_v10\t$filenames[0]\t$times[-1]\t%s\n", $files{$family}{$indv}{"DEC_FLT.csv"}{$filenames[0]}{$times[-1]}; + printf "$family\t$indv\tDEC_FLT.csv\t$filenames[0]\t$times[-1]\t$file_to_use\n"; } } } diff --git a/bin/submit_trio_wes_rerun_vep.sh b/bin/submit_trio_wes_rerun_vep.sh index b01a032493893485a2adab96213b4983fbbed8d8..297488e0abc56d5bfcfa49bb5ddac0478c2392b4 100755 --- a/bin/submit_trio_wes_rerun_vep.sh +++ b/bin/submit_trio_wes_rerun_vep.sh @@ -43,7 +43,7 @@ export PATH=/mnt/e1000/home/u035/u035/shared/software/bcbio/anaconda/bin:"$PATH" -i $NOANN_VCF_FILE \ --fork 16 \ --species homo_sapiens \ ---no_stats \ +--stats_file ${VCF_FILE%.vcf.gz}.stats.txt --stats_text \ --cache \ --offline \ --dir /mnt/e1000/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ diff --git a/bin/trio_wes_prepare_bcbio_config_crf.sh b/bin/trio_wes_prepare_bcbio_config_crf.sh index c1c1d77a446b07312aee962fd66788707680a614..93e9605da48c031f6b235e5b0984bdfb304c1e2e 100755 --- a/bin/trio_wes_prepare_bcbio_config_crf.sh +++ b/bin/trio_wes_prepare_bcbio_config_crf.sh @@ -99,13 +99,16 @@ do COMPRESSED_ID=`echo "$FAMILY_ID" | perl -pe "s/\_//"` perl -i -pe "s/${COMPRESSED_ID}/${FAMILY_ID}/" $CONFIG_DIR/$PREFIX.yaml + perl -pi -e "s/\'\[/\[/" $CONFIG_DIR/${PREFIX}.yaml + perl -pi -e "s/\]\'/\]/" $CONFIG_DIR/${PREFIX}.yaml + perl -pi -e "s|\./results|$OUTPUT_DIR/${SHORT_PROJECT_ID}_${VERSION}/families|" $CONFIG_DIR/${PREFIX}.yaml rm -r $PREFIX mkdir -p ${OUTPUT_DIR}/${SHORT_PROJECT_ID}_${VERSION}/params/ mv ${PREFIX}.csv ${OUTPUT_DIR}/${SHORT_PROJECT_ID}_${VERSION}/params/ - mv ${PROJECT_ID}_${FAMILY_ID}.ped ${OUTPUT_DIR}/${SHORT_PROJECT_ID}_${VERSION}/params/ + cp ${PROJECT_ID}_${FAMILY_ID}.ped ${OUTPUT_DIR}/${SHORT_PROJECT_ID}_${VERSION}/params/ done -mv *.txt *.log *.ped ${OUTPUT_DIR}/${SHORT_PROJECT_ID}_${VERSION}/params/ +cp *.txt *.log *.ped ${OUTPUT_DIR}/${SHORT_PROJECT_ID}_${VERSION}/params/ diff --git a/bin/trio_wes_prepare_bcbio_config_singleton_from_duo.sh b/bin/trio_wes_prepare_bcbio_config_singleton_from_duo.sh index 03c7e4a5a30295cc374e778a5dee1824e52be056..1d36a7f6cfcd93544abf38cf693273383a702dc5 100755 --- a/bin/trio_wes_prepare_bcbio_config_singleton_from_duo.sh +++ b/bin/trio_wes_prepare_bcbio_config_singleton_from_duo.sh @@ -1,106 +1,49 @@ #!/bin/bash # -# trio_wes_prepare_bcbio_singleton_from_duo_config.sh <config.sh> <project_id> <params> +# trio_wes_prepare_bcbio_config_singleton_from_duo.sh <config.sh> <project_id> <version> # -# Assumes that reads for the samples are in the path -# $READS_DIR/<project_id>/<date>/<sample><sample_suffix>/*.gz, -# and that no samples other than those with reads are listed in the -# PED file. $READS_DIR is specified in the <config.sh> file. -# -# Assumes that the sample names in the PED file match those -# specifying the read directories with the addition of a specified -# suffix. -# # All samples must be annotated with sex (1=male, 2=female) in the # 5th column and phenotype (1=unaffected, 2=affected) in the 6th # column of the PED file. # -# Runs bcbio sample preparation and configuration file generation, -# assuming the template configuration file is at $BCBIO_TEMPLATE, -# specified in the <config.sh> file. -# -# Assumes bcbio is on the PATH (set in <config.sh>). +# Assumes the duo config and ped file exist with the original family names (i.e. no duo suffix). # CONFIG_SH=$1 PROJECT_ID=$2 -PARAMS=$3 +VERSION=$3 source $CONFIG_SH -# -# Create the file $PROJECT_ID.family_ids.txt -# cd $PARAMS_DIR -cat *.ped | cut -f 1 > $PROJECT_ID.family_ids.txt - SHORT_PROJECT_ID=`echo $PROJECT_ID | cut -f 1 -d '_'` - -COUNT=`wc -l ${PROJECT_ID}.family_ids.txt | awk '{ print $1 }'` +COUNT=`wc -l ${PROJECT_ID}.singleton_from_duo.txt | awk '{ print $1 }'` for ((i = 1; i <= $COUNT; i = i + 1)) do + FAMILY_ID=`head -n $i ${PROJECT_ID}.singleton_from_duo.txt | tail -n 1` + PROBAND_ID=`grep 2$ ${PROJECT_ID}_${FAMILY_ID}.ped | cut -f 2` - ORIG_PROJECT_ID=`head -n $i $PARAMS | tail -n 1 | cut -f 1 -d '_'` - ORIG_VERSION=`head -n $i $PARAMS | tail -n 1 | cut -f 1 | cut -f 2 -d '_'` - BATCH_ID=`head -n $i $PARAMS | tail -n 1 | cut -f 2` - FAMILY_ID=`head -n $i $PARAMS | tail -n 1 | cut -f 3` - - SAMPLE=`cut -f 2 *_${FAMILY_ID}.ped` - SEX=`cut -f 5 *_${FAMILY_ID}.ped` - PHENOTYPE=`cut -f 6 *_${FAMILY_ID}.ped` - - PREFIX=${ORIG_PROJECT_ID}_${ORIG_VERSION}_${BATCH_ID}_${FAMILY_ID} - echo "samplename,description,batch,sex,phenotype,variant_regions" > ${PREFIX}.csv - len=`expr length $ORIG_PROJECT_ID` - - if [ $len -eq 5 ] - then - mkdir -p $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE - - for FILE in `find $DOWNLOAD_DIR/$ORIG_PROJECT_ID* -wholename "*$SAMPLE*/*_1_*_1.fastq.gz"` - do - newname=`basename $FILE | sed -e 's/_1_/_one_/'` - ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%1.fastq.gz}R1.fastq.gz - done - for FILE in `find $DOWNLOAD_DIR/$ORIG_PROJECT_ID* -wholename "*$SAMPLE*/*_1_*_2.fastq.gz"` - do - newname=`basename $FILE | sed -e 's/_1_/_one_/'` - ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%2.fastq.gz}R2.fastq.gz - done - for FILE in `find $DOWNLOAD_DIR/$ORIG_PROJECT_ID* -wholename "*$SAMPLE*/*_2_*_1.fastq.gz"` - do - newname=`basename $FILE | sed -e 's/_2_/_two_/'` - ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%1.fastq.gz}R1.fastq.gz - done - for FILE in `find $DOWNLOAD_DIR/$ORIG_PROJECT_ID* -wholename "*$SAMPLE*/*_2_*_2.fastq.gz"` - do - newname=`basename $FILE | sed -e 's/_2_/_two_/'` - ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%2.fastq.gz}R2.fastq.gz - done - - for FILE in `ls $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/*_R[1,2].fastq.gz` - do - echo "$FILE,$SAMPLE,${BATCH_ID}_${FAMILY_ID},$SEX,$PHENOTYPE,$TARGET" >> ${PREFIX}.csv - done - - else - for FILE in `ls $DOWNLOAD_DIR/$ORIG_PROJECT_ID*/*${SAMPLE}*.gz` - do - echo "$FILE,$SAMPLE,${BATCH_ID}_${FAMILY_ID},$SEX,$PHENOTYPE,$TARGET" >> $PREFIX.csv - done - fi - - bcbio_prepare_samples.py --out $READS_DIR/$PROJECT_ID --csv ${PREFIX}.csv + # rename the original PED file with duo suffix + mv ${PROJECT_ID}_${FAMILY_ID}.ped ${PROJECT_ID}_${FAMILY_ID}duo.ped - mv ${PREFIX}-merged.csv ${PREFIX}.csv + # create the proband PED file - set parent ids to 0 + grep $PROBAND_ID ${PROJECT_ID}_${FAMILY_ID}duo.ped | awk '{ print $1 "\t" $2 "\t0\t0\t" $5 "\t" $6 }' > ${PROJECT_ID}_${FAMILY_ID}.ped - bcbio_nextgen.py -w template $BCBIO_TEMPLATE ${PREFIX}.csv 
$READS_DIR/$PROJECT_ID/*_${FAMILY_ID}_R[12].fastq.gz + # move into the config folder and rename the original YAML file with duo suffix + cd $CONFIG_DIR + mv ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}.yaml ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}duo.yaml - mv ${PREFIX}/config/${PREFIX}.yaml $CONFIG_DIR/ + # create the singleton config file from the duo file + head -n 1 ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}duo.yaml > ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}.yaml + affected=`grep -n 'phenotype: 2' ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}duo.yaml | cut -f 1 -d ':'` + head -n $((affected + 1)) ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}duo.yaml | tail -n 30 >> ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}.yaml + tail -n 6 ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}duo.yaml >> ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}.yaml - perl -i -pe "s/${BATCH_ID}${FAMILY_ID}/${BATCH_ID}_${FAMILY_ID}/" $CONFIG_DIR/${PREFIX}.yaml + # change family id to add duo suffix for duo YAML + perl -pi -e "s/${FAMILY_ID}/${FAMILY_ID}duo/" ${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}duo.yaml - rm -r ${PREFIX} + # move back to params folder + cd $PARAMS_DIR done diff --git a/bin/trio_whole_exome_config_illumina_twist.sh b/bin/trio_whole_exome_config_illumina_twist.sh new file mode 100644 index 0000000000000000000000000000000000000000..377c9a0ebfef473614da25fa96173d213ee1fd65 --- /dev/null +++ b/bin/trio_whole_exome_config_illumina_twist.sh @@ -0,0 +1,24 @@ +#!/usr/bin/bash +# +# Basic configuration options for trio WES pipeline +# + +# primary locations +BASE=/home/u035/u035/shared +SCRIPTS=$BASE/scripts/bin +DOWNLOAD_DIR=$BASE/data +OUTPUT_DIR=$BASE/results + +# resource locations +BCBIO_TEMPLATE=$BASE/scripts/assets/trio_whole_exome_bcbio_template.yaml +TARGET=$BASE/resources/exome_targets/hg38_Twist_ILMN_Exome_2.0_Plus_Panel_annotated_June2022.plus15bp.bed +REFERENCE_GENOME=$BASE/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa + +# temporary working files +PARAMS_DIR=$BASE/analysis/params +READS_DIR=$BASE/analysis/reads +CONFIG_DIR=$BASE/analysis/config +WORK_DIR=$BASE/analysis/work +LOGS_DIR=$BASE/analysis/logs + +export PATH=$BASE/software/bcbio/tools/bin:$PATH diff --git a/docs/SOP_reanalysis_preparation.md b/docs/SOP_reanalysis_preparation.md index fb425255595d4992eb8f5ca8e2e30778eb100b61..283938dd152298f356aba2f6ea26b22efa9338dd 100644 --- a/docs/SOP_reanalysis_preparation.md +++ b/docs/SOP_reanalysis_preparation.md @@ -200,9 +200,9 @@ cd .. 8. Copy DECIPHER upload files from previous analyses. If multiple files are possible, select in preference order from the most recent re-analysis (if available), or from the most recent version otherwise. -a. <INDI>_<FAM>_DECIPHER_v11.xlsx (the best) -b. <INDI>_<FAM>_DECIPHER_v10.xlsx (if we do not have v11 version) -c. <INDI>_<FAM>_DEC_FLT.csv (least preferred, if we do not have a xlsx version -> this was the case at the beginning of the NHS Trio WES service) +a. `<INDI>_<FAM>_DECIPHER_v11.xlsx` (the best) +b. `<INDI>_<FAM>_DECIPHER_v10.xlsx` (if we do not have v11 version) +c. `<INDI>_<FAM>_DEC_FLT.csv` (least preferred, if we do not have a xlsx version -> this was the case at the beginning of the NHS Trio WES service) Create a list of terms to exclude from path matches in `previous_DECIPHER_upload/exclude.txt`, e.g.