diff --git a/gather_quad_results.sh b/gather_quad_results.sh
index 0cb3af4415bc2d57701cbbe6e9302fda3946d025..c20a8b6b58929b0a9210a5a317079c3e379c6055 100755
--- a/gather_quad_results.sh
+++ b/gather_quad_results.sh
@@ -79,6 +79,8 @@ cp -r ${WORK_DIR}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_shared ${IGV_SNAP_DIR}
 # copy proband coverage files
 cp ${WORK_DIR}/COV/${KID_1_ID}_${FAMILY_ID}.DD15.COV.txt ${FAM_DIR}
 cp ${WORK_DIR}/COV/${KID_2_ID}_${FAMILY_ID}.DD15.COV.txt ${FAM_DIR}
+cp ${WORK_DIR}/COV/${KID_1_ID}_${FAMILY_ID}.REC_SNP_COV.txt ${FAM_DIR}
+cp ${WORK_DIR}/COV/${KID_2_ID}_${FAMILY_ID}.REC_SNP_COV.txt ${FAM_DIR}
 
 
 
diff --git a/gather_shared_results.sh b/gather_shared_results.sh
index f7a05d9542dfcee7fc3de871bd1eb8071bd1a386..084b66bfb9febd6a2a103bc57ffa1e1a0fba4a01 100755
--- a/gather_shared_results.sh
+++ b/gather_shared_results.sh
@@ -59,6 +59,7 @@ cp ${WORK_DIR}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}/*.png ${FAM_DIR}
 
 # copy the coverage files
 cp ${WORK_DIR}/COV/*_${FAMILY_ID}.DD15.COV.txt ${FAM_DIR}
+cp ${WORK_DIR}/COV/*_${FAMILY_ID}.REC_SNP_COV.txt ${FAM_DIR}
 
 
 echo ""
diff --git a/gather_trio_results.sh b/gather_trio_results.sh
index 13535fe2af0acd3bb58ae61b1ab95e0282e3ed09..7325bffbcb4d76771d147504145f2a6fdf30adbe 100755
--- a/gather_trio_results.sh
+++ b/gather_trio_results.sh
@@ -80,9 +80,9 @@ cp ${WORK_DIR}/DECIPHER/${PROBAND_ID}_${FAMILY_ID}_DECIPHER_v10.xlsx ${FAM_DIR}
 cp ${WORK_DIR}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}/*.png ${FAM_DIR}
 
 
-# copy proband coverage file
+# copy proband coverage files
 cp ${WORK_DIR}/COV/${PROBAND_ID}_${FAMILY_ID}.DD15.COV.txt ${FAM_DIR}
-
+cp ${WORK_DIR}/COV/${PROBAND_ID}_${FAMILY_ID}.REC_SNP_COV.txt ${FAM_DIR}
 
 echo "OK: Results for ${FAMILY_ID} are stored in ${FAM_DIR}"
 
diff --git a/generate_DEC_IGV_aff_sib_scripts_from_quad.py b/generate_DEC_IGV_aff_sib_scripts_from_quad.py
index e5c889dd0adc21cbed5a82e48169c3a428f81bdb..29bfaa22a5ddaf9e6a049a9b6793d62945c0894d 100755
--- a/generate_DEC_IGV_aff_sib_scripts_from_quad.py
+++ b/generate_DEC_IGV_aff_sib_scripts_from_quad.py
@@ -12,7 +12,7 @@
 #		all G2P variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: DEC 07, 2021
+#       last modified: JAN 21, 2022
 
 
 
@@ -303,7 +303,8 @@ def read_G2P(in_file):
 
     global NUM_SHARED_G2P_VARS
 
-    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
+#.#    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
+    known_OBS_states = ['monoallelic_autosomal','biallelic_autosomal','monoallelic_X_hem','monoallelic_X_het']
 
     # to make sure no duplicate vars per indi
     CHECK_DICT =  defaultdict(dict)       # 1st level key: indi_fam_id:chr:start:end:ref:alt; 2nd level key: OBS_state; value: irrelevant
diff --git a/generate_DEC_IGV_scripts.py b/generate_DEC_IGV_scripts.py
index 3c3ced5a886a19895491486d6a2f2085b055d588..81892871a58e221e5c1928e1d8d73028bff0393b 100755
--- a/generate_DEC_IGV_scripts.py
+++ b/generate_DEC_IGV_scripts.py
@@ -15,7 +15,7 @@
 #		all VASE denovo variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: DEC 03, 2021
+#       last modified: JAN 18, 2022
 
 
 
@@ -650,8 +650,8 @@ def read_G2P(in_file):
 
     global NUM_UNIQ_G2P_VARS
 
-    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
-
+#.#    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
+    known_OBS_states = ['monoallelic_autosomal','biallelic_autosomal','monoallelic_X_hem','monoallelic_X_het']
 
     # first, read the G2P variants on canonical transcripts for each of the family members
     CHILD_DICT = defaultdict(dict)	# 1st level key: OBS state; 2nd level key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
@@ -757,113 +757,102 @@ def read_G2P(in_file):
     ### print out the number of unique G2P variants in CHILD ###
     child_mono = 0
     child_bi = 0
-    child_hemi = 0
-    child_xld = 0
-    child_xlod = 0
-
-    if 'monoallelic' in CHILD_DICT:
-        child_mono = len(CHILD_DICT['monoallelic'])
-    if 'biallelic' in CHILD_DICT:
-        child_bi = len(CHILD_DICT['biallelic'])
-    if 'hemizygous' in CHILD_DICT:
-        child_hemi = len(CHILD_DICT['hemizygous'])
-    if 'x-linked dominant' in CHILD_DICT:
-        child_xld = len(CHILD_DICT['x-linked dominant'])
-    if 'x-linked over-dominance' in CHILD_DICT:
-        child_xlod = len(CHILD_DICT['x-linked over-dominance'])
+    child_hem = 0
+    child_het = 0
+
+    if 'monoallelic_autosomal' in CHILD_DICT:
+        child_mono = len(CHILD_DICT['monoallelic_autosomal'])
+    if 'biallelic_autosomal' in CHILD_DICT:
+        child_bi = len(CHILD_DICT['biallelic_autosomal'])
+    if 'monoallelic_X_hem' in CHILD_DICT:
+        child_hem = len(CHILD_DICT['monoallelic_X_hem'])
+    if 'monoallelic_X_het' in CHILD_DICT:
+        child_het = len(CHILD_DICT['monoallelic_X_het'])
 
     print "CHILD (%s): number of unique G2P variants on canon transcript in the following OBS states" % (CHILD_ID)
-    print "    monoallelic: %s" % (child_mono)
-    print "    biallelic: %s" % (child_bi)
-    print "    hemizygous: %s" % (child_hemi)
-    print "    x-linked dominant: %s" % (child_xld)
-    print "    x-linked over-dominance: %s" % (child_xlod)
+    print "    monoallelic_autosomal: %s" % (child_mono)
+    print "    biallelic_autosomal: %s" % (child_bi)
+    print "    monoallelic_X_hem: %s" % (child_hem)
+    print "    monoallelic_X_het: %s" % (child_het)
+
 
 
 
     ### print out the number of unique G2P variants in MOM ###
     mom_mono = 0
     mom_bi = 0
-    mom_hemi = 0
-    mom_xld = 0
-    mom_xlod = 0
-
-    if 'monoallelic' in MOM_DICT:
-        mom_mono = len(MOM_DICT['monoallelic'])
-    if 'biallelic' in MOM_DICT:
-        mom_bi = len(MOM_DICT['biallelic'])
-    if 'hemizygous' in MOM_DICT:
-        mom_hemi = len(MOM_DICT['hemizygous'])
-    if 'x-linked dominant' in MOM_DICT:
-        mom_xld = len(MOM_DICT['x-linked dominant'])
-    if 'x-linked over-dominance' in MOM_DICT:
-        mom_xlod = len(MOM_DICT['x-linked over-dominance'])
+    mom_hem = 0
+    mom_het = 0
+
+    if 'monoallelic_autosomal' in MOM_DICT:
+        mom_mono = len(MOM_DICT['monoallelic_autosomal'])
+    if 'biallelic_autosomal' in MOM_DICT:
+        mom_bi = len(MOM_DICT['biallelic_autosomal'])
+    if 'monoallelic_X_hem' in MOM_DICT:
+        mom_hem = len(MOM_DICT['monoallelic_X_hem'])
+    if 'monoallelic_X_het' in MOM_DICT:
+        mom_het = len(MOM_DICT['monoallelic_X_het'])
 
     print "MOM (%s): number of unique G2P variants on canon transcript in the following OBS states" % (MOM_ID)
-    print "    monoallelic: %s" % (mom_mono)
-    print "    biallelic: %s" % (mom_bi)
-    print "    hemizygous: %s" % (mom_hemi)
-    print "    x-linked dominant: %s" % (mom_xld)
-    print "    x-linked over-dominance: %s" % (mom_xlod)
+    print "    monoallelic_autosomal: %s" % (mom_mono)
+    print "    biallelic_autosomal: %s" % (mom_bi)
+    print "    monoallelic_X_hem: %s" % (mom_hem)
+    print "    monoallelic_X_het: %s" % (mom_het)
+
 
 
 
     ### print out the number of unique G2P variants in DAD ###
     dad_mono = 0
     dad_bi = 0
-    dad_hemi = 0
-    dad_xld = 0
-    dad_xlod = 0
-
-    if 'monoallelic' in DAD_DICT:
-        dad_mono = len(DAD_DICT['monoallelic'])
-    if 'biallelic' in DAD_DICT:
-        dad_bi = len(DAD_DICT['biallelic'])
-    if 'hemizygous' in DAD_DICT:
-        dad_hemi = len(DAD_DICT['hemizygous'])
-    if 'x-linked dominant' in DAD_DICT:
-        dad_xld = len(DAD_DICT['x-linked dominant'])
-    if 'x-linked over-dominance' in DAD_DICT:
-        dad_xlod = len(DAD_DICT['x-linked over-dominance'])
+    dad_hem = 0
+    dad_het = 0
+
+    if 'monoallelic_autosomal' in DAD_DICT:
+        dad_mono = len(DAD_DICT['monoallelic_autosomal'])
+    if 'biallelic_autosomal' in DAD_DICT:
+        dad_bi = len(DAD_DICT['biallelic_autosomal'])
+    if 'monoallelic_X_hem' in DAD_DICT:
+        dad_hem = len(DAD_DICT['monoallelic_X_hem'])
+    if 'monoallelic_X_het' in DAD_DICT:
+        dad_het = len(DAD_DICT['monoallelic_X_het'])
 
     print "DAD (%s): number of unique G2P variants on canon transcript in the following OBS states" % (DAD_ID)
-    print "    monoallelic: %s" % (dad_mono)
-    print "    biallelic: %s" % (dad_bi)
-    print "    hemizygous: %s" % (dad_hemi)
-    print "    x-linked dominant: %s" % (dad_xld)
-    print "    x-linked over-dominance: %s" % (dad_xlod)
+    print "    monoallelic_autosomal: %s" % (dad_mono)
+    print "    biallelic_autosomal: %s" % (dad_bi)
+    print "    monoallelic_X_hem: %s" % (dad_hem)
+    print "    monoallelic_X_hem: %s" % (dad_het)
     sys.stdout.flush()
 
 
 
 
 
-
-    ###############################################################################################
-    ####    DOM filtering                                                                      ####
-    ####    if the gene has been considered under the dominant model (OBS == monoallelic)      ####
-    ####    exclude child variants seen in UNAFFECTED mother/father, regardless of GT          ####
-    ###############################################################################################
+    ######################################################################################################
+    ####    Dominant filtering                                                                        ####
+    ####    if the gene has been considered under the dominant model (OBS == monoallelic_autosomal)   ####
+    ####    exclude child variants seen in UNAFFECTED mother/father, regardless of GT                 ####
+    ######################################################################################################
 
 
     print ""
-    print "===   DOM filtering   ==="
+    print "===   monoallelic autosomal (DOMINANT) filtering   ==="
 
 
-    for key in CHILD_DICT['monoallelic']:	# this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+    for key in CHILD_DICT['monoallelic_autosomal']:	# this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
 
-        CHILD_GT = CHILD_DICT['monoallelic'][key][0]
-        CHILD_GENE = CHILD_DICT['monoallelic'][key][1]
-        CHILD_TRANS = CHILD_DICT['monoallelic'][key][2]
+        CHILD_GT = CHILD_DICT['monoallelic_autosomal'][key][0]
+        CHILD_GENE = CHILD_DICT['monoallelic_autosomal'][key][1]
+        CHILD_TRANS = CHILD_DICT['monoallelic_autosomal'][key][2]
 
-        if (key in MOM_DICT['monoallelic']) and (MOM_STAT == "UNAFFECTED"):
-            MOM_GT = MOM_DICT['monoallelic'][key][0]
-            print "***[DOM model]*** Excluded CHILD var %s CHILD_GT = %s, MOM_GT = %s, MOM_STAT = %s" % (key,CHILD_GT,MOM_GT,MOM_STAT)
+        if (key in MOM_DICT['monoallelic_autosomal']) and (MOM_STAT == "UNAFFECTED"):
+            MOM_GT = MOM_DICT['monoallelic_autosomal'][key][0]
+            print "***[DOMINANT model]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, MOM_GT = %s, MOM_STAT = %s" % (key,CHILD_GENE,CHILD_GT,MOM_GT,MOM_STAT)
             continue
 
-        if (key in DAD_DICT['monoallelic']) and (DAD_STAT == "UNAFFECTED"):
-            DAD_GT = DAD_DICT['monoallelic'][key][0]
-            print "***[DOM model]*** Excluded CHILD var %s CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GT,DAD_GT,DAD_STAT)
+        if (key in DAD_DICT['monoallelic_autosomal']) and (DAD_STAT == "UNAFFECTED"):
+            DAD_GT = DAD_DICT['monoallelic_autosomal'][key][0]
+            print "***[DOMINANT model]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GENE,CHILD_GT,DAD_GT,DAD_STAT)
             continue
 
 
@@ -915,196 +904,9 @@ def read_G2P(in_file):
 
 
 
-
-
-
-
-
-
-
-    ####################################################################################################################
-    ####    X-linked filtering                                                                                      ####
-    ####    under the x-linked model (OBS == hemizygous or x-linked dominant, but NOT x-linked over-dominance)      ####
-    ####    exclude child HET variants if seen as HOM in UNAFFECTED father                                          ####
-    ####################################################################################################################
-
-
-    print ""
-    print "===   X-linked filtering   ==="
-
-    ################################
-    ### process hemizygous genes ###
-    ################################
-
-    for key in CHILD_DICT['hemizygous']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
-
-        CHILD_GT = CHILD_DICT['hemizygous'][key][0]
-        CHILD_GENE = CHILD_DICT['hemizygous'][key][1]
-        CHILD_TRANS = CHILD_DICT['hemizygous'][key][2]
-
-        if CHILD_GT == 'HOM':							# do NOT filter HOM variants in proband (i.e., hemizygous in boy or HOM in girl)
-            pass
-        else:
-            if (key in DAD_DICT['hemizygous']) and (DAD_STAT == "UNAFFECTED"):
-                DAD_GT = DAD_DICT['hemizygous'][key][0]
-                if DAD_GT == 'HOM':						# i.e., hemizygous variant in unaffected father
-                    print "***[X-linked model (hemizygous)]*** Excluded CHILD var %s CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GT,DAD_GT,DAD_STAT)
-                    continue
-
-        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
-        chr,start,end,ref,alt = key.split(":")
-        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
-            if len(ref) < len(alt):                                 # an INS
-                orig_start = start
-                orig_ref = ref
-                orig_alt = alt
-                start = orig_start
-                ref = '-'
-                alt = orig_alt[len(orig_ref):]
-                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
-            else:                                                   # a DEL
-                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
-                print line
-                raise SystemExit
-
-        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
-
-        # record the data for CHILD G2P variants (for OBS=hemizygous)
-        if new_key not in G2P_DICT:
-            G2P_DICT[new_key] = 0
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
-
-        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
-        if new_key not in G2P_DATA:
-            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
-
-
-
-    #######################################
-    ### process x-linked dominant genes ###
-    #######################################
-
-    for key in CHILD_DICT['x-linked dominant']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
-
-        CHILD_GT = CHILD_DICT['x-linked dominant'][key][0]
-        CHILD_GENE = CHILD_DICT['x-linked dominant'][key][1]
-        CHILD_TRANS = CHILD_DICT['x-linked dominant'][key][2]
-
-        if CHILD_GT == 'HOM':                                                   # do NOT filter HOM variants (i.e., hemizygous in boy or HOM in girl)
-            pass
-        else:
-            if (key in DAD_DICT['x-linked dominant']) and (DAD_STAT == "UNAFFECTED"):
-                DAD_GT = DAD_DICT['x-linked dominant'][key][0]
-                if DAD_GT == 'HOM':                                             # i.e., x-linked dominant variant in unnafected father
-                    print "***[X-linked model (x-linked dominant)]*** Excluded CHILD var %s CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GT,DAD_GT,DAD_STAT)
-                    continue
-
-        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
-        chr,start,end,ref,alt = key.split(":")
-        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
-            if len(ref) < len(alt):                                 # an INS
-                orig_start = start
-                orig_ref = ref
-                orig_alt = alt
-                start = orig_start
-                ref = '-'
-                alt = orig_alt[len(orig_ref):]
-                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
-            else:                                                   # a DEL
-                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
-                print line
-                raise SystemExit
-
-        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
-
-        # record the data for CHILD G2P variants (for OBS=x-linked dominant)
-        if new_key not in G2P_DICT:
-            G2P_DICT[new_key] = 0
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
-
-        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
-        if new_key not in G2P_DATA:
-            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
-
-
-    ########################################################################
-    ### process x-linked over-dominance  genes - no filtering to be done ###
-    ########################################################################
-
-    for key in CHILD_DICT['x-linked over-dominance']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
-
-        CHILD_GT = CHILD_DICT['x-linked over-dominance'][key][0]
-        CHILD_GENE = CHILD_DICT['x-linked over-dominance'][key][1]
-        CHILD_TRANS = CHILD_DICT['x-linked over-dominance'][key][2]
-
-        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
-        chr,start,end,ref,alt = key.split(":")
-        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
-            if len(ref) < len(alt):                                 # an INS
-                orig_start = start
-                orig_ref = ref
-                orig_alt = alt
-                start = orig_start
-                ref = '-'
-                alt = orig_alt[len(orig_ref):]
-                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
-            else:                                                   # a DEL
-                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
-                print line
-                raise SystemExit
-
-        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
-
-        # record the data for CHILD G2P variants (for OBS=x-linked over-dominance)
-        if new_key not in G2P_DICT:
-            G2P_DICT[new_key] = 0
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
-
-        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
-        if new_key not in G2P_DATA:
-            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
-
-
-    NUM_UNIQ_G2P_VARS = len(G2P_DICT)
-    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC and X-LINKED genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
-    sys.stdout.flush()
-
-    print ""
-
-
-
-
-
     ##############################################################################################################
-    ####    BIALLELIC filtering                                                                               ####
-    ####    under the biallelic model (OBS == biallelic) - consider ALL variants per gene                     ####
+    ####    Recessive filtering                                                                               ####
+    ####    under the recessive model (OBS == biallelic_autosomal) - consider ALL variants per gene           ####
     ####    must all be HET in CHILD, GT in parent does not matter                                            ####
     ####    all of them must *clearly* come from only one of the parents (maternally/paternally + biparental) ####
     ####    and this parent must be unaffected                                                                ####
@@ -1113,16 +915,16 @@ def read_G2P(in_file):
 
 
     print ""
-    print "===   BIALLELIC filtering   ==="
+    print "===   biallelic autosomal (RECESSIVE) filtering   ==="
 
 
     GENE_KEY_GT = defaultdict(dict)		# for child - 1st level key: gene_name; 2nd level key: chr:start:end:ref:alt; value: (GT,trans)
 
     # process all variants in biallelic genes in child
-    for key in CHILD_DICT['biallelic']:		# this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
-        b_GT = CHILD_DICT['biallelic'][key][0]
-        b_gene = CHILD_DICT['biallelic'][key][1]
-        b_trans = CHILD_DICT['biallelic'][key][2]
+    for key in CHILD_DICT['biallelic_autosomal']:		# this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+        b_GT = CHILD_DICT['biallelic_autosomal'][key][0]
+        b_gene = CHILD_DICT['biallelic_autosomal'][key][1]
+        b_trans = CHILD_DICT['biallelic_autosomal'][key][2]
         GENE_KEY_GT[b_gene][key] = (b_GT,b_trans)
 
     # iterate over genes in GENE_KEY_GT
@@ -1131,7 +933,6 @@ def read_G2P(in_file):
 
         # iterate over variants in this gene
         for kx in GENE_KEY_GT[g]:		# this the second key: chr:start:end:ref:alt
-###            if GENE_KEY_GT[g][kx] == 'HOM':	# there is a HOM variant in the child - NO filtering
             if GENE_KEY_GT[g][kx][0] == 'HOM':     # there is a HOM variant in the child - NO filtering
                 all_HET = False
                 break
@@ -1150,9 +951,9 @@ def read_G2P(in_file):
 
                 this_var_status = 'NONE'
 
-                if ((ky in MOM_DICT['biallelic']) or (ky in MOM_DICT['monoallelic'])) and (MOM_STAT == "UNAFFECTED"):
+                if ((ky in MOM_DICT['biallelic_autosomal']) or (ky in MOM_DICT['monoallelic_autosomal'])) and (MOM_STAT == "UNAFFECTED"):
                     this_var_status = 'MOM'
-                if ((ky in DAD_DICT['biallelic']) or (ky in DAD_DICT['monoallelic'])) and (DAD_STAT == "UNAFFECTED"):
+                if ((ky in DAD_DICT['biallelic_autosomal']) or (ky in DAD_DICT['monoallelic_autosomal'])) and (DAD_STAT == "UNAFFECTED"):
                     if this_var_status == 'NONE':
                         this_var_status = 'DAD'
                     elif this_var_status == 'MOM':
@@ -1187,7 +988,7 @@ def read_G2P(in_file):
             # if all variants in the child in this gene are found in single unaffected parent - filter out
             if all_from_one_parent:
                 for kz in GENE_KEY_GT[g]:
-                    print "***[Biallelic model]*** Excluded CHILD HET var %s in gene = %s, found in = %s, PARENT_STAT = UNAFFECTED" % (kz,g,VAR_SOURCE_LIST[kz])
+                    print "***[RECESSIVE model]*** Excluded CHILD HET var %s in gene = %s, found in = %s, PARENT_STAT = UNAFFECTED" % (kz,g,VAR_SOURCE_LIST[kz])
                 continue
 
         # end processing all HET variants in the proband - if all from single unaffected parent they have been excluded, message to the log written
@@ -1202,9 +1003,9 @@ def read_G2P(in_file):
         # iterate over variants in this gene
         for kkk in GENE_KEY_GT[g]:                # this the second key: chr:start:end:ref:alt
 
-            CHILD_GT = CHILD_DICT['biallelic'][kkk][0]
-            CHILD_GENE = CHILD_DICT['biallelic'][kkk][1]
-            CHILD_TRANS = CHILD_DICT['biallelic'][kkk][2]
+            CHILD_GT = CHILD_DICT['biallelic_autosomal'][kkk][0]
+            CHILD_GENE = CHILD_DICT['biallelic_autosomal'][kkk][1]
+            CHILD_TRANS = CHILD_DICT['biallelic_autosomal'][kkk][2]
 
             # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
             chr,start,end,ref,alt = kkk.split(":")
@@ -1243,8 +1044,203 @@ def read_G2P(in_file):
                 pass
 
     NUM_UNIQ_G2P_VARS = len(G2P_DICT)
-    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC, X-LINKED and BIALLELIC genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
+    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC and BIALLELIC genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
+    sys.stdout.flush()
+    print ""
+
+
+
+
+
+
+
+
+
+    ####################################################################################################################
+    ####    X-linked filtering                                                                                      ####
+#.#    ####    under the x-linked model (OBS == hemizygous or x-linked dominant, but NOT x-linked over-dominance)      ####
+    ####    under the chrX model (OBS == monoallelic_X_hem or monoallelic_X_het)                                    ####
+    ####    exclude child HET variants if seen as HOM in UNAFFECTED father                                          ####
+    ####													    ####
+    ####    Note 18/01/2022    									    		    ####
+    ####    This is a temporary solution, since x-linked dominant and x-linked over-dominance -> monoallelic_X_het  ####
+    ####    and we should filter x-linked dominant and monoallelic_X_hem, but not x-linked over-dominance           ####
+    ####    the code below treats x-linked over-dominance as the others (i.e. filters, while it should not)         ####
+    ####    Issue flagged to G2P plug-in team, awaiting their fix						    ####
+    ####    for now manually scan the output of G2P for the proband (both for boys and girls)                       ####
+    ####        to check if any variant has been called in PCDH19 and EFNB1                                         ####
+    ####    also for all the variants filtered out from monoallelic_X_het we will print in the log the gene name    ####
+    ####################################################################################################################
+
+
+    print ""
+    print "===   X-linked filtering   ==="
+
+    #######################################
+    ### process monoallelic_X_hem genes ###
+    #######################################
+
+    for key in CHILD_DICT['monoallelic_X_hem']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+
+        CHILD_GT = CHILD_DICT['monoallelic_X_hem'][key][0]
+        CHILD_GENE = CHILD_DICT['monoallelic_X_hem'][key][1]
+        CHILD_TRANS = CHILD_DICT['monoallelic_X_hem'][key][2]
+
+        if CHILD_GT == 'HOM':							# do NOT filter HOM variants in proband (i.e., hemizygous in boy or HOM in girl)
+            pass
+        else:
+            if (key in DAD_DICT['monoallelic_X_hem']) and (DAD_STAT == "UNAFFECTED"):
+                DAD_GT = DAD_DICT['monoallelic_X_hem'][key][0]
+                if DAD_GT == 'HOM':						# i.e., hemizygous variant in unaffected father
+                    print "***[monoallelic_X_hem]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GENE,CHILD_GT,DAD_GT,DAD_STAT)
+                    continue
+
+        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
+        chr,start,end,ref,alt = key.split(":")
+        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
+            if len(ref) < len(alt):                                 # an INS
+                orig_start = start
+                orig_ref = ref
+                orig_alt = alt
+                start = orig_start
+                ref = '-'
+                alt = orig_alt[len(orig_ref):]
+                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
+            else:                                                   # a DEL
+                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
+                print line
+                raise SystemExit
+
+        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
+
+        # record the data for CHILD G2P variants (for OBS=monoallelic_X_hem)
+        if new_key not in G2P_DICT:
+            G2P_DICT[new_key] = 0
+        else:
+            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+            # raise SystemExit
+            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+            pass
+
+        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
+        if new_key not in G2P_DATA:
+            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
+        else:
+            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+            # raise SystemExit
+            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+            pass
+
+
+
+    #######################################
+    ### process monoallelic_X_het genes ###
+    #######################################
+
+    for key in CHILD_DICT['monoallelic_X_het']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+
+        CHILD_GT = CHILD_DICT['monoallelic_X_het'][key][0]
+        CHILD_GENE = CHILD_DICT['monoallelic_X_het'][key][1]
+        CHILD_TRANS = CHILD_DICT['monoallelic_X_het'][key][2]
+
+        if CHILD_GT == 'HOM':                                                   # do NOT filter HOM variants (i.e., hemizygous in boy or HOM in girl)
+            pass
+        else:
+            if (key in DAD_DICT['monoallelic_X_het']) and (DAD_STAT == "UNAFFECTED"):
+                DAD_GT = DAD_DICT['monoallelic_X_het'][key][0]
+                if DAD_GT == 'HOM':                                             # i.e., x-linked dominant variant in unnafected father
+                    print "***[monoallelic_X_het]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GENE,CHILD_GT,DAD_GT,DAD_STAT)
+                    continue
+
+        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
+        chr,start,end,ref,alt = key.split(":")
+        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
+            if len(ref) < len(alt):                                 # an INS
+                orig_start = start
+                orig_ref = ref
+                orig_alt = alt
+                start = orig_start
+                ref = '-'
+                alt = orig_alt[len(orig_ref):]
+                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
+            else:                                                   # a DEL
+                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
+                print line
+                raise SystemExit
+
+        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
+
+        # record the data for CHILD G2P variants (for OBS=monoallelic_X_het)
+        if new_key not in G2P_DICT:
+            G2P_DICT[new_key] = 0
+        else:
+            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+            # raise SystemExit
+            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+            pass
+
+        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
+        if new_key not in G2P_DATA:
+            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
+        else:
+            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+            # raise SystemExit
+            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+            pass
+
+
+
+#.#    ########################################################################
+#.#    ### process x-linked over-dominance  genes - no filtering to be done ###
+#.#    ########################################################################
+
+#.#    for key in CHILD_DICT['x-linked over-dominance']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+
+#.#        CHILD_GT = CHILD_DICT['x-linked over-dominance'][key][0]
+#.#        CHILD_GENE = CHILD_DICT['x-linked over-dominance'][key][1]
+#.#        CHILD_TRANS = CHILD_DICT['x-linked over-dominance'][key][2]
+
+#.#        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
+#.#        chr,start,end,ref,alt = key.split(":")
+#.#        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
+#.#            if len(ref) < len(alt):                                 # an INS
+#.#                orig_start = start
+#.#                orig_ref = ref
+#.#                orig_alt = alt
+#.#                start = orig_start
+#.#                ref = '-'
+#.#                alt = orig_alt[len(orig_ref):]
+#.#                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
+#.#            else:                                                   # a DEL
+#.#                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
+#.#                print line
+#.#                raise SystemExit
+
+#.#        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
+
+#.#        # record the data for CHILD G2P variants (for OBS=x-linked over-dominance)
+#.#        if new_key not in G2P_DICT:
+#.#            G2P_DICT[new_key] = 0
+#.#        else:
+#.#            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+#.#            # raise SystemExit
+#.#            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+#.#            pass
+
+#.#        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
+#.#        if new_key not in G2P_DATA:
+#.#            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
+#.#        else:
+#.#            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+#.#            # raise SystemExit
+#.#            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+#.#            pass
+
+
+    NUM_UNIQ_G2P_VARS = len(G2P_DICT)
+    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC, BIALLELIC and X-LINKED genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
     sys.stdout.flush()
+
     print ""
     print ""
 
@@ -1260,6 +1256,10 @@ def read_G2P(in_file):
 
 
 
+
+
+
+
 def read_ped(in_file):
 
     global CHILD_ID
diff --git a/generate_DEC_IGV_shared_scripts.py b/generate_DEC_IGV_shared_scripts.py
index 78421450dfd382f6077d67827b57b3d0ff227213..687a3138f1685d9706e56a0c8729a80bf11310d3 100755
--- a/generate_DEC_IGV_shared_scripts.py
+++ b/generate_DEC_IGV_shared_scripts.py
@@ -12,7 +12,7 @@
 #		all G2P variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: DEC 07, 2021
+#       last modified: JAN 20, 2022
 
 
 
@@ -299,7 +299,8 @@ def read_G2P(in_file):
 
     global NUM_SHARED_G2P_VARS
 
-    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
+#.#    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
+    known_OBS_states = ['monoallelic_autosomal','biallelic_autosomal','monoallelic_X_hem','monoallelic_X_het']
 
     # to make sure no duplicate vars per indi
     CHECK_DICT =  defaultdict(dict)       # 1st level key: indi_fam_id:chr:start:end:ref:alt; 2nd level key: OBS_state; value: irrelevant
diff --git a/generate_DEC_IGV_trio_scripts_from_quad.py b/generate_DEC_IGV_trio_scripts_from_quad.py
index 3afdd794f3e4a95a9abc32baa77d6a22f83133c0..408a1240d6af518a960abef44f9e80227445f128 100755
--- a/generate_DEC_IGV_trio_scripts_from_quad.py
+++ b/generate_DEC_IGV_trio_scripts_from_quad.py
@@ -15,7 +15,7 @@
 #		all VASE denovo variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: DEC 07, 2021
+#       last modified: JAN 21, 2022
 
 
 
@@ -686,7 +686,8 @@ def read_G2P(in_file):
 
     global NUM_UNIQ_G2P_VARS
 
-    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
+#.#    known_OBS_states = ['monoallelic','biallelic','hemizygous','x-linked dominant','x-linked over-dominance']
+    known_OBS_states = ['monoallelic_autosomal','biallelic_autosomal','monoallelic_X_hem','monoallelic_X_het']
 
 
     # first, read the G2P variants on canonical transcripts for each of the family members
@@ -795,81 +796,68 @@ def read_G2P(in_file):
     ### print out the number of unique G2P variants in CHILD ###
     child_mono = 0
     child_bi = 0
-    child_hemi = 0
-    child_xld = 0
-    child_xlod = 0
-
-    if 'monoallelic' in CHILD_DICT:
-        child_mono = len(CHILD_DICT['monoallelic'])
-    if 'biallelic' in CHILD_DICT:
-        child_bi = len(CHILD_DICT['biallelic'])
-    if 'hemizygous' in CHILD_DICT:
-        child_hemi = len(CHILD_DICT['hemizygous'])
-    if 'x-linked dominant' in CHILD_DICT:
-        child_xld = len(CHILD_DICT['x-linked dominant'])
-    if 'x-linked over-dominance' in CHILD_DICT:
-        child_xlod = len(CHILD_DICT['x-linked over-dominance'])
+    child_hem = 0
+    child_het = 0
+
+    if 'monoallelic_autosomal' in CHILD_DICT:
+        child_mono = len(CHILD_DICT['monoallelic_autosomal'])
+    if 'biallelic_autosomal' in CHILD_DICT:
+        child_bi = len(CHILD_DICT['biallelic_autosomal'])
+    if 'monoallelic_X_hem' in CHILD_DICT:
+        child_hem = len(CHILD_DICT['monoallelic_X_hem'])
+    if 'monoallelic_X_het' in CHILD_DICT:
+        child_het = len(CHILD_DICT['monoallelic_X_het'])
 
     print "CHILD (%s): number of unique G2P variants on canon transcript in the following OBS states" % (CHILD_ID)
-    print "    monoallelic: %s" % (child_mono)
-    print "    biallelic: %s" % (child_bi)
-    print "    hemizygous: %s" % (child_hemi)
-    print "    x-linked dominant: %s" % (child_xld)
-    print "    x-linked over-dominance: %s" % (child_xlod)
-
+    print "    monoallelic_autosomal: %s" % (child_mono)
+    print "    biallelic_autosomal: %s" % (child_bi)
+    print "    monoallelic_X_hem: %s" % (child_hem)
+    print "    monoallelic_X_het: %s" % (child_het)
 
 
     ### print out the number of unique G2P variants in MOM ###
     mom_mono = 0
     mom_bi = 0
-    mom_hemi = 0
-    mom_xld = 0
-    mom_xlod = 0
-
-    if 'monoallelic' in MOM_DICT:
-        mom_mono = len(MOM_DICT['monoallelic'])
-    if 'biallelic' in MOM_DICT:
-        mom_bi = len(MOM_DICT['biallelic'])
-    if 'hemizygous' in MOM_DICT:
-        mom_hemi = len(MOM_DICT['hemizygous'])
-    if 'x-linked dominant' in MOM_DICT:
-        mom_xld = len(MOM_DICT['x-linked dominant'])
-    if 'x-linked over-dominance' in MOM_DICT:
-        mom_xlod = len(MOM_DICT['x-linked over-dominance'])
+    mom_hem = 0
+    mom_het = 0
+
+    if 'monoallelic_autosomal' in MOM_DICT:
+        mom_mono = len(MOM_DICT['monoallelic_autosomal'])
+    if 'biallelic_autosomal' in MOM_DICT:
+        mom_bi = len(MOM_DICT['biallelic_autosomal'])
+    if 'monoallelic_X_hem' in MOM_DICT:
+        mom_hem = len(MOM_DICT['monoallelic_X_hem'])
+    if 'monoallelic_X_het' in MOM_DICT:
+        mom_het = len(MOM_DICT['monoallelic_X_het'])
 
     print "MOM (%s): number of unique G2P variants on canon transcript in the following OBS states" % (MOM_ID)
-    print "    monoallelic: %s" % (mom_mono)
-    print "    biallelic: %s" % (mom_bi)
-    print "    hemizygous: %s" % (mom_hemi)
-    print "    x-linked dominant: %s" % (mom_xld)
-    print "    x-linked over-dominance: %s" % (mom_xlod)
+    print "    monoallelic_autosomal: %s" % (mom_mono)
+    print "    biallelic_autosomal: %s" % (mom_bi)
+    print "    monoallelic_X_hem: %s" % (mom_hem)
+    print "    monoallelic_X_het: %s" % (mom_het)
 
 
 
     ### print out the number of unique G2P variants in DAD ###
     dad_mono = 0
     dad_bi = 0
-    dad_hemi = 0
-    dad_xld = 0
-    dad_xlod = 0
-
-    if 'monoallelic' in DAD_DICT:
-        dad_mono = len(DAD_DICT['monoallelic'])
-    if 'biallelic' in DAD_DICT:
-        dad_bi = len(DAD_DICT['biallelic'])
-    if 'hemizygous' in DAD_DICT:
-        dad_hemi = len(DAD_DICT['hemizygous'])
-    if 'x-linked dominant' in DAD_DICT:
-        dad_xld = len(DAD_DICT['x-linked dominant'])
-    if 'x-linked over-dominance' in DAD_DICT:
-        dad_xlod = len(DAD_DICT['x-linked over-dominance'])
+    dad_hem = 0
+    dad_het = 0
+
+    if 'monoallelic_autosomal' in DAD_DICT:
+        dad_mono = len(DAD_DICT['monoallelic_autosomal'])
+    if 'biallelic_autosomal' in DAD_DICT:
+        dad_bi = len(DAD_DICT['biallelic_autosomal'])
+    if 'monoallelic_X_hem' in DAD_DICT:
+        dad_hem = len(DAD_DICT['monoallelic_X_hem'])
+    if 'monoallelic_X_het' in DAD_DICT:
+        dad_het = len(DAD_DICT['monoallelic_X_het'])
 
     print "DAD (%s): number of unique G2P variants on canon transcript in the following OBS states" % (DAD_ID)
-    print "    monoallelic: %s" % (dad_mono)
-    print "    biallelic: %s" % (dad_bi)
-    print "    hemizygous: %s" % (dad_hemi)
-    print "    x-linked dominant: %s" % (dad_xld)
-    print "    x-linked over-dominance: %s" % (dad_xlod)
+    print "    monoallelic_autosomal: %s" % (dad_mono)
+    print "    biallelic_autosomal: %s" % (dad_bi)
+    print "    monoallelic_X_hem: %s" % (dad_hem)
+    print "    monoallelic_X_het: %s" % (dad_het)
     sys.stdout.flush()
 
 
@@ -877,31 +865,31 @@ def read_G2P(in_file):
 
 
 
-    ###############################################################################################
-    ####    DOM filtering                                                                      ####
-    ####    if the gene has been considered under the dominant model (OBS == monoallelic)      ####
-    ####    exclude child variants seen in UNAFFECTED mother/father, regardless of GT          ####
-    ###############################################################################################
+    ######################################################################################################
+    ####    Dominant filtering                                                                        ####
+    ####    if the gene has been considered under the dominant model (OBS == monoallelic_autosomal)   ####
+    ####    exclude child variants seen in UNAFFECTED mother/father, regardless of GT                 ####
+    ######################################################################################################
 
 
     print ""
-    print "===   DOM filtering   ==="
+    print "===   monoallelic autosomal (DOMINANT) filtering   ==="
 
 
-    for key in CHILD_DICT['monoallelic']:	# this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+    for key in CHILD_DICT['monoallelic_autosomal']:	# this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
 
-        CHILD_GT = CHILD_DICT['monoallelic'][key][0]
-        CHILD_GENE = CHILD_DICT['monoallelic'][key][1]
-        CHILD_TRANS = CHILD_DICT['monoallelic'][key][2]
+        CHILD_GT = CHILD_DICT['monoallelic_autosomal'][key][0]
+        CHILD_GENE = CHILD_DICT['monoallelic_autosomal'][key][1]
+        CHILD_TRANS = CHILD_DICT['monoallelic_autosomal'][key][2]
 
-        if (key in MOM_DICT['monoallelic']) and (MOM_STAT == "UNAFFECTED"):
-            MOM_GT = MOM_DICT['monoallelic'][key][0]
-            print "***[DOM model]*** Excluded CHILD var %s CHILD_GT = %s, MOM_GT = %s, MOM_STAT = %s" % (key,CHILD_GT,MOM_GT,MOM_STAT)
+        if (key in MOM_DICT['monoallelic_autosomal']) and (MOM_STAT == "UNAFFECTED"):
+            MOM_GT = MOM_DICT['monoallelic_autosomal'][key][0]
+            print "***[DOMINANT model]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, MOM_GT = %s, MOM_STAT = %s" % (key,CHILD_GENE,CHILD_GT,MOM_GT,MOM_STAT)
             continue
 
-        if (key in DAD_DICT['monoallelic']) and (DAD_STAT == "UNAFFECTED"):
-            DAD_GT = DAD_DICT['monoallelic'][key][0]
-            print "***[DOM model]*** Excluded CHILD var %s CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GT,DAD_GT,DAD_STAT)
+        if (key in DAD_DICT['monoallelic_autosomal']) and (DAD_STAT == "UNAFFECTED"):
+            DAD_GT = DAD_DICT['monoallelic_autosomal'][key][0]
+            print "***[DOMINANT model]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GENE,CHILD_GT,DAD_GT,DAD_STAT)
             continue
 
 
@@ -952,9 +940,149 @@ def read_G2P(in_file):
 
 
 
+    ##############################################################################################################
+    ####    Recessive filtering                                                                               ####
+    ####    under the recessive model (OBS == biallelic_autosomal) - consider ALL variants per gene           ####
+    ####    must all be HET in CHILD, GT in parent does not matter                                            ####
+    ####    all of them must *clearly* come from only one of the parents (maternally/paternally + biparental) ####
+    ####    and this parent must be unaffected                                                                ####
+    ####    if all these: then exclude all child variants in this gene                                        ####
+    ##############################################################################################################
+
+    print ""
+    print "===   biallelic autosomal (RECESSIVE) filtering   ==="
+
+
+    GENE_KEY_GT = defaultdict(dict)             # for child - 1st level key: gene_name; 2nd level key: chr:start:end:ref:alt; value: (GT,trans)
+
+    # process all variants in biallelic genes in child
+    for key in CHILD_DICT['biallelic_autosomal']:               # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+        b_GT = CHILD_DICT['biallelic_autosomal'][key][0]
+        b_gene = CHILD_DICT['biallelic_autosomal'][key][1]
+        b_trans = CHILD_DICT['biallelic_autosomal'][key][2]
+        GENE_KEY_GT[b_gene][key] = (b_GT,b_trans)
+
+    # iterate over genes in GENE_KEY_GT
+    for g in GENE_KEY_GT:                       # this is the biallelic gene name
+        all_HET = True
+
+        # iterate over variants in this gene
+        for kx in GENE_KEY_GT[g]:               # this the second key: chr:start:end:ref:alt
+            if GENE_KEY_GT[g][kx][0] == 'HOM':     # there is a HOM variant in the child - NO filtering
+                all_HET = False
+                break
 
+        if all_HET:                             # for this gene
+        # all variants in this gene in the CHILD are HET, check if all come from a single unaffected parent
+        # if yes, filter out and write a message to the log file
+        # if not, to be added to G2P_DICT and G2P_DATA for further processing
 
+            all_from_one_parent = True
 
+            # iterate again over the variants in this gene
+            VAR_SOURCE_LIST = {}                # key: chr:start:end:ref:alt in child; value: (NONE) or (MOM or DAD or BOTH and the parent is UNAFFECTED)
+
+            for ky in GENE_KEY_GT[g]:           # this the second key: chr:start:end:ref:alt
+
+                this_var_status = 'NONE'
+
+                if ((ky in MOM_DICT['biallelic_autosomal']) or (ky in MOM_DICT['monoallelic_autosomal'])) and (MOM_STAT == "UNAFFECTED"):
+                    this_var_status = 'MOM'
+                if ((ky in DAD_DICT['biallelic_autosomal']) or (ky in DAD_DICT['monoallelic_autosomal'])) and (DAD_STAT == "UNAFFECTED"):
+                    if this_var_status == 'NONE':
+                        this_var_status = 'DAD'
+                    elif this_var_status == 'MOM':
+                        this_var_status = 'BOTH'
+
+                VAR_SOURCE_LIST[ky] = this_var_status
+
+            # have collected the parent source for all variants in this gene
+            tot_num_vars = len(VAR_SOURCE_LIST)
+            num_mom = 0
+            num_dad = 0
+            num_none = 0
+            for kt,v in VAR_SOURCE_LIST.iteritems():
+                if v == 'NONE':
+                    num_none += 1
+                elif v == 'MOM':
+                    num_mom += 1
+                elif v == 'DAD':
+                    num_dad += 1
+                elif v == 'BOTH':
+                    num_mom += 1
+                    num_dad += 1
+                else:
+                    print "ERROR: cannot understand the source parent = %s" % (v)
+                    raise SystemExit
+
+            if num_none > 0:
+                all_from_one_parent = False
+            elif num_mom < tot_num_vars and num_dad < tot_num_vars:
+                all_from_one_parent = False
+
+
+            # if all variants in the child in this gene are found in single unaffected parent - filter out
+            if all_from_one_parent:
+                for kz in GENE_KEY_GT[g]:
+                    print "***[RECESSIVE model]*** Excluded CHILD HET var %s in gene = %s, found in = %s, PARENT_STAT = UNAFFECTED" % (kz,g,VAR_SOURCE_LIST[kz])
+                continue
+
+        # end processing all HET variants in the proband - if all from single unaffected parent they have been excluded, message to the log written
+        # and gone to evaluating the next biallelic gene in the child
+
+        # if here
+        # - either not all CHILD variants in this gene are not HET, or
+        # - not all of them can be traced to a single unaffected parent
+        # --> add to be processed
+
+        # here we are at gene level, must iterate over all variants in this gene
+        # iterate over variants in this gene
+        for kkk in GENE_KEY_GT[g]:                # this the second key: chr:start:end:ref:alt
+
+            CHILD_GT = CHILD_DICT['biallelic_autosomal'][kkk][0]
+            CHILD_GENE = CHILD_DICT['biallelic_autosomal'][kkk][1]
+            CHILD_TRANS = CHILD_DICT['biallelic_autosomal'][kkk][2]
+
+            # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
+            chr,start,end,ref,alt = kkk.split(":")
+            if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
+                if len(ref) < len(alt):                                 # an INS
+                    orig_start = start
+                    orig_ref = ref
+                    orig_alt = alt
+                    start = orig_start
+                    ref = '-'
+                    alt = orig_alt[len(orig_ref):]
+                    print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
+                else:                                                   # a DEL
+                    print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
+                    print line
+                    raise SystemExit
+
+            new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
+
+            # record the data for CHILD G2P variants (for OBS=biallelic)
+            if new_key not in G2P_DICT:
+                G2P_DICT[new_key] = 0
+            else:
+                # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+                # raise SystemExit
+                # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+                pass
+
+            # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
+            if new_key not in G2P_DATA:
+                G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
+            else:
+                # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+                # raise SystemExit
+                # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+                pass
+
+    NUM_UNIQ_G2P_VARS = len(G2P_DICT)
+    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC and BIALLELIC genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
+    sys.stdout.flush()
+    print ""
 
 
 
@@ -962,31 +1090,41 @@ def read_G2P(in_file):
 
     ####################################################################################################################
     ####    X-linked filtering                                                                                      ####
-    ####    under the x-linked model (OBS == hemizygous or x-linked dominant, but NOT x-linked over-dominance)      ####
+#.#    ####    under the x-linked model (OBS == hemizygous or x-linked dominant, but NOT x-linked over-dominance)      ####
+    ####    under the chrX model (OBS == monoallelic_X_hem or monoallelic_X_het)                                    ####
     ####    exclude child HET variants if seen as HOM in UNAFFECTED father                                          ####
+    ####                                                                                                            ####
+    ####    Note 18/01/2022                                                                                         ####
+    ####    This is a temporary solution, since x-linked dominant and x-linked over-dominance -> monoallelic_X_het  ####
+    ####    and we should filter x-linked dominant and monoallelic_X_hem, but not x-linked over-dominance           ####
+    ####    the code below treats x-linked over-dominance as the others (i.e. filters, while it should not)         ####
+    ####    Issue flagged to G2P plug-in team, awaiting their fix                                                   ####
+    ####    for now manually scan the output of G2P for the proband (both for boys and girls)                       ####
+    ####        to check if any variant has been called in PCDH19 and EFNB1                                         ####
+    ####    also for all the variants filtered out from monoallelic_X_het we will print in the log the gene name    ####
     ####################################################################################################################
 
 
     print ""
     print "===   X-linked filtering   ==="
 
-    ################################
-    ### process hemizygous genes ###
-    ################################
+    #######################################
+    ### process monoallelic_X_hem genes ###
+    #######################################
 
-    for key in CHILD_DICT['hemizygous']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+    for key in CHILD_DICT['monoallelic_X_hem']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
 
-        CHILD_GT = CHILD_DICT['hemizygous'][key][0]
-        CHILD_GENE = CHILD_DICT['hemizygous'][key][1]
-        CHILD_TRANS = CHILD_DICT['hemizygous'][key][2]
+        CHILD_GT = CHILD_DICT['monoallelic_X_hem'][key][0]
+        CHILD_GENE = CHILD_DICT['monoallelic_X_hem'][key][1]
+        CHILD_TRANS = CHILD_DICT['monoallelic_X_hem'][key][2]
 
-        if CHILD_GT == 'HOM':							# do NOT filter HOM variants in proband (i.e., hemizygous in boy or HOM in girl)
+        if CHILD_GT == 'HOM':					# do NOT filter HOM variants in proband (i.e., hemizygous in boy or HOM in girl)
             pass
         else:
-            if (key in DAD_DICT['hemizygous']) and (DAD_STAT == "UNAFFECTED"):
-                DAD_GT = DAD_DICT['hemizygous'][key][0]
-                if DAD_GT == 'HOM':						# i.e., hemizygous variant in unaffected father
-                    print "***[X-linked model (hemizygous)]*** Excluded CHILD var %s CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GT,DAD_GT,DAD_STAT)
+            if (key in DAD_DICT['monoallelic_X_hem']) and (DAD_STAT == "UNAFFECTED"):
+                DAD_GT = DAD_DICT['monoallelic_X_hem'][key][0]
+                if DAD_GT == 'HOM':                                             # i.e., hemizygous variant in unaffected father
+                    print "***[monoallelic_X_hem]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GENE,CHILD_GT,DAD_GT,DAD_STAT)
                     continue
 
         # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
@@ -1007,7 +1145,7 @@ def read_G2P(in_file):
 
         new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
 
-        # record the data for CHILD G2P variants (for OBS=hemizygous)
+        # record the data for CHILD G2P variants (for OBS=monoallelic_X_hem)
         if new_key not in G2P_DICT:
             G2P_DICT[new_key] = 0
         else:
@@ -1028,22 +1166,22 @@ def read_G2P(in_file):
 
 
     #######################################
-    ### process x-linked dominant genes ###
+    ### process monoallelic_X_het genes ###
     #######################################
 
-    for key in CHILD_DICT['x-linked dominant']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
+    for key in CHILD_DICT['monoallelic_X_het']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
 
-        CHILD_GT = CHILD_DICT['x-linked dominant'][key][0]
-        CHILD_GENE = CHILD_DICT['x-linked dominant'][key][1]
-        CHILD_TRANS = CHILD_DICT['x-linked dominant'][key][2]
+        CHILD_GT = CHILD_DICT['monoallelic_X_het'][key][0]
+        CHILD_GENE = CHILD_DICT['monoallelic_X_het'][key][1]
+        CHILD_TRANS = CHILD_DICT['monoallelic_X_het'][key][2]
 
         if CHILD_GT == 'HOM':                                                   # do NOT filter HOM variants (i.e., hemizygous in boy or HOM in girl)
             pass
         else:
-            if (key in DAD_DICT['x-linked dominant']) and (DAD_STAT == "UNAFFECTED"):
-                DAD_GT = DAD_DICT['x-linked dominant'][key][0]
+            if (key in DAD_DICT['monoallelic_X_het']) and (DAD_STAT == "UNAFFECTED"):
+                DAD_GT = DAD_DICT['monoallelic_X_het'][key][0]
                 if DAD_GT == 'HOM':                                             # i.e., x-linked dominant variant in unnafected father
-                    print "***[X-linked model (x-linked dominant)]*** Excluded CHILD var %s CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GT,DAD_GT,DAD_STAT)
+                    print "***[monoallelic_X_het]*** Excluded CHILD var %s in gene = %s, CHILD_GT = %s, DAD_GT = %s, DAD_STAT = %s" % (key,CHILD_GENE,CHILD_GT,DAD_GT,DAD_STAT)
                     continue
 
         # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
@@ -1064,7 +1202,7 @@ def read_G2P(in_file):
 
         new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
 
-        # record the data for CHILD G2P variants (for OBS=x-linked dominant)
+        # record the data for CHILD G2P variants (for OBS=monoallelic_X_het)
         if new_key not in G2P_DICT:
             G2P_DICT[new_key] = 0
         else:
@@ -1083,208 +1221,66 @@ def read_G2P(in_file):
             pass
 
 
-    ########################################################################
-    ### process x-linked over-dominance  genes - no filtering to be done ###
-    ########################################################################
 
-    for key in CHILD_DICT['x-linked over-dominance']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
 
-        CHILD_GT = CHILD_DICT['x-linked over-dominance'][key][0]
-        CHILD_GENE = CHILD_DICT['x-linked over-dominance'][key][1]
-        CHILD_TRANS = CHILD_DICT['x-linked over-dominance'][key][2]
 
-        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
-        chr,start,end,ref,alt = key.split(":")
-        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
-            if len(ref) < len(alt):                                 # an INS
-                orig_start = start
-                orig_ref = ref
-                orig_alt = alt
-                start = orig_start
-                ref = '-'
-                alt = orig_alt[len(orig_ref):]
-                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
-            else:                                                   # a DEL
-                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
-                print line
-                raise SystemExit
+#.#    ########################################################################
+#.#    ### process x-linked over-dominance  genes - no filtering to be done ###
+#.#    ########################################################################
 
-        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
+#.#    for key in CHILD_DICT['x-linked over-dominance']:       # this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
 
-        # record the data for CHILD G2P variants (for OBS=x-linked over-dominance)
-        if new_key not in G2P_DICT:
-            G2P_DICT[new_key] = 0
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
-
-        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
-        if new_key not in G2P_DATA:
-            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
-        else:
-            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-            # raise SystemExit
-            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-            pass
+#.#        CHILD_GT = CHILD_DICT['x-linked over-dominance'][key][0]
+#.#        CHILD_GENE = CHILD_DICT['x-linked over-dominance'][key][1]
+#.#        CHILD_TRANS = CHILD_DICT['x-linked over-dominance'][key][2]
 
+#.#        # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
+#.#        chr,start,end,ref,alt = key.split(":")
+#.#        if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
+#.#            if len(ref) < len(alt):                                 # an INS
+#.#                orig_start = start
+#.#                orig_ref = ref
+#.#                orig_alt = alt
+#.#                start = orig_start
+#.#                ref = '-'
+#.#                alt = orig_alt[len(orig_ref):]
+#.#                print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
+#.#            else:                                                   # a DEL
+#.#                print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
+#.#                print line
+#.#                raise SystemExit
 
-    NUM_UNIQ_G2P_VARS = len(G2P_DICT)
-    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC and X-LINKED genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
-    sys.stdout.flush()
-
-    print ""
+#.#        new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
 
+#.#        # record the data for CHILD G2P variants (for OBS=x-linked over-dominance)
+#.#        if new_key not in G2P_DICT:
+#.#            G2P_DICT[new_key] = 0
+#.#        else:
+#.#            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+#.#            # raise SystemExit
+#.#            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+#.#            pass
 
+#.#        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
+#.#        if new_key not in G2P_DATA:
+#.#            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
+#.#        else:
+#.#            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
+#.#            # raise SystemExit
+#.#            # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
+#.#            pass
 
 
-
-    ##############################################################################################################
-    ####    BIALLELIC filtering                                                                               ####
-    ####    under the biallelic model (OBS == biallelic) - consider ALL variants per gene                     ####
-    ####    must all be HET in CHILD, GT in parent does not matter                                            ####
-    ####    all of them must *clearly* come from only one of the parents (maternally/paternally + biparental) ####
-    ####    and this parent must be unaffected                                                                ####
-    ####    if all these: then exclude all child variants in this gene                                        ####
-    ##############################################################################################################
-
+    NUM_UNIQ_G2P_VARS = len(G2P_DICT)
+    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC, BIALLELIC and X-LINKED genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
+    sys.stdout.flush()
 
     print ""
-    print "===   BIALLELIC filtering   ==="
-
-
-    GENE_KEY_GT = defaultdict(dict)		# for child - 1st level key: gene_name; 2nd level key: chr:start:end:ref:alt; value: (GT,trans)
-
-    # process all variants in biallelic genes in child
-    for key in CHILD_DICT['biallelic']:		# this the second key: chr:start:end:ref:alt; value: (ZYG,gene,trans)
-        b_GT = CHILD_DICT['biallelic'][key][0]
-        b_gene = CHILD_DICT['biallelic'][key][1]
-        b_trans = CHILD_DICT['biallelic'][key][2]
-        GENE_KEY_GT[b_gene][key] = (b_GT,b_trans)
-
-    # iterate over genes in GENE_KEY_GT
-    for g in GENE_KEY_GT: 			# this is the biallelic gene name
-        all_HET = True
-
-        # iterate over variants in this gene
-        for kx in GENE_KEY_GT[g]:		# this the second key: chr:start:end:ref:alt
-###            if GENE_KEY_GT[g][kx] == 'HOM':	# there is a HOM variant in the child - NO filtering
-            if GENE_KEY_GT[g][kx][0] == 'HOM':     # there is a HOM variant in the child - NO filtering
-                all_HET = False
-                break
-
-        if all_HET:				# for this gene
-        # all variants in this gene in the CHILD are HET, check if all come from a single unaffected parent
-        # if yes, filter out and write a message to the log file
-        # if not, to be added to G2P_DICT and G2P_DATA for further processing
-
-            all_from_one_parent = True
-
-            # iterate again over the variants in this gene
-            VAR_SOURCE_LIST = {}		# key: chr:start:end:ref:alt in child; value: (NONE) or (MOM or DAD or BOTH and the parent is UNAFFECTED)
-
-            for ky in GENE_KEY_GT[g]:		# this the second key: chr:start:end:ref:alt
-
-                this_var_status = 'NONE'
-
-                if ((ky in MOM_DICT['biallelic']) or (ky in MOM_DICT['monoallelic'])) and (MOM_STAT == "UNAFFECTED"):
-                    this_var_status = 'MOM'
-                if ((ky in DAD_DICT['biallelic']) or (ky in DAD_DICT['monoallelic'])) and (DAD_STAT == "UNAFFECTED"):
-                    if this_var_status == 'NONE':
-                        this_var_status = 'DAD'
-                    elif this_var_status == 'MOM':
-                        this_var_status = 'BOTH'
-
-                VAR_SOURCE_LIST[ky] = this_var_status
-
-            # have collected the parent source for all variants in this gene
-            tot_num_vars = len(VAR_SOURCE_LIST)
-            num_mom = 0
-            num_dad = 0
-            num_none = 0
-            for kt,v in VAR_SOURCE_LIST.iteritems():
-                if v == 'NONE':
-                    num_none += 1
-                elif v == 'MOM':
-                    num_mom += 1
-                elif v == 'DAD':
-                    num_dad += 1
-                elif v == 'BOTH':
-                    num_mom += 1
-                    num_dad += 1
-                else:
-                    print "ERROR: cannot understand the source parent = %s" % (v)
-                    raise SystemExit
-
-            if num_none > 0:
-                all_from_one_parent = False
-            elif num_mom < tot_num_vars and num_dad < tot_num_vars:
-                all_from_one_parent = False
-
-            # if all variants in the child in this gene are found in single unaffected parent - filter out
-            if all_from_one_parent:
-                for kz in GENE_KEY_GT[g]:
-                    print "***[Biallelic model]*** Excluded CHILD HET var %s in gene = %s, found in = %s, PARENT_STAT = UNAFFECTED" % (kz,g,VAR_SOURCE_LIST[kz])
-                continue
-
-        # end processing all HET variants in the proband - if all from single unaffected parent they have been excluded, message to the log written
-        # and gone to evaluating the next biallelic gene in the child
-
-        # if here
-        # - either not all CHILD variants in this gene are not HET, or
-        # - not all of them can be traced to a single unaffected parent
-        # --> add to be processed
-
-        # here we are at gene level, must iterate over all variants in this gene
-        # iterate over variants in this gene
-        for kkk in GENE_KEY_GT[g]:                # this the second key: chr:start:end:ref:alt
-
-            CHILD_GT = CHILD_DICT['biallelic'][kkk][0]
-            CHILD_GENE = CHILD_DICT['biallelic'][kkk][1]
-            CHILD_TRANS = CHILD_DICT['biallelic'][kkk][2]
-
-            # if a non-normalized INDEL in child G2P - must adjust (should not happen really, we split, normalized and left-aligned the family VCF before sending it to VEP+G2P)
-            chr,start,end,ref,alt = kkk.split(":")
-            if len(ref) > 1 and len(alt) > 1:                           # an INDEL - not normalized
-                if len(ref) < len(alt):                                 # an INS
-                    orig_start = start
-                    orig_ref = ref
-                    orig_alt = alt
-                    start = orig_start
-                    ref = '-'
-                    alt = orig_alt[len(orig_ref):]
-                    print "    WARNING: original INS = %s:%s:%s:%s:%s --> replaced with INS = %s:%s:%s:%s" % (chr,orig_start,end,orig_ref,orig_alt,chr,start,ref,alt)
-                else:                                                   # a DEL
-                    print "ERROR: At the momemnt, cannot deal with this non-normalized deletion"
-                    print line
-                    raise SystemExit
+    print ""
 
-            new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
 
-            # record the data for CHILD G2P variants (for OBS=biallelic)
-            if new_key not in G2P_DICT:
-                G2P_DICT[new_key] = 0
-            else:
-                # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-                # raise SystemExit
-                # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-                pass
 
-            # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
-            if new_key not in G2P_DATA:
-                G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
-            else:
-                # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
-                # raise SystemExit
-                # this will happen if a gene is e.g. hemizygous,x-linked dominant - there will be two separate lines in the output for each req
-                pass
 
-    NUM_UNIQ_G2P_VARS = len(G2P_DICT)
-    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC, X-LINKED and BIALLELIC genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
-    sys.stdout.flush()
-    print ""
-    print ""
 
 
 
diff --git a/process_quad.sh b/process_quad.sh
index 2aa1cead9ae8306dba5453c3db3a611012258cd0..29cd8931f2b7a6130863eb2649e042055580b9f4 100755
--- a/process_quad.sh
+++ b/process_quad.sh
@@ -30,13 +30,16 @@ SCRIPTS_DIR=/home/u035/u035/shared/scripts
 
 
 # other files to be used
-TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed			# OK
-CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt	# OK
+TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20220113.plus15bp.merged.bed			# OK
+CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20220113.clinvar.20220109.plus15bp.txt	# OK
 BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt			# OK
 TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt			# OK
+REC_SNP=/home/u035/u035/shared/resources/reccurent/current_reccurent.bed                        # OK, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7116826/, Extended Data Table 1
+
 
 
 ### TOOLS ###
+SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools
 BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools
 BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip
 TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix
@@ -227,9 +230,9 @@ for KID_ID in ${KID_IDS[@]}; do
         --cache --cache_version 100 \
         --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \
         --individual all \
-        --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \
+        --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20220113.txt" \
         --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \
-        --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
+        --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20220113.csv',af_from_vcf=1,confidence_levels='definitive&strong',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
 
     echo ""
     echo ""
@@ -355,6 +358,51 @@ for KID_ID in ${KID_IDS[@]}; do
 
 
 
+    ################################################################################################
+    # check the coverage per each of the reccurent de novo SNPs (padded with 15bp both directions) #
+    ################################################################################################
+    echo "Performing recurrent coverage analysis for PROBAND_ID = ${KID_ID}_${FAMILY_ID} ..."
+
+    # we have identified the name of the proband's BAM file above (BAM_FILE), reuse it
+    # set the name of the file containing info about the coverage of the recurrent SNPs
+    REC_OUT_FILE=${COV_DIR}/${KID_ID}_${FAMILY_ID}.REC_SNP_COV.txt
+
+    while IFS=$'\t' read -ra var; do
+      gene="${var[0]}"
+      chr="${var[1]}"
+      pos="${var[2]}"
+      lo=$(expr $pos - 15)
+      hi=$(expr $pos + 15)
+      reg="$lo-$hi"
+      echo "============================================="
+      echo "$gene : recurrent variant at $chr:$pos"
+      echo "exploring coverage at $chr:$reg"
+
+      echo "---------------------------------------------"
+      echo "precisely at the position"
+      ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE} | grep "$pos"
+
+      echo "---------------------------------------------"
+      echo "average in the +/- 15bp region"
+      ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE} | awk '{sum+=$3} END { print "Average = ",sum/NR}'
+
+      echo "---------------------------------------------"
+      echo "detailed in the +/- 15bp region"
+      ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE}
+    done < ${REC_SNP} > ${REC_OUT_FILE}
+
+
+    echo ""
+    echo ""
+    echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+    echo "Coverage analysis of recurring SNPs for PROBAND_ID = ${KID_ID}_${FAMILY_ID}: done    "
+    echo "    Coverage file = ${REC_OUT_FILE}"
+    echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+    echo ""
+    echo ""
+
+
+
 
 
     #############################################################################################
@@ -684,9 +732,9 @@ time ${VEP} \
     --cache --cache_version 100 \
     --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \
     --individual all \
-    --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \
+    --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20220113.txt" \
     --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \
-    --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
+    --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20220113.csv',af_from_vcf=1,confidence_levels='definitive&strong',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
 
 
 echo ""
@@ -700,7 +748,8 @@ echo ""
 
 
 ########################################################
-###      run coverage for each proband (DD genes)    ###
+###    run coverage for each proband (DD genes)      ###
+###    run reccurent SNP coverage for each proband   ###
 ###    already did it as part of the trio analysis   ###
 ########################################################
 
diff --git a/process_shared.sh b/process_shared.sh
index 0ac17aa790276a63427dc312cbc194c9277c215a..c3e10666114d645fc06a8eadb2e04d14d89c5598 100755
--- a/process_shared.sh
+++ b/process_shared.sh
@@ -29,16 +29,18 @@ SCRIPTS_DIR=/home/u035/u035/shared/scripts
 
 
 # other files to be used
-TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed			# OK
-CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt	# OK
+TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20220113.plus15bp.merged.bed			# OK
+CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20220113.clinvar.20220109.plus15bp.txt	# OK
 BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt			# OK
 TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt			# OK
+REC_SNP=/home/u035/u035/shared/resources/reccurent/current_reccurent.bed                        # OK, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7116826/, Extended Data Table 1
 
 
 
 
 
 ### TOOLS ###
+SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools
 BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools
 BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip
 TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix
@@ -179,9 +181,9 @@ time ${VEP} \
     --cache --cache_version 100 \
     --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \
     --individual all \
-    --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \
+    --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20220113.txt" \
     --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \
-    --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
+    --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20220113.csv',af_from_vcf=1,confidence_levels='definitive&strong',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
 
 
 echo ""
@@ -248,6 +250,49 @@ for INDI_ID in `${BCFTOOLS} query -l $file`; do
     echo ""
     echo ""
 
+
+
+
+    # check the coverage per each of the reccurent de novo SNPs (padded with 15bp both directions) #
+    echo "Performing recurrent coverage analysis for PROBAND_ID = ${INDI_ID} ..."
+
+    # we have identified the name of the proband's BAM file above (BAM_FILE), reuse it
+    # set the name of the file containing info about the coverage of the recurrent SNPs
+    REC_OUT_FILE=${COV_DIR}/${INDI_ID}.REC_SNP_COV.txt
+
+    while IFS=$'\t' read -ra var; do
+      gene="${var[0]}"
+      chr="${var[1]}"
+      pos="${var[2]}"
+      lo=$(expr $pos - 15)
+      hi=$(expr $pos + 15)
+      reg="$lo-$hi"
+      echo "============================================="
+      echo "$gene : recurrent variant at $chr:$pos"
+      echo "exploring coverage at $chr:$reg"
+
+      echo "---------------------------------------------"
+      echo "precisely at the position"
+      ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE} | grep "$pos"
+
+      echo "---------------------------------------------"
+      echo "average in the +/- 15bp region"
+      ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE} | awk '{sum+=$3} END { print "Average = ",sum/NR}'
+
+      echo "---------------------------------------------"
+      echo "detailed in the +/- 15bp region"
+      ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE}
+    done < ${REC_SNP} > ${REC_OUT_FILE}
+
+    echo ""
+    echo ""
+    echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+    echo "Coverage analysis of recurring SNPs for PROBAND_ID = ${INDI_ID}: done    "
+    echo "    Coverage file = ${REC_OUT_FILE}"
+    echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+    echo ""
+    echo ""
+
 done
 
 
diff --git a/process_trio.sh b/process_trio.sh
index 5cbe9b40d3f86bc2cdfbc07e8bcba1f9333797cf..b0ae363af34b001a7da780fdbfda5ef9ba586093 100755
--- a/process_trio.sh
+++ b/process_trio.sh
@@ -33,14 +33,16 @@ SCRIPTS_DIR=/home/u035/u035/shared/scripts
 # other files to be used
 FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt								# created by trio_setup.sh
 CHILD_IDS=${WORK_DIR}/PRO_IDs.txt								# created by trio_setup.sh
-TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed			# OK
-CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt	# OK
+TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20220113.plus15bp.merged.bed			# OK
+CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20220113.clinvar.20220109.plus15bp.txt	# OK
 BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt			# OK
 TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt			# OK
+REC_SNP=/home/u035/u035/shared/resources/reccurent/current_reccurent.bed			# OK, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7116826/, Extended Data Table 1
 
 
 
 ### TOOLS ###
+SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools
 BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools
 BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip
 TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix
@@ -161,9 +163,9 @@ time ${VEP} \
     --cache --cache_version 100 \
     --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \
     --individual all \
-    --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \
+    --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20220113.txt" \
     --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \
-    --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
+    --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20220113.csv',af_from_vcf=1,confidence_levels='definitive&strong',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
 
 
 echo ""
@@ -329,6 +331,53 @@ echo ""
 
 
 
+################################################################################################
+# check the coverage per each of the reccurent de novo SNPs (padded with 15bp both directions) #
+################################################################################################
+echo "Performing recurrent coverage analysis for PROBAND_ID = ${PROBAND_ID}_${FAMILY_ID} ..."
+
+# we have identified the name of the proband's BAM file above (BAM_FILE), reuse it
+# set the name of the file containing info about the coverage of the recurrent SNPs
+REC_OUT_FILE=${COV_DIR}/${PROBAND_ID}_${FAMILY_ID}.REC_SNP_COV.txt
+
+while IFS=$'\t' read -ra var; do
+  gene="${var[0]}"
+  chr="${var[1]}"
+  pos="${var[2]}"
+  lo=$(expr $pos - 15)
+  hi=$(expr $pos + 15)
+  reg="$lo-$hi"
+  echo "============================================="
+  echo "$gene : recurrent variant at $chr:$pos"
+  echo "exploring coverage at $chr:$reg"
+
+  echo "---------------------------------------------"
+  echo "precisely at the position"
+  ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE} | grep "$pos"
+
+  echo "---------------------------------------------"
+  echo "average in the +/- 15bp region"
+  ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE} | awk '{sum+=$3} END { print "Average = ",sum/NR}'
+
+  echo "---------------------------------------------"
+  echo "detailed in the +/- 15bp region"
+  ${SAMTOOLS} depth -aa -Q 20 -r $chr:$reg ${BAM_FILE}
+done < ${REC_SNP} > ${REC_OUT_FILE}
+
+
+echo ""
+echo ""
+echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+echo "Coverage analysis of recurring SNPs for PROBAND_ID = ${PROBAND_ID}_${FAMILY_ID}: done    "
+echo "    Coverage file = ${REC_OUT_FILE}"
+echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+echo ""
+echo ""
+
+
+
+
+
 
 ###################################################################################
 ###      for each proband generate the DECIPHER file                            ###
@@ -367,7 +416,7 @@ FAM_BAM_DIR=${SOURCE_DIR}/${BATCH_NUM}_${VERSION_N}/families/????-??-??_${BATCH_
 
 
 ## call the python scrpit
-#~~#time ${PYTHON2} /home/u035/u035/shared/temp/generate_DEC_IGV_scripts.py \
+##time ${PYTHON2} /home/u035/u035/shared/temp/generate_DEC_IGV_scripts.py \
 time ${PYTHON2} ${SCRIPTS_DIR}/generate_DEC_IGV_scripts.py \
 ${DEC_MAP} \
 ${TRANS_MAP} \