From 889f393c27f9577e84017650d64b7c438a2aebbc Mon Sep 17 00:00:00 2001
From: ameyner2 <alison.meynert@igmm.ed.ac.uk>
Date: Mon, 4 Oct 2021 12:10:53 +0100
Subject: [PATCH] Updated generating IGV scripts

---
 NHS_WES_generate_DEC_IGV_aff_probands.py  | 20 ++++++++++++--------
 NHS_WES_generate_DEC_IGV_sib_from_quad.py | 20 ++++++++++++--------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/NHS_WES_generate_DEC_IGV_aff_probands.py b/NHS_WES_generate_DEC_IGV_aff_probands.py
index 25a965b..24f914f 100755
--- a/NHS_WES_generate_DEC_IGV_aff_probands.py
+++ b/NHS_WES_generate_DEC_IGV_aff_probands.py
@@ -12,7 +12,7 @@
 #		all G2P variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: MAR 23, 2020
+#       last modified: AUG 04, 2021
 
 
 
@@ -420,6 +420,7 @@ def read_G2P(in_file):
         # go thru all of their variants          
         pro_vars = KIDS_G2P_DICT[pro_id]            # a dict with keys: chr,start,end,ref,alt and values: (GT,gene_name,transcript) 
         for var_loc,var_info in pro_vars.iteritems():
+            var_chr,var_start,var_end,var_ref,var_alt = var_loc.split(':')		#~#
             found_in_all = True
             
             # check if seen in all probands excl this one
@@ -429,13 +430,16 @@ def read_G2P(in_file):
                     found_in_all = False
                     break
 
-                # if variant found, check if GT matches
+                # if variant found, check that GT matches (the GT comparison is skipped for variants on chrX)
                 else:
-                    o_info = KIDS_G2P_DICT[o_id][var_loc]
-                    if var_info[0] != o_info[0]:
-                        print "  Excluding variant %s in %s (GT = %s); it is seen in %s but GT does not match (ST = %s)" % (var_loc,pro_id,var_info[0],o_id,o_info[0])
-                        found_in_all = False
-                        break
+                    if var_chr == 'chrX':                                               #~#
+                        pass                                                            #~#
+                    else:                                                               #~#
+                        o_info = KIDS_G2P_DICT[o_id][var_loc]
+                        if var_info[0] != o_info[0]:
+                            print "  Excluding variant %s in %s (GT = %s); it is seen in %s but GT does not match (ST = %s)" % (var_loc,pro_id,var_info[0],o_id,o_info[0])
+                            found_in_all = False
+                            break
 
             if found_in_all:	# this variant has been found in all affected probands with matching GT, keep it
                 if var_loc not in SHARED_DICT:		# it has not been recorded previously when considering another proband 
@@ -459,7 +463,7 @@ def read_G2P(in_file):
 
                     new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
                     SHARED_DICT[new_key] = var_info
-                    print "  Keeping %s found in all affected probands, same GT" % (new_key)
+                    print "  Keeping %s found in all affected probands, same GT (no check of GT match on chrX)" % (new_key)
 
 
         print "---------------------"     
diff --git a/NHS_WES_generate_DEC_IGV_sib_from_quad.py b/NHS_WES_generate_DEC_IGV_sib_from_quad.py
index eb978f2..c0449e6 100755
--- a/NHS_WES_generate_DEC_IGV_sib_from_quad.py
+++ b/NHS_WES_generate_DEC_IGV_sib_from_quad.py
@@ -12,7 +12,7 @@
 #		all G2P variants found in the individual VCF
 #
 #       Author: MH
-#       last modified: SEPT 16, 2020
+#       last modified: AUG 04, 2021
 
 
 
@@ -426,6 +426,7 @@ def read_G2P(in_file):
         # go thru all of their variants          
         pro_vars = KIDS_G2P_DICT[pro_id]            # a dict with keys: chr,start,end,ref,alt and values: (GT,gene_name,transcript) 
         for var_loc,var_info in pro_vars.iteritems():
+            var_chr,var_start,var_end,var_ref,var_alt = var_loc.split(':')		#~#
             found_in_all = True
             
             # check if seen in all probands excl this one
@@ -435,13 +436,16 @@ def read_G2P(in_file):
                     found_in_all = False
                     break
 
-                # if variant found, check if GT matches
+                # if variant found, check that GT matches (the GT comparison is skipped for variants on chrX)
                 else:
-                    o_info = KIDS_G2P_DICT[o_id][var_loc]
-                    if var_info[0] != o_info[0]:
-                        print "  Excluding variant %s in %s (GT = %s); it is seen in %s but GT does not match (ST = %s)" % (var_loc,pro_id,var_info[0],o_id,o_info[0])
-                        found_in_all = False
-                        break
+                    if var_chr == 'chrX':						#~#
+                        pass								#~#
+                    else:								#~#
+                        o_info = KIDS_G2P_DICT[o_id][var_loc]
+                        if var_info[0] != o_info[0]:
+                            print "  Excluding variant %s in %s (GT = %s); it is seen in %s but GT does not match (ST = %s)" % (var_loc,pro_id,var_info[0],o_id,o_info[0])
+                            found_in_all = False
+                            break
 
             if found_in_all:	# this variant has been found in all affected probands with matching GT, keep it
                 if var_loc not in SHARED_DICT:		# it has not been recorded previously when considering another proband 
@@ -465,7 +469,7 @@ def read_G2P(in_file):
 
                     new_key = '%s:%s:%s:%s' % (chr,start,ref,alt)
                     SHARED_DICT[new_key] = var_info
-                    print "  Keeping %s found in all affected probands, same GT" % (new_key)
+                    print "  Keeping %s found in all affected probands, same GT (no check of GT match on chrX)" % (new_key)
 
 
         print "---------------------"     
-- 
GitLab