From e19e28e5108702fb97836dbcc3dc60cb9d863c20 Mon Sep 17 00:00:00 2001 From: ameyner2 <alison.meynert@igmm.ed.ac.uk> Date: Fri, 18 Jun 2021 11:39:12 +0100 Subject: [PATCH] Update to handle different G2P output --- NHS_WES_generate_DEC_IGV_aff_probands.py | 14 ++++++++------ NHS_WES_generate_DEC_IGV_sib_from_quad.py | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/NHS_WES_generate_DEC_IGV_aff_probands.py b/NHS_WES_generate_DEC_IGV_aff_probands.py index 29023d3..25a965b 100755 --- a/NHS_WES_generate_DEC_IGV_aff_probands.py +++ b/NHS_WES_generate_DEC_IGV_aff_probands.py @@ -349,27 +349,29 @@ def read_G2P(in_file): ########################################## check_key = '%s:%s' % (sam_id,second_key) - if check_key not in CHECK_DICT: + if check_key not in CHECK_DICT: # first time we see this var in this sample, any OBS_state CHECK_DICT[check_key][OBS_state] = 1 if sam_id not in KIDS_G2P_DICT: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) elif second_key not in KIDS_G2P_DICT[sam_id]: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) - else: - print "ERROR: should not be here: variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) + else: # sanity check + print "ERROR: first time var already seen?: variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) raise SystemExit - elif OBS_state not in CHECK_DICT[check_key].keys(): + elif OBS_state not in CHECK_DICT[check_key].keys(): # first time we see this var in this sample with this OBS_state CHECK_DICT[check_key][OBS_state] = 1 if sam_id not in KIDS_G2P_DICT: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) elif second_key not in KIDS_G2P_DICT[sam_id]: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) + elif KIDS_G2P_DICT[sam_id][second_key] == (GT,gene_name,transcript): # diff OBS_state, but must have same (GT,gene_name,transcript) + pass else: - print "ERROR: should not be here: variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) + print "ERROR: diff (GT,gene_name,transcript) for variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) raise SystemExit - else: # same individual, same variant, same OBS_state + else: # same individual, same variant, known OBS_state # due to the new output of G2P we may have the same variant but with different gene names - ensembl/refseq # check the gene name in KIDS_G2P_DICT[sam_id][second_key] if not KIDS_G2P_DICT[sam_id][second_key][1].startswith('ENSG'): # recorded is refseq diff --git a/NHS_WES_generate_DEC_IGV_sib_from_quad.py b/NHS_WES_generate_DEC_IGV_sib_from_quad.py index 304ceb7..eb978f2 100755 --- a/NHS_WES_generate_DEC_IGV_sib_from_quad.py +++ b/NHS_WES_generate_DEC_IGV_sib_from_quad.py @@ -354,27 +354,29 @@ def read_G2P(in_file): ########################################## check_key = '%s:%s' % (sam_id,second_key) - if check_key not in CHECK_DICT: + if check_key not in CHECK_DICT: # first time we see this var in this sample, any OBS_state CHECK_DICT[check_key][OBS_state] = 1 if sam_id not in KIDS_G2P_DICT: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) elif second_key not in KIDS_G2P_DICT[sam_id]: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) - else: - print "ERROR: should not be here: variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) + else: # sanity check + print "ERROR: first time var already seen?: variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) raise SystemExit - elif OBS_state not in CHECK_DICT[check_key].keys(): + elif OBS_state not in CHECK_DICT[check_key].keys(): # first time we see this var in this sample with this OBS_state CHECK_DICT[check_key][OBS_state] = 1 if sam_id not in KIDS_G2P_DICT: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) elif second_key not in KIDS_G2P_DICT[sam_id]: KIDS_G2P_DICT[sam_id][second_key] = (GT,gene_name,transcript) + elif KIDS_G2P_DICT[sam_id][second_key] == (GT,gene_name,transcript): # diff OBS_state, but must have same (GT,gene_name,transcript) + pass else: - print "ERROR: should not be here: variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) + print "ERROR: diff (GT,gene_name,transcript) for variant %s in %s gene for CHILD = %s" % (second_key,gene_name,sam_id) raise SystemExit - else: # same individual, same variant, same OBS_state + else: # same individual, same variant, known OBS_state # due to the new output of G2P we may have the same variant but with different gene names - ensembl/refseq # check the gene name in KIDS_G2P_DICT[sam_id][second_key] if not KIDS_G2P_DICT[sam_id][second_key][1].startswith('ENSG'): # recorded is refseq -- GitLab