Skip to content
Snippets Groups Projects
generate_DEC_IGV_solo_scripts.py 45.1 KiB
Newer Older
##.#        if new_key not in G2P_DICT:
##.#            G2P_DICT[new_key] = 0
##.#        else:
##.#            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
##.#            # raise SystemExit
##.#            # this will happen if a gene has more than one allelic requirement (e.g. hemizygous and x-linked dominant) - there will be a separate line in the output for each requirement
##.#            pass
#
##.#        # and record the required data (CHILD_TRANS,CHILD_GENE,CHILD_GT) in G2P_DATA
##.#        if new_key not in G2P_DATA:
##.#            G2P_DATA[new_key] = (CHILD_TRANS,CHILD_GENE,CHILD_GT)
##.#        else:
##.#            # print "ERROR: duplicate G2P variant new_key = %s" % (new_key)
##.#            # raise SystemExit
##.#            # this will happen if a gene has more than one allelic requirement (e.g. hemizygous and x-linked dominant) - there will be a separate line in the output for each requirement
##.#            pass


    NUM_UNIQ_G2P_VARS = len(G2P_DICT)
    print "Found %s unique G2P variants in CHILD (%s) after considering MONOALLELIC, BIALLELIC and X-LINKED genes" % (NUM_UNIQ_G2P_VARS,CHILD_ID)
    sys.stdout.flush()

    print ""
    print ""

















def read_ped(in_file):
    """Read the proband's ID and sex from a tab-separated PED file.

    Each line is split on tabs; the second column (index 1) is taken as the
    individual ID and the fifth column (index 4) as the integer sex code.
    Nothing is returned - the results are stored in module-level globals:

      CHILD_ID      - ID from the last line read (stays 0 if the file is empty)
      CHILD_SEX     - sex code from the last line read (stays 0 if the file is empty)
      DEC_CHILD_SEX - '46XY' when the sex code is 1, '46XX' when it is 2

    Raises SystemExit, after printing the offending line, when the sex code
    is neither 1 nor 2.
    """
    global CHILD_ID
    global CHILD_SEX
    global DEC_CHILD_SEX

    CHILD_ID = 0
    CHILD_SEX = 0

    # no need to do PED checks, did them for singletons at trio_setup.sh
    # 'with' guarantees the handle is closed, including on the SystemExit path
    # (previously the file was opened here but never closed)
    with open(in_file, 'r') as in_han:
        for line in in_han:
            data = [x.strip() for x in line.strip().split('\t')]
            CHILD_ID = data[1]
            CHILD_SEX = int(data[4])
            if CHILD_SEX == 1:          # boy
                DEC_CHILD_SEX = '46XY'
            elif CHILD_SEX == 2:        # girl
                DEC_CHILD_SEX = '46XX'
            else:
                # single-argument print(...) emits the same output as the print statement
                print("ERROR: proband sex unknown")
                print(line)
                raise SystemExit




def read_map_file(in_file):
    """Load a two-column, tab-separated ID map into the global MAP_DICT.

    For every non-blank line the first column (dec_id - the DECIPHER/family
    ID, going by the error message) becomes the key and the second column
    (int_id) the value.  Surrounding whitespace is stripped from both.

    Raises SystemExit, after printing an error, if the same first-column ID
    is already present in MAP_DICT.
    """
    # 'with' closes the handle on every exit path, including the SystemExit
    # raised for a duplicate ID (the explicit close() used to be skipped then)
    with open(in_file, 'r') as in_han:
        for line in in_han:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing empty line at end of
                # file) instead of failing with an IndexError on data[1]
                continue
            data = [x.strip() for x in line.strip().split('\t')]
            dec_id = data[0]
            int_id = data[1]
            if dec_id in MAP_DICT:
                print("ERROR: duplicate DECIPHER/family ID = %s" % (dec_id))
                raise SystemExit
            MAP_DICT[dec_id] = int_id




def read_trans_map(in_file):
    """Load a two-column, tab-separated transcript map into the global TRANS_DICT.

    For every non-blank line the first column (old transcript ID) becomes
    the key and the second column (new transcript ID) the value.
    Surrounding whitespace is stripped from both.

    Raises SystemExit, after printing an error, if the same old transcript
    ID is already present in TRANS_DICT.
    """
    # 'with' closes the handle on every exit path, including the SystemExit
    # raised for a duplicate ID (the explicit close() used to be skipped then)
    with open(in_file, 'r') as in_han:
        for line in in_han:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing empty line at end of
                # file) instead of failing with an IndexError on data[1]
                continue
            data = [x.strip() for x in line.strip().split('\t')]
            old_trans_id = data[0]
            new_trans_id = data[1]
            if old_trans_id in TRANS_DICT:
                print("ERROR: duplicate old transcript ID = %s" % (old_trans_id))
                raise SystemExit
            TRANS_DICT[old_trans_id] = new_trans_id






if __name__ == '__main__':
    # Script entry point: exactly ten positional arguments are expected
    # (sys.argv[0] is the script name, hence the comparison against 11).
    if len(sys.argv) != 11:
        # Wrong argument count: show the suggested invocation and stop.
        usage = (
            "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py "
            "        dec_map_file,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir"
        )
        print(usage)
        raise SystemExit
    # Forward the ten command-line arguments, in order, to go().
    go(*sys.argv[1:])