Skip to content
Snippets Groups Projects
NHS_WES_check_PED_aff_probands.py 1.7 KiB
Newer Older
#	input:	a PED file for a family with affected and related probands only
#	
#	checks that all individuals in the PED file are affected
#	checks that the parents for all individuals are missing (i.e., ID = 0)
#	if any problem is found, exits via SystemExit(1) - the calling bash script should check the value of $?: if 0, all is well; if 1, the PED file failed the checks
#
#	output: for other family types, maybe write out a file with a list of all affected probands and a list of all unaffected parents?
#
#       Author: MH
#       last modified: FEB 25, 2020




import sys
import os


def go(in_file):
    """Check that a PED file lists only affected probands without parents.

    Each line of ``in_file`` is split on tabs and the following columns
    (0-based) are examined:

    * column 1 - the individual's ID; it must not repeat within the file
    * columns 2 and 3 - the two parent IDs; both must be the integer 0
    * column 5 - the affection status; it must be the integer 2

    On success a short summary is printed to stdout and the function
    returns ``None``.

    Raises:
        SystemExit: with code 1 as soon as a line fails any check, or when
            a line cannot be parsed (too few columns, non-integer parent or
            affection fields, blank line).  An error message and the
            offending line are printed first.
    """
    # A set gives O(1) duplicate detection; only membership and the final
    # count are ever needed.
    seen_probands = set()

    # The context manager closes the file on the early SystemExit paths too.
    with open(in_file, 'r') as in_han:
        for line in in_han:
            data = [x.strip() for x in line.strip().split('\t')]

            # Report unparsable lines explicitly instead of dying with a
            # bare IndexError/ValueError traceback; the exit status stays
            # non-zero so a caller checking $? still sees a failure.
            try:
                pro_fam_id = data[1]
                par_1 = int(data[2])
                par_2 = int(data[3])
                aff = int(data[5])
            except (IndexError, ValueError):
                print("ERROR: Malformed PED line")
                print(line)
                raise SystemExit(1)

            if (par_1 != 0) or (par_2 != 0):
                print("ERROR: Found a proband with a parent")
                print(line)
                raise SystemExit(1)

            if aff != 2:
                print("ERROR: Found unaffected proband")
                print(line)
                raise SystemExit(1)

            if pro_fam_id in seen_probands:
                print("ERROR: Found duplicate proband")
                print(line)
                raise SystemExit(1)
            seen_probands.add(pro_fam_id)

    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and is also valid on Python 3.
    print("PED file checks: success")
    print("Found %s affected probands with no parents in %s" % (len(seen_probands), in_file))
    sys.stdout.flush()






if __name__ == '__main__':
    # Exactly one command-line argument is expected: the PED file to check.
    if len(sys.argv) == 2:
        go(sys.argv[1])
    else:
        print("Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_aff_probands.py a_ped_file")
        # Exit non-zero: a bare SystemExit yields status 0, which a caller
        # inspecting $? would mistake for a PED file that passed the checks.
        raise SystemExit(1)