# input: a PED file for a quad family - 2 affected kids and 2 unaffected parrents # # checks that the there are exactly two kids in the PED file and both are affected # checks that the there are exactly two parents in the PED file and both are unafefcted # if any problems SystemExit(1) - the value of $? to be checked by the bash script - if 0: all is well, if 1: the PED file failed the checks # # output: for other family types, maybe write out a file wit a list of all affected probands and a list of all unaffected parents ? # # Author: MH # last modified: SEPT 15, 2020 import sys import os def go(in_file): AFF_PROBANDS = [] UNAFF_PARENTS = [] in_han = open(in_file,'r') for line in in_han: data = [x.strip() for x in line.strip().split('\t')] pro_fam_id = data[1] par_1 = data[2] par_2 = data[3] aff = int(data[5]) if (par_1 != "0") and (par_2 != "0"): # a proband if aff != 2: # not affected proband print "ERROR: Found unaffected proband" print line raise SystemExit(1) else: AFF_PROBANDS.append(pro_fam_id) if (par_1 == "0") and (par_2 == "0"): # a parent if aff != 1: # affected parent print "ERROR: Found affected parent" print line raise SystemExit(1) else: UNAFF_PARENTS.append(pro_fam_id) if len(AFF_PROBANDS) != 2: print "ERROR: Could not find exactly 2 affected probands" raise SystemExit(1) if len(UNAFF_PARENTS) != 2: print "ERROR: Could not find exactly 2 unaffected parents" raise SystemExit(1) in_han.close() print "PED file checks: success" print "Found %s affected probands with %s unaffected parents in %s" % (len(AFF_PROBANDS),len(UNAFF_PARENTS),in_file) sys.stdout.flush() if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_quad.py a_ped_file" raise SystemExit