# input: a PED file for a family with affected and related probands only
#
# checks that all individuals in the PED file are affected
# checks that the parents for all individuals are missing (i.e., ID = 0)
# if any problems SystemExit(1) - the value of $? is to be checked by the bash script
#   - if 0: all is well
#   - if 1: the PED file failed the checks (or the script was invoked incorrectly)
#
# output: for other family types, maybe write out a file with a list of all
#         affected probands and a list of all unaffected parents ?
#
# Author: MH
# last modified: FEB 25, 2020

import sys
import os


def go(in_file):
    """Validate that a PED file contains only affected, parentless, unique probands.

    Each line is split on tabs; column 2 is taken as the individual ID,
    columns 3 and 4 as the parent IDs and column 6 as the affection status.
    A line passes when both parent IDs are 0, the affection status is 2 and
    the individual ID has not been seen on an earlier line.

    On success a summary is printed to stdout and the function returns None.

    Raises:
        SystemExit(1): on the first line that has a parent, is not affected,
            repeats an individual ID, or cannot be parsed (too few columns or
            non-integer parent/affection fields). The offending line is
            printed before exiting.
    """
    # A set gives O(1) duplicate detection; duplicates abort the run, so its
    # size always equals the number of lines accepted so far.
    seen_probands = set()

    # The context manager closes the file on every exit path, including the
    # SystemExit raised from inside the loop.
    with open(in_file, 'r') as in_han:
        for line in in_han:
            data = [x.strip() for x in line.strip().split('\t')]

            try:
                pro_fam_id = data[1]
                par_1 = int(data[2])
                par_2 = int(data[3])
                aff = int(data[5])
            except (IndexError, ValueError):
                # Previously surfaced as an uncaught traceback (still exit
                # status 1); report it in the same style as the other checks.
                print("ERROR: Found a malformed PED line")
                print(line)
                raise SystemExit(1)

            if (par_1 != 0) or (par_2 != 0):
                print("ERROR: Found a proband with a parent")
                print(line)
                raise SystemExit(1)

            if aff != 2:
                print("ERROR: Found unaffected proband")
                print(line)
                raise SystemExit(1)

            if pro_fam_id in seen_probands:
                print("ERROR: Found duplicate proband")
                print(line)
                raise SystemExit(1)
            seen_probands.add(pro_fam_id)

    # Single-argument print(...) produces identical output on Python 2 and 3.
    print("PED file checks: success")
    print("Found %s affected probands with no parents in %s" % (len(seen_probands), in_file))
    sys.stdout.flush()


if __name__ == '__main__':
    if len(sys.argv) == 2:
        go(sys.argv[1])
    else:
        print("Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_aff_probands.py a_ped_file")
        # Exit non-zero: the calling bash script reads $? == 0 as "checks
        # passed", so a bad invocation must not look like a success.
        raise SystemExit(1)