Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# input: a PED file for a quad family - 2 affected kids and 2 unaffected parrents
#
# checks that the there are exactly two kids in the PED file and both are affected
# checks that the there are exactly two parents in the PED file and both are unafefcted
# if any problems SystemExit(1) - the value of $? to be checked by the bash script - if 0: all is well, if 1: the PED file failed the checks
#
# output: for other family types, maybe write out a file wit a list of all affected probands and a list of all unaffected parents ?
#
# Author: MH
# last modified: SEPT 15, 2020
import sys
import os
def go(in_file):
AFF_PROBANDS = []
UNAFF_PARENTS = []
in_han = open(in_file,'r')
for line in in_han:
data = [x.strip() for x in line.strip().split('\t')]
pro_fam_id = data[1]
par_1 = data[2]
par_2 = data[3]
aff = int(data[5])
if (par_1 != "0") and (par_2 != "0"): # a proband
if aff != 2: # not affected proband
print "ERROR: Found unaffected proband"
print line
raise SystemExit(1)
else:
AFF_PROBANDS.append(pro_fam_id)
if (par_1 == "0") and (par_2 == "0"): # a parent
if aff != 1: # affected parent
print "ERROR: Found affected parent"
print line
raise SystemExit(1)
else:
UNAFF_PARENTS.append(pro_fam_id)
if len(AFF_PROBANDS) != 2:
print "ERROR: Could not find exactly 2 affected probands"
raise SystemExit(1)
if len(UNAFF_PARENTS) != 2:
print "ERROR: Could not find exactly 2 unaffected parents"
raise SystemExit(1)
in_han.close()
print "PED file checks: success"
print "Found %s affected probands with %s unaffected parents in %s" % (len(AFF_PROBANDS),len(UNAFF_PARENTS),in_file)
sys.stdout.flush()
if __name__ == '__main__':
if len(sys.argv) == 2:
go(sys.argv[1])
else:
print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_quad.py a_ped_file"