Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# input: a PED file for a family with affected and related probands only
#
# checks that all individuals in the PED file are affected
# checks that the parents for all individuals are missing (i.e., ID = 0)
# if any check fails, the script raises SystemExit(1); the calling bash script should check $? (0: all is well, 1: the PED file failed the checks)
#
# output: for other family types, maybe write out a file with a list of all affected probands and a list of all unaffected parents ?
#
# Author: MH
# last modified: FEB 25, 2020
import sys
import os
def go(in_file):
    """Check that a PED file lists only affected probands with no parents.

    Each line is split on tabs. Column 2 is taken as the proband ID,
    columns 3 and 4 as the parent IDs (both must be 0, i.e. missing) and
    column 6 as the affection status (must be 2).

    On success a short summary is printed to stdout.

    Args:
        in_file: path to the tab-separated PED file to check.

    Raises:
        SystemExit: with code 1 as soon as a line is malformed (fewer than
            six columns, or a non-integer parent/affection field), has a
            non-zero parent ID, has an affection status other than 2, or
            repeats a proband ID that was already seen.
    """
    # A set gives constant-time duplicate detection; only the IDs and their
    # count are ever needed.
    seen_probands = set()

    # The context manager closes the file even when a failed check raises
    # SystemExit part-way through the loop.
    with open(in_file, 'r') as in_han:
        for line in in_han:
            data = [x.strip() for x in line.strip().split('\t')]

            try:
                pro_fam_id = data[1]
                par_1 = int(data[2])
                par_2 = int(data[3])
                aff = int(data[5])
            except (IndexError, ValueError):
                # Report blank, short or non-numeric lines as a check failure
                # instead of dying with an unexplained traceback.
                print("ERROR: Found malformed line in PED file")
                print(line)
                raise SystemExit(1)

            if (par_1 != 0) or (par_2 != 0):
                print("ERROR: Found a proband with a parent")
                print(line)
                raise SystemExit(1)

            if aff != 2:
                print("ERROR: Found unaffected proband")
                print(line)
                raise SystemExit(1)

            if pro_fam_id in seen_probands:
                print("ERROR: Found duplicate proband")
                print(line)
                raise SystemExit(1)
            seen_probands.add(pro_fam_id)

    print("PED file checks: success")
    print("Found %s affected probands with no parents in %s" % (len(seen_probands), in_file))
    sys.stdout.flush()
if __name__ == '__main__':
    # Script entry point: expects exactly one argument, the PED file to check.
    if len(sys.argv) == 2:
        go(sys.argv[1])
    else:
        print("Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_aff_probands.py a_ped_file")
        # The calling bash script reads exit status 0 as "all checks passed",
        # so a wrong invocation must not exit with 0.
        raise SystemExit(1)