# Convert a DECIPHER bulk upload file from format v9 (CSV)
# to format v10 (XLSX).
#
# Author: MH
import sys
import os
import csv
import xlsxwriter
# v9 inheritance labels that are written in a shortened form in the v10 file;
# any other label is passed through unchanged.
_V9_TO_V10_INHERITANCE = {
    'Maternally inherited, constitutive in mother': 'Maternally inherited',
    'Paternally inherited, constitutive in father': 'Paternally inherited',
}


def _v9_row_to_v10(row):
    # Map one v9 data row (list of CSV fields) onto the 17-column v10 layout.
    inher = row[15]
    return (
        row[0],                                     # Patient internal reference number or ID
        'NHS-SCE',                                  # Shared
        'GRCh38',                                   # Assembly
        '',                                         # HGVS code (left blank)
        row[1],                                     # Chromosome
        row[2],                                     # Genomic start
        row[4],                                     # Ref sequence
        row[5],                                     # Alt sequence
        row[7],                                     # Gene name
        row[6],                                     # Transcript
        row[8],                                     # Is intergenic
        row[19],                                    # Genotype
        _V9_TO_V10_INHERITANCE.get(inher, inher),   # Inheritance
        '',                                         # Pathogenicity (left blank)
        '',                                         # Pathogenicity evidence (left blank)
        '',                                         # Contribution (left blank)
        '',                                         # Genotype groups (left blank)
    )


def go(inout_dir,id):
    """Convert one v9 bulk upload CSV into a v10 XLSX workbook.

    Reads ``<inout_dir>/<id>_DEC_FLT.csv`` and writes
    ``<inout_dir>/<id>_DECIPHER_v10.xlsx`` with a single worksheet named
    'Sequence Variants': a header row followed by one row per variant.

    Parameters:
        inout_dir: the folder where the bulk upload files are stored.
        id: sample identifier in the format <indiv_id>_<family_id>.

    Raises:
        IndexError: if a non-blank input row has fewer than 20 columns.
    """
    in_file = '%s/%s_DEC_FLT.csv' % (inout_dir,id)
    out_file = '%s/%s_DECIPHER_v10.xlsx' % (inout_dir,id)

    # create the workbook and its only worksheet
    workbook = xlsxwriter.Workbook(out_file)
    worksheet = workbook.add_worksheet('Sequence Variants')

    # write the header row
    header = ('Patient internal reference number or ID','Shared','Assembly','HGVS code','Chromosome','Genomic start','Ref sequence','Alt sequence','Gene name','Transcript','Is intergenic','Genotype','Inheritance','Pathogenicity','Pathogenicity evidence','Contribution','Genotype groups')
    worksheet.write_row(0,0,header)

    # read the old file and write each variant on its own worksheet row
    out_row = 0  # row 0 holds the header; data rows start at 1
    with open(in_file,'r') as csv_file:
        reader = csv.reader(csv_file, delimiter=',', quotechar='"')
        for row in reader:
            if not row:  # blank line in the input
                continue
            if row[0] == 'Internal reference number or ID':  # ignore the header line
                continue
            # Advance first so that a variant never lands on the header row
            # or on top of the previously written variant.
            out_row += 1
            worksheet.write_row(out_row,0,_v9_row_to_v10(row))

    # close the workbook (this is what actually writes the .xlsx file)
    workbook.close()
if __name__ == '__main__':
    # Exactly two command-line arguments are expected after the script name:
    # the folder holding the bulk upload files and the <indiv_id>_<family_id> id.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print ("Suggested use: time python convert_DEC_to_v10.py decipher_dir <indiv_id>_<family_id>")
        raise SystemExit
    go(cli_args[0],cli_args[1])