diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..7cd56b326d9f0d3c9c8c0161c818b922e42aac7c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*_transfer_info_file.sh
diff --git a/NHS_WES_extract_shared_vars.py b/NHS_WES_extract_shared_vars.py
new file mode 100755
index 0000000000000000000000000000000000000000..83e94aa20a398592d7fc9aca1cf5a04c4bd1e409
--- /dev/null
+++ b/NHS_WES_extract_shared_vars.py
@@ -0,0 +1,131 @@
+#	given a VCF file and a list of IDs (as in the #CHROM line)
+#	extract a subset VCF containing only variants which are not HOM REF in *all* the listed IDs 
+#
+#
+#       Author: MH
+#       last modified: AUG 18, 2020
+
+
+
+
+import sys
+import os
+import gzip
+
+
+ID_DICT = {}	#	key: ID, value: field index (0 = chr, 1 = pos, ....)
+
+
+
+def go(in_file,list_ids,out_file):
+
+    ids = [x.strip() for x in list_ids.strip().split(',')]
+    print "Found %s IDs for which the shared variants are to be extracted" % (len(ids))
+    print ids
+
+    in_han = gzip.open(in_file,'r')
+    out_han = open(out_file,'w')
+    in_cntr = 0
+    out_cntr = 0
+
+    for line in in_han:
+        if line.startswith('#CHROM'):
+            print line
+            # find the index for each ID
+            # potential problems - ID more than once in the VCF (will return only the first match) - should not occur in VCFs
+            #                    - ID not in the list  
+            header_fields = [x.strip() for x in line.strip().split('\t')]
+            for i in ids:
+                try:
+                    index = header_fields.index(i)
+                    if i not in ID_DICT:
+                        ID_DICT[i] = index
+                    else:
+                        print "The supplied list of IDs contain duplicates for %s" % (i)
+                        raise SystemExit  
+                except ValueError:
+                    print "%s not found in %s" % (i,line)
+                    raise SystemExit  
+
+            if len(ID_DICT) != len(ids):
+                print "Could not find the indexes for all IDs"
+                print ID_DICT
+                raise SystemExit 
+            
+            # write the new line in out_file 
+            new_header = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t'
+            for z in xrange(0,len(ids)):
+                new_header = new_header + '%s\t' % (ids[z])
+            new_header = new_header[:-1] + '\n'
+            out_han.write(new_header)
+            continue
+
+        if line.startswith('#'):
+            out_han.write(line)
+            continue 
+
+        # startng to read the variants
+        in_cntr += 1
+        data = [x.strip() for x in line.strip().split('\t')]  
+        desired = {}	# key: id; value: the variant info
+        for i in ids:
+           ind = ID_DICT[i]
+           var = data[ind]
+           desired[i] = var 
+        print desired  
+
+        out_cntr += 1 
+
+    in_han.close()
+    out_han.close()
+    print "Read %s vars from the family file, wrote %s shared variants" % (in_cntr,out_cntr)
+    sys.stdout.flush()
+
+
+
+def temp():
+
+    AFF_PROBANDS = []
+
+    in_han = open(in_file,'r')
+    for line in in_han:
+        data = [x.strip() for x in line.strip().split('\t')]
+        pro_fam_id = data[1]
+        par_1 = int(data[2])
+        par_2 = int(data[3])
+        aff = int(data[5])
+
+        if (par_1 != 0) or (par_2 != 0):
+            print "ERROR: Found a proband with a parent"
+            print line
+            raise SystemExit(1) 
+
+        if aff != 2:
+            print "ERROR: Found unaffected proband"
+            print line
+            raise SystemExit(1)
+
+        if pro_fam_id not in AFF_PROBANDS:
+            AFF_PROBANDS.append(pro_fam_id)
+        else:
+            print "ERROR: Found duplicate proband"
+            print line
+            raise SystemExit(1)
+
+    in_han.close()
+    print "PED file checks: success"
+    print "Found %s affected probands with no parents in %s" % (len(AFF_PROBANDS),in_file)
+    sys.stdout.flush()   
+
+
+
+
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 4:
+        go(sys.argv[1],sys.argv[2],sys.argv[3])
+    else:
+        print "Suggested use: time python /home/u035/project/scripts/NHS_WES_extract_shared_vars.py in_vcf comma,sep,list,of,ids out_vcf"
+        raise SystemExit
+
diff --git a/submit_bcbio_wes_pilot.sh b/submit_bcbio_wes_pilot.sh
deleted file mode 100755
index 67fedbceaef67fd90c88956a16379a204d32861b..0000000000000000000000000000000000000000
--- a/submit_bcbio_wes_pilot.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-#PBS -l walltime=48:00:00
-#PBS -l ncpus=16,mem=8gb
-#PBS -q sgp
-#PBS -N nhss_wes_bcbio
-#PBS -j oe
-
-# enable running singletons
-if [ -z $PBS_ARRAY_INDEX ]
-then
-  if [ -z $INDEX ]
-  then
-    export PBS_ARRAY_INDEX=1
-  else
-    export PBS_ARRAY_INDEX=$INDEX
-  fi
-fi
-
-export PATH=$PATH:/home/u027/project/software/bcbio/tools/bin
-BCBIO_CONFIG=/scratch/u027/project/analysis/wes_pilot/bcbio-1.1.5_test/config
-BCBIO_WORK=/scratch/u027/project/analysis/wes_pilot/bcbio-1.1.5_test/work
-
-# Expects environment variables to be set
-# BATCH - nhsswesXXX
-# FAMILY_IDS - text file listing family ids
-
-FAMILY_ID=`head -n $PBS_ARRAY_INDEX $FAMILY_IDS | tail -n 1`
-
-CONFIG_FILE=$BCBIO_CONFIG/${BATCH}_${FAMILY_ID}.yaml
-mkdir -p $BCBIO_WORK/$FAMILY_ID
-cd $BCBIO_WORK/$FAMILY_ID
-
-bcbio_nextgen.py $CONFIG_FILE -n 16 -t local