diff --git a/docs/17200_ITPR1_mosaic_investigation.md b/docs/17200_ITPR1_mosaic_investigation.md
new file mode 100644
index 0000000000000000000000000000000000000000..b2181bb4c416810a3545945c4f0823c9ef00482b
--- /dev/null
+++ b/docs/17200_ITPR1_mosaic_investigation.md
@@ -0,0 +1,131 @@
+# First look at the one sample
+
+```
+bcftools view -m2 -M2 -v snps 18735_426481-gatk-haplotype-annotated.vcf.gz | bcftools filter -i 'GT[1] = "ref"' | bcftools filter -i 'GT[0] = "het"' | bcftools filter -i 'GT[2] != "ref" & GT[2] != "mis"' | bgzip -c > paternally_inherited_het.vcf.gz
+tabix paternally_inherited_het.vcf.gz
+
+/home/u035/project/software/bcbio/anaconda/bin/vase -i paternally_inherited_het.vcf.gz -o paternally_inherited_het_rare.vcf.gz --freq 0.001 --csq all
+tabix paternally_inherited_het_rare.vcf.gz
+
+header.txt 
+chr	pos	ref	alt	child_gt	child_ref_dp	child_alt_dp	mom_gt	mom_ref_dp	mom_alt_dp	dad_gt	dad_ref_dp	dad_alt_dp
+
+cp header.txt paternally_inherited_het_rare.txt
+bcftools query -f "%CHROM\t%POS\t%REF\t%ALT[\t%GT\t%AD]\n" paternally_inherited_het_rare.vcf.gz | sed -e 's/,/\t/g' >> paternally_inherited_het_rare.txt
+```
+
+```
+R
+y = read.table("paternally_inherited_het_rare.txt", header=T, stringsAsFactors=F, sep="\t", na.strings=".")
+y$mom_alt_af = y$mom_alt_dp / (y$mom_alt_dp + y$mom_ref_dp)
+y$dad_alt_af = y$dad_alt_dp / (y$dad_alt_dp + y$dad_ref_dp)
+y$child_alt_af = y$child_alt_dp / (y$child_alt_dp + y$child_ref_dp)
+length(y$chr)
+382
+
+q = subset(y, y$child_alt_af >= 0.2 & y$dad_alt_af >= 0.2)
+length(q$chr)
+225
+
+q = subset(y, y$child_alt_af >= 0.2 & y$dad_alt_af >= 0.2 & y$mom_alt_dp > 0)
+length(q$chr)
+19
+
+z = data.frame(id = c(rep("dad", length(q$chr)), rep("mom", length(q$chr)), rep("child", length(q$chr))), af = c(q$dad_alt_af, q$mom_alt_af, q$child_alt_af))
+library(lattice)
+pdf("paternally_inherited_het_rare_aaf_histogram.pdf", width=6, height=6)
+histogram(~af | id, z, layout=c(1,3), breaks=seq(0,1,0.01), type="c")
+dev.off()
+
+19 of 225 sites have some alternate allele reads in the mother
+
+q = subset(y, y$child_alt_af >= 0.2 & y$dad_alt_af >= 0.2)
+pdf("dad_vs_child_af.pdf", width=6, height=6)
+xyplot(child_alt_af ~ dad_alt_af, q, xlim=c(-0.05,1.05), ylim=c(-0.05,1.05))
+dev.off()
+          1 
+pdf("mom_vs_child_af.pdf", width=6, height=6)
+xyplot(child_alt_af ~ mom_alt_af, q, xlim=c(-0.05,1.05), ylim=c(-0.05,1.05))
+dev.off()
+```
+
+# All families in batch
+
+```
+for ((i = 1; i <= 32; i = i + 1))
+do
+  head -n $i affected_child.ped | tail -n 1 | cut -f 2 > child_id.txt
+  head -n $i affected_child.ped | tail -n 1 | cut -f 3 > father_id.txt
+  head -n $i affected_child.ped | tail -n 1 | cut -f 4 > mother_id.txt
+  family_id=`head -n $i affected_child.ped | tail -n 1 | cut -f 1`
+
+  bcftools view -m2 -M2 -v snps ${family_id}-gatk-haplotype-annotated.vcf.gz | bcftools filter -i 'GT[@mother_id.txt] = "ref" && GT[@child_id.txt] = "het" && GT[@father_id.txt] != "ref" && GT[@father_id.txt] != "mis"' | bgzip -c > ${family_id}_paternally_inherited_het.vcf.gz
+  tabix ${family_id}_paternally_inherited_het.vcf.gz
+
+  /home/u035/project/software/bcbio/anaconda/bin/vase -i ${family_id}_paternally_inherited_het.vcf.gz -o ${family_id}_paternally_inherited_het_rare.vcf.gz --freq 0.001 --csq all
+  tabix ${family_id}_paternally_inherited_het_rare.vcf.gz
+
+  cp header.txt ${family_id}_paternally_inherited_het_rare.txt
+  bcftools query -f "%CHROM\t%POS\t%REF\t%ALT[\t%GT\t%AD]\n" ${family_id}_paternally_inherited_het_rare.vcf.gz | sed -e 's/,/\t/g' >> ${family_id}_paternally_inherited_het_rare.txt
+done
+```
+
+Excluded the one family without VEP annotations
+
+```
+R
+ids = read.table("affected_child.ped", header=F, sep="\t", stringsAsFactors=F)
+x = data.frame()
+for (family_id in ids$V1) {
+  y = read.table(sprintf("%s_paternally_inherited_het_rare.txt", family_id), header=T, stringsAsFactors=F, sep="\t", na.strings=".")
+  y$family_id = family_id
+  x = rbind(x, y)
+}
+
+x$mom_alt_af = x$mom_alt_dp / (x$mom_alt_dp + x$mom_ref_dp)
+x$dad_alt_af = x$dad_alt_dp / (x$dad_alt_dp + x$dad_ref_dp)
+x$child_alt_af = x$child_alt_dp / (x$child_alt_dp + x$child_ref_dp)
+x$count = 1
+
+q = subset(x, x$child_alt_af >= 0.2 & x$dad_alt_af >= 0.2)
+total = aggregate(q$count, by = list(q$family_id), sum)
+
+q = subset(x, x$child_alt_af >= 0.2 & x$dad_alt_af >= 0.2 & x$mom_alt_dp > 0)
+mom_has_reads = aggregate(q$count, by = list(q$family_id), sum)
+
+output = data.frame(family_id = total$Group.1, total = total$x, mreads = mom_has_reads$x)
+output$mprop = output$mreads / output$total
+outsub = subset(output, output$total > 2)
+
+22 of 28 families have a decent number of sites - not sure what's up with the other 6, which only have 1 or 2 each, but these are excluded from analysis.
+
+summary(outsub)
+        family_id      total           mreads          mprop        
+ 18735_412876: 1   Min.   :136.0   Min.   : 6.00   Min.   :0.03109  
+ 18735_414260: 1   1st Qu.:178.0   1st Qu.:12.25   1st Qu.:0.06782  
+ 18735_415251: 1   Median :198.0   Median :16.00   Median :0.07705  
+ 18735_421930: 1   Mean   :192.6   Mean   :15.59   Mean   :0.08072  
+ 18735_424464: 1   3rd Qu.:204.8   3rd Qu.:18.00   3rd Qu.:0.09609  
+ 18735_424521: 1   Max.   :230.0   Max.   :26.00   Max.   :0.13065  
+ (Other)     :16                                                    
+
+quantile(q$mom_alt_dp + q$mom_ref_dp, seq(0,1,0.05))
+    0%     5%    10%    15%    20%    25%    30%    35%    40%    45%    50% 
+   7.0   19.5   29.0   45.5   57.0   69.5   79.0   88.0  101.0  110.0  124.0 
+   55%    60%    65%    70%    75%    80%    85%    90%    95%   100% 
+ 143.5  168.0  181.0  200.0  229.0  264.0  296.5  360.0  425.0 1023.0 
+
+quantile(q$dad_alt_dp + q$dad_ref_dp, seq(0,1,0.05))
+   0%    5%   10%   15%   20%   25%   30%   35%   40%   45%   50%   55%   60% 
+  2.0  13.0  27.0  47.0  55.0  65.5  78.0  87.0  95.0 111.0 126.0 138.0 149.0 
+  65%   70%   75%   80%   85%   90%   95%  100% 
+175.5 201.0 223.0 245.0 281.5 350.0 426.5 829.0 
+
+quantile(q$child_alt_dp + q$child_ref_dp, seq(0,1,0.05))
+   0%    5%   10%   15%   20%   25%   30%   35%   40%   45%   50%   55%   60% 
+  3.0  21.5  30.0  43.0  51.0  61.0  72.0  79.0  92.0 101.5 122.0 141.5 157.0 
+  65%   70%   75%   80%   85%   90%   95%  100% 
+174.0 190.0 215.0 238.0 261.0 315.0 402.5 846.0
+```
+
+
diff --git a/docs/18170_run_notes.md b/docs/18170_run_notes.md
new file mode 100644
index 0000000000000000000000000000000000000000..dbbb8243664ac20ebf8978b3c8667e298ae3949d
--- /dev/null
+++ b/docs/18170_run_notes.md
@@ -0,0 +1,99 @@
+# Run 18170
+
+There are 5 samples on this run that failed on run 18840, all are from different families. Weâ€™ve put the whole family into the PED file. The rest of the familyâ€™s data are on run 18840. The repeated samples on this run are:
+
+* 125778 (family 428004)
+* 125467 (family 427734)
+* 101846 (family 429213)
+* 125303 (family 427554)
+* 125783 (family 428055)
+
+Other analysis notes:
+* 430321 â€“ Quad, 2 affected children. Please perform trio-based analysis per child, the 2 are not similarly affected.
+* 429107 â€“ Quad, 2 similarly affected children. Please perform shared variant analysis between 88303 and 80901, and trio-based analysis per child.
+* 429453 â€“ Trio, mother & child similarly affected. Please perform trio-based analysis and shared variant analysis between 126602 and 126605.
+* 429187 â€“ Duo, for Congenica analysis only.
+
+## Read set up
+
+```
+cd /scratch/u035/project/trio_whole_exome/analysis/reads
+mkdir 18170_Ansari_Morad
+cd 18170_Ansari_Morad
+
+cp ../17710_Ansari_Morad/*428004* ./
+cp ../17710_Ansari_Morad/*427734* ./
+cp ../17710_Ansari_Morad/*429213* ./
+cp ../17710_Ansari_Morad/*427554* ./
+cp ../17710_Ansari_Morad/*428055* ./
+
+ls -lh 125778_428004*
+ls -lh 125467_427734*
+ls -lh 101846_429213*
+ls -lh 125303_427554*
+ls -lh 125783_428055*
+
+rm 125778_428004*
+rm 125467_427734*
+rm 101846_429213*
+rm 125303_427554*
+rm 125783_428055*
+```
+
+## Parameter file generation
+
+```
+cd /scratch/u035/project/trio_whole_exome/analysis/params
+
+version=v1
+project_id=18170_Ansari_Morad
+sample_suffix=_WESTwist_IDT-A
+ped_file=18957_WES_PED_110621.txt 
+
+ln -s $ped_file $project_id.ped
+```
+
+Edit the parameter generation script to exit after running the Perl script for producing the PED files.
+
+```
+/home/u035/project/scripts/prepare_bcbio_config.sh \
+   /home/u035/project/scripts/trio_whole_exome_config.sh \
+   $project_id $version $sample_suffix &> ${version}_${project_id}.log
+```
+
+Copy the PED files for the above families from the previous run and edit the batch id.
+
+```
+for family in 428004 427734 429213 427554 428055
+do
+  cp 17710_Ansari_Morad_18840_${family}.ped 18170_Ansari_Morad_18957_${family}.ped
+  perl -pi -e 's/18840/18957/' 18170_Ansari_Morad_18957_${family}.ped
+done
+```
+
+Re-generate the family ids list.
+
+```
+cut -f 1 /scratch/u035/project/trio_whole_exome/analysis/params/18170_Ansari_Morad.ped | sort -u  | awk '{ print "18957_" $0 }' > 18170_Ansari_Morad.family_ids.txt
+```
+
+Edit the parameter generation script to skip running the Perl script for producing the PED files.
+
+```
+/home/u035/project/scripts/prepare_bcbio_config.sh \
+   /home/u035/project/scripts/trio_whole_exome_config.sh \
+   $project_id $version $sample_suffix &> ${version}_${project_id}.log
+```
+
+Uncomment out the skipped section of the parameter generation script.
+
+Fix the config files for these families.
+
+```
+for family in 428004 427734 429213 427554 428055
+do
+  cp v1_17710_Ansari_Morad_18840_${family}.yaml v1_18170_Ansari_Morad_18957_${family}.yaml
+  perl -pi -e 's/18840/18957/' v1_18170_Ansari_Morad_18957_${family}.yaml
+  perl -pi -e 's/17710/18170/' v1_18170_Ansari_Morad_18957_${family}.yaml
+done
+```
diff --git a/docs/SOP_archiving.md b/docs/SOP_archiving.md
index 5c17c13aaa15c38b08d300da3ff1684858e44c9d..a0061e395d509927db8ccb2dd58e19c505a83eba 100644
--- a/docs/SOP_archiving.md
+++ b/docs/SOP_archiving.md
@@ -70,7 +70,15 @@ qsub -v \
   /home/u035/project/scripts/submit_trio_wes_priority_and_qc_checksums.sh
 ```
 
-5. Archive the output to /archive/u035/trio_whole_exome, excluding the BAM files. Confirm md5 checksums on the archived files. There should be no lines ending in â€˜FAILâ€™. If there are any, investigate which file(s) and manually copy these, then re-check the md5 of the archived copy.
+5. Clean up the output directory.
+
+```
+cd /home/u035/project/trio_whole_exome/
+mkdir ${version}_${project_id}
+mv *${version}_${project_id}* ${version}_${project_id}/
+```
+
+6. Archive the output to /archive/u035/trio_whole_exome, excluding the BAM files. Confirm md5 checksums on the archived files. There should be no lines ending in â€˜FAILâ€™. If there are any, investigate which file(s) and manually copy these, then re-check the md5 of the archived copy.
 
 ```
 qsub -v \
@@ -80,21 +88,13 @@ qsub -v \
 grep -c FAIL trio_whole_exome_archive_project.$project_id*
 ```
 
-6. Clean up the logs directory.
+7. Clean up the logs directory.
 
 ```
 cd /home/u035/project/trio_whole_exome/logs 
 mv *.project_id.* complete/
 ```
 
-7. Clean up the output directory.
-
-```
-cd /home/u035/project/trio_whole_exome/
-mkdir ${version}_${project_id}
-mv *${version}_${project_id}* ${version}_${project_id}/
-```
-
 8. Confirmation of copy to tape. EPCC runs a cron job on Monday mornings at 8am to check the DMF file state for all files in /archive/u035/trio_whole_exome. This produces a text report at /archive/u035/confirmation using the format yyyy-mm-dd. 
 
 Check for â€˜DULâ€™ entries for files in the confirmation report. The grep command below excludes directories, which are generally marked as â€˜REGâ€™, blank lines, and total file size lines. If not all entries are DUL, check the non-DUL ones for error states as below. The expectation is that some files may be in the process of migration; in which case, check again in the following weekâ€™s report.