Skip to content
Snippets Groups Projects
Commit 6103ee32 authored by ameyner2's avatar ameyner2
Browse files

Add new file

parent 699d2afd
No related branches found
No related tags found
No related merge requests found
# First look at the one sample
```
bcftools view -m2 -M2 -v snps 18735_426481-gatk-haplotype-annotated.vcf.gz | bcftools filter -i 'GT[1] = "ref"' | bcftools filter -i 'GT[0] = "het"' | bcftools filter -i 'GT[2] != "ref" & GT[2] != "mis"' | bgzip -c > paternally_inherited_het.vcf.gz
tabix paternally_inherited_het.vcf.gz
/home/u035/project/software/bcbio/anaconda/bin/vase -i paternally_inherited_het.vcf.gz -o paternally_inherited_het_rare.vcf.gz --freq 0.001 --csq all
tabix paternally_inherited_het_rare.vcf.gz
header.txt
chr pos ref alt child_gt child_ref_dp child_alt_dp mom_gt mom_ref_dp mom_alt_dp dad_gt dad_ref_dp dad_alt_dp
cp header.txt paternally_inherited_het_rare.txt
bcftools query -f "%CHROM\t%POS\t%REF\t%ALT[\t%GT\t%AD]\n" paternally_inherited_het_rare.vcf.gz | sed -e 's/,/\t/g' >> paternally_inherited_het_rare.txt
```
```
R
y = read.table("paternally_inherited_het_rare.txt", header=T, stringsAsFactors=F, sep="\t", na.strings=".")
y$mom_alt_af = y$mom_alt_dp / (y$mom_alt_dp + y$mom_ref_dp)
y$dad_alt_af = y$dad_alt_dp / (y$dad_alt_dp + y$dad_ref_dp)
y$child_alt_af = y$child_alt_dp / (y$child_alt_dp + y$child_ref_dp)
length(y$chr)
382
q = subset(y, y$child_alt_af >= 0.2 & y$dad_alt_af >= 0.2)
length(q$chr)
225
q = subset(y, y$child_alt_af >= 0.2 & y$dad_alt_af >= 0.2 & y$mom_alt_dp > 0)
length(q$chr)
19
z = data.frame(id = c(rep("dad", length(q$chr)), rep("mom", length(q$chr)), rep("child", length(q$chr))), af = c(q$dad_alt_af, q$mom_alt_af, q$child_alt_af))
library(lattice)
pdf("paternally_inherited_het_rare_aaf_histogram.pdf", width=6, height=6)
histogram(~af | id, z, layout=c(1,3), breaks=seq(0,1,0.01), type="c")
dev.off()
19 of 225 sites have some alternate allele reads in the mother
q = subset(y, y$child_alt_af >= 0.2 & y$dad_alt_af >= 0.2)
pdf("dad_vs_child_af.pdf", width=6, height=6)
xyplot(child_alt_af ~ dad_alt_af, q, xlim=c(-0.05,1.05), ylim=c(-0.05,1.05))
dev.off()
1
pdf("mom_vs_child_af.pdf", width=6, height=6)
xyplot(child_alt_af ~ mom_alt_af, q, xlim=c(-0.05,1.05), ylim=c(-0.05,1.05))
dev.off()
```
# All families in batch
```
for ((i = 1; i <= 32; i = i + 1))
do
head -n $i affected_child.ped | tail -n 1 | cut -f 2 > child_id.txt
head -n $i affected_child.ped | tail -n 1 | cut -f 3 > father_id.txt
head -n $i affected_child.ped | tail -n 1 | cut -f 4 > mother_id.txt
family_id=`head -n $i affected_child.ped | tail -n 1 | cut -f 1`
bcftools view -m2 -M2 -v snps ${family_id}-gatk-haplotype-annotated.vcf.gz | bcftools filter -i 'GT[@mother_id.txt] = "ref" && GT[@child_id.txt] = "het" && GT[@father_id.txt] != "ref" && GT[@father_id.txt] != "mis"' | bgzip -c > ${family_id}_paternally_inherited_het.vcf.gz
tabix ${family_id}_paternally_inherited_het.vcf.gz
/home/u035/project/software/bcbio/anaconda/bin/vase -i ${family_id}_paternally_inherited_het.vcf.gz -o ${family_id}_paternally_inherited_het_rare.vcf.gz --freq 0.001 --csq all
tabix ${family_id}_paternally_inherited_het_rare.vcf.gz
cp header.txt ${family_id}_paternally_inherited_het_rare.txt
bcftools query -f "%CHROM\t%POS\t%REF\t%ALT[\t%GT\t%AD]\n" ${family_id}_paternally_inherited_het_rare.vcf.gz | sed -e 's/,/\t/g' >> ${family_id}_paternally_inherited_het_rare.txt
done
```
Excluded the one family without VEP annotations
```
R
ids = read.table("affected_child.ped", header=F, sep="\t", stringsAsFactors=F)
x = data.frame()
for (family_id in ids$V1) {
y = read.table(sprintf("%s_paternally_inherited_het_rare.txt", family_id), header=T, stringsAsFactors=F, sep="\t", na.strings=".")
y$family_id = family_id
x = rbind(x, y)
}
x$mom_alt_af = x$mom_alt_dp / (x$mom_alt_dp + x$mom_ref_dp)
x$dad_alt_af = x$dad_alt_dp / (x$dad_alt_dp + x$dad_ref_dp)
x$child_alt_af = x$child_alt_dp / (x$child_alt_dp + x$child_ref_dp)
x$count = 1
q = subset(x, x$child_alt_af >= 0.2 & x$dad_alt_af >= 0.2)
total = aggregate(q$count, by = list(q$family_id), sum)
q = subset(x, x$child_alt_af >= 0.2 & x$dad_alt_af >= 0.2 & x$mom_alt_dp > 0)
mom_has_reads = aggregate(q$count, by = list(q$family_id), sum)
output = data.frame(family_id = total$Group.1, total = total$x, mreads = mom_has_reads$x)
output$mprop = output$mreads / output$total
outsub = subset(output, output$total > 2)
22 of 28 families have a decent number of sites - not sure what's up with the other 6, which only have 1 or 2 each, but these are excluded from analysis.
summary(outsub)
family_id total mreads mprop
18735_412876: 1 Min. :136.0 Min. : 6.00 Min. :0.03109
18735_414260: 1 1st Qu.:178.0 1st Qu.:12.25 1st Qu.:0.06782
18735_415251: 1 Median :198.0 Median :16.00 Median :0.07705
18735_421930: 1 Mean :192.6 Mean :15.59 Mean :0.08072
18735_424464: 1 3rd Qu.:204.8 3rd Qu.:18.00 3rd Qu.:0.09609
18735_424521: 1 Max. :230.0 Max. :26.00 Max. :0.13065
(Other) :16
quantile(q$mom_alt_dp + q$mom_ref_dp, seq(0,1,0.05))
0% 5% 10% 15% 20% 25% 30% 35% 40% 45% 50%
7.0 19.5 29.0 45.5 57.0 69.5 79.0 88.0 101.0 110.0 124.0
55% 60% 65% 70% 75% 80% 85% 90% 95% 100%
143.5 168.0 181.0 200.0 229.0 264.0 296.5 360.0 425.0 1023.0
quantile(q$dad_alt_dp + q$dad_ref_dp, seq(0,1,0.05))
0% 5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60%
2.0 13.0 27.0 47.0 55.0 65.5 78.0 87.0 95.0 111.0 126.0 138.0 149.0
65% 70% 75% 80% 85% 90% 95% 100%
175.5 201.0 223.0 245.0 281.5 350.0 426.5 829.0
quantile(q$child_alt_dp + q$child_ref_dp, seq(0,1,0.05))
0% 5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60%
3.0 21.5 30.0 43.0 51.0 61.0 72.0 79.0 92.0 101.5 122.0 141.5 157.0
65% 70% 75% 80% 85% 90% 95% 100%
174.0 190.0 215.0 238.0 261.0 315.0 402.5 846.0
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment