Generate plot of MAF comparison authored by ameyner2's avatar ameyner2
......@@ -72,14 +72,14 @@ cut -f 6 compare_alleles.txt | sort | uniq -c
558618 ukb_only
```
| Array | Match status | Position only | Allele + position match |
|---|---|---|---|
| UKB | Unique | 542810 / 66.0% | 558618 / 67.6% |
| UKB | Both | 279300 / 34.0% | 267308 / 32.4% |
| GSA | Unique | 435910 / 60.9% | 450562 / 62.8% |
| GSA | Both | 279300 / 39.1% | 267308 / 37.2% |
| Array | Match status | Position only | Allele + position match | Allele + position match with MAF |
|---|---|---|---|---|
| UKB | Unique | 542810 / 66.0% | 558618 / 67.6% | 505362 / 66.0% |
| UKB | Both | 279300 / 34.0% | 267308 / 32.4% | 259769 / 34.0% |
| GSA | Unique | 435910 / 60.9% | 450562 / 62.8% | n/a |
| GSA | Both | 279300 / 39.1% | 267308 / 37.2% | n/a |
### Extract the rsids and get the MAFs
## Extract the MAFs
**NB** Could try mapping by position and/or alleles again to the MAF files for any GSA-only sites that are imputed in UKB.
......@@ -95,4 +95,30 @@ wc -l rsids.*
cat UKB_MAFs/* | perl ../../scripts/extract_UKB_mafs_by_rsid.pl rsids.ukb_all.txt > mafs.ukb_all.txt
cat UKB_MAFs/* | perl ../../scripts/extract_UKB_mafs_by_rsid.pl rsids.ukb_only.txt > mafs.ukb_only.txt
cat UKB_MAFs/* | perl ../../scripts/extract_UKB_mafs_by_rsid.pl rsids.both.txt > mafs.both.txt
wc -l mafs.*
259769 mafs.both.txt
765131 mafs.ukb_all.txt
505362 mafs.ukb_only.txt
```
### Compare the MAF distributions
```
R
both = read.table("mafs.both.txt", header=F, col.names=c("id", "rsid", "pos", "A", "B", "MAF", "MA", "conf"))
ukball = read.table("mafs.ukb_all.txt", header=F, col.names=c("id", "rsid", "pos", "A", "B", "MAF", "MA", "conf"))
ukbonly = read.table("mafs.ukb_only.txt", header=F, col.names=c("id", "rsid", "pos", "A", "B", "MAF", "MA", "conf"))
both$label = "UKB Axiom & Illumina GSA"
ukball$label = "UKB Axiom all"
ukbonly$label = "UKB Axiom only"
x = rbind(both, ukbonly)
x$label = as.factor(x$label)
library(lattice)
histogram(~MAF | label, x, layout=c(1,2), breaks=seq(0,0.5,0.01), main="MAF from UK Biobank")
dev.print(png, "UKB_only_vs_both_MAF.png", width=600, height=600)
```
![alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 1")
\ No newline at end of file