#!/bin/bash
#PBS -l walltime=96:00:00
#PBS -l ncpus=1,mem=128gb
#PBS -q uv2000
#PBS -N bcbio_gnomad_install
#PBS -j oe

# bcbio_gnomad_install.sh
#
# PBS batch job that builds variation/gnomad_genome.vcf.gz (plus .tbi and .csi
# indexes) inside the bcbio hg38 "txtmp" directory from the gnomAD r3.0 genome
# sites VCF.
#
# Inputs (relative to the txtmp working directory):
#   gnomad.genomes.r3.0.sites.vcf.bgz  source VCF
#   gnomad_fields_to_keep.txt          INFO tag names to keep, one per line
#   ../seq/hg38.fa                     reference FASTA used by vt normalize
#
# Outputs:
#   variation/gnomad_genome.vcf.gz and its tabix .tbi / .csi indexes
#
# Exits non-zero on the first failing step; a partially written VCF is never
# left at the final output path.

set -euo pipefail

# Print a message to stderr and abort the job.
die() {
  printf 'bcbio_gnomad_install: %s\n' "$*" >&2
  exit 1
}

cd /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/txtmp \
  || die "cannot cd to the hg38 txtmp directory"

PATH=$PATH:/home/u035/project/software/bcbio/anaconda/bin

ref=../seq/hg38.fa
fields_file=gnomad_fields_to_keep.txt
input_vcf=gnomad.genomes.r3.0.sites.vcf.bgz
output_vcf=variation/gnomad_genome.vcf.gz
# Stage the result next to the final path so the closing mv is a rename.
tmp_vcf="${output_vcf}.partial.$$"

# Fail early, before the long-running pipeline, if a tool or input is missing.
for tool in bcftools vt bgzip tabix; do
  command -v "$tool" >/dev/null || die "required tool not found on PATH: $tool"
done
for input in "$ref" "$fields_file" "$input_vcf"; do
  [[ -r "$input" ]] || die "missing or unreadable input: $input"
done

# Turn the one-tag-per-line list into "INFO/a,INFO/b,...". Blank lines are
# skipped so they cannot produce an empty "INFO/" entry.
fields_to_keep=$(
  sed -e '/^[[:space:]]*$/d' -e 's|^|INFO/|' -- "$fields_file" \
    | paste -s -d, -
)
[[ -n "$fields_to_keep" ]] || die "no INFO fields listed in $fields_file"

# The original script assumed this directory already existed.
mkdir -p -- "${output_vcf%/*}"

# Remove the staging file on any exit path; after a successful mv this is a no-op.
trap 'rm -f -- "$tmp_vcf"' EXIT

# Pipeline stages:
#   bcftools view -f PASS    keep only records whose FILTER is PASS
#   bcftools annotate -x ^…  drop every annotation except the listed INFO tags
#   vt decompose -s          split multiallelic records (vt "smart" mode)
#   vt normalize -r REF -n   normalize against the reference; -n is taken from
#                            the original command (per vt docs it tolerates
#                            reference inconsistencies for non-SNPs)
#   vt uniq                  drop duplicate variants
#   bgzip -c                 block-compress to stdout
bcftools view -f PASS "$input_vcf" \
  | bcftools annotate -x "^${fields_to_keep}" -Ov \
  | vt decompose -s - \
  | vt normalize -r "$ref" -n - \
  | vt uniq - \
  | bgzip -c > "$tmp_vcf"

# Publish only after every pipeline stage succeeded (guaranteed by pipefail).
mv -f -- "$tmp_vcf" "$output_vcf"

# Build both index flavours; -f overwrites any stale index from a previous run.
tabix -f -p vcf "$output_vcf"
tabix -f -p vcf --csi "$output_vcf"