From 467d20c48352f346881d718815a7cc27b35bc37c Mon Sep 17 00:00:00 2001
From: ameyner2 <alison.meynert@ed.ac.uk>
Date: Wed, 18 Aug 2021 14:30:08 +0100
Subject: [PATCH] Update Software_installation_ultra2.md

---
 docs/Software_installation_ultra2.md | 74 +++++++++++++---------------
 1 file changed, 34 insertions(+), 40 deletions(-)

diff --git a/docs/Software_installation_ultra2.md b/docs/Software_installation_ultra2.md
index d2793b0..a3b1972 100644
--- a/docs/Software_installation_ultra2.md
+++ b/docs/Software_installation_ultra2.md
@@ -39,18 +39,7 @@ DATE=`date +%Y%m%d%H%M`
 /home/u035/u035/shared/software/bcbio/tools/bin/bcbio_nextgen.py upgrade -u skip --datatarget vep &> bcbio_install_logs/bcbio_install_datatarget_vep_${DATE}.log
 ```
 
-Q: do we even need gnomAD annotations on these? Should I be skipping VEP altogether for the alignment & variant calling pipeline?
-
-We already had gnomAD 3.0 compiled and downloaded on the ultra2 bcbio installation, so this gets copied to `/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/variation`.
-
-```
-cd /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/variation
-scp ultra.epcc.ed.ac.uk:/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/variation/gnomad_genome.vcf.gz ./
-scp ultra.epcc.ed.ac.uk:/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/variation/gnomad_genome.vcf.gz.csi ./
-scp ultra.epcc.ed.ac.uk:/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/variation/gnomad_genome.vcf.gz.tbi ./
-```
-
-However, if needed, re-generate it like this. It took about 6 days on old ultra.
+gnomAD for bcbio can be regenerated with the commands below, but this takes about a week, so instead the files generated by the old ultra installation were copied to `/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/variation`.
 
 ```
 DATE=`date +%Y%m%d%H%M`
@@ -68,15 +57,13 @@ Increase JVM memory for GATK in galaxy/bcbio_system.yaml
 
 See https://github.com/Ensembl/ensembl-variation/pull/621/files
 
-Edit /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0/Bio/EnsEMBL/Variation/BaseAnnotation.pm accordingly.
+Edit /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0/Bio/EnsEMBL/Variation/BaseAnnotation.pm accordingly.
 
 ### Verifybamid custom panel for exomes
 
 ```
-source /home/u035/project/scripts/trio_whole_exome_config.sh
-
-mkdir /home/u035/project/software/install/1000G_phase3_hg38
-cd /home/u035/project/software/install/1000G_phase3_hg38
+mkdir /home/u035/u035/shared/software/install/1000G_phase3_hg38
+cd /home/u035/u035/shared/software/install/1000G_phase3_hg38
 
 # download the 1000 Genomes autosomes + X site VCFs
 for ((i = 1; i <= 22; i = i + 1))
@@ -86,35 +73,37 @@ do
 done
 wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz
 wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz.tbi
-cd ..
 
 # create bare to prefixed chromosome map
 for ((i = 1; i <= 22; i = i + 1))
 do
   echo $i "chr"$i >> chr_prefix_map.txt
 done
-echo chrX >> chr_prefix_map.txt
+echo X chrX >> chr_prefix_map.txt
+
+# add bcbio tools and the VerifyBamID directory to the path
+PATH=/home/u035/u035/shared/software/bcbio/tools/bin:/home/u035/u035/shared/software/bcbio/anaconda/share/verifybamid2-1.0.6-0:$PATH
 
 # use the TWIST kit to subset the variants and add the chr prefix at the same time
-for file in 1000G_phase3_hg38/*vcf.gz
+sed -e 's/chr//' ../../../resources/Twist_Exome_Target_hg38.bed > targets.bed
+for file in *phased.vcf.gz
 do
   bname=`basename $file`
-  bcftools view -R /home/u035/project/resources/Twist_Exome_Target_hg38.bed -m2 -M2 -v snps -i 'AF >= 0.01' $file | bcftools annotate --rename-chrs chr_prefix_map.txt | bgzip -c > ${bname%.vcf.gz}.biallelic.snps.m\
-inAF0.01.vcf.gz
+  bcftools view -R targets.bed -m2 -M2 -v snps -i 'AF >= 0.01' $file | bcftools annotate --rename-chrs chr_prefix_map.txt | bgzip -c > ${bname%.vcf.gz}.biallelic.snps.minAF0.01.vcf.gz
   tabix ${bname%.vcf.gz}.biallelic.snps.minAF0.01.vcf.gz
 done
 
 # concatenate all the files in the correct order
-bcftools concat -o ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz -O z \
-  ALL.chr[1-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz \
-  ALL.chr[12][0-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz \
-  ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz
-tabix ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz
+bcftools concat -o ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz -O z \
+  ALL.chr[1-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz \
+  ALL.chr[12][0-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz \
+  ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz
+tabix ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz
 
 # use VerifyBamID to create the new panel
-/home/u035/project/software/bcbio/anaconda/share/verifybamid2-1.0.6-0/VerifyBamID \
-  --RefVCF ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz
-  --Reference bcbio-1.1.5/genomes/Hsapiens/hg38/seq/hg38.fa
+VerifyBamID \
+  --RefVCF ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz \
+  --Reference ../../bcbio/genomes/Hsapiens/hg38/seq/hg38.fa
 
 # rename the files to the correct format
 mv ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz.bed 1000g.phase3.100k.b38.vcf.gz.dat.bed
@@ -123,46 +112,51 @@ mv ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.sn
 mv ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz.UD 1000g.phase3.100k.b38.vcf.gz.dat.UD
 
 # move them into the correct location, backing up the original resource folder
-cd /home/u035/project/software/bcbio/anaconda/share/verifybamid2-1.0.6-0
+cd /home/u035/u035/shared/software/bcbio/anaconda/share/verifybamid2-1.0.6-0
 mv resource resource.bak
 mkdir resource
-mv /home/u035/project/software/install/1000G_phase3_hg38/1000g.phase3.100k.b38* resource/
+mv /home/u035/u035/shared/software/install/1000G_phase3_hg38/1000g.phase3.100k.b38* resource/
+
+# clean up intermediate files
+cd /home/u035/u035/shared/software/install
+rm -r 1000G_phase3_hg38
 ```
 
 ## Python modules
 
 ### VASE
 
-VASE v0.4 was installed 28 August 2020.
+VASE v0.4.2 was installed 18 August 2021.
 
 ```
-cd /home/u035/project/software
-./bcbio/anaconda/bin/pip3 install git+git://github.com/david-a-parry/vase.git#egg=project[BGZIP,REPORTER,MYGENE]
+cd /home/u035/u035/shared/software
+./bcbio/anaconda/bin/pip3 install git+git://github.com/david-a-parry/vase.git#egg=vase[BGZIP,REPORTER,MYGENE]
 ```
 
 ### XlsxWriter
 
-XlsxWriter 1.3.3 was installed 28 August 2020.
+XlsxWriter 3.0.1 was installed 18 August 2021.
 
 ```
-cd /home/u035/project/software
+cd /home/u035/u035/shared/software
 ./bcbio/anaconda/bin/pip3 install XlsxWriter
 ```
 
 ## GATK 3.8
 
 ```
-cd /home/u035/project/software/install
+cd /home/u035/u035/shared/software/install
 wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2
 bzip2 -d GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2 
 tar -xf GenomeAnalysisTK-3.8-0-ge9d806836.tar
 mv GenomeAnalysisTK-3.8-0-ge9d806836 ../GenomeAnalysisTK-3.8
+rm GenomeAnalysisTK-3.8-0-ge9d806836.tar
 ```
 
 ## RTG tools
 
 ```
-cd /home/u035/project/software
+cd /home/u035/u035/shared/software
 wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.11/rtg-tools-3.11-linux-x64.zip
 unzip rtg-tools-3.11-linux-x64.zip
 rm rtg-tools-3.11-linux-x64.zip
@@ -171,7 +165,7 @@ rm rtg-tools-3.11-linux-x64.zip
 ## IGV
 
 ```
-cd /home/u035/project/software
+cd /home/u035/u035/shared/software
 wget https://data.broadinstitute.org/igv/projects/downloads/2.8/IGV_Linux_2.8.9.zip
 unzip IGV_Linux_2.8.9.zip
 rm IGV_Linux_2.8.9.zip
-- 
GitLab