From 467d20c48352f346881d718815a7cc27b35bc37c Mon Sep 17 00:00:00 2001 From: ameyner2 <alison.meynert@ed.ac.uk> Date: Wed, 18 Aug 2021 14:30:08 +0100 Subject: [PATCH] Update Software_installation_ultra2.md --- docs/Software_installation_ultra2.md | 74 +++++++++++++--------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/docs/Software_installation_ultra2.md b/docs/Software_installation_ultra2.md index d2793b0..a3b1972 100644 --- a/docs/Software_installation_ultra2.md +++ b/docs/Software_installation_ultra2.md @@ -39,18 +39,7 @@ DATE=`date +%Y%m%d%H%M` /home/u035/u035/shared/software/bcbio/tools/bin/bcbio_nextgen.py upgrade -u skip --datatarget vep &> bcbio_install_logs/bcbio_install_datatarget_vep_${DATE}.log ``` -Q: do we even need gnomAD annotations on these? Should I be skipping VEP altogether for the alignment & variant calling pipeline? - -We already had gnomAD 3.0 compiled and downloaded on the ultra2 bcbio installation, so this gets copied to `/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/variation`. - -``` -cd /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/variation -scp ultra.epcc.ed.ac.uk:/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/variation/gnomad_genome.vcf.gz ./ -scp ultra.epcc.ed.ac.uk:/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/variation/gnomad_genome.vcf.gz.csi ./ -scp ultra.epcc.ed.ac.uk:/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/variation/gnomad_genome.vcf.gz.tbi ./ -``` - -However, if needed, re-generate it like this. It took about 6 days on old ultra. +Regenerating gnomAD for bcbio takes about a week, so instead the files generated on the old ultra installation were copied to `/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/variation`. If needed, regenerate it as follows. 
``` DATE=`date +%Y%m%d%H%M` @@ -68,15 +57,13 @@ Increase JVM memory for GATK in galaxy/bcbio_system.yaml See https://github.com/Ensembl/ensembl-variation/pull/621/files -Edit /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0/Bio/EnsEMBL/Variation/BaseAnnotation.pm accordingly. +Edit /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0/Bio/EnsEMBL/Variation/BaseAnnotation.pm accordingly. ### Verifybamid custom panel for exomes ``` -source /home/u035/project/scripts/trio_whole_exome_config.sh - -mkdir /home/u035/project/software/install/1000G_phase3_hg38 -cd /home/u035/project/software/install/1000G_phase3_hg38 +mkdir /home/u035/u035/shared/software/install/1000G_phase3_hg38 +cd /home/u035/u035/shared/software/install/1000G_phase3_hg38 # download the 1000 Genomes autosomes + X site VCFs for ((i = 1; i <= 22; i = i + 1)) @@ -86,35 +73,37 @@ do done wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz.tbi -cd .. 
# create bare to prefixed chromosome map for ((i = 1; i <= 22; i = i + 1)) do echo $i "chr"$i >> chr_prefix_map.txt done -echo chrX >> chr_prefix_map.txt +echo X chrX >> chr_prefix_map.txt + +# add bcbio tools to path +PATH=/home/u035/u035/shared/software/bcbio/tools/bin:/home/u035/u035/shared/software/bcbio/anaconda/share/verifybamid2-1.0.6-0:$PATH # use the TWIST kit to subset the variants and add the chr prefix at the same time -for file in 1000G_phase3_hg38/*vcf.gz +sed -e 's/chr//' ../../../resources/Twist_Exome_Target_hg38.bed > targets.bed +for file in *phased.vcf.gz do bname=`basename $file` - bcftools view -R /home/u035/project/resources/Twist_Exome_Target_hg38.bed -m2 -M2 -v snps -i 'AF >= 0.01' $file | bcftools annotate --rename-chrs chr_prefix_map.txt | bgzip -c > ${bname%.vcf.gz}.biallelic.snps.m\ -inAF0.01.vcf.gz + bcftools view -R targets.bed -m2 -M2 -v snps -i 'AF >= 0.01' $file | bcftools annotate --rename-chrs chr_prefix_map.txt | bgzip -c > ${bname%.vcf.gz}.biallelic.snps.minAF0.01.vcf.gz tabix ${bname%.vcf.gz}.biallelic.snps.minAF0.01.vcf.gz done # concatenate all the files in the correct order -bcftools concat -o ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz -O z \ - ALL.chr[1-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz \ - ALL.chr[12][0-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz \ - ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz -tabix ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz +bcftools concat -o ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz -O z \ + ALL.chr[1-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz \ + 
ALL.chr[12][0-9].shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz \ + ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz +tabix ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz # use VerifyBamID to create the new panel -/home/u035/project/software/bcbio/anaconda/share/verifybamid2-1.0.6-0/VerifyBamID \ - --RefVCF ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz - --Reference bcbio-1.1.5/genomes/Hsapiens/hg38/seq/hg38.fa +VerifyBamID \ + --RefVCF ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.biallelic.snps.minAF0.01.vcf.gz \ + --Reference ../../bcbio/genomes/Hsapiens/hg38/seq/hg38.fa # rename the files to the correct format mv ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz.bed 1000g.phase3.100k.b38.vcf.gz.dat.bed @@ -123,46 +112,51 @@ mv ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.sn mv ALL.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.chr.biallelic.snps.minAF0.01.vcf.gz.UD 1000g.phase3.100k.b38.vcf.gz.dat.UD # move them into the correct location, backing up the original resource folder -cd /home/u035/project/software/bcbio/anaconda/share/verifybamid2-1.0.6-0 +cd /home/u035/u035/shared/software/bcbio/anaconda/share/verifybamid2-1.0.6-0 mv resource resource.bak mkdir resource -mv /home/u035/project/software/install/1000G_phase3_hg38/1000g.phase3.100k.b38* resource/ +mv /home/u035/u035/shared/software/install/1000G_phase3_hg38/1000g.phase3.100k.b38* resource/ + +# clean up intermediate files +cd /home/u035/u035/shared/software/install +rm -r 1000G_phase3_hg38 ``` ## Python modules ### VASE -VASE v0.4 was installed 28 August 2020. +VASE v0.4.2 was installed 18 August 2021. 
``` -cd /home/u035/project/software -./bcbio/anaconda/bin/pip3 install git+git://github.com/david-a-parry/vase.git#egg=project[BGZIP,REPORTER,MYGENE] +cd /home/u035/u035/shared/software +./bcbio/anaconda/bin/pip3 install git+git://github.com/david-a-parry/vase.git#egg=vase[BGZIP,REPORTER,MYGENE] ``` ### XlsxWriter -XlsxWriter 1.3.3 was installed 28 August 2020. +XlsxWriter 3.0.1 was installed 18 August 2021. ``` -cd /home/u035/project/software +cd /home/u035/u035/shared/software ./bcbio/anaconda/bin/pip3 install XlsxWriter ``` ## GATK 3.8 ``` -cd /home/u035/project/software/install +cd /home/u035/u035/shared/software/install wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2 bzip2 -d GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2 tar -xf GenomeAnalysisTK-3.8-0-ge9d806836.tar mv GenomeAnalysisTK-3.8-0-ge9d806836 ../GenomeAnalysisTK-3.8 +rm GenomeAnalysisTK-3.8-0-ge9d806836.tar ``` ## RTG tools ``` -cd /home/u035/project/software +cd /home/u035/u035/shared/software wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.11/rtg-tools-3.11-linux-x64.zip unzip rtg-tools-3.11-linux-x64.zip rm rtg-tools-3.11-linux-x64.zip @@ -171,7 +165,7 @@ rm rtg-tools-3.11-linux-x64.zip ## IGV ``` -cd /home/u035/project/software +cd /home/u035/u035/shared/software wget https://data.broadinstitute.org/igv/projects/downloads/2.8/IGV_Linux_2.8.9.zip unzip IGV_Linux_2.8.9.zip rm IGV_Linux_2.8.9.zip -- GitLab