diff --git a/NHS_WES_check_PED_aff_probands.py b/NHS_WES_check_PED_aff_probands.py index 2ca0e8a33ed3507d234465c02a4b859761c97d5e..a3c84d3351dd038ab29500c240a64d856e09856b 100755 --- a/NHS_WES_check_PED_aff_probands.py +++ b/NHS_WES_check_PED_aff_probands.py @@ -59,6 +59,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_check_PED_aff_probands.py a_ped_file" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_aff_probands.py a_ped_file" raise SystemExit diff --git a/NHS_WES_check_PED_quad.py b/NHS_WES_check_PED_quad.py index 1603c4678d7c63b09144ac83b4007a5dd351f16f..998f4d01cf778abb2e6caf3e99f5bb89f529e40c 100755 --- a/NHS_WES_check_PED_quad.py +++ b/NHS_WES_check_PED_quad.py @@ -69,6 +69,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_check_PED_quad.py a_ped_file" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_quad.py a_ped_file" raise SystemExit diff --git a/NHS_WES_extract_shared_vars.py b/NHS_WES_extract_shared_vars.py index 83e94aa20a398592d7fc9aca1cf5a04c4bd1e409..f75c695949303e45ab3329e3522a1cb4c39d0a07 100755 --- a/NHS_WES_extract_shared_vars.py +++ b/NHS_WES_extract_shared_vars.py @@ -126,6 +126,6 @@ if __name__ == '__main__': if len(sys.argv) == 4: go(sys.argv[1],sys.argv[2],sys.argv[3]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_extract_shared_vars.py in_vcf comma,sep,list,of,ids out_vcf" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_extract_shared_vars.py in_vcf comma,sep,list,of,ids out_vcf" raise SystemExit diff --git a/NHS_WES_extract_trio_FAM_PRO_ID.py b/NHS_WES_extract_trio_FAM_PRO_ID.py index e936cc71a751440d70d9d620b2e20a2569a8ef08..c215a0119840b8839964a4fa3d770f81a0f20c0b 100755 --- a/NHS_WES_extract_trio_FAM_PRO_ID.py 
+++ b/NHS_WES_extract_trio_FAM_PRO_ID.py @@ -115,6 +115,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_extract_trio_FAM_PRO_ID.py /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_extract_trio_FAM_PRO_ID.py /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV.py b/NHS_WES_generate_DEC_IGV.py index 0fdb863498ba8bbe4898a4113894bfc6e8862d42..95d204eacd86a269e79c37510f2756f3ce76c4c9 100755 --- a/NHS_WES_generate_DEC_IGV.py +++ b/NHS_WES_generate_DEC_IGV.py @@ -1395,7 +1395,7 @@ if __name__ == '__main__': if len(sys.argv) == 12: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV.py.v1 b/NHS_WES_generate_DEC_IGV.py.v1 index 00a1e4d9664c72d1f81d9fbe5a9b3971aaf51cab..87c4c1d5451972193b849989271f464e9d7d2180 100755 --- a/NHS_WES_generate_DEC_IGV.py.v1 +++ b/NHS_WES_generate_DEC_IGV.py.v1 @@ -920,7 +920,7 @@ if __name__ == '__main__': if len(sys.argv) == 12: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ 
dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans b/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans index 3d08582cd939755fe442ca128c39fd68a3a5f754..7cbf1acc5bfa8a95a5e7fd5b4d211639e4a40471 100755 --- a/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans +++ b/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans @@ -1366,7 +1366,7 @@ if __name__ == '__main__': if len(sys.argv) == 12: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV_aff_probands.py b/NHS_WES_generate_DEC_IGV_aff_probands.py index 24f914f3b7189afb7ec980c74bbb79c84289ad4c..b36540996c446a5872d056d2f896618ca00bafdc 100755 --- a/NHS_WES_generate_DEC_IGV_aff_probands.py +++ b/NHS_WES_generate_DEC_IGV_aff_probands.py @@ -539,7 +539,7 @@ if __name__ == '__main__': if len(sys.argv) == 11: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV_sib_from_quad.py b/NHS_WES_generate_DEC_IGV_sib_from_quad.py index c0449e65d3d0c9ee61b8c4d8658ab673f9985c8b..8138e4d28711e608861c0992911b45ae680b28cc 100755 
--- a/NHS_WES_generate_DEC_IGV_sib_from_quad.py +++ b/NHS_WES_generate_DEC_IGV_sib_from_quad.py @@ -545,7 +545,7 @@ if __name__ == '__main__': if len(sys.argv) == 11: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV_trio_from_quad.py b/NHS_WES_generate_DEC_IGV_trio_from_quad.py index 6b9e2d1a01d956f60a204903c74474a2de8180e9..33462d2a8ffa235d1f116d292ceeb96c81a9bb7f 100755 --- a/NHS_WES_generate_DEC_IGV_trio_from_quad.py +++ b/NHS_WES_generate_DEC_IGV_trio_from_quad.py @@ -1397,7 +1397,7 @@ if __name__ == '__main__': if len(sys.argv) == 13: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11],sys.argv[12]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir,indi_id_for_this_kid" raise SystemExit diff --git a/NHS_WES_generate_aff_sib_ped.py b/NHS_WES_generate_aff_sib_ped.py index 17b5b309e1488019b6ca5ba50beba22f8d48c1ed..4158999d7d81da69e662ca53c2ac68abcce30875 100644 --- a/NHS_WES_generate_aff_sib_ped.py +++ b/NHS_WES_generate_aff_sib_ped.py @@ -43,6 +43,6 @@ if __name__ == '__main__': if len(sys.argv) == 5: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/NHS_WES_generate_aff_sib_ped.py ${PED_DIR} 
${quad_ped_file} ${KID_1_ID} ${KID_2_ID}" + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/NHS_WES_generate_aff_sib_ped.py ${PED_DIR} ${quad_ped_file} ${KID_1_ID} ${KID_2_ID}" raise SystemExit diff --git a/NHS_WES_generate_coverage_result_file.py b/NHS_WES_generate_coverage_result_file.py index 8c20a01710f6e4d7a79ee3a1117ae9a342601e94..6bd2d6a38181f8c011684f9f0b5aa574cdd8c40c 100644 --- a/NHS_WES_generate_coverage_result_file.py +++ b/NHS_WES_generate_coverage_result_file.py @@ -114,9 +114,9 @@ if __name__ == '__main__': if len(sys.argv) == 4: go(sys.argv[1],sys.argv[2],sys.argv[3]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/generate_coverage_result_file.py \ + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/generate_coverage_result_file.py \ DDG2P.s14-NFE-Twist-NA12878.sample_interval_summary \ - /home/u035/project/resources/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ + /home/u035/u035/shared/resources/G2P/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ DDG2P.s14-NFE-Twist-NA12878.COV.txt" raise SystemExit diff --git a/NHS_WES_generate_trio_VCF.py b/NHS_WES_generate_trio_VCF.py index 7d5425d4506610c6a07d9aa65146b4994c1071b7..d856ded0e3c0a32cea5aa533b2fabed68cdc5a81 100644 --- a/NHS_WES_generate_trio_VCF.py +++ b/NHS_WES_generate_trio_VCF.py @@ -42,6 +42,6 @@ if __name__ == '__main__': if len(sys.argv) == 6: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" raise SystemExit diff --git a/NHS_WES_generate_trio_ped.py b/NHS_WES_generate_trio_ped.py index 9ea04f2c3a1ee0b2d66e220075e2dba30e32e10c..bd7d0bf6328978802fce15a8c52e221c250fcdf3 100644 --- 
a/NHS_WES_generate_trio_ped.py +++ b/NHS_WES_generate_trio_ped.py @@ -42,6 +42,6 @@ if __name__ == '__main__': if len(sys.argv) == 6: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" raise SystemExit diff --git a/NHS_WES_trio_cram_setup.sh b/NHS_WES_trio_cram_setup.sh index c8f5f4544067304a4da542b1b29d9d8b4fe94dba..a8cc30d0cb9d05feefa4b16370a9ab0d5d2cfd7c 100755 --- a/NHS_WES_trio_cram_setup.sh +++ b/NHS_WES_trio_cram_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -19,14 +19,14 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -SAMTOOLS=/home/u035/project/software/bcbio/anaconda/bin/samtools -PICARD=/home/u035/project/software/bcbio/anaconda/bin/picard -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools +PICARD=/home/u035/u035/shared/software/bcbio/anaconda/bin/picard +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa # check if ${WORK_DIR} already exists - if so, exit - to prevent accidental overwriting @@ -38,7 +38,7 @@ fi -echo "SOURCE_DIR = 
${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/project/trio_whole_exome/analysis/output +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/NHS_WES_trio_delete_BAM.sh b/NHS_WES_trio_delete_BAM.sh index 5bd4af7deec17ecb4b5efad8739635d4cef46b23..5fb4a5f788d26cacd47b353a43a3e1e2e61075ad 100755 --- a/NHS_WES_trio_delete_BAM.sh +++ b/NHS_WES_trio_delete_BAM.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -19,10 +19,10 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/project/trio_whole_exome/analysis/output/${VERSION_N}_${PLATE_ID} +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/${VERSION_N}_${PLATE_ID} echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/NHS_WES_trio_setup.sh b/NHS_WES_trio_setup.sh index 891a20280db6b6acdd7b7778da63eb03ed9c9894..f82d34a4f1d9b8c35b16c1f61ac05b372de9cd73 100755 --- a/NHS_WES_trio_setup.sh +++ b/NHS_WES_trio_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -19,11 +19,11 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 @@ -36,7 +36,7 @@ fi -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/project/trio_whole_exome/analysis/output +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/add_family_id_to_santosh_ped.pl b/add_family_id_to_santosh_ped.pl new file mode 100644 index 0000000000000000000000000000000000000000..8a4d7a24a3dc67b5d7ed68099212a750c8e88fe4 --- /dev/null +++ b/add_family_id_to_santosh_ped.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w + +# Reads tab-separated PED lines (family, sample, father, mother, sex, affected) from STDIN or the files named on the command line; appends _<family> to the sample ID and to each parent ID that is not "0", prefixes the family ID with 99999_, and prints the rewritten line. +use strict; + +while (my $line = <>) +{ + chomp $line; + my ($family, $sample, $father, $mother, $sex, $affected) = split(/\t/, $line); + $sample = sprintf("%s_%s", $sample, $family); + if ($father ne "0") { $father = sprintf("%s_%s", $father, $family); } + if ($mother ne "0") { $mother = sprintf("%s_%s", $mother, $family); } + $family = "99999_" . $family; + + # print, not printf: the interpolated field values are data and must not be parsed as a format string + print "$family\t$sample\t$father\t$mother\t$sex\t$affected\n"; +} + diff --git a/bcbio_gnomad_install.sh b/bcbio_gnomad_install.sh index 13563b6a751c400113a83ae4021d2d8a27cf07db..705f1a5811689d4fe7360ab2eade85237e709679 100755 --- a/bcbio_gnomad_install.sh +++ b/bcbio_gnomad_install.sh @@ -5,9 +5,9 @@ #PBS -N bcbio_gnomad_install #PBS -j oe -cd /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/txtmp +cd /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/txtmp -PATH=$PATH:/home/u035/project/software/bcbio/anaconda/bin +PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/bin ref=../seq/hg38.fa fields_to_keep="INFO/"$(cat gnomad_fields_to_keep.txt | paste -s | sed s/"\t"/",INFO\/"/g) diff --git a/decipher_NHS_WES_trio.sh b/decipher_NHS_WES_trio.sh index a185044e8bf51c4bb51a9f4df87fbe3c4f5d2dc8..ee7210c279d852e276ec400b299d308914c87fac 100755 --- a/decipher_NHS_WES_trio.sh +++ b/decipher_NHS_WES_trio.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export 
PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -#BASE=/scratch/u035/project/analysis/wes_pilot -BASE=/scratch/u035/project/trio_whole_exome/analysis +#BASE=/scratch/u035/u035/shared/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -24,36 +24,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase 
-GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/docs/Setup_variant_prioritization.md b/docs/Setup_variant_prioritization.md new file mode 100644 index 0000000000000000000000000000000000000000..fbb542a8f02978c8df4b0f1f508df6b33dad664d --- /dev/null +++ b/docs/Setup_variant_prioritization.md @@ -0,0 +1,407 @@ +# Standard operating procedure - Setup for variant prioritization in trio whole exome samples at the Edinburgh Parallel Computing Centre + +This SOP applies to batches of family/trio samples where trio whole exome sequencing has been performed by Edinburgh Genomics (EdGE). It assumes that data has been successfully aligned, variant called and annotated (see SOP_trio_whole_exome_EPCC_pipeline). Scripts and resource datasets are version controlled on the University of Edinburgh gitlab server gitlab.ecdf.ed.ac.uk/igmmbioinformatics/trio-whole-exome. Request access by e-mail: alison.meynert@ed.ac.uk. + +This setup must not be altered in any way unless sanctioned by NHSS. Contact Morad Ansari morad.ansari@nhs.net, morad.ansari@nhslothian.scot.nhs.uk. + +## Definitions + +Command lines starting with E> are to be executed on Eddie; U> refers to Ultra2 (EPCC). + +Text in angle brackets, e.g. `<chr>`, indicates variable parameters. + + +## The Variant Effect Predictor (VEP) +Version: ensembl-vep-100.4-0 + +Location at EPCC: /home/u035/u035/shared/software/bcbio/anaconda/bin/vep + +Note: make sure the VEP cache files are tabix converted (significant speedup) + +### Check the downloaded VEP cache files were tabix converted + +``` +U> ls -l /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep/homo_sapiens_merged/100_GRCh38/<chr> +``` + +where `<chr>` is the chromosome name without the ‘chr’ prefix (e.g., 1,2, ..., 22). +The presence of a file called `all_vars.gz.csi` indicates that the VEP cache files were tabix converted. 
+ +## The G2P plugin for VEP + +Version: 100 + +Source: https://github.com/Ensembl/VEP_plugins/blob/release/100/G2P.pm + +Date obtained: 21/04/2021 + +Location at EPCC: /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 + +File name: G2P.pm + +### Add splice region variants (1-3 bases of the exon or 3-8 bases of the intron) for consideration by G2P + +Edit the G2P.pm file by adding the type `splice_region_variant` at the end of: + +``` +types : SO consequence types to include ... (line 84) +types => {map {$_ => 1} qw(splice_donor_variant ... (line 145) +``` + +### Setting G2P in completely offline mode + +All external datasets listed for the `af_from_vcf_keys` flag (gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38) in: +* `/home/u035/u035/shared/scripts/process_NHS_WES_trio.sh` +* `/home/u035/u035/shared/scripts/process_NHS_WES_aff_probands.sh` + +must be available locally (see below for downloading gnomADe and gnomADg datasets) + +* gnomADg dataset (r3.1.1, downloaded 23/08/2021): `/home/u035/u035/shared/resources/gnomad/r3.1.1/genomes` +* gnomADe dataset (r2.1.1, downloaded 23/08/2021): `/home/u035/u035/shared/resources/gnomad/r2.1.1/exomes` + +To re-fetch the gnomADg dataset: +``` +U> cd /home/u035/u035/shared/resources/gnomad/r3.1.1/genomes +U> for i in {1..22} X Y +U> do +U> wget https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/gnomad.genomes.v3.1.1.sites.chr${i}.vcf.bgz +U> wget https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/gnomad.genomes.v3.1.1.sites.chr${i}.vcf.bgz.tbi +U> done +``` + +To re-fetch the gnomADe dataset: +``` +U> cd /home/u035/u035/shared/resources/gnomad/r2.1.1/exomes +U> for i in {1..22} X Y +U> do +U> wget https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/liftover_grch38/vcf/exomes/gnomad.exomes.r2.1.1.sites.${i}.liftover_grch38.vcf.bgz +U> wget 
https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/liftover_grch38/vcf/exomes/gnomad.exomes.r2.1.1.sites.${i}.liftover_grch38.vcf.bgz.tbi +U> done +``` + +Edit the `/home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0/Bio/EnsEMBL/Variation/DBSQL/vcf_config.json` file to update type (remote -> local) and filename_template (local path to datasets) variables for these local datasets. + +gnomADg (lines 138-143) +``` +"id": "gnomADg_r3.1.1_GRCh38", +"description": "Genome Aggregation Database genomes r3.1.1", +"species": "homo_sapiens", +"assembly": "GRCh38", +"type": "local", +"filename_template": "/home/u035/u035/shared/resources/gnomad/r3.1.1/genomes/gnomad.genomes.v3.1.1.sites.chr###CHR###.vcf.bgz", +``` + +gnomADe (lines 199-204) +``` +"id": "gnomADe_r2.1.1_GRCh38", +"description": "Genome Aggregation Database exomes r2.1.1 liftover to GRCh38", +"species": "homo_sapiens", +"assembly": "GRCh38", +"type": "local", +"filename_template": "/home/u035/u035/shared/resources/gnomad/r2.1.1/exomes/gnomad.exomes.r2.1.1.sites.###CHR###.liftover_grch38.vcf.bgz", +``` + +## Parameter Values for G2P call + +Files +* `/home/u035/u035/shared/scripts/process_NHS_WES_trio.sh` +* `/home/u035/u035/shared/scripts/process_NHS_WES_aff_probands.sh` + +``` +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" +# this points to ../share/ensembl-vep-100.4-0/vep + +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa + +IN_FILE=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf +G2P_LOG_DIR=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_LOG_DIR +mkdir ${G2P_LOG_DIR} +TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt +HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html +VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' + +time ${VEP} \ + -i ${IN_FILE} \ + --output_file ${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_inter_out.txt \ + --force_overwrite \ + --assembly 
GRCh38 \ + --fasta ${REFERENCE_GENOME} \ + --offline \ + --merged \ + --use_given_ref \ + --cache --cache_version 100 \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ + --individual all \ + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20201208.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20201208.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},\ +log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} +``` + +## Developmental Disorders (DD) gene panel and list of unique gene names (with synonyms) + +Source: https://www.ebi.ac.uk/gene2phenotype/downloads + +Date obtained: 06/07/2021 + +Location at EPCC: /home/u035/u035/shared/resources/G2P + +File names: DDG2P.20210706.csv and genes_in_DDG2P.20210706.txt + +Example of <eddie_work_folder>: `/exports/igmm/eddie/IGMM-VariantAnalysis/mike/work_folder` +Example of <datastore_work_folder>: `I:\IGMM-VariantAnalysis\documentation\trio_whole_exome\work_folder` + +### Obtaining and pre-processing the gene panel file + +``` +E> cd <eddie_work_folder> +E> wget https://www.ebi.ac.uk/gene2phenotype/downloads/DDG2P.csv.gz +E> mv DDG2P.csv.gz DDG2P.orig.<date_downloaded>.csv.gz +E> gunzip -c DDG2P.orig.<date_downloaded>.csv.gz > DDG2P.orig.<date_downloaded>.csv +``` + +* Take a note of the date tag of the outdated DDG2P gene panel; call it `<date_downloaded_old>` +* Copy `DDG2P.orig.<date_downloaded>.csv` from `<eddie_work_folder>` to `<datastore_work_folder>` +* From `<datastore_work_folder>` open the file with Excel and sort by allelic requirement +* Remove entries with no allelic requirement listed (if any) +* Split records (rows) with multiple (comma separated) allelic requirements; sort again +* Save as `DDG2P.<date_downloaded>.csv` +* Copy `DDG2P.<date_downloaded>.csv` 
from `<datastore_work_folder>` to `<eddie_work_folder>` and to ultra at `/home/u035/u035/shared/resources/G2P` + +`DDG2P.20210706.csv` stats +``` + records with biallelic requirement: 1339 # 1310 in 20201208 + records with monoallelic requirement: 873 # 911 in 20201208 + records with hemizygous requirement: 159 # 179 in 20201208 + records with x-linked dominant requirement: 46 # 44 in 20201208 + records with x-linked over-dominance requirement: 2 # 2 in 20201208 + records with digenic requirement: 1 (G2P ignores them) + records with imprinted requirement: 10 (G2P ignores them) + records with mitochondrial requirement: 2 (G2P ignores them) + records with uncertain requirement: 0 (G2P ignores them) + records with mosaic requirement: 12 (G2P ignores them) + records with no allelic requirement: 3 (excluded) +``` + +### Obtaining the list of all unique gene names in the DD gene panel (current gene symbol and previous gene symbols) + +``` +E> cd <eddie_work_folder> +E> time python /exports/igmm/eddie/IGMM-VariantAnalysis/mike/scripts/extract_unique_genes.py DDG2P.<date_downloaded>.csv genes_in_DDG2P.<date_downloaded>.txt +Found 3553 unique gene names (incl.synonyms) in DDG2P.20210706.csv +recorded 3553 unique gene names (incl. 
synonyms); outfile = genes_in_DDG2P.20210706.txt +``` + +* Copy `genes_in_DDG2P.<date_downloaded>.txt` from `<eddie_work_folder>` to `<datastore_work_folder>` and to ultra at /home/u035/u035/shared/resources/G2P +* After updating the DDG2P list, update the resources for coverage analysis, see below: + * CCDS Dataset (check if updated CCDS is available) + * DD genes Dataset + * ClinVar Dataset + * ClinVar annotated DD exons from CCDS + +After the updates of the resources for the coverage analysis are completed, update the driver scripts for all implemented family structures: +* `/home/u035/u035/shared/scripts/process_NHS_WES_trio.sh` +* `/home/u035/u035/shared/scripts/process_NHS_WES_aff_probands.sh` +* `/home/u035/u035/shared/scripts/process_NHS_WES_quad.sh` + +to point to the updated files: +``` +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt +echo "Performing G2P analysis (DD genes)for FAMILY_ID = ${PLATE_ID}_${FAMILY_ID}..." +echo "Using ${TARGETS}" +--transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" +--plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf... +``` + +## Resources for coverage analysis + +### CCDS Dataset + +Source: https://www.ncbi.nlm.nih.gov/projects/CCDS + +Date obtained: 28/02/2019 + +Location at EPCC: /home/u035/u035/shared/resources/exome_targets + +File name: CCDS.20180614.plus15bp.merged.bed + +To check if updates are available, go to https://www.ncbi.nlm.nih.gov/projects/CCDS/CcdsBrowse.cgi?REQUEST=SHOW_STATISTICS + +Downloaded CCDS.20180614.txt (hg38) from the CCDS site, converted to BED format (exon number appended to CCDS id in order), added 15bp each side, sorted, and merged. 
+ +``` +E> perl ../scripts/ccds_to_bed.pl +E> perl ../scripts/ccds_to_bed.pl -i CCDS.20180614.txt -o CCDS.20180614.bed +E> mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e "select chrom, size from hg38.chromInfo" > hg38.genome +E> bedtools slop -i CCDS.20180614.bed -b 15 -g hg38.genome > CCDS.20180614.plus15bp.bed +E> bedtools sort -i CCDS.20180614.plus15bp.bed -faidx /exports/igmm/eddie/bioinfsvice/ameynert/software/bcbio-1.0.7/genomes/Hsapiens/hg38/seq/hg38.fa.fai > CCDS.20180614.plus15bp.sorted.bed +E> bedtools merge -i CCDS.20180614.plus15bp.sorted.bed -c 4 -o distinct > CCDS.20180614.plus15bp.merged.bed +``` + +### DD genes Dataset + +Source: /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt + +Date obtained: 08/12/2020 + +Location at EPCC: /home/u035/u035/shared/resources/G2P + +File name: DDG2P.20210706.plus15bp.merged.bed + +From the CCDS BED file (above), extract a BED file for the DD genes + +``` +U> cd /home/u035/u035/shared/resources/G2P +U> PYTHON=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +U> time $PYTHON /home/u035/u035/shared/scripts/extract_BED_CCDS_DDG2P.py CCDS.20180614.plus15bp.merged.bed genes_in_DDG2P.20210706.txt DDG2P.20210706.plus15bp.merged.bed +Found 3553 unique gene names in genes_in_DDG2P.20210706.txt +Read 193346 records from the input BED file = CCDS.20180614.plus15bp.merged.bed +Wrote 33275 record for the DDG2P genes in the output BED file = DDG2P.20210706.plus15bp.merged.bed +Found intervals for 2156 uniq DDG2P genes +``` + +### ClinVar Dataset + +Source: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/ + +Date obtained: 06/07/2021 + +Location at EPCC: /home/u035/u035/shared/resources/clinvar + +File name: clinvar_20210626.P_LP.ACP.vcf + +Description of ClinVar VCF @ `https://www.ncbi.nlm.nih.gov/variation/docs/ClinVar_vcf_files/` + +``` +U> cd /home/u035/u035/shared/resources/clinvar +U> wget ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar_20210626.vcf.gz +U> wget 
ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar_20210626.vcf.gz.tbi + +# grab the header and only variants annotated as Pathogenic or Likely Pathogenic # and not with conflicting interpretation +U> zgrep '^#' clinvar_20210626.vcf.gz > clinvar_20210626.P_LP.vcf && zgrep -E 'CLNSIG=Likely_pathogenic;|CLNSIG=Pathogenic;' clinvar_20210626.vcf.gz >> clinvar_20210626.P_LP.vcf + +# need to add chr prefix in the clinvar_20210626.P_LP.vcf file +U> awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' clinvar_20210626.P_LP.vcf > clinvar_20210626.P_LP.chr.vcf + +# Exclude variants with “no assertion criteria provided” +U> grep '^#' clinvar_20210626.P_LP.chr.vcf > clinvar_20210626.P_LP.ACP.vcf && grep -v 'CLNREVSTAT=no_assertion_criteria_provided' clinvar_20210626.P_LP.chr.vcf >> clinvar_20210626.P_LP.ACP.vcf +``` + +### ClinVar annotated DD exons from CCDS + +Source: DDG2P.20210706.plus15bp.merged.bed & clinvar_20210626.P_LP.ACP.vcf (see above) + +Location at EPCC: /home/u035/u035/shared/resources/G2P + +File name: DDG2P.20210706.clinvar.20210626.plus15bp.txt + + +A BED file for all CCDS exons (15bp padded) found in the DD genes, annotated with the number of “relevant” variants reported in ClinVar (pathogenic or likely pathogenic with provided assertion criteria) for which the proband coverage is to be computed.
+ +Use bedtools to count and record the number of P/LP variants per each interval + +``` +U> cd /home/u035/u035/shared/resources/G2P +U> BEDTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/bedtools +U> $BEDTOOLS intersect -wa -c -a DDG2P.20210706.plus15bp.merged.bed -b ../clinvar/clinvar_20210626.P_LP.ACP.vcf > DDG2P.20210706.clinvar.20210626.plus15bp.txt + +# proportion of all P/LP variants with assertion criteria provided in DD genes +U> grep -v '^#' ../clinvar/clinvar_20210626.P_LP.ACP.vcf | wc -l +Total of 102341 ClinVar vars + +U> cat DDG2P.20210706.clinvar.20210626.plus15bp.txt | awk '{sum += $5} END {print sum}' +67623 of the ClinVar vars are in DD genes; 67623/102341=66% of all are in DD gene +``` + +### Mapping the Family_ID (aka DECIPHER_ID) to Proband_ID (aka DECIPHER internal ID) + +Source: https://git.ecdf.ed.ac.uk/igmmbioinformatics/decipher-id-mapping/tree/master/bin (in-house script) + +Date obtained: 19/09/2019 + +Location: a suitable and secure folder on your local laptop/PC + +File name: decipher-id-mapping.jar + +To get access to the source, email alison.meynert@ed.ac.uk + +* Make sure your java version is reasonably recent (e.g. 
java version "13" 2019-09-17) +* Download the decipher-id-mapping.jar file (downloads as jar.zip, delete .zip) +* Get the Selenium Chrome driver for the laptop’s Chrome browser version from https://chromedriver.chromium.org/downloads, store and unzip in the same folder as the decipher-id-mapping.jar file +* Chrome v77 can be downloaded from: https://www.neowin.net/news/google-chrome-770386575-offline-installer/ (64bit) + +### Variant Blacklist + +Source: NHSS + +Date obtained: 25/09/2019 + +Location at EPCC: /home/u035/u035/shared/resources/blacklist + +File name: current_blacklist.txt + +This is a file which contains variants that were assessed by NHSS as safe to be excluded from analysis and reports (e.g., cannot be lifted over from GRCh38 to DECIPHER’s v37 and/or seen too frequently in previously analyzed batches, etc.). It should be provided by NHSS before the variants in a new batch are to be prioritized (contact Morad Ansari morad.ansari@nhs.net). + + +Open the Excel file provided by NHSS and store the information in a tab-separated file named `blacklist.<date_received>.txt` with the format chr pos ref alt, adding the ‘chr’ prefix if necessary. Create a copy of the file named `current_blacklist.txt` which is looked for and used by `NHS_WES_filter_LQ_GT.py`. + +``` +U> cd /home/u035/u035/shared/resources/blacklist +U> nano blacklist.2019-11-27.txt +U> cp blacklist.2019-11-27.txt current_blacklist.txt +``` + +### Transcript Replacement + +Source: NHSS + +Date obtained: 25/09/2019 + +Location at EPCC: /home/u035/u035/shared/resources/trans_map + +File name: current_trans_map.txt + +Some of the VEP (v97) GRCh38 transcripts are not currently recognized by DECIPHER during bulk upload. NHSS will generate a transcript replacement file (usually included in the same Excel file as the variant blacklist) for the preferred transcript ID (available in DECIPHER).
It should be provided by NHSS before the variants in a new batch are to be prioritized (contact Morad Ansari morad.ansari@nhs.net). + +Open the Excel file provided by NHSS and store the information in a tab-separated file named `trans_map.<date_received>.txt` with the format `Unrecognized_transcript Replacement_transcript`. Create a copy of the file named `current_trans_map.txt` which is looked for and used by `NHS_WES_filter_LQ_GT.py`. + +``` +U> cd /home/u035/u035/shared/resources/trans_map +U> nano trans_map.2019-11-27.txt +U> cp trans_map.2019-11-27.txt current_trans_map.txt +``` + +### VASE setup in STRICT mode + +Location at EPCC: /home/u035/u035/shared/software/bcbio/anaconda/bin/vase + +Parameter Values for VASE STRICT + +``` +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase + +IN_FILE=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.ready.vcf.gz +OUT_FILE=${VASE_DIR}/${PLATE_ID}_${FAMILY_ID}.strict.denovo.vcf +PED_FILE=${PED_DIR}/${BATCH_ID}_${PLATE_ID}_${FAMILY_ID}.ped + +time ${VASE} \ + -i ${IN_FILE} \ + -o ${OUT_FILE} \ + --log_progress \ + --prog_interval 100000 \ + --freq 0.0001 \ + --gq 30 --dp 10 \ + --het_ab 0.3 \ + --max_alt_alleles 1 \ + --csq all \ + --biotypes all \ + --control_gq 15 --control_dp 5 \ + --control_het_ab 0.01 \ + --control_max_ref_ab 0.05 \ + --de_novo \ + --ped ${PED_FILE} +``` + + + diff --git a/docs/run-notes/19188_run_notes.md b/docs/run-notes/19188_run_notes.md new file mode 100644 index 0000000000000000000000000000000000000000..a2f8000e39e70cc3858e2811192f2c12b26e5eb4 --- /dev/null +++ b/docs/run-notes/19188_run_notes.md @@ -0,0 +1,27 @@ +# Trio whole exome - 19188 (EdGe) + +* Fam 435981 - urgent trio +* Fam 404190, Individual 128589 is a repeat maternal sample - other 2 samples are from 14110_Ansari_Morad +* Fam 433855, Individual 128681 is a repeat proband sample - other 2 samples are from 18610_Ansari_Morad +* Fam 428986 affected duo, for shared variant analysis +* Fam 435139 quad – similarly affected brothers.
Shared & trio analysis. +* Fam 434642 duo for Congenica +* Fam 434889 duo for Congenica + +## QC notes + +**Note:** family 433924 had to be re-started and is not included in these notes yet. + +* All passed pedigree checks +* Coverage a little low for: + * 128450_434883 - 35X - unaffected father + * 128360_434801 - 37X - proband + * 84873_435139 - 37X - proband + * 128102_429275 - 43X - unaffected father + * 128540_434918 - 43X - unaffected mother +* Slight contamination for: + * 128540_434918 - 2.9% - unaffected mother (also with slightly low coverage 43X above) +* Relatedness looks fine +* All correct sex +* 120967_434490 (male proband) has slightly odd X/Y mapping percentages at 80/20. Most males are in a tight range of 88-90%, it's a clear outlier. +* 128102_429275 (unaffected father, also with slightly low coverage 43X above) has very odd chromosome mapping percentages and GC content distribution. \ No newline at end of file diff --git a/docs/run-notes/19258_run_notes.md b/docs/run-notes/19258_run_notes.md new file mode 100644 index 0000000000000000000000000000000000000000..b54413b5460832b5be0e035e193f086d9a0e7cf9 --- /dev/null +++ b/docs/run-notes/19258_run_notes.md @@ -0,0 +1,21 @@ +# Trio whole exome batch 19285 (EdGe) + +* The urgent family is 438158. +* We know that sample 129421 (proband from family 436427) is going to fail – there was an issue during set up for this one. +* Family 438023 – Quad, for shared analysis between two sibs and trio analysis per sib (129760 and 129850).
+ +Duos/singleton – for Congenica only +* 435163 (duo) +* 436090 (duo) +* 436525 (singleton) + +# QC + +* Pedigree check ok +* Samples with slightly low coverage: + * 128940_435163 43X + * 128993_429270 47X + * 126656_429218 48X +* Contamination check ok +* Sample 129421 was not delivered - don't analyze family 436427, parents QC'd ok +* Sequencing QC check ok diff --git a/download_gnomADg.sh b/download_gnomADg.sh deleted file mode 100755 index a5a54e9e84cc83af11504e8017ae91758eccd575..0000000000000000000000000000000000000000 --- a/download_gnomADg.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -#PBS -l walltime=48:00:00 -#PBS -l ncpus=1,mem=2gb -#PBS -q uv2000 -#PBS -N download_gnomADg -#PBS -j oe - - -DATA_DIR=/home/u035/project/resources/gnomad/r2.1/genomes - -cd ${DATA_DIR} - -wget ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_genotype/gnomad/r2.1/genomes/*.gz -wget ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_genotype/gnomad/r2.1/genomes/*.tbi - diff --git a/downstream_setup.sh b/downstream_setup.sh index d610ebb41b8b75452e7dc3d0dd0dfe1e78059970..fa25954fbd3b834e9a896384740229e6a6599855 100755 --- a/downstream_setup.sh +++ b/downstream_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -18,10 +18,10 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 diff --git a/extract_BED_CCDS_DDG2P.py b/extract_BED_CCDS_DDG2P.py index e12ab94c2364d67560fec908647756a8eb9773b8..33e1e11be76a4bced5af681a39f3788788d1fa05 
100644 --- a/extract_BED_CCDS_DDG2P.py +++ b/extract_BED_CCDS_DDG2P.py @@ -1,6 +1,6 @@ # given -# the BED file for all genes (/home/u035/project/resources/CCDS.20180614.plus15bp.merged.bed) -# and the file for the genes in the DDG2P (unique gene names, inluding synonyms, i.e., /home/u035/project/resources/genes_in_DDG2P.30082018.txt) +# the BED file for all genes (/home/u035/u035/shared/resources/exome_targets/CCDS.20180614.plus15bp.merged.bed) +# and the file for the genes in the DDG2P (unique gene names, including synonyms, i.e., /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.30082018.txt) # extract # the BED file for the DDG2P genes # diff --git a/extract_trio_FAM_PRO_ID.py b/extract_trio_FAM_PRO_ID.py index e58c60acd0e2cb1661e4a8359c16e68e3f957ead..e66d8f4cef2867cc6ffabc94881ea97abcc58d60 100755 --- a/extract_trio_FAM_PRO_ID.py +++ b/extract_trio_FAM_PRO_ID.py @@ -111,6 +111,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/extract_trio_FAM_PRO_ID.py /scratch/u035/project/analysis/wes_pilot/03062019" + print "Suggested use: time python /home/u035/u035/shared/scripts/extract_trio_FAM_PRO_ID.py /scratch/u035/u035/shared/analysis/wes_pilot/03062019" raise SystemExit diff --git a/full_process_NHS_WES_trio.sh b/full_process_NHS_WES_trio.sh index 38fd7b924be43d72674de8964973f0a8c0399a10..f2bc21eb8e893cf15c916f0575a0cfddf17d302a 100755 --- a/full_process_NHS_WES_trio.sh +++ b/full_process_NHS_WES_trio.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ###
folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -#BASE=/scratch/u035/project/analysis/wes_pilot -BASE=/scratch/u035/project/trio_whole_exome/analysis +#BASE=/scratch/u035/u035/shared/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -24,36 +24,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 
-VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -153,7 +153,7 @@ G2P_LOG_DIR=${G2P_DIR}/${FAMILY_ID}_LOG_DIR mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.html -VCF_KEYS='gnomADe|gnomADg' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -166,11 +166,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 97 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.19092019.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF', + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.19092019.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF', af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -225,7 +225,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) 
rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf diff --git a/gather_NHS_WES_aff_probands_results.sh b/gather_NHS_WES_aff_probands_results.sh index 8576ca793cdb65aabe4193b5b433516bfd11677a..01b6f99f09ddd5b1f404081a4e9acd38d22c90bb 100755 --- a/gather_NHS_WES_aff_probands_results.sh +++ b/gather_NHS_WES_aff_probands_results.sh @@ -8,7 +8,7 @@ ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=${BASE}/${PROJECT_ID} NHS_DIR=${WORK_DIR}/${PLATE_ID}_${VERSION_N}_results diff --git a/gather_NHS_WES_quad_results.sh b/gather_NHS_WES_quad_results.sh index a89d093c4c0dda93c772c56bae411c25e154a05d..f619b11ce43b7b942b379c60ccb50ba2a5464467 100755 --- a/gather_NHS_WES_quad_results.sh +++ b/gather_NHS_WES_quad_results.sh @@ -8,7 +8,7 @@ ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=${BASE}/${PROJECT_ID} NHS_DIR=${WORK_DIR}/${PLATE_ID}_${VERSION_N}_results diff --git a/gather_NHS_WES_trio_results.sh b/gather_NHS_WES_trio_results.sh index 6ff0dd1c32385bea699e225e0a8cbc1e9c36e609..59379f6631a4c01703bb86988984dc688b64ce4d 100755 --- a/gather_NHS_WES_trio_results.sh +++ b/gather_NHS_WES_trio_results.sh @@ -8,7 +8,7 @@ ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=${BASE}/${PROJECT_ID} NHS_DIR=${WORK_DIR}/${PLATE_ID}_${VERSION_N}_results diff --git a/generate_DEC_IGV.py b/generate_DEC_IGV.py index e293d2ce7de8bfb841833958b920410747fc60c0..21b24f168da0208afedcc4c214e548eaf5b8111a 100755 --- a/generate_DEC_IGV.py +++ b/generate_DEC_IGV.py @@ -942,10 +942,10 @@ if __name__ == '__main__': if len(sys.argv) == 12: 
go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/generate_DEC_IGV.py \ 2820-gatk-haplotype-annotated.2820_2820.vcf.gz \ ../output_dd/2820_log_dir/2820.report.txt \ - /scratch/u035/project/analysis/wes_pilot/VASE/08042019/output/2820_2820.strict.denovo.vcf \ + /scratch/u035/u035/shared/analysis/wes_pilot/VASE/08042019/output/2820_2820.strict.denovo.vcf \ 2820_2820 \ 2820_2820.DEC.txt \ DECIPHER_DIR \ diff --git a/generate_G2P_out_VCF.py b/generate_G2P_out_VCF.py index cfae641c0934b9d33c4b7986987cb508a24f80fa..b178d0a722a3943b0222672d26897e0554d82825 100755 --- a/generate_G2P_out_VCF.py +++ b/generate_G2P_out_VCF.py @@ -166,7 +166,7 @@ if __name__ == '__main__': if len(sys.argv) == 5: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4]) else: - print "Suggested use: time python /home/u035/project/scripts/generate_G2P_out_VCF.py 2820-gatk-haplotype-annotated.2820_2820.vcf.gz ../output_dd/2820_log_dir/2820.report.txt 2820_2820 2820_2820.G2P.vcf " + print "Suggested use: time python /home/u035/u035/shared/scripts/generate_G2P_out_VCF.py 2820-gatk-haplotype-annotated.2820_2820.vcf.gz ../output_dd/2820_log_dir/2820.report.txt 2820_2820 2820_2820.G2P.vcf " raise SystemExit diff --git a/generate_coverage_result_file.py b/generate_coverage_result_file.py index 50e8c6f03675390324ad102d5ac0440a2700503d..ba8712e46d4aaa413eb61c125504f53c9bffa0a8 100644 --- a/generate_coverage_result_file.py +++ b/generate_coverage_result_file.py @@ -114,9 +114,9 @@ if __name__ == '__main__': if len(sys.argv) == 4: go(sys.argv[1],sys.argv[2],sys.argv[3]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/generate_coverage_result_file.py \ + print "Suggested use: time $PYTHON 
/home/u035/u035/shared/scripts/generate_coverage_result_file.py \ DDG2P.s14-NFE-Twist-NA12878.sample_interval_summary \ - /home/u035/project/resources/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ + /home/u035/u035/shared/resources/G2P/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ DDG2P.s14-NFE-Twist-NA12878.COV.txt" raise SystemExit diff --git a/old_downstream_setup.sh b/old_downstream_setup.sh index b29e00b969cbc7c6b53157a5ff49d4f4ef77de79..1c8806b16483b7aeb30702f0b5d2dbb82e8fda10 100755 --- a/old_downstream_setup.sh +++ b/old_downstream_setup.sh @@ -13,9 +13,9 @@ DATE_BATCH=${DATE}_${BATCH} echo "DATE_BATCH = ${DATE_BATCH}" -BASE=/scratch/u035/project/analysis/wes_pilot -SOURCE_DIR=/scratch/u035/project/analysis/wes_pilot/bcbio/final -PED_DIR=/scratch/u035/project/analysis/wes_pilot/params +BASE=/scratch/u035/u035/shared/analysis/wes_pilot +SOURCE_DIR=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/final +PED_DIR=/scratch/u035/u035/shared/analysis/wes_pilot/params WORK_DIR=$BASE/${PROJECT_ID} @@ -24,9 +24,9 @@ VASE_DIR=${WORK_DIR}/VASE COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 # create the working dir and the required subfolders diff --git a/old_submit_downstream.sh b/old_submit_downstream.sh index 18b4e94cbcdb224472c012c530487a32444ddc5c..9b52a3d641d96bd99519dc854a4e54d291a64423 100755 --- a/old_submit_downstream.sh +++ b/old_submit_downstream.sh @@ -14,15 +14,15 @@ echo "DATE_BATCH = ${DATE_BATCH}" # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export 
PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 # where the VCF and BAM files are after the alignemnt and variant calling steps -SOURCE_DIR=/scratch/u035/project/analysis/wes_pilot/bcbio/final +SOURCE_DIR=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/final -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} G2P_DIR=${WORK_DIR}/G2P VASE_DIR=${WORK_DIR}/VASE @@ -33,28 +33,28 @@ CNV_DIR=${WORK_DIR}/CNV LOG_DIR=${WORK_DIR}/LOG VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt CHILD_IDS=${WORK_DIR}/PRO_IDs.txt -TARGETS=/home/u035/project/resources/DDG2P.20180830.plus15bp.merged.bed -CLINVAR=/home/u035/project/resources/DDG2P.20180830.clinvar.20190603.plus15bp.txt +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20180830.plus15bp.merged.bed +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20180830.clinvar.20190603.plus15bp.txt ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa 
+BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa @@ -158,11 +158,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 96 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.30082018.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.30082018.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.30082018.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.30082018.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -212,7 +212,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -224,7 +224,7 @@ time ${GATK4} IndexFeatureFile -F ${FAMILY_ID}.strict.24chr.sort.denovo.vcf # remove variants from LCR and telo-/centro-mere regions time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${FAMILY_ID}.strict.24chr.sort.denovo.vcf -O ${FAMILY_ID}.clean.denovo.vcf \ --XL /home/u035/project/resources/LCR.bed -XL /home/u035/project/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants +-XL /home/u035/u035/shared/resources/LCR.bed -XL /home/u035/u035/shared/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants # split multi-allelic sites [by -m -any] # left-alignment and normalization [by adding the -f] diff --git a/old_submit_trio_wes_aspera_download.sh b/old_submit_trio_wes_aspera_download.sh index 2a1221a0efc34bd5a7ecae4172424f34b30f1376..776b53f6da668eec350e4d121372d8a6bb6ab78c 100755 --- a/old_submit_trio_wes_aspera_download.sh +++ b/old_submit_trio_wes_aspera_download.sh @@ -7,12 +7,12 @@ source $TRANSFER_INFO_FILE -/home/u035/project/software/aspera/connect/bin/ascp \ +/home/u035/u035/shared/software/aspera/connect/bin/ascp \ -T -P 33001 -O 33001 -l 500M -k2 --overwrite=diff \ $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT \ - /scratch/u035/project/trio_whole_exome/data + /scratch/u035/u035/shared/trio_whole_exome/data -cd /scratch/u035/project/trio_whole_exome/data/$PROJECT/raw_data +cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT/raw_data rm ../md5_check.txt 2> /dev/null for DATE in 20*[0-9] diff --git a/process_NHS_WES_aff_probands.sh b/process_NHS_WES_aff_probands.sh index 
3afbfa1bedc001209aa6c6944723cf6603d6221e..7cb0c832035ed2c93f20bd628b3a1494ebfbe39a 100755 --- a/process_NHS_WES_aff_probands.sh +++ b/process_NHS_WES_aff_probands.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh, done previously by the stanard trio-based pipeline ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -25,37 +25,37 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used -TARGETS=/home/u035/project/resources/DDG2P.20210706.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools 
-BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. 
/scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -165,7 +165,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP version 97 -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -178,11 +178,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and 
IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -437,7 +437,7 @@ done ############################################################################################## ## write the IGV batch file for each affected individual in this family based on the bamouts # -## to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # +## to be stored as /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # ## ${PROBAND_ID}_${FAMILY_ID} == ${aff_pro_arr[$key] # ################################################################## @@ -446,7 +446,7 @@ for key in "${!aff_pro_arr[@]}"; do echo "" echo "Generating the IGV batch file for ${aff_pro_arr[$key]}"; - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -457,7 +457,7 @@ for key in "${!aff_pro_arr[@]}"; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_quad.sh b/process_NHS_WES_quad.sh index 87076ef01d500e94037f0938b34baad02f5e6d7a..da99c05c40394fd319dbdb03f93568b0d994f0ca 100755 --- a/process_NHS_WES_quad.sh +++ b/process_NHS_WES_quad.sh @@ -7,13 +7,13 @@ # setup PATH -export 
PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh, done previously by the stanard trio-based pipeline ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -25,37 +25,37 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used -TARGETS=/home/u035/project/resources/DDG2P.20210706.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase 
-GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 
11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -217,7 +217,7 @@ for KID_ID in ${KID_IDS[@]}; do mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_${KID_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_${KID_ID}.report.html - VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' + VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ -i ${IN_FILE} \ @@ -229,11 +229,11 @@ for KID_ID in ${KID_IDS[@]}; do --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -284,7 +284,7 @@ for KID_ID in ${KID_IDS[@]}; do time ${GATK4} IndexFeatureFile -I ${OUT_FILE} # select only variants on the 24 chromosomes - time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${PLATE_ID}_${FAMILY_ID}_${KID_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants + time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${PLATE_ID}_${FAMILY_ID}_${KID_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${PLATE_ID}_${FAMILY_ID}_${KID_ID}.strict.24chr.sort.denovo.vcf @@ -488,10 +488,10 @@ for KID_ID in ${KID_IDS[@]}; do ################################################################# # write the IGV batch file for this family based on the bamouts # - # to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt # + # to be stored as /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt # ################################################################# - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -502,7 +502,7 @@ for KID_ID in ${KID_IDS[@]}; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_${KID_ID}\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_${KID_ID}\"" >> ${snap_file} echo "" >> ${snap_file} # now, go again over the variants in the DECIPHER file and generate one snapshot file for all the variants @@ -680,7 +680,7 @@ 
G2P_LOG_DIR=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_shared_LOG_DIR mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_shared.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_shared.report.html -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ -i ${IN_FILE} \ @@ -692,11 +692,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -877,7 +877,7 @@ done ############################################################################################## ## write the IGV batch file for each affected individual in this family based on the bamouts # -## to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.shared.snapshot.txt # +## to be stored as 
/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.shared.snapshot.txt # ## ${PROBAND_ID}_${FAMILY_ID} == ${aff_pro_arr[$key] # ################################################################## @@ -886,7 +886,7 @@ for key in "${!aff_pro_arr[@]}"; do echo "" echo "Generating the IGV batch file for ${aff_pro_arr[$key]}"; - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.shared.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.shared.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -898,7 +898,7 @@ for key in "${!aff_pro_arr[@]}"; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_shared\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_shared\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_quad_full.sh b/process_NHS_WES_quad_full.sh index 9de198d410c97f88c56809257f46d71560dfc3fa..d56865541e228c6642f2c821d299a4ed72db1e68 100755 --- a/process_NHS_WES_quad_full.sh +++ b/process_NHS_WES_quad_full.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream 
analysis - created by NHS_WES_trio_setup.sh, done previously by the stanard trio-based pipeline ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -25,37 +25,37 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used -TARGETS=/home/u035/project/resources/DDG2P.20200601.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20200601.clinvar.20200520.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20200601.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20200601.clinvar.20200520.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to 
../share/ensembl-vep-100.4-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -166,7 +166,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP version 97 -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -179,11 +179,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.01062020.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.01062020.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -437,7 +437,7 @@ done ############################################################################################## ## write the IGV batch file for each affected individual in this family based on the bamouts # -## to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # +## to be stored as 
/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # ## ${PROBAND_ID}_${FAMILY_ID} == ${aff_pro_arr[$key] # ################################################################## @@ -446,7 +446,7 @@ for key in "${!aff_pro_arr[@]}"; do echo "" echo "Generating the IGV batch file for ${aff_pro_arr[$key]}"; - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -457,7 +457,7 @@ for key in "${!aff_pro_arr[@]}"; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_trio.sh b/process_NHS_WES_trio.sh index bf8b8c0544fb66fcfdf9915e1b3784130594cf11..b31d29bf5e8418281003aa4429a4e3130cc89a7c 100755 --- a/process_NHS_WES_trio.sh +++ b/process_NHS_WES_trio.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### 
-BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -24,37 +24,36 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20210706.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl 
/home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep -#REFERENCE_GENOME=/home/u035/project/resources/hg38.fa -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa - - - -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa + + + +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -158,7 +157,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP version 97 -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ -i ${IN_FILE} \ @@ -170,11 +169,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -248,7 +247,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -I ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${PLATE_ID}_${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -505,12 +504,12 @@ done ################################################################# # write the IGV batch file for this family based on the bamouts # -# to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # +# to be stored as /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # ################################################################# -snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt +snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -522,7 +521,7 @@ fi # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} -echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} +echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_trio_before_BAMOUT.sh b/process_NHS_WES_trio_before_BAMOUT.sh index d736a1a120e9b06c4b00fa85efd953df3833e7aa..f6f043ba5b2d1f1007e1b00f29241e967035dad1 100755 --- a/process_NHS_WES_trio_before_BAMOUT.sh +++ b/process_NHS_WES_trio_before_BAMOUT.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin 
-export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,36 +23,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase 
-GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -153,7 +153,7 @@ G2P_LOG_DIR=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_LOG_DIR mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html -VCF_KEYS='gnomADe|gnomADg' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -166,11 +166,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 97 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.19092019.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.19092019.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -226,7 +226,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${PLATE_ID}_${FAMILY_ID}.strict.24chr.sort.denovo.vcf diff --git a/processing_setup.sh b/processing_setup.sh index 0356e8dc23b5c80fa3cb63db7e7bfee444707b12..509ce35956eb42f7781c1954c45d48224c855b7a 100755 --- a/processing_setup.sh +++ b/processing_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -18,11 +18,11 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 diff --git a/run_processing.sh b/run_processing.sh index f6208eb1756da7bb46d3e447849908699feed610..edf2168cce15a6fb15a2a40fdc1743a01b5be1f6 100755 --- a/run_processing.sh +++ b/run_processing.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by processing_setup.sh ### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF 
PED_DIR=${WORK_DIR}/PED @@ -23,31 +23,31 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by processing_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by processing_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip 
+TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa @@ -150,7 +150,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -163,11 +163,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.01062020.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.01062020.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and 
IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -223,7 +223,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -237,7 +237,7 @@ time ${GATK4} IndexFeatureFile -F ${FAMILY_ID}.strict.24chr.sort.denovo.vcf ######################################################################################################################################## #### remove variants from LCR and telo-/centro-mere regions ###time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${FAMILY_ID}.strict.24chr.sort.denovo.vcf -O ${FAMILY_ID}.clean.denovo.vcf \ -###-XL /home/u035/project/resources/LCR.bed -XL /home/u035/project/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants +###-XL /home/u035/u035/shared/resources/LCR.bed -XL /home/u035/u035/shared/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants #### split multi-allelic sites [by -m -any] #### left-alignment and normalization [by adding the -f] diff --git a/submit_depth_of_coverage_MQ20_BQ20.sh b/submit_depth_of_coverage_MQ20_BQ20.sh index 14e7e34d421f07008e95ddf93c7207075db57f65..0a1a6a64cfdbc0220bec59755a0a71784809236e 100644 --- a/submit_depth_of_coverage_MQ20_BQ20.sh +++ b/submit_depth_of_coverage_MQ20_BQ20.sh @@ -18,9 +18,9 @@ then fi fi -export PATH=$PATH:/home/u035/project/software/bcbio-1.1.3/tools/bin -BCBIO_CONFIG=/scratch/u035/project/analysis/wes_pilot/bcbio/config 
-BCBIO_WORK=/scratch/u035/project/analysis/wes_pilot/bcbio/work +export PATH=$PATH:/home/u035/u035/shared/software/bcbio-1.1.3/tools/bin +BCBIO_CONFIG=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/config +BCBIO_WORK=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/work # Expects environment variables to be set # BATCH - date yyyymmdd batch diff --git a/submit_downstream.sh b/submit_downstream.sh index ed9006cf54abaa4b03cc6d1758ebf88031030926..ed4ae1c6daed10cfab07d3010d47fb81c093e74a 100755 --- a/submit_downstream.sh +++ b/submit_downstream.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by downstream_setup.sh### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,28 +23,28 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by downstream_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by downstream_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190613.plus15bp.merged.bed -CLINVAR=/home/u035/project/resources/DDG2P.20190613.clinvar.20190603.plus15bp.txt +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.plus15bp.merged.bed +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.clinvar.20190603.plus15bp.txt ### TOOLS ### 
-BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa @@ -154,11 +154,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 96 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.13062019.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ - --plugin 
G2P,file='/home/u035/project/resources/DDG2P.13062019.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.13062019.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.13062019.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -212,7 +212,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -224,7 +224,7 @@ time ${GATK4} IndexFeatureFile -F ${FAMILY_ID}.strict.24chr.sort.denovo.vcf # remove variants from LCR and telo-/centro-mere regions time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${FAMILY_ID}.strict.24chr.sort.denovo.vcf -O ${FAMILY_ID}.clean.denovo.vcf \ --XL /home/u035/project/resources/LCR.bed -XL /home/u035/project/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants +-XL /home/u035/u035/shared/resources/LCR.bed -XL /home/u035/u035/shared/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants # split multi-allelic sites [by -m -any] # left-alignment and normalization [by adding the -f] diff --git a/submit_trio_wes_aspera_download.sh b/submit_trio_wes_aspera_download.sh index 013dcb3e2f1ea2dd2dd5e7c60a902739ddf56e72..c6bb2e99e8f08a638ba8b116f12d941fbc224dcd 100755 --- a/submit_trio_wes_aspera_download.sh +++ 
b/submit_trio_wes_aspera_download.sh @@ -8,16 +8,16 @@ source $TRANSFER_INFO_FILE -/home/u035/project/software/aspera/connect/bin/ascp \ +/home/u035/u035/shared/software/aspera/connect/bin/ascp \ -T -P 33001 -O 33001 -l 500M -k2 --overwrite=diff \ $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT/raw_data \ - /scratch/u035/project/trio_whole_exome/data + /scratch/u035/u035/shared/trio_whole_exome/data -cd /scratch/u035/project/trio_whole_exome/data/ +cd /scratch/u035/u035/shared/trio_whole_exome/data/ mkdir $PROJECT mv raw_data $PROJECT/ -cd /scratch/u035/project/trio_whole_exome/data/$PROJECT/raw_data +cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT/raw_data rm ../md5_check.txt 2> /dev/null diff --git a/submit_trio_wes_lftp_download.sh b/submit_trio_wes_lftp_download.sh index f7dba577e6faa2bf01a99e4d10dfdb34a012caec..92e2adaf1a8871be6f10ce75bc258efa7ee02b54 100755 --- a/submit_trio_wes_lftp_download.sh +++ b/submit_trio_wes_lftp_download.sh @@ -6,10 +6,10 @@ #PBS -j oe ###source $TRANSFER_INFO_FILE -###/home/u035/project/software/aspera/connect/bin/ascp \ +###/home/u035/u035/shared/software/aspera/connect/bin/ascp \ ### -T -P 33001 -O 33001 -l 500M -k2 --overwrite=diff \ ### $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT/raw_data \ -### /scratch/u035/project/trio_whole_exome/data +### /scratch/u035/u035/shared/trio_whole_exome/data PROJ_CONN="login anonymous lftp@ ; mirror -vv ${TOKEN}/${PROJECT}/raw_data ." 
@@ -17,8 +17,8 @@ echo ${PROJ_CONN} # set up an EPCC folder for this project -mkdir /scratch/u035/project/trio_whole_exome/data/$PROJECT -cd /scratch/u035/project/trio_whole_exome/data/$PROJECT +mkdir /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT +cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT # download the data @@ -26,7 +26,7 @@ lftp transfer.genomics.ed.ac.uk <<<${PROJ_CONN} # go into raw_data to perform the md5_check -# cd /scratch/u035/project/trio_whole_exome/data/$PROJECT/raw_data +# cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT/raw_data rm md5_check.txt 2> /dev/null for DATE in 20*[0-9] diff --git a/test_process_NHS_WES_trio.sh b/test_process_NHS_WES_trio.sh index ab929cfd716fdcf248fcaf09cbbf4007d75b2278..db2c4e04be1dd64da3ee2ae1a97dea4987e71b22 100755 --- a/test_process_NHS_WES_trio.sh +++ b/test_process_NHS_WES_trio.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,36 +23,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by 
NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar 
+PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/test_run_processing.sh b/test_run_processing.sh index 81bc65240d1cb1e2bc0ed62cfbafeab9926a4d10..e51fff49d2c6e1bf56f379ac2ac3d99251d61f17 100755 --- a/test_run_processing.sh +++ b/test_run_processing.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by processing_setup.sh ### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,27 +23,27 @@ COV_DIR=${WORK_DIR}/COV 
DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by processing_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by processing_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190613.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190613.clinvar.20190902.plus15bp.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.clinvar.20190902.plus15bp.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar 
+PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa diff --git a/trio_whole_exome_bcbio_crf_template.yaml b/trio_whole_exome_bcbio_crf_template.yaml index 0d87d96e7ce02e1bcb19e447d83b0b1a31691985..e379642d408852e6aff9309fbc3be6ae0502a6ff 100644 --- a/trio_whole_exome_bcbio_crf_template.yaml +++ b/trio_whole_exome_bcbio_crf_template.yaml @@ -19,4 +19,4 @@ details: analysis: variant2 genome_build: hg38 upload: - dir: /scratch/u035/project/trio_whole_exome/analysis/output + dir: /scratch/u035/u035/shared/trio_whole_exome/analysis/output diff --git a/trio_whole_exome_bcbio_template.yaml b/trio_whole_exome_bcbio_template.yaml index e960be92364ba6eed2dea18cd11361f25b485d69..f6ebbb44f3e55484bf6eab03decdbf5ecb9263f6 100644 --- a/trio_whole_exome_bcbio_template.yaml +++ b/trio_whole_exome_bcbio_template.yaml @@ -16,4 +16,4 @@ details: analysis: variant2 genome_build: hg38 upload: - dir: /scratch/u035/project/trio_whole_exome/analysis/output + dir: /scratch/u035/u035/shared/trio_whole_exome/analysis/output diff --git a/trio_whole_exome_config.sh b/trio_whole_exome_config.sh index e4da287a7b950a0e5bd3636f9fe3ac1488c2cfb5..5c291d3d156d343b7595ed8415980aa939d63033 100644 --- a/trio_whole_exome_config.sh +++ b/trio_whole_exome_config.sh @@ -3,13 +3,13 @@ # Basic configuration options for trio WES pipeline # -SCRIPTS=/home/u035/project/scripts +SCRIPTS=/home/u035/u035/shared/scripts BCBIO_TEMPLATE=$SCRIPTS/trio_whole_exome_bcbio_template.yaml -TARGET=/home/u035/project/resources/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed -DOWNLOAD_DIR=/scratch/u035/project/trio_whole_exome/data -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa 
+TARGET=/home/u035/u035/shared/resources/exome_targets/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed +DOWNLOAD_DIR=/scratch/u035/u035/shared/data +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/analysis PARAMS_DIR=$BASE/params READS_DIR=$BASE/reads CONFIG_DIR=$BASE/config @@ -18,4 +18,4 @@ OUTPUT_DIR=$BASE/output ARCHIVE_DIR=/archive/u035/trio_whole_exome -export PATH=/home/u035/project/software/bcbio/tools/bin:$PATH +export PATH=/home/u035/u035/shared/software/bcbio/tools/bin:$PATH diff --git a/trio_whole_exome_crf_config.sh b/trio_whole_exome_crf_config.sh index ce1495088587377cc4007789fad7824eac281f37..3b7a89fdf2e885ce50a550b089645f45c9bcd194 100644 --- a/trio_whole_exome_crf_config.sh +++ b/trio_whole_exome_crf_config.sh @@ -3,13 +3,13 @@ # Basic configuration options for trio WES pipeline # -SCRIPTS=/home/u035/project/scripts +SCRIPTS=/home/u035/u035/shared/scripts BCBIO_TEMPLATE=$SCRIPTS/trio_whole_exome_bcbio_crf_template.yaml -TARGET=/home/u035/project/resources/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed -DOWNLOAD_DIR=/scratch/u035/project/trio_whole_exome/data -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa +TARGET=/home/u035/u035/shared/resources/exome_targets/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed +DOWNLOAD_DIR=/scratch/u035/u035/shared/data +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/analysis PARAMS_DIR=$BASE/params READS_DIR=$BASE/reads CONFIG_DIR=$BASE/config @@ -18,4 +18,4 @@ OUTPUT_DIR=$BASE/output ARCHIVE_DIR=/archive/u035/trio_whole_exome -export PATH=/home/u035/project/software/bcbio/tools/bin:$PATH +export PATH=/home/u035/u035/shared/software/bcbio/tools/bin:$PATH diff --git a/vcf_config.json.backup b/vcf_config.json.backup index 
fc5941cb39692ca6ac46a3703cb0d4df1f8f50d6..59464bf2fe878e1df7d9f0d50bab569d787f737f 100644 --- a/vcf_config.json.backup +++ b/vcf_config.json.backup @@ -140,7 +140,7 @@ "species": "homo_sapiens", "assembly": "GRCh38", "type": "local", - "filename_template": "/home/u035/project/resources/gnomad/r3.0/genomes/gnomad.genomes.r3.0.sites.chr###CHR###_trimmed_info.vcf.bgz", + "filename_template": "/home/u035/u035/shared/resources/gnomad/r3.0/genomes/gnomad.genomes.r3.0.sites.chr###CHR###_trimmed_info.vcf.bgz", "chromosomes": [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y" @@ -201,7 +201,7 @@ "species": "homo_sapiens", "assembly": "GRCh38", "type": "local", - "filename_template": "/home/u035/project/resources/gnomad/r2.1/exomes/gnomad.exomes.r2.1.sites.grch38.chr###CHR###_noVEP.vcf.gz", + "filename_template": "/home/u035/u035/shared/resources/gnomad/r2.1/exomes/gnomad.exomes.r2.1.sites.grch38.chr###CHR###_noVEP.vcf.gz", "chromosomes": [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y"