diff --git a/NHS_WES_check_PED_aff_probands.py b/NHS_WES_check_PED_aff_probands.py index 2ca0e8a33ed3507d234465c02a4b859761c97d5e..a3c84d3351dd038ab29500c240a64d856e09856b 100755 --- a/NHS_WES_check_PED_aff_probands.py +++ b/NHS_WES_check_PED_aff_probands.py @@ -59,6 +59,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_check_PED_aff_probands.py a_ped_file" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_aff_probands.py a_ped_file" raise SystemExit diff --git a/NHS_WES_check_PED_quad.py b/NHS_WES_check_PED_quad.py index 1603c4678d7c63b09144ac83b4007a5dd351f16f..998f4d01cf778abb2e6caf3e99f5bb89f529e40c 100755 --- a/NHS_WES_check_PED_quad.py +++ b/NHS_WES_check_PED_quad.py @@ -69,6 +69,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_check_PED_quad.py a_ped_file" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_check_PED_quad.py a_ped_file" raise SystemExit diff --git a/NHS_WES_extract_shared_vars.py b/NHS_WES_extract_shared_vars.py index 83e94aa20a398592d7fc9aca1cf5a04c4bd1e409..f75c695949303e45ab3329e3522a1cb4c39d0a07 100755 --- a/NHS_WES_extract_shared_vars.py +++ b/NHS_WES_extract_shared_vars.py @@ -126,6 +126,6 @@ if __name__ == '__main__': if len(sys.argv) == 4: go(sys.argv[1],sys.argv[2],sys.argv[3]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_extract_shared_vars.py in_vcf comma,sep,list,of,ids out_vcf" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_extract_shared_vars.py in_vcf comma,sep,list,of,ids out_vcf" raise SystemExit diff --git a/NHS_WES_extract_trio_FAM_PRO_ID.py b/NHS_WES_extract_trio_FAM_PRO_ID.py index e936cc71a751440d70d9d620b2e20a2569a8ef08..c215a0119840b8839964a4fa3d770f81a0f20c0b 100755 --- a/NHS_WES_extract_trio_FAM_PRO_ID.py 
+++ b/NHS_WES_extract_trio_FAM_PRO_ID.py @@ -115,6 +115,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_extract_trio_FAM_PRO_ID.py /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}" + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_extract_trio_FAM_PRO_ID.py /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV.py b/NHS_WES_generate_DEC_IGV.py index 0fdb863498ba8bbe4898a4113894bfc6e8862d42..95d204eacd86a269e79c37510f2756f3ce76c4c9 100755 --- a/NHS_WES_generate_DEC_IGV.py +++ b/NHS_WES_generate_DEC_IGV.py @@ -1395,7 +1395,7 @@ if __name__ == '__main__': if len(sys.argv) == 12: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV.py.v1 b/NHS_WES_generate_DEC_IGV.py.v1 index 00a1e4d9664c72d1f81d9fbe5a9b3971aaf51cab..87c4c1d5451972193b849989271f464e9d7d2180 100755 --- a/NHS_WES_generate_DEC_IGV.py.v1 +++ b/NHS_WES_generate_DEC_IGV.py.v1 @@ -920,7 +920,7 @@ if __name__ == '__main__': if len(sys.argv) == 12: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ 
dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans b/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans index 3d08582cd939755fe442ca128c39fd68a3a5f754..7cbf1acc5bfa8a95a5e7fd5b4d211639e4a40471 100755 --- a/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans +++ b/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans @@ -1366,7 +1366,7 @@ if __name__ == '__main__': if len(sys.argv) == 12: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV_aff_probands.py b/NHS_WES_generate_DEC_IGV_aff_probands.py index 25a965b83bba82bb543a96375eacdeccc40c9e01..7014b2d3a6e8c2c16ec805af9ff681e93ae62ea8 100755 --- a/NHS_WES_generate_DEC_IGV_aff_probands.py +++ b/NHS_WES_generate_DEC_IGV_aff_probands.py @@ -535,7 +535,7 @@ if __name__ == '__main__': if len(sys.argv) == 11: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV_sib_from_quad.py b/NHS_WES_generate_DEC_IGV_sib_from_quad.py index eb978f20ef453440264bdbcaca17263b83914aa8..2946a2d02219934a3afa437c4541de23eae0bad9 100755 
--- a/NHS_WES_generate_DEC_IGV_sib_from_quad.py +++ b/NHS_WES_generate_DEC_IGV_sib_from_quad.py @@ -541,7 +541,7 @@ if __name__ == '__main__': if len(sys.argv) == 11: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV_aff_probands.py \ dec_id,trans_map_file,ped_file,in_g2p_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir" raise SystemExit diff --git a/NHS_WES_generate_DEC_IGV_trio_from_quad.py b/NHS_WES_generate_DEC_IGV_trio_from_quad.py index 6b9e2d1a01d956f60a204903c74474a2de8180e9..33462d2a8ffa235d1f116d292ceeb96c81a9bb7f 100755 --- a/NHS_WES_generate_DEC_IGV_trio_from_quad.py +++ b/NHS_WES_generate_DEC_IGV_trio_from_quad.py @@ -1397,7 +1397,7 @@ if __name__ == '__main__': if len(sys.argv) == 13: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11],sys.argv[12]) else: - print "Suggested use: time python /home/u035/project/scripts/NHS_WES_generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/NHS_WES_generate_DEC_IGV.py \ dec_map_file,trans_map_file,ped_file,in_g2p_file,in_vase_file,fam_igv_dir,vcf_dir,plate_id,fam_id,dec_dir,fam_bam_dir,indi_id_for_this_kid" raise SystemExit diff --git a/NHS_WES_generate_aff_sib_ped.py b/NHS_WES_generate_aff_sib_ped.py index 17b5b309e1488019b6ca5ba50beba22f8d48c1ed..4158999d7d81da69e662ca53c2ac68abcce30875 100644 --- a/NHS_WES_generate_aff_sib_ped.py +++ b/NHS_WES_generate_aff_sib_ped.py @@ -43,6 +43,6 @@ if __name__ == '__main__': if len(sys.argv) == 5: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/NHS_WES_generate_aff_sib_ped.py ${PED_DIR} 
${quad_ped_file} ${KID_1_ID} ${KID_2_ID}" + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/NHS_WES_generate_aff_sib_ped.py ${PED_DIR} ${quad_ped_file} ${KID_1_ID} ${KID_2_ID}" raise SystemExit diff --git a/NHS_WES_generate_coverage_result_file.py b/NHS_WES_generate_coverage_result_file.py index 8c20a01710f6e4d7a79ee3a1117ae9a342601e94..6bd2d6a38181f8c011684f9f0b5aa574cdd8c40c 100644 --- a/NHS_WES_generate_coverage_result_file.py +++ b/NHS_WES_generate_coverage_result_file.py @@ -114,9 +114,9 @@ if __name__ == '__main__': if len(sys.argv) == 4: go(sys.argv[1],sys.argv[2],sys.argv[3]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/generate_coverage_result_file.py \ + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/generate_coverage_result_file.py \ DDG2P.s14-NFE-Twist-NA12878.sample_interval_summary \ - /home/u035/project/resources/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ + /home/u035/u035/shared/resources/G2P/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ DDG2P.s14-NFE-Twist-NA12878.COV.txt" raise SystemExit diff --git a/NHS_WES_generate_trio_VCF.py b/NHS_WES_generate_trio_VCF.py index 7d5425d4506610c6a07d9aa65146b4994c1071b7..d856ded0e3c0a32cea5aa533b2fabed68cdc5a81 100644 --- a/NHS_WES_generate_trio_VCF.py +++ b/NHS_WES_generate_trio_VCF.py @@ -42,6 +42,6 @@ if __name__ == '__main__': if len(sys.argv) == 6: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" raise SystemExit diff --git a/NHS_WES_generate_trio_ped.py b/NHS_WES_generate_trio_ped.py index 9ea04f2c3a1ee0b2d66e220075e2dba30e32e10c..bd7d0bf6328978802fce15a8c52e221c250fcdf3 100644 --- 
a/NHS_WES_generate_trio_ped.py +++ b/NHS_WES_generate_trio_ped.py @@ -42,6 +42,6 @@ if __name__ == '__main__': if len(sys.argv) == 6: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" + print "Suggested use: time $PYTHON /home/u035/u035/shared/scripts/NHS_WES_generate_trio_ped.py ${PED_DIR} ${quad_ped_file} ${KID_ID} ${PAR_1_ID} ${PAR_2_ID}" raise SystemExit diff --git a/NHS_WES_trio_cram_setup.sh b/NHS_WES_trio_cram_setup.sh index c8f5f4544067304a4da542b1b29d9d8b4fe94dba..a8cc30d0cb9d05feefa4b16370a9ab0d5d2cfd7c 100755 --- a/NHS_WES_trio_cram_setup.sh +++ b/NHS_WES_trio_cram_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -19,14 +19,14 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -SAMTOOLS=/home/u035/project/software/bcbio/anaconda/bin/samtools -PICARD=/home/u035/project/software/bcbio/anaconda/bin/picard -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools +PICARD=/home/u035/u035/shared/software/bcbio/anaconda/bin/picard +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa # check if ${WORK_DIR} already exists - if so, exit - to prevent accidental overwriting @@ -38,7 +38,7 @@ fi -echo "SOURCE_DIR = 
${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/project/trio_whole_exome/analysis/output +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/NHS_WES_trio_delete_BAM.sh b/NHS_WES_trio_delete_BAM.sh index 5bd4af7deec17ecb4b5efad8739635d4cef46b23..5fb4a5f788d26cacd47b353a43a3e1e2e61075ad 100755 --- a/NHS_WES_trio_delete_BAM.sh +++ b/NHS_WES_trio_delete_BAM.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -19,10 +19,10 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/project/trio_whole_exome/analysis/output/${VERSION_N}_${PLATE_ID} +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/${VERSION_N}_${PLATE_ID} echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/NHS_WES_trio_setup.sh b/NHS_WES_trio_setup.sh index 891a20280db6b6acdd7b7778da63eb03ed9c9894..f82d34a4f1d9b8c35b16c1f61ac05b372de9cd73 100755 --- a/NHS_WES_trio_setup.sh +++ b/NHS_WES_trio_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -19,11 +19,11 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 @@ -36,7 +36,7 @@ fi -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/project/trio_whole_exome/analysis/output +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source VCF, BAM and PED files i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/bcbio_gnomad_install.sh b/bcbio_gnomad_install.sh index 13563b6a751c400113a83ae4021d2d8a27cf07db..705f1a5811689d4fe7360ab2eade85237e709679 100755 --- a/bcbio_gnomad_install.sh +++ b/bcbio_gnomad_install.sh @@ -5,9 +5,9 @@ #PBS -N bcbio_gnomad_install #PBS -j oe -cd /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/txtmp +cd /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/txtmp -PATH=$PATH:/home/u035/project/software/bcbio/anaconda/bin +PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/bin ref=../seq/hg38.fa fields_to_keep="INFO/"$(cat gnomad_fields_to_keep.txt | paste -s | sed s/"\t"/",INFO\/"/g) diff --git a/decipher_NHS_WES_trio.sh b/decipher_NHS_WES_trio.sh index a185044e8bf51c4bb51a9f4df87fbe3c4f5d2dc8..ee7210c279d852e276ec400b299d308914c87fac 100755 --- a/decipher_NHS_WES_trio.sh +++ b/decipher_NHS_WES_trio.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -#BASE=/scratch/u035/project/analysis/wes_pilot -BASE=/scratch/u035/project/trio_whole_exome/analysis +#BASE=/scratch/u035/u035/shared/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -24,36 +24,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV 
-SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt 
+VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/download_gnomADg.sh b/download_gnomADg.sh deleted file mode 100755 index a5a54e9e84cc83af11504e8017ae91758eccd575..0000000000000000000000000000000000000000 --- a/download_gnomADg.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -#PBS -l walltime=48:00:00 -#PBS -l ncpus=1,mem=2gb -#PBS -q uv2000 -#PBS -N download_gnomADg -#PBS -j oe - - -DATA_DIR=/home/u035/project/resources/gnomad/r2.1/genomes - -cd ${DATA_DIR} - -wget ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_genotype/gnomad/r2.1/genomes/*.gz -wget ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_genotype/gnomad/r2.1/genomes/*.tbi - diff --git a/downstream_setup.sh b/downstream_setup.sh index d610ebb41b8b75452e7dc3d0dd0dfe1e78059970..fa25954fbd3b834e9a896384740229e6a6599855 100755 --- a/downstream_setup.sh +++ b/downstream_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -18,10 +18,10 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 diff --git a/extract_BED_CCDS_DDG2P.py b/extract_BED_CCDS_DDG2P.py index e12ab94c2364d67560fec908647756a8eb9773b8..33e1e11be76a4bced5af681a39f3788788d1fa05 100644 --- a/extract_BED_CCDS_DDG2P.py +++ b/extract_BED_CCDS_DDG2P.py @@ -1,6 +1,6 @@ # given -# the BED file for all genes (/home/u035/project/resources/CCDS.20180614.plus15bp.merged.bed) -# and the file for the genes in the DDG2P 
(unique gene names, inluding synonyms, i.e., /home/u035/project/resources/genes_in_DDG2P.30082018.txt) +# the BED file for all genes (/home/u035/u035/shared/resources/exome_targets/CCDS.20180614.plus15bp.merged.bed) +# and the file for the genes in the DDG2P (unique gene names, inluding synonyms, i.e., /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.30082018.txt) # extract # the BED file for the DDG2P genes # diff --git a/extract_trio_FAM_PRO_ID.py b/extract_trio_FAM_PRO_ID.py index e58c60acd0e2cb1661e4a8359c16e68e3f957ead..e66d8f4cef2867cc6ffabc94881ea97abcc58d60 100755 --- a/extract_trio_FAM_PRO_ID.py +++ b/extract_trio_FAM_PRO_ID.py @@ -111,6 +111,6 @@ if __name__ == '__main__': if len(sys.argv) == 2: go(sys.argv[1]) else: - print "Suggested use: time python /home/u035/project/scripts/extract_trio_FAM_PRO_ID.py /scratch/u035/project/analysis/wes_pilot/03062019" + print "Suggested use: time python /home/u035/u035/shared/scripts/extract_trio_FAM_PRO_ID.py /scratch/u035/u035/shared/analysis/wes_pilot/03062019" raise SystemExit diff --git a/full_process_NHS_WES_trio.sh b/full_process_NHS_WES_trio.sh index 38fd7b924be43d72674de8964973f0a8c0399a10..f2bc21eb8e893cf15c916f0575a0cfddf17d302a 100755 --- a/full_process_NHS_WES_trio.sh +++ b/full_process_NHS_WES_trio.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -#BASE=/scratch/u035/project/analysis/wes_pilot -BASE=/scratch/u035/project/trio_whole_exome/analysis 
+#BASE=/scratch/u035/u035/shared/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -24,36 +24,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa 
+BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -153,7 +153,7 @@ G2P_LOG_DIR=${G2P_DIR}/${FAMILY_ID}_LOG_DIR mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.html -VCF_KEYS='gnomADe|gnomADg' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -166,11 +166,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 97 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.19092019.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF', + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.19092019.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF', af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -225,7 +225,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) 
rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf diff --git a/gather_NHS_WES_aff_probands_results.sh b/gather_NHS_WES_aff_probands_results.sh index 8576ca793cdb65aabe4193b5b433516bfd11677a..01b6f99f09ddd5b1f404081a4e9acd38d22c90bb 100755 --- a/gather_NHS_WES_aff_probands_results.sh +++ b/gather_NHS_WES_aff_probands_results.sh @@ -8,7 +8,7 @@ ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=${BASE}/${PROJECT_ID} NHS_DIR=${WORK_DIR}/${PLATE_ID}_${VERSION_N}_results diff --git a/gather_NHS_WES_quad_results.sh b/gather_NHS_WES_quad_results.sh index a89d093c4c0dda93c772c56bae411c25e154a05d..f619b11ce43b7b942b379c60ccb50ba2a5464467 100755 --- a/gather_NHS_WES_quad_results.sh +++ b/gather_NHS_WES_quad_results.sh @@ -8,7 +8,7 @@ ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=${BASE}/${PROJECT_ID} NHS_DIR=${WORK_DIR}/${PLATE_ID}_${VERSION_N}_results diff --git a/gather_NHS_WES_trio_results.sh b/gather_NHS_WES_trio_results.sh index 6ff0dd1c32385bea699e225e0a8cbc1e9c36e609..59379f6631a4c01703bb86988984dc688b64ce4d 100755 --- a/gather_NHS_WES_trio_results.sh +++ b/gather_NHS_WES_trio_results.sh @@ -8,7 +8,7 @@ ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=${BASE}/${PROJECT_ID} NHS_DIR=${WORK_DIR}/${PLATE_ID}_${VERSION_N}_results diff --git a/generate_DEC_IGV.py b/generate_DEC_IGV.py index e293d2ce7de8bfb841833958b920410747fc60c0..21b24f168da0208afedcc4c214e548eaf5b8111a 100755 --- a/generate_DEC_IGV.py +++ b/generate_DEC_IGV.py @@ -942,10 +942,10 @@ if __name__ == '__main__': if len(sys.argv) == 12: 
go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],sys.argv[8],sys.argv[9],sys.argv[10],sys.argv[11]) else: - print "Suggested use: time python /home/u035/project/scripts/generate_DEC_IGV.py \ + print "Suggested use: time python /home/u035/u035/shared/scripts/generate_DEC_IGV.py \ 2820-gatk-haplotype-annotated.2820_2820.vcf.gz \ ../output_dd/2820_log_dir/2820.report.txt \ - /scratch/u035/project/analysis/wes_pilot/VASE/08042019/output/2820_2820.strict.denovo.vcf \ + /scratch/u035/u035/shared/analysis/wes_pilot/VASE/08042019/output/2820_2820.strict.denovo.vcf \ 2820_2820 \ 2820_2820.DEC.txt \ DECIPHER_DIR \ diff --git a/generate_G2P_out_VCF.py b/generate_G2P_out_VCF.py index cfae641c0934b9d33c4b7986987cb508a24f80fa..b178d0a722a3943b0222672d26897e0554d82825 100755 --- a/generate_G2P_out_VCF.py +++ b/generate_G2P_out_VCF.py @@ -166,7 +166,7 @@ if __name__ == '__main__': if len(sys.argv) == 5: go(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4]) else: - print "Suggested use: time python /home/u035/project/scripts/generate_G2P_out_VCF.py 2820-gatk-haplotype-annotated.2820_2820.vcf.gz ../output_dd/2820_log_dir/2820.report.txt 2820_2820 2820_2820.G2P.vcf " + print "Suggested use: time python /home/u035/u035/shared/scripts/generate_G2P_out_VCF.py 2820-gatk-haplotype-annotated.2820_2820.vcf.gz ../output_dd/2820_log_dir/2820.report.txt 2820_2820 2820_2820.G2P.vcf " raise SystemExit diff --git a/generate_coverage_result_file.py b/generate_coverage_result_file.py index 50e8c6f03675390324ad102d5ac0440a2700503d..ba8712e46d4aaa413eb61c125504f53c9bffa0a8 100644 --- a/generate_coverage_result_file.py +++ b/generate_coverage_result_file.py @@ -114,9 +114,9 @@ if __name__ == '__main__': if len(sys.argv) == 4: go(sys.argv[1],sys.argv[2],sys.argv[3]) else: - print "Suggested use: time $PYTHON /home/u035/project/scripts/generate_coverage_result_file.py \ + print "Suggested use: time $PYTHON 
/home/u035/u035/shared/scripts/generate_coverage_result_file.py \ DDG2P.s14-NFE-Twist-NA12878.sample_interval_summary \ - /home/u035/project/resources/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ + /home/u035/u035/shared/resources/G2P/DDG2P.20180830.ClinVar.20190520.plus15bp.txt \ DDG2P.s14-NFE-Twist-NA12878.COV.txt" raise SystemExit diff --git a/old_downstream_setup.sh b/old_downstream_setup.sh index b29e00b969cbc7c6b53157a5ff49d4f4ef77de79..1c8806b16483b7aeb30702f0b5d2dbb82e8fda10 100755 --- a/old_downstream_setup.sh +++ b/old_downstream_setup.sh @@ -13,9 +13,9 @@ DATE_BATCH=${DATE}_${BATCH} echo "DATE_BATCH = ${DATE_BATCH}" -BASE=/scratch/u035/project/analysis/wes_pilot -SOURCE_DIR=/scratch/u035/project/analysis/wes_pilot/bcbio/final -PED_DIR=/scratch/u035/project/analysis/wes_pilot/params +BASE=/scratch/u035/u035/shared/analysis/wes_pilot +SOURCE_DIR=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/final +PED_DIR=/scratch/u035/u035/shared/analysis/wes_pilot/params WORK_DIR=$BASE/${PROJECT_ID} @@ -24,9 +24,9 @@ VASE_DIR=${WORK_DIR}/VASE COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 # create the working dir and the required subfolders diff --git a/old_submit_downstream.sh b/old_submit_downstream.sh index 18b4e94cbcdb224472c012c530487a32444ddc5c..9b52a3d641d96bd99519dc854a4e54d291a64423 100755 --- a/old_submit_downstream.sh +++ b/old_submit_downstream.sh @@ -14,15 +14,15 @@ echo "DATE_BATCH = ${DATE_BATCH}" # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export 
PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 # where the VCF and BAM files are after the alignemnt and variant calling steps -SOURCE_DIR=/scratch/u035/project/analysis/wes_pilot/bcbio/final +SOURCE_DIR=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/final -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} G2P_DIR=${WORK_DIR}/G2P VASE_DIR=${WORK_DIR}/VASE @@ -33,28 +33,28 @@ CNV_DIR=${WORK_DIR}/CNV LOG_DIR=${WORK_DIR}/LOG VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt CHILD_IDS=${WORK_DIR}/PRO_IDs.txt -TARGETS=/home/u035/project/resources/DDG2P.20180830.plus15bp.merged.bed -CLINVAR=/home/u035/project/resources/DDG2P.20180830.clinvar.20190603.plus15bp.txt +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20180830.plus15bp.merged.bed +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20180830.clinvar.20190603.plus15bp.txt ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa 
+BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa @@ -158,11 +158,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 96 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.30082018.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.30082018.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.30082018.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.30082018.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -212,7 +212,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -224,7 +224,7 @@ time ${GATK4} IndexFeatureFile -F ${FAMILY_ID}.strict.24chr.sort.denovo.vcf # remove variants from LCR and telo-/centro-mere regions time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${FAMILY_ID}.strict.24chr.sort.denovo.vcf -O ${FAMILY_ID}.clean.denovo.vcf \ --XL /home/u035/project/resources/LCR.bed -XL /home/u035/project/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants +-XL /home/u035/u035/shared/resources/LCR.bed -XL /home/u035/u035/shared/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants # split multi-allelic sites [by -m -any] # left-alignment and normalization [by adding the -f] diff --git a/old_submit_trio_wes_aspera_download.sh b/old_submit_trio_wes_aspera_download.sh index 2a1221a0efc34bd5a7ecae4172424f34b30f1376..776b53f6da668eec350e4d121372d8a6bb6ab78c 100755 --- a/old_submit_trio_wes_aspera_download.sh +++ b/old_submit_trio_wes_aspera_download.sh @@ -7,12 +7,12 @@ source $TRANSFER_INFO_FILE -/home/u035/project/software/aspera/connect/bin/ascp \ +/home/u035/u035/shared/software/aspera/connect/bin/ascp \ -T -P 33001 -O 33001 -l 500M -k2 --overwrite=diff \ $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT \ - /scratch/u035/project/trio_whole_exome/data + /scratch/u035/u035/shared/trio_whole_exome/data -cd /scratch/u035/project/trio_whole_exome/data/$PROJECT/raw_data +cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT/raw_data rm ../md5_check.txt 2> /dev/null for DATE in 20*[0-9] diff --git a/process_NHS_WES_aff_probands.sh b/process_NHS_WES_aff_probands.sh index 
3afbfa1bedc001209aa6c6944723cf6603d6221e..7cb0c832035ed2c93f20bd628b3a1494ebfbe39a 100755 --- a/process_NHS_WES_aff_probands.sh +++ b/process_NHS_WES_aff_probands.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh, done previously by the stanard trio-based pipeline ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -25,37 +25,37 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used -TARGETS=/home/u035/project/resources/DDG2P.20210706.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools 
-BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. 
/scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -165,7 +165,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP version 97 -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -178,11 +178,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and 
IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -437,7 +437,7 @@ done ############################################################################################## ## write the IGV batch file for each affected individual in this family based on the bamouts # -## to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # +## to be stored as /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # ## ${PROBAND_ID}_${FAMILY_ID} == ${aff_pro_arr[$key] # ################################################################## @@ -446,7 +446,7 @@ for key in "${!aff_pro_arr[@]}"; do echo "" echo "Generating the IGV batch file for ${aff_pro_arr[$key]}"; - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -457,7 +457,7 @@ for key in "${!aff_pro_arr[@]}"; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_quad.sh b/process_NHS_WES_quad.sh index 87076ef01d500e94037f0938b34baad02f5e6d7a..da99c05c40394fd319dbdb03f93568b0d994f0ca 100755 --- a/process_NHS_WES_quad.sh +++ b/process_NHS_WES_quad.sh @@ -7,13 +7,13 @@ # setup PATH -export 
PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh, done previously by the stanard trio-based pipeline ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -25,37 +25,37 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used -TARGETS=/home/u035/project/resources/DDG2P.20210706.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase 
-GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 
11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -217,7 +217,7 @@ for KID_ID in ${KID_IDS[@]}; do mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_${KID_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_${KID_ID}.report.html - VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' + VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ -i ${IN_FILE} \ @@ -229,11 +229,11 @@ for KID_ID in ${KID_IDS[@]}; do --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -284,7 +284,7 @@ for KID_ID in ${KID_IDS[@]}; do time ${GATK4} IndexFeatureFile -I ${OUT_FILE} # select only variants on the 24 chromosomes - time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${PLATE_ID}_${FAMILY_ID}_${KID_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants + time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${PLATE_ID}_${FAMILY_ID}_${KID_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${PLATE_ID}_${FAMILY_ID}_${KID_ID}.strict.24chr.sort.denovo.vcf @@ -488,10 +488,10 @@ for KID_ID in ${KID_IDS[@]}; do ################################################################# # write the IGV batch file for this family based on the bamouts # - # to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt # + # to be stored as /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt # ################################################################# - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${KID_ID}_${FAMILY_ID}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -502,7 +502,7 @@ for KID_ID in ${KID_IDS[@]}; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_${KID_ID}\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_${KID_ID}\"" >> ${snap_file} echo "" >> ${snap_file} # now, go again over the variants in the DECIPHER file and generate one snapshot file for all the variants @@ -680,7 +680,7 @@ 
G2P_LOG_DIR=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_shared_LOG_DIR mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_shared.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}_shared.report.html -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ -i ${IN_FILE} \ @@ -692,11 +692,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -877,7 +877,7 @@ done ############################################################################################## ## write the IGV batch file for each affected individual in this family based on the bamouts # -## to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.shared.snapshot.txt # +## to be stored as 
/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.shared.snapshot.txt # ## ${PROBAND_ID}_${FAMILY_ID} == ${aff_pro_arr[$key] # ################################################################## @@ -886,7 +886,7 @@ for key in "${!aff_pro_arr[@]}"; do echo "" echo "Generating the IGV batch file for ${aff_pro_arr[$key]}"; - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.shared.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.shared.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -898,7 +898,7 @@ for key in "${!aff_pro_arr[@]}"; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_shared\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}_shared\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_quad_full.sh b/process_NHS_WES_quad_full.sh index 9de198d410c97f88c56809257f46d71560dfc3fa..d56865541e228c6642f2c821d299a4ed72db1e68 100755 --- a/process_NHS_WES_quad_full.sh +++ b/process_NHS_WES_quad_full.sh @@ -7,13 +7,13 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream 
analysis - created by NHS_WES_trio_setup.sh, done previously by the stanard trio-based pipeline ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -25,37 +25,37 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used -TARGETS=/home/u035/project/resources/DDG2P.20200601.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20200601.clinvar.20200520.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20200601.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20200601.clinvar.20200520.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to 
../share/ensembl-vep-100.4-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -166,7 +166,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP version 97 -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -179,11 +179,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.01062020.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.01062020.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -437,7 +437,7 @@ done ############################################################################################## ## write the IGV batch file for each affected individual in this family based on the bamouts # -## to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # +## to be stored as 
/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # ## ${PROBAND_ID}_${FAMILY_ID} == ${aff_pro_arr[$key] # ################################################################## @@ -446,7 +446,7 @@ for key in "${!aff_pro_arr[@]}"; do echo "" echo "Generating the IGV batch file for ${aff_pro_arr[$key]}"; - snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt + snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${aff_pro_arr[$key]}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -457,7 +457,7 @@ for key in "${!aff_pro_arr[@]}"; do # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} - echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} + echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_trio.sh b/process_NHS_WES_trio.sh index bf8b8c0544fb66fcfdf9915e1b3784130594cf11..b31d29bf5e8418281003aa4429a4e3130cc89a7c 100755 --- a/process_NHS_WES_trio.sh +++ b/process_NHS_WES_trio.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### 
-BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -24,37 +24,36 @@ DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV BAMOUT_DIR=${WORK_DIR}/BAMOUT -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20210706.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20210706.clinvar.20210626.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON3=/home/u035/project/software/bcbio/anaconda/bin/python3 # points to python3.6 -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl 
/home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep -#REFERENCE_GENOME=/home/u035/project/resources/hg38.fa -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa - - - -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk # points to ../share/gatk4-4.1.8.1-0/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON3=/home/u035/u035/shared/software/bcbio/anaconda/bin/python3 # points to python3.6 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-100.4-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa + + + +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -158,7 +157,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP version 97 -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ -i ${IN_FILE} \ @@ -170,11 +169,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20210706.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.20210706.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.20210706.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -248,7 +247,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -I ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${PLATE_ID}_${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -505,12 +504,12 @@ done ################################################################# # write the IGV batch file for this family based on the bamouts # -# to be stored as /scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # +# to be stored as /scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # ################################################################# -snap_file=/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt +snap_file=/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/bamout_${PROBAND_ID}_${FAMILY_ID}.snapshot.txt # check if previous version exist, if so - delete it if [ -f "${snap_file}" ]; then @@ -522,7 +521,7 @@ fi # write the header for the IGV batch file echo "new" >> ${snap_file} echo "genome hg38" >> ${snap_file} -echo "snapshotDirectory \"/scratch/u035/project/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} +echo "snapshotDirectory \"/scratch/u035/u035/shared/trio_whole_exome/analysis/${PROJECT_ID}/DECIPHER/IGV/${PLATE_ID}_${FAMILY_ID}\"" >> ${snap_file} echo "" >> ${snap_file} diff --git a/process_NHS_WES_trio_before_BAMOUT.sh b/process_NHS_WES_trio_before_BAMOUT.sh index d736a1a120e9b06c4b00fa85efd953df3833e7aa..f6f043ba5b2d1f1007e1b00f29241e967035dad1 100755 --- a/process_NHS_WES_trio_before_BAMOUT.sh +++ b/process_NHS_WES_trio_before_BAMOUT.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin 
-export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,36 +23,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase 
-GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 
16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done @@ -153,7 +153,7 @@ G2P_LOG_DIR=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_LOG_DIR mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${PLATE_ID}_${FAMILY_ID}.report.html -VCF_KEYS='gnomADe|gnomADg' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -166,11 +166,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 97 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.19092019.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.19092019.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-97.3-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.19092019.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" @@ -226,7 +226,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O 
${PLATE_ID}_${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${PLATE_ID}_${FAMILY_ID}.strict.24chr.sort.denovo.vcf diff --git a/processing_setup.sh b/processing_setup.sh index 0356e8dc23b5c80fa3cb63db7e7bfee444707b12..509ce35956eb42f7781c1954c45d48224c855b7a 100755 --- a/processing_setup.sh +++ b/processing_setup.sh @@ -7,7 +7,7 @@ ### Setup the folder structure for the downstream analysis### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -18,11 +18,11 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts ### Tools -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 diff --git a/run_processing.sh b/run_processing.sh index f6208eb1756da7bb46d3e447849908699feed610..1e146f0477d9ec97cc29010b12adcb9f4a782dc0 100755 --- a/run_processing.sh +++ b/run_processing.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by processing_setup.sh ### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF 
PED_DIR=${WORK_DIR}/PED @@ -23,31 +23,31 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by processing_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by processing_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/resources/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip
+TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa @@ -150,7 +150,7 @@ mkdir ${G2P_LOG_DIR} TXT_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.txt HTML_OUT=${G2P_LOG_DIR}/${FAMILY_ID}.report.html #VCF_KEYS='gnomADe|gnomADg' # old VEP -VCF_KEYS='gnomADe_GRCh38|gnomADg_r3.0_GRCh38' +VCF_KEYS='gnomADe_r2.1.1_GRCh38|gnomADg_r3.1.1_GRCh38' time ${VEP} \ @@ -163,11 +163,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 100 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.01062020.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ - --plugin G2P,file='/home/u035/project/resources/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.01062020.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and 
IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -223,7 +223,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -237,7 +237,7 @@ time ${GATK4} IndexFeatureFile -F ${FAMILY_ID}.strict.24chr.sort.denovo.vcf ######################################################################################################################################## #### remove variants from LCR and telo-/centro-mere regions ###time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${FAMILY_ID}.strict.24chr.sort.denovo.vcf -O ${FAMILY_ID}.clean.denovo.vcf \ -###-XL /home/u035/project/resources/LCR.bed -XL /home/u035/project/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants +###-XL /home/u035/u035/shared/resources/LCR.bed -XL /home/u035/u035/shared/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants #### split multi-allelic sites [by -m -any] #### left-alignment and normalization [by adding the -f] diff --git a/submit_depth_of_coverage_MQ20_BQ20.sh b/submit_depth_of_coverage_MQ20_BQ20.sh index 14e7e34d421f07008e95ddf93c7207075db57f65..0a1a6a64cfdbc0220bec59755a0a71784809236e 100644 --- a/submit_depth_of_coverage_MQ20_BQ20.sh +++ b/submit_depth_of_coverage_MQ20_BQ20.sh @@ -18,9 +18,9 @@ then fi fi -export PATH=$PATH:/home/u035/project/software/bcbio-1.1.3/tools/bin -BCBIO_CONFIG=/scratch/u035/project/analysis/wes_pilot/bcbio/config 
-BCBIO_WORK=/scratch/u035/project/analysis/wes_pilot/bcbio/work +export PATH=$PATH:/home/u035/u035/shared/software/bcbio-1.1.3/tools/bin +BCBIO_CONFIG=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/config +BCBIO_WORK=/scratch/u035/u035/shared/analysis/wes_pilot/bcbio/work # Expects environment variables to be set # BATCH - date yyyymmdd batch diff --git a/submit_downstream.sh b/submit_downstream.sh index ed9006cf54abaa4b03cc6d1758ebf88031030926..ed4ae1c6daed10cfab07d3010d47fb81c093e74a 100755 --- a/submit_downstream.sh +++ b/submit_downstream.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by downstream_setup.sh### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,28 +23,28 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by downstream_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by downstream_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190613.plus15bp.merged.bed -CLINVAR=/home/u035/project/resources/DDG2P.20190613.clinvar.20190603.plus15bp.txt +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.plus15bp.merged.bed +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.clinvar.20190603.plus15bp.txt ### TOOLS ### 
-BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar +PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa @@ -154,11 +154,11 @@ time ${VEP} \ --merged \ --use_given_ref \ --cache --cache_version 96 \ - --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \ + --dir_cache /home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/vep \ --individual all \ - --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.13062019.txt" \ - --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ - --plugin 
G2P,file='/home/u035/project/resources/DDG2P.13062019.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} + --transcript_filter "gene_symbol in /home/u035/u035/shared/resources/G2P/genes_in_DDG2P.13062019.txt" \ + --dir_plugins /home/u035/u035/shared/software/bcbio/anaconda/share/ensembl-vep-96.0-0 \ + --plugin G2P,file='/home/u035/u035/shared/resources/G2P/DDG2P.13062019.csv',af_from_vcf=1,log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT} echo "" echo "" @@ -212,7 +212,7 @@ cd ${VASE_DIR} time ${GATK4} IndexFeatureFile -F ${OUT_FILE} # select only variants on the 24 chromosomes -time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/project/resources/24_chr.list --exclude-non-variants +time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${OUT_FILE} -O ${FAMILY_ID}.strict.24chr.denovo.vcf -L /home/u035/u035/shared/resources/24_chr.list --exclude-non-variants # sort the VCF (maybe not needed?, but just in case, and it is quick) rm ${FAMILY_ID}.strict.24chr.sort.denovo.vcf @@ -224,7 +224,7 @@ time ${GATK4} IndexFeatureFile -F ${FAMILY_ID}.strict.24chr.sort.denovo.vcf # remove variants from LCR and telo-/centro-mere regions time ${GATK4} SelectVariants -R ${REFERENCE_GENOME} -V ${FAMILY_ID}.strict.24chr.sort.denovo.vcf -O ${FAMILY_ID}.clean.denovo.vcf \ --XL /home/u035/project/resources/LCR.bed -XL /home/u035/project/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants +-XL /home/u035/u035/shared/resources/LCR.bed -XL /home/u035/u035/shared/resources/sv_repeat_telomere_centromere.bed --exclude-non-variants # split multi-allelic sites [by -m -any] # left-alignment and normalization [by adding the -f] diff --git a/submit_trio_wes_aspera_download.sh b/submit_trio_wes_aspera_download.sh index 013dcb3e2f1ea2dd2dd5e7c60a902739ddf56e72..c6bb2e99e8f08a638ba8b116f12d941fbc224dcd 100755 --- a/submit_trio_wes_aspera_download.sh +++ 
b/submit_trio_wes_aspera_download.sh @@ -8,16 +8,16 @@ source $TRANSFER_INFO_FILE -/home/u035/project/software/aspera/connect/bin/ascp \ +/home/u035/u035/shared/software/aspera/connect/bin/ascp \ -T -P 33001 -O 33001 -l 500M -k2 --overwrite=diff \ $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT/raw_data \ - /scratch/u035/project/trio_whole_exome/data + /scratch/u035/u035/shared/trio_whole_exome/data -cd /scratch/u035/project/trio_whole_exome/data/ +cd /scratch/u035/u035/shared/trio_whole_exome/data/ mkdir $PROJECT mv raw_data $PROJECT/ -cd /scratch/u035/project/trio_whole_exome/data/$PROJECT/raw_data +cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT/raw_data rm ../md5_check.txt 2> /dev/null diff --git a/submit_trio_wes_lftp_download.sh b/submit_trio_wes_lftp_download.sh index f7dba577e6faa2bf01a99e4d10dfdb34a012caec..92e2adaf1a8871be6f10ce75bc258efa7ee02b54 100755 --- a/submit_trio_wes_lftp_download.sh +++ b/submit_trio_wes_lftp_download.sh @@ -6,10 +6,10 @@ #PBS -j oe ###source $TRANSFER_INFO_FILE -###/home/u035/project/software/aspera/connect/bin/ascp \ +###/home/u035/u035/shared/software/aspera/connect/bin/ascp \ ### -T -P 33001 -O 33001 -l 500M -k2 --overwrite=diff \ ### $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT/raw_data \ -### /scratch/u035/project/trio_whole_exome/data +### /scratch/u035/u035/shared/trio_whole_exome/data PROJ_CONN="login anonymous lftp@ ; mirror -vv ${TOKEN}/${PROJECT}/raw_data ." 
@@ -17,8 +17,8 @@ echo ${PROJ_CONN} # set up an EPCC folder for this project -mkdir /scratch/u035/project/trio_whole_exome/data/$PROJECT -cd /scratch/u035/project/trio_whole_exome/data/$PROJECT +mkdir /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT +cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT # download the data @@ -26,7 +26,7 @@ lftp transfer.genomics.ed.ac.uk <<<${PROJ_CONN} # go into raw_data to perform the md5_check -# cd /scratch/u035/project/trio_whole_exome/data/$PROJECT/raw_data +# cd /scratch/u035/u035/shared/trio_whole_exome/data/$PROJECT/raw_data rm md5_check.txt 2> /dev/null for DATE in 20*[0-9] diff --git a/test_process_NHS_WES_trio.sh b/test_process_NHS_WES_trio.sh index ab929cfd716fdcf248fcaf09cbbf4007d75b2278..db2c4e04be1dd64da3ee2ae1a97dea4987e71b22 100755 --- a/test_process_NHS_WES_trio.sh +++ b/test_process_NHS_WES_trio.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by NHS_WES_trio_setup.sh ### -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/trio_whole_exome/analysis WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,36 +23,36 @@ COV_DIR=${WORK_DIR}/COV DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by NHS_WES_trio_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by 
NHS_WES_trio_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190919.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK -BLACKLIST=/home/u035/project/resources/current_blacklist.txt # OK -TRANS_MAP=/home/u035/project/resources/current_trans_map.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190919.clinvar.20190916.plus15bp.txt # OK +BLACKLIST=/home/u035/u035/shared/resources/blacklist/current_blacklist.txt # OK +TRANS_MAP=/home/u035/u035/shared/resources/trans_map/current_trans_map.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar 
+PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/project/trio_whole_exome/analysis/output/ +echo "SOURCE_DIR = ${SOURCE_DIR}" # the general path to the source BAM files (VCF and PED already copied) i.e. /scratch/u035/u035/shared/trio_whole_exome/analysis/output/ echo "BATCH_ID = ${BATCH_ID}" # the ID of the batch being processed e.g. 11870_Germain_Lorna echo "PLATE_ID = ${PLATE_ID}" # the PCR plate ID of the batch being currently processed, e.g. 16862 echo "PROJECT_ID = ${PROJECT_ID}" # this the the folder (${BASE}/${PROJECT_ID}) where the downstream analysis will be done diff --git a/test_run_processing.sh b/test_run_processing.sh index 81bc65240d1cb1e2bc0ed62cfbafeab9926a4d10..e51fff49d2c6e1bf56f379ac2ac3d99251d61f17 100755 --- a/test_run_processing.sh +++ b/test_run_processing.sh @@ -7,12 +7,12 @@ # setup PATH -export PATH=$PATH:/home/u035/project/software/bcbio/anaconda/envs/python2/bin:/home/u035/project/software/bcbio/anaconda/bin -export PERL5LIB=$PERL5LIB:/home/u035/project/software/bcbio/anaconda/lib/site_perl/5.26.2 +export PATH=$PATH:/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin:/home/u035/u035/shared/software/bcbio/anaconda/bin +export PERL5LIB=$PERL5LIB:/home/u035/u035/shared/software/bcbio/anaconda/lib/site_perl/5.26.2 ### folder structure for the downstream analysis - created by processing_setup.sh ### -BASE=/scratch/u035/project/analysis/wes_pilot +BASE=/scratch/u035/u035/shared/analysis/wes_pilot WORK_DIR=$BASE/${PROJECT_ID} VCF_DIR=${WORK_DIR}/VCF PED_DIR=${WORK_DIR}/PED @@ -23,27 +23,27 @@ COV_DIR=${WORK_DIR}/COV 
DEC_DIR=${WORK_DIR}/DECIPHER IGV_DIR=${DEC_DIR}/IGV CNV_DIR=${WORK_DIR}/CNV -SCRIPTS_DIR=/home/u035/project/scripts +SCRIPTS_DIR=/home/u035/u035/shared/scripts # other files to be used FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt # created by processing_setup.sh CHILD_IDS=${WORK_DIR}/PRO_IDs.txt # created by processing_setup.sh -TARGETS=/home/u035/project/resources/DDG2P.20190613.plus15bp.merged.bed # OK -CLINVAR=/home/u035/project/resources/DDG2P.20190613.clinvar.20190902.plus15bp.txt # OK +TARGETS=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.plus15bp.merged.bed # OK +CLINVAR=/home/u035/u035/shared/resources/G2P/DDG2P.20190613.clinvar.20190902.plus15bp.txt # OK ### TOOLS ### -BCFTOOLS=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bcftools -BGZIP=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/bgzip -TABIX=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/tabix -VT=/home/u035/project/software/bcbio/anaconda/bin/vt -VASE=/home/u035/project/software/bcbio/anaconda/bin/vase -GATK4=/home/u035/project/software/bcbio/anaconda/bin/gatk -GATK3=/home/u035/project/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar -PYTHON2=/home/u035/project/software/bcbio/anaconda/envs/python2/bin/python2.7 -VEP="/home/u035/project/software/bcbio/anaconda/bin/perl /home/u035/project/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep -REFERENCE_GENOME=/home/u035/project/data/reference/hg38.fa +BCFTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bcftools +BGZIP=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/bgzip +TABIX=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/tabix +VT=/home/u035/u035/shared/software/bcbio/anaconda/bin/vt +VASE=/home/u035/u035/shared/software/bcbio/anaconda/bin/vase +GATK4=/home/u035/u035/shared/software/bcbio/anaconda/bin/gatk +GATK3=/home/u035/u035/shared/software/GenomeAnalysisTK-3.8/GenomeAnalysisTK.jar 
+PYTHON2=/home/u035/u035/shared/software/bcbio/anaconda/envs/python2/bin/python2.7 +VEP="/home/u035/u035/shared/software/bcbio/anaconda/bin/perl /home/u035/u035/shared/software/bcbio/anaconda/bin/vep" # points to ../share/ensembl-vep-97.3-0/vep +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa diff --git a/trio_whole_exome_bcbio_crf_template.yaml b/trio_whole_exome_bcbio_crf_template.yaml index 0d87d96e7ce02e1bcb19e447d83b0b1a31691985..e379642d408852e6aff9309fbc3be6ae0502a6ff 100644 --- a/trio_whole_exome_bcbio_crf_template.yaml +++ b/trio_whole_exome_bcbio_crf_template.yaml @@ -19,4 +19,4 @@ details: analysis: variant2 genome_build: hg38 upload: - dir: /scratch/u035/project/trio_whole_exome/analysis/output + dir: /scratch/u035/u035/shared/trio_whole_exome/analysis/output diff --git a/trio_whole_exome_bcbio_template.yaml b/trio_whole_exome_bcbio_template.yaml index e960be92364ba6eed2dea18cd11361f25b485d69..f6ebbb44f3e55484bf6eab03decdbf5ecb9263f6 100644 --- a/trio_whole_exome_bcbio_template.yaml +++ b/trio_whole_exome_bcbio_template.yaml @@ -16,4 +16,4 @@ details: analysis: variant2 genome_build: hg38 upload: - dir: /scratch/u035/project/trio_whole_exome/analysis/output + dir: /scratch/u035/u035/shared/trio_whole_exome/analysis/output diff --git a/trio_whole_exome_config.sh b/trio_whole_exome_config.sh index e4da287a7b950a0e5bd3636f9fe3ac1488c2cfb5..5c291d3d156d343b7595ed8415980aa939d63033 100644 --- a/trio_whole_exome_config.sh +++ b/trio_whole_exome_config.sh @@ -3,13 +3,13 @@ # Basic configuration options for trio WES pipeline # -SCRIPTS=/home/u035/project/scripts +SCRIPTS=/home/u035/u035/shared/scripts BCBIO_TEMPLATE=$SCRIPTS/trio_whole_exome_bcbio_template.yaml -TARGET=/home/u035/project/resources/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed -DOWNLOAD_DIR=/scratch/u035/project/trio_whole_exome/data -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa 
+TARGET=/home/u035/u035/shared/resources/exome_targets/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed +DOWNLOAD_DIR=/scratch/u035/u035/shared/data +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/analysis PARAMS_DIR=$BASE/params READS_DIR=$BASE/reads CONFIG_DIR=$BASE/config @@ -18,4 +18,4 @@ OUTPUT_DIR=$BASE/output ARCHIVE_DIR=/archive/u035/trio_whole_exome -export PATH=/home/u035/project/software/bcbio/tools/bin:$PATH +export PATH=/home/u035/u035/shared/software/bcbio/tools/bin:$PATH diff --git a/trio_whole_exome_crf_config.sh b/trio_whole_exome_crf_config.sh index ce1495088587377cc4007789fad7824eac281f37..3b7a89fdf2e885ce50a550b089645f45c9bcd194 100644 --- a/trio_whole_exome_crf_config.sh +++ b/trio_whole_exome_crf_config.sh @@ -3,13 +3,13 @@ # Basic configuration options for trio WES pipeline # -SCRIPTS=/home/u035/project/scripts +SCRIPTS=/home/u035/u035/shared/scripts BCBIO_TEMPLATE=$SCRIPTS/trio_whole_exome_bcbio_crf_template.yaml -TARGET=/home/u035/project/resources/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed -DOWNLOAD_DIR=/scratch/u035/project/trio_whole_exome/data -REFERENCE_GENOME=/home/u035/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa +TARGET=/home/u035/u035/shared/resources/exome_targets/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed +DOWNLOAD_DIR=/scratch/u035/u035/shared/data +REFERENCE_GENOME=/home/u035/u035/shared/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa -BASE=/scratch/u035/project/trio_whole_exome/analysis +BASE=/scratch/u035/u035/shared/analysis PARAMS_DIR=$BASE/params READS_DIR=$BASE/reads CONFIG_DIR=$BASE/config @@ -18,4 +18,4 @@ OUTPUT_DIR=$BASE/output ARCHIVE_DIR=/archive/u035/trio_whole_exome -export PATH=/home/u035/project/software/bcbio/tools/bin:$PATH +export PATH=/home/u035/u035/shared/software/bcbio/tools/bin:$PATH diff --git a/vcf_config.json.backup b/vcf_config.json.backup index 
fc5941cb39692ca6ac46a3703cb0d4df1f8f50d6..59464bf2fe878e1df7d9f0d50bab569d787f737f 100644 --- a/vcf_config.json.backup +++ b/vcf_config.json.backup @@ -140,7 +140,7 @@ "species": "homo_sapiens", "assembly": "GRCh38", "type": "local", - "filename_template": "/home/u035/project/resources/gnomad/r3.0/genomes/gnomad.genomes.r3.0.sites.chr###CHR###_trimmed_info.vcf.bgz", + "filename_template": "/home/u035/u035/shared/resources/gnomad/r3.0/genomes/gnomad.genomes.r3.0.sites.chr###CHR###_trimmed_info.vcf.bgz", "chromosomes": [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y" @@ -201,7 +201,7 @@ "species": "homo_sapiens", "assembly": "GRCh38", "type": "local", - "filename_template": "/home/u035/project/resources/gnomad/r2.1/exomes/gnomad.exomes.r2.1.sites.grch38.chr###CHR###_noVEP.vcf.gz", + "filename_template": "/home/u035/u035/shared/resources/gnomad/r2.1/exomes/gnomad.exomes.r2.1.sites.grch38.chr###CHR###_noVEP.vcf.gz", "chromosomes": [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y"