From dbb4a10f82e2bdf25d18f417f8b59eb53668842c Mon Sep 17 00:00:00 2001
From: ameyner2 <alison.meynert@igmm.ed.ac.uk>
Date: Tue, 8 Dec 2020 16:26:30 +0000
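Subject: [PATCH] Update scripts: DDG2P 20201208 resources, VERSION-prefixed archive/QC names, singleton runs, aspera raw_data download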

---
 gather_NHS_WES_trio_results.sh               | 11 +++++++++++
 process_NHS_WES_aff_probands.sh              | 10 +++++-----
 process_NHS_WES_trio.sh                      | 10 +++++-----
 submit_trio_wes_archive_project.sh           |  9 +++++----
 submit_trio_wes_aspera_download.sh           |  8 +++++++-
 submit_trio_wes_priority_and_qc_checksums.sh | 13 +++++++------
 6 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/gather_NHS_WES_trio_results.sh b/gather_NHS_WES_trio_results.sh
index f8cb723..f794ff5 100755
--- a/gather_NHS_WES_trio_results.sh
+++ b/gather_NHS_WES_trio_results.sh
@@ -30,6 +30,17 @@ if [ ! -d "${NHS_DIR}" ]; then
 fi
 
 
+# enable running singletons: if PBS_ARRAY_INDEX is unset/empty, use $INDEX when given, otherwise 1
+if [ -z "$PBS_ARRAY_INDEX" ]
+then
+  if [ -z "$INDEX" ]
+  then
+    export PBS_ARRAY_INDEX=1
+  else
+    export PBS_ARRAY_INDEX="$INDEX"
+  fi
+fi
+
 
 
 FAMILY_ID=`head -n ${PBS_ARRAY_INDEX} ${FAMILY_IDS} | tail -n 1`				# contains only the family IDs (e.g.385295)
diff --git a/process_NHS_WES_aff_probands.sh b/process_NHS_WES_aff_probands.sh
index a5917b7..c494661 100755
--- a/process_NHS_WES_aff_probands.sh
+++ b/process_NHS_WES_aff_probands.sh
@@ -30,8 +30,8 @@ SCRIPTS_DIR=/home/u035/project/scripts
 
 
 # other files to be used
-TARGETS=/home/u035/project/resources/DDG2P.20200601.plus15bp.merged.bed			# OK
-CLINVAR=/home/u035/project/resources/DDG2P.20200601.clinvar.20200520.plus15bp.txt	# OK
+TARGETS=/home/u035/project/resources/DDG2P.20201208.plus15bp.merged.bed			# OK
+CLINVAR=/home/u035/project/resources/DDG2P.20201208.clinvar.20201128.plus15bp.txt	# OK
 BLACKLIST=/home/u035/project/resources/current_blacklist.txt				# OK
 TRANS_MAP=/home/u035/project/resources/current_trans_map.txt				# OK
 
@@ -156,7 +156,7 @@ echo ""
 
 
 echo "Performing G2P analysis (DD genes)for FAMILY_ID = ${PLATE_ID}_${FAMILY_ID}..."
-echo "Using DDG2P.01062020.csv"
+echo "Using DDG2P.20201208.csv"
 
 
 IN_FILE=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf   
@@ -180,9 +180,9 @@ time ${VEP} \
     --cache --cache_version 100 \
     --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \
     --individual all \
-    --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.01062020.txt" \
+    --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20201208.txt" \
     --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \
-    --plugin G2P,file='/home/u035/project/resources/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
+    --plugin G2P,file='/home/u035/project/resources/DDG2P.20201208.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
 
 
 echo ""
diff --git a/process_NHS_WES_trio.sh b/process_NHS_WES_trio.sh
index c585992..43361de 100755
--- a/process_NHS_WES_trio.sh
+++ b/process_NHS_WES_trio.sh
@@ -31,8 +31,8 @@ SCRIPTS_DIR=/home/u035/project/scripts
 # other files to be used
 FAMILY_IDS=${WORK_DIR}/FAM_IDs.txt							# created by NHS_WES_trio_setup.sh
 CHILD_IDS=${WORK_DIR}/PRO_IDs.txt							# created by NHS_WES_trio_setup.sh
-TARGETS=/home/u035/project/resources/DDG2P.20200601.plus15bp.merged.bed			# OK
-CLINVAR=/home/u035/project/resources/DDG2P.20200601.clinvar.20200520.plus15bp.txt	# OK
+TARGETS=/home/u035/project/resources/DDG2P.20201208.plus15bp.merged.bed			# OK
+CLINVAR=/home/u035/project/resources/DDG2P.20201208.clinvar.20201128.plus15bp.txt	# OK
 BLACKLIST=/home/u035/project/resources/current_blacklist.txt				# OK
 TRANS_MAP=/home/u035/project/resources/current_trans_map.txt				# OK
 
@@ -148,7 +148,7 @@ echo ""
 
 
 echo "Performing G2P analysis (DD genes)for FAMILY_ID = ${PLATE_ID}_${FAMILY_ID}..."
-echo "Using DDG2P.01062020.csv"
+echo "Using DDG2P.20201208.csv"
 
 IN_FILE=${VCF_DIR}/${PLATE_ID}_${FAMILY_ID}.clean.vcf   
 G2P_LOG_DIR=${G2P_DIR}/${PLATE_ID}_${FAMILY_ID}_LOG_DIR
@@ -170,9 +170,9 @@ time ${VEP} \
     --cache --cache_version 100 \
     --dir_cache /home/u035/project/software/bcbio/genomes/Hsapiens/hg38/vep \
     --individual all \
-    --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.01062020.txt" \
+    --transcript_filter "gene_symbol in /home/u035/project/resources/genes_in_DDG2P.20201208.txt" \
     --dir_plugins /home/u035/project/software/bcbio/anaconda/share/ensembl-vep-100.4-0 \
-    --plugin G2P,file='/home/u035/project/resources/DDG2P.01062020.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
+    --plugin G2P,file='/home/u035/project/resources/DDG2P.20201208.csv',af_from_vcf=1,confidence_levels='confirmed&probable&both RD and IF',af_from_vcf_keys=${VCF_KEYS},log_dir=${G2P_LOG_DIR},txt_report=${TXT_OUT},html_report=${HTML_OUT}
 
 
 echo ""
diff --git a/submit_trio_wes_archive_project.sh b/submit_trio_wes_archive_project.sh
index 542e16a..9414f28 100755
--- a/submit_trio_wes_archive_project.sh
+++ b/submit_trio_wes_archive_project.sh
@@ -7,6 +7,7 @@
 
 # Expects environment variables to be set
 # PROJECT_ID - e.g. 12345_LastnameFirstname
+# VERSION - e.g. v1, v2
 # PRIORITY_DIRS - e.g. 05122019,07122019 (colon delimited if more than one)
 # CONFIG_SH - absolute path to configuration script setting environment variables
 
@@ -17,7 +18,7 @@ source $CONFIG_SH
 cd $OUTPUT_DIR
 
 # Copy bcbio output files
-for family_dir in *_$PROJECT_ID*
+for family_dir in *${VERSION}_${PROJECT_ID}*
 do
   rsync -av --exclude '*.bam*' $family_dir $ARCHIVE_DIR/
 done
@@ -25,7 +26,7 @@ done
 # Copy qc files
 cd qc
 mkdir -p $ARCHIVE_DIR/qc
-rsync -av $PROJECT_ID* $ARCHIVE_DIR/qc/
+rsync -av ${VERSION}_${PROJECT_ID}* $ARCHIVE_DIR/qc/
 
 # Copy prioritization files
 cd ../prioritization
@@ -42,7 +43,7 @@ done
 # move to the archive area and check the md5s
 cd $ARCHIVE_DIR
 
-for family_dir in *_$PROJECT_ID*
+for family_dir in *${VERSION}_${PROJECT_ID}*
 do
   cd $family_dir
   md5sum --check md5sum.txt
@@ -50,7 +51,7 @@ do
 done
 
 cd qc
-md5sum --check ${PROJECT_ID}_qc_report.md5sum.txt
+md5sum --check ${VERSION}_${PROJECT_ID}_qc_report.md5sum.txt
 
 cd ../prioritization
 
diff --git a/submit_trio_wes_aspera_download.sh b/submit_trio_wes_aspera_download.sh
index 2a1221a..013dcb3 100755
--- a/submit_trio_wes_aspera_download.sh
+++ b/submit_trio_wes_aspera_download.sh
@@ -7,13 +7,19 @@
 
 source $TRANSFER_INFO_FILE
 
+
 /home/u035/project/software/aspera/connect/bin/ascp \
   -T -P 33001 -O 33001 -l 500M -k2 --overwrite=diff \
-  $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT \
+  $ASPERA_SCP_USER@transfer.genomics.ed.ac.uk:$PROJECT/raw_data \
   /scratch/u035/project/trio_whole_exome/data
 
+
+cd /scratch/u035/project/trio_whole_exome/data/ || exit 1   # stop rather than mkdir/mv in the wrong directory
+mkdir -p "$PROJECT"   # -p: do not fail if the project directory already exists
+mv raw_data "$PROJECT"/
 cd /scratch/u035/project/trio_whole_exome/data/$PROJECT/raw_data
 
+
 rm ../md5_check.txt 2> /dev/null
 for DATE in 20*[0-9]
 do
diff --git a/submit_trio_wes_priority_and_qc_checksums.sh b/submit_trio_wes_priority_and_qc_checksums.sh
index 138bfe4..143b757 100755
--- a/submit_trio_wes_priority_and_qc_checksums.sh
+++ b/submit_trio_wes_priority_and_qc_checksums.sh
@@ -7,6 +7,7 @@
 
 # Expects environment variables to be set
 # PROJECT_ID - e.g. 12345_LastnameFirstname
+# VERSION - e.g. v1, v2
 # PRIORITY_DIRS - e.g. 05122019,07122019 (colon delimited if more than one)
 # CONFIG_SH - absolute path to configuration script setting environment variables
 
@@ -16,19 +17,19 @@ source $CONFIG_SH
 
 cd $OUTPUT_DIR/qc
 
-for file in ${PROJECT_ID}_qc_report*.html
+for file in ${VERSION}_${PROJECT_ID}_qc_report*.html
 do
-  md5sum $file >> ${PROJECT_ID}_qc_report.md5sum.txt
+  md5sum $file >> ${VERSION}_${PROJECT_ID}_qc_report.md5sum.txt
 done
 
-for file in ${PROJECT_ID}.ped_check*.txt
+for file in ${VERSION}_${PROJECT_ID}.ped_check*.txt
 do
-  md5sum $file >> ${PROJECT_ID}_qc_report.md5sum.txt
+  md5sum $file >> ${VERSION}_${PROJECT_ID}_qc_report.md5sum.txt
 done
 
-for file in `find ${PROJECT_ID}_qc_report*_data -type f`
+for file in `find ${VERSION}_${PROJECT_ID}_qc_report*_data -type f`
 do
-  md5sum $file >> ${PROJECT_ID}_qc_report.md5sum.txt
+  md5sum $file >> ${VERSION}_${PROJECT_ID}_qc_report.md5sum.txt
 done
 
 # calculate checksusms on the prioritization files for this project
-- 
GitLab