From fd2160e739c03b08f6eaf3924e907f98b1f24b8a Mon Sep 17 00:00:00 2001
From: ameyner2 <alison.meynert@igmm.ed.ac.uk>
Date: Wed, 7 Apr 2021 08:56:32 +0100
Subject: [PATCH] Changed symlink creation to a new temporary folder

---
 prepare_bcbio_config.sh | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/prepare_bcbio_config.sh b/prepare_bcbio_config.sh
index fdcae78..31793d8 100755
--- a/prepare_bcbio_config.sh
+++ b/prepare_bcbio_config.sh
@@ -45,8 +45,8 @@ cd $PARAMS_DIR
 # remove DOS newline characters if necessary
 perl -pi -e 's/\r//' $PROJECT_ID.ped
 
-# create reads directory for project
-mkdir -p $READS_DIR/$PROJECT_ID
+# create reads directory for project and a temporary symlink directory underneath (removed at the end of the script)
+mkdir -p "$READS_DIR/$PROJECT_ID/symlinks"
 
 cat $DOWNLOAD_DIR/$PROJECT_ID/*/file_list.tsv | \
   perl $SCRIPTS/trio_whole_exome_create_parameter_files.pl \
@@ -66,28 +66,29 @@ do
     PHENOTYPE=`head -n $i ${PROJECT_ID}_${FAMILY_ID}.ped | tail -n 1 | cut -f 6`
 
     # create symlinks for problematic filenames
-    for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_1_*_1.fastq.gz`
+    mkdir -p "$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE"  # -p: do not fail if the directory already exists
+    for FILE in $(ls "$DOWNLOAD_DIR/$PROJECT_ID"/*/*"$SAMPLE"*/*_1_*_1.fastq.gz)
     do
-      newname=`echo $FILE | sed -e 's/_1_/_one_/'`
-      ln -s $FILE ${newname%1.fastq.gz}R1.fastq.gz
+      newname=$(basename "$FILE" | sed -e 's/_1_/_one_/')  # link name only; the link lives in the symlinks folder
+      ln -s "$FILE" "$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%1.fastq.gz}R1.fastq.gz"
     done
-    for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_1_*_2.fastq.gz`
+    for FILE in $(ls "$DOWNLOAD_DIR/$PROJECT_ID"/*/*"$SAMPLE"*/*_1_*_2.fastq.gz)
     do
-      newname=`echo $FILE | sed -e 's/_1_/_one_/'`
-      ln -s $FILE ${newname%2.fastq.gz}R2.fastq.gz
+      newname=$(basename "$FILE" | sed -e 's/_1_/_one_/')
+      ln -s "$FILE" "$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%2.fastq.gz}R2.fastq.gz"
     done
-    for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_2_*_1.fastq.gz`
+    for FILE in $(ls "$DOWNLOAD_DIR/$PROJECT_ID"/*/*"$SAMPLE"*/*_2_*_1.fastq.gz)
     do
-      newname=`echo $FILE | sed -e 's/_2_/_two_/'`
-      ln -s $FILE ${newname%1.fastq.gz}R1.fastq.gz
+      newname=$(basename "$FILE" | sed -e 's/_2_/_two_/')
+      ln -s "$FILE" "$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%1.fastq.gz}R1.fastq.gz"
     done
-    for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_2_*_2.fastq.gz`
+    for FILE in $(ls "$DOWNLOAD_DIR/$PROJECT_ID"/*/*"$SAMPLE"*/*_2_*_2.fastq.gz)
     do
-      newname=`echo $FILE | sed -e 's/_2_/_two_/'`
-      ln -s $FILE ${newname%2.fastq.gz}R2.fastq.gz
+      newname=$(basename "$FILE" | sed -e 's/_2_/_two_/')
+      ln -s "$FILE" "$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%2.fastq.gz}R2.fastq.gz"
     done
 
-    for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_R[1,2].fastq.gz`
+    for FILE in $(ls "$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE"/*_R[12].fastq.gz)  # the R1/R2 links in the symlinks folder
     do
       echo "$FILE,$SAMPLE,$FAMILY_ID,$SEX,$PHENOTYPE,$TARGET" >> ${VERSION}_${PROJECT_ID}_${FAMILY_ID}.csv
     done
@@ -111,3 +112,6 @@ do
   rm -r ${VERSION}_${PROJECT_ID}_${FAMILY_ID}
 
 done
+
+# clean up the temporary symlinks folder; :? aborts rather than letting an unset variable change which path is removed
+rm -r -- "${READS_DIR:?}/${PROJECT_ID:?}/symlinks"
-- 
GitLab