From e785f1108ce23938a7b34a540ad3467a0c7106bb Mon Sep 17 00:00:00 2001
From: ameyner2 <alison.meynert@igmm.ed.ac.uk>
Date: Fri, 28 Aug 2020 17:02:08 +0100
Subject: [PATCH] Using symlinks to get around bcbio issues with file merging

---
 prepare_bcbio_config.sh | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/prepare_bcbio_config.sh b/prepare_bcbio_config.sh
index e81baac..65d47c4 100755
--- a/prepare_bcbio_config.sh
+++ b/prepare_bcbio_config.sh
@@ -64,7 +64,29 @@ do
     SEX=`head -n $i ${PROJECT_ID}_${FAMILY_ID}.ped | tail -n 1 | cut -f 5`
     PHENOTYPE=`head -n $i ${PROJECT_ID}_${FAMILY_ID}.ped | tail -n 1 | cut -f 6`
 
-    for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/raw_data/*/*${SAMPLE}*/*.gz`
+    # create symlinks for problematic filenames: *_<lane>_*_<read>.fastq.gz is
+    # linked as *_<one|two>_*_R<read>.fastq.gz in the same directory (works
+    # around bcbio issues with file merging); only the links are listed below
+    for lane_word in 1:one 2:two
+    do
+      lane=${lane_word%%:*}
+      lane_name=${lane_word#*:}
+      for readnum in 1 2
+      do
+        for FILE in "$DOWNLOAD_DIR/$PROJECT_ID"/raw_data/*/*"${SAMPLE}"*/*_${lane}_*_${readnum}.fastq.gz
+        do
+          # an unmatched glob is left as the literal pattern, so skip non-files
+          [ -e "$FILE" ] || continue
+          # substitute in the basename only; a directory may also contain _<lane>_
+          newname=$(printf '%s\n' "${FILE##*/}" | sed -e "s/_${lane}_/_${lane_name}_/")
+          link="${FILE%/*}/${newname%${readnum}.fastq.gz}R${readnum}.fastq.gz"
+          # keep whatever already has that name so re-running does not fail
+          [ -e "$link" ] || [ -L "$link" ] || ln -s "$FILE" "$link"
+        done
+      done
+    done
+
+    for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/raw_data/*/*${SAMPLE}*/*_R[12].fastq.gz`
     do
       echo "$FILE,$SAMPLE,$FAMILY_ID,$SEX,$PHENOTYPE,$TARGET" >> ${PROJECT_ID}_${FAMILY_ID}.csv
     done
-- 
GitLab