# Patch for prepare_bcbio_config.sh.
#
# Purpose: create the renamed FASTQ symlinks (_1_ -> _one_, _2_ -> _two_,
# trailing 1/2 -> R1/R2) under $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE
# instead of next to the downloaded files, list those symlinks in the
# per-family CSV, and delete the symlinks directory at the end of the script.
#
# The per-sample directory is created with `mkdir -p`, like the project-level
# directory in the first hunk, so an already existing directory (for example
# one left by a run that stopped before the final `rm -r`) is not an error.
#
# The loops keep `ls` inside backticks on purpose: when a glob matches
# nothing, `ls` prints an error and the loop body is skipped, whereas a bare
# glob would be iterated once as a literal pattern.
#
# NOTE(review): `ln -s $FILE ...` stores $FILE as given; the links only
# resolve from the new directory if $DOWNLOAD_DIR is an absolute path --
# TODO confirm.
# NOTE(review): line breaks, blank context lines and indentation were
# reconstructed from a whitespace-collapsed copy so that the hunk counts add
# up; verify them against the target file before applying. The `index` blob
# ids were copied unchanged and may not match the post-image of this version.
diff --git a/prepare_bcbio_config.sh b/prepare_bcbio_config.sh
index fdcae78184b0e844735901ade2fa7b340a45097a..31793d873b0f429ab072104beeb2e229130461e8 100755
--- a/prepare_bcbio_config.sh
+++ b/prepare_bcbio_config.sh
@@ -45,8 +45,8 @@ cd $PARAMS_DIR
 # remove DOS newline characters if necessary
 perl -pi -e 's/\r//' $PROJECT_ID.ped
 
-# create reads directory for project
-mkdir -p $READS_DIR/$PROJECT_ID
+# create reads directory for project and symlink directory underneath
+mkdir -p $READS_DIR/$PROJECT_ID/symlinks
 
 cat $DOWNLOAD_DIR/$PROJECT_ID/*/file_list.tsv | \
     perl $SCRIPTS/trio_whole_exome_create_parameter_files.pl \
@@ -66,28 +66,29 @@ do
         PHENOTYPE=`head -n $i ${PROJECT_ID}_${FAMILY_ID}.ped | tail -n 1 | cut -f 6`
 
         # create symlinks for problematic filenames
-        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_1_*_1.fastq.gz`
+        mkdir -p $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE
+        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*$SAMPLE*/*_1_*_1.fastq.gz`
         do
-            newname=`echo $FILE | sed -e 's/_1_/_one_/'`
-            ln -s $FILE ${newname%1.fastq.gz}R1.fastq.gz
+            newname=`basename $FILE | sed -e 's/_1_/_one_/'`
+            ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%1.fastq.gz}R1.fastq.gz
         done
-        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_1_*_2.fastq.gz`
+        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*$SAMPLE*/*_1_*_2.fastq.gz`
         do
-            newname=`echo $FILE | sed -e 's/_1_/_one_/'`
-            ln -s $FILE ${newname%2.fastq.gz}R2.fastq.gz
+            newname=`basename $FILE | sed -e 's/_1_/_one_/'`
+            ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%2.fastq.gz}R2.fastq.gz
         done
-        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_2_*_1.fastq.gz`
+        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*$SAMPLE*/*_2_*_1.fastq.gz`
         do
-            newname=`echo $FILE | sed -e 's/_2_/_two_/'`
-            ln -s $FILE ${newname%1.fastq.gz}R1.fastq.gz
+            newname=`basename $FILE | sed -e 's/_2_/_two_/'`
+            ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%1.fastq.gz}R1.fastq.gz
         done
-        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_2_*_2.fastq.gz`
+        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*$SAMPLE*/*_2_*_2.fastq.gz`
         do
-            newname=`echo $FILE | sed -e 's/_2_/_two_/'`
-            ln -s $FILE ${newname%2.fastq.gz}R2.fastq.gz
+            newname=`basename $FILE | sed -e 's/_2_/_two_/'`
+            ln -s $FILE $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/${newname%2.fastq.gz}R2.fastq.gz
         done
 
-        for FILE in `ls $DOWNLOAD_DIR/$PROJECT_ID/*/*${SAMPLE}*/*_R[1,2].fastq.gz`
+        for FILE in `ls $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/*_R[1,2].fastq.gz`
         do
             echo "$FILE,$SAMPLE,$FAMILY_ID,$SEX,$PHENOTYPE,$TARGET" >> ${VERSION}_${PROJECT_ID}_${FAMILY_ID}.csv
         done
@@ -111,3 +112,6 @@ do
     rm -r ${VERSION}_${PROJECT_ID}_${FAMILY_ID}
 
 done
+
+# clean up symlinks temporary folder
+rm -r $READS_DIR/$PROJECT_ID/symlinks