Skip to content
Snippets Groups Projects
add_samples_from_previous_EdGe_runs.sh 2.07 KiB
Newer Older
#!/bin/bash
#
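# Usage:
#   add_samples_from_previous_EdGe_runs.sh <config.sh> <project_id> <version> <samples>
#
# <config.sh> is sourced and supplies PARAMS_DIR, READS_DIR and DOWNLOAD_DIR.
# <samples> is a tab-separated file with one sample per line and the columns:
#   family id, sample id, sex, phenotype, original project id, original sample id
#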

# Re-use reads from previous runs: for every row of the tab-separated samples
# file, symlink the sample's FASTQ files from the original project's download
# directory into $READS_DIR/<project_id>/symlinks/<sample>/, renaming the
# "problematic" lane token (_1_ -> _one_, _2_ -> _two_) and the read suffix
# (_1.fastq.gz -> _R1.fastq.gz, _2.fastq.gz -> _R2.fastq.gz).
#
# Positional arguments:
#   $1 - config script to source (must define PARAMS_DIR, READS_DIR, DOWNLOAD_DIR)
#   $2 - project id
#   $3 - version
#   $4 - samples file (TSV: family id, sample, sex, phenotype,
#        original project id, original sample id)

CONFIG_SH=$1
PROJECT_ID=$2
VERSION=$3
SAMPLES=$4

# Print a warning to stderr and carry on.
warn() {
  printf 'WARNING: %s\n' "$*" >&2
}

# Print an error to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

if (( $# < 4 )); then
  printf 'Usage: %s <config.sh> <project_id> <version> <samples>\n' "${0##*/}" >&2
  exit 2
fi

source "$CONFIG_SH" || die "cannot source config '$CONFIG_SH'"

# Refuse to run with missing directories: an empty READS_DIR/DOWNLOAD_DIR would
# make the paths below resolve relative to the filesystem root.
: "${PARAMS_DIR:?PARAMS_DIR must be set by the config}"
: "${READS_DIR:?READS_DIR must be set by the config}"
: "${DOWNLOAD_DIR:?DOWNLOAD_DIR must be set by the config}"

# NB: the samples file is opened after this cd, so a relative <samples> path
# is resolved against PARAMS_DIR (same as before).
cd "$PARAMS_DIR" || die "cannot cd to '$PARAMS_DIR'"

# create reads directory for project and symlink directory underneath
mkdir -p -- "$READS_DIR/$PROJECT_ID/symlinks" \
  || die "cannot create '$READS_DIR/$PROJECT_ID/symlinks'"

# Everything before the first underscore of the project id.
SHORT_PROJECT_ID=${PROJECT_ID%%_*}

[[ -r $SAMPLES ]] || die "cannot read samples file '$SAMPLES'"

#######################################
# Symlink one lane/read combination of a sample's FASTQ files.
# Globals:   DOWNLOAD_DIR, READS_DIR, PROJECT_ID (read)
# Arguments: $1 - original project id (directory under DOWNLOAD_DIR)
#            $2 - original sample id (substring of the sample directory name)
#            $3 - sample id (name of the symlink directory)
#            $4 - lane token as it appears in the file name (1 or 2)
#            $5 - replacement for the lane token (one or two)
#            $6 - read number (1 or 2)
# Outputs:   warnings on stderr for links that could not be created
#######################################
link_reads() {
  local orig_project=$1 orig_sample=$2 sample=$3
  local lane=$4 lane_word=$5 read_no=$6
  local dest_dir=$READS_DIR/$PROJECT_ID/symlinks/$sample
  local src name

  # Only the * characters are glob-active; the ids are matched literally.
  for src in "$DOWNLOAD_DIR/$orig_project"/*/*/*"$orig_sample"*/*_"$lane"_*_"$read_no".fastq.gz; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e $src || -L $src ]] || continue

    name=${src##*/}
    # Replace the first occurrence of the lane token only.
    name=${name/_${lane}_/_${lane_word}_}

    # A failed link (e.g. it already exists) is reported but not fatal.
    ln -s -- "$src" "$dest_dir/${name%"$read_no".fastq.gz}R${read_no}.fastq.gz" \
      || warn "could not link '$src' into '$dest_dir'"
  done
}

line_no=0
# '|| [[ -n $line ]]' keeps a final row that lacks a trailing newline.
while IFS= read -r line || [[ -n $line ]]; do
  line_no=$((line_no + 1))
  line=${line%$'\r'}            # tolerate CRLF line endings
  [[ -n $line ]] || continue    # ignore blank lines

  # Split on every tab, keeping empty fields (IFS-based splitting would
  # collapse consecutive tabs and shift the columns).
  fields=()
  rest=$line$'\t'
  while [[ -n $rest ]]; do
    fields+=("${rest%%$'\t'*}")
    rest=${rest#*$'\t'}
  done

  if (( ${#fields[@]} < 6 )); then
    warn "$SAMPLES:$line_no: expected 6 tab-separated fields, got ${#fields[@]}; skipping"
    continue
  fi

  FAMILY_ID=${fields[0]}
  SAMPLE=${fields[1]}
  SEX=${fields[2]}
  PHENOTYPE=${fields[3]}
  ORIGINAL_PROJECT_ID=${fields[4]}
  ORIGINAL_SAMPLE_ID=${fields[5]}

  # An empty id would turn the lookup glob into "match everything".
  if [[ -z $SAMPLE || -z $ORIGINAL_PROJECT_ID || -z $ORIGINAL_SAMPLE_ID ]]; then
    warn "$SAMPLES:$line_no: empty sample or original project/sample id; skipping"
    continue
  fi

  # SEX, PHENOTYPE and PREFIX are only consumed by the disabled manifest
  # code below; they are kept so it can be re-enabled unchanged.
  PREFIX=${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}

  # create symlinks for problematic filenames
  if ! mkdir -p -- "$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE"; then
    warn "cannot create symlink directory for sample '$SAMPLE'; skipping"
    continue
  fi

  link_reads "$ORIGINAL_PROJECT_ID" "$ORIGINAL_SAMPLE_ID" "$SAMPLE" 1 one 1
  link_reads "$ORIGINAL_PROJECT_ID" "$ORIGINAL_SAMPLE_ID" "$SAMPLE" 1 one 2
  link_reads "$ORIGINAL_PROJECT_ID" "$ORIGINAL_SAMPLE_ID" "$SAMPLE" 2 two 1
  link_reads "$ORIGINAL_PROJECT_ID" "$ORIGINAL_SAMPLE_ID" "$SAMPLE" 2 two 2

  # Disabled manifest generation (TARGET is not defined in this script;
  # presumably it comes from the sourced config - confirm before re-enabling).
#  for FILE in `ls $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/*_R[1,2].fastq.gz`
#  do
#    echo "$FILE,$SAMPLE,$FAMILY_ID,$SEX,$PHENOTYPE,$TARGET" >> ${PREFIX}.csv
#  done

done < "$SAMPLES"