diff --git a/add_samples_from_previous_CRF_runs.sh b/add_samples_from_previous_CRF_runs.sh
new file mode 100755
--- /dev/null
+++ b/add_samples_from_previous_CRF_runs.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# add_samples_from_previous_CRF_runs.sh <config.sh> <project_id> <version> <samples>
+#
+# For every row of the tab-separated <samples> file, append one line per
+# matching .gz file under $DOWNLOAD_DIR/<original_project_id>/ to
+# <short_project_id>_<version>_<family_id>.csv in $PARAMS_DIR.
+#
+# <samples> columns: family_id, sample, sex, phenotype, original_project_id,
+# original_sample_id. A relative <samples> path is resolved against
+# $PARAMS_DIR, because the file is read after changing into that directory.
+#
+# PARAMS_DIR, READS_DIR, DOWNLOAD_DIR and TARGET are not set here; they are
+# presumably defined by the sourced <config.sh> (confirm against the config).
+#
+
+die() { echo "ERROR: $*" >&2; exit 1; }
+
+[[ $# -eq 4 ]] || die "usage: $0 <config.sh> <project_id> <version> <samples>"
+
+CONFIG_SH=$1
+PROJECT_ID=$2
+VERSION=$3
+SAMPLES=$4
+
+source "$CONFIG_SH"
+
+# Fail early if the directories were not provided: empty values would silently
+# resolve the paths below against the filesystem root or the current directory.
+[[ -n "$PARAMS_DIR" && -n "$READS_DIR" && -n "$DOWNLOAD_DIR" ]] \
+    || die "PARAMS_DIR, READS_DIR and DOWNLOAD_DIR must all be set"
+
+cd "$PARAMS_DIR" || die "cannot cd to $PARAMS_DIR"
+[[ -r "$SAMPLES" ]] || die "cannot read samples file $SAMPLES"
+
+# create reads directory for project and symlink directory underneath
+mkdir -p "$READS_DIR/$PROJECT_ID/symlinks" \
+    || die "cannot create $READS_DIR/$PROJECT_ID/symlinks"
+
+# part of PROJECT_ID before the first underscore
+SHORT_PROJECT_ID=${PROJECT_ID%%_*}
+
+# "read" collapses runs of whitespace separators, which would drop empty
+# columns; swapping tabs for the non-whitespace unit separator keeps them.
+TAB=$'\t'
+SEP=$'\037'
+
+# "|| [[ -n ... ]]" also processes a final row that lacks a trailing newline
+while IFS= read -r LINE || [[ -n "$LINE" ]]
+do
+    # skip blank rows; their empty IDs would make the glob match every .gz file
+    [[ -n "$LINE" ]] || continue
+
+    # the trailing "_" absorbs any columns beyond the sixth
+    IFS=$SEP read -r FAMILY_ID SAMPLE SEX PHENOTYPE \
+        ORIGINAL_PROJECT_ID ORIGINAL_SAMPLE_ID _ <<< "${LINE//$TAB/$SEP}"
+
+    PREFIX=${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}
+
+    FOUND=0
+    for FILE in "$DOWNLOAD_DIR/$ORIGINAL_PROJECT_ID"/*"$ORIGINAL_SAMPLE_ID"*.gz
+    do
+        # an unmatched glob is left as the literal pattern; skip it
+        [[ -e "$FILE" ]] || continue
+        FOUND=1
+        echo "$FILE,$SAMPLE,$FAMILY_ID,$SEX,$PHENOTYPE,$TARGET" >> "$PREFIX.csv"
+    done
+    [[ $FOUND -eq 1 ]] \
+        || echo "WARNING: no .gz files for $ORIGINAL_SAMPLE_ID in $DOWNLOAD_DIR/$ORIGINAL_PROJECT_ID" >&2
+
+done < "$SAMPLES"
diff --git a/add_samples_from_previous_EdGe_runs.sh b/add_samples_from_previous_EdGe_runs.sh
new file mode 100755
--- /dev/null
+++ b/add_samples_from_previous_EdGe_runs.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+#
+# add_samples_from_previous_EdGe_runs.sh <config.sh> <project_id> <version> <samples>
+#
+# For every row of the tab-separated <samples> file, symlink the matching
+# FASTQ files of a previous run into $READS_DIR/<project_id>/symlinks/<sample>/,
+# renaming the first "_1_"/"_2_" token to "_one_"/"_two_" and the trailing
+# "_1"/"_2" before .fastq.gz to "_R1"/"_R2".
+#
+# <samples> columns: family_id, sample, sex, phenotype, original_project_id,
+# original_sample_id. A relative <samples> path is resolved against
+# $PARAMS_DIR, because the file is read after changing into that directory.
+#
+# PARAMS_DIR, READS_DIR and DOWNLOAD_DIR are not set here; they are presumably
+# defined by the sourced <config.sh> (confirm against the config).
+#
+
+die() { echo "ERROR: $*" >&2; exit 1; }
+
+[[ $# -eq 4 ]] || die "usage: $0 <config.sh> <project_id> <version> <samples>"
+
+CONFIG_SH=$1
+PROJECT_ID=$2
+VERSION=$3
+SAMPLES=$4
+
+source "$CONFIG_SH"
+
+# Fail early if the directories were not provided: empty values would silently
+# resolve the paths below against the filesystem root or the current directory.
+[[ -n "$PARAMS_DIR" && -n "$READS_DIR" && -n "$DOWNLOAD_DIR" ]] \
+    || die "PARAMS_DIR, READS_DIR and DOWNLOAD_DIR must all be set"
+
+cd "$PARAMS_DIR" || die "cannot cd to $PARAMS_DIR"
+[[ -r "$SAMPLES" ]] || die "cannot read samples file $SAMPLES"
+
+# create reads directory for project and symlink directory underneath
+mkdir -p "$READS_DIR/$PROJECT_ID/symlinks" \
+    || die "cannot create $READS_DIR/$PROJECT_ID/symlinks"
+
+# part of PROJECT_ID before the first underscore
+SHORT_PROJECT_ID=${PROJECT_ID%%_*}
+
+# "read" collapses runs of whitespace separators, which would drop empty
+# columns; swapping tabs for the non-whitespace unit separator keeps them.
+TAB=$'\t'
+SEP=$'\037'
+
+# "|| [[ -n ... ]]" also processes a final row that lacks a trailing newline
+while IFS= read -r LINE || [[ -n "$LINE" ]]
+do
+    # skip blank rows; their empty IDs would make the globs match other samples
+    [[ -n "$LINE" ]] || continue
+
+    # the trailing "_" absorbs any columns beyond the sixth
+    IFS=$SEP read -r FAMILY_ID SAMPLE SEX PHENOTYPE \
+        ORIGINAL_PROJECT_ID ORIGINAL_SAMPLE_ID _ <<< "${LINE//$TAB/$SEP}"
+
+    # only referenced by the disabled CSV block at the end of the loop
+    PREFIX=${SHORT_PROJECT_ID}_${VERSION}_${FAMILY_ID}
+
+    # create symlinks for problematic filenames
+    LINK_DIR=$READS_DIR/$PROJECT_ID/symlinks/$SAMPLE
+    mkdir -p "$LINK_DIR" \
+        || { echo "WARNING: cannot create $LINK_DIR; skipping $SAMPLE" >&2; continue; }
+
+    FOUND=0
+    for NUM in 1 2
+    do
+        if [[ $NUM -eq 1 ]]; then WORD=one; else WORD=two; fi
+        for READ in 1 2
+        do
+            for FILE in "$DOWNLOAD_DIR/$ORIGINAL_PROJECT_ID"/*/*/*"$ORIGINAL_SAMPLE_ID"*/*_"$NUM"_*_"$READ".fastq.gz
+            do
+                # an unmatched glob is left as the literal pattern; skip it
+                [[ -e "$FILE" ]] || continue
+                FOUND=1
+                NEWNAME=${FILE##*/}
+                # rename only the first "_<num>_" token in the file name
+                NEWNAME=${NEWNAME/_${NUM}_/_${WORD}_}
+                ln -s "$FILE" "$LINK_DIR/${NEWNAME%${READ}.fastq.gz}R${READ}.fastq.gz"
+            done
+        done
+    done
+    [[ $FOUND -eq 1 ]] \
+        || echo "WARNING: no FASTQ files for $ORIGINAL_SAMPLE_ID in $DOWNLOAD_DIR/$ORIGINAL_PROJECT_ID" >&2
+
+#    for FILE in `ls $READS_DIR/$PROJECT_ID/symlinks/$SAMPLE/*_R[1,2].fastq.gz`
+#    do
+#        echo "$FILE,$SAMPLE,$FAMILY_ID,$SEX,$PHENOTYPE,$TARGET" >> ${PREFIX}.csv
+#    done
+
+done < "$SAMPLES"