Skip to content
Snippets Groups Projects
Commit ce1b6a18 authored by ameyner2
Browse files

Initial commit of CRAM compression script and addition of reference genome path to config file

parent 796251ce
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
#PBS -l walltime=48:00:00
#PBS -l ncpus=16,mem=8gb
#PBS -q sgp
#PBS -N trio_whole_exome_cram_compression
#PBS -j oe
# Enable running singletons: when PBS_ARRAY_INDEX is unset or empty, fall back
# to INDEX if the caller supplied one, otherwise default to 1.
# (The previous version exported the misspelled PBS_ARRAY_INDX, so the default
# never took effect.)
if [[ -z "${PBS_ARRAY_INDEX:-}" ]]; then
  export PBS_ARRAY_INDEX="${INDEX:-1}"
fi
# Compress every BAM file of one family to CRAM and check the result.
#
# Expects environment variables to be set
# PROJECT_ID - e.g. 12345_LastnameFirstname
# CONFIG_SH - absolute path to configuration script setting environment variables
# PBS_ARRAY_INDEX - 1-based line number of the family in the family id list
#
# The configuration script must define PARAMS_DIR, OUTPUT_DIR and
# REFERENCE_GENOME, all of which are used below.
#
# Exit status: 0 if every BAM was compressed, indexed and has flagstat output
# identical to its CRAM; 1 otherwise.
: "${PROJECT_ID:?PROJECT_ID must be set}"
: "${CONFIG_SH:?CONFIG_SH must be set}"
: "${PBS_ARRAY_INDEX:?PBS_ARRAY_INDEX must be set}"

# shellcheck source=/dev/null
source "$CONFIG_SH" || { echo "ERROR: cannot source $CONFIG_SH" >&2; exit 1; }

: "${PARAMS_DIR:?PARAMS_DIR must be set by CONFIG_SH}"
: "${OUTPUT_DIR:?OUTPUT_DIR must be set by CONFIG_SH}"
: "${REFERENCE_GENOME:?REFERENCE_GENOME must be set by CONFIG_SH}"

# Pick the family id on line PBS_ARRAY_INDEX. sed prints nothing when the index
# is past the end of the list, whereas `head | tail` would silently return the
# last family again.
FAMILY_IDS_FILE="$PARAMS_DIR/$PROJECT_ID.family_ids.txt"
FAMILY_ID=$(sed -n "${PBS_ARRAY_INDEX}p" "$FAMILY_IDS_FILE")
if [[ -z "$FAMILY_ID" ]]; then
  echo "ERROR: no family id on line $PBS_ARRAY_INDEX of $FAMILY_IDS_FILE" >&2
  exit 1
fi

# This assumes that ${PROJECT_ID}_${FAMILY_ID} is unique, and it should be - if there was
# a re-run of a family, it should have a new project id.
# The glob is expanded into an array so that zero or multiple matches are
# detected instead of being handed to cd (an unmatched glob stays literal and
# fails the -d test).
FAMILY_DIRS=("$OUTPUT_DIR"/*"${PROJECT_ID}_${FAMILY_ID}"*)
if [[ ${#FAMILY_DIRS[@]} -ne 1 || ! -d "${FAMILY_DIRS[0]}" ]]; then
  echo "ERROR: expected exactly one directory matching $OUTPUT_DIR/*${PROJECT_ID}_${FAMILY_ID}*" >&2
  exit 1
fi
# Never continue in the wrong directory: the loop below works on relative paths.
cd "${FAMILY_DIRS[0]}" || exit 1

STATUS=0
for BAM in */*.bam; do
  # Without nullglob an unmatched pattern is passed through literally.
  if [[ ! -e "$BAM" ]]; then
    echo "ERROR: no BAM files found under $PWD" >&2
    STATUS=1
    continue
  fi

  # 1. Compress to CRAM format without quality score binning
  #    (-@ 16 matches the ncpus=16 requested in the PBS header)
  CRAM="${BAM%.bam}.cram"
  if ! samtools view -@ 16 -T "$REFERENCE_GENOME" -C -o "$CRAM" "$BAM"; then
    echo "ERROR: CRAM compression failed for $BAM" >&2
    STATUS=1
    continue
  fi

  # 2. Index the CRAM file - good sanity check
  if ! samtools index "$CRAM"; then
    echo "ERROR: indexing failed for $CRAM" >&2
    STATUS=1
    continue
  fi

  # 3. Compare the stats from the BAM and CRAM files
  if ! samtools flagstat "$BAM" > "$BAM.flagstat.txt"; then
    echo "ERROR: flagstat failed for $BAM" >&2
    STATUS=1
    continue
  fi
  if ! samtools flagstat "$CRAM" > "$CRAM.flagstat.txt"; then
    echo "ERROR: flagstat failed for $CRAM" >&2
    STATUS=1
    continue
  fi
  # diff prints any differing lines; a non-zero status marks the job as failed
  # so a mismatch cannot go unnoticed.
  if ! diff "$BAM.flagstat.txt" "$CRAM.flagstat.txt"; then
    echo "ERROR: flagstat output differs between $BAM and $CRAM" >&2
    STATUS=1
  fi
done

exit "$STATUS"
......@@ -7,6 +7,7 @@ SCRIPTS=/home/u027/project/scripts
# YAML template file located in the scripts directory ($SCRIPTS).
BCBIO_TEMPLATE=$SCRIPTS/trio_whole_exome_bcbio_template.yaml
# BED file in the project resources directory
# (presumably the exome capture target regions for hg38 - confirm).
TARGET=/home/u027/project/resources/Twist_Exome_Target_hg38.bed
# Data directory on scratch (presumably where downloaded input data lands - confirm).
DOWNLOAD_DIR=/scratch/u027/project/trio_whole_exome/data
# Reference FASTA; the CRAM compression script passes this to `samtools view -T`.
REFERENCE_GENOME=/home/u027/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa
# Root of the analysis area on scratch; the paths below are derived from it.
BASE=/scratch/u027/project/trio_whole_exome/analysis
# Parameter files; the CRAM compression script reads
# $PARAMS_DIR/$PROJECT_ID.family_ids.txt from here.
PARAMS_DIR=$BASE/params
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment