diff --git a/submit_trio_wes_cram_compression.sh b/submit_trio_wes_cram_compression.sh new file mode 100755 index 0000000000000000000000000000000000000000..5843f06092c1b0664f5c405036355fe93715f1f0 --- /dev/null +++ b/submit_trio_wes_cram_compression.sh @@ -0,0 +1,46 @@ +#!/bin/bash +#PBS -l walltime=48:00:00 +#PBS -l ncpus=16,mem=8gb +#PBS -q sgp +#PBS -N trio_whole_exome_cram_compression +#PBS -j oe + +# enable running singletons +if [ -z $PBS_ARRAY_INDEX ] +then + if [ -z $INDEX ] + then + export PBS_ARRAY_INDX=1 + else + export PBS_ARRAY_INDEX=$INDEX + fi +fi + +# Expects environment variables to be set +# PROJECT_ID - e.g. 12345_LastnameFirstname +# CONFIG_SH - absolute path to configuration script setting environment variables + +source $CONFIG_SH + +FAMILY_ID=`head -n $PBS_ARRAY_INDEX $PARAMS_DIR/$PROJECT_ID.family_ids.txt | tail -n 1` + +# This assumes that ${PROJECT_ID}_${FAMILY_ID} is unique, and it should be - if there was +# a re-run of a family, it should have a new project id. +cd $OUTPUT_DIR/*${PROJECT_ID}_${FAMILY_ID}* + +for BAM in */*.bam +do + + # 1. Compress to CRAM format without quality score binning + CRAM=${BAM%.bam}.cram + samtools view -@ 16 -T $REFERENCE_GENOME -C -o $CRAM $BAM + + #Â 2. Index the CRAM file - good sanity check + samtools index $CRAM + + # 3. Compare the stats from the BAM and CRAM files + samtools flagstat $BAM > $BAM.flagstat.txt + samtools flagstat $CRAM > $CRAM.flagstat.txt + diff $BAM.flagstat.txt $CRAM.flagstat.txt + +done diff --git a/trio_whole_exome_config.sh b/trio_whole_exome_config.sh index bc0b78a8fd3b9857e2b220637d8408fcc11a5ca2..46e297803b51ce543949f7bc7bfc59db75fd512d 100644 --- a/trio_whole_exome_config.sh +++ b/trio_whole_exome_config.sh @@ -7,6 +7,7 @@ SCRIPTS=/home/u027/project/scripts BCBIO_TEMPLATE=$SCRIPTS/trio_whole_exome_bcbio_template.yaml TARGET=/home/u027/project/resources/Twist_Exome_Target_hg38.bed DOWNLOAD_DIR=/scratch/u027/project/trio_whole_exome/data +REFERENCE_GENOME=/home/u027/project/software/bcbio/genomes/Hsapiens/hg38/seq/hg38.fa BASE=/scratch/u027/project/trio_whole_exome/analysis PARAMS_DIR=$BASE/params