#!/bin/bash
#SBATCH --cpus-per-task=16
#SBATCH --mem=72GB
#SBATCH --time=24:00:00
#SBATCH --job-name=bwa_test
#SBATCH --output=bwa_test.out
#SBATCH --error=bwa_test.err
#
# bwa stochasticity check.
#
# Aligns one identical pair of fastq files with `bwa mem` several times, then
# writes an md5 checksum of each headerless SAM so the runs can be compared.
#
# Expected in the working directory:
#   - exactly one bwa index (located through its *.amb file; symlinks followed)
#   - subset_158063_1.fastq.gz and subset_158063_2.fastq.gz
#
# Produced per run N (1..3):
#   N_158063_.bam, N_158063_.sam, N_158063_.sam.md5
#
# A verdict line is also printed to stdout. It is informational only: a
# checksum mismatch does not change the exit status.

# Abort on any failed command, unset variable, or failed pipeline stage, so a
# crashed bwa cannot leave a truncated BAM that still gets checksummed.
set -euo pipefail

readonly BWA=/home/u035/u035/shared/software/bcbio/anaconda/bin/bwa
readonly SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools

readonly SAMPLE=158063
readonly RUNS=3
# Follow the SLURM allocation when there is one; 16 matches the #SBATCH request
# above and is the fallback when the script is run outside SLURM.
readonly THREADS="${SLURM_CPUS_PER_TASK:-16}"

readonly FASTQ_R1="subset_${SAMPLE}_1.fastq.gz"
readonly FASTQ_R2="subset_${SAMPLE}_2.fastq.gz"

# Print a message to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Point to appropriate bwa index.
# In the pipeline, a softlink to this would be provided in the working
# directory. The index prefix is the *.amb path with the suffix removed.
index_prefixes=()
while IFS= read -r -d '' amb_path; do
  index_prefixes+=("${amb_path%.amb}")
done < <(find -L ./ -name '*.amb' -print0)

# bwa needs exactly one prefix: none would make it treat the first fastq as the
# index, and several would shift every positional argument.
if ((${#index_prefixes[@]} == 0)); then
  die "no bwa index (*.amb) found under the working directory"
elif ((${#index_prefixes[@]} > 1)); then
  die "expected one bwa index, found ${#index_prefixes[@]}: ${index_prefixes[*]}"
fi
readonly INDEX="${index_prefixes[0]}"

for fastq in "$FASTQ_R1" "$FASTQ_R2"; do
  [[ -r "$fastq" ]] || die "input fastq not readable: $fastq"
done

# Run bwa on identical fastq input three times.
# These example input files are from 20240902_Ansari_Morad.
# We are picking them up from the pipeline immediately after output by fastp.
for ((run = 1; run <= RUNS; run++)); do
  "$BWA" mem \
    -R "@RG\tID:${SAMPLE}\tPL:illumina\tPU:${SAMPLE}\tSM:${SAMPLE}" -c 250 -M \
    -t "$THREADS" \
    "$INDEX" \
    "$FASTQ_R1" "$FASTQ_R2" \
    | "$SAMTOOLS" view --threads "$THREADS" -o "${run}_${SAMPLE}_.bam" -
done

# We expect the headers to differ.
# To be confident, compare headerless SAM (samtools view omits the header
# unless asked for it).
for ((run = 1; run <= RUNS; run++)); do
  "$SAMTOOLS" view "${run}_${SAMPLE}_.bam" > "${run}_${SAMPLE}_.sam"
done

# Finally, generate md5 checksums for each SAM.
# These should be identical. md5sum records the file name next to the digest,
# so the .md5 files themselves always differ; only the digests are compared.
digests=()
for ((run = 1; run <= RUNS; run++)); do
  sam_file="${run}_${SAMPLE}_.sam"
  md5sum "$sam_file" > "${sam_file}.md5"
  read -r digest _ < "${sam_file}.md5"
  digests+=("$digest")
done

verdict="identical"
for digest in "${digests[@]}"; do
  if [[ "$digest" != "${digests[0]}" ]]; then
    verdict="DIFFERENT"
    break
  fi
done
printf 'RESULT: headerless SAM checksums across %d runs are %s\n' \
  "$RUNS" "$verdict"