From a04a47a7ab78fe79a9dcc1c2834f2277cb6a1d56 Mon Sep 17 00:00:00 2001
From: user name <kdonnel2@sdf-cs1.eidf.epcc.ed.ac.uk>
Date: Tue, 12 Nov 2024 11:02:27 +0000
Subject: [PATCH] Added script to tests/scripts directory to test stochasticity of bwa-mem

---
Note: the blob id on the "index" line below predates this revision of the
script; regenerate the patch with git format-patch if an exact id is needed.

 tests/scripts/bwa_stochasticity_check.sh | 105 ++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 tests/scripts/bwa_stochasticity_check.sh

diff --git a/tests/scripts/bwa_stochasticity_check.sh b/tests/scripts/bwa_stochasticity_check.sh
new file mode 100644
index 0000000..441d29f
--- /dev/null
+++ b/tests/scripts/bwa_stochasticity_check.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+#SBATCH --cpus-per-task=16
+#SBATCH --mem=72GB
+#SBATCH --time=24:00:00
+#SBATCH --job-name=bwa_test
+#SBATCH --output=bwa_test.out
+#SBATCH --error=bwa_test.err
+#
+# Checks whether bwa mem gives identical alignments when it is run several
+# times on identical FASTQ input.
+#
+# Run from a directory that contains (directly or via symlinks):
+#   - exactly one bwa index, located through its *.amb file
+#   - subset_158063_1.fastq.gz and subset_158063_2.fastq.gz
+#
+# For each run N (1..3) the script writes, in the current directory:
+#   N_158063_.bam      alignments from run N
+#   N_158063_.sam      the same alignments with the header removed
+#   N_158063_.sam.md5  md5sum output for the headerless SAM
+#
+# Environment overrides (optional): BWA, SAMTOOLS - paths to the executables.
+#
+# Exit status: 0 when all checksums match; 1 when they differ or a step fails.
+
+set -euo pipefail
+
+BWA=${BWA:-/home/u035/u035/shared/software/bcbio/anaconda/bin/bwa}
+SAMTOOLS=${SAMTOOLS:-/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools}
+
+readonly SAMPLE_ID=158063
+readonly N_RUNS=3
+# Follow the Slurm allocation when one exists; 16 matches --cpus-per-task above.
+readonly THREADS=${SLURM_CPUS_PER_TASK:-16}
+# These example input files are from 20240902_Ansari_Morad.
+# They are picked up from the pipeline immediately after output by fastp.
+readonly FASTQ_1="subset_${SAMPLE_ID}_1.fastq.gz"
+readonly FASTQ_2="subset_${SAMPLE_ID}_2.fastq.gz"
+
+# Print an error message to stderr and abort the job.
+die() {
+  printf 'ERROR: %s\n' "$*" >&2
+  exit 1
+}
+
+[[ -x "$BWA" ]] || die "bwa executable not found: $BWA"
+[[ -x "$SAMTOOLS" ]] || die "samtools executable not found: $SAMTOOLS"
+for fastq in "$FASTQ_1" "$FASTQ_2"; do
+  [[ -r "$fastq" ]] || die "input FASTQ not readable: $fastq"
+done
+
+# Point to the appropriate bwa index.
+# In the pipeline, a softlink to this would be provided in the working
+# directory. Anything other than exactly one match is rejected, because an
+# empty or multi-word index argument would make bwa misread its inputs.
+index_prefixes=()
+while IFS= read -r -d '' amb_file; do
+  index_prefixes+=("${amb_file%.amb}")
+done < <(find -L ./ -name '*.amb' -print0)
+if ((${#index_prefixes[@]} != 1)); then
+  die "expected exactly one *.amb file under ./, found ${#index_prefixes[@]}"
+fi
+readonly INDEX=${index_prefixes[0]}
+
+# Run bwa on identical fastq input N_RUNS times.
+# pipefail (set above) makes a bwa failure abort the job instead of leaving a
+# truncated BAM behind to be checksummed.
+# NOTE(review): samtools view gets no -b flag, so the output is presumably
+# written as BAM because of the .bam extension - confirm for the installed
+# samtools version.
+for ((run = 1; run <= N_RUNS; run++)); do
+  "$BWA" mem \
+    -R "@RG\tID:${SAMPLE_ID}\tPL:illumina\tPU:${SAMPLE_ID}\tSM:${SAMPLE_ID}" \
+    -c 250 -M \
+    -t "$THREADS" \
+    "$INDEX" \
+    "$FASTQ_1" "$FASTQ_2" \
+    | "$SAMTOOLS" view --threads "$THREADS" -o "${run}_${SAMPLE_ID}_.bam" -
+done
+
+# We expect the headers to differ.
+# To be confident, compare headerless SAM.
+for ((run = 1; run <= N_RUNS; run++)); do
+  "$SAMTOOLS" view "${run}_${SAMPLE_ID}_.bam" > "${run}_${SAMPLE_ID}_.sam"
+done
+
+# Finally, generate an md5 checksum for each SAM and compare them.
+# These should be identical.
+reference_sum=''
+mismatch=0
+for ((run = 1; run <= N_RUNS; run++)); do
+  sam_file="${run}_${SAMPLE_ID}_.sam"
+  md5sum "$sam_file" > "${sam_file}.md5"
+  read -r checksum _ < "${sam_file}.md5"
+  printf 'run %d: %s\n' "$run" "$checksum"
+  if [[ -z "$reference_sum" ]]; then
+    reference_sum=$checksum
+  elif [[ "$checksum" != "$reference_sum" ]]; then
+    mismatch=1
+  fi
+done
+
+if ((mismatch)); then
+  die "checksums differ between runs: bwa mem output is not deterministic"
+fi
+printf 'All %d runs produced identical headerless SAM output.\n' "$N_RUNS"
-- 
GitLab