Skip to content
Snippets Groups Projects
Commit 012db0fb authored by user name's avatar user name
Browse files

Moved nextflow testing scripts to new bcbio vs nextflow directory

parent 1eb6a80b
No related branches found
No related tags found
No related merge requests found
Pipeline #48289 canceled
#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem=10GB
#SBATCH --time=48:00:00
#SBATCH --job-name=bqsr_test
#SBATCH --output=bqsr_test.out
#SBATCH --error=bqsr_test.err

#Purpose: check that the recalibrated (BQSR) bams produced by bcbio and nf-core
#contain the same records once read order is normalised.
#Inputs (current directory): 158063_519317-sort-recal.bcbio.bam, 158063_519317-recal.nf-core.bam
#Outputs (current directory): <prefix>.sam, <prefix>.bashsort.sam and <prefix>.bashsort.sam.md5 for each input

#Strict mode is set below the #SBATCH block, as sbatch stops reading directives at the first command
#Abort on the first failure so that an md5 is never produced from a truncated or missing sam
set -euo pipefail

readonly SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools
#File name stems shared by every step, so the steps cannot drift apart
readonly BCBIO_PREFIX=158063_519317-sort-recal.bcbio
readonly NFCORE_PREFIX=158063_519317-recal.nf-core

#The outputs of bcbio and nf-core bqsr initially appear different
#However, this can be accounted for by arbitrary ordering of reads with identical coordinates
#They can be made identical by a sort operation - samtools sort is not suitable as they are already adequately sorted by samtools' definition
#First, convert to sam (samtools view without -h, so the output is headerless)
#bcbio recal bam
"$SAMTOOLS" view "${BCBIO_PREFIX}.bam" > "${BCBIO_PREFIX}.sam"
#nf-core recal bam
"$SAMTOOLS" view "${NFCORE_PREFIX}.bam" > "${NFCORE_PREFIX}.sam"
#Sort the sam files using bash sort to confirm that they are identical
#The input here is the sam written by the step above (previously a non-existent *.sort.sam was read)
#LC_ALL=C gives a byte-wise ordering that does not depend on the locale of the compute node
LC_ALL=C sort "${NFCORE_PREFIX}.sam" > "${NFCORE_PREFIX}.bashsort.sam"
LC_ALL=C sort "${BCBIO_PREFIX}.sam" > "${BCBIO_PREFIX}.bashsort.sam"
#Generate md5sums - these should be identical
#md5sum writes the hash followed by the file name, so compare the hash column only
md5sum "${NFCORE_PREFIX}.bashsort.sam" > "${NFCORE_PREFIX}.bashsort.sam.md5"
md5sum "${BCBIO_PREFIX}.bashsort.sam" > "${BCBIO_PREFIX}.bashsort.sam.md5"
#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem=10GB
#SBATCH --time=24:00:00
#SBATCH --job-name=bwa_identity
#SBATCH --output=bwa_identity.out
#SBATCH --error=bwa_identity.err

#Purpose: check that bcbio and nextflow produce the same alignment records
#from identical fastqs, ignoring the read group tag.
#Inputs (current directory): 158063_519317-sort.bam (bcbio), 158063.bam (nextflow)
#Outputs (current directory): <prefix>.noRG.sam and <prefix>.noRG.sam.md5 for each input

#Strict mode is set below the #SBATCH block, as sbatch stops reading directives at the first command
#Abort on the first failure so that an md5 is never produced from a truncated or missing sam
set -euo pipefail

readonly SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools
#File name stems of the two bams being compared
readonly BCBIO_PREFIX=158063_519317-sort
readonly NEXTFLOW_PREFIX=158063

#We know that running bwa mem multiple times on identical input produces identical output
#We also wish to test that the nextflow and bcbio pipeline produce identical bam files when provided with identical fastqs
#In this example, nextflow is run with the trimmed fastqs produced by the bcbio pipeline (via fastp)
#We now wish to test that the resulting bam files are identical following bamsormadup (sort and mark duplicates)
#Convert bams to sam - remove read group tags as naming convention differs slightly between pipelines
#bcbio bam
"$SAMTOOLS" view -x RG "${BCBIO_PREFIX}.bam" > "${BCBIO_PREFIX}.noRG.sam"
#nextflow bam
"$SAMTOOLS" view -x RG "${NEXTFLOW_PREFIX}.bam" > "${NEXTFLOW_PREFIX}.noRG.sam"
#Generate md5sums - these should be identical
#md5sum writes the hash followed by the file name, so compare the hash column only
md5sum "${BCBIO_PREFIX}.noRG.sam" > "${BCBIO_PREFIX}.noRG.sam.md5"
md5sum "${NEXTFLOW_PREFIX}.noRG.sam" > "${NEXTFLOW_PREFIX}.noRG.sam.md5"
#!/bin/bash
#SBATCH --cpus-per-task=16
#SBATCH --mem=72GB
#SBATCH --time=24:00:00
#SBATCH --job-name=bwa_test
#SBATCH --output=bwa_test.out
#SBATCH --error=bwa_test.err

#Purpose: test whether bwa mem is deterministic by aligning the same fastq pair
#three times and comparing md5 checksums of the headerless sam output.
#Inputs (current directory): subset_158063_1.fastq.gz, subset_158063_2.fastq.gz,
#and exactly one bwa index (located via its *.amb file) somewhere below ./
#Outputs (current directory): <i>_158063_.bam, <i>_158063_.sam, <i>_158063_.sam.md5 for i in 1..3

#Strict mode is set below the #SBATCH block, as sbatch stops reading directives at the first command
#pipefail makes a bwa failure fail the bwa | samtools pipeline instead of being masked by samtools
set -euo pipefail

readonly BWA=/home/u035/u035/shared/software/bcbio/anaconda/bin/bwa
readonly SAMTOOLS=/home/u035/u035/shared/software/bcbio/anaconda/bin/samtools
#Thread count passed to both bwa and samtools; matches --cpus-per-task above
readonly THREADS=16

#Is bwa mem deterministic - i.e. will it produce the same alignment given identical commands and input
#The script below is designed to test this (the answer is 'yes')
#Point to appropriate bwa index
#In the pipeline, a softlink to this would be provided in the working directory
#Require exactly one match: none would leave the index empty, several would be passed to bwa as extra arguments
mapfile -t amb_files < <(find -L ./ -name "*.amb")
if (( ${#amb_files[@]} != 1 )); then
  printf 'Expected exactly one bwa index (*.amb) below ./ but found %d\n' "${#amb_files[@]}" >&2
  exit 1
fi
#The index prefix is the .amb path with the extension removed
readonly INDEX=${amb_files[0]%.amb}

#Run bwa on identical fastq input three times
#These example input files are from 20240902_Ansari_Morad
#We are picking them up from the pipeline immediately after output by fastp
for i in {1..3}; do
  "$BWA" mem \
    -R '@RG\tID:158063\tPL:illumina\tPU:158063\tSM:158063' -c 250 -M \
    -t "$THREADS" \
    "$INDEX" \
    subset_158063_1.fastq.gz subset_158063_2.fastq.gz \
    | "$SAMTOOLS" view --threads "$THREADS" -o "${i}_158063_.bam" -
done
#We expect the headers to differ
#To be confident, let's compare headerless sam
for i in {1..3}; do
  "$SAMTOOLS" view "${i}_158063_.bam" > "${i}_158063_.sam"
done
#Finally, generate md5 checksums for each sam
#These should be identical
#md5sum writes the hash followed by the file name, so compare the hash column only
for i in {1..3}; do
  md5sum "${i}_158063_.sam" > "${i}_158063_.sam.md5"
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment