diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6ed459d7b786ecb8deb098b4a0053547afa6a9af..f6ee5ec797065681d96a0c8d8f445459d0d4939c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,6 +1,7 @@
+---
+
+image: trio-whole-exome:v1
+
 test:
   script:
-  - conda env create -p $PWD/conda
-  - conda activate $PWD/conda
-  - cd tests/ && ./run_tests.sh
-  - conda deactivate
+  - cd tests/ && ./run_stubs.sh
diff --git a/trio_whole_exome_bcbio_template.yaml b/assets/trio_whole_exome_bcbio_template.yaml
similarity index 85%
rename from trio_whole_exome_bcbio_template.yaml
rename to assets/trio_whole_exome_bcbio_template.yaml
index 8c8a98940d79514a66e551b3bf966a4179ae036c..24d6aaee3f0019c47c0ae00dc7778840e7034912 100644
--- a/trio_whole_exome_bcbio_template.yaml
+++ b/assets/trio_whole_exome_bcbio_template.yaml
@@ -19,4 +19,5 @@ details:
   analysis: variant2
   genome_build: hg38
 upload:
-  dir: /home/u035/u035/shared/results
+  # relative path will output locally to the bcbio run folder
+  dir: ./results
diff --git a/G2P.pm b/bin/G2P.pm
similarity index 100%
rename from G2P.pm
rename to bin/G2P.pm
diff --git a/NHS_WES_check_PED_aff_probands.py b/bin/NHS_WES_check_PED_aff_probands.py
similarity index 100%
rename from NHS_WES_check_PED_aff_probands.py
rename to bin/NHS_WES_check_PED_aff_probands.py
diff --git a/NHS_WES_check_PED_quad.py b/bin/NHS_WES_check_PED_quad.py
similarity index 100%
rename from NHS_WES_check_PED_quad.py
rename to bin/NHS_WES_check_PED_quad.py
diff --git a/NHS_WES_extract_shared_vars.py b/bin/NHS_WES_extract_shared_vars.py
similarity index 100%
rename from NHS_WES_extract_shared_vars.py
rename to bin/NHS_WES_extract_shared_vars.py
diff --git a/NHS_WES_extract_trio_FAM_PRO_ID.py b/bin/NHS_WES_extract_trio_FAM_PRO_ID.py
similarity index 100%
rename from NHS_WES_extract_trio_FAM_PRO_ID.py
rename to bin/NHS_WES_extract_trio_FAM_PRO_ID.py
diff --git a/NHS_WES_filter_LQ_GT.py b/bin/NHS_WES_filter_LQ_GT.py
similarity index 100%
rename from NHS_WES_filter_LQ_GT.py
rename to bin/NHS_WES_filter_LQ_GT.py
diff --git a/NHS_WES_generate_DEC_IGV.py b/bin/NHS_WES_generate_DEC_IGV.py
similarity index 100%
rename from NHS_WES_generate_DEC_IGV.py
rename to bin/NHS_WES_generate_DEC_IGV.py
diff --git a/NHS_WES_generate_DEC_IGV.py.v1 b/bin/NHS_WES_generate_DEC_IGV.py.v1
similarity index 100%
rename from NHS_WES_generate_DEC_IGV.py.v1
rename to bin/NHS_WES_generate_DEC_IGV.py.v1
diff --git a/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans b/bin/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans
similarity index 100%
rename from NHS_WES_generate_DEC_IGV.py_wrong_gene_trans
rename to bin/NHS_WES_generate_DEC_IGV.py_wrong_gene_trans
diff --git a/NHS_WES_generate_DEC_IGV_aff_probands.py b/bin/NHS_WES_generate_DEC_IGV_aff_probands.py
similarity index 100%
rename from NHS_WES_generate_DEC_IGV_aff_probands.py
rename to bin/NHS_WES_generate_DEC_IGV_aff_probands.py
diff --git a/NHS_WES_generate_DEC_IGV_sib_from_quad.py b/bin/NHS_WES_generate_DEC_IGV_sib_from_quad.py
similarity index 100%
rename from NHS_WES_generate_DEC_IGV_sib_from_quad.py
rename to bin/NHS_WES_generate_DEC_IGV_sib_from_quad.py
diff --git a/NHS_WES_generate_DEC_IGV_trio_from_quad.py b/bin/NHS_WES_generate_DEC_IGV_trio_from_quad.py
similarity index 100%
rename from NHS_WES_generate_DEC_IGV_trio_from_quad.py
rename to bin/NHS_WES_generate_DEC_IGV_trio_from_quad.py
diff --git a/NHS_WES_generate_aff_sib_ped.py b/bin/NHS_WES_generate_aff_sib_ped.py
similarity index 100%
rename from NHS_WES_generate_aff_sib_ped.py
rename to bin/NHS_WES_generate_aff_sib_ped.py
diff --git a/NHS_WES_generate_coverage_result_file.py b/bin/NHS_WES_generate_coverage_result_file.py
similarity index 100%
rename from NHS_WES_generate_coverage_result_file.py
rename to bin/NHS_WES_generate_coverage_result_file.py
diff --git a/NHS_WES_generate_trio_VCF.py b/bin/NHS_WES_generate_trio_VCF.py
similarity index 100%
rename from NHS_WES_generate_trio_VCF.py
rename to bin/NHS_WES_generate_trio_VCF.py
diff --git a/NHS_WES_generate_trio_ped.py b/bin/NHS_WES_generate_trio_ped.py
similarity index 100%
rename from NHS_WES_generate_trio_ped.py
rename to bin/NHS_WES_generate_trio_ped.py
diff --git a/NHS_WES_trio_cram_setup.sh b/bin/NHS_WES_trio_cram_setup.sh
similarity index 100%
rename from NHS_WES_trio_cram_setup.sh
rename to bin/NHS_WES_trio_cram_setup.sh
diff --git a/NHS_WES_trio_delete_BAM.sh b/bin/NHS_WES_trio_delete_BAM.sh
similarity index 100%
rename from NHS_WES_trio_delete_BAM.sh
rename to bin/NHS_WES_trio_delete_BAM.sh
diff --git a/NHS_WES_trio_setup.sh b/bin/NHS_WES_trio_setup.sh
similarity index 100%
rename from NHS_WES_trio_setup.sh
rename to bin/NHS_WES_trio_setup.sh
diff --git a/add_plate_and_family_id_to_ped.pl b/bin/add_plate_and_family_id_to_ped.pl
similarity index 100%
rename from add_plate_and_family_id_to_ped.pl
rename to bin/add_plate_and_family_id_to_ped.pl
diff --git a/add_samples_from_previous_CRF_runs.sh b/bin/add_samples_from_previous_CRF_runs.sh
similarity index 100%
rename from add_samples_from_previous_CRF_runs.sh
rename to bin/add_samples_from_previous_CRF_runs.sh
diff --git a/add_samples_from_previous_EdGe_runs.sh b/bin/add_samples_from_previous_EdGe_runs.sh
similarity index 100%
rename from add_samples_from_previous_EdGe_runs.sh
rename to bin/add_samples_from_previous_EdGe_runs.sh
diff --git a/bcbio_gnomad_install.sh b/bin/bcbio_gnomad_install.sh
similarity index 100%
rename from bcbio_gnomad_install.sh
rename to bin/bcbio_gnomad_install.sh
diff --git a/check_quad_PED.py b/bin/check_quad_PED.py
similarity index 100%
rename from check_quad_PED.py
rename to bin/check_quad_PED.py
diff --git a/check_shared_PED.py b/bin/check_shared_PED.py
similarity index 100%
rename from check_shared_PED.py
rename to bin/check_shared_PED.py
diff --git a/compare_indi_vars_by_version.py b/bin/compare_indi_vars_by_version.py
similarity index 100%
rename from compare_indi_vars_by_version.py
rename to bin/compare_indi_vars_by_version.py
diff --git a/convert_DEC_to_v10.py b/bin/convert_DEC_to_v10.py
similarity index 100%
rename from convert_DEC_to_v10.py
rename to bin/convert_DEC_to_v10.py
diff --git a/decipher_NHS_WES_trio.sh b/bin/decipher_NHS_WES_trio.sh
similarity index 100%
rename from decipher_NHS_WES_trio.sh
rename to bin/decipher_NHS_WES_trio.sh
diff --git a/delete_BAM.sh b/bin/delete_BAM.sh
similarity index 100%
rename from delete_BAM.sh
rename to bin/delete_BAM.sh
diff --git a/downstream_setup.sh b/bin/downstream_setup.sh
similarity index 100%
rename from downstream_setup.sh
rename to bin/downstream_setup.sh
diff --git a/extract_BED_CCDS_DDG2P.py b/bin/extract_BED_CCDS_DDG2P.py
similarity index 100%
rename from extract_BED_CCDS_DDG2P.py
rename to bin/extract_BED_CCDS_DDG2P.py
diff --git a/extract_solo_FAM_PRO_ID.py b/bin/extract_solo_FAM_PRO_ID.py
similarity index 100%
rename from extract_solo_FAM_PRO_ID.py
rename to bin/extract_solo_FAM_PRO_ID.py
diff --git a/extract_trio_FAM_PRO_ID.py b/bin/extract_trio_FAM_PRO_ID.py
similarity index 100%
rename from extract_trio_FAM_PRO_ID.py
rename to bin/extract_trio_FAM_PRO_ID.py
diff --git a/filter_LQ_GT.py b/bin/filter_LQ_GT.py
similarity index 100%
rename from filter_LQ_GT.py
rename to bin/filter_LQ_GT.py
diff --git a/full_process_NHS_WES_trio.sh b/bin/full_process_NHS_WES_trio.sh
similarity index 100%
rename from full_process_NHS_WES_trio.sh
rename to bin/full_process_NHS_WES_trio.sh
diff --git a/gather_NHS_WES_aff_probands_results.sh b/bin/gather_NHS_WES_aff_probands_results.sh
similarity index 100%
rename from gather_NHS_WES_aff_probands_results.sh
rename to bin/gather_NHS_WES_aff_probands_results.sh
diff --git a/gather_NHS_WES_quad_results.sh b/bin/gather_NHS_WES_quad_results.sh
similarity index 100%
rename from gather_NHS_WES_quad_results.sh
rename to bin/gather_NHS_WES_quad_results.sh
diff --git a/gather_NHS_WES_trio_results.sh b/bin/gather_NHS_WES_trio_results.sh
similarity index 100%
rename from gather_NHS_WES_trio_results.sh
rename to bin/gather_NHS_WES_trio_results.sh
diff --git a/gather_quad_results.sh b/bin/gather_quad_results.sh
similarity index 100%
rename from gather_quad_results.sh
rename to bin/gather_quad_results.sh
diff --git a/gather_shared_results.sh b/bin/gather_shared_results.sh
similarity index 100%
rename from gather_shared_results.sh
rename to bin/gather_shared_results.sh
diff --git a/gather_solo_results.sh b/bin/gather_solo_results.sh
similarity index 100%
rename from gather_solo_results.sh
rename to bin/gather_solo_results.sh
diff --git a/gather_trio_results.sh b/bin/gather_trio_results.sh
similarity index 100%
rename from gather_trio_results.sh
rename to bin/gather_trio_results.sh
diff --git a/generate_DEC_IGV.py b/bin/generate_DEC_IGV.py
similarity index 100%
rename from generate_DEC_IGV.py
rename to bin/generate_DEC_IGV.py
diff --git a/generate_DEC_IGV_aff_sib_scripts_from_quad.py b/bin/generate_DEC_IGV_aff_sib_scripts_from_quad.py
similarity index 100%
rename from generate_DEC_IGV_aff_sib_scripts_from_quad.py
rename to bin/generate_DEC_IGV_aff_sib_scripts_from_quad.py
diff --git a/generate_DEC_IGV_scripts.py b/bin/generate_DEC_IGV_scripts.py
similarity index 100%
rename from generate_DEC_IGV_scripts.py
rename to bin/generate_DEC_IGV_scripts.py
diff --git a/generate_DEC_IGV_shared_scripts.py b/bin/generate_DEC_IGV_shared_scripts.py
similarity index 100%
rename from generate_DEC_IGV_shared_scripts.py
rename to bin/generate_DEC_IGV_shared_scripts.py
diff --git a/generate_DEC_IGV_solo_scripts.py b/bin/generate_DEC_IGV_solo_scripts.py
similarity index 100%
rename from generate_DEC_IGV_solo_scripts.py
rename to bin/generate_DEC_IGV_solo_scripts.py
diff --git a/generate_DEC_IGV_trio_scripts_from_quad.py b/bin/generate_DEC_IGV_trio_scripts_from_quad.py
similarity index 100%
rename from generate_DEC_IGV_trio_scripts_from_quad.py
rename to bin/generate_DEC_IGV_trio_scripts_from_quad.py
diff --git a/generate_G2P_out_VCF.py b/bin/generate_G2P_out_VCF.py
similarity index 100%
rename from generate_G2P_out_VCF.py
rename to bin/generate_G2P_out_VCF.py
diff --git a/generate_aff_sib_PED_from_quad.py b/bin/generate_aff_sib_PED_from_quad.py
similarity index 100%
rename from generate_aff_sib_PED_from_quad.py
rename to bin/generate_aff_sib_PED_from_quad.py
diff --git a/generate_coverage_result_file.py b/bin/generate_coverage_result_file.py
similarity index 100%
rename from generate_coverage_result_file.py
rename to bin/generate_coverage_result_file.py
diff --git a/generate_trio_PED_from_quad.py b/bin/generate_trio_PED_from_quad.py
similarity index 100%
rename from generate_trio_PED_from_quad.py
rename to bin/generate_trio_PED_from_quad.py
diff --git a/get_cov_output.py b/bin/get_cov_output.py
similarity index 100%
rename from get_cov_output.py
rename to bin/get_cov_output.py
diff --git a/old_downstream_setup.sh b/bin/old_downstream_setup.sh
similarity index 100%
rename from old_downstream_setup.sh
rename to bin/old_downstream_setup.sh
diff --git a/old_submit_downstream.sh b/bin/old_submit_downstream.sh
similarity index 100%
rename from old_submit_downstream.sh
rename to bin/old_submit_downstream.sh
diff --git a/old_submit_trio_wes_aspera_download.sh b/bin/old_submit_trio_wes_aspera_download.sh
similarity index 100%
rename from old_submit_trio_wes_aspera_download.sh
rename to bin/old_submit_trio_wes_aspera_download.sh
diff --git a/trio_whole_exome_parse_peddy_ped_csv.pl b/bin/peddy_validation.pl
old mode 100644
new mode 100755
similarity index 78%
rename from trio_whole_exome_parse_peddy_ped_csv.pl
rename to bin/peddy_validation.pl
index 05e4d02085e235d092d7926321c3715b1adc8de1..02482134378b4249a992d7808995547e7a4c7814
--- a/trio_whole_exome_parse_peddy_ped_csv.pl
+++ b/bin/peddy_validation.pl
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-trio_whole_exome_parse_peddy_ped_csv.pl
+peddy_validation.pl
 
 =head1 AUTHOR
 
@@ -16,6 +16,7 @@ Checks the parent-child and parent-parent relationships from peddy output.
 
 use strict;
 
+use Cwd;
 use Getopt::Long;
 use IO::File;
 
@@ -35,7 +36,6 @@ my $fam_dir;
 my $project_id;
 my $version;
 my $out_file;
-my $batch_id;
 
 GetOptions(
     'help'       => \$help,
@@ -43,11 +43,10 @@ GetOptions(
     'ped=s'      => \$ped_file,
     'output=s'   => \$out_file,
     'families=s' => \$fam_dir,
-    'version=s'  => \$version,
-    'batch=s'    => \$batch_id
+    'version=s'  => \$version
 ) or die $usage;
 
-if ($help || !$project_id || !$ped_file || !$out_file || !$batch_id || !$version || !$fam_dir)
+if ($help || !$project_id || !$ped_file || !$out_file || !$version || !$fam_dir)
 {
     print $usage;
     exit(0);
@@ -76,16 +75,16 @@ $in_fh->close();
 my $out_fh = new IO::File;
 $out_fh->open($out_file, "w") or die "Could not open $out_file\n$!";
 
-printf $out_fh "project_id\tbatch_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
+printf $out_fh "project_id\tsample_a\tsample_b\tpedigree_parents\tpredicted_parents\tparent_error\n";
 
 foreach my $family_id (sort keys %ped)
 {
-	my @peddy_glob = glob(sprintf("$fam_dir/*_%s_%s_%s_%s/%s_%s/qc/peddy/%s%s.ped_check.csv", 
-	        $project_id, $version, $batch_id, $family_id, $ped{$family_id}{'aff'}, $family_id, $batch_id, $family_id));
-	next if (scalar(@peddy_glob) == 0);
+    my $glob_str = sprintf("$fam_dir/*%s/%s/qc/peddy/*.ped_check.csv", $family_id, $ped{$family_id}{'aff'});
+    my @peddy_glob = glob($glob_str);
+    next if (scalar(@peddy_glob) == 0);
 
-	my $peddy_fh = new IO::File;
-	$peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
+    my $peddy_fh = new IO::File;
+    $peddy_fh->open($peddy_glob[0], "r") or die "Could not open $peddy_glob[0]\n$!";
 
 	my @headers;
 	my %info;
@@ -129,7 +128,7 @@ foreach my $family_id (sort keys %ped)
 
 		$info{'parent_error'}{$sample_pair} = $info{'pedigree_parents'}{$sample_pair} eq $info{'predicted_parents'}{$sample_pair} ? 'False' : 'True';
 
-		printf $out_fh "$project_id\t$batch_id\t$sample_pair\t%s\t%s\t%s\n", 
+		printf $out_fh "$project_id\t$sample_pair\t%s\t%s\t%s\n", 
 		    $info{'pedigree_parents'}{$sample_pair}, 
 		    $info{'predicted_parents'}{$sample_pair},
 		    $info{'parent_error'}{$sample_pair};
@@ -137,5 +136,3 @@ foreach my $family_id (sort keys %ped)
 }
 
 $out_fh->close();
-
-
diff --git a/process_NHS_WES_aff_probands.sh b/bin/process_NHS_WES_aff_probands.sh
similarity index 100%
rename from process_NHS_WES_aff_probands.sh
rename to bin/process_NHS_WES_aff_probands.sh
diff --git a/process_NHS_WES_quad.sh b/bin/process_NHS_WES_quad.sh
similarity index 100%
rename from process_NHS_WES_quad.sh
rename to bin/process_NHS_WES_quad.sh
diff --git a/process_NHS_WES_quad_full.sh b/bin/process_NHS_WES_quad_full.sh
similarity index 100%
rename from process_NHS_WES_quad_full.sh
rename to bin/process_NHS_WES_quad_full.sh
diff --git a/process_NHS_WES_trio.sh b/bin/process_NHS_WES_trio.sh
similarity index 100%
rename from process_NHS_WES_trio.sh
rename to bin/process_NHS_WES_trio.sh
diff --git a/process_NHS_WES_trio_before_BAMOUT.sh b/bin/process_NHS_WES_trio_before_BAMOUT.sh
similarity index 100%
rename from process_NHS_WES_trio_before_BAMOUT.sh
rename to bin/process_NHS_WES_trio_before_BAMOUT.sh
diff --git a/process_quad.sh b/bin/process_quad.sh
similarity index 100%
rename from process_quad.sh
rename to bin/process_quad.sh
diff --git a/process_shared.sh b/bin/process_shared.sh
similarity index 100%
rename from process_shared.sh
rename to bin/process_shared.sh
diff --git a/process_solo.sh b/bin/process_solo.sh
similarity index 100%
rename from process_solo.sh
rename to bin/process_solo.sh
diff --git a/process_trio.sh b/bin/process_trio.sh
similarity index 100%
rename from process_trio.sh
rename to bin/process_trio.sh
diff --git a/processing_setup.sh b/bin/processing_setup.sh
similarity index 100%
rename from processing_setup.sh
rename to bin/processing_setup.sh
diff --git a/reanalysis_preparation.sh b/bin/reanalysis_preparation.sh
similarity index 100%
rename from reanalysis_preparation.sh
rename to bin/reanalysis_preparation.sh
diff --git a/run_processing.sh b/bin/run_processing.sh
similarity index 100%
rename from run_processing.sh
rename to bin/run_processing.sh
diff --git a/submit_depth_of_coverage_MQ20_BQ20.sh b/bin/submit_depth_of_coverage_MQ20_BQ20.sh
similarity index 100%
rename from submit_depth_of_coverage_MQ20_BQ20.sh
rename to bin/submit_depth_of_coverage_MQ20_BQ20.sh
diff --git a/submit_downstream.sh b/bin/submit_downstream.sh
similarity index 100%
rename from submit_downstream.sh
rename to bin/submit_downstream.sh
diff --git a/submit_trio_wes_aspera_download.sh b/bin/submit_trio_wes_aspera_download.sh
similarity index 100%
rename from submit_trio_wes_aspera_download.sh
rename to bin/submit_trio_wes_aspera_download.sh
diff --git a/submit_trio_wes_bcbio.sh b/bin/submit_trio_wes_bcbio.sh
similarity index 100%
rename from submit_trio_wes_bcbio.sh
rename to bin/submit_trio_wes_bcbio.sh
diff --git a/submit_trio_wes_cram_compression.sh b/bin/submit_trio_wes_cram_compression.sh
similarity index 100%
rename from submit_trio_wes_cram_compression.sh
rename to bin/submit_trio_wes_cram_compression.sh
diff --git a/submit_trio_wes_family_checksums.sh b/bin/submit_trio_wes_family_checksums.sh
similarity index 100%
rename from submit_trio_wes_family_checksums.sh
rename to bin/submit_trio_wes_family_checksums.sh
diff --git a/submit_trio_wes_lftp_download.sh b/bin/submit_trio_wes_lftp_download.sh
similarity index 100%
rename from submit_trio_wes_lftp_download.sh
rename to bin/submit_trio_wes_lftp_download.sh
diff --git a/submit_trio_wes_project_checksums.sh b/bin/submit_trio_wes_project_checksums.sh
similarity index 100%
rename from submit_trio_wes_project_checksums.sh
rename to bin/submit_trio_wes_project_checksums.sh
diff --git a/submit_trio_wes_wget_download.sh b/bin/submit_trio_wes_wget_download.sh
similarity index 100%
rename from submit_trio_wes_wget_download.sh
rename to bin/submit_trio_wes_wget_download.sh
diff --git a/test_process_NHS_WES_trio.sh b/bin/test_process_NHS_WES_trio.sh
similarity index 100%
rename from test_process_NHS_WES_trio.sh
rename to bin/test_process_NHS_WES_trio.sh
diff --git a/test_run_processing.sh b/bin/test_run_processing.sh
similarity index 100%
rename from test_run_processing.sh
rename to bin/test_run_processing.sh
diff --git a/trio_cram_setup.sh b/bin/trio_cram_setup.sh
similarity index 100%
rename from trio_cram_setup.sh
rename to bin/trio_cram_setup.sh
diff --git a/trio_setup.sh b/bin/trio_setup.sh
similarity index 100%
rename from trio_setup.sh
rename to bin/trio_setup.sh
diff --git a/trio_wes_prepare_bcbio_config.sh b/bin/trio_wes_prepare_bcbio_config.sh
similarity index 100%
rename from trio_wes_prepare_bcbio_config.sh
rename to bin/trio_wes_prepare_bcbio_config.sh
diff --git a/trio_wes_prepare_bcbio_config_crf.sh b/bin/trio_wes_prepare_bcbio_config_crf.sh
similarity index 100%
rename from trio_wes_prepare_bcbio_config_crf.sh
rename to bin/trio_wes_prepare_bcbio_config_crf.sh
diff --git a/trio_wes_prepare_bcbio_config_singleton_from_duo.sh b/bin/trio_wes_prepare_bcbio_config_singleton_from_duo.sh
similarity index 100%
rename from trio_wes_prepare_bcbio_config_singleton_from_duo.sh
rename to bin/trio_wes_prepare_bcbio_config_singleton_from_duo.sh
diff --git a/trio_whole_exome_config.sh b/bin/trio_whole_exome_config.sh
similarity index 100%
rename from trio_whole_exome_config.sh
rename to bin/trio_whole_exome_config.sh
diff --git a/trio_whole_exome_create_parameter_files.pl b/bin/trio_whole_exome_create_parameter_files.pl
similarity index 100%
rename from trio_whole_exome_create_parameter_files.pl
rename to bin/trio_whole_exome_create_parameter_files.pl
diff --git a/main.nf b/main.nf
index 58404a7d07ba9e4286a326ad7cb4778d9672db66..f2b8c278b715a6658514e8eb80a7df741f264916 100644
--- a/main.nf
+++ b/main.nf
@@ -26,9 +26,6 @@ params.target_bed = null
 // hg38 reference genome in fasta format
 params.reference_genome = null
 
-// path to the parse_peddy_output Perl script. Todo: remove once scripts are in bin/
-params.parse_peddy_output = null
-
 // path to a Ped file describing all the families in the pipeline batch
 params.ped_file = null
 
@@ -44,4 +41,3 @@ workflow {
         exit 1, 'params.workflow required - variant-calling or variant-prioritisation'
     }
 }
-
diff --git a/nextflow.config b/nextflow.config
index d861ce32ec03ef7a18eff3d0e130a5f1832709af..752635b6125693fc39a154bd9edd8a2dca481efd 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,30 +1,104 @@
 
+params {
+    max_cpus = 16
+    max_mem = 32.GB
+    max_time = 48.h
+
+    min_cpus = 1
+    min_mem = 1.GB
+    min_time = 2.h
+
+    bcbio_template = "$projectDir/assets/trio_whole_exome_bcbio_template.yaml"
+}
+
+
+profiles {
+    standard {
+        process.executor = 'local'
+    }
+
+    debug {
+        process.echo = true
+    }
+
+    stubs {
+        process.executor = 'local'
+        params.max_cpus = 1
+        params.max_mem = 1.GB
+        params.max_time = 1.h
+
+        params.bcbio = "$projectDir/tests/scripts/bcbio_nextgen.py"
+        params.target_bed = "$projectDir/tests/assets/input_data/Twist_Exome_RefSeq_targets_hg38.plus15bp.bed"
+        params.reference_genome = "$projectDir/tests/assets/ref.fa"
+        params.output_dir = "$projectDir/tests/outputs"
+    }
+
+    slurm {
+        process.executor = 'slurm'
+    }
+
+    sge {
+        process.executor = 'sge'
+    }
+}
+
+
 process {
-    executor = 'slurm'
-    cpus = 4
-    memory = 8.GB
-    time = '6h'
+    cpus = get_cpus(4)
+    memory = get_mem(8.GB)
+    time = get_time(6.h)
 
-    withLabel: small {
+    withLabel: local {
         executor = 'local'
-        cpus = 2
-        memory = 2.GB
+    }
+
+    withLabel: small {
+        cpus = get_cpus(2)
+        memory = get_mem(2.GB)
     }
 
     withLabel: medium {
-        cpus = 4
-        memory = 8.GB
+        cpus = get_cpus(4)
+        memory = get_mem(8.GB)
     }
 
     withLabel: large {
-        cpus = 16
-        memory = 32.GB
+        cpus = get_cpus(16)
+        memory = get_mem(32.GB)
     }
-}
 
-profiles {
-    debug {
-        process.echo = true
+    withLabel: long {
+        time = get_time(48.h)
     }
 }
 
+
+// Resource-request helpers used by the process scope above: each one
+// clamps the requested value between the params.min_* and params.max_* limits.
+// The floor is applied first, so the max_* limit wins if the two conflict.
+
+// CPU count: plain numeric clamp.
+def get_cpus(cpus) {
+    def floored = Math.max(params.min_cpus, cpus)
+    return Math.min(params.max_cpus, floored)
+}
+
+// Memory: clamp on the `size` value of each operand, then coerce the
+// resulting number back to a MemoryUnit.
+def get_mem(mem) {
+    def lower = params.min_mem.size
+    def upper = params.max_mem.size
+    def floored = Math.max(lower, mem.size)
+    def clamped = Math.min(upper, floored)
+    return clamped as nextflow.util.MemoryUnit
+}
+
+// Time: clamp on milliseconds, then coerce the resulting number back
+// to a Duration.
+def get_time(time) {
+    def lower = params.min_time.toMillis()
+    def upper = params.max_time.toMillis()
+    def floored = Math.max(lower, time.toMillis())
+    def clamped = Math.min(upper, floored)
+    return clamped as nextflow.util.Duration
+}
diff --git a/pipeline/var_calling.nf b/pipeline/var_calling.nf
index 4ef7a4af39877a7e371967e949d11c771f6ed620..2e0210e4b90d4f55b707e379d2555ed9733d998b 100644
--- a/pipeline/var_calling.nf
+++ b/pipeline/var_calling.nf
@@ -37,13 +37,13 @@ process write_bcbio_csv {
 
     script:
     """
-    #!/usr/bin/env python
+    #!/usr/bin/env python3
     import os
 
     target_bed = os.path.realpath('${target_bed}')
     individual_info = '$individual_info'
     lines = individual_info.lstrip('[').rstrip(']').split('], [')
-    
+
     with open('${family_id}.csv', 'w') as f:
         f.write('samplename,description,batch,sex,phenotype,variant_regions\\n')
         for l in lines:
@@ -71,6 +71,38 @@ process bcbio_family_processing {
     cd ${family_id}-merged &&
     ../${bcbio}/anaconda/bin/bcbio_nextgen.py config/${family_id}-merged.yaml -n 16 -t local
     """
+
+    stub:
+    """
+    # Mock the bcbio output tree with empty placeholder files
+    output_dir=${family_id}-merged/results
+    family_dir="\$output_dir/\$(date '+%Y-%m-%d')_${family_id}-merged"
+    mkdir -p \$family_dir ${family_id}-merged/config
+    touch ${family_id}-merged/config/${family_id}-merged{.csv,.yaml,-template.yaml}
+    cd \$family_dir
+    # brace patterns must stay outside the quotes, otherwise bash will not expand them
+    touch "\$(echo ${family_id} | sed 's/_//g')"-gatk-haplotype-annotated.vcf.gz{,.tbi} bcbio-nextgen{,-commands}.log data_versions.csv
+    touch project-summary.yaml metadata.csv programs.txt
+    mkdir multiqc multiqc_data report
+    touch list_files_final.txt multiqc_config.yaml multiqc_report.html
+    cd ..
+
+    for i in ${individuals.collect().join(' ')}
+    do
+        mkdir -p \$i/qc
+        cd \$i
+        touch \$i-{callable.bed,ready.bam,ready.bam.bai}
+        cd qc
+        mkdir contamination coverage fastqc peddy samtools variants
+        touch contamination/\$i-verifybamid.{out,selfSM}
+        touch coverage/cleaned-Twist_Exome_RefSeq_targets_hg38.plus15bp-merged-padded.bed
+        touch fastqc/{\$i.zip,fastqc_data.txt,fastqc_report.html}
+        touch peddy/{\$i.ped_check.csv,\$i.peddy.ped,\$i.sex_check.csv}
+        touch samtools/{\$i-idxstats.txt,\$i.txt}
+        touch variants/\${i}_bcftools_stats.txt
+        cd ../..
+    done
+    """
 }
 
 
@@ -96,6 +128,7 @@ process format_bcbio_individual_outputs {
         ln -s \$indv_input/\${i}-callable.bed \$indv_output/\${i}-callable.bed &&
         ln -s \$indv_input/qc \$indv_output/qc &&
 
+        # todo: make cram compression its own process
         bam=\$indv_input/\$i-ready.bam
         cram="\$indv_output/\$i-ready.cram" &&
         \$samtools view -@ ${task.cpus} -T ${reference_genome} -C -o \$cram \$bam &&
@@ -113,6 +146,29 @@ process format_bcbio_individual_outputs {
         fi
     done
     """
+
+    stub:
+    """
+    mkdir individual_outputs
+    for i in ${individuals.join(' ')}
+    do
+        indv_input=\$PWD/${bcbio_output_dir}/results/\$i
+        indv_output=individual_outputs/\$i &&
+        mkdir -p \$indv_output &&
+
+        ln -s \$indv_input/\${i}-callable.bed \$indv_output/\${i}-callable.bed &&
+        ln -s \$indv_input/qc \$indv_output/qc &&
+
+        bam=\$indv_input/\$i-ready.bam
+        cram="\$indv_output/\$i-ready.cram" &&
+        cp \$bam \$cram &&
+        touch \$cram.crai &&
+        bam_flagstat=./\$i-ready.bam.flagstat.txt &&
+        cram_flagstat=\$cram.flagstat.txt &&
+        touch \$bam_flagstat &&
+        touch \$cram_flagstat
+    done
+    """
 }
 
 
@@ -176,7 +232,6 @@ process collate_pipeline_outputs {
     path(ped_file)
     path(samplesheet)
     path(bcbio)
-    path(parse_peddy_output)
 
     output:
     path("${params.pipeline_project_id}_${params.pipeline_project_version}")
@@ -186,7 +241,7 @@ process collate_pipeline_outputs {
     outputs=${params.pipeline_project_id}_${params.pipeline_project_version}
     mkdir \$outputs &&
     mkdir \$outputs/{config,families,params,prioritization,qc} &&
-    
+
     for d in ${bcbio_family_output_dirs.join(' ')}
     do
         cp -rL \$d \$outputs/families/\$(basename \$d)
@@ -198,28 +253,72 @@ process collate_pipeline_outputs {
     done &&
 
     cd \$outputs/families &&
+
+    # todo: make multiqc its own process
     ../../${bcbio}/anaconda/bin/multiqc \
         --title "Trio whole exome QC report: ${params.pipeline_project_id}_${params.pipeline_project_version}" \
         --outdir ../qc \
         --filename ${params.pipeline_project_id}_${params.pipeline_project_version}_qc_report.html \
         . &&
-    
-    peddy_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
-    perl ../../${parse_peddy_output} \
-        --output \$peddy_output \
+
+    peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
+    peddy_validation.pl \
+        --output \$peddy_validation_output \
         --project ${params.pipeline_project_id} \
-        --batch ${bcbio_family_output_dirs[0].getName().split('_')[1]} \
         --version ${params.pipeline_project_version} \
         --ped ../../${ped_file} \
         --families . &&
 
     # no && here - exit status checked below
-    grep -v 'False\$' \$peddy_output
-    if [ \$? -ne 0 ]
-    then
-        echo "Found Peddy mismatches in \$peddy_output"
-        exit 1
-    fi &&
+#    grep -v 'False\$' \$peddy_validation_output
+#    if [ \$? -ne 0 ]
+#    then
+#        echo "Found Peddy mismatches in \$peddy_validation_output"
+#        exit 1
+#    fi &&
+
+    cd ../.. &&
+
+    for d in ${raw_bcbio_output_dirs.join(' ')}
+    do
+        family_id_merged=\$(basename \$d)
+        family_id=\$(echo \$family_id_merged | sed 's/-merged//') &&
+        dest_basename=${params.pipeline_project_id}_${params.pipeline_project_version}_\$family_id &&
+        cp -L \$d/config/\$family_id_merged.csv \$outputs/params/\$dest_basename.csv &&
+        cp -L \$d/config/\$family_id_merged.yaml \$outputs/config/\$dest_basename.yaml &&
+        cp -L ${ped_file} \$outputs/params/ &&
+        cp -L ${samplesheet} \$outputs/params/
+    done
+    """
+
+    stub:
+    """
+    outputs=${params.pipeline_project_id}_${params.pipeline_project_version}
+    mkdir \$outputs &&
+    mkdir \$outputs/{config,families,params,prioritization,qc} &&
+
+    for d in ${bcbio_family_output_dirs.join(' ')}
+    do
+        cp -rL \$d \$outputs/families/\$(basename \$d)
+    done &&
+
+    for f in ${family_ids.join(' ')}
+    do
+        grep \$f ${ped_file} > \$outputs/params/\$f.ped
+    done &&
+
+    cd \$outputs/families &&
+
+    # todo: make multiqc its own process
+    echo "Trio whole exome QC report: ${params.pipeline_project_id}_${params.pipeline_project_version}" > ../qc/${params.pipeline_project_id}_${params.pipeline_project_version}_qc_report.html &&
+
+    peddy_validation_output=../qc/${params.pipeline_project_id}_${params.pipeline_project_version}.ped_check.txt &&
+    peddy_validation.pl \
+        --output \$peddy_validation_output \
+        --project ${params.pipeline_project_id} \
+        --version ${params.pipeline_project_version} \
+        --ped ../../${ped_file} \
+        --families . &&
 
     cd ../.. &&
 
@@ -252,12 +351,11 @@ workflow process_families {
         ch_individuals
         ch_ped_file
         ch_samplesheet
-    
+
     main:
         ch_bcbio = file(params.bcbio, checkIfExists: true)
         ch_bcbio_template = file(params.bcbio_template, checkIfExists: true)
         ch_target_bed = file(params.target_bed, checkIfExists: true)
-        ch_parse_peddy_output = file(params.parse_peddy_output, checkIfExists: true)
         ch_reference_genome = file(params.reference_genome, checkIfExists: true)
 
         ch_merged_fastqs = merge_fastqs(
@@ -304,7 +402,7 @@ workflow process_families {
             ch_bcbio_family_outputs,
             ch_bcbio,
             ch_reference_genome
-            
+
         )
         ch_formatted_bcbio_outputs = format_bcbio_family_outputs(
             ch_bcbio_family_outputs.join(ch_individual_folders)
@@ -316,8 +414,7 @@ workflow process_families {
             ch_formatted_bcbio_outputs.map({it[2]}).collect(),
             ch_ped_file,
             ch_samplesheet,
-            ch_bcbio,
-            ch_parse_peddy_output
+            ch_bcbio
         )
 }
 
diff --git a/tests/Dockerfile b/tests/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..10aea40c248fea6ef5a417cc79b51d3742329a6f
--- /dev/null
+++ b/tests/Dockerfile
@@ -0,0 +1,13 @@
+# Docker image for continuous integration testing in GitLab CI with the
+# Docker executor. The image derives from AlmaLinux 8.6 and adds
+# Nextflow and basic dependencies (Java, Python, Perl, etc.) but not
+# bioinformatics tools - these should be mocked up in CI with stubs.
+
+FROM almalinux:8.6
+
+WORKDIR /opt
+RUN dnf install -y java-11-openjdk python3 perl > dnf_install.log 2>&1
+# -f: fail the build on an HTTP error rather than saving the error page as ./nextflow
+RUN curl -fL -o ./nextflow https://github.com/nextflow-io/nextflow/releases/download/v22.04.3/nextflow-22.04.3-all && chmod u+x nextflow
+ENV PATH=/opt:$PATH
+ENTRYPOINT /bin/bash
diff --git a/tests/assets/bcbio/bcbio_template.yaml b/tests/assets/bcbio/bcbio_template.yaml
deleted file mode 100644
index c0dad7fbc7e8c7ededcf3f67236a4d79495468ad..0000000000000000000000000000000000000000
--- a/tests/assets/bcbio/bcbio_template.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-details:
-- algorithm:
-    platform: illumina
-    quality_format: standard
-    aligner: bwa
-    align_split_size: false
-    trim_reads: fastp
-    adapters: [nextera2, polyg]
-    mark_duplicates: true
-    realign: false
-    recalibrate: true
-    effects: vep
-    effects_transcripts: all
-    variantcaller: gatk-haplotype
-    indelcaller: false
-    remove_lcr: true
-    tools_on:
-    - vep_splicesite_annotations
-  analysis: variant2
-  genome_build: hg38
-upload:
-  dir: outputs/bcbio/results
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..85aba36b871d16431e0db24f23ff6176ce1b0a99 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9b0a2921dcf5633ec0e7daa7a9a1b7bbe55cdc67 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..722bf399a6088d7ac14dba5f1ce078e02d4c8d5e 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f083a1e2f8e715e26de70b342520a2711b53bf44 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..056b1546cf625a4ee69cc677cba1b514e16c9e20 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e5fae9a928745c0c690b8e66780eb15c949e2c6c 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3633eb9658db1a3a603bf1f67f2a4a5e3cf7cdc6 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6468cdc61ebfc7fd590251640a0e37e4c77e8c27 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..64fba04bf15e0427410ff8724fbcf2af1cd5cf60 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0b284f1103cfdb87453d4d61257be54d68ffa4a0 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ee2e900b9e3c381ed07d10928b3d1e2223ee0862 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9cbf91ea0630bd7a451f1407b95338a32809c203 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0dd5c42d17fd1270f4ad4ba511b5f707eb3e4db2 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..56de8ddd9cb573e84e900ec03d1ffa8c3be2bf24 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..638b479498358ea1cab7dfa6bf852e66ae2d61e4 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..fc3fd1c33127bd61216aff2435c048ad7da1f36b 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..baaa0fe5ed2157a21ffd191ecdf283625dd86a6c 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..02c0c019682846324511f6935cd3c7c7d0f742b3 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..254508d90010216816ed28b81bd3552f8e3fe7c7 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9fe2616c6e3849fa15eb459b0411af8e179fc60b 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_2.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/md5sums.txt b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/md5sums.txt
index 25e1b82de4ce7cfd878dbb3b55ff01c3754ebca9..3c29ce5c179783f894943003b6e0590761745844 100644
--- a/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/md5sums.txt
+++ b/tests/assets/input_data/edinburgh_genomics/X12345_A_Researcher/20210922/md5sums.txt
@@ -1,32 +1,40 @@
 d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_1.fastq.gz
+ff8e9778919b20dd68feb85d98dd71c1  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_2.fastq.gz
+c29c49cb98fa142d55f15593db614bb1  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_1_00001AM0001L01_2.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_1.fastq.gz
+a8be92b4125079047231d908878b629b  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_2.fastq.gz
+fdfff34f486e4983ed7fcfb01a27b94b  12345_000001_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_2_00001AM0001L01_2.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_1.fastq.gz
+5ce6fd40ff99de36cc1349b10ed4e84d  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_2.fastq.gz
+a61eaa2e396dd876331cbc7d80e0a97e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_3_00002AM0001L01_2.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_1.fastq.gz
+cc18d33f52a274ca942e5ab6658eb2ea  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_2.fastq.gz
+e194ff7dd2f6ccccb91924fe4ec5e42f  12345_000002_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_4_00002AM0001L01_2.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_1.fastq.gz
+df1a24a8a06148c630bc28e13e781afd  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_2.fastq.gz
+698fe14b092b458d55999bb93e03e5b7  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_5_00003AM0001L01_2.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_1.fastq.gz
+57ca2fbf2e23d090db7b259263ab7745  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_2.fastq.gz
+b5b39a73e5a795f95ec619d392beb396  12345_000003_000001_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_6_00003AM0001L01_2.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_1.fastq.gz
+21b06127503d2ab8b177d68c9a91721d  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_2.fastq.gz
+4392717bb3aa768a9eb0c04aef9c69bc  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_7_00004AM0001L01_2.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_1.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_1.fastq.gz
+9583a802f8adfe12b7231475af7ee0bd  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_1.fastq.gz
 d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_2.fastq.count
-d41d8cd98f00b204e9800998ecf8427e  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_2.fastq.gz
+70cb93d77d74cd2c891987151a92f38a  12345_000004_000002_WESTwist_IDT-B/200922_A00001_0001_BHNTGMDMXX_8_00004AM0001L01_2.fastq.gz
+d41d8cd98f00b204e9800998ecf8427e  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_1.fastq.count
+dc9b03bd11623d4849c51f2ca048a4fc  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_1.fastq.gz
+d41d8cd98f00b204e9800998ecf8427e  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_2.fastq.count
+df6f6f0def68997002213e94fedbe4c5  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_1_00005AM0001L01_2.fastq.gz
+d41d8cd98f00b204e9800998ecf8427e  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_1.fastq.count
+7ca1184e141e53014a8330aca0674315  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_1.fastq.gz
+d41d8cd98f00b204e9800998ecf8427e  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_2.fastq.count
+d7f2cafffa026c278b79faf113fc79e9  12345_000005_000002_WESTwist_IDT-B/200923_A00001_0002_BRLSHNMKBX_2_00005AM0001L01_2.fastq.gz
diff --git a/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_1.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_1.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..24a16700621ab0599e85b5ce068e84307636d94d 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_1.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_1.fastq.gz differ
diff --git a/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_2.fastq.gz b/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_2.fastq.gz
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9cb0f0feb57cd189699be9eadde365a4a2011c68 100644
Binary files a/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_2.fastq.gz and b/tests/assets/input_data/edinburgh_genomics/X12346_MD5_Errors/20211005/12346_000006_000003_WESTwist_IDT-B/211005_A00002_0002_AJTHSNRLXX_1_00002AM0002L01_2.fastq.gz differ
diff --git a/tests/assets/ref.fa b/tests/assets/ref.fa
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/run_stubs.sh b/tests/run_stubs.sh
new file mode 100755
index 0000000000000000000000000000000000000000..3f8f86418e86414da673d550086d53b052a097d3
--- /dev/null
+++ b/tests/run_stubs.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Stub-run tests for the pipeline (nextflow -stub-run with the "stubs" profile).
+# Paths are relative, so run this from the tests/ directory.
+# Exits 0 only when test case 1 succeeds and test case 2 fails as expected.
+
+# Presumably defines run_nextflow, which is not declared in this script.
+source scripts/nextflow_detached.sh
+# NOTE(review): not referenced in this script; presumably read by run_nextflow
+# or the stubs profile - confirm, otherwise drop it.
+bcbio=$PWD/scripts/bcbio_nextgen.py
+
+test_exit_status=0
+
+# Remove leftovers under work/ from earlier runs; -f keeps a fresh checkout quiet.
+rm -rf work/*/*
+
+echo "Test case 1: simple trio"
+run_nextflow ../main.nf \
+    -stub-run -profile stubs \
+    --workflow variant-calling \
+    --pipeline_project_id test_stub \
+    --pipeline_project_version v1 \
+    --ped_file assets/input_data/ped_files/batch_1.ped \
+    --sample_sheet assets/input_data/sample_sheets/batch_1.tsv
+
+# No command may run between run_nextflow and this line: $? must be its status.
+test_exit_status=$(( test_exit_status + $? ))
+
+echo "Test case 2: MD5 errors"
+run_nextflow ../main.nf \
+    -stub-run -profile stubs \
+    --workflow variant-calling \
+    --pipeline_project_id test_stub_md5_errors \
+    --pipeline_project_version v1 \
+    --ped_file assets/input_data/ped_files/batch_2_md5_errors.ped \
+    --sample_sheet assets/input_data/sample_sheets/batch_2_md5_errors.tsv
+
+# This run is expected to fail, so a zero exit status counts as a test failure.
+if [ $? -eq 0 ]
+then
+    test_exit_status=$(( test_exit_status + 1 ))
+fi
+
+# Exit codes are taken modulo 256; cap the sum so failures can never wrap to 0.
+if [ "$test_exit_status" -gt 255 ]
+then
+    test_exit_status=255
+fi
+
+echo "Tests finished with exit status $test_exit_status"
+exit $test_exit_status
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
deleted file mode 100755
index 18d0898efc95f666e9d3b8aba5e7f0e55001abd5..0000000000000000000000000000000000000000
--- a/tests/run_tests.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-source scripts/nextflow_detached.sh
-bcbio=$PWD/scripts/bcbio_nextgen.py
-bcbio_prepare_samples=$PWD/scripts/bcbio_prepare_samples.py
-
-common_args="--bcbio $bcbio --bcbio_prepare_samples $bcbio_prepare_samples --bcbio_template $PWD/bcbio/bcbio_template.yaml"
-
-test_exit_status=0
-
-nextflow clean -f
-
-echo "Test case 1: simple trio"
-run_nextflow ../pipeline/main.nf --ped_file assets/input_data/ped_files/batch_1.ped  --sample_sheet assets/input_data/sample_sheets/batch_1.tsv $common_args
-test_exit_status=$(( $test_exit_status + $? ))
-for f in "
-    outputs/individuals/000001/merged_fastqs/000001_merged_r1.fastq.gz
-    outputs/individuals/000001/merged_fastqs/000001_merged_r2.fastq.gz
-    outputs/individuals/000002/merged_fastqs/000002_merged_r1.fastq.gz
-    outputs/individuals/000002/merged_fastqs/000002_merged_r2.fastq.gz
-    outputs/individuals/000003/merged_fastqs/000003_merged_r1.fastq.gz
-    outputs/individuals/000003/merged_fastqs/000003_merged_r2.fastq.gz
-    outputs/families/000001/000001.csv
-"
-do
-    ls $f > /dev/null
-    test_exit_status=$(( $test_exit_status + $? ))
-done
-
-echo "Test case 2: MD5 errors"
-run_nextflow ../pipeline/main.nf --ped_file assets/input_data/ped_files/batch_2_md5_errors.ped  --sample_sheet assets/input_data/sample_sheets/batch_2_md5_errors.tsv $common_args
-if [ $? == 0 ]
-then
-    test_exit_status=$(( $test_exit_status + 1 ))
-fi
-
-echo "Tests finished with exit status $test_exit_status"