From 61be2bf6b021275f10558e6de55723d90ab89e68 Mon Sep 17 00:00:00 2001
From: ameyner2 <alison.meynert@igmm.ed.ac.uk>
Date: Mon, 23 Sep 2019 13:55:31 +0100
Subject: [PATCH] Initial commit of md5 checksum script for archiving

---
 submit_trio_wes_checksums.sh | 37 ++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100755 submit_trio_wes_checksums.sh

diff --git a/submit_trio_wes_checksums.sh b/submit_trio_wes_checksums.sh
new file mode 100755
index 0000000..4f6cd6c
--- /dev/null
+++ b/submit_trio_wes_checksums.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#PBS -l walltime=48:00:00
+#PBS -l ncpus=1,mem=2gb
+#PBS -q sgp
+#PBS -N trio_whole_exome_checksums
+#PBS -j oe
+
+# enable running singletons
+if [ -z $PBS_ARRAY_INDEX ]
+then
+  if [ -z $INDEX ]
+  then
+    export PBS_ARRAY_INDX=1
+  else
+    export PBS_ARRAY_INDEX=$INDEX
+  fi
+fi
+
+# Expects environment variables to be set
+# PROJECT_ID - e.g. 12345_LastnameFirstname
+# CONFIG_SH - absolute path to configuration script setting environment variables
+
+source $CONFIG_SH
+
+FAMILY_ID=`head -n $PBS_ARRAY_INDEX $PARAMS_DIR/$PROJECT_ID.family_ids.txt | tail -n 1`
+
+# This assumes that ${PROJECT_ID}_${FAMILY_ID} is unique, and it should be - if there was
+# a re-run of a family, it should have a new project id.
+cd $OUTPUT_DIR/*${PROJECT_ID}_${FAMILY_ID}*
+
+rm md5sum.txt 2> /dev/null
+
+for file in `find . -type f | grep -v '\.bam'`
+do
+  md5sum $file >> md5sum.txt
+done
+
-- 
GitLab