From fc8726eff5dec34bfa6c68340e7e7ba4b4153169 Mon Sep 17 00:00:00 2001
From: Murray Wham <murray.wham@ed.ac.uk>
Date: Fri, 2 Aug 2024 16:30:31 +0100
Subject: [PATCH 1/3] Pulling in changes from newer GitHub repo

---
 README.md         | 20 +++++++++++---
 conf/eddie.config | 40 ----------------------------
 main.nf           | 67 ++++++++++++++++++++---------------------------
 nextflow.config   | 23 +++++++++++++---
 4 files changed, 63 insertions(+), 87 deletions(-)
 delete mode 100644 conf/eddie.config

diff --git a/README.md b/README.md
index e241b53..a909df8 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 
 ## Resources
 
-Download the EGA Cryptor JAR file. It's at bin/ega-cryptor-2.0.0.jar in this repository if the EGA link doesn't work.
+Download the EGA Cryptor JAR file. It's at `bin/ega-cryptor-2.0.0.jar` in this repository if the EGA link doesn't work.
 
 ```
 wget https://ega-archive.org/files/EgaCryptor.zip
@@ -18,7 +18,9 @@ rm EgaCryptor.zip
 
 ## Running
 
-The CSV file used to upload sample metadata to EGA must be provided. It links the internal EGA sample alias to its name. This pipeline assumes that the FASTQ files for upload are named in the format sample_R1.fastq.gz, sample_R2.fastq.gz.
+The CSV file used to upload sample metadata to EGA must be provided. It links the internal EGA sample alias to its name. This pipeline assumes that the FASTQ files for upload are named in the format `sample_R1.fastq.gz`, `sample_R2.fastq.gz`, where `sample` is the entry in the `subjectId` field of the sample CSV file.
+
+To run and upload automatically:
 
 ```
 nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-portal \
@@ -31,9 +33,19 @@ nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-por
   --ega_password password
 ```
 
+To encrypt and produce a `runs.csv` file without uploading:
+
+```
+nextflow run ameynert/ega-submission-via-portal \
+  -profile conda \
+  --reads '*_R{1,2}.fastq.gz' \
+  --samples /absolute/path/to/samples.csv \
+  --ega_cryptor /absolute/path/to/ega-cryptor-2.0.0.jar \
+  --outdir output
+```
+
 The CSV file for connecting uploaded paired-end FASTQ files to their sample aliases in the EGA Submitter Portal will be in the specified output folder as runs.csv.
 
 ## Credits
 
-ega-submission-via-portal was originally written by Alison Meynert (alison.meynert@igmm.ed.ac.uk).
-
+Alison Meynert (alison.meynert@ed.ac.uk).
diff --git a/conf/eddie.config b/conf/eddie.config
deleted file mode 100644
index fa62c11..0000000
--- a/conf/eddie.config
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * ----------------------------------------------------
- *  University of Edinburgh eddie config file
- * ----------------------------------------------------
- */
-
-executor = "local"
-
-process {
-
-  beforeScript = """
-  . /etc/profile.d/modules.sh
-  sleep 2;
-  """
-
-  penv = "sharedmem"
-
-  cpus = 2
-  memory = 4.GB
-  time = 4.h
-  clusterOptions = "-l h_vmem=${memory.toString().replaceAll(/[\sB]/,'')}"
-
-  errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'terminate' }
-  maxRetries = 1
-  maxErrors = '-1'
-
-  withName: encrypt {
-    cpus = { check_max( 8, 'cpus' ) }
-    memory = { check_max( 16.GB * task.attempt, 'memory' ) }
-    time = { check_max( 4.h * task.attempt, 'time' ) }
-  }
-
-}
-
-params {
-  // Defaults only, expecting to be overwritten
-  max_memory = 256.GB
-  max_cpus = 16
-  max_time = 240.h
-}
diff --git a/main.nf b/main.nf
index 5e7c66d..bdad8db 100644
--- a/main.nf
+++ b/main.nf
@@ -13,20 +13,24 @@ def helpMessage() {
 
     The typical command for running the pipeline is as follows:
 
-    nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-portal --reads '*_R{1,2}.fastq.gz' -profile conda
+    nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-portal \
+        --samples samples.csv \
+        --reads '*_R{1,2}.fastq.gz' \
+        --ega_cryptor /path/to/ega_cryptor.jar \
+        -profile conda
 
     Mandatory arguments:
-      --reads [file]                Path to input data (must be surrounded with quotes)
-      --samples [file]              Path to EGA sample.csv file
+      --samples [file]              Path to samples CSV file
+      --reads [file]                Path to input data (must be surrounded with quotes, e.g. '*_R[1,2].fastq.gz]')
       --ega_cryptor [file]          Absolute path to EGA Cryptor JAR file (included in bin/ega-cryptor-2.0.0.jar)
-      --ega_user [str]              EGA upload box account (e.g. ega-box-1234)
-      --ega_pass [str]              Password for EGA upload box account (TODO: securely pass this through to the upload process)
       -profile [str]                Configuration profile to use. Can use multiple (comma separated)
                                     Available: conda
 
     Other options:
-      --outdir [file]                 The output directory where the results will be saved
-      -name [str]                     Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic
+      --outdir [file]               The output directory where the results will be saved
+      --ega_user [str]              EGA upload box account (e.g. ega-box-1234)
+      --ega_password [str]          Password for EGA upload box account, must be specified if --ega-user is specified
+      -name [str]                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic
 
     """.stripIndent()
 }
@@ -56,48 +60,34 @@ ch_read_files = Channel
     .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!" }
 
 /*
- * STEP 1 - Encrypt
+ * STEP 1 - Encrypt the FASTQ files. Generate a line of CSV output for runs, if not uploading to EGA, move the
+ * encrypted files and md5 checksums to the output directory.
  */
 process encrypt {
     tag "$name"
 
+    if (params.ega_user.length() == 0) {
+      publishDir "${params.outdir}", pattern: '*.{gpg,md5}', mode: 'move'
+    }
+
     input:
-    set val(name), file(reads) from ch_read_files
+    set val(sample), file(reads) from ch_read_files
 
     output:
-    set val(name), file('*') into ch_encrypt_results
+    file '*.csv' into ch_runs_csv_output
+    set val(sample), file('*') into ch_upload_input
 
     script:
     """
     java -Xmx8g -jar ${params.ega_cryptor} -i ${reads[0]} -t 8 -o .
     java -Xmx8g -jar ${params.ega_cryptor} -i ${reads[1]} -t 8 -o .
-    """
-}
-
-/*
- * Duplicate the encrypted reads channel
- */
-ch_encrypt_results.into { ch_runs_csv_input; ch_upload_input }
-
-/*
- * STEP 2 - Generate a line of CSV output for runs
- */
-process runs_csv {
 
-    input:
-    set sample, file(files) from ch_runs_csv_input
-
-    output:
-    file "*.csv" into ch_runs_csv_output
-
-    script:
-    """
-    echo "${sample},${sample}_R1.fastq.gz,`cat ${files[1]}`,`cat ${files[2]}`,${sample}_R2.fastq.gz,`cat ${files[4]}`,`cat ${files[5]}`" > ${sample}.csv
+    echo "sample_${sample},${sample}_R1.fastq.gz,`cat ${sample}_R1.fastq.gz.gpg.md5`,`cat ${sample}_R1.fastq.gz.md5`,${sample}_R2.fastq.gz,`cat ${sample}_R2.fastq.gz.gpg.md5`,`cat ${sample}_R2.fastq.gz.md5`" > ${sample}.csv
     """
 }
 
 /*
- * STEP 3 - Collect the CSV output for runs
+ * STEP 2 - Collect the CSV output for runs
  */
 process collect_runs_csv {
 
@@ -112,16 +102,15 @@ process collect_runs_csv {
     script:
     runs = "runs.csv"
     """
-    echo \"Sample alias\",\"First Fastq File\",\"First Checksum\",\"First Unencrypted checksum\",\"Second Fastq File\",\"Second Checksum\",\"Second Unencrypted checksum\" > runs_pre.csv
-    cat ${files} >> runs_pre.csv
-    map_sample_alias.pl -i runs_pre.csv -s ${params.samples} -o ${runs}
+    echo \"Sample alias\",\"First Fastq File\",\"First Checksum\",\"First Unencrypted checksum\",\"Second Fastq File\",\"Second Checksum\",\"Second Unencrypted checksum\" > runs.csv
+    cat ${files} >> runs.csv
     """
 }
 
 /*
- * STEP 4 - Upload output via Aspera to EGA box
+ * STEP 3 - Upload output via Aspera to EGA box
  */
-/*process upload {
+process upload {
 
     input:
     set sample, file(files) from ch_upload_input
@@ -130,7 +119,7 @@ process collect_runs_csv {
 
     script:
     """
-    export ASPERA_SCP_PASS=${params.ega_pass}
+    export ASPERA_SCP_PASS=${params.ega_password}
     ascp -T -P 33001 -O 33001 -l 300M -QT -L- -k 1 ${sample}* ${params.ega_user}@fasp.ega.ebi.ac.uk:/.
     """
-}*/
+}
diff --git a/nextflow.config b/nextflow.config
index 12b87ba..56f35fc 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -11,8 +11,8 @@ params {
   // Workflow flags
   reads = "data/*.fastq.gz"
   ega_cryptor = "bin/ega-cryptor-2.0.0.jar"
-  ega_user = "ega-box-1234"
-  ega_pass = "password"
+  ega_user = ""
+  ega_password = ""
   outdir = './results'
   samples = 'samples.csv'
 
@@ -31,8 +31,23 @@ params {
 
 }
 
-// Load eddie.config by default for all pipelines
-includeConfig 'conf/eddie.config'
+process {
+  withName: encrypt {
+    cpus = 8
+    memory = 12.GB
+    time = 2.h
+  }
+  withName: collect_runs_csv {
+    cpus = 1
+    memory = 2.GB
+    time = 10.m
+  }
+  withName: upload {
+    cpus = 1
+    memory = 2.GB
+    time = 8.h
+  }
+}
 
 profiles {
   conda { process.conda = "$baseDir/environment.yml" }
-- 
GitLab


From c0620ade55d7c74a97db8450622b6c4e68d65bb2 Mon Sep 17 00:00:00 2001
From: Murray Wham <murray.wham@ed.ac.uk>
Date: Fri, 27 Sep 2024 15:02:58 +0100
Subject: [PATCH 2/3] Porting to dsl2. Using egapass file instead of
 --ega_password, moving ega-cryptor JAR to moduleDir - removes need to specify
 --ega_cryptor

---
 README.md                                     |  12 +-
 main.nf                                       | 118 ++++--------------
 modules/local/ega/collectruncsvs.nf           |  18 +++
 modules/local/ega/encrypt/environment.yml     |   8 ++
 modules/local/ega/encrypt/main.nf             |  33 +++++
 .../encrypt/resources}/ega-cryptor-2.0.0.jar  | Bin
 modules/local/ega/upload/environment.yml      |   9 ++
 modules/local/ega/upload/main.nf              |  27 ++++
 nextflow.config                               |  24 +++-
 9 files changed, 145 insertions(+), 104 deletions(-)
 create mode 100644 modules/local/ega/collectruncsvs.nf
 create mode 100644 modules/local/ega/encrypt/environment.yml
 create mode 100644 modules/local/ega/encrypt/main.nf
 rename {bin => modules/local/ega/encrypt/resources}/ega-cryptor-2.0.0.jar (100%)
 create mode 100644 modules/local/ega/upload/environment.yml
 create mode 100644 modules/local/ega/upload/main.nf

diff --git a/README.md b/README.md
index a909df8..7edbc77 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 
 ## Resources
 
-Download the EGA Cryptor JAR file. It's at `bin/ega-cryptor-2.0.0.jar` in this repository if the EGA link doesn't work.
+The EGA Cryptor JAR file comes from ega-archive.org and is bundled in this repository at `modules/local/ega/encrypt/resources/ega-cryptor-2.0.0.jar`.
 
 ```
 wget https://ega-archive.org/files/EgaCryptor.zip
@@ -23,14 +23,13 @@ The CSV file used to upload sample metadata to EGA must be provided. It links th
 To run and upload automatically:
 
 ```
-nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-portal \
+nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-portal \
   -profile conda \
   --reads '*_R{1,2}.fastq.gz' \
   --samples /absolute/path/to/samples.csv \
-  --ega_cryptor /absolute/path/to/ega-cryptor-2.0.0.jar \
   --outdir output \
   --ega_user ega-box-1234 \
-  --ega_password password
+  --egapass /absolute/path/to/egapass
 ```
 
 To encrypt and produce a `runs.csv` file without uploading:
@@ -40,7 +39,6 @@ nextflow run ameynert/ega-submission-via-portal \
   -profile conda \
   --reads '*_R{1,2}.fastq.gz' \
   --samples /absolute/path/to/samples.csv \
-  --ega_cryptor /absolute/path/to/ega-cryptor-2.0.0.jar \
   --outdir output
 ```
 
@@ -48,4 +46,6 @@ The CSV file for connecting uploaded paired-end FASTQ files to their sample alia
 
 ## Credits
 
-Alison Meynert (alison.meynert@ed.ac.uk).
+- Alison Meynert (alison.meynert@ed.ac.uk)
+- Murray Wham (murray.wham@ed.ac.uk)
+
diff --git a/main.nf b/main.nf
index bdad8db..7168406 100644
--- a/main.nf
+++ b/main.nf
@@ -1,4 +1,6 @@
 #!/usr/bin/env nextflow
+nextflow.enable.dsl=2
+
 /*
 ========================================================================================
                          ega-submission-via-portal
@@ -7,119 +9,49 @@
 ----------------------------------------------------------------------------------------
 */
 
-def helpMessage() {
-    log.info"""
+include { EGA_ENCRYPT } from './modules/local/ega/encrypt'
+include { EGA_COLLECTRUNCSVS } from './modules/local/ega/collectruncsvs'
+include { EGA_UPLOAD } from './modules/local/ega/upload'
+
+def helpMessage = """
     Usage:
 
     The typical command for running the pipeline is as follows:
 
-    nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-portal \
-        --samples samples.csv \
-        --reads '*_R{1,2}.fastq.gz' \
-        --ega_cryptor /path/to/ega_cryptor.jar \
+    nextflow run https://git.ecdf.ed.ac.uk/igmmbioinformatics/ega-submission-via-portal
+        --samples samples.csv
+        --reads '*_R{1,2}.fastq.gz'
         -profile conda
 
+
     Mandatory arguments:
       --samples [file]              Path to samples CSV file
       --reads [file]                Path to input data (must be surrounded with quotes, e.g. '*_R[1,2].fastq.gz]')
-      --ega_cryptor [file]          Absolute path to EGA Cryptor JAR file (included in bin/ega-cryptor-2.0.0.jar)
       -profile [str]                Configuration profile to use. Can use multiple (comma separated)
-                                    Available: conda
+                                    Available: conda, stubs
 
     Other options:
       --outdir [file]               The output directory where the results will be saved
       --ega_user [str]              EGA upload box account (e.g. ega-box-1234)
-      --ega_password [str]          Password for EGA upload box account, must be specified if --ega-user is specified
+      --egapass [file]              Absolute path to a file containing the password for the EGA upload box account, must be specified if --ega_user is specified
       -name [str]                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic
+""".stripIndent()
 
-    """.stripIndent()
-}
-
-// Show help message
-if (params.help) {
-    helpMessage()
-    exit 0
-}
-
-/*
- * SET UP CONFIGURATION VARIABLES
- */
-
-// Has the run name been specified by the user?
-//  this has the bonus effect of catching both -name and --name
-custom_runName = params.name
-if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
-    custom_runName = workflow.runName
-}
-
-/*
- * Create a channel for input read files
- */
-ch_read_files = Channel
-    .fromFilePairs(params.reads, size : 2)
-    .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!" }
-
-/*
- * STEP 1 - Encrypt the FASTQ files. Generate a line of CSV output for runs, if not uploading to EGA, move the
- * encrypted files and md5 checksums to the output directory.
- */
-process encrypt {
-    tag "$name"
 
-    if (params.ega_user.length() == 0) {
-      publishDir "${params.outdir}", pattern: '*.{gpg,md5}', mode: 'move'
+workflow {
+    if (params.help) {
+        log.info(helpMessage)
+        exit 0
     }
 
-    input:
-    set val(sample), file(reads) from ch_read_files
+    ch_read_files = Channel
+        .fromFilePairs(params.reads, size: 2)
+        .ifEmpty({ exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!" })
 
-    output:
-    file '*.csv' into ch_runs_csv_output
-    set val(sample), file('*') into ch_upload_input
+    ch_egapass = params.egapass && file(params.egapass).exists() ? Channel.fromPath(params.egapass).collect() : Channel.value('')
 
-    script:
-    """
-    java -Xmx8g -jar ${params.ega_cryptor} -i ${reads[0]} -t 8 -o .
-    java -Xmx8g -jar ${params.ega_cryptor} -i ${reads[1]} -t 8 -o .
-
-    echo "sample_${sample},${sample}_R1.fastq.gz,`cat ${sample}_R1.fastq.gz.gpg.md5`,`cat ${sample}_R1.fastq.gz.md5`,${sample}_R2.fastq.gz,`cat ${sample}_R2.fastq.gz.gpg.md5`,`cat ${sample}_R2.fastq.gz.md5`" > ${sample}.csv
-    """
+    EGA_ENCRYPT(ch_read_files)
+    EGA_COLLECTRUNCSVS(EGA_ENCRYPT.out.csv.map({sample, csv -> csv}).collect())
+    EGA_UPLOAD(EGA_ENCRYPT.out.all, ch_egapass)
 }
 
-/*
- * STEP 2 - Collect the CSV output for runs
- */
-process collect_runs_csv {
-
-    publishDir "${params.outdir}", mode: 'copy'
-
-    input:
-    file(files) from ch_runs_csv_output.collect()
-
-    output:
-    file(runs)
-
-    script:
-    runs = "runs.csv"
-    """
-    echo \"Sample alias\",\"First Fastq File\",\"First Checksum\",\"First Unencrypted checksum\",\"Second Fastq File\",\"Second Checksum\",\"Second Unencrypted checksum\" > runs.csv
-    cat ${files} >> runs.csv
-    """
-}
-
-/*
- * STEP 3 - Upload output via Aspera to EGA box
- */
-process upload {
-
-    input:
-    set sample, file(files) from ch_upload_input
-
-    output:
-
-    script:
-    """
-    export ASPERA_SCP_PASS=${params.ega_password}
-    ascp -T -P 33001 -O 33001 -l 300M -QT -L- -k 1 ${sample}* ${params.ega_user}@fasp.ega.ebi.ac.uk:/.
-    """
-}
diff --git a/modules/local/ega/collectruncsvs.nf b/modules/local/ega/collectruncsvs.nf
new file mode 100644
index 0000000..9d1796e
--- /dev/null
+++ b/modules/local/ega/collectruncsvs.nf
@@ -0,0 +1,18 @@
+/*
+ * STEP 2 - Concatenate the per-sample CSV lines under a header row to produce runs.csv
+ */
+
+process EGA_COLLECTRUNCSVS {
+    input:
+    path(files)  // all per-sample CSV files, collected into a single list by main.nf
+
+    output:
+    path("runs.csv")  // published to params.outdir via the publishDir setting in nextflow.config
+
+    script:
+    """
+    echo \"Sample alias\",\"First Fastq File\",\"First Checksum\",\"First Unencrypted checksum\",\"Second Fastq File\",\"Second Checksum\",\"Second Unencrypted checksum\" > runs.csv
+    cat ${files} >> runs.csv
+    """
+}
+
diff --git a/modules/local/ega/encrypt/environment.yml b/modules/local/ega/encrypt/environment.yml
new file mode 100644
index 0000000..f192694
--- /dev/null
+++ b/modules/local/ega/encrypt/environment.yml
@@ -0,0 +1,8 @@
+---
+name: ega-encrypt
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - conda-forge::openjdk
+
diff --git a/modules/local/ega/encrypt/main.nf b/modules/local/ega/encrypt/main.nf
new file mode 100644
index 0000000..657ada3
--- /dev/null
+++ b/modules/local/ega/encrypt/main.nf
@@ -0,0 +1,33 @@
+/*
+ * STEP 1 - Encrypt each FASTQ pair with EGA Cryptor and write one CSV line per sample for runs.csv.
+ * Publishing of the encrypted files and md5 checksums when not uploading is configured in nextflow.config.
+ */
+process EGA_ENCRYPT {
+    tag "$sample"
+
+    input:
+    tuple val(sample), path(reads)
+
+    output:
+    tuple val(sample), path('*.csv'), emit: csv  // the per-sample CSV line written by the echo below
+    tuple val(sample), path('*.*'), emit: all  // everything matching *.*; main.nf feeds this to EGA_UPLOAD
+
+    script:  // NOTE(review): -Xmx8g and -t 8 are hard-coded; nextflow.config gives this process 8 cpus / 12.GB
+    """
+    java -Xmx8g -jar ${moduleDir}/resources/ega-cryptor-2.0.0.jar -i ${reads[0]} -t 8 -o .
+    java -Xmx8g -jar ${moduleDir}/resources/ega-cryptor-2.0.0.jar -i ${reads[1]} -t 8 -o .
+
+    echo "sample_${sample},${sample}_R1.fastq.gz,`cat ${sample}_R1.fastq.gz.gpg.md5`,`cat ${sample}_R1.fastq.gz.md5`,${sample}_R2.fastq.gz,`cat ${sample}_R2.fastq.gz.gpg.md5`,`cat ${sample}_R2.fastq.gz.md5`" > ${sample}.csv
+    """
+
+    stub:  // stand-in for stub runs: touches empty .md5/.gpg/.gpg.md5 files instead of running the JAR
+    """
+    for f in ${reads}
+    do
+        touch \${f}.{md5,gpg,gpg.md5}
+    done
+
+    echo "sample_${sample},${sample}_R1.fastq.gz,`cat ${sample}_R1.fastq.gz.gpg.md5`,`cat ${sample}_R1.fastq.gz.md5`,${sample}_R2.fastq.gz,`cat ${sample}_R2.fastq.gz.gpg.md5`,`cat ${sample}_R2.fastq.gz.md5`" > ${sample}.csv
+    """
+}
+
diff --git a/bin/ega-cryptor-2.0.0.jar b/modules/local/ega/encrypt/resources/ega-cryptor-2.0.0.jar
similarity index 100%
rename from bin/ega-cryptor-2.0.0.jar
rename to modules/local/ega/encrypt/resources/ega-cryptor-2.0.0.jar
diff --git a/modules/local/ega/upload/environment.yml b/modules/local/ega/upload/environment.yml
new file mode 100644
index 0000000..5430bb4
--- /dev/null
+++ b/modules/local/ega/upload/environment.yml
@@ -0,0 +1,9 @@
+---
+name: ega-upload
+channels:
+  - hcc
+  - conda-forge
+  - defaults
+dependencies:
+  - aspera-cli
+
diff --git a/modules/local/ega/upload/main.nf b/modules/local/ega/upload/main.nf
new file mode 100644
index 0000000..03ef357
--- /dev/null
+++ b/modules/local/ega/upload/main.nf
@@ -0,0 +1,27 @@
+/*
+ * STEP 3 - Upload a sample's files to the EGA box via Aspera; runs only when params.ega_user is set
+ */
+process EGA_UPLOAD {
+    tag "${sample}"
+    conda "${moduleDir}/environment.yml"
+
+    when: params.ega_user  // skip the upload when no EGA box account was given
+
+    input:
+    tuple val(sample), path(files)
+    val(egapass)  // passed as a value, not a path, to avoid excessive linking and accidental copying of the password file if stageInMode 'copy' is used; read as egapass[0] below
+
+    script:
+    """
+    ls ${egapass[0]}
+    export ASPERA_SCP_PASS=\$(cat ${egapass[0]})
+    ascp -T -P 33001 -O 33001 -l 300M -QT -L- -k 1 ${sample}* ${params.ega_user}@fasp.ega.ebi.ac.uk:/.
+    """
+
+    stub:
+    """
+    echo 'Would run:'
+    echo 'export ASPERA_SCP_PASS=\$(cat ${egapass[0]})'
+    echo 'ascp -T -P 33001 -O 33001 -l 300M -QT -L- -k 1 ${sample}* ${params.ega_user}@fasp.ega.ebi.ac.uk:/.'
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index 56f35fc..1dcc4e6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -10,7 +10,6 @@ params {
 
   // Workflow flags
   reads = "data/*.fastq.gz"
-  ega_cryptor = "bin/ega-cryptor-2.0.0.jar"
   ega_user = ""
   ega_password = ""
   outdir = './results'
@@ -32,17 +31,31 @@ params {
 }
 
 process {
-  withName: encrypt {
+  withName: EGA_ENCRYPT {
     cpus = 8
     memory = 12.GB
     time = 2.h
+
+    publishDir = [
+        path: { params.outdir },
+        mode: 'copy',
+        pattern: '*.{gpg,md5}',
+        enabled: params.ega_user == null || params.ega_user == ''
+    ]
   }
-  withName: collect_runs_csv {
+
+  withName: EGA_COLLECTRUNCSVS {
     cpus = 1
     memory = 2.GB
     time = 10.m
+
+    publishDir = [
+        path: { params.outdir },
+        mode: 'copy'
+    ]
   }
-  withName: upload {
+
+  withName: EGA_UPLOAD {
     cpus = 1
     memory = 2.GB
     time = 8.h
@@ -50,7 +63,8 @@ process {
 }
 
 profiles {
-  conda { process.conda = "$baseDir/environment.yml" }
+    conda { conda.enabled = true }
+    stubs { conda.enabled = false }
 }
 
 // Export this variable to prevent local Python libraries from conflicting with those in the container
-- 
GitLab


From 31eedaa033a782461689190f54839e3fe79aac85 Mon Sep 17 00:00:00 2001
From: Murray Wham <murray.wham@ed.ac.uk>
Date: Wed, 23 Oct 2024 15:35:47 +0100
Subject: [PATCH 3/3] Adding conda directive to encryption step. Defaulting
 publish mode to symlink - makes sense for large uploads, and FTP/Aspera can
 deal with symlinks anyway

---
 modules/local/ega/encrypt/main.nf | 1 +
 nextflow.config                   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/local/ega/encrypt/main.nf b/modules/local/ega/encrypt/main.nf
index 657ada3..25b9f14 100644
--- a/modules/local/ega/encrypt/main.nf
+++ b/modules/local/ega/encrypt/main.nf
@@ -4,6 +4,7 @@
  */
 process EGA_ENCRYPT {
     tag "$sample"
+    conda "${moduleDir}/environment.yml"
 
     input:
     tuple val(sample), path(reads)
diff --git a/nextflow.config b/nextflow.config
index 1dcc4e6..75513ae 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -38,7 +38,7 @@ process {
 
     publishDir = [
         path: { params.outdir },
-        mode: 'copy',
+        mode: 'symlink',
         pattern: '*.{gpg,md5}',
         enabled: params.ega_user == null || params.ega_user == ''
     ]
-- 
GitLab