From b05eb4fd558eaeea1e51e621624d58874ad31748 Mon Sep 17 00:00:00 2001
From: Murray Wham <murray.wham@ed.ac.uk>
Date: Tue, 18 Mar 2025 18:01:06 +0000
Subject: [PATCH] Adding --policy_accession to createdataset (falls back on
 --policy_receipt). Fixing metadata/tests/samples.csv (dropping the title column)

---
 metadata/ega_metadata.py   | 10 +++++++---
 metadata/tests/samples.csv | 22 +++++++++++-----------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/metadata/ega_metadata.py b/metadata/ega_metadata.py
index d9ccbbe..8dae105 100644
--- a/metadata/ega_metadata.py
+++ b/metadata/ega_metadata.py
@@ -64,7 +64,8 @@ options = {
     },
     'createdataset': {
         'run_receipt': {'help': 'Receipt file containing Runs to include in this dataset. Can be specified multiple times, e.g. if the upload was done in batches.', 'nargs': '+'},
-        'policy_receipt': {'help': 'Receipt file containing a Policy to use in this dataset.'},
+        'policy_accession': {'help': 'EGAP accession number of a policy to use. Either this or policy_receipt must be specified'},
+        'policy_receipt': {'help': 'Receipt file containing a policy to use in this dataset. Must be specified if policy_accession is not.'},
         'dataset_title': {'help': 'Dataset title'},
         'dataset_type': {'help': 'Dataset type accepted by EGA, e.g. Whole genome sequencing', 'choices': ('Whole genome sequencing', 'Exome sequencing', 'Genotyping by array', 'Transcriptome profiling by high-throughput sequencing', 'Transcriptome profiling by array', 'Amplicon sequencing', 'Methylation binding domain sequencing', 'Methylation profiling by high-throughput sequencing', 'Phenotype information', 'Study summary information', 'Genomic variant calling', 'Chromatin accessibility profiling by high-throughput sequencing', 'Histone modification profiling by high-throughput sequencing', 'Chip-Seq')}
     },
@@ -363,8 +364,11 @@ def createdataset(config):
         for r in dom.getElementsByTagName('RUN'):
             run_accessions.add(r.attributes['accession'].value)
 
-    dom = xml.dom.minidom.parse(config['policy_receipt'])
-    policy_accession = dom.getElementsByTagName('POLICY')[0].attributes['accession'].value
+    if 'policy_accession' in config:
+        policy_accession = config['policy_accession']
+    else:
+        dom = xml.dom.minidom.parse(config['policy_receipt'])
+        policy_accession = dom.getElementsByTagName('POLICY')[0].attributes['accession'].value
 
     idgen = IDGenerator(xml_dir)
     write_xml(
diff --git a/metadata/tests/samples.csv b/metadata/tests/samples.csv
index 6e7dde1..90fcc1e 100644
--- a/metadata/tests/samples.csv
+++ b/metadata/tests/samples.csv
@@ -1,11 +1,11 @@
-id,title,taxon_id,scientific_name,common_name,description,sex,phenotype,some_other_annotation
-sample1,Sample 1,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,this
-sample2,Sample 2,9606,,,A whole-genome sequenced human sample,male,normal,that
-sample3,Sample 3,9606,homo_sapiens,human,A whole-genome sequenced human sample,male,affected,other
-sample4,Sample 4,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,another
-sample5,Sample 5,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,more
-sample6,Sample 6,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
-sample7,Sample 7,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
-sample8,Sample 8,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
-sample9,Sample 9,9606,homo sapiens,human,A whole-genome sequenced human sample,male,normal,
-sample10,Sample 10,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
+id,taxon_id,scientific_name,common_name,description,sex,phenotype,some_other_annotation
+sample1,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,this
+sample2,9606,,,A whole-genome sequenced human sample,male,normal,that
+sample3,9606,homo_sapiens,human,A whole-genome sequenced human sample,male,affected,other
+sample4,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,another
+sample5,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,more
+sample6,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
+sample7,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
+sample8,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
+sample9,9606,homo sapiens,human,A whole-genome sequenced human sample,male,normal,
+sample10,9606,homo sapiens,human,A whole-genome sequenced human sample,female,normal,
-- 
GitLab