From 37028a73f37f246dc0fbe1f728b1ef539faa616a Mon Sep 17 00:00:00 2001
From: Fabian Wachsmann <k204210@cmip-dps.cloud.dkrz.de>
Date: Fri, 21 Oct 2022 11:15:21 +0200
Subject: [PATCH] Update tests for relocated catalog paths and chunked CMIP6 loading

---
 tests/check_load_catalog_cmip5.py  |  2 +-
 tests/check_load_catalog_cmip6.py  | 42 +++++++++++++++++++++---------
 tests/check_load_catalog_cordex.py |  8 +++---
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/tests/check_load_catalog_cmip5.py b/tests/check_load_catalog_cmip5.py
index 60ed0ea..2c7b474 100755
--- a/tests/check_load_catalog_cmip5.py
+++ b/tests/check_load_catalog_cmip5.py
@@ -5,7 +5,7 @@ import unittest
 
 class TestCatalog(unittest.TestCase):
     def test_load_catalog(self):
-        col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip5_disk.json"
+        col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip5_disk.json"
         self.assertTrue(intake.open_esm_datastore(col_url),
                         msg="Intake could not load catalog '{0}'".format(col_url))
         
diff --git a/tests/check_load_catalog_cmip6.py b/tests/check_load_catalog_cmip6.py
index 762aa96..9fe182e 100755
--- a/tests/check_load_catalog_cmip6.py
+++ b/tests/check_load_catalog_cmip6.py
@@ -3,30 +3,48 @@
 import intake
 import unittest
 import pandas as pd
+import json
 
 class TestCatalog(unittest.TestCase):
+    usecols=['project', 'activity_id', 'institution_id',
+    'source_id', 'experiment_id', 'member_id', 'table_id', 'variable_id',
+    'grid_label', 'dcpp_init_year', 'version', 'time_range','format', 'uri']
+    
     def test_load_catalog(self):
-        col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json"
-        testdf=pd.read_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz")
+        col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json"
+        with open(col_url, "r") as f:
+            col_json=json.load(f)
         
-        self.assertTrue(intake.open_esm_datastore(testdf,
-                                                  esmcol_data=col_url),
-                        msg="Intake could not load catalog '{0}'".format(col_url))
+        cs=10 ** 6
+        for testdf in pd.read_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz",
+                                 chunksize=cs) :        
+            self.assertTrue(intake.open_esm_datastore(testdf,
+                                                      esmcol_data=col_json),
+                            msg="Intake could not load catalog '{0}'".format(col_url))
     def test_load_cmipScenario(self):
-        col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json"
-        testdf=pd.read_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz")
-        col = intake.open_esm_datastore(testdf,esmcol_data=col_url)
+        col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json"
+
+        with open(col_url, "r") as f:    
+            col_json=json.load(f)
+
+        testdf=pd.read_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz",
+                                usecols=self.usecols)
+        col = intake.open_esm_datastore(testdf,esmcol_data=col_json)
         cat = col.search(source_id=["MPI-ESM1-2-HR", "AWI-CM-1-2-HR"], activity_id=["CMIP", "ScenarioMIP"])
         noOfExps = cat.unique(columns=["experiment_id"])["experiment_id"]["count"]
         self.assertTrue(noOfExps >= 9,
                        msg="Number of experiments in intake catalog for DICAD-Sources and"
                        "CMIP-Experiments is lower than 9.")
     def test_len_catalog(self):
-        col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json"
-        testdf=pd.read_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz")
-        col = intake.open_esm_datastore(testdf,esmcol_data=col_url)
+        col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json"
+        with open(col_url, "r") as f:    
+            col_json=json.load(f)
+
+        testdf=pd.read_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz",
+                          usecols=self.usecols)
+        col = intake.open_esm_datastore(testdf,esmcol_data=col_json)
         len_new = len(col.df)
-        col_url_old="/home/k/k204210/intake-esm/tests/dkrz_cmip6_disk_old.json"
+        col_url_old="/home/k/k204210/volume/data-infrastructure-services/intake-esm/tests/dkrz_cmip6_disk_old.json"
         col_old=intake.open_esm_datastore(col_url_old)
         len_old = len(col.df)
         self.assertTrue(len_new >= len_old,
diff --git a/tests/check_load_catalog_cordex.py b/tests/check_load_catalog_cordex.py
index 9c45766..5774be8 100644
--- a/tests/check_load_catalog_cordex.py
+++ b/tests/check_load_catalog_cordex.py
@@ -5,11 +5,11 @@ import unittest
 
 class TestCatalog(unittest.TestCase):
     def test_load_catalog(self):
-        col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json"
+        col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json"
         self.assertTrue(intake.open_esm_datastore(col_url),
                         msg="Intake could not load catalog '{0}'".format(col_url))
     def test_load_cmipScenario(self):
-        col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json"
+        col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json"
         col = intake.open_esm_datastore(col_url)
         cat = col.search(experiment_id=["historical"])
         noOfExps = cat.unique(columns=["model_id"])["model_id"]["count"]
@@ -17,10 +17,10 @@ class TestCatalog(unittest.TestCase):
                        msg="Number of models in intake catalog for"
                        "historical is lower than 2.")
     def test_len_catalog(self):
-        col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json"
+        col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json"
         col = intake.open_esm_datastore(col_url)
         len_new = len(col.df)
-        col_url_old="/home/k/k204210/intake-esm/mistral-cordex_old.json"
+        col_url_old="/home/k/k204210/volume/data-infrastructure-services/intake-esm/mistral-cordex_old.json"
         col_old=intake.open_esm_datastore(col_url_old)
         len_old = len(col.df)
         self.assertTrue(len_new >= len_old,
-- 
GitLab