diff --git a/tests/check_load_catalog_cmip5.py b/tests/check_load_catalog_cmip5.py index 60ed0ead65c7d977ed1d7ee965fea72b031d5bbd..2c7b474edec51baa4e7580abe090b3851c8278e4 100755 --- a/tests/check_load_catalog_cmip5.py +++ b/tests/check_load_catalog_cmip5.py @@ -5,7 +5,7 @@ import unittest class TestCatalog(unittest.TestCase): def test_load_catalog(self): - col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip5_disk.json" + col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip5_disk.json" self.assertTrue(intake.open_esm_datastore(col_url), msg="Intake could not load catalog '{0}'".format(col_url)) diff --git a/tests/check_load_catalog_cmip6.py b/tests/check_load_catalog_cmip6.py index 762aa96699e49ebd6fba076ac60c16537b60639a..9fe182e4f167c5c06a8bcf1e45401f509c854a7d 100755 --- a/tests/check_load_catalog_cmip6.py +++ b/tests/check_load_catalog_cmip6.py @@ -3,30 +3,48 @@ import intake import unittest import pandas as pd +import json class TestCatalog(unittest.TestCase): + usecols=['project', 'activity_id', 'institution_id', + 'source_id', 'experiment_id', 'member_id', 'table_id', 'variable_id', + 'grid_label', 'dcpp_init_year', 'version', 'time_range','format', 'uri'] + def test_load_catalog(self): - col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json" - testdf=pd.read_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz") + col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json" + with open(col_url, "r") as f: + col_json=json.load(f) - self.assertTrue(intake.open_esm_datastore(testdf, - esmcol_data=col_url), - msg="Intake could not load catalog '{0}'".format(col_url)) + cs=10 ** 6 + for testdf in pd.read_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz", + chunksize=cs) : + self.assertTrue(intake.open_esm_datastore(testdf, + esmcol_data=col_json), + msg="Intake could not load catalog '{0}'".format(col_url)) def test_load_cmipScenario(self): - col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json" - testdf=pd.read_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz") - col = intake.open_esm_datastore(testdf,esmcol_data=col_url) + col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json" + + with open(col_url, "r") as f: + col_json=json.load(f) + + testdf=pd.read_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz", + usecols=self.usecols) + col = intake.open_esm_datastore(testdf,esmcol_data=col_json) cat = col.search(source_id=["MPI-ESM1-2-HR", "AWI-CM-1-2-HR"], activity_id=["CMIP", "ScenarioMIP"]) noOfExps = cat.unique(columns=["experiment_id"])["experiment_id"]["count"] self.assertTrue(noOfExps >= 9, msg="Number of experiments in intake catalog for DICAD-Sources and" "CMIP-Experiments is lower than 9.") def test_len_catalog(self): - col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json" - testdf=pd.read_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz") - col = intake.open_esm_datastore(testdf,esmcol_data=col_url) + col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cmip6_disk.json" + with open(col_url, "r") as f: + col_json=json.load(f) + + testdf=pd.read_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz", + usecols=self.usecols) + col = intake.open_esm_datastore(testdf,esmcol_data=col_json) len_new = len(col.df) - col_url_old="/home/k/k204210/intake-esm/tests/dkrz_cmip6_disk_old.json" + col_url_old="/home/k/k204210/volume/data-infrastructure-services/intake-esm/tests/dkrz_cmip6_disk_old.json" col_old=intake.open_esm_datastore(col_url_old) len_old = len(col.df) self.assertTrue(len_new >= len_old, diff --git a/tests/check_load_catalog_cordex.py b/tests/check_load_catalog_cordex.py index 9c4576667e3303fe3eafee1b92e680f0d6fb1ef7..5774be8265d93565216eadfded6b97719c9de7c5 100644 --- a/tests/check_load_catalog_cordex.py +++ b/tests/check_load_catalog_cordex.py @@ -5,11 +5,11 @@ import unittest class TestCatalog(unittest.TestCase): def test_load_catalog(self): - col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json" + col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json" self.assertTrue(intake.open_esm_datastore(col_url), msg="Intake could not load catalog '{0}'".format(col_url)) def test_load_cmipScenario(self): - col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json" + col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json" col = intake.open_esm_datastore(col_url) cat = col.search(experiment_id=["historical"]) noOfExps = cat.unique(columns=["model_id"])["model_id"]["count"] @@ -17,10 +17,10 @@ class TestCatalog(unittest.TestCase): msg="Number of models in intake catalog for" "historical is lower than 2.") def test_len_catalog(self): - col_url = "/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json" + col_url = "/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_cordex_disk.json" col = intake.open_esm_datastore(col_url) len_new = len(col.df) - col_url_old="/home/k/k204210/intake-esm/mistral-cordex_old.json" + col_url_old="/home/k/k204210/volume/data-infrastructure-services/intake-esm/mistral-cordex_old.json" col_old=intake.open_esm_datastore(col_url_old) len_old = len(col.df) self.assertTrue(len_new >= len_old,