From 206c2a971b1b8dffffb408a68dc921df435fcdd1 Mon Sep 17 00:00:00 2001 From: Fabian Wachsmann <k204210@l40103.atos.local> Date: Tue, 18 Oct 2022 12:08:25 +0200 Subject: [PATCH] Added mpige catalog --- .../cloud-access/dkrz_catalog.yaml | 35 ++++- ..._mpi-ge_disk.json => dkrz_mpige_disk.json} | 140 +++++++----------- esm-collections/disk-access/dkrz_catalog.yaml | 35 ++++- .../disk-access/dkrz_mpi-ge_disk.json | 81 ---------- .../disk-access/dkrz_mpige_disk.json | 95 ++++++++++++ 5 files changed, 208 insertions(+), 178 deletions(-) rename esm-collections/cloud-access/{dkrz_mpi-ge_disk.json => dkrz_mpige_disk.json} (50%) mode change 100755 => 100644 delete mode 100755 esm-collections/disk-access/dkrz_mpi-ge_disk.json create mode 100644 esm-collections/disk-access/dkrz_mpige_disk.json diff --git a/esm-collections/cloud-access/dkrz_catalog.yaml b/esm-collections/cloud-access/dkrz_catalog.yaml index 49c88b6..b14103e 100755 --- a/esm-collections/cloud-access/dkrz_catalog.yaml +++ b/esm-collections/cloud-access/dkrz_catalog.yaml @@ -1,20 +1,20 @@ description: DKRZ master catalog for all /pool/data catalogs available metadata: parameters: - additional_cmip6_columns: + additional_cmip6_disk_columns: default: - - opendap_url - - long_name - path + - long_name + - opendap_url - units type: list[str] - additional_era5_columns: + additional_era5_disk_columns: default: + - units - step - - long_name - short_name - path - - units + - long_name type: list[str] cataloonie_columns: default: @@ -200,6 +200,29 @@ sources: disk storage system in /work/bk1099/data/ driver: - intake.open_esm_datastore + dkrz_mpige_disk: + args: + csv_kwargs: + usecols: + - product_id + - institute + - model + - experiment + - frequency + - modeling_realm + - mip_table + - ensemble_member + - variable + - temporal_subset + - version + - uri + - format + esmcol_obj: https://gitlab.dkrz.de/data-infrastructure-services/intake-esm/-/raw/master/esm-collections/cloud-access/dkrz_mpige_disk.json + description: This is an ESM collection for the Max Planck Institute Grand Ensemble + (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible + on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE + driver: + - intake.open_esm_datastore dkrz_nextgems_disk: args: csv_kwargs: diff --git a/esm-collections/cloud-access/dkrz_mpi-ge_disk.json b/esm-collections/cloud-access/dkrz_mpige_disk.json old mode 100755 new mode 100644 similarity index 50% rename from esm-collections/cloud-access/dkrz_mpi-ge_disk.json rename to esm-collections/cloud-access/dkrz_mpige_disk.json index ebf0d16..cccc351 --- a/esm-collections/cloud-access/dkrz_mpi-ge_disk.json +++ b/esm-collections/cloud-access/dkrz_mpige_disk.json @@ -1,8 +1,40 @@ { - "esmcat_version": "0.1.0", - "id": "dkrz_mpige_disk", - "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's disk storage system in /work/mh1007/CMOR/MPI-GE which will be loaded from a source file which is in the cloud (see catalog_file)", - "catalog_file": "https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/dkrz_mpige_disk.csv.gz", + "aggregation_control": { + "aggregations": [ + { + "attribute_name": "variable", + "type": "union" + }, + { + "attribute_name": "temporal_subset", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + { + "attribute_name": "ensemble_member", + "options": { + "compat": "override", + "coords": "minimal" + }, + "type": "join_new" + } + ], + "groupby_attrs": [ + "institute", + "model", + "experiment", + "mip_table" + ], + "variable_column_name": "variable" + }, + "assets": { + "column_name": "uri", + "format_column_name": "format" + }, "attributes": [ { "column_name": "product_id", @@ -39,87 +71,25 @@ { "column_name": "version", "vocabulary": "" - }, - { - "column_name": "project" - }, - { - "column_name": "institution_id" - }, - { - "column_name": "source_id" - }, - { - "column_name": "experiment_id" - }, - { - "column_name": "simulation_id" - }, - { - "column_name": "realm" - }, - { - "column_name": "time_reduction" - }, - { - "column_name": "grid_label" - }, - { - "column_name": "grid_id" - }, - { - "column_name": "level_type" - }, - { - "column_name": "time_min" - }, - { - "column_name": "time_max" - }, - { - "column_name": "format" - }, - { - "column_name": "uri" - }, - { - "column_name": "variable_id" } ], - "assets": { - "format_column_name": "format", - "column_name": "uri" - }, - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "institute", - "model", - "experiment", - "mip_table" - ], - "aggregations": [ - { - "type": "union", - "attribute_name": "variable" - }, - { - "type": "join_existing", - "attribute_name": "temporal_subset", - "options": { - "dim": "time", - "coords": "minimal", - "compat": "override" - } - }, - { - "type": "join_new", - "attribute_name": "ensemble_member", - "options": { - "coords": "minimal", - "compat": "override" - } - } - ] - } + "catalog_file": "https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/dkrz_mpige_disk.csv.gz", + "default_columns": [ + "product_id", + "institute", + "model", + "experiment", + "frequency", + "modeling_realm", + "mip_table", + "ensemble_member", + "variable", + "temporal_subset", + "version", + "uri", + "format" + ], + "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE", + "esmcat_version": "0.1.0", + "id": "/work/ik1017/Catalogs/dkrz_mpige_disk" } \ No newline at end of file diff --git a/esm-collections/disk-access/dkrz_catalog.yaml b/esm-collections/disk-access/dkrz_catalog.yaml index 4990c65..7ab082d 100755 --- a/esm-collections/disk-access/dkrz_catalog.yaml +++ b/esm-collections/disk-access/dkrz_catalog.yaml @@ -1,20 +1,20 @@ description: DKRZ master catalog for all /pool/data catalogs available metadata: parameters: - additional_cmip6_columns: + additional_cmip6_disk_columns: default: - - opendap_url - - long_name - path + - long_name + - opendap_url - units type: list[str] - additional_era5_columns: + additional_era5_disk_columns: default: + - units - step - - long_name - short_name - path - - units + - long_name type: list[str] cataloonie_columns: default: @@ -200,6 +200,29 @@ sources: disk storage system in /work/bk1099/data/ driver: - intake.open_esm_datastore + dkrz_mpige_disk: + args: + csv_kwargs: + usecols: + - product_id + - institute + - model + - experiment + - frequency + - modeling_realm + - mip_table + - ensemble_member + - variable + - temporal_subset + - version + - uri + - format + esmcol_obj: /pool/data/Catalogs/dkrz_mpige_disk.json + description: This is an ESM collection for the Max Planck Institute Grand Ensemble + (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible + on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE + driver: + - intake.open_esm_datastore dkrz_nextgems_disk: args: csv_kwargs: diff --git a/esm-collections/disk-access/dkrz_mpi-ge_disk.json b/esm-collections/disk-access/dkrz_mpi-ge_disk.json deleted file mode 100755 index 798c47c..0000000 --- a/esm-collections/disk-access/dkrz_mpi-ge_disk.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "esmcat_version": "0.1.0", - "id": "dkrz_mpige_disk", - "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's disk storage system in /work/mh1007/CMOR/MPI-GE", - "catalog_file":"/mnt/lustre/work/ik1017/Catalogs/dkrz_mpi-ge_disk.csv.gz", - "attributes": [ - { - "column_name": "product_id", - "vocabulary": "" - }, - { - "column_name": "model", - "vocabulary": "" - }, - { - "column_name": "institute", - "vocabulary": "" - }, - { - "column_name": "experiment", - "vocabulary": "" - }, - { "column_name": "ensemble_member", - "vocabulary": "" }, - { - "column_name": "mip_table", - "vocabulary": "" - }, - { "column_name": "variable", - "vocabulary": "" }, - { - "column_name": "modeling_realm", - "vocabulary": "" - }, - { - "column_name": "version", - "vocabulary": "" - }, - {"column_name": "project"}, - {"column_name": "institution_id"}, - {"column_name": "source_id"}, - {"column_name": "experiment_id"}, - {"column_name": "simulation_id"}, - {"column_name": "realm"}, - {"column_name": "time_reduction"}, - {"column_name": "grid_label"}, - {"column_name": "grid_id"}, - {"column_name": "level_type"}, - {"column_name": "time_min"}, - {"column_name": "time_max"}, - {"column_name": "format"}, - {"column_name": "uri"}, - {"column_name": "variable_id"}], - "assets": {"format_column_name": "format", - "column_name": "uri"}, - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "institute", - "model", - "experiment", - "mip_table" - ], - "aggregations": [ - { - "type": "union", - "attribute_name": "variable" - }, - { - "type": "join_existing", - "attribute_name": "temporal_subset", - "options": { "dim": "time", "coords": "minimal", "compat": "override" } - }, - { - "type": "join_new", - "attribute_name": "ensemble_member", - "options": { "coords": "minimal", "compat": "override" } - } - ] - } -} diff --git a/esm-collections/disk-access/dkrz_mpige_disk.json b/esm-collections/disk-access/dkrz_mpige_disk.json new file mode 100644 index 0000000..1ee9984 --- /dev/null +++ b/esm-collections/disk-access/dkrz_mpige_disk.json @@ -0,0 +1,95 @@ +{ + "aggregation_control": { + "aggregations": [ + { + "attribute_name": "variable", + "type": "union" + }, + { + "attribute_name": "temporal_subset", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + { + "attribute_name": "ensemble_member", + "options": { + "compat": "override", + "coords": "minimal" + }, + "type": "join_new" + } + ], + "groupby_attrs": [ + "institute", + "model", + "experiment", + "mip_table" + ], + "variable_column_name": "variable" + }, + "assets": { + "column_name": "uri", + "format_column_name": "format" + }, + "attributes": [ + { + "column_name": "product_id", + "vocabulary": "" + }, + { + "column_name": "model", + "vocabulary": "" + }, + { + "column_name": "institute", + "vocabulary": "" + }, + { + "column_name": "experiment", + "vocabulary": "" + }, + { + "column_name": "ensemble_member", + "vocabulary": "" + }, + { + "column_name": "mip_table", + "vocabulary": "" + }, + { + "column_name": "variable", + "vocabulary": "" + }, + { + "column_name": "modeling_realm", + "vocabulary": "" + }, + { + "column_name": "version", + "vocabulary": "" + } + ], + "catalog_file": "/work/ik1017/Catalogs/dkrz_mpige_disk.csv.gz", + "default_columns": [ + "product_id", + "institute", + "model", + "experiment", + "frequency", + "modeling_realm", + "mip_table", + "ensemble_member", + "variable", + "temporal_subset", + "version", + "uri", + "format" + ], + "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE", + "esmcat_version": "0.1.0", + "id": "/work/ik1017/Catalogs/dkrz_mpige_disk" +} \ No newline at end of file -- GitLab