diff --git a/esm-collections/cloud-access/dkrz_catalog.yaml b/esm-collections/cloud-access/dkrz_catalog.yaml index 49c88b637ae1cde6f07c8305ff797d8ed849a64a..b14103ef00506267392949f19a86bdfe0f21f832 100755 --- a/esm-collections/cloud-access/dkrz_catalog.yaml +++ b/esm-collections/cloud-access/dkrz_catalog.yaml @@ -1,20 +1,20 @@ description: DKRZ master catalog for all /pool/data catalogs available metadata: parameters: - additional_cmip6_columns: + additional_cmip6_disk_columns: default: - - opendap_url - - long_name - path + - long_name + - opendap_url - units type: list[str] - additional_era5_columns: + additional_era5_disk_columns: default: + - units - step - - long_name - short_name - path - - units + - long_name type: list[str] cataloonie_columns: default: @@ -200,6 +200,29 @@ sources: disk storage system in /work/bk1099/data/ driver: - intake.open_esm_datastore + dkrz_mpige_disk: + args: + csv_kwargs: + usecols: + - product_id + - institute + - model + - experiment + - frequency + - modeling_realm + - mip_table + - ensemble_member + - variable + - temporal_subset + - version + - uri + - format + esmcol_obj: https://gitlab.dkrz.de/data-infrastructure-services/intake-esm/-/raw/master/esm-collections/cloud-access/dkrz_mpige_disk.json + description: This is an ESM collection for the Max Planck Institute Grand Ensemble + (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible + on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE + driver: + - intake.open_esm_datastore dkrz_nextgems_disk: args: csv_kwargs: diff --git a/esm-collections/cloud-access/dkrz_era5_disk.json b/esm-collections/cloud-access/dkrz_era5_disk.json index cb44f2fd3f5226fac22e558035da85a1109906f8..4e12bd3b0aadf324046446c61068cb29cff63d34 100755 --- a/esm-collections/cloud-access/dkrz_era5_disk.json +++ b/esm-collections/cloud-access/dkrz_era5_disk.json @@ -93,11 +93,26 @@ "aggregation_control": { "aggregations": [ { - "attribute_name": "code", - "type": "union" - } - ], - "variable_column_name": "code", + "attribute_name": "validation_date", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + { + "attribute_name": "initialization_date", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + {"attribute_name": "short_name", + "type": "union"}], + "variable_column_name": "short_name", "groupby_attrs": [ "table_id", "stepType", diff --git a/esm-collections/cloud-access/dkrz_mpi-ge_disk.json b/esm-collections/cloud-access/dkrz_mpige_disk.json old mode 100755 new mode 100644 similarity index 50% rename from esm-collections/cloud-access/dkrz_mpi-ge_disk.json rename to esm-collections/cloud-access/dkrz_mpige_disk.json index ebf0d16037efe591a8d3370d5bc78a7c4dc3151d..cccc35194b5da0ebc968534591626e5b5711a30b --- a/esm-collections/cloud-access/dkrz_mpi-ge_disk.json +++ b/esm-collections/cloud-access/dkrz_mpige_disk.json @@ -1,8 +1,40 @@ { - "esmcat_version": "0.1.0", - "id": "dkrz_mpige_disk", - "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's disk storage system in /work/mh1007/CMOR/MPI-GE which will be loaded from a source file which is in the cloud (see catalog_file)", - "catalog_file": "https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/dkrz_mpige_disk.csv.gz", + "aggregation_control": { + "aggregations": [ + { + "attribute_name": "variable", + "type": "union" + }, + { + "attribute_name": "temporal_subset", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + { + "attribute_name": "ensemble_member", + "options": { + "compat": "override", + "coords": "minimal" + }, + "type": "join_new" + } + ], + "groupby_attrs": [ + "institute", + "model", + "experiment", + "mip_table" + ], + "variable_column_name": "variable" + }, + "assets": { + "column_name": "uri", + "format_column_name": "format" + }, "attributes": [ { "column_name": "product_id", @@ -39,87 +71,25 @@ { "column_name": "version", "vocabulary": "" - }, - { - "column_name": "project" - }, - { - "column_name": "institution_id" - }, - { - "column_name": "source_id" - }, - { - "column_name": "experiment_id" - }, - { - "column_name": "simulation_id" - }, - { - "column_name": "realm" - }, - { - "column_name": "time_reduction" - }, - { - "column_name": "grid_label" - }, - { - "column_name": "grid_id" - }, - { - "column_name": "level_type" - }, - { - "column_name": "time_min" - }, - { - "column_name": "time_max" - }, - { - "column_name": "format" - }, - { - "column_name": "uri" - }, - { - "column_name": "variable_id" } ], - "assets": { - "format_column_name": "format", - "column_name": "uri" - }, - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "institute", - "model", - "experiment", - "mip_table" - ], - "aggregations": [ - { - "type": "union", - "attribute_name": "variable" - }, - { - "type": "join_existing", - "attribute_name": "temporal_subset", - "options": { - "dim": "time", - "coords": "minimal", - "compat": "override" - } - }, - { - "type": "join_new", - "attribute_name": "ensemble_member", - "options": { - "coords": "minimal", - "compat": "override" - } - } - ] - } + "catalog_file": "https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/dkrz_mpige_disk.csv.gz", + "default_columns": [ + "product_id", + "institute", + "model", + "experiment", + "frequency", + "modeling_realm", + "mip_table", + "ensemble_member", + "variable", + "temporal_subset", + "version", + "uri", + "format" + ], + "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE", + "esmcat_version": "0.1.0", + "id": "/work/ik1017/Catalogs/dkrz_mpige_disk" } \ No newline at end of file diff --git a/esm-collections/disk-access/dkrz_catalog.yaml b/esm-collections/disk-access/dkrz_catalog.yaml index 4990c65425910f00cc1d53966ee8be6e2c73f8aa..7ab082dd45590b7f8b468a24bfff9a699f09545e 100755 --- a/esm-collections/disk-access/dkrz_catalog.yaml +++ b/esm-collections/disk-access/dkrz_catalog.yaml @@ -1,20 +1,20 @@ description: DKRZ master catalog for all /pool/data catalogs available metadata: parameters: - additional_cmip6_columns: + additional_cmip6_disk_columns: default: - - opendap_url - - long_name - path + - long_name + - opendap_url - units type: list[str] - additional_era5_columns: + additional_era5_disk_columns: default: + - units - step - - long_name - short_name - path - - units + - long_name type: list[str] cataloonie_columns: default: @@ -200,6 +200,29 @@ sources: disk storage system in /work/bk1099/data/ driver: - intake.open_esm_datastore + dkrz_mpige_disk: + args: + csv_kwargs: + usecols: + - product_id + - institute + - model + - experiment + - frequency + - modeling_realm + - mip_table + - ensemble_member + - variable + - temporal_subset + - version + - uri + - format + esmcol_obj: /pool/data/Catalogs/dkrz_mpige_disk.json + description: This is an ESM collection for the Max Planck Institute Grand Ensemble + (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible + on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE + driver: + - intake.open_esm_datastore dkrz_nextgems_disk: args: csv_kwargs: diff --git a/esm-collections/disk-access/dkrz_era5_disk.json b/esm-collections/disk-access/dkrz_era5_disk.json index ee926c1975ba0ab7f4876d0335d3287baa6083b1..c061248883d753a96404be7ee82817dc0045e6cf 100755 --- a/esm-collections/disk-access/dkrz_era5_disk.json +++ b/esm-collections/disk-access/dkrz_era5_disk.json @@ -77,9 +77,28 @@ {"column_name": "format"}], "assets": {"format_column_name": "format", "column_name": "uri"}, "aggregation_control": { - "aggregations": [{"attribute_name": "code", - "type": "union"}], - "variable_column_name": "code", + "aggregations": [ + { + "attribute_name": "validation_date", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + { + "attribute_name": "initialization_date", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + {"attribute_name": "short_name", + "type": "union"}], + "variable_column_name": "short_name", "groupby_attrs": [ "table_id", "stepType", diff --git a/esm-collections/disk-access/dkrz_mpi-ge_disk.json b/esm-collections/disk-access/dkrz_mpi-ge_disk.json deleted file mode 100755 index 798c47c577ba42f47960692d130f620c51fc168d..0000000000000000000000000000000000000000 --- a/esm-collections/disk-access/dkrz_mpi-ge_disk.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "esmcat_version": "0.1.0", - "id": "dkrz_mpige_disk", - "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's disk storage system in /work/mh1007/CMOR/MPI-GE", - "catalog_file":"/mnt/lustre/work/ik1017/Catalogs/dkrz_mpi-ge_disk.csv.gz", - "attributes": [ - { - "column_name": "product_id", - "vocabulary": "" - }, - { - "column_name": "model", - "vocabulary": "" - }, - { - "column_name": "institute", - "vocabulary": "" - }, - { - "column_name": "experiment", - "vocabulary": "" - }, - { "column_name": "ensemble_member", - "vocabulary": "" }, - { - "column_name": "mip_table", - "vocabulary": "" - }, - { "column_name": "variable", - "vocabulary": "" }, - { - "column_name": "modeling_realm", - "vocabulary": "" - }, - { - "column_name": "version", - "vocabulary": "" - }, - {"column_name": "project"}, - {"column_name": "institution_id"}, - {"column_name": "source_id"}, - {"column_name": "experiment_id"}, - {"column_name": "simulation_id"}, - {"column_name": "realm"}, - {"column_name": "time_reduction"}, - {"column_name": "grid_label"}, - {"column_name": "grid_id"}, - {"column_name": "level_type"}, - {"column_name": "time_min"}, - {"column_name": "time_max"}, - {"column_name": "format"}, - {"column_name": "uri"}, - {"column_name": "variable_id"}], - "assets": {"format_column_name": "format", - "column_name": "uri"}, - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "institute", - "model", - "experiment", - "mip_table" - ], - "aggregations": [ - { - "type": "union", - "attribute_name": "variable" - }, - { - "type": "join_existing", - "attribute_name": "temporal_subset", - "options": { "dim": "time", "coords": "minimal", "compat": "override" } - }, - { - "type": "join_new", - "attribute_name": "ensemble_member", - "options": { "coords": "minimal", "compat": "override" } - } - ] - } -} diff --git a/esm-collections/disk-access/dkrz_mpige_disk.json b/esm-collections/disk-access/dkrz_mpige_disk.json new file mode 100644 index 0000000000000000000000000000000000000000..1ee998447aeb0dac2a1ee5e58f04161d278d9811 --- /dev/null +++ b/esm-collections/disk-access/dkrz_mpige_disk.json @@ -0,0 +1,95 @@ +{ + "aggregation_control": { + "aggregations": [ + { + "attribute_name": "variable", + "type": "union" + }, + { + "attribute_name": "temporal_subset", + "options": { + "compat": "override", + "coords": "minimal", + "dim": "time" + }, + "type": "join_existing" + }, + { + "attribute_name": "ensemble_member", + "options": { + "compat": "override", + "coords": "minimal" + }, + "type": "join_new" + } + ], + "groupby_attrs": [ + "institute", + "model", + "experiment", + "mip_table" + ], + "variable_column_name": "variable" + }, + "assets": { + "column_name": "uri", + "format_column_name": "format" + }, + "attributes": [ + { + "column_name": "product_id", + "vocabulary": "" + }, + { + "column_name": "model", + "vocabulary": "" + }, + { + "column_name": "institute", + "vocabulary": "" + }, + { + "column_name": "experiment", + "vocabulary": "" + }, + { + "column_name": "ensemble_member", + "vocabulary": "" + }, + { + "column_name": "mip_table", + "vocabulary": "" + }, + { + "column_name": "variable", + "vocabulary": "" + }, + { + "column_name": "modeling_realm", + "vocabulary": "" + }, + { + "column_name": "version", + "vocabulary": "" + } + ], + "catalog_file": "/work/ik1017/Catalogs/dkrz_mpige_disk.csv.gz", + "default_columns": [ + "product_id", + "institute", + "model", + "experiment", + "frequency", + "modeling_realm", + "mip_table", + "ensemble_member", + "variable", + "temporal_subset", + "version", + "uri", + "format" + ], + "description": "This is an ESM collection for the Max Planck Institute Grand Ensemble (Maher et al. 2019 https://doi.org/10/gf3kgt) cmorized by CMIP5-standards accessible on the DKRZ's Levante disk storage system in /work/mh1007/CMOR/MPI-GE", + "esmcat_version": "0.1.0", + "id": "/work/ik1017/Catalogs/dkrz_mpige_disk" +} \ No newline at end of file