Commit 9a577c56 authored by Anderson Banihirwe

update catalogs and add json files

parent 3c237372
......@@ -91,6 +91,37 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['CFMIP',\n",
" 'CMIP',\n",
" 'LUMIP',\n",
" 'LS3MIP',\n",
" 'OMIP',\n",
" 'HighResMIP',\n",
" 'DCPP',\n",
" 'AerChemMIP',\n",
" 'PAMIP',\n",
" 'ScenarioMIP']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"activity_ids = list(Path(persist_path).rglob(\"*.txt\"))\n",
"activity_ids = [activity_id.stem for activity_id in activity_ids]\n",
"activity_ids"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
......@@ -150,7 +181,7 @@
"4 /glade/collections/cmip/CMIP6/AerChemMIP/BCC/B..."
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
......@@ -163,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
......@@ -172,7 +203,7 @@
"1027617"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
......@@ -183,7 +214,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
......@@ -231,15 +262,15 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.31 s, sys: 0 ns, total: 3.31 s\n",
"Wall time: 3.31 s\n"
"CPU times: user 3.66 s, sys: 13.2 ms, total: 3.67 s\n",
"Wall time: 3.68 s\n"
]
}
],
......@@ -251,7 +282,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
......@@ -260,7 +291,7 @@
"609911"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
......@@ -271,7 +302,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
......@@ -306,15 +337,15 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 13.5 s, sys: 560 ms, total: 14.1 s\n",
"Wall time: 14.1 s\n"
"CPU times: user 15.4 s, sys: 622 ms, total: 16.1 s\n",
"Wall time: 16.1 s\n"
]
}
],
......@@ -325,7 +356,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
......@@ -344,7 +375,7 @@
" 'path': '/glade/collections/cmip/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r2i1p1f1/Amon/hfls/gn/v20190624/hfls/hfls_Amon_BCC-ESM1_ssp370_r2i1p1f1_gn_201501-205512.nc'}"
]
},
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
......@@ -355,7 +386,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [
{
......@@ -364,7 +395,7 @@
"609911"
]
},
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
......@@ -375,7 +406,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [
{
......@@ -510,7 +541,7 @@
"4 /glade/collections/cmip/CMIP6/AerChemMIP/BCC/B... "
]
},
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
......@@ -523,7 +554,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [
{
......@@ -532,7 +563,217 @@
"609911"
]
},
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"invalids = df[~df.activity_id.isin(activity_ids)]\n",
"df = df[df.activity_id.isin(activity_ids)]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>variable_id</th>\n",
" <th>table_id</th>\n",
" <th>source_id</th>\n",
" <th>experiment_id</th>\n",
" <th>member_id</th>\n",
" <th>grid_label</th>\n",
" <th>time_range</th>\n",
" <th>activity_id</th>\n",
" <th>institution_id</th>\n",
" <th>version</th>\n",
" <th>path</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>507544</td>\n",
" <td>sftof</td>\n",
" <td>Ofx</td>\n",
" <td>historical</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>gn</td>\n",
" <td>NaN</td>\n",
" <td>NCC</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>v20190815</td>\n",
" <td>/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>507546</td>\n",
" <td>basin</td>\n",
" <td>Ofx</td>\n",
" <td>historical</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>gn</td>\n",
" <td>NaN</td>\n",
" <td>NCC</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>v20190815</td>\n",
" <td>/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>507549</td>\n",
" <td>volcello</td>\n",
" <td>Ofx</td>\n",
" <td>historical</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>gr</td>\n",
" <td>NaN</td>\n",
" <td>NCC</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>v20190815</td>\n",
" <td>/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>508393</td>\n",
" <td>areacello</td>\n",
" <td>Ofx</td>\n",
" <td>piControl</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>gn</td>\n",
" <td>NaN</td>\n",
" <td>NCC</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>v20190815</td>\n",
" <td>/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>508395</td>\n",
" <td>basin</td>\n",
" <td>Ofx</td>\n",
" <td>piControl</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>gn</td>\n",
" <td>NaN</td>\n",
" <td>NCC</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>v20190815</td>\n",
" <td>/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>508396</td>\n",
" <td>volcello</td>\n",
" <td>Ofx</td>\n",
" <td>piControl</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>gr</td>\n",
" <td>NaN</td>\n",
" <td>NCC</td>\n",
" <td>NorESM2-LM</td>\n",
" <td>v20190815</td>\n",
" <td>/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>509418</td>\n",
" <td>thetao</td>\n",
" <td>Omon</td>\n",
" <td>PCMDI-test-1-0</td>\n",
" <td>piControl-withism</td>\n",
" <td>r3i1p1f1</td>\n",
" <td>gn</td>\n",
" <td>016201-016201</td>\n",
" <td>v20190926</td>\n",
" <td>thetao</td>\n",
" <td>v20190926</td>\n",
" <td>/glade/collections/cmip/CMIP6/CMIP/FIO-QLNM/FI...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" variable_id table_id source_id experiment_id member_id \\\n",
"507544 sftof Ofx historical NorESM2-LM r1i1p1f1 \n",
"507546 basin Ofx historical NorESM2-LM r1i1p1f1 \n",
"507549 volcello Ofx historical NorESM2-LM r1i1p1f1 \n",
"508393 areacello Ofx piControl NorESM2-LM r1i1p1f1 \n",
"508395 basin Ofx piControl NorESM2-LM r1i1p1f1 \n",
"508396 volcello Ofx piControl NorESM2-LM r1i1p1f1 \n",
"509418 thetao Omon PCMDI-test-1-0 piControl-withism r3i1p1f1 \n",
"\n",
" grid_label time_range activity_id institution_id version \\\n",
"507544 gn NaN NCC NorESM2-LM v20190815 \n",
"507546 gn NaN NCC NorESM2-LM v20190815 \n",
"507549 gr NaN NCC NorESM2-LM v20190815 \n",
"508393 gn NaN NCC NorESM2-LM v20190815 \n",
"508395 gn NaN NCC NorESM2-LM v20190815 \n",
"508396 gr NaN NCC NorESM2-LM v20190815 \n",
"509418 gn 016201-016201 v20190926 thetao v20190926 \n",
"\n",
" path \n",
"507544 /glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2... \n",
"507546 /glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2... \n",
"507549 /glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2... \n",
"508393 /glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2... \n",
"508395 /glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2... \n",
"508396 /glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2... \n",
"509418 /glade/collections/cmip/CMIP6/CMIP/FIO-QLNM/FI... "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"invalids"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"609904"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
......@@ -552,7 +793,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
......@@ -562,7 +803,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 20,
"metadata": {},
"outputs": [
{
......@@ -697,7 +938,7 @@
"4 /glade/collections/cmip/CMIP6/AerChemMIP/BCC/B... "
]
},
"execution_count": 16,
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
......@@ -711,7 +952,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
......@@ -721,7 +962,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 22,
"metadata": {},
"outputs": [
{
......@@ -856,7 +1097,7 @@
"4 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... "
]
},
"execution_count": 18,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
......@@ -867,7 +1108,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 23,
"metadata": {},
"outputs": [
{
......@@ -1008,7 +1249,7 @@
"4 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1975 "
]
},
"execution_count": 19,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1021,7 +1262,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 24,
"metadata": {},
"outputs": [
{
......@@ -1162,7 +1403,7 @@
"4 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... "
]
},
"execution_count": 20,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1176,7 +1417,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
......
{
"esmcat_version": "0.1.0",
"id": "glade-cmip6-dcpp",
"description": "This is an ESM collection for CMIP6 Decadal Climate Prediction data accessible on NCAR's GLADE disk storage system in /glade/collections/cmip/CMIP6",
"catalog_file": "/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cmip6-dcpp.csv.gz",
"attributes": [
{
"column_name": "activity_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_activity_id.json"
},
{
"column_name": "source_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_source_id.json"
},
{
"column_name": "institution_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_institution_id.json"
},
{
"column_name": "experiment_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_experiment_id.json"
},
{ "column_name": "member_id", "vocabulary": "" },
{
"column_name": "table_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_table_id.json"
},
{ "column_name": "variable_id", "vocabulary": "" },
{
"column_name": "grid_label",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_grid_label.json"
},
{
"column_name": "start_year",
"vocabulary": ""
},
{
"column_name": "version",
"vocabulary": ""
}
],
"assets": {
"column_name": "path",
"format": "netcdf"
},
"aggregation_control": {
"variable_column_name": "variable_id",
"groupby_attrs": [
"activity_id",
"institution_id",
"source_id",
"experiment_id",
"table_id",
"grid_label"
],
"aggregations": [
{
"type": "join_new",
"attribute_name": "member_id",
"options": { "coords": "minimal", "compat": "override" }
},
{
"type": "join_new",
"attribute_name": "start_year",
"options": { "coords": "minimal", "compat": "override" }
},
{
"type": "join_existing",
"attribute_name": "time_range",
"options": { "dim": "time" }
},
{
"type": "union",
"attribute_name": "variable_id"
}
]
}
}
{
"esmcat_version": "0.1.0",
"id": "glade-cmip6",
"description": "This is an ESM collection for CMIP6 data accessible on NCAR's GLADE disk storage system in /glade/collections/cmip/CMIP6",
"catalog_file": "/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cmip6.csv.gz",
"attributes": [
{
"column_name": "activity_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_activity_id.json"
},
{
"column_name": "source_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_source_id.json"
},
{
"column_name": "institution_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_institution_id.json"
},
{
"column_name": "experiment_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_experiment_id.json"
},
{ "column_name": "member_id", "vocabulary": "" },
{
"column_name": "table_id",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_table_id.json"
},
{ "column_name": "variable_id", "vocabulary": "" },
{
"column_name": "grid_label",
"vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_grid_label.json"
},
{
"column_name": "version",
"vocabulary": ""
}
],
"assets": {
"column_name": "path",
"format": "netcdf"
},
"aggregation_control": {
"variable_column_name": "variable_id",
"groupby_attrs": [
"activity_id",
"institution_id",
"source_id",
"experiment_id",
"table_id",
"grid_label"
],
"aggregations": [
{
"type": "join_new",
"attribute_name": "member_id",
"options": { "coords": "minimal", "compat": "override" }
},
{