Unverified Commit acb00f85 authored by Anderson Banihirwe, committed by GitHub

Merge pull request #22 from andersy005/master

Fix CMIP6 DCPP catalog
parents 5a954903 876e7172
...@@ -15,7 +15,8 @@ ...@@ -15,7 +15,8 @@
"import subprocess\n", "import subprocess\n",
"from tqdm.auto import tqdm\n", "from tqdm.auto import tqdm\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"import shutil" "import shutil\n",
"import numpy as np"
] ]
}, },
{ {
...@@ -269,8 +270,8 @@ ...@@ -269,8 +270,8 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"CPU times: user 3.66 s, sys: 13.2 ms, total: 3.67 s\n", "CPU times: user 4.01 s, sys: 37.7 ms, total: 4.04 s\n",
"Wall time: 3.68 s\n" "Wall time: 4.05 s\n"
] ]
} }
], ],
...@@ -344,8 +345,8 @@ ...@@ -344,8 +345,8 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"CPU times: user 15.4 s, sys: 622 ms, total: 16.1 s\n", "CPU times: user 16.2 s, sys: 642 ms, total: 16.9 s\n",
"Wall time: 16.1 s\n" "Wall time: 16.9 s\n"
] ]
} }
], ],
...@@ -578,6 +579,7 @@ ...@@ -578,6 +579,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Some entries are invalid\n",
"invalids = df[~df.activity_id.isin(activity_ids)]\n", "invalids = df[~df.activity_id.isin(activity_ids)]\n",
"df = df[df.activity_id.isin(activity_ids)]" "df = df[df.activity_id.isin(activity_ids)]"
] ]
...@@ -956,7 +958,6 @@ ...@@ -956,7 +958,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"rest.to_csv(\"/glade/collections/cmip/catalog/glade-cmip6.csv.gz\", compression=\"gzip\", index=False)\n",
"rest.to_csv(\"../catalogs/glade-cmip6.csv.gz\", compression=\"gzip\", index=False)" "rest.to_csv(\"../catalogs/glade-cmip6.csv.gz\", compression=\"gzip\", index=False)"
] ]
}, },
...@@ -1160,7 +1161,7 @@ ...@@ -1160,7 +1161,7 @@
" <td>CCCma</td>\n", " <td>CCCma</td>\n",
" <td>v20190429</td>\n", " <td>v20190429</td>\n",
" <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n", " <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n",
" <td>2015</td>\n", " <td>2015.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <td>1</td>\n", " <td>1</td>\n",
...@@ -1175,7 +1176,7 @@ ...@@ -1175,7 +1176,7 @@
" <td>CCCma</td>\n", " <td>CCCma</td>\n",
" <td>v20190429</td>\n", " <td>v20190429</td>\n",
" <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n", " <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n",
" <td>1977</td>\n", " <td>1977.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <td>2</td>\n", " <td>2</td>\n",
...@@ -1190,7 +1191,7 @@ ...@@ -1190,7 +1191,7 @@
" <td>CCCma</td>\n", " <td>CCCma</td>\n",
" <td>v20190429</td>\n", " <td>v20190429</td>\n",
" <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n", " <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n",
" <td>1977</td>\n", " <td>1977.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <td>3</td>\n", " <td>3</td>\n",
...@@ -1205,7 +1206,7 @@ ...@@ -1205,7 +1206,7 @@
" <td>CCCma</td>\n", " <td>CCCma</td>\n",
" <td>v20190429</td>\n", " <td>v20190429</td>\n",
" <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n", " <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n",
" <td>1975</td>\n", " <td>1975.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <td>4</td>\n", " <td>4</td>\n",
...@@ -1220,7 +1221,7 @@ ...@@ -1220,7 +1221,7 @@
" <td>CCCma</td>\n", " <td>CCCma</td>\n",
" <td>v20190429</td>\n", " <td>v20190429</td>\n",
" <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n", " <td>/glade/collections/cmip/CMIP6/DCPP/CCCma/CanES...</td>\n",
" <td>1975</td>\n", " <td>1975.0</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
...@@ -1241,12 +1242,12 @@ ...@@ -1241,12 +1242,12 @@
"3 19760101-19851231 DCPP CCCma v20190429 \n", "3 19760101-19851231 DCPP CCCma v20190429 \n",
"4 197601-198512 DCPP CCCma v20190429 \n", "4 197601-198512 DCPP CCCma v20190429 \n",
"\n", "\n",
" path start_year \n", " path start_year \n",
"0 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 2015 \n", "0 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 2015.0 \n",
"1 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1977 \n", "1 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1977.0 \n",
"2 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1977 \n", "2 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1977.0 \n",
"3 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1975 \n", "3 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1975.0 \n",
"4 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1975 " "4 /glade/collections/cmip/CMIP6/DCPP/CCCma/CanES... 1975.0 "
] ]
}, },
"execution_count": 23, "execution_count": 23,
...@@ -1255,8 +1256,9 @@ ...@@ -1255,8 +1256,9 @@
} }
], ],
"source": [ "source": [
"dcpp[\"start_year\"] = dcpp.member_id.map(lambda x: x.split(\"-\")[0][1:])\n", "# Note: For 'dcppA-assim' experiment_id, there's no start year\n",
"dcpp[\"member_id\"] = dcpp[\"member_id\"].map(lambda x: x.split(\"-\")[-1])\n", "dcpp[\"start_year\"] = dcpp.member_id.map(lambda x: float(x.split(\"-\")[0][1:] if x.startswith(\"s\") else np.nan))\n",
"dcpp[\"member_id\"] = dcpp[\"member_id\"].map(lambda x: x.split(\"-\")[-1] if x.startswith(\"s\") else x)\n",
"dcpp.head()" "dcpp.head()"
] ]
}, },
...@@ -1308,7 +1310,7 @@ ...@@ -1308,7 +1310,7 @@
" <td>CanESM5</td>\n", " <td>CanESM5</td>\n",
" <td>dcppA-hindcast</td>\n", " <td>dcppA-hindcast</td>\n",
" <td>r7i1p2f1</td>\n", " <td>r7i1p2f1</td>\n",
" <td>2015</td>\n", " <td>2015.0</td>\n",
" <td>day</td>\n", " <td>day</td>\n",
" <td>tas</td>\n", " <td>tas</td>\n",
" <td>gn</td>\n", " <td>gn</td>\n",
...@@ -1323,7 +1325,7 @@ ...@@ -1323,7 +1325,7 @@
" <td>CanESM5</td>\n", " <td>CanESM5</td>\n",
" <td>dcppA-hindcast</td>\n", " <td>dcppA-hindcast</td>\n",
" <td>r2i1p2f1</td>\n", " <td>r2i1p2f1</td>\n",
" <td>1977</td>\n", " <td>1977.0</td>\n",
" <td>day</td>\n", " <td>day</td>\n",
" <td>tas</td>\n", " <td>tas</td>\n",
" <td>gn</td>\n", " <td>gn</td>\n",
...@@ -1338,7 +1340,7 @@ ...@@ -1338,7 +1340,7 @@
" <td>CanESM5</td>\n", " <td>CanESM5</td>\n",
" <td>dcppA-hindcast</td>\n", " <td>dcppA-hindcast</td>\n",
" <td>r2i1p2f1</td>\n", " <td>r2i1p2f1</td>\n",
" <td>1977</td>\n", " <td>1977.0</td>\n",
" <td>Amon</td>\n", " <td>Amon</td>\n",
" <td>tas</td>\n", " <td>tas</td>\n",
" <td>gn</td>\n", " <td>gn</td>\n",
...@@ -1353,7 +1355,7 @@ ...@@ -1353,7 +1355,7 @@
" <td>CanESM5</td>\n", " <td>CanESM5</td>\n",
" <td>dcppA-hindcast</td>\n", " <td>dcppA-hindcast</td>\n",
" <td>r8i1p2f1</td>\n", " <td>r8i1p2f1</td>\n",
" <td>1975</td>\n", " <td>1975.0</td>\n",
" <td>day</td>\n", " <td>day</td>\n",
" <td>tas</td>\n", " <td>tas</td>\n",
" <td>gn</td>\n", " <td>gn</td>\n",
...@@ -1368,7 +1370,7 @@ ...@@ -1368,7 +1370,7 @@
" <td>CanESM5</td>\n", " <td>CanESM5</td>\n",
" <td>dcppA-hindcast</td>\n", " <td>dcppA-hindcast</td>\n",
" <td>r8i1p2f1</td>\n", " <td>r8i1p2f1</td>\n",
" <td>1975</td>\n", " <td>1975.0</td>\n",
" <td>Amon</td>\n", " <td>Amon</td>\n",
" <td>tas</td>\n", " <td>tas</td>\n",
" <td>gn</td>\n", " <td>gn</td>\n",
...@@ -1381,12 +1383,12 @@ ...@@ -1381,12 +1383,12 @@
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" activity_id institution_id source_id experiment_id member_id start_year \\\n", " activity_id institution_id source_id experiment_id member_id start_year \\\n",
"0 DCPP CCCma CanESM5 dcppA-hindcast r7i1p2f1 2015 \n", "0 DCPP CCCma CanESM5 dcppA-hindcast r7i1p2f1 2015.0 \n",
"1 DCPP CCCma CanESM5 dcppA-hindcast r2i1p2f1 1977 \n", "1 DCPP CCCma CanESM5 dcppA-hindcast r2i1p2f1 1977.0 \n",
"2 DCPP CCCma CanESM5 dcppA-hindcast r2i1p2f1 1977 \n", "2 DCPP CCCma CanESM5 dcppA-hindcast r2i1p2f1 1977.0 \n",
"3 DCPP CCCma CanESM5 dcppA-hindcast r8i1p2f1 1975 \n", "3 DCPP CCCma CanESM5 dcppA-hindcast r8i1p2f1 1975.0 \n",
"4 DCPP CCCma CanESM5 dcppA-hindcast r8i1p2f1 1975 \n", "4 DCPP CCCma CanESM5 dcppA-hindcast r8i1p2f1 1975.0 \n",
"\n", "\n",
" table_id variable_id grid_label version time_range \\\n", " table_id variable_id grid_label version time_range \\\n",
"0 day tas gn v20190429 20160101-20251231 \n", "0 day tas gn v20190429 20160101-20251231 \n",
...@@ -1421,7 +1423,6 @@ ...@@ -1421,7 +1423,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"dcpp.to_csv(\"/glade/collections/cmip/catalog/glade-cmip6-dcpp.csv.gz\", compression=\"gzip\", index=False)\n",
"dcpp.to_csv(\"../catalogs/glade-cmip6-dcpp.csv.gz\", compression=\"gzip\", index=False)" "dcpp.to_csv(\"../catalogs/glade-cmip6-dcpp.csv.gz\", compression=\"gzip\", index=False)"
] ]
}, },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment