Commit 7ba242eb authored by Georg Siemund's avatar Georg Siemund
Browse files

Delete zarr_objectstorage_performancetest.ipynb

parent e87ff237
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import time\n",
"from contextlib import contextmanager\n",
"\n",
"import dask\n",
"import dask.array as dsa\n",
"import fsspec\n",
"import intake\n",
"import numpy as np\n",
"import pandas as pd\n",
"import xarray\n",
"import zarr\n",
"from dask.distributed import Client"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Free-text label for the machine this run executes on; it is stored in the\n",
"# 'hardware' field of every recorded measurement.\n",
"hardware = '256GB 24Core'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Open the intake-esm catalog (JSON hosted on swift.dkrz.de) so it can be searched below.\n",
"col_url = \"https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/swift-cmip6.json\"\n",
"col = intake.open_esm_datastore(col_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Restrict the catalog to entries with variable_id 'tasmax' and table_id 'day'.\n",
"cat = col.search(variable_id=\"tasmax\", table_id=\"day\")\n",
"# Benchmark against the first of the unique zarr paths found.\n",
"zarrpath = cat.unique(\"zarr_path\")[\"zarr_path\"][\"values\"][0]\n",
"# Expose the remote store as a key/value mapping that zarr/xarray can read from.\n",
"fsmap = fsspec.get_mapper(zarrpath)\n",
"openzarr = xarray.open_zarr(fsmap, consolidated=True)\n",
"# Array backing the tasmax variable; later cells pass it to dask.array.store\n",
"# and read its .chunksize, so it is expected to be a dask array.\n",
"data = openzarr.tasmax.data\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class DevNullStore:\n",
"    \"\"\"Write-only sink that silently discards everything assigned to it.\n",
"\n",
"    Used as the target of ``dask.array.store`` in this notebook: every chunk\n",
"    of the source array is handed to ``__setitem__`` and then dropped, so\n",
"    nothing is written anywhere.\n",
"    \"\"\"\n",
"\n",
"    def __setitem__(self, *args, **kwargs):\n",
"        \"\"\"Accept an item assignment and drop it; always returns ``None``.\"\"\"\n",
"        pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"null_store = DevNullStore()\n",
"# Sanity check: item assignment raises no error and stores nothing.\n",
"null_store['foo'] = 'bar'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Baseline timing: push every chunk of `data` into the discard-everything store.\n",
"# lock=False because the no-op target needs no write lock.\n",
"%time dsa.store(data, null_store, lock=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up Cluster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Local cluster layout: `nworker` worker processes with `threads` threads each.\n",
"# `memory_limit` is handed to Client unchanged (dask.distributed applies it per worker).\n",
"memory_limit = \"48GB\"\n",
"threads = 5\n",
"nworker = 5\n",
"client = Client(processes=True, threads_per_worker=threads, n_workers=nworker, memory_limit=memory_limit)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class DiagnosticTimer:\n",
"    \"\"\"Collect wall-clock timings of code blocks together with descriptive metadata.\"\"\"\n",
"\n",
"    def __init__(self):\n",
"        # One dict per successfully timed block: the caller's keyword metadata plus 'runtime'.\n",
"        self.diagnostics = []\n",
"\n",
"    @contextmanager\n",
"    def time(self, **kwargs):\n",
"        \"\"\"Time the body of a ``with`` block and record the result.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        **kwargs\n",
"            Arbitrary metadata stored alongside the measurement.\n",
"\n",
"        The elapsed seconds are stored under the key ``'runtime'`` (overwriting a\n",
"        caller-supplied ``runtime``). If the body raises, nothing is recorded, so\n",
"        failed runs never enter the results.\n",
"        \"\"\"\n",
"        # perf_counter() is monotonic; time.time() can jump when the system clock is adjusted.\n",
"        tic = time.perf_counter()\n",
"        yield\n",
"        kwargs[\"runtime\"] = time.perf_counter() - tic\n",
"        self.diagnostics.append(kwargs)\n",
"\n",
"    def dataframe(self):\n",
"        \"\"\"Return all recorded measurements as a ``pandas.DataFrame``, one row per timed block.\"\"\"\n",
"        return pd.DataFrame(self.diagnostics)\n",
"\n",
"diag_timer = DiagnosticTimer()\n",
"\n",
"# In-memory size of one chunk in bytes: elements per chunk times bytes per element.\n",
"chunksize = np.prod(data.chunksize) * data.dtype.itemsize"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def total_nthreads():\n",
"    \"\"\"Return the values of ``client.nthreads()`` summed over all entries.\"\"\"\n",
"    return sum(client.nthreads().values())\n",
"\n",
"def total_ncores():\n",
"    \"\"\"Return the values of ``client.ncores()`` summed over all entries.\"\"\"\n",
"    return sum(client.ncores().values())\n",
"\n",
"def total_workers():\n",
"    \"\"\"Return the number of entries in ``client.ncores()`` (one per worker).\"\"\"\n",
"    return len(client.ncores())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Metadata that stays the same for every measurement taken in this notebook run.\n",
"diag_kwargs = {\n",
"    'nbytes': data.nbytes,\n",
"    'chunksize': chunksize,\n",
"    'cloud': 'swift',\n",
"    'format': 'zarr',\n",
"    'hardware': hardware,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Record the current cluster size next to the constant run metadata.\n",
"with diag_timer.time(\n",
"    nthreads=total_nthreads(),\n",
"    ncores=total_ncores(),\n",
"    nworkers=total_workers(),\n",
"    **diag_kwargs\n",
"):\n",
"    # compute=False defers execution; the timed work is done by dask.compute below.\n",
"    future = dsa.store(data, null_store, lock=False, compute=False)\n",
"    dask.compute(future, retries=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Close the client once the measurement is done.\n",
"# NOTE(review): the local cluster it started is presumably shut down with it — confirm.\n",
"client.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Gather the recorded timings into a frame with a fresh 0..n-1 index and display it.\n",
"df = diag_timer.dataframe().reset_index(drop=True)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Datenspeicherung in eine JSON-Datei"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Disabled on purpose: would write the current run's table directly to the results file.\n",
"# NOTE(review): presumably only needed once, before the results file exists, because the\n",
"# following cell reads that file — confirm.\n",
"#df.to_json('/pf/b/b381359/zarr-in-swift-objectstorage/notebooks/zarr_performance.json')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the results already stored in the JSON results file.\n",
"already = pd.read_json('/pf/b/b381359/zarr-in-swift-objectstorage/notebooks/zarr_performance.json')\n",
"already"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine the stored results with this run's rows, renumbering the index from 0.\n",
"# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.\n",
"update = pd.concat([already, df], ignore_index=True)\n",
"update"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the combined table (stored results plus this run) back to the same file it was read from.\n",
"update.to_json('/pf/b/b381359/zarr-in-swift-objectstorage/notebooks/zarr_performance.json')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Throughput-Berechnung und Visualisierung"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nthreads</th>\n",
" <th>ncores</th>\n",
" <th>nworkers</th>\n",
" <th>nbytes</th>\n",
" <th>chunksize</th>\n",
" <th>cloud</th>\n",
" <th>format</th>\n",
" <th>hardware</th>\n",
" <th>runtime</th>\n",
" <th>throughput_MBps</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>5GB 1Core</td>\n",
" <td>154.495057</td>\n",
" <td>59.959723</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>5GB 1Core</td>\n",
" <td>66.103309</td>\n",
" <td>140.136416</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>5GB 1Core</td>\n",
" <td>65.318598</td>\n",
" <td>141.819958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>4</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>5GB 1Core</td>\n",
" <td>68.609839</td>\n",
" <td>135.016799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>5</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>5GB 1Core</td>\n",
" <td>69.241626</td>\n",
" <td>133.784855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>10GB 2Core</td>\n",
" <td>157.674033</td>\n",
" <td>58.750833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>10GB 2Core</td>\n",
" <td>55.825154</td>\n",
" <td>165.937399</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>10GB 2Core</td>\n",
" <td>38.352534</td>\n",
" <td>241.535041</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>4</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>10GB 2Core</td>\n",
" <td>37.740102</td>\n",
" <td>245.454581</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>5</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>10GB 2Core</td>\n",
" <td>38.901415</td>\n",
" <td>238.127094</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>20GB 4Core</td>\n",
" <td>162.045562</td>\n",
" <td>57.165903</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>20GB 4Core</td>\n",
" <td>58.851281</td>\n",
" <td>157.404914</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>20GB 4Core</td>\n",
" <td>38.345927</td>\n",
" <td>241.576659</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>4</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>20GB 4Core</td>\n",
" <td>24.214650</td>\n",
" <td>382.556869</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>5</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>20GB 4Core</td>\n",
" <td>23.369606</td>\n",
" <td>396.390112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>256GB 24Core</td>\n",
" <td>165.742089</td>\n",
" <td>55.890938</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>256GB 24Core</td>\n",
" <td>61.005581</td>\n",
" <td>151.846448</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>256GB 24Core</td>\n",
" <td>38.872968</td>\n",
" <td>238.301349</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>4</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>256GB 24Core</td>\n",
" <td>32.983231</td>\n",
" <td>280.854258</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>5</td>\n",
" <td>9263480832</td>\n",
" <td>8847360</td>\n",
" <td>swift</td>\n",
" <td>zarr</td>\n",
" <td>256GB 24Core</td>\n",
" <td>30.150028</td>\n",
" <td>307.246182</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nthreads ncores nworkers nbytes chunksize cloud format \\\n",
"0 1 1 1 9263480832 8847360 swift zarr \n",
"1 4 4 2 9263480832 8847360 swift zarr \n",
"2 9 9 3 9263480832 8847360 swift zarr \n",
"3 16 16 4 9263480832 8847360 swift zarr \n",
"4 25 25 5 9263480832 8847360 swift zarr \n",
"5 1 1 1 9263480832 8847360 swift zarr \n",
"6 4 4 2 9263480832 8847360 swift zarr \n",
"7 9 9 3 9263480832 8847360 swift zarr \n",
"8 16 16 4 9263480832 8847360 swift zarr \n",
"9 25 25 5 9263480832 8847360 swift zarr \n",
"10 1 1 1 9263480832 8847360 swift zarr \n",
"11 4 4 2 9263480832 8847360 swift zarr \n",
"12 9 9 3 9263480832 8847360 swift zarr \n",
"13 16 16 4 9263480832 8847360 swift zarr \n",
"14 25 25 5 9263480832 8847360 swift zarr \n",
"15 1 1 1 9263480832 8847360 swift zarr \n",
"16 4 4 2 9263480832 8847360 swift zarr \n",
"17 9 9 3 9263480832 8847360 swift zarr \n",
"18 16 16 4 9263480832 8847360 swift zarr \n",
"19 25 25 5 9263480832 8847360 swift zarr \n",
"\n",
" hardware runtime throughput_MBps \n",
"0 5GB 1Core 154.495057 59.959723 \n",
"1 5GB 1Core 66.103309 140.136416 \n",
"2 5GB 1Core 65.318598 141.819958 \n",
"3 5GB 1Core 68.609839 135.016799 \n",
"4 5GB 1Core 69.241626 133.784855 \n",
"5 10GB 2Core 157.674033 58.750833 \n",
"6 10GB 2Core 55.825154 165.937399 \n",
"7 10GB 2Core 38.352534 241.535041 \n",
"8 10GB 2Core 37.740102 245.454581 \n",
"9 10GB 2Core 38.901415 238.127094 \n",
"10 20GB 4Core 162.045562 57.165903 \n",
"11 20GB 4Core 58.851281 157.404914 \n",
"12 20GB 4Core 38.345927 241.576659 \n",
"13 20GB 4Core 24.214650 382.556869 \n",
"14 20GB 4Core 23.369606 396.390112 \n",
"15 256GB 24Core 165.742089 55.890938 \n",
"16 256GB 24Core 61.005581 151.846448 \n",
"17 256GB 24Core 38.872968 238.301349 \n",
"18 256GB 24Core 32.983231 280.854258 \n",
"19 256GB 24Core 30.150028 307.246182 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},