diff --git a/archive-catalog.sh b/archive-catalog.sh
index b7ae2bae4e75c9eb31705458a96c8e7798701a52..21aca2e626fd4754a5c2ef08b3a0a614c61bbf52 100755
--- a/archive-catalog.sh
+++ b/archive-catalog.sh
@@ -3,7 +3,7 @@ set -e
 project=$1
 #path with /mnt/mount2
 path=$2
-newcatalogzip=/home/k/k204210/intake-esm/catalogs/dkrz_${project}_disk.csv.gz
+newcatalogzip=/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_${project}_disk.csv.gz
 oldcatalogzip=${path}/Catalogs/dkrz_${project}_disk.csv.gz
 oldcatalogArchDir="${path}/Catalogs/archive"
 mkdir -p ${oldcatalogArchDir}
diff --git a/builder/data-pool_collect-create-main.ipynb b/builder/data-pool_collect-create-main.ipynb
index 388aaa5e1bbc5ec95fd7609814e0d31732a6ec1f..a00e96a4122ab1cf57e016ff40b57437fd86f107 100644
--- a/builder/data-pool_collect-create-main.ipynb
+++ b/builder/data-pool_collect-create-main.ipynb
@@ -190,7 +190,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open(\"/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml\",\"w\") as f:\n",
+    "with open(\"/home/k/k204210/volume/data-infrastructure-services/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml\",\"w\") as f:\n",
     "    f.write(yaml.dump(header))\n",
     "    f.write(yaml.dump(sourcesdict))"
    ]
diff --git a/builder/dkrz_cmip5_disk_catalog.py b/builder/dkrz_cmip5_disk_catalog.py
index 528804062bb362fd78a7c4a06aba1f458bfaaa39..b0d916b9d53e8f468906c46a8a0c5722a406b8fc 100644
--- a/builder/dkrz_cmip5_disk_catalog.py
+++ b/builder/dkrz_cmip5_disk_catalog.py
@@ -1,24 +1,25 @@
 import sys
-sys.path.insert(0, '/home/k/k204210/intake-esm/builder/ncar-builder/builders/')
+sys.path.insert(0, '/home/k/k204210/volume/data-infrastructure-services/intake-esm/builder/ncar-builder/builders/')
 from cmip import build_cmip
 import pandas as pd
 from tqdm import tqdm
 import ast
 
-root_path="/mnt/lustre/work/kd0956/CMIP5/data/cmip5"
+root_path="/work/kd0956/CMIP5/data/cmip5"
 depth=4
 pick_latest_version="y"
 cmip_version=5
-csv_filepath="/home/k/k204210/intake-esm/catalogs/dkrz_cmip5_disk.csv.gz"
+csv_filepath="/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip5_disk.csv.gz"
 
 df = build_cmip(root_path, cmip_version, depth=depth, pick_latest_version=pick_latest_version)
 
+print(len(df["temporal_subset"]))  # print the number of parsed catalog entries
+
 df["path"]=df["path"].str.replace("/mnt/lustre/",'/')
 
 df["project"]="cmip5"
 df["institution_id"]=df["institute"]
 df["source_id"]=df["model"]
-df["experiment_id"]=df["experiment"]
 df["simulation_id"]=df["ensemble_member"]
 df["realm"]=df["modeling_realm"]
 df["experiment_id"]=df["experiment"]
@@ -28,9 +29,39 @@ df["time_max"]=df["time_range"].str.split('-').str[1]
 df["format"]="netcdf"
 df["uri"]=df["path"]
 df["variable_id"]=df["variable"]
-df['grid_id']="unkown"
-df['level_type']="unkown"
-df['time_reduction']="unkown"
-df['grid_label']="unkown"
+df['grid_id']="None"
+df['level_type']="None"
+df['time_reduction']="None"
+df['grid_label']="None"
+
+save_columns = ["project",
+                "product_id",
+                "institute",
+                "model",
+                "experiment",
+                "frequency",
+                "modeling_realm",
+                "mip_table",
+                "ensemble_member",
+                "version",
+                "variable",
+                "temporal_subset",
+                "institution_id",
+                "source_id",
+                "experiment_id",
+                "variable_id",
+                "grid_label",
+                "realm",
+                "level_type",
+                "time_range",
+                "time_min",
+                "time_max",
+                "simulation_id",
+                "grid_id",
+                "time_reduction",
+                "format",
+                "uri"]
 
+df = df[save_columns]
+df = df.sort_values(save_columns, ascending = True).reset_index(drop=True)
 df.to_csv(csv_filepath, compression='gzip', index=False)
diff --git a/builder/dkrz_cmip6_disk_catalog.py b/builder/dkrz_cmip6_disk_catalog.py
index b9bcaacc67c196962df6d6b404d733fdd05cea35..4a17107227a56eb000aea5097b0635ee50bfcbf7 100755
--- a/builder/dkrz_cmip6_disk_catalog.py
+++ b/builder/dkrz_cmip6_disk_catalog.py
@@ -1,12 +1,12 @@
 import sys
-sys.path.insert(0, '/home/k/k204210/intake-esm/builder/ncar-builder/builders/')
+sys.path.insert(0, '/home/k/k204210/volume/data-infrastructure-services/intake-esm/builder/ncar-builder/builders/')
 from cmip import build_cmip
 import pandas as pd
 from tqdm import tqdm
 import ast
 import json
 
-root_path="/mnt/lustre/work/ik1017/CMIP6/data/CMIP6"
+root_path="/work/ik1017/CMIP6/data/CMIP6"
 depth=4
 pick_latest_version="y"
 cmip_version=6
@@ -19,7 +19,8 @@ df.to_csv(csv_filepath, compression='gzip', index=False)
 # Add PIDs and OpenDAPs to catalog
 
 df=pd.read_csv(csv_filepath)
-pids=pd.read_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk_filepids.csv.gz")
+pids=pd.read_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk_filepids.csv.gz",
+                low_memory=True)
 pids["file_pids"]=pids["file_pids"].str.replace("'",'"')
 pids_dict=[]
 for index,row in tqdm(pids.iterrows()):
@@ -34,15 +35,18 @@ pids_df=pd.DataFrame.from_records(pids_dict, columns=["path", "file_pid"])
 pids_df["path"]=pids_df["path"].str.replace("lustre02","lustre")
 df=df.merge(pids_df, how='left', on='path')
 
-df.to_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6pids_disk.csv.gz", compression="gzip", index=False)
+df.to_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6pids_disk.csv.gz", compression="gzip", index=False)
 
 df.drop(columns="file_pid", inplace=True)
 
+del pids
+del pids_df
+
 df["path"]=df["path"].str.replace("/mnt/lustre/",'/')
 # Add long name information
 
-catalog_basedir="/mnt/lustre02/work/ik1017/Catalogs"
-c6tables_basedir="/home/k/k204210/cmip6-cmor-tables/Tables/"
+catalog_basedir="/work/ik1017/Catalogs"
+c6tables_basedir="/home/k/k204210/volume/dicad/cdo-incl-cmor/configuration/cmip6/cmip6-cmor-tables/Tables/"
 
 mip_tables={}
 for table in df["table_id"].unique():
@@ -92,8 +96,9 @@ df["uri"]=df["path"]
 # Add opendapurl
 
 def get_opendap(row):
-    filename=row["path"]
-    opendaptrunk="/".join(filename.split("/")[8:])
+    # receives the path string directly (mapped over df["path"] below)
+    filename=row
+    opendaptrunk="/".join(filename.split("/")[6:])
 #    try:
 #        opendapurl="http://esgf3.dkrz.de/thredds/dodsC/cmip6/"+opendaptrunk
 #        headers=requests.head(opendapurl).headers
@@ -105,7 +110,19 @@ def get_opendap(row):
 #        return np.nan
     return "http://esgf3.dkrz.de/thredds/dodsC/cmip6/"+opendaptrunk
 
-df["opendap_url"] =df.apply(lambda row: get_opendap(row), axis=1)
+# compress
+category_cols = ['activity_id', 'institution_id',
+    'source_id', 'experiment_id', 'member_id', 'table_id', 'variable_id',
+    'grid_label',
+    'project', 'simulation_id', 'grid_id', 'frequency',
+    'time_reduction', 'long_name', 'units', 'realm',
+    'format']
+
+df=df.astype({key: "category"
+    for key in category_cols
+    })
+
+df["opendap_url"] =df["path"].map(lambda row: get_opendap(row))
 
 columns = ['activity_id', 'institution_id',
     'source_id', 'experiment_id', 'member_id', 'table_id', 'variable_id',
@@ -116,4 +133,4 @@ columns = ['activity_id', 'institution_id',
 
 df = df[columns]
 df = df.sort_values(columns, ascending = True).reset_index(drop=True)
-df.to_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz", compression="gzip", index=False)
\ No newline at end of file
+df.to_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cmip6_disk.csv.gz", compression="gzip", index=False)
diff --git a/builder/dkrz_cordex_disk_catalog.py b/builder/dkrz_cordex_disk_catalog.py
index e68e45ea1c7964fbf14f6fc75ab773d933efd483..eb7fc76ea105812427a98101fbe71554501926fc 100755
--- a/builder/dkrz_cordex_disk_catalog.py
+++ b/builder/dkrz_cordex_disk_catalog.py
@@ -206,7 +206,7 @@ invalids
 # In[43]:
 
 
-with open('/home/k/k204210/intake-esm/invalids-cordex.txt', 'w') as f :
+with open('/home/k/k204210/volume/data-infrastructure-services/intake-esm/invalids-cordex.txt', 'w') as f :
     for file in invalids.path.values :
         f.write(file+"\n")
 
@@ -283,8 +283,34 @@ df.loc[:,"format"]="netcdf"
 df.loc[:,"uri"]=df["path"]
 df[["time_min","time_max"]]=df["time_range"].str.split('-',expand=True,n=1)
 
-columns = ["project","product_id", "CORDEX_domain", "institute_id", "driving_model_id", "experiment_id", "member", "model_id", "institution_id", "simulation_id", "source_id", "grid_label","grid_id", "time_reduction", "realm", "level_type",
-           "rcm_version_id", "frequency", "variable_id", "version", "time_range", "time_min", "time_max", "path", "format", "uri", "opendap_url"]
+columns = ["project",
+           "product_id",
+           "CORDEX_domain",
+           "institute_id",
+           "driving_model_id",
+           "experiment_id",
+           "member",
+           "model_id",
+           "rcm_version_id",
+           "frequency", 
+           "variable_id", 
+           "version",
+           "time_range", 
+           "uri",
+           "institution_id",
+           "source_id",
+           "simulation_id",
+           "grid_label",
+           "grid_id",
+           "time_reduction",
+           "realm",
+           "level_type",
+           "time_min", 
+           "time_max", 
+           "path",
+           "format",
+           "opendap_url"]
+
 df = df[columns]
 df = df.sort_values(columns, ascending = True).reset_index(drop=True)
 df.head()
@@ -294,7 +320,7 @@ df.head()
 
 
 #df.to_csv("./mistral-cmip6.csv.gz", compression="gzip", index=False)
-df.to_csv("/home/k/k204210/intake-esm/catalogs/dkrz_cordex_disk.csv.gz", compression="gzip", index=False)
+df.to_csv("/home/k/k204210/volume/data-infrastructure-services/intake-esm/catalogs/dkrz_cordex_disk.csv.gz", compression="gzip", index=False)
 
 
 # In[ ]:
diff --git a/builder/mistral-cmip6_catalog.ipynb b/builder/mistral-cmip6_catalog.ipynb
deleted file mode 100755
index 9082eb72c391dd266da3cf450e24ebd0bd389241..0000000000000000000000000000000000000000
--- a/builder/mistral-cmip6_catalog.ipynb
+++ /dev/null
@@ -1,533 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import fnmatch\n",
-    "import dask.dataframe as dd\n",
-    "from intake.source.utils import reverse_format\n",
-    "import os\n",
-    "import re\n",
-    "import subprocess\n",
-    "from tqdm.auto import tqdm\n",
-    "from pathlib import Path\n",
-    "import shutil\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_file_list(persist_path):\n",
-    "    persist_path = Path(persist_path)\n",
-    "    if persist_path.exists():\n",
-    "        shutil.rmtree(persist_path)\n",
-    "    persist_path.mkdir()\n",
-    "    root = Path(\"/mnt/lustre02/work/ik1017/CMIP6/data/CMIP6\")\n",
-    "    dirs = [x for x in root.iterdir() if x.is_dir()]\n",
-    "    for directory in tqdm(dirs):\n",
-    "        print(directory)\n",
-    "        stem = directory.stem\n",
-    "        f = open(f\"{persist_path}/{stem}.txt\", \"w\")\n",
-    "        cmd = [\"find\", \"-L\", directory.as_posix(), \"-name\", \"*.nc\", \"-perm\", \"-444\"]\n",
-    "        p = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=f)\n",
-    "        p.wait()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "persist_path = \"/home/dkrz/k204210/intake-esm/CMIP6_filelist\"\n",
-    "get_file_list(persist_path)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "\n",
-    "## Extract attributes of a file using information from CMIP6 DRS.\n",
-    "\n",
-    "\n",
-    "References\n",
-    " 1. CMIP6 DRS: http://goo.gl/v1drZl\n",
-    " 2. Controlled Vocabularies (CVs) for use in CMIP6:\n",
-    "    https://github.com/WCRP-CMIP/CMIP6_CVs\n",
-    "    \n",
-    "    \n",
-    "Directory structure =\n",
-    "```<mip_era>/\n",
-    "    <activity_id>/\n",
-    "        <institution_id>/\n",
-    "            <source_id>/\n",
-    "                <experiment_id>/\n",
-    "                    <member_id>/\n",
-    "                        <table_id>/\n",
-    "                            <variable_id>/\n",
-    "                                <grid_label>/\n",
-    "                                    <version>\n",
-    "```\n",
-    "file name =\n",
-    "```<variable_id>_<table_id>_<source_id>_<experiment_id >_<member_id>_<grid_label>[_<time_range>].nc```\n",
-    "For time-invariant fields, the last segment (time_range) above is omitted.\n",
-    "Example when there is no sub-experiment: `tas_Amon_GFDL-CM4_historical_r1i1p1f1_gn_196001-199912.nc`\n",
-    "Example with a sub-experiment:   `pr_day_CNRM-CM6-1_dcppA-hindcast_s1960-r2i1p1f1_gn_198001-198412.nc`\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "activity_ids = list(Path(persist_path).rglob(\"*.txt\"))\n",
-    "activity_ids = [activity_id.stem for activity_id in activity_ids]\n",
-    "activity_ids"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = dd.read_csv(f\"{persist_path}/*.txt\", header=None).compute()\n",
-    "df.columns = [\"path\"]\n",
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def _reverse_filename_format(file_basename, filename_template=None, gridspec_template=None):\n",
-    "    \"\"\"\n",
-    "    Uses intake's ``reverse_format`` utility to reverse the string method format.\n",
-    "    Given format_string and resolved_string, find arguments\n",
-    "    that would give format_string.format(arguments) == resolved_string\n",
-    "    \"\"\"\n",
-    "    try:\n",
-    "        return reverse_format(filename_template, file_basename)\n",
-    "    except ValueError:\n",
-    "        try:\n",
-    "            return reverse_format(gridspec_template, file_basename)\n",
-    "        except:\n",
-    "            print(\n",
-    "                f'Failed to parse file: {file_basename} using patterns: {filename_template} and {gridspec_template}'\n",
-    "            )\n",
-    "            return {}\n",
-    "            \n",
-    "def _extract_attr_with_regex(input_str, regex, strip_chars=None):\n",
-    "    pattern = re.compile(regex, re.IGNORECASE)\n",
-    "    match = re.findall(pattern, input_str)\n",
-    "    if match:\n",
-    "        match = max(match, key=len)\n",
-    "        if strip_chars:\n",
-    "            match = match.strip(strip_chars)\n",
-    "\n",
-    "        else:\n",
-    "            match = match.strip()\n",
-    "\n",
-    "        return match\n",
-    "\n",
-    "    else:\n",
-    "        return None\n",
-    "    \n",
-    "\n",
-    "exclude_patterns = ['*/files/*', '*/latest/*']\n",
-    "def _filter_func(path):\n",
-    "    return not any(\n",
-    "        fnmatch.fnmatch(path, pat=exclude_pattern) for exclude_pattern in exclude_patterns\n",
-    "    )\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Add a filter for the retracted files:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "retracted_list=\"/mnt/lustre02/work/ik1017/CMIP6/meta/File.lst/Retracted/CMIP6_retracted_2020-12-10.list\"\n",
-    "df_ret = dd.read_csv(retracted_list, header=None).compute()\n",
-    "df_ret[0]=df_ret[0].str.split(\"/\").str[1:].str.join(\"/\")\n",
-    "#\n",
-    "df[\"path\"]=df[\"path\"].str.split(\"/\").str[7:].str.join(\"/\")\n",
-    "df = df[~df[\"path\"].isin(df_ret[0])].reset_index(drop=True)\n",
-    "df[\"path\"]=\"/mnt/lustre02/work/ik1017/CMIP6/data/\"+df[\"path\"].astype(str)\n",
-    "#\n",
-    "del df_ret"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "files = df.path.tolist()\n",
-    "filelist = list(filter(_filter_func, files))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(filelist)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_attrs(filepath):\n",
-    "    basename = os.path.basename(filepath)\n",
-    "    dirname = os.path.dirname(filepath)\n",
-    "    filename_template = '{variable_id}_{table_id}_{source_id}_{experiment_id}_{member_id}_{grid_label}_{time_range}.nc'\n",
-    "\n",
-    "    gridspec_template = (\n",
-    "                '{variable_id}_{table_id}_{source_id}_{experiment_id}_{member_id}_{grid_label}.nc'\n",
-    "            )\n",
-    "\n",
-    "    f = _reverse_filename_format(\n",
-    "            basename, filename_template=filename_template, gridspec_template=gridspec_template\n",
-    "        )\n",
-    "\n",
-    "    fileparts = {}\n",
-    "    fileparts.update(f)\n",
-    "    parent = os.path.dirname(filepath).strip('/')\n",
-    "    parent_split = parent.split(f\"/{fileparts['source_id']}/\")\n",
-    "    part_1 = parent_split[0].strip('/').split('/')\n",
-    "    grid_label = parent.split(f\"/{fileparts['variable_id']}/\")[1].strip('/').split('/')[0]\n",
-    "    frequency = parent.split(f\"/{fileparts['variable_id']}/\")[1].strip('/').split('/')[0]\n",
-    "    fileparts['grid_label'] = grid_label\n",
-    "    fileparts['frequency'] = frequency\n",
-    "    fileparts['activity_id'] = part_1[-2]\n",
-    "    fileparts['institution_id'] = part_1[-1]\n",
-    "    version_regex = r'v\\d{4}\\d{2}\\d{2}|v\\d{1}'\n",
-    "    version = _extract_attr_with_regex(parent, regex=version_regex) or 'v0'\n",
-    "    fileparts['version'] = version\n",
-    "    fileparts['path'] = filepath\n",
-    "    return fileparts "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "entries = list(map(get_attrs, filelist))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "entries[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(entries)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df1 = pd.DataFrame(entries)\n",
-    "df1.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Some entries are invalid\n",
-    "invalids = df1[~df1.activity_id.isin(activity_ids)]\n",
-    "df = df1[df1.activity_id.isin(activity_ids)]\n",
-    "invalids"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open('/home/dkrz/k204210/intake-esm/invalids-cmip6.txt', 'w') as f :\n",
-    "    for file in invalids.path.values :\n",
-    "        f.write(file+\"\\n\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "invalids.path.tolist()\n",
-    "\n",
-    "## Keep latest version"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Pick the latest versions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#df = df1\n",
-    "grpby = list(set(df.columns.tolist()) - {'path', 'version', 'time_range'})\n",
-    "groups = df.groupby(grpby)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "idx_to_remove = []\n",
-    "for _, group in groups:\n",
-    "    if group.version.nunique() > 1:\n",
-    "        recentVersion=group.sort_values(by=['version'], ascending=False)[\"version\"].iloc[0]\n",
-    "        idx_to_remove.extend(group[group[\"version\"]!= recentVersion].index[:].values.tolist())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(idx_to_remove)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open('/home/dkrz/k204210/intake-esm/oldVersions.txt', 'w') as f:\n",
-    "    for file in df.path[idx_to_remove].values:\n",
-    "        f.write(file+\"\\n\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = df.drop(index=idx_to_remove)\n",
-    "len(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df[\"dcpp_init_year\"] = df.member_id.map(lambda x: float(x.split(\"-\")[0][1:] if x.startswith(\"s\") else np.nan))\n",
-    "df[\"member_id\"] = df[\"member_id\"].map(lambda x: x.split(\"-\")[-1] if x.startswith(\"s\") else x)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Add PIDs and OpenDAPs to catalog"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#def get_pid(row):\n",
-    "#    file=row[\"path\"]\n",
-    "#    try:\n",
-    "#        filepid = !ncdump -h {file} | grep \"tracking_id =\" | cut -d '\"' -f 2 | cut -d ':' -f 2\n",
-    "#        return filepid\n",
-    "#    except:\n",
-    "#        return np.nan"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_opendap(row):\n",
-    "    filename=row[\"path\"]\n",
-    "    opendaptrunk=\"/\".join(filename.split(\"/\")[8:])\n",
-    "    try:\n",
-    "        opendapurl=\"http://esgf3.dkrz.de/thredds/dodsC/cmip6/\"+opendaptrunk\n",
-    "        return opendapurl\n",
-    "    except:\n",
-    "        return np.nan"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#df[\"pid\"] = df.apply(lambda row: get_pid(row), axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df[\"opendap_url\"] =df.apply(lambda row: get_opendap(row), axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "columns = [\"activity_id\", \"institution_id\", \"source_id\", \"experiment_id\", \"member_id\", \"table_id\", \"variable_id\",\n",
-    "           \"grid_label\", \"dcpp_init_year\", \"version\", \"time_range\", \"path\", \"opendap_url\"]\n",
-    "df = df[columns]\n",
-    "df = df.sort_values(columns, ascending = True).reset_index(drop=True)\n",
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.to_csv(\"/home/dkrz/k204210/intake-esm/mistral-cmip6.csv.gz\", compression=\"gzip\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#len(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python [conda env:root] *",
-   "language": "python",
-   "name": "conda-root-py"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}