Skip to content
Snippets Groups Projects
Commit f1c3db32 authored by Fabian Wachsmann
Browse files

Enabled yamls

parent b156d337
No related branches found
No related tags found
No related merge requests found
Pipeline #24635 passed
%% Cell type:code id:3e5313a4-8e12-4ba3-8517-7ac34896bc6c tags:
``` python
import os
import yaml
import json
import itertools
```
%% Cell type:code id:6b814955-2ca4-40c1-b4cb-0b70cf58c078 tags:
``` python
# Root directory that holds the catalog files; the templates live in the
# "Templates" subdirectory directly below it.
TRUNK = "/pool/data/Catalogs"
TEMPLATETRUNK = f"{TRUNK}/Templates"
```
%% Cell type:code id:49f39d3f-383d-4ca4-9387-f718bc0ec3a9 tags:
``` python
# Collect the catalog files found directly in TRUNK: every JSON file plus
# every YAML file whose name does not contain 'dkrz_catalog.yaml' (the master
# catalog written at the end of this notebook).
catalogs = [
    fname
    for fname in os.listdir(TRUNK)
    if fname.endswith('.json')
    or (fname.endswith('.yaml') and 'dkrz_catalog.yaml' not in fname)
]
```
%% Cell type:code id:dd7ad1df-9e36-4a98-9092-f0b91dff543f tags:
``` python
# Show the collected catalog file names as the cell output.
catalogs
```
%% Cell type:code id:b8b86147-b04d-4414-9849-a30287dfaedc tags:
``` python
# Template files: every JSON file in TEMPLATETRUNK whose name does not
# contain 'cmip6'.
templates = [
    fname
    for fname in os.listdir(TEMPLATETRUNK)
    if fname.endswith('.json') and 'cmip6' not in fname
]
```
%% Cell type:code id:2d81eb6d-cfa6-4e61-91f9-9872b968f8b4 tags:
``` python
# Map each template's short name (the last '_'-separated token of the file
# name up to its first '.') to the column names listed under "attributes".
coltmpl = {}
for templ in templates:
    with open(TEMPLATETRUNK + "/" + templ, "r") as f:
        templ_json = json.load(f)
    stem = templ.split('.')[0]
    templname = stem.split('_')[-1]
    coltmpl[templname] = [
        attribute["column_name"] for attribute in templ_json["attributes"]
    ]
```
%% Cell type:code id:d3b81ddb-9506-4fb9-95c9-bffe3d1129a6 tags:
``` python
# Per-catalog column bookkeeping, keyed by the catalog name (file name up to
# its first '.', with the first '_'-separated token dropped):
#   allcols - every column named in the catalog's "attributes"
#   usecols - the catalog's "default_columns", or all template columns if absent
#   coladd  - catalog columns found neither in any template nor in usecols
coladd = {}
allcols = {}
usecols = {}
# Flattened columns of all templates; coltmpl is not modified in the loop.
template_columns = list(itertools.chain.from_iterable(coltmpl.values()))
for cat in catalogs:
    # Only JSON files are parsed below; anything with "yaml" in its name is skipped.
    if "yaml" in cat:
        continue
    with open(TRUNK + "/" + cat, "r") as f:
        cat_json = json.load(f)
    catname = '_'.join(cat.split('.')[0].split('_')[1:])
    allcols[catname] = [col["column_name"] for col in cat_json["attributes"]]
    if "default_columns" in cat_json:
        usecols[catname] = cat_json["default_columns"]
    else:
        usecols[catname] = list(set(template_columns))
    print(catname, usecols[catname])
    known_columns = set(template_columns + usecols[catname])
    add = list(set(allcols[catname]) - known_columns)
    # Catalogs with "cloud" in their file name never get additional columns.
    if add and "cloud" not in cat:
        coladd[catname] = add
```
%% Cell type:code id:da891779-890b-4292-95cf-92a055076923 tags:
``` python
# Top-level part of the master catalog: description, the plugin module to
# load, and an (initially empty) table of user parameters.
header = {
    "description": "DKRZ master catalog for all /pool/data catalogs available",
    "plugins": {
        "source": [{"module": "intake-esm"}],
    },
    "metadata": {
        "parameters": {},
    },
}
```
%% Cell type:code id:a8e3b581-c2aa-4cb1-9d12-dea50f7ecfdc tags:
``` python
# Register each template's column list as a "<template>_columns" parameter
# of type list[str] in the header.
header["metadata"]["parameters"].update(
    {
        name + "_columns": {"type": "list[str]", "default": columns}
        for name, columns in coltmpl.items()
    }
)
```
%% Cell type:code id:4c6dacbc-b5ce-4a26-bd62-7514626cacfe tags:
``` python
# Register each catalog's extra columns as an "additional_<catalog>_columns"
# parameter of type list[str] in the header.
header["metadata"]["parameters"].update(
    {
        "additional_" + name + "_columns": {"type": "list[str]", "default": columns}
        for name, columns in coladd.items()
    }
)
```
%% Cell type:code id:bda172ee-8537-4bc2-adf8-5baad765b19b tags:
``` python
# Build one source entry per catalog file, keyed by the file name up to its
# first '.'.
sources = {}
for k in catalogs:
    kentry = k.split('.')[0]
    catname = '_'.join(kentry.split('_')[1:])

    if "yaml" in k:
        # Of the YAML files only the monsoon one gets an entry, pointing at a
        # nested YAML catalog.
        # NOTE(review): other YAML files are skipped without an entry - confirm.
        if "monsoon" in k:
            sources[kentry] = {
                "args": {
                    "path": "/pool/data/Catalogs/dkrz_monsoon_disk.yaml",
                },
                "description": "Monsoon 2.0",
                "driver": "yaml_file_cat",
            }
        continue

    kpath = TRUNK + '/' + k
    with open(kpath, 'r') as f:
        content = json.load(f)
    descr = content["description"]

    use_cols = usecols[catname]
    print(catname, use_cols)
    # "format" and "uri" are appended when missing. The append is in place, so
    # usecols[catname] grows as well; the file name is printed for each append.
    for required in ("format", "uri"):
        if required not in use_cols:
            print(k)
            use_cols.append(required)
    # dyamond and nextgems catalogs use the "cataloonie" template columns
    # instead, replacing the list prepared above.
    if "dyamond" in k or "nextgems" in k:
        use_cols = coltmpl["cataloonie"]

    sources[kentry] = {
        "args": {
            "esmcol_obj": kpath,
            "csv_kwargs": {"usecols": use_cols},
        },
        "description": descr,
        "driver": ["intake.open_esm_datastore"],
    }
sourcesdict = {"sources": sources}
```
%% Cell type:code id:23fc6855-f1c0-4545-8361-e074450636a3 tags:
``` python
# Show the assembled source entries as the cell output.
sources
```
%% Cell type:code id:f5f68748-c415-48f4-82cd-37390d2fc51e tags:
``` python
# Write the master catalog file: the YAML dump of the header keys followed
# directly by the YAML dump of the "sources" mapping.
with open("/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml", "w") as f:
    f.write(yaml.dump(header))
    f.write(yaml.dump(sourcesdict))
```
%% Cell type:code id:18d86aac-ad20-4c4c-a971-39a9e62c6740 tags:
``` python
```
%% Cell type:code id:c401b8a7-1bb8-452e-83d1-207434ae5c60 tags:
``` python
```
%% Cell type:code id:aac720d5-2de7-4517-a7ee-6b540b9892fa tags:
``` python
```
%% Cell type:code id:dfd9ffb8-291b-4854-aa10-031f60550211 tags:
``` python
```
......
%% Cell type:code id:116bb321-0bbb-4e85-804a-8a75932d9e46 tags:
``` python
import glob
import pandas as pd
import json
```
%% Cell type:code id:60ccbb3d-0f0c-467e-bb43-4c025fe3ea05 tags:
``` python
# All dkrz*.json files plus the dkrz*.yaml files in disk-access/, leaving out
# any path that contains "dkrz_catalog.yaml".
cats = glob.glob("disk-access/dkrz*.json")
cats.extend(
    path
    for path in glob.glob("disk-access/dkrz*.yaml")
    if "dkrz_catalog.yaml" not in path
)
```
%% Cell type:code id:7dc15e3e-b391-4ac3-8364-40bdb970afec tags:
``` python
# Show the collected disk-access catalog paths as the cell output.
cats
```
%% Cell type:code id:b98dd45c-aa97-4b36-9fce-28158467e107 tags:
``` python
# For every disk-access catalog (dyamond, nextgems and monsoon excluded),
# write a copy into cloud-access/ whose description is extended and whose
# "catalog_file" points at the <id>.csv.gz object on swift.dkrz.de.
skipped_tags = ("dyamond", "nextgems", "monsoon")
for cat in cats:
    targetfile = cat.split('/')[1]
    target = "cloud-access/" + targetfile
    if any(tag in cat for tag in skipped_tags):
        continue
    with open(cat, "r") as f:
        catjson = json.load(f)
    catjson["description"] = (
        catjson["description"]
        + " which will be loaded from a source file which is in the cloud (see catalog_file)"
    )
    catjson["catalog_file"] = f"https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/{catjson['id']}.csv.gz"
    with open(target, "w") as f:
        json.dump(catjson, f, indent=4)
```
%% Cell type:code id:2d1dfe72-e476-45a8-a1d6-2a2b8f3cbbb3 tags:
``` python
# Derive cloud-access/dkrz_catalog.yaml from the disk-access one by replacing
# every "/pool/data/Catalogs/" prefix with the raw GitLab URL of the
# cloud-access directory (';' is used as the sed delimiter).
!sed 's;/pool/data/Catalogs/;https://gitlab.dkrz.de/data-infrastructure-services/intake-esm/-/raw/master/esm-collections/cloud-access/;g' disk-access/dkrz_catalog.yaml >cloud-access/dkrz_catalog.yaml
```
%% Cell type:code id:2d985cb4-ebc0-4906-9f21-fed42ffcb4a3 tags:
%% Cell type:code id:c93e028c-35e4-4d0f-9f7a-4d39da7c32c0 tags:
``` python
import intake
# Open the disk-access master catalog from the local checkout.
cat=intake.open_catalog(["/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml"])
# Show what iterating the catalog yields as the cell output.
list(cat)
```
%% Cell type:code id:f12eb1e6-ea16-429b-ac69-a5116b480260 tags:
``` python
# Inspect the luk1000 attribute of the dkrz_monsoon_disk entry
# (presumably an entry of the nested monsoon YAML catalog - confirm).
cat.dkrz_monsoon_disk.luk1000
```
%% Cell type:code id:a8b061f6-089d-4edd-a9eb-6a2f206acb21 tags:
``` python
# Rebind `cat` to the catalog opened from the https://dkrz.de/s/intake URL.
cat=intake.open_catalog(["https://dkrz.de/s/intake"])
```
%% Cell type:code id:7d1c7b3c-414d-46ad-965d-b5701786d244 tags:
``` python
# Show what iterating the currently opened catalog yields.
list(cat)
```
%% Cell type:code id:f2999fd8-5505-4d62-965a-fdc69a71b571 tags:
``` python
# Inspect the dkrz_mpige_disk entry of the opened catalog.
cat.dkrz_mpige_disk
```
%% Cell type:code id:cf1f02ac-5c49-4148-b73e-940991dab446 tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment