Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
intake-esm_support
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
data-infrastructure-services
intake-esm_support
Commits
f1c3db32
Commit
f1c3db32
authored
2 years ago
by
Fabian Wachsmann
Browse files
Options
Downloads
Patches
Plain Diff
Enabled yamls
parent
b156d337
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Pipeline
#24635
passed
2 years ago
Stage: build
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
builder/data-pool_collect-create-main.ipynb
+24
-2
24 additions, 2 deletions
builder/data-pool_collect-create-main.ipynb
esm-collections/create_cloud_parent.ipynb
+77
-9
77 additions, 9 deletions
esm-collections/create_cloud_parent.ipynb
with
101 additions
and
11 deletions
builder/data-pool_collect-create-main.ipynb
+
24
−
2
View file @
f1c3db32
...
...
@@ -33,7 +33,7 @@
"source": [
"catalogs=[cat \n",
" for cat in os.listdir(TRUNK)\n",
" if cat.endswith('.json')]"
" if cat.endswith('.json')
or ((cat.endswith('.yaml')) & ('dkrz_catalog.yaml' not in cat))
]"
]
},
{
...
...
@@ -84,6 +84,8 @@
"allcols={}\n",
"usecols={}\n",
"for cat in catalogs:\n",
" if \"yaml\" in cat:\n",
" continue\n",
" with open(TRUNK+\"/\"+cat, \"r\") as f:\n",
" cat_json=json.load(f)\n",
" catname='_'.join(cat.split('.')[0].split('_')[1:])\n",
...
...
@@ -156,6 +158,16 @@
"for k in catalogs:\n",
" kentry=k.split('.')[0]\n",
" catname='_'.join(k.split('.')[0].split('_')[1:])\n",
" if \"yaml\" in k:\n",
" if \"monsoon\" in k:\n",
" sources[kentry]=dict(\n",
" args=dict(\n",
" path=\"/pool/data/Catalogs/dkrz_monsoon_disk.yaml\"\n",
" ),\n",
" description=\"Monsoon 2.0\",\n",
" driver=\"yaml_file_cat\"\n",
" )\n",
" continue\n",
" kpath=TRUNK+'/'+k\n",
" with open(kpath,'r') as f:\n",
" content=json.load(f)\n",
...
...
@@ -183,6 +195,16 @@
"sourcesdict={\"sources\":sources}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23fc6855-f1c0-4545-8361-e074450636a3",
"metadata": {},
"outputs": [],
"source": [
"sources"
]
},
{
"cell_type": "code",
"execution_count": null,
...
...
@@ -190,7 +212,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open(\"/home/k/k204210/
volume/data-infrastructure-services/
intake-esm/esm-collections/disk-access/dkrz_catalog.yaml\",\"w\") as f:\n",
"with open(\"/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml\",\"w\") as f:\n",
" f.write(yaml.dump(header))\n",
" f.write(yaml.dump(sourcesdict))"
]
...
...
%% Cell type:code id:3e5313a4-8e12-4ba3-8517-7ac34896bc6c tags:
```
python
import
os
import
yaml
import
json
import
itertools
```
%% Cell type:code id:6b814955-2ca4-40c1-b4cb-0b70cf58c078 tags:
```
python
TRUNK
=
"
/pool/data/Catalogs
"
TEMPLATETRUNK
=
TRUNK
+
"
/
"
+
"
Templates
"
```
%% Cell type:code id:49f39d3f-383d-4ca4-9387-f718bc0ec3a9 tags:
```
python
catalogs
=
[
cat
for
cat
in
os
.
listdir
(
TRUNK
)
if
cat
.
endswith
(
'
.json
'
)]
if
cat
.
endswith
(
'
.json
'
)
or
((
cat
.
endswith
(
'
.yaml
'
))
&
(
'
dkrz_catalog.yaml
'
not
in
cat
))
]
```
%% Cell type:code id:dd7ad1df-9e36-4a98-9092-f0b91dff543f tags:
```
python
catalogs
```
%% Cell type:code id:b8b86147-b04d-4414-9849-a30287dfaedc tags:
```
python
templates
=
[
cat
for
cat
in
os
.
listdir
(
TEMPLATETRUNK
)
if
cat
.
endswith
(
'
.json
'
)
and
'
cmip6
'
not
in
cat
]
```
%% Cell type:code id:2d81eb6d-cfa6-4e61-91f9-9872b968f8b4 tags:
```
python
coltmpl
=
{}
for
templ
in
templates
:
with
open
(
TEMPLATETRUNK
+
"
/
"
+
templ
,
"
r
"
)
as
f
:
templ_json
=
json
.
load
(
f
)
templname
=
templ
.
split
(
'
.
'
)[
0
].
split
(
'
_
'
)[
-
1
]
coltmpl
[
templname
]
=
[
col
[
"
column_name
"
]
for
col
in
templ_json
[
"
attributes
"
]]
```
%% Cell type:code id:d3b81ddb-9506-4fb9-95c9-bffe3d1129a6 tags:
```
python
coladd
=
{}
allcols
=
{}
usecols
=
{}
for
cat
in
catalogs
:
if
"
yaml
"
in
cat
:
continue
with
open
(
TRUNK
+
"
/
"
+
cat
,
"
r
"
)
as
f
:
cat_json
=
json
.
load
(
f
)
catname
=
'
_
'
.
join
(
cat
.
split
(
'
.
'
)[
0
].
split
(
'
_
'
)[
1
:])
allcols
[
catname
]
=
[
col
[
"
column_name
"
]
for
col
in
cat_json
[
"
attributes
"
]]
if
"
default_columns
"
in
cat_json
.
keys
():
usecols
[
catname
]
=
cat_json
[
"
default_columns
"
]
else
:
usecols
[
catname
]
=
list
(
set
(
list
(
itertools
.
chain
.
from_iterable
(
coltmpl
.
values
()))))
print
(
catname
,
usecols
[
catname
])
add
=
list
(
set
(
allcols
[
catname
])
-
set
(
list
(
itertools
.
chain
.
from_iterable
(
coltmpl
.
values
()))
+
usecols
[
catname
]))
if
add
and
not
"
cloud
"
in
cat
:
coladd
[
catname
]
=
add
```
%% Cell type:code id:da891779-890b-4292-95cf-92a055076923 tags:
```
python
header
=
dict
(
description
=
"
DKRZ master catalog for all /pool/data catalogs available
"
,
plugins
=
dict
(
source
=
[
dict
(
module
=
"
intake-esm
"
)]
),
metadata
=
dict
(
parameters
=
dict
()
)
)
```
%% Cell type:code id:a8e3b581-c2aa-4cb1-9d12-dea50f7ecfdc tags:
```
python
for
k
,
v
in
coltmpl
.
items
():
header
[
"
metadata
"
][
"
parameters
"
][
k
+
"
_columns
"
]
=
dict
(
type
=
"
list[str]
"
,
default
=
v
)
```
%% Cell type:code id:4c6dacbc-b5ce-4a26-bd62-7514626cacfe tags:
```
python
for
k
,
v
in
coladd
.
items
():
header
[
"
metadata
"
][
"
parameters
"
][
"
additional_
"
+
k
+
"
_columns
"
]
=
dict
(
type
=
"
list[str]
"
,
default
=
v
)
```
%% Cell type:code id:bda172ee-8537-4bc2-adf8-5baad765b19b tags:
```
python
sources
=
{}
for
k
in
catalogs
:
kentry
=
k
.
split
(
'
.
'
)[
0
]
catname
=
'
_
'
.
join
(
k
.
split
(
'
.
'
)[
0
].
split
(
'
_
'
)[
1
:])
if
"
yaml
"
in
k
:
if
"
monsoon
"
in
k
:
sources
[
kentry
]
=
dict
(
args
=
dict
(
path
=
"
/pool/data/Catalogs/dkrz_monsoon_disk.yaml
"
),
description
=
"
Monsoon 2.0
"
,
driver
=
"
yaml_file_cat
"
)
continue
kpath
=
TRUNK
+
'
/
'
+
k
with
open
(
kpath
,
'
r
'
)
as
f
:
content
=
json
.
load
(
f
)
descr
=
content
[
"
description
"
]
use_cols
=
usecols
[
catname
]
print
(
catname
,
use_cols
)
if
"
format
"
not
in
use_cols
:
print
(
k
)
use_cols
.
append
(
"
format
"
)
if
"
uri
"
not
in
use_cols
:
print
(
k
)
use_cols
.
append
(
"
uri
"
)
if
"
dyamond
"
in
k
or
"
nextgems
"
in
k
:
use_cols
=
coltmpl
[
"
cataloonie
"
]
sources
[
kentry
]
=
dict
(
args
=
dict
(
esmcol_obj
=
kpath
,
csv_kwargs
=
dict
(
usecols
=
use_cols
)
),
description
=
descr
,
driver
=
[
"
intake.open_esm_datastore
"
]
)
sourcesdict
=
{
"
sources
"
:
sources
}
```
%% Cell type:code id:23fc6855-f1c0-4545-8361-e074450636a3 tags:
```
python
sources
```
%% Cell type:code id:f5f68748-c415-48f4-82cd-37390d2fc51e tags:
```
python
with
open
(
"
/home/k/k204210/
volume/data-infrastructure-services/
intake-esm/esm-collections/disk-access/dkrz_catalog.yaml
"
,
"
w
"
)
as
f
:
with
open
(
"
/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml
"
,
"
w
"
)
as
f
:
f
.
write
(
yaml
.
dump
(
header
))
f
.
write
(
yaml
.
dump
(
sourcesdict
))
```
%% Cell type:code id:18d86aac-ad20-4c4c-a971-39a9e62c6740 tags:
```
python
``
`
%%
Cell
type
:
code
id
:
c401b8a7
-
1
bb8
-
452e-83
d1
-
207434
ae5c60
tags
:
```
python
```
%% Cell type:code id:aac720d5-2de7-4517-a7ee-6b540b9892fa tags:
```
python
```
%% Cell type:code id:dfd9ffb8-291b-4854-aa10-031f60550211 tags:
```
python
```
...
...
This diff is collapsed.
Click to expand it.
esm-collections/create_cloud_parent.ipynb
+
77
−
9
View file @
f1c3db32
...
...
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count":
1
,
"execution_count":
null
,
"id": "116bb321-0bbb-4e85-804a-8a75932d9e46",
"metadata": {},
"outputs": [],
...
...
@@ -14,17 +14,31 @@
},
{
"cell_type": "code",
"execution_count":
2
,
"execution_count":
null
,
"id": "60ccbb3d-0f0c-467e-bb43-4c025fe3ea05",
"metadata": {},
"outputs": [],
"source": [
"cats=glob.glob(\"disk-access/dkrz*.json\")"
"cats=glob.glob(\"disk-access/dkrz*.json\")\n",
"cats+=[f\n",
" for f in glob.glob(\"disk-access/dkrz*.yaml\")\n",
" if \"dkrz_catalog.yaml\" not in f\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "7dc15e3e-b391-4ac3-8364-40bdb970afec",
"metadata": {},
"outputs": [],
"source": [
"cats"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b98dd45c-aa97-4b36-9fce-28158467e107",
"metadata": {},
"outputs": [],
...
...
@@ -32,6 +46,8 @@
"for cat in cats:\n",
" targetfile=cat.split('/')[1]\n",
" target=\"cloud-access/\"+targetfile\n",
" if \"dyamond\" in cat or \"nextgems\" in cat or \"monsoon\" in cat :\n",
" continue\n",
" with open(cat, \"r\") as f:\n",
" catjson=json.load(f)\n",
" catjson[\"description\"]+=\" which will be loaded from a source file which is in the cloud (see catalog_file)\"\n",
...
...
@@ -42,7 +58,7 @@
},
{
"cell_type": "code",
"execution_count":
5
,
"execution_count":
null
,
"id": "2d1dfe72-e476-45a8-a1d6-2a2b8f3cbbb3",
"metadata": {},
"outputs": [],
...
...
@@ -53,7 +69,59 @@
{
"cell_type": "code",
"execution_count": null,
"id": "2d985cb4-ebc0-4906-9f21-fed42ffcb4a3",
"id": "c93e028c-35e4-4d0f-9f7a-4d39da7c32c0",
"metadata": {},
"outputs": [],
"source": [
"import intake\n",
"cat=intake.open_catalog([\"/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml\"])\n",
"list(cat)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f12eb1e6-ea16-429b-ac69-a5116b480260",
"metadata": {},
"outputs": [],
"source": [
"cat.dkrz_monsoon_disk.luk1000"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8b061f6-089d-4edd-a9eb-6a2f206acb21",
"metadata": {},
"outputs": [],
"source": [
"cat=intake.open_catalog([\"https://dkrz.de/s/intake\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d1c7b3c-414d-46ad-965d-b5701786d244",
"metadata": {},
"outputs": [],
"source": [
"list(cat)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2999fd8-5505-4d62-965a-fdc69a71b571",
"metadata": {},
"outputs": [],
"source": [
"cat.dkrz_mpige_disk"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf1f02ac-5c49-4148-b73e-940991dab446",
"metadata": {},
"outputs": [],
"source": []
...
...
@@ -61,9 +129,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "
taucenv
",
"display_name": "
Python 3 (based on the module python3/2022.01)
",
"language": "python",
"name": "
taucenv
"
"name": "
python3_2022_01
"
},
"language_info": {
"codemirror_mode": {
...
...
@@ -75,7 +143,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.
10.4
"
"version": "3.
9.9
"
}
},
"nbformat": 4,
...
...
%% Cell type:code id:116bb321-0bbb-4e85-804a-8a75932d9e46 tags:
```
python
import
glob
import
pandas
as
pd
import
json
```
%% Cell type:code id:60ccbb3d-0f0c-467e-bb43-4c025fe3ea05 tags:
```
python
cats
=
glob
.
glob
(
"
disk-access/dkrz*.json
"
)
cats
+=
[
f
for
f
in
glob
.
glob
(
"
disk-access/dkrz*.yaml
"
)
if
"
dkrz_catalog.yaml
"
not
in
f
]
```
%% Cell type:code id:7dc15e3e-b391-4ac3-8364-40bdb970afec tags:
```
python
cats
```
%% Cell type:code id:b98dd45c-aa97-4b36-9fce-28158467e107 tags:
```
python
for
cat
in
cats
:
targetfile
=
cat
.
split
(
'
/
'
)[
1
]
target
=
"
cloud-access/
"
+
targetfile
if
"
dyamond
"
in
cat
or
"
nextgems
"
in
cat
or
"
monsoon
"
in
cat
:
continue
with
open
(
cat
,
"
r
"
)
as
f
:
catjson
=
json
.
load
(
f
)
catjson
[
"
description
"
]
+=
"
which will be loaded from a source file which is in the cloud (see catalog_file)
"
catjson
[
"
catalog_file
"
]
=
f
"
https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/
{
catjson
[
'
id
'
]
}
.csv.gz
"
with
open
(
target
,
"
w
"
)
as
f
:
json
.
dump
(
catjson
,
f
,
indent
=
4
)
```
%% Cell type:code id:2d1dfe72-e476-45a8-a1d6-2a2b8f3cbbb3 tags:
```
python
!
sed
'
s;/pool/data/Catalogs/;https://gitlab.dkrz.de/data-infrastructure-services/intake-esm/-/raw/master/esm-collections/cloud-access/;g
'
disk
-
access
/
dkrz_catalog
.
yaml
>
cloud
-
access
/
dkrz_catalog
.
yaml
```
%% Cell type:code id:2d985cb4-ebc0-4906-9f21-fed42ffcb4a3 tags:
%% Cell type:code id:c93e028c-35e4-4d0f-9f7a-4d39da7c32c0 tags:
```
python
import
intake
cat
=
intake
.
open_catalog
([
"
/home/k/k204210/intake-esm/esm-collections/disk-access/dkrz_catalog.yaml
"
])
list
(
cat
)
```
%% Cell type:code id:f12eb1e6-ea16-429b-ac69-a5116b480260 tags:
```
python
cat
.
dkrz_monsoon_disk
.
luk1000
```
%% Cell type:code id:a8b061f6-089d-4edd-a9eb-6a2f206acb21 tags:
```
python
cat
=
intake
.
open_catalog
([
"
https://dkrz.de/s/intake
"
])
```
%% Cell type:code id:7d1c7b3c-414d-46ad-965d-b5701786d244 tags:
```
python
list
(
cat
)
```
%% Cell type:code id:f2999fd8-5505-4d62-965a-fdc69a71b571 tags:
```
python
cat
.
dkrz_mpige_disk
```
%% Cell type:code id:cf1f02ac-5c49-4148-b73e-940991dab446 tags:
```
python
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment