data-infrastructure-services / cloudify · Commits

Commit 3df9df7b — "Updated patches"
Authored 4 months ago by Fabian Wachsmann
Parent: 9c935536
No related branches, tags, or merge requests found.
Changes: 2 changed files, with 204 additions and 7 deletions
  patches/eerie.conf       +16 −7
  patches/intakeplugin.py  +188 −0
patches/eerie.conf (mode 100644 → 100755): 16 additions, 7 deletions

```diff
 #limit_conn_zone $binary_remote_addr zone=addr:10m;
-limit_req_zone $binary_remote_addr zone=two:10m rate=128r/s;
-limit_req_zone $binary_remote_addr zone=one:10m rate=8r/s;
+#limit_req_zone $binary_remote_addr zone=two:10m rate=128r/s;
+limit_req_zone $binary_remote_addr zone=one:10m rate=6r/s;
 server {
     listen 80 default_server;
@@ -23,7 +23,9 @@ server {
     # Load configuration files for the default server block.
     # include /etc/nginx/default.d/*.conf;
+    location = / {
+        return 301 https://swift.dkrz.de/v1/dkrz_7fa6baba-db43-4d12-a295-8e3ebb1a01ed/apps/stac-browser/index.html;
+    }
     location / {
         proxy_no_cache 1;
         proxy_cache_bypass 1;
@@ -32,11 +34,18 @@ server {
         proxy_read_timeout 3600;
         proxy_pass http://eerie.cloud.dkrz.de:9000;
         # limit_conn addr 4;
+        limit_req zone=one burst=1000;
     }
-    location ~* /kerchunk/ {
+    location ~* /cmor/ {
+        proxy_no_cache 1;
+        proxy_cache_bypass 1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_read_timeout 3600;
+        rewrite ^/cmor/(.*)$ /$1 break;
+        proxy_pass http://136.172.32.38;
+    }
+    location ~* /zarr/ {
         proxy_no_cache 1;
         proxy_cache_bypass 1;
         proxy_set_header Host $host;
@@ -45,7 +54,7 @@ server {
         proxy_read_timeout 3600;
         proxy_pass http://eerie.cloud.dkrz.de:9000;
         # limit_conn addr 4;
-        limit_req zone=two burst=1000;
+        limit_req zone=one burst=1000;
     }
     error_page 404 /404.html;
```
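Net effect of this patch: requests to the site root now redirect to the STAC browser on Swift, rate limiting consolidates onto the 6 r/s `one` zone, and `/cmor/` requests are rewritten and proxied to 136.172.32.38. Below is a minimal sketch of how the new rules should answer, assuming the proxy from the config is live at eerie.cloud.dkrz.de; the responses shown in comments are illustrative, not captured output:

```python
# Probe the patched nginx rules with plain HTTP requests.
# Assumption: the proxy configured above is reachable at eerie.cloud.dkrz.de.
import requests

# The new `location = /` block should answer with a 301 to the STAC browser.
r = requests.get("https://eerie.cloud.dkrz.de/", allow_redirects=False)
print(r.status_code, r.headers.get("Location"))
# expected: 301 https://swift.dkrz.de/v1/dkrz_7fa6baba-db43-4d12-a295-8e3ebb1a01ed/apps/stac-browser/index.html

# `limit_req zone=one burst=1000` queues bursts; once the burst budget is
# spent, nginx rejects excess requests with its default 503 status.
codes = [requests.get("https://eerie.cloud.dkrz.de/zarr/").status_code for _ in range(10)]
print(codes)  # 2xx/4xx from the upstream while within 6 r/s + burst, 503 beyond that
```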
patches/intakeplugin.py (new file, mode 0 → 100755): 188 additions, 0 deletions
```python
import logging
from copy import deepcopy as copy
from typing import Sequence

import yaml
from fastapi import APIRouter, Depends, Request, Response
from starlette.routing import NoMatchFound
from xpublish.plugins import Dependencies, Plugin, hookimpl
from xpublish.utils.api import DATASET_ID_ATTR_KEY

logger = logging.getLogger('intake_catalog')


def get_dataset_id(ds, url):
    xpublish_id = ds.attrs.get(DATASET_ID_ATTR_KEY)
    cf_dataset_id = ".".join(
        [x for x in [ds.attrs.get('naming_authority'), ds.attrs.get('id')] if x]
    )
    dataset_id_by_url = url.split('/')[-2]
    dataset_id_options = [xpublish_id, dataset_id_by_url, cf_dataset_id, 'dataset']
    return next(x for x in dataset_id_options if x)


def get_zarr_source(xpublish_id, dataset, request):
    url = ''
    try:
        from xpublish.plugins.included.zarr import ZarrPlugin  # noqa
        url = request.url_for("get_zarr_metadata")
    except NoMatchFound:
        # On multi-dataset servers add the dataset_id to the route
        url = request.url_for("get_zarr_metadata", dataset_id=xpublish_id)

    # Convert url object from <class 'starlette.datastructures.URL'> to a string
    url = str(url)
    # Remove .zmetadata from the URL to get the root zarr URL
    url = url.replace("/.zmetadata", "")
    dataset_id_by_url = url.split('/')[-2]
    l_consolidated = True
    # if "native" in url:
    #     l_consolidated = False
    #     if "remap" in url:
    #         url = "reference::" + '/'.join(url.split('/')[0:-3]) + "/static/kerchunks2/" + dataset_id_by_url + ".json"
    #         l_consolidated = False

    if not url:
        return {}

    return {
        'driver': 'zarr',
        'description': dataset.attrs.get('summary', ''),
        'args': {
            'consolidated': l_consolidated,
            'urlpath': url.replace("http://", "https://"),
        },
    }


class IntakePlugin(Plugin):
    """Adds an Intake catalog endpoint"""

    name: str = 'intake_catalog'
    dataset_metadata: dict = dict()

    app_router_prefix: str = '/intake'
    app_router_tags: Sequence[str] = ['intake']

    dataset_router_prefix: str = ''
    dataset_router_tags: Sequence[str] = ['intake']

    @hookimpl
    def app_router(self, deps: Dependencies):
        """Register an application level router for app level intake catalog"""
        router = APIRouter(prefix=self.app_router_prefix, tags=self.app_router_tags)

        def get_request(request: Request) -> Request:
            return request

        @router.get(".yaml", summary="Root intake catalog")
        def get_root_catalog(
            request=Depends(get_request),
            dataset_ids=Depends(deps.dataset_ids),
        ):
            data = {
                'metadata': {
                    'source': 'Served via `xpublish-intake`',
                    'access_url': str(request.url).replace("http://", "https://"),
                }
            }

            if dataset_ids:
                # data['sources'] = {
                #     d: {
                #         'description': self.dataset_metadata.get(d, {}).get('description', ''),
                #         'driver': 'intake.catalog.local.YAMLFileCatalog',
                #         'metadata': self.dataset_metadata.get(d, {}),
                #         'args': {
                #             'path': str(request.url_for('get_dataset_catalog', dataset_id=d)).replace("http://", "https://")
                #         }
                #     }
                #     for d in dataset_ids
                data['sources'] = {
                    d: {
                        'description': self.dataset_metadata.get(d, {}).get('description', ''),
                        'driver': 'zarr',
                        'metadata': self.dataset_metadata.get(d, {}),
                        'args': {
                            'urlpath': str(
                                request.url_for('get_dataset_catalog', dataset_id=d)
                            ).replace("http://", "https://").replace("catalog.yaml", "{{ method }}"),
                            'consolidated': True,
                        },
                        'parameters': dict(
                            method=dict(
                                allowed=["kerchunk", "zarr"],
                                default="kerchunk",
                                type="str",
                                description="server-side loading method",
                            )
                        ),
                    }
                    for d in dataset_ids
                }
            else:
                data['sources'] = {
                    'dataset': {
                        'description': self.dataset_metadata.get('default', {}).get('description', ''),
                        'driver': 'intake.catalog.local.YAMLFileCatalog',
                        'metadata': self.dataset_metadata.get('default', {}),
                        'args': {
                            'path': str(request.url_for('get_dataset_catalog')).replace("http://", "https://")
                        },
                    }
                }

            return Response(yaml.dump(data), media_type="text/yaml")

        return router

    @hookimpl
    def dataset_router(self, deps: Dependencies):
        router = APIRouter(prefix=self.dataset_router_prefix, tags=list(self.dataset_router_tags))

        def get_request(request: Request) -> Request:
            return request

        @router.get('/catalog.yaml', summary="Dataset intake catalog")
        def get_dataset_catalog(
            request=Depends(get_request),
            dataset=Depends(deps.dataset),
        ):
            xpublish_id = get_dataset_id(dataset, str(request.url))
            sources = {
                'zarr': get_zarr_source(xpublish_id, dataset, request),
            }
            if "source" in dataset.encoding:
                sources.update({'kerchunk': copy(sources['zarr'])})
                sources['kerchunk']['args']['urlpath'] = (
                    sources['kerchunk']['args']['urlpath'].replace('zarr', 'kerchunk')
                )

            data = {
                'name': xpublish_id,
                'metadata': {
                    'source': 'Served via `xpublish-intake`',
                    'access_url': str(request.url).replace("http://", "https://"),
                },
                'sources': {f'{xpublish_id}-{k}': v for k, v in sources.items() if v},
            }

            return Response(yaml.dump(data), media_type="text/yaml")

        return router
```
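For context, this is how such a plugin is typically registered with an xpublish server. The snippet below is a sketch, not part of the commit: the demo dataset, the `plugins=` registration, and the port are assumptions based on current xpublish usage:

```python
# Hypothetical wiring of the patched plugin into an xpublish app.
import xarray as xr
import xpublish

from intakeplugin import IntakePlugin  # the module added by this commit

ds = xr.Dataset({"tas": ("time", [280.1, 280.3, 280.2])}, attrs={"id": "demo"})
rest = xpublish.Rest({"demo": ds}, plugins={"intake_catalog": IntakePlugin()})
# rest.serve(host="0.0.0.0", port=9000)
# The app then exposes /intake.yaml (root catalog, one source per dataset with a
# {{ method }} parameter) and /datasets/demo/catalog.yaml (zarr/kerchunk sources).
```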
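On the client side, the generated YAML can be opened directly with intake. Again a hedged sketch: it assumes the demo server above plus the intake and intake-xarray packages; entry names follow the `f'{xpublish_id}-{k}'` pattern in `get_dataset_catalog`:

```python
# Hypothetical client-side check of the served catalog.
import intake

cat = intake.open_catalog("https://eerie.cloud.dkrz.de/datasets/demo/catalog.yaml")
print(list(cat))                 # e.g. ['demo-zarr'] (plus 'demo-kerchunk' if available)
ds = cat["demo-zarr"].to_dask()  # lazily open the dataset via the zarr driver
```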