Philipp Sommer / BSRN-conversion · Commits

Commit d796f8a0, authored Jun 23, 2020 by Philipp Sommer

Outsourced formatting into separate function

Parent: c819fe02
Changes: 1 file
convert-stations.py
...
...
@@ -74,6 +74,82 @@ def log_progress(iterator, total=None):
    print(f"\nFinished at {t1}. Time needed: {td:1.3f} minutes")


def format_dataset(ds, source):
    for key in list(ds):
        var = ds[key]
        var.attrs['bsrn_name'] = key
        try:
            row = mapping.loc[key]
        except KeyError:
            if key not in ['station', 'stationid']:
                del ds[key]
        else:
            for attr, val in row[row.notnull()].items():
                if attr != 'name':
                    var.attrs[attr] = val
            ds = ds.rename({key: row['name']})
    ds = ds.set_index(index='time').rename(index='time')
    ds = ds.expand_dims('stationid', axis=1)
    station_meta = meta.isel(stationid=np.where(meta.station == stationid)[0])
    for key in list(station_meta):
        if station_meta[key].isnull().all():
            del station_meta[key]
    ds.update(station_meta)
    ds.attrs['featureType'] = 'timeSeries'
    ds.attrs['Conventions'] = 'CF-1.8'
    ds.attrs['station_id'] = stationid
    ds.attrs['source'] = "surface observation"
    ds.attrs['conventionsURL'] = (
        'http://www.unidata.ucar.edu/packages/'
        'netcdf/conventions.html')
    ds.attrs['download_site'] = "ftp://ftp.bsrn.awi.de/"
    ds.attrs['station'] = ds['station'].values[0]
    ds.attrs['creation_date'] = 'transformation to netCDF: ' + now
    mtime = dt.datetime.fromtimestamp(osp.getmtime(source)).isoformat()
    ds.attrs['modification_date'] = (
        "Modification date of source file %s: %s" % (
            osp.basename(source), mtime))
    ds.attrs['history'] = '\n'.join(
        [ds.attrs['creation_date'], ds.attrs['modification_date']])
    ds.attrs['references'] = (
        "Driemel, A., Augustine, J., Behrens, K., Colle, S., Cox, C., "
        "Cuevas-Agulló, E., Denn, F. M., Duprat, T., Fukuda, M., "
        "Grobe, H., Haeffelin, M., Hodges, G., Hyett, N., Ijima, O., "
        "Kallis, A., Knap, W., Kustov, V., Long, C. N., Longenecker, D., "
        "Lupi, A., Maturilli, M., Mimouni, M., Ntsangwane, L., "
        "Ogihara, H., Olano, X., Olefs, M., Omori, M., Passamani, L., "
        "Pereira, E. B., Schmithüsen, H., Schumacher, S., Sieger, R., "
        "Tamlyn, J., Vogt, R., Vuilleumier, L., Xia, X., Ohmura, A., and "
        "König-Langlo, G.: Baseline Surface Radiation Network (BSRN): "
        "structure and data description (1992–2017), "
        "Earth Syst. Sci. Data, 10, 1491-1501, "
        "doi:10.5194/essd-10-1491-2018, 2018.")
    if 'institution' in ds:
        ds.attrs['institution'] = ds['institution'].values[0]
    if 'station_name' in ds:
        ds.attrs['station'] = ds['station_name'].values[0]
    if 'time' in ds:
        ds['time'] = ds['time'].copy(data=pd.to_datetime(ds['time']))
        if 'units' in ds['time'].attrs:
            ds['time'].encoding['units'] = ds['time'].attrs.pop('units')
    return ds


args = parser.parse_args()

with open(args.input) as f:
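Note that format_dataset leans on module-level objects defined elsewhere in convert-stations.py (mapping, meta, stationid, now, and the np/pd/dt/osp imports), so it cannot be run in isolation. The stand-alone sketch below, with made-up data and an assumed column layout for the mapping table, only illustrates the attribute-copy and rename pattern the function applies; it is not part of the commit.

# Hypothetical sketch of the rename/attribute pattern used by format_dataset;
# the names mirror the script, the data and mapping layout are assumptions.
import pandas as pd
import xarray as xr

# Assumed shape of the BSRN name mapping: index = BSRN column name,
# columns = CF attributes plus the target variable name.
mapping = pd.DataFrame(
    {'name': ['swdown'], 'units': ['W m-2'],
     'long_name': ['shortwave downward radiation']},
    index=['SWD'])

ds = xr.Dataset(
    {'SWD': ('time', [345.0, 350.2])},
    coords={'time': pd.date_range('2020-06-23', periods=2, freq='min')})

for key in list(ds.data_vars):
    var = ds[key]
    var.attrs['bsrn_name'] = key          # remember the original BSRN name
    try:
        row = mapping.loc[key]
    except KeyError:
        del ds[key]                       # drop columns without a mapping entry
    else:
        for attr, val in row[row.notnull()].items():
            if attr != 'name':
                var.attrs[attr] = val     # copy CF attributes onto the variable
        ds = ds.rename({key: row['name']})

print(ds)  # the variable is now called 'swdown' with units/long_name attributes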
...
...
@@ -95,104 +171,65 @@ ids = defaultdict(list)
now = dt.datetime.now().isoformat()

for i, (path, source) in log_progress(enumerate(zip(files, source_files)), len(files)):
for i, (path, source) in enumerate(zip(files, source_files)):
    # read BSRN data file
    base = osp.splitext(path)[0]
    stationid = osp.basename(base.split('_')[0])
    output = base + '.nc'
    ids[stationid].append(output)
    ids[stationid].append((path, source, base, output))

for stationid, files in ids.items():
    full_df = None
    name = meta.isel(
        stationid=np.where(meta.station == stationid)[0][0]).station.values[()]
    print("Processing %i files of station %s" % (len(files), name))
    files.sort()
    for path, source, base, output in log_progress(files):
        if not args.combine_only and (args.overwrite or not osp.exists(output)):
            df = pd.read_csv(path, '\t')
            full_df = df if full_df is None else pd.concat(
                [full_df, df], ignore_index=True, sort=False)
            ds = df.to_xarray()
            for key in list(ds):
                var = ds[key]
                var.attrs['bsrn_name'] = key
                try:
                    row = mapping.loc[key]
                except KeyError:
                    if key not in ['station', 'stationid']:
                        del ds[key]
                else:
                    for attr, val in row[row.notnull()].items():
                        if attr != 'name':
                            var.attrs[attr] = val
                    ds = ds.rename({key: row['name']})
            ds = ds.set_index(index='time').rename(index='time')
            ds = ds.expand_dims('stationid')
            station_meta = meta.sel(stationid=[stationid])
            for key in list(station_meta):
                if station_meta[key].isnull().all():
                    del station_meta[key]
            ds.update(station_meta)
            ds.attrs['featureType'] = 'timeSeries'
            ds.attrs['Conventions'] = 'CF-1.8'
            ds.attrs['station_id'] = stationid
            ds.attrs['source'] = "surface observation"
            ds.attrs['conventionsURL'] = (
                'http://www.unidata.ucar.edu/packages/'
                'netcdf/conventions.html')
            ds.attrs['download_site'] = "ftp://ftp.bsrn.awi.de/"
            ds.attrs['station'] = ds['station'].values[0]
            ds.attrs['creation_date'] = 'transformation to netCDF: ' + now
            mtime = dt.datetime.fromtimestamp(osp.getmtime(source)).isoformat()
            ds.attrs['modification_date'] = (
                "Modification date of source file %s: %s" % (
                    osp.basename(source), mtime))
            ds.attrs['history'] = '\n'.join(
                [ds.attrs['creation_date'], ds.attrs['modification_date']])
            ds.attrs['references'] = (
                "Driemel, A., Augustine, J., Behrens, K., Colle, S., Cox, C., "
                "Cuevas-Agulló, E., Denn, F. M., Duprat, T., Fukuda, M., "
                "Grobe, H., Haeffelin, M., Hodges, G., Hyett, N., Ijima, O., "
                "Kallis, A., Knap, W., Kustov, V., Long, C. N., Longenecker, D., "
                "Lupi, A., Maturilli, M., Mimouni, M., Ntsangwane, L., "
                "Ogihara, H., Olano, X., Olefs, M., Omori, M., Passamani, L., "
                "Pereira, E. B., Schmithüsen, H., Schumacher, S., Sieger, R., "
                "Tamlyn, J., Vogt, R., Vuilleumier, L., Xia, X., Ohmura, A., and "
                "König-Langlo, G.: Baseline Surface Radiation Network (BSRN): "
                "structure and data description (1992–2017), "
                "Earth Syst. Sci. Data, 10, 1491-1501, "
                "doi:10.5194/essd-10-1491-2018, 2018.")
            if 'institution' in ds:
                ds.attrs['institution'] = ds['institution'].values[0]
            for attr in ['institution', 'station']:
                if attr in ds:
                    ds.attrs[attr] = str(ds[attr].values[0])
            if 'time' in ds:
                ds['time'] = ds['time'].copy(data=pd.to_datetime(ds['time']))
                if 'units' in ds['time'].attrs:
                    ds['time'].encoding['units'] = ds['time'].attrs.pop('units')
            ds = format_dataset(ds, source)
            ds.to_netcdf(output, encoding={
                key: {'zlib': True, 'complevel': 4} for key in ds.variables})
        if not args.combine_only and (args.overwrite or not osp.exists(output)):
            encoding = {key: {'zlib': True, 'complevel': 4}
                        for key in ds.variables}
            encoding['time']['dtype'] = float
            ds.to_netcdf(output, encoding=encoding)
    if full_df is not None:
        ds = full_df.to_xarray()
        ds = format_dataset(ds, source)
        t0 = str(ds.time.min().dt.strftime('%Y-%m-%d').values)
        t1 = str(ds.time.max().dt.strftime('%Y-%m-%d').values)
        name = ds['station_name'].values[0].split(',')[0].replace(' ', '_').lower()
        ds = ds.sortby('time')
        full_output = osp.join(outdir, f'BSRN-stationdata.{name}.{t0}-{t1}.nc')
        encoding = {key: {'zlib': True, 'complevel': 4} for key in ds.variables}
        encoding['time']['dtype'] = float
        ds.to_netcdf(full_output, encoding=encoding)

for stationid, files in ids.items():
    print(f"Concatenating files for {stationid}")
    ds = xr.open_mfdataset(files, combine='nested', join='override',
                           data_vars='minimal', coords='minimal',
                           concat_dim='time')
    name = meta.sel(stationid=stationid).station.values[()]
    output = osp.join(outdir, name + '.nc')
    if not osp.exists(output) or args.overwrite:
        ds.to_netcdf(output)
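Both write paths in this hunk build the same per-variable compression encoding before calling to_netcdf, with the time coordinate additionally forced to a float on-disk dtype. As a reference only, here is a minimal self-contained sketch of that pattern; the data, the output file name, and the availability of the netCDF4 backend are assumptions, not part of the commit.

# Hedged sketch of the compression encoding used above, on made-up data.
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {'swdown': (('time',), np.random.rand(24))},
    coords={'time': pd.date_range('2020-06-23', periods=24, freq='h')})

# zlib compression at level 4 for every variable, as in convert-stations.py;
# the time coordinate is stored with a float dtype on disk.
encoding = {key: {'zlib': True, 'complevel': 4} for key in ds.variables}
encoding['time']['dtype'] = float

ds.to_netcdf('example-station.nc', encoding=encoding)  # hypothetical output path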