BSRN-conversion · Commit d796f8a0
authored Jun 23, 2020 by Philipp Sommer

Outsourced formatting into separate function

parent c819fe02
Changes: 1
convert-stations.py
...
@@ -74,41 +74,7 @@ def log_progress(iterator, total=None):
     print(f"\nFinished at {t1}. Time needed: {td:1.3f} minutes")

-args = parser.parse_args()
-
-with open(args.input) as f:
-    files = sorted(map(str.strip, f.readlines()))
-
-outdir = args.output
-
-mapping = pd.read_csv(args.mapping, index_col=0)
-
-meta = xr.open_dataset(args.meta)
-
-if args.source is not None:
-    with open(args.source) as f:
-        source_files = list(map(str.strip, f.readlines()))
-else:
-    source_files = files[:]
-
-ids = defaultdict(list)
-
-now = dt.datetime.now().isoformat()
-
-for i, (path, source) in log_progress(
-        enumerate(zip(files, source_files)), len(files)):
-    # read BSRN data file
-    base = osp.splitext(path)[0]
-    stationid = osp.basename(base.split('_')[0])
-    output = base + '.nc'
-    ids[stationid].append(output)
-    if not args.combine_only and (args.overwrite or not osp.exists(output)):
-        df = pd.read_csv(path, '\t')
-        ds = df.to_xarray()
+def format_dataset(ds, source):
     for key in list(ds):
         var = ds[key]
         var.attrs['bsrn_name'] = key
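
Both the script body removed above and the new main section further down in this diff group the input files per station before anything is written, using the basename prefix before the first underscore as station id. A minimal, self-contained sketch of that grouping step (the file paths below are invented for illustration; only the splitext/basename/split logic is taken from the script):

import os.path as osp
from collections import defaultdict

# Hypothetical input paths; the real list comes from the file named by args.input.
files = ['data/car_2019-01.tab', 'data/car_2019-02.tab', 'data/lin_2019-01.tab']

ids = defaultdict(list)
for path in files:
    base = osp.splitext(path)[0]                  # drop the extension
    stationid = osp.basename(base.split('_')[0])  # station id = basename prefix before '_'
    ids[stationid].append(base + '.nc')           # remember the per-file netCDF target

print(dict(ids))
# {'car': ['data/car_2019-01.nc', 'data/car_2019-02.nc'], 'lin': ['data/lin_2019-01.nc']}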
...
@@ -125,9 +91,10 @@ for i, (path, source) in log_progress(
     ds = ds.set_index(index='time').rename(index='time')
-    ds = ds.expand_dims('stationid')
+    ds = ds.expand_dims('stationid', axis=1)
-    station_meta = meta.sel(stationid=[stationid])
+    station_meta = meta.isel(
+        stationid=np.where(meta.station == stationid)[0])
     for key in list(station_meta):
         if station_meta[key].isnull().all():
             del station_meta[key]
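
The second hunk changes how the station metadata is looked up: instead of selecting by the stationid coordinate label with .sel, the new code matches the station variable and selects by position with .isel. A small sketch of that pattern, using invented toy metadata rather than the real args.meta file:

import numpy as np
import xarray as xr

# Toy metadata, standing in for the dataset opened from args.meta.
meta = xr.Dataset(
    {'station': ('stationid', ['Carpentras', 'Lindenberg'])},
    coords={'stationid': ['car', 'lin']},
)

stationid = 'Lindenberg'  # hypothetical value compared against meta.station
station_meta = meta.isel(stationid=np.where(meta.station == stationid)[0])
print(station_meta.station.values)  # ['Lindenberg']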
...
@@ -172,27 +139,97 @@ for i, (path, source) in log_progress(
-    if 'institution' in ds:
-        ds.attrs['institution'] = ds['institution'].values[0]
+    for attr in ['institution', 'station']:
+        if attr in ds:
+            ds.attrs[attr] = str(ds[attr].values[0])
-    if 'station_name' in ds:
-        ds.attrs['station'] = ds['station_name'].values[0]
     if 'time' in ds:
         ds['time'] = ds['time'].copy(data=pd.to_datetime(ds['time']))
         if 'units' in ds['time'].attrs:
             ds['time'].encoding['units'] = ds['time'].attrs.pop('units')
-    ds.to_netcdf(output, encoding={key: {'zlib': True, 'complevel': 4}
-                                   for key in ds.variables})
+    return ds

+args = parser.parse_args()
+
+with open(args.input) as f:
+    files = sorted(map(str.strip, f.readlines()))
+
+outdir = args.output
+
+mapping = pd.read_csv(args.mapping, index_col=0)
+
+meta = xr.open_dataset(args.meta)
+
+if args.source is not None:
+    with open(args.source) as f:
+        source_files = list(map(str.strip, f.readlines()))
+else:
+    source_files = files[:]
+
+ids = defaultdict(list)
+
+now = dt.datetime.now().isoformat()
+
+for i, (path, source) in enumerate(zip(files, source_files)):
+    # read BSRN data file
+    base = osp.splitext(path)[0]
+    stationid = osp.basename(base.split('_')[0])
+    output = base + '.nc'
+    ids[stationid].append((path, source, base, output))

 for stationid, files in ids.items():
-    print(f"Concatenating files for {stationid}")
-    ds = xr.open_mfdataset(files, combine='nested', join='override',
-                           data_vars='minimal', coords='minimal',
-                           concat_dim='time')
-    name = meta.sel(stationid=stationid).station.values[()]
-    output = osp.join(outdir, name + '.nc')
-    if not osp.exists(output) or args.overwrite:
-        ds.to_netcdf(output)
+    full_df = None
+    name = meta.isel(
+        stationid=np.where(meta.station == stationid)[0][0]).station.values[()]
+    print("Processing %i files of station %s" % (len(files), name))
+    files.sort()
+    for path, source, base, output in log_progress(files):
+        df = pd.read_csv(path, '\t')
+        full_df = df if full_df is None else pd.concat(
+            [full_df, df], ignore_index=True, sort=False)
+        ds = df.to_xarray()
+        ds = format_dataset(ds, source)
+        if not args.combine_only and (args.overwrite or not osp.exists(output)):
+            encoding = {key: {'zlib': True, 'complevel': 4}
+                        for key in ds.variables}
+            encoding['time']['dtype'] = float
+            ds.to_netcdf(output, encoding=encoding)
+
+    if full_df is not None:
+        ds = full_df.to_xarray()
+        ds = format_dataset(ds, source)
+        t0 = str(ds.time.min().dt.strftime('%Y-%m-%d').values)
+        t1 = str(ds.time.max().dt.strftime('%Y-%m-%d').values)
+        name = ds['station_name'].values[0].split(',')[0].replace(' ', '_').lower()
+        ds = ds.sortby('time')
+        full_output = osp.join(
+            outdir, f'BSRN-stationdata.{name}.{t0}-{t1}.nc')
+        encoding = {key: {'zlib': True, 'complevel': 4}
+                    for key in ds.variables}
+        encoding['time']['dtype'] = float
+        ds.to_netcdf(full_output, encoding=encoding)
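
The write pattern that appears twice at the end of the new per-station loop (per-file output and the concatenated station file) is the usual xarray compression setup: zlib at level 4 for every variable plus a float dtype for the time coordinate. A minimal, runnable sketch with invented data and file name (writing requires the netCDF4 or h5netcdf backend):

import numpy as np
import pandas as pd
import xarray as xr

# Toy dataset standing in for one converted BSRN station file.
ds = xr.Dataset(
    {'temperature': ('time', np.random.rand(24))},
    coords={'time': pd.date_range('2020-01-01', periods=24, freq='H')},
)

encoding = {key: {'zlib': True, 'complevel': 4} for key in ds.variables}
encoding['time']['dtype'] = float  # store times as floating-point offsets from the reference date

ds.to_netcdf('example-station.nc', encoding=encoding)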