Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
era5-tables
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bm1159
cosodax
era5-tables
Commits
15f99089
Commit
15f99089
authored
3 weeks ago
by
Etor Lucio Eceiza
Browse files
Options
Downloads
Patches
Plain Diff
WIP: test for csv to json tool
parent
eb797d5c
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
src/converter.py
+128
-10
128 additions, 10 deletions
src/converter.py
with
129 additions
and
10 deletions
.gitignore
+
1
−
0
View file @
15f99089
...
...
@@ -8,6 +8,7 @@
*.log
.idea*
conda_env/
**/*.xlsx
**/*.sw[pco]
**/*.*.sw[pco]
**/.ipynb_checkpoints*
...
...
This diff is collapsed.
Click to expand it.
src/converter.py
+
128
−
10
View file @
15f99089
import
pandas
as
pd
import
json
import
csv
import
sys
,
os
,
shutil
from
pathlib
import
Path
def
excel_to_csv
(
excel_path
,
csv_path
,
sheet_name
=
0
,
field_separator
=
"
|
"
):
df
=
pd
.
read_excel
(
excel_path
,
sheet_name
=
sheet_name
)
# Locations of the table collections, all expressed relative to the directory
# that contains this file.
tables_dir: str = os.path.dirname(os.path.abspath(__file__)) + "/../Tables"
csv_file: str = tables_dir + "/original_tables/ct_ecmwf.rc"
cmip6_tables: str = tables_dir + "/source_tables/cmip6-cmor-tables/Tables"
obs_tables: str = tables_dir + "/source_tables/obs4MIPs-cmor-tables/Tables"
era5_tables: str = tables_dir + "/era5-cmor-tables/Tables"

# Default "Header" values for a generated table. Entries flagged TBD are
# placeholders that still have to be settled.
header_info: dict[str, str] = {
    "Conventions": "CF-1.7 ODS-2.1",
    # Should vary with the table frequency (day, month, year).
    "approx_interval": "0.0416666666",
    "cmor_version": "3.5",  # TBD
    "data_specs_version": "2.1.0",  # TBD
    "generic_levels": "",
    "int_missing_value": "-999",  # TBD
    "mip_era": "CMIP6",
    "missing_value": "1e20",
    "product": "model-output",
    # Should vary with the realm of the table.
    "realm": "atmos",
    "table_date": "18 November 2020",  # TBD
    # Should be derived from the output file name.
    "table_id": "Table obs4MIPs_A1hr",
}
def excel_to_csv(excel_path, csv_path, sheet_name=0, field_separator="|") -> str:
    """Convert one sheet of an Excel workbook into a delimited text file.

    Args:
        excel_path: Workbook to read.
        csv_path: Destination of the delimited text file.
        sheet_name: Sheet to read, forwarded to ``pandas.read_excel``.
        field_separator: Column delimiter used in the output file.

    Returns:
        ``csv_path``, unchanged, so that calls can be chained.
    """
    # The return annotation is ``str`` (matching ``csv_to_excel``); ``Any`` is
    # not imported in this module and would raise NameError at definition time.
    df: pd.DataFrame = pd.read_excel(excel_path, sheet_name=sheet_name)
    # index=False keeps the DataFrame's row index out of the written file.
    df.to_csv(csv_path, sep=field_separator, index=False)
    return csv_path
def
csv_to_json
(
csv_path
,
json_path
):
with
open
(
csv_path
,
encoding
=
'
utf-8
'
)
as
csvf
:
csv_reader
=
csv
.
DictReader
(
csvf
)
data
=
[
row
for
row
in
csv_reader
]
def csv_to_excel(
    csv_path,
    excel_path,
    sheet_name="cmor_table",
    field_separator="|",
) -> str:
    """Write the contents of a delimited text file to an Excel sheet.

    Args:
        csv_path: Delimited text file to read.
        excel_path: Workbook to write.
        sheet_name: Name given to the sheet that receives the data.
        field_separator: Column delimiter used in ``csv_path``.

    Returns:
        ``excel_path``, unchanged.
    """
    # Read and write in one chain; index=False leaves the row index out.
    pd.read_csv(csv_path, sep=field_separator).to_excel(
        excel_path,
        sheet_name=sheet_name,
        index=False,
    )
    return excel_path
def _blank_if_missing(value):
    """Return ``""`` for a missing (NaN/None) cell, otherwise the value itself."""
    return "" if pd.isna(value) else value


def csv_to_cmor_json(
    csv_filepath=csv_file,
    json_output_path=era5_tables,
    default_cell_measures="area: areacella",
    frequency="mon",
):
    """Build a CMOR-style JSON table from a ``|``-separated parameter table.

    The input is scanned for the header line starting with ``#CCC|``; every
    line after it is parsed as one record. The columns read from each record
    are ``CMPAR``, ``CMFACT``, ``COMMENT``, ``CMUNIT``, ``CMLNAME``, ``REALM``
    and ``CFNAME``.

    Args:
        csv_filepath: Path of the ``|``-separated table to read.
        json_output_path: Directory that receives the JSON file; the file is
            named ``ERA5_<frequency.capitalize()>.json``.
        default_cell_measures: ``cell_measures`` value of every entry.
        frequency: ``frequency`` value of every entry; also used in the file
            name and in the header's ``table_id``.

    Returns:
        Path of the JSON file that was written.

    Raises:
        ValueError: If no line of the input starts with ``#CCC|``.
        KeyError: If one of the columns listed above is missing.
    """
    from io import StringIO

    with open(csv_filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Locate the header line; fail with a clear message instead of leaking a
    # bare StopIteration when the marker is absent.
    header_line_idx = next(
        (idx for idx, line in enumerate(lines) if line.startswith("#CCC|")),
        None,
    )
    if header_line_idx is None:
        raise ValueError(
            f"No header line starting with '#CCC|' found in {csv_filepath}"
        )

    # Column names: the header line without its leading '#'.
    headers = lines[header_line_idx].strip().lstrip('#').split('|')

    # Everything after the header is data.
    data_lines = lines[header_line_idx + 1:]
    if any(line.strip() for line in data_lines):
        df = pd.read_csv(
            StringIO(''.join(data_lines)),
            sep='|',
            names=headers,
            engine='python',
        )
    else:
        # read_csv cannot parse an empty buffer; a table without records
        # yields a JSON file with an empty "variable_entry".
        df = pd.DataFrame(columns=headers)

    json_filename = f"ERA5_{frequency.capitalize()}.json"
    json_filepath = f"{json_output_path}/{json_filename}"

    # Skeleton of the output document.
    cmor_json = {
        "Header": {
            "Conventions": "CF-1.7 ODS-2.1",
            "approx_interval": "0.0416666666",
            "cmor_version": "3.5",
            "data_specs_version": "2.1.0",
            "generic_levels": "",
            "int_missing_value": "-999",
            "mip_era": "CMIP6",
            "missing_value": "1e20",
            "product": "model-output",
            "realm": "atmos",
            "table_date": "18 November 2020",
            "table_id": f"Table obs4MIPs_{frequency.capitalize()}",
        },
        "variable_entry": {},
    }

    # Every frequency ("1hr", "day", anything else) currently uses the same
    # dimensions and cell methods, so they are set once outside the loop.
    dimensions = "longitude latitude time"
    cell_methods = "area: time: mean"

    for _, row in df.iterrows():
        var_name = row["CMPAR"]
        cmfactor = row["CMFACT"]
        unit = row["CMUNIT"]

        # Flux direction is only set for "W m-2" quantities. The sign is taken
        # from the leading character of the factor, so that a negative
        # exponent (e.g. 1.1574e-05) is not mistaken for a negative factor.
        if pd.notna(unit) and "W m-2" in str(unit):
            positive = "down" if str(cmfactor).strip().startswith("-") else "up"
        else:
            positive = ""

        # Missing cells are written as "" rather than NaN, which is not valid
        # JSON.
        cmor_json["variable_entry"][var_name] = {
            "cell_measures": default_cell_measures,
            "cell_methods": cell_methods,
            "comment": _blank_if_missing(row["COMMENT"]),
            "dimensions": dimensions,
            "frequency": frequency,
            "long_name": _blank_if_missing(row["CMLNAME"]),
            "modeling_realm": _blank_if_missing(row["REALM"]),
            "ok_max_mean_abs": "",
            "ok_min_mean_abs": "",
            "out_name": var_name,
            "positive": positive,
            "standard_name": _blank_if_missing(row["CFNAME"]),
            "type": "real",
            "units": _blank_if_missing(unit),
            "valid_max": "",
            "valid_min": "",
        }

    with open(json_filepath, 'w', encoding='utf-8') as json_file:
        json.dump(cmor_json, json_file, indent=4)

    # Report the file that was written, not just its directory.
    print(f"JSON file successfully created at {json_filepath}")
    return json_filepath
# Example usage. Guarded so that the conversion runs only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    csv_to_cmor_json(
        csv_filepath='path_to_your_csv_file.csv',
        # json_output_path is a directory: csv_to_cmor_json appends
        # "/ERA5_<Frequency>.json" to it to form the output file path.
        json_output_path='path_to_your_output_directory',
        default_cell_measures="area: areacella",
        frequency="mon",
    )
with
open
(
json_path
,
'
w
'
,
encoding
=
'
utf-8
'
)
as
jsonf
:
json
.
dump
(
data
,
jsonf
,
indent
=
2
)
return
json_path
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment