Skip to content
Snippets Groups Projects
Commit 15f99089 authored by Etor Lucio Eceiza
Browse files

WIP: test for csv to json tool

parent eb797d5c
No related branches found
No related tags found
No related merge requests found
......@@ -8,6 +8,7 @@
*.log
.idea*
conda_env/
**/*.xlsx
**/*.sw[pco]
**/*.*.sw[pco]
**/.ipynb_checkpoints*
......
import pandas as pd
import json
import csv
import sys, os, shutil
from pathlib import Path
def excel_to_csv(excel_path, csv_path, sheet_name=0, field_separator="|"):
df = pd.read_excel(excel_path, sheet_name=sheet_name)
# Directory layout. Every path is built relative to the folder that holds this
# module, so the values do not depend on the current working directory.
_module_dir: str = os.path.dirname(os.path.abspath(__file__))
tables_dir: str = f"{_module_dir}/../Tables"
csv_file: str = f"{tables_dir}/original_tables/ct_ecmwf.rc"
cmip6_tables: str = f"{tables_dir}/source_tables/cmip6-cmor-tables/Tables"
obs_tables: str = f"{tables_dir}/source_tables/obs4MIPs-cmor-tables/Tables"
era5_tables: str = f"{tables_dir}/era5-cmor-tables/Tables"

# Default "Header" section for a generated CMOR table. Entries flagged "tbd"
# are provisional values that still have to be settled.
header_info: dict[str, str] = {
    "Conventions": "CF-1.7 ODS-2.1",
    # Should change depending on day, month, year.
    "approx_interval": "0.0416666666",
    "cmor_version": "3.5",  # tbd
    "data_specs_version": "2.1.0",  # tbd
    "generic_levels": "",
    "int_missing_value": "-999",  # tbd
    "mip_era": "CMIP6",
    "missing_value": "1e20",
    "product": "model-output",
    # Should change depending on the realm.
    "realm": "atmos",
    "table_date": "18 November 2020",  # tbd
    # Should relate to the filename.
    "table_id": "Table obs4MIPs_A1hr",
}
def excel_to_csv(excel_path, csv_path, sheet_name=0, field_separator="|") -> str:
    """Export one sheet of an Excel workbook as a delimited text file.

    Args:
        excel_path: Workbook to read; passed straight to ``pandas.read_excel``.
        csv_path: Destination of the delimited file.
        sheet_name: Sheet to export; the default ``0`` selects the first sheet.
        field_separator: Column delimiter written to the output file.

    Returns:
        ``csv_path``, unchanged, so calls can be chained.
    """
    # The return annotation is ``str`` (matching ``csv_to_excel``): the
    # previous ``Any`` was never imported and is evaluated when the ``def``
    # statement runs, which made the module fail on import.
    df: pd.DataFrame = pd.read_excel(excel_path, sheet_name=sheet_name)
    # index=False keeps the DataFrame's row index out of the file.
    df.to_csv(csv_path, sep=field_separator, index=False)
    return csv_path
def csv_to_json(csv_path, json_path):
with open(csv_path, encoding='utf-8') as csvf:
csv_reader = csv.DictReader(csvf)
data = [row for row in csv_reader]
def csv_to_excel(csv_path, excel_path, sheet_name="cmor_table", field_separator="|") -> str:
    """Write the contents of a delimited text file into an Excel workbook.

    Args:
        csv_path: Delimited file to read.
        excel_path: Workbook to create.
        sheet_name: Name given to the worksheet that receives the table.
        field_separator: Column delimiter used in ``csv_path``.

    Returns:
        ``excel_path``, unchanged.
    """
    table: pd.DataFrame = pd.read_csv(csv_path, sep=field_separator)
    # index=False keeps the DataFrame's row index out of the worksheet.
    table.to_excel(excel_path, index=False, sheet_name=sheet_name)
    return excel_path
def _blank_if_missing(value):
    """Return "" for a missing (NaN/None) cell, otherwise the value unchanged."""
    return "" if pd.isna(value) else value


def csv_to_cmor_json(csv_filepath=csv_file, json_output_path=era5_tables, default_cell_measures="area: areacella", frequency="mon") -> str:
    """Build a CMOR-style JSON table from a pipe-separated variable table.

    The input file is scanned for the line starting with ``#CCC|``; that line
    supplies the column names and every following line is parsed as
    ``|``-separated data. One ``variable_entry`` is produced per data row,
    keyed by the row's ``CMPAR`` value, and the result is written to
    ``<json_output_path>/ERA5_<Frequency>.json``.

    Args:
        csv_filepath: Path of the pipe-separated input table.
        json_output_path: Directory that receives the JSON file.
        default_cell_measures: Value stored as ``cell_measures`` in every entry.
        frequency: Frequency label; stored in each entry and, capitalised, used
            in the output file name and the header's ``table_id``.

    Returns:
        Path of the JSON file that was written.

    Raises:
        ValueError: If no line of the input starts with ``#CCC|``.
    """
    from io import StringIO

    with open(csv_filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Locate the header line; fail with a clear message rather than letting a
    # bare StopIteration escape from next().
    header_line_idx = next(
        (idx for idx, line in enumerate(lines) if line.startswith("#CCC|")),
        None,
    )
    if header_line_idx is None:
        raise ValueError(
            f"No header line starting with '#CCC|' found in {csv_filepath}"
        )

    # Column names come from the header line without its leading '#'.
    headers = lines[header_line_idx].strip().lstrip('#').split('|')
    # Everything after the header is table data.
    data_lines = lines[header_line_idx + 1:]
    df = pd.read_csv(StringIO(''.join(data_lines)), sep='|', names=headers, engine='python')

    table_suffix = frequency.capitalize()
    json_filename = f"ERA5_{table_suffix}.json"
    json_filepath = f"{json_output_path}/{json_filename}"

    cmor_json = {
        "Header": {
            "Conventions": "CF-1.7 ODS-2.1",
            "approx_interval": "0.0416666666",
            "cmor_version": "3.5",
            "data_specs_version": "2.1.0",
            "generic_levels": "",
            "int_missing_value": "-999",
            "mip_era": "CMIP6",
            "missing_value": "1e20",
            "product": "model-output",
            "realm": "atmos",
            "table_date": "18 November 2020",
            "table_id": f"Table obs4MIPs_{table_suffix}"
        },
        "variable_entry": {}
    }

    # Every frequency ("1hr", "day", monthly/other) currently maps to the same
    # layout, so it is computed once here instead of once per row. Reintroduce
    # a per-frequency lookup at this point when the layouts start to differ.
    dimensions = "longitude latitude time"
    cell_methods = "area: time: mean"

    for _, row in df.iterrows():
        var_name = row["CMPAR"]
        if pd.isna(var_name):
            # A row without a variable name cannot be keyed in the table.
            continue

        # Missing cells are stored as "" so json.dump never emits a bare NaN,
        # which is not valid JSON.
        units = _blank_if_missing(row["CMUNIT"])

        # Only flux-like variables (units containing "W m-2") get a direction;
        # a '-' anywhere in the conversion factor selects "down".
        # NOTE(review): this also matches factors such as "1e-3" - confirm the
        # CMFACT notation used in the source table.
        if "W m-2" in str(units):
            positive = "down" if "-" in str(row["CMFACT"]) else "up"
        else:
            positive = ""

        cmor_json["variable_entry"][var_name] = {
            "cell_measures": default_cell_measures,
            "cell_methods": cell_methods,
            "comment": _blank_if_missing(row["COMMENT"]),
            "dimensions": dimensions,
            "frequency": frequency,
            "long_name": _blank_if_missing(row["CMLNAME"]),
            "modeling_realm": _blank_if_missing(row["REALM"]),
            "ok_max_mean_abs": "",
            "ok_min_mean_abs": "",
            "out_name": var_name,
            "positive": positive,
            "standard_name": _blank_if_missing(row["CFNAME"]),
            "type": "real",
            "units": units,
            "valid_max": "",
            "valid_min": "",
        }

    with open(json_filepath, 'w', encoding='utf-8') as json_file:
        json.dump(cmor_json, json_file, indent=4)

    # Report the file actually written, not just its parent directory.
    print(f"JSON file successfully created at {json_filepath}")
    return json_filepath
# Example usage (placeholder paths - replace them before running).
# Guarded so that importing this module does not start a conversion run.
if __name__ == "__main__":
    csv_to_cmor_json(
        csv_filepath='path_to_your_csv_file.csv',
        # csv_to_cmor_json appends the JSON file name to this value, so it
        # must name a directory rather than a file.
        json_output_path='path_to_your_output_directory',
        default_cell_measures="area: areacella",
        frequency="mon"
    )
with open(json_path, 'w', encoding='utf-8') as jsonf:
json.dump(data, jsonf, indent=2)
return json_path
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment