Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • bm1159/cosodax/era5-tables
1 result
Show changes
Commits on Source (2)
......@@ -1037,7 +1037,7 @@
"snc": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean where land time: mean",
"comment": "",
"comment": "Percentage of each grid cell that is occupied by snow that rests on land portion of cell.",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Snow Area Percentage",
......
......@@ -827,7 +827,7 @@
"snd": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean where land time: mean",
"comment": "",
"comment": "where land over land, this is computed as the mean thickness of snow in the land portion of the grid cell (averaging over the entire land portion, including the snow-free fraction). Reported as 0.0 where the land fraction is 0.",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Snow Depth",
......@@ -1067,7 +1067,7 @@
"zmla": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "The atmosphere boundary layer thickness is the 'depth' or 'height' of the (atmosphere) planetary boundary layer.",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Height of Boundary Layer",
......@@ -1217,7 +1217,7 @@
"tdps": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Dew point temperature is the temperature at which a parcel of air reaches saturation upon being cooled at constant pressure and specific humidity.",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "2m Dewpoint Temperature",
......@@ -1337,7 +1337,7 @@
"rss": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Net downward shortwave radiation at the surface",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Surface Net Solar Radiation",
......@@ -1367,7 +1367,7 @@
"rls": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Net longwave surface radiation",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Net Longwave Surface Radiation",
......@@ -1667,7 +1667,7 @@
"xgwdparam": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Parameterised x-component of gravity wave drag",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Eastward Gravity Wave Drag",
......@@ -1697,7 +1697,7 @@
"ygwdparam": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Parameterised y-component of gravity wave drag",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Northward Gravity Wave Drag",
......@@ -1787,7 +1787,7 @@
"tasmax": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: maximum",
"comment": "",
"comment": "maximum near-surface (usually, 2 meter) air temperature (add cell_method attribute 'time: max')",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Maximum Near-Surface Air Temperature",
......@@ -1817,7 +1817,7 @@
"tasmin": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: minimum",
"comment": "",
"comment": "minimum near-surface (usually, 2 meter) air temperature (add cell_method attribute 'time: min')",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Minimum Near-Surface Air Temperature",
......@@ -2147,7 +2147,7 @@
"tsn": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean where land time: mean",
"comment": "",
"comment": "This temperature is averaged over all the snow in the grid cell that rests on land or land ice. When computing the time-mean here, the time samples, weighted by the mass of snow on the land portion of the grid cell, are accumulated and then divided by the sum of the weights. Reported as missing in regions free of snow on land.",
"dimensions": "longitude latitude time",
"frequency": "1hr",
"long_name": "Snow Internal Temperature",
......
......@@ -407,7 +407,7 @@
"o3": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Mole fraction is used in the construction mole_fraction_of_X_in_Y, where X is a material constituent of Y.",
"dimensions": "longitude latitude plev37 time",
"frequency": "day",
"long_name": "Ozone Mass Mixing Ratio",
......
......@@ -467,7 +467,7 @@
"wsgmax10m": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: maximum",
"comment": "",
"comment": "Wind speed gust maximum at 10m above surface",
"dimensions": "longitude latitude time",
"frequency": "day",
"long_name": "Maximum Wind Speed of Gust at 10m",
......@@ -1667,7 +1667,7 @@
"xgwdparam": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Parameterised x-component of gravity wave drag",
"dimensions": "longitude latitude time",
"frequency": "day",
"long_name": "Eastward Gravity Wave Drag",
......@@ -1697,7 +1697,7 @@
"ygwdparam": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "Parameterised y-component of gravity wave drag",
"dimensions": "longitude latitude time",
"frequency": "day",
"long_name": "Northward Gravity Wave Drag",
......
......@@ -467,7 +467,7 @@
"wsgmax10m": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: maximum within days time: mean over days",
"comment": "",
"comment": "Wind speed gust maximum at 10m above surface",
"dimensions": "longitude latitude time",
"frequency": "mon",
"long_name": "Maximum Wind Speed of Gust at 10m",
......@@ -1067,7 +1067,7 @@
"zmla": {
"cell_measures": "area: areacella",
"cell_methods": "area: mean time: mean",
"comment": "",
"comment": "The atmosphere boundary layer thickness is the 'depth' or 'height' of the (atmosphere) planetary boundary layer.",
"dimensions": "longitude latitude time",
"frequency": "mon",
"long_name": "Height of Boundary Layer",
......
from pathlib import Path
from typing import Dict, List
# ---------------------------------------------------------------------------
# Filesystem layout
# ---------------------------------------------------------------------------
# Directory that contains this module.
BASE_DIR: Path = Path(__file__).resolve().parent

# Every table location below sits under "<parent of BASE_DIR>/Tables".
_TABLES_ROOT: Path = BASE_DIR.parent / "Tables"

csv_file: Path = _TABLES_ROOT / "original_tables" / "ct_ecmwf.rc"
json_output_path: Path = _TABLES_ROOT / "era5-cmor-tables" / "Tables"

# Source-table directories, listed in the order they are meant to be searched.
search_dirs: List[Path] = [
    _TABLES_ROOT / "source_tables" / source_name / "Tables"
    for source_name in ("obs4MIPs-cmor-tables", "cmip6-cmor-tables")
]

# ---------------------------------------------------------------------------
# Frequency priority: for each output frequency, the source-table names in
# decreasing order of preference.
# ---------------------------------------------------------------------------
frequency_priority: Dict[str, List[str]] = {
    "1hr": [
        # atmospheric hourly tables
        "A1hr", "A3hr", "A6hr",
        # earth system / energy hourly tables
        "E1hr", "E3hr", "E6hrZ",
        # other energy system hourly tables
        "E1hrClimMon", "E3hrPt",
        # high-resolution and sub-hourly tables
        "CF3hr", "CFsubhr", "Esubhr",
        # coarser hourly or pressure-level tables
        "3hr", "6hrLev", "6hrPlev", "6hrPlevPt",
        # aerosol-specific hourly table
        "AERhr",
    ],
    "day": [
        # standard and energy/dynamics daily tables
        "Aday", "Eday", "EdayZ",
        # high-resolution daily table
        "CFday",
        # sea ice daily table
        "SIday",
        # ocean daily table
        "Oday",
        # aerosol daily table
        "AERday",
        # generic fallback
        "day",
    ],
    "mon": [
        # atmosphere, land and ocean monthly tables
        "Amon", "Lmon", "Omon",
        # energy system monthly tables
        "Emon", "EmonZ",
        # high-resolution monthly table
        "CFmon",
        # sea ice monthly table
        "SImon",
        # aerosol monthly tables
        "AERmon", "AERmonZ",
        # ice-specific monthly tables
        "ImonAnt", "ImonGre", "LImon",
    ],
    "fx": [
        # fixed fields (land, ocean, earth system, ice)
        "fx", "Ofx", "Efx", "IfxAnt", "IfxGre",
    ],
}

# ---------------------------------------------------------------------------
# Level groupings: category name -> the level-type labels belonging to it.
# ---------------------------------------------------------------------------
level_categories: Dict[str, List[str]] = {
    "sfc": ["sfc_an", "sfc_fc"],
    "sfc_land": ["sfc_an_land", "sfc_fc_land"],
    "pl": ["pl_an", "pl_fc"],
    "ml": ["ml_an"],
}

# ---------------------------------------------------------------------------
# Realm name -> table-name prefix.
# ---------------------------------------------------------------------------
realm_prefix_map: Dict[str, str] = dict(
    aerosol="AER",
    atmos="A",
    atmosChem="AER",
    ice="I",
    land="L",
    landIce="LI",
    ocean="O",
    seaIce="SI",
)

# ---------------------------------------------------------------------------
# Approximate interval of each frequency, expressed in days.
# ---------------------------------------------------------------------------
approx_interval_map: Dict[str, float] = {
    "1hr": round(1 / 24, 5),  # one hour as a fraction of a day: 0.04167
    "day": 1.0,
    "mon": 30.0,
    "fx": 0.0,
}

# ---------------------------------------------------------------------------
# Number of levels for the level categories that have a vertical axis.
# ---------------------------------------------------------------------------
level_number: Dict[str, int] = {
    "pl": 37,
    "ml": 137,
}
......@@ -3,81 +3,23 @@ import json
import pandas as pd
from io import StringIO
from pathlib import Path
from typing import Optional, Dict, List
from typing import Optional, Union, Dict, List
from datetime import datetime
import re
import glob
from config import (
csv_file,
json_output_path,
search_dirs,
frequency_priority,
level_categories,
approx_interval_map,
realm_prefix_map,
level_number,
)
# Date of this run, formatted as day, full month name and year.
today_date: str = datetime.today().strftime("%d %B %Y")

# Directory that contains this script.
BASE_DIR: Path = Path(__file__).resolve().parent

# NOTE: csv_file, json_output_path, search_dirs, frequency_priority,
# level_categories, approx_interval_map, realm_prefix_map and level_number are
# provided by the `from config import (...)` statement above. They used to be
# redefined here with identical values, which shadowed the imported names and
# left two copies to keep in sync; `config` is now the single source of truth.
def _determine_level_category(level_type: str) -> str:
for category, values in level_categories.items():
if level_type in values:
......@@ -325,12 +267,11 @@ def _add_areacella(grouped_json: dict, search_dirs: List[str]) -> None:
Adds areacella into fx_sfc group if not already present.
Pulls the entry from matching source table, and adds minimal required metadata.
"""
print("in areacella")
fx_sfc_keys: List[tuple[str, str]] = [("fx", "sfc"), ("fx", "sfc_land")]
if all(key not in grouped_json or "areacella" in grouped_json[key] for key in fx_sfc_keys):
return
matched = _find_best_matching_variable("areacella", "fx", search_dirs)
matched: Optional[Dict] = _find_best_matching_variable("areacella", "fx", search_dirs)
if not matched:
print("⚠️ Warning: 'areacella' not found in any fx tables.")
return
......@@ -361,23 +302,22 @@ def _add_areacella(grouped_json: dict, search_dirs: List[str]) -> None:
grouped_json[fx_sfc_key]["areacella"] = {**matched, **extra_metadata}
print("✅ added areacella into fx_sfc.")
def _ensure_list(val):
def _ensure_list(val) -> Optional[List[str]]:
if val is None:
return None
return val if isinstance(val, list) else [val]
def csv_to_cmor_json(
csv_filepath=csv_file,
json_output_path=json_output_path,
var=None,
freq=None,
ltype=None,
csv_filepath: Path = csv_file,
json_output_path: Path = json_output_path,
var: Optional[Union[str, List[str]]] = None,
freq: Optional[Union[str, List[str]]] = None,
ltype: Optional[Union[str, List[str]]] = None,
clean_output: bool = False
) -> None:
var = _ensure_list(var)
freq = _ensure_list(freq)
ltype = _ensure_list(ltype)
var: Optional[List[str]] = _ensure_list(var)
freq: Optional[List[str]] = _ensure_list(freq)
ltype: Optional[List[str]] = _ensure_list(ltype)
if clean_output:
for file in glob.glob(os.path.join(json_output_path, "*.json")):
......@@ -459,6 +399,47 @@ def csv_to_cmor_json(
print(f"✅ Written: {output_path}")
def _comment_is_blank(comment) -> bool:
    """Return True when a comment is missing, empty or whitespace-only."""
    if isinstance(comment, str):
        return not comment.strip()
    return not comment


def harmonize_variables(json_dir: Path = json_output_path) -> None:
    """Fill blank variable comments from the same variable in other tables.

    Every ``ERA5*.json`` file in ``json_dir`` is read. For each variable name
    a donor comment is taken from the first frequency, in the order ``mon``,
    ``day``, ``1hr``, that has an entry whose ``source_table`` ends with
    ``.json`` and whose comment is not blank. That comment is copied into
    every entry of the variable whose comment is blank, and each table that
    was changed is written back once with ``indent=4``.

    Args:
        json_dir: Directory containing the ``ERA5*.json`` table files.
    """
    # Parsed content of every table, kept so changed tables can be dumped
    # without re-reading them.
    tables: Dict[Path, dict] = {}
    # variable name -> frequency -> [(table file, entry), ...].
    # A list is needed because several tables of the same frequency may hold
    # the same variable; keeping only one of them would leave the others
    # unharmonised.
    occurrences: Dict[str, Dict[Optional[str], list]] = {}

    for json_file in sorted(json_dir.glob("ERA5*.json")):
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)
        variable_entry = data.get("variable_entry") or {}
        if not variable_entry:
            # Nothing to harmonise in a table without variables.
            continue
        tables[json_file] = data
        # The table's frequency is taken from its first variable.
        frequency = next(iter(variable_entry.values())).get("frequency")
        for var, content in variable_entry.items():
            occurrences.setdefault(var, {}).setdefault(frequency, []).append(
                (json_file, content)
            )

    modified = set()
    for var, by_frequency in occurrences.items():
        preferred_comment = None
        for freq in ("mon", "day", "1hr"):  # Priority order
            for _, content in by_frequency.get(freq, []):
                source_table = content.get("source_table") or ""
                if (
                    str(source_table).endswith(".json")
                    and not _comment_is_blank(content.get("comment"))
                ):
                    preferred_comment = content["comment"]
                    break
            if preferred_comment is not None:
                break
        if preferred_comment is None:
            continue

        for freq, entries in by_frequency.items():
            for json_file, content in entries:
                if not _comment_is_blank(content.get("comment")):
                    continue
                print(f"➕ Adding comment to {var} ({freq})")
                # `content` is the entry object inside tables[json_file], so
                # this edit is what gets dumped below.
                content["comment"] = preferred_comment
                modified.add(json_file)

    # Write each changed table exactly once.
    for json_file in sorted(modified):
        with open(json_file, "w", encoding="utf-8") as f:
            json.dump(tables[json_file], f, indent=4)
if __name__ == "__main__":
    # Generate the tables, then back-fill blank comments across the table
    # files found in the default output directory.
    csv_to_cmor_json()
    harmonize_variables()