From 5be1b6ba868267f2bcff0a15e288b1e0855ef7d1 Mon Sep 17 00:00:00 2001 From: k204229 <lucio-eceiza@dkrz.de> Date: Thu, 3 Apr 2025 19:43:11 +0200 Subject: [PATCH] separate config parameters --- src/config.py | 76 +++++++++++++++++++++++++++++++++++ src/converter.py | 102 ++++++++++------------------------------------- 2 files changed, 97 insertions(+), 81 deletions(-) create mode 100644 src/config.py diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..d24b539 --- /dev/null +++ b/src/config.py @@ -0,0 +1,76 @@ +from pathlib import Path +from typing import Dict, List + +# Base directories +BASE_DIR: Path = Path(__file__).resolve().parent +csv_file: Path = BASE_DIR.parent / "Tables/original_tables/ct_ecmwf.rc" +json_output_path: Path = BASE_DIR.parent / "Tables/era5-cmor-tables/Tables" +search_dirs: List[Path] = [ + BASE_DIR.parent / "Tables/source_tables/obs4MIPs-cmor-tables/Tables", + BASE_DIR.parent / "Tables/source_tables/cmip6-cmor-tables/Tables" +] + +# Frequency priority +frequency_priority: Dict[str, List[str]] = { + "1hr": [ + "A1hr", "A3hr", "A6hr", # Atmospheric hourly tables + "E1hr", "E3hr", "E6hrZ", # Earth system / energy hourly + "E1hrClimMon", "E3hrPt", # Other energy system hourly + "CF3hr", "CFsubhr", "Esubhr", # High-res and subhourly + "3hr", "6hrLev", "6hrPlev", "6hrPlevPt", # More coarse-hourly or pressure-level + "AERhr" # Aerosol-specific hourly + ], + "day": [ + "Aday", "Eday", "EdayZ", # Standard and energy/dynamics daily + "CFday", # High-res daily + "SIday", # Sea ice daily + "Oday", # Ocean daily + "AERday", # Aerosol daily + "day" # Generic fallback + ], + "mon": [ + "Amon", "Lmon", "Omon", # Atmosphere, land, ocean monthly + "Emon", "EmonZ", # Energy system monthly + "CFmon", # High-res monthly + "SImon", # Sea ice monthly + "AERmon", "AERmonZ", # Aerosol monthly + "ImonAnt", "ImonGre", "LImon" # Ice-specific monthly + ], + "fx": [ + "fx", "Ofx", "Efx", "IfxAnt", "IfxGre" # Fixed fields (land, ocean, earth system, ice) + ] +} + +# Level groupings +level_categories: Dict[str, List[str]] = { + "sfc": ["sfc_an", "sfc_fc"], + "sfc_land": ["sfc_an_land", "sfc_fc_land"], + "pl": ["pl_an", "pl_fc"], + "ml": ["ml_an"] +} + +# Realm → prefix +realm_prefix_map: Dict[str, str] = { + "aerosol": "AER", + "atmos": "A", + "atmosChem": "AER", + "ice": "I", + "land": "L", + "landIce": "LI", + "ocean": "O", + "seaIce": "SI" +} + +# Approximate intervals in days +approx_interval_map: Dict[str, float] = { + "1hr": round(1 / 24, 5), # 0.04167 + "day": 1.00000, + "mon": 30.00000, + "fx": 0.00000 +} + +# Number of levels +level_number: Dict[str, int] = { + "pl": 37, + "ml": 137 +} diff --git a/src/converter.py b/src/converter.py index 8c3deae..d6b2e51 100644 --- a/src/converter.py +++ b/src/converter.py @@ -3,81 +3,23 @@ import json import pandas as pd from io import StringIO from pathlib import Path -from typing import Optional, Dict, List +from typing import Optional, Union, Dict, List from datetime import datetime import re import glob +from config import ( + csv_file, + json_output_path, + search_dirs, + frequency_priority, + level_categories, + approx_interval_map, + realm_prefix_map, + level_number, +) today_date: str = datetime.today().strftime("%d %B %Y") -BASE_DIR: Path = Path(__file__).resolve().parent -csv_file: Path = BASE_DIR.parent / "Tables/original_tables/ct_ecmwf.rc" -json_output_path: Path = BASE_DIR.parent / "Tables/era5-cmor-tables/Tables" -search_dirs: List[Path] = [ - BASE_DIR.parent / "Tables/source_tables/obs4MIPs-cmor-tables/Tables", - BASE_DIR.parent / "Tables/source_tables/cmip6-cmor-tables/Tables" -] - -frequency_priority: Dict[str, List[str]] = { - "1hr": [ - "A1hr", "A3hr", "A6hr", # Atmospheric hourly tables - "E1hr", "E3hr", "E6hrZ", # Earth system / energy hourly - "E1hrClimMon", "E3hrPt", # Other energy system hourly - "CF3hr", "CFsubhr", "Esubhr", # High-res and subhourly - "3hr", "6hrLev", "6hrPlev", "6hrPlevPt", # More coarse-hourly or pressure-level - "AERhr" # Aerosol-specific hourly - ], - "day": [ - "Aday", "Eday", "EdayZ", # Standard and energy/dynamics daily - "CFday", # High-res daily - "SIday", # Sea ice daily - "Oday", # Ocean daily - "AERday", # Aerosol daily - "day" # Generic fallback - ], - "mon": [ - "Amon", "Lmon", "Omon", # Atmosphere, land, ocean monthly - "Emon", "EmonZ", # Energy system monthly - "CFmon", # High-res monthly - "SImon", # Sea ice monthly - "AERmon", "AERmonZ", # Aerosol monthly - "ImonAnt", "ImonGre", "LImon" # Ice-specific monthly - ], - "fx": [ - "fx", "Ofx", "Efx", "IfxAnt", "IfxGre" # Fixed fields (land, ocean, earth system, ice) - ] -} - -level_categories: Dict[str, List[str]] = { - "sfc": ["sfc_an", "sfc_fc"], - "sfc_land": ["sfc_an_land", "sfc_fc_land"], - "pl": ["pl_an", "pl_fc"], - "ml": ["ml_an"] -} - -approx_interval_map: Dict[str, float] = { - "1hr": round(1 / 24, 5), # 0.04167 - "day": 1.00000, - "mon": 30.00000, - "fx": 0.00000 -} - -realm_prefix_map: Dict[str, str] = { - "aerosol": "AER", - "atmos": "A", - "atmosChem": "AER", - "ice": "I", - "land": "L", - "landIce": "LI", - "ocean": "O", - "seaIce": "SI" -} - -level_number: Dict[str, int] = { - "pl": 37, - "ml": 137 -} - def _determine_level_category(level_type: str) -> str: for category, values in level_categories.items(): if level_type in values: @@ -325,12 +267,11 @@ def _add_areacella(grouped_json: dict, search_dirs: List[str]) -> None: Adds areacella into fx_sfc group if not already present. Pulls the entry from matching source table, and adds minimal required metadata. """ - print("in areacella") fx_sfc_keys: List[tuple[str, str]] = [("fx", "sfc"), ("fx", "sfc_land")] if all(key not in grouped_json or "areacella" in grouped_json[key] for key in fx_sfc_keys): return - matched = _find_best_matching_variable("areacella", "fx", search_dirs) + matched: Optional[Dict] = _find_best_matching_variable("areacella", "fx", search_dirs) if not matched: print("âš ï¸ Warning: 'areacella' not found in any fx tables.") return @@ -361,23 +302,22 @@ def _add_areacella(grouped_json: dict, search_dirs: List[str]) -> None: grouped_json[fx_sfc_key]["areacella"] = {**matched, **extra_metadata} print("✅ added areacella into fx_sfc.") -def _ensure_list(val): +def _ensure_list(val) -> Optional[List[str]]: if val is None: return None return val if isinstance(val, list) else [val] def csv_to_cmor_json( - csv_filepath=csv_file, - json_output_path=json_output_path, - var=None, - freq=None, - ltype=None, + csv_filepath: Path = csv_file, + json_output_path: Path = json_output_path, + var: Optional[Union[str, List[str]]] = None, + freq: Optional[Union[str, List[str]]] = None, + ltype: Optional[Union[str, List[str]]] = None, clean_output: bool = False ) -> None: - - var = _ensure_list(var) - freq = _ensure_list(freq) - ltype = _ensure_list(ltype) + var: Optional[List[str]] = _ensure_list(var) + freq: Optional[List[str]] = _ensure_list(freq) + ltype: Optional[List[str]] = _ensure_list(ltype) if clean_output: for file in glob.glob(os.path.join(json_output_path, "*.json")): -- GitLab