Commit 49a33cf9 authored by Etor Lucio Eceiza

add verbose flag and progress bars

parent a693dde0
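The diff below applies the same pattern in several loops: when --verbose is off, the iterable is wrapped in a tqdm progress bar; when it is on, per-item print statements are emitted instead and no bar is shown. The following is a minimal standalone sketch of that pattern, not code from this repository (the helper name maybe_progress and the sample data are illustrative only):

from tqdm import tqdm


def maybe_progress(iterable, desc: str, colour: str, verbose: bool = False):
    """Return the iterable unchanged when verbose, else wrap it in a tqdm bar."""
    if verbose:
        return iterable
    # total is taken from len(); pass a list (or any sized iterable) here
    return tqdm(iterable, total=len(iterable), desc=desc, colour=colour)


def process(items: dict[str, str], verbose: bool = False) -> None:
    for name, value in maybe_progress(
        list(items.items()), desc="🔄 Processing", colour="blue", verbose=verbose
    ):
        if verbose:
            print(f"🔄 Processing {name}: {value}")


process({"tas": "2m temperature", "pr": "precipitation"})        # progress bar
process({"tas": "2m temperature", "pr": "precipitation"}, True)  # per-item prints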
@@ -19,6 +19,7 @@ setup(
"cfchecker>=4.1.0",
"netCDF4>=1.6.0",
"numpy>=1.20.0",
"tqdm>=4.64.1",
],
extras_require={
"dev": [
@@ -63,6 +63,14 @@ def main():
help="Clean output JSON dir before generation",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
default=False,
help="Print each variable being processed",
)
args = parser.parse_args()
if args.csv_to_json:
@@ -76,8 +84,9 @@ def main():
var=args.var,
freq=args.freq,
ltype=args.ltype,
verbose=args.verbose,
)
harmonize_variables(json_dir=json_path)
harmonize_variables(json_dir=json_path, verbose=args.verbose)
elif args.clean:
if not args.json:
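The new flag is a standard argparse store_true option, so quiet mode (progress bars only) stays the default. A self-contained sketch of the behaviour added above; the parser and program name here are illustrative, not the project's actual entry point:

import argparse

parser = argparse.ArgumentParser(prog="make_tables")  # hypothetical program name
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    default=False,
    help="Print each variable being processed",
)

print(parser.parse_args([]).verbose)      # False -> progress bars
print(parser.parse_args(["-v"]).verbose)  # True  -> per-variable prints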
@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Union
import pandas as pd
from tqdm import tqdm
from config import (
approx_interval_map,
@@ -415,12 +416,33 @@ def _build_cmor_entry(
}
def _add_extra_variables(grouped_json: dict, search_dirs: List[Path]) -> None:
def _add_extra_variables(
grouped_json: dict, search_dirs: List[Path], verbose: bool = False
) -> None:
"""
Adds extra variables defined in config.extra_variables into grouped_json,
merging matched info and filling in configured metadata.
Parameters:
grouped_json (dict): A dictionary containing the current grouped
configuration of variables with their metadata.
search_dirs (List[Path]): List of directory paths to search for matching
variable metadata in external tables.
verbose (bool, optional): Flag to control the verbosity of the function's
output. If True, additional information is printed. Default is False.
Returns:
None: The function modifies the grouped_json in place.
"""
for var_name, meta in extra_variables.items():
iterable = extra_variables.items()
if not verbose:
iterable = tqdm(
iterable,
total=len(extra_variables.items()),
desc="➕ Adding extra variables",
colour="magenta",
)
for var_name, meta in iterable:
frequencies = meta.get("frequencies", [])
levels = meta.get("levels", [])
configured_comment = meta.get("comment", "")
@@ -479,8 +501,8 @@ def _add_extra_variables(
**matched,
**extra_metadata,
}
print(f"➕ Added '{var_name}'{key}")
if verbose:
print(f"➕ Added '{var_name}'{key}")
def _str_to_list(val) -> Optional[List[str]]:
@@ -505,6 +527,7 @@ def csv_to_cmor_json(
freq: Optional[Union[str, List[str]]] = None,
ltype: Optional[Union[str, List[str]]] = None,
clean_output: bool = False,
verbose: bool = False,
) -> None:
"""
Main function to convert .rc CSV variable definitions to CMOR JSON files.
@@ -516,6 +539,7 @@ def csv_to_cmor_json(
freq (Optional[str or List[str]]): Frequencies to include.
ltype (Optional[str or List[str]]): Level types to include.
clean_output (bool): Whether to clean output directory before writing.
verbose (bool): Whether to show per-variable output.
"""
var = _str_to_list(var)
@@ -534,7 +558,16 @@ def csv_to_cmor_json(
grouped_json: dict[tuple[str, str], dict[str, dict]] = {}
for _, row in df.iterrows():
iterable = df.iterrows()
if not verbose:
iterable = tqdm(
iterable,
total=len(df),
desc="🔄 Processing variables",
colour="blue",
)
for _, row in iterable:
var_name: str = row["CMPAR"]
time_representation: list[str] = [
t.strip().upper()
@@ -562,10 +595,11 @@ def csv_to_cmor_json(
for frequency in applicable_frequencies:
if freq and frequency not in freq:
continue
print(
f"🔄 Processing variable: {var_name:15} | {frequency:4} "
f"| Level: {level_group}"
)
if verbose:
print(
f"🔄 Processing variable: {var_name:15} | {frequency:4} | "
f"Level: {level_group}"
)
key: tuple[str, str] = (frequency, level_group)
matched: dict[Any, Any] | None = _find_best_matching_variable(
var_name, frequency, search_dirs
@@ -575,8 +609,16 @@ def csv_to_cmor_json(
)
grouped_json.setdefault(key, {})[var_name] = cmor_entry
_add_extra_variables(grouped_json, search_dirs)
for (frequency, level_group), variable_entry in grouped_json.items():
_add_extra_variables(grouped_json, search_dirs, verbose)
iterable = grouped_json.items()
if not verbose:
iterable = tqdm(
iterable,
total=len(grouped_json.items()),
desc="💾 Writting JSON files",
colour="green",
)
for (frequency, level_group), variable_entry in iterable:
if "land" in level_group:
level_group = level_group.replace("_land", "")
filename = f"ERA5Land_{frequency}_{level_group}"
@@ -613,15 +655,19 @@ def csv_to_cmor_json(
with open(output_path, "w", encoding="utf-8") as f:
json.dump(cmor_json, f, indent=4)
print(f"💾 Written file: {output_path}")
if verbose:
print(f"💾 Written file: {output_path}")
def harmonize_variables(json_dir: Path = json_output_path) -> None:
def harmonize_variables(
json_dir: Path = json_output_path, verbose: bool = False
) -> None:
"""
Post-process pass to fill in missing comments from higher-priority tables.
Parameters:
json_dir (Path): Directory with generated CMOR JSON files.
verbose (bool): Enable verbose output.
"""
grouped: dict[str, dict[str, dict]] = {}
for json_file in sorted(json_dir.glob("ERA5*.json")):
@@ -643,7 +689,15 @@ def harmonize_variables(
] = content
grouped[var][frequency]["file"] = json_file
for var, freq_dict in grouped.items():
iterable = grouped.items()
if not verbose:
iterable = tqdm(
iterable,
total=len(grouped.items()),
desc="✏️ updating JSON files",
colour="yellow",
)
for var, freq_dict in iterable:
preferred_comment = None
for freq in ["mon", "day", "1hr"]: # Priority order
content = freq_dict.get(freq, {}).get("content", {})
@@ -659,7 +713,8 @@ def harmonize_variables(
for freq, info in freq_dict.items():
content = info["content"]
if not content.get("comment"):
print(f"✏️ Updating comment → {var:15} | {freq:4}")
if verbose:
print(f"✏️ Updating comment → {var:15} | {freq:4}")
content["comment"] = preferred_comment
filepath = info["file"]
with open(filepath, "r", encoding="utf-8") as f:
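The harmonization step above can be read as a priority fill: for each variable, the first non-empty comment found in the order mon, day, 1hr is copied into any frequency entry whose comment is missing. A simplified standalone illustration of that logic follows; it flattens the intermediate "content"/"file" bookkeeping, and the helper name fill_comments and the sample data are not from the repository:

from typing import Dict


def fill_comments(freq_dict: Dict[str, dict]) -> None:
    """Copy the highest-priority non-empty comment into entries that lack one."""
    preferred_comment = None
    for freq in ["mon", "day", "1hr"]:  # priority order, monthly first
        comment = freq_dict.get(freq, {}).get("comment", "")
        if comment:
            preferred_comment = comment
            break
    if preferred_comment is None:
        return
    for info in freq_dict.values():
        if not info.get("comment"):
            info["comment"] = preferred_comment


entries = {
    "mon": {"comment": "Monthly mean of 2m temperature"},
    "day": {"comment": ""},
    "1hr": {},
}
fill_comments(entries)
print(entries["day"]["comment"])  # -> "Monthly mean of 2m temperature"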