Commit 49a33cf9 authored by Etor Lucio Eceiza

add verbose flag and progress bars

parent a693dde0
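The diff below applies the same pattern in several loops: when --verbose is off, the iterable is wrapped in a tqdm progress bar; when it is on, per-item print statements are emitted instead and no bar is shown. The following is a minimal standalone sketch of that pattern, not code from this repository (the helper name maybe_progress and the sample data are illustrative only):

from tqdm import tqdm


def maybe_progress(iterable, desc: str, colour: str, verbose: bool = False):
    """Return the iterable unchanged when verbose, else wrap it in a tqdm bar."""
    if verbose:
        return iterable
    # total is taken from len(); pass a list (or any sized iterable) here
    return tqdm(iterable, total=len(iterable), desc=desc, colour=colour)


def process(items: dict[str, str], verbose: bool = False) -> None:
    for name, value in maybe_progress(
        list(items.items()), desc="🔄 Processing", colour="blue", verbose=verbose
    ):
        if verbose:
            print(f"🔄 Processing {name}: {value}")


process({"tas": "2m temperature", "pr": "precipitation"})        # progress bar
process({"tas": "2m temperature", "pr": "precipitation"}, True)  # per-item prints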
@@ -19,6 +19,7 @@ setup(
"cfchecker>=4.1.0",
"netCDF4>=1.6.0",
"numpy>=1.20.0",
"tqdm>=4.64.1",
],
extras_require={
"dev": [
@@ -63,6 +63,14 @@ def main():
help="Clean output JSON dir before generation",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
default=False,
help="Print each variable being processed",
)
args = parser.parse_args()
if args.csv_to_json:
@@ -76,8 +84,9 @@ def main():
var=args.var,
freq=args.freq,
ltype=args.ltype,
verbose=args.verbose,
)
harmonize_variables(json_dir=json_path)
harmonize_variables(json_dir=json_path, verbose=args.verbose)
elif args.clean:
if not args.json:
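The new flag is a standard argparse store_true option, so quiet mode (progress bars only) stays the default. A self-contained sketch of the behaviour added above; the parser and program name here are illustrative, not the project's actual entry point:

import argparse

parser = argparse.ArgumentParser(prog="make_tables")  # hypothetical program name
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    default=False,
    help="Print each variable being processed",
)

print(parser.parse_args([]).verbose)      # False -> progress bars
print(parser.parse_args(["-v"]).verbose)  # True  -> per-variable prints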
@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Union
import pandas as pd
from tqdm import tqdm
from config import (
approx_interval_map,
@@ -415,12 +416,33 @@ def _build_cmor_entry(
}
def _add_extra_variables(grouped_json: dict, search_dirs: List[Path]) -> None:
def _add_extra_variables(
grouped_json: dict, search_dirs: List[Path], verbose: bool = False
) -> None:
"""
Adds extra variables defined in config.extra_variables into grouped_json,
merging matched info and filling in configured metadata.
Parameters:
grouped_json (dict): A dictionary containing the current grouped
configuration of variables with their metadata.
search_dirs (List[Path]): List of directory paths to search for matching
variable metadata in external tables.
verbose (bool, optional): Flag to control the verbosity of the function's
output. If True, additional information is printed. Default is False.
Returns:
None: The function modifies the grouped_json in place.
"""
for var_name, meta in extra_variables.items():
iterable = extra_variables.items()
if not verbose:
iterable = tqdm(
iterable,
total=len(extra_variables.items()),
desc="➕ Adding extra variables",
colour="magenta",
)
for var_name, meta in iterable:
frequencies = meta.get("frequencies", [])
levels = meta.get("levels", [])
configured_comment = meta.get("comment", "")
@@ -479,8 +501,8 @@ def _add_extra_variables(
**matched,
**extra_metadata,
}
print(f"➕ Added '{var_name}'{key}")
if verbose:
print(f"➕ Added '{var_name}'{key}")
def _str_to_list(val) -> Optional[List[str]]:
@@ -505,6 +527,7 @@ def csv_to_cmor_json(
freq: Optional[Union[str, List[str]]] = None,
ltype: Optional[Union[str, List[str]]] = None,
clean_output: bool = False,
verbose: bool = False,
) -> None:
"""
Main function to convert .rc CSV variable definitions to CMOR JSON files.
@@ -516,6 +539,7 @@ def csv_to_cmor_json(
freq (Optional[str or List[str]]): Frequencies to include.
ltype (Optional[str or List[str]]): Level types to include.
clean_output (bool): Whether to clean output directory before writing.
verbose (bool): Whether to show per-variable output.
"""
var = _str_to_list(var)
@@ -534,7 +558,16 @@ def csv_to_cmor_json(
grouped_json: dict[tuple[str, str], dict[str, dict]] = {}
for _, row in df.iterrows():
iterable = df.iterrows()
if not verbose:
iterable = tqdm(
iterable,
total=len(df),
desc="🔄 Processing variables",
colour="blue",
)
for _, row in iterable:
var_name: str = row["CMPAR"]
time_representation: list[str] = [
t.strip().upper()
@@ -562,10 +595,11 @@ def csv_to_cmor_json(
for frequency in applicable_frequencies:
if freq and frequency not in freq:
continue
print(
f"🔄 Processing variable: {var_name:15} | {frequency:4} "
f"| Level: {level_group}"
)
if verbose:
print(
f"🔄 Processing variable: {var_name:15} | {frequency:4} | "
f"Level: {level_group}"
)
key: tuple[str, str] = (frequency, level_group)
matched: dict[Any, Any] | None = _find_best_matching_variable(
var_name, frequency, search_dirs
@@ -575,8 +609,16 @@ def csv_to_cmor_json(
)
grouped_json.setdefault(key, {})[var_name] = cmor_entry
_add_extra_variables(grouped_json, search_dirs)
for (frequency, level_group), variable_entry in grouped_json.items():
_add_extra_variables(grouped_json, search_dirs, verbose)
iterable = grouped_json.items()
if not verbose:
iterable = tqdm(
iterable,
total=len(grouped_json.items()),
desc="💾 Writting JSON files",
colour="green",
)
for (frequency, level_group), variable_entry in iterable:
if "land" in level_group:
level_group = level_group.replace("_land", "")
filename = f"ERA5Land_{frequency}_{level_group}"
@@ -613,15 +655,19 @@ def csv_to_cmor_json(
with open(output_path, "w", encoding="utf-8") as f:
json.dump(cmor_json, f, indent=4)
print(f"💾 Written file: {output_path}")
if verbose:
print(f"💾 Written file: {output_path}")
def harmonize_variables(json_dir: Path = json_output_path) -> None:
def harmonize_variables(
json_dir: Path = json_output_path, verbose: bool = False
) -> None:
"""
Post-process pass to fill in missing comments from higher-priority tables.
Parameters:
json_dir (Path): Directory with generated CMOR JSON files.
verbose (bool): Enable verbose output.
"""
grouped: dict[str, dict[str, dict]] = {}
for json_file in sorted(json_dir.glob("ERA5*.json")):
@@ -643,7 +689,15 @@ def harmonize_variables(
] = content
grouped[var][frequency]["file"] = json_file
for var, freq_dict in grouped.items():
iterable = grouped.items()
if not verbose:
iterable = tqdm(
iterable,
total=len(grouped.items()),
desc="✏️ updating JSON files",
colour="yellow",
)
for var, freq_dict in iterable:
preferred_comment = None
for freq in ["mon", "day", "1hr"]: # Priority order
content = freq_dict.get(freq, {}).get("content", {})
@@ -659,7 +713,8 @@ def harmonize_variables(
for freq, info in freq_dict.items():
content = info["content"]
if not content.get("comment"):
print(f"✏️ Updating comment → {var:15} | {freq:4}")
if verbose:
print(f"✏️ Updating comment → {var:15} | {freq:4}")
content["comment"] = preferred_comment
filepath = info["file"]
with open(filepath, "r", encoding="utf-8") as f:
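The harmonization step above can be read as a priority fill: for each variable, the first non-empty comment found in the order mon, day, 1hr is copied into any frequency entry whose comment is missing. A simplified standalone illustration of that logic follows; it flattens the intermediate "content"/"file" bookkeeping, and the helper name fill_comments and the sample data are not from the repository:

from typing import Dict


def fill_comments(freq_dict: Dict[str, dict]) -> None:
    """Copy the highest-priority non-empty comment into entries that lack one."""
    preferred_comment = None
    for freq in ["mon", "day", "1hr"]:  # priority order, monthly first
        comment = freq_dict.get(freq, {}).get("comment", "")
        if comment:
            preferred_comment = comment
            break
    if preferred_comment is None:
        return
    for info in freq_dict.values():
        if not info.get("comment"):
            info["comment"] = preferred_comment


entries = {
    "mon": {"comment": "Monthly mean of 2m temperature"},
    "day": {"comment": ""},
    "1hr": {},
}
fill_comments(entries)
print(entries["day"]["comment"])  # -> "Monthly mean of 2m temperature"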