diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67236fedcaa5a5826927b1123e4d311e34d099fd..ac31c7fc009546ecb6e5b22f0ab656da6a3155c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,13 +1,21 @@ repos: + - repo: https://github.com/pre-commit/mirrors-isort + rev: v5.10.1 # ← Last known good tag + hooks: + - id: isort + args: ["--profile=black"] + - repo: https://github.com/psf/black rev: 25.1.0 hooks: - id: black + args: ["--target-version=py311", "--line-length=79"] - repo: https://github.com/pycqa/flake8 - rev: 7.1.2 + rev: 7.2.0 hooks: - id: flake8 + args: [--max-line-length=88] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.15.0 diff --git a/Makefile b/Makefile index 58f95ed8c2ff2b2c159859699daacd1c506b3d00..d1f7a73a37af5867f6499ef5813499071a73c919 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,9 @@ format: isort --profile=black src black -t py311 -l 79 src +precommit: + pre-commit run --all-files + lint: mypy --install-types --non-interactive isort --check-only --profile=black src diff --git a/setup.py b/setup.py index c74514d524fdd8202ed9c80166cfdc97aa7abd7d..8c20d91fb7a1279c4f3a4a69ab6a44fd5cd25e09 100644 --- a/setup.py +++ b/setup.py @@ -1,30 +1,30 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( - name='era5-tables', - version='0.1', - author='Etor E. Lucio Eceiza', - author_email='lucio-eceiza@dkrz.de', - license='MIT', - url='https://gitlab.dkrz.de/bm1159/cosodax/era5-tables', - packages=find_packages(where='src'), + name="era5-tables", + version="0.1", + author="Etor E. 
Lucio Eceiza", + author_email="lucio-eceiza@dkrz.de", + license="MIT", + url="https://gitlab.dkrz.de/bm1159/cosodax/era5-tables", + packages=find_packages(where="src"), install_requires=[ - 'pandas>=2.0.0', - 'openpyxl>=3.1.0', - 'cfchecker>=4.1.0', - 'netCDF4>=1.6.0', - 'numpy>=1.20.0' + "pandas>=2.0.0", + "openpyxl>=3.1.0", + "cfchecker>=4.1.0", + "netCDF4>=1.6.0", + "numpy>=1.20.0", ], extras_require={ - 'dev': [ - 'pytest', - 'mypy', - 'black', - 'flake8', - 'ipython', - 'pre-commit', - 'isort' + "dev": [ + "pytest", + "mypy", + "black", + "flake8", + "ipython", + "pre-commit", + "isort", ] }, - python_requires='>=3.9', + python_requires=">=3.9", ) diff --git a/src/converter.py b/src/converter.py index d730b7cde9d32746fb065e04043e6f67415c0bad..adf5cf670e1af7a2e277a7a0b9ee76eec195518e 100644 --- a/src/converter.py +++ b/src/converter.py @@ -20,12 +20,14 @@ from config import ( search_dirs, ) +GridType = Literal["redGG-N1280", "redGG-N320", "specG-T639", ""] today_date: str = datetime.today().strftime("%d %B %Y") def _determine_level_category(level_type: str) -> str: """ - Determines the level category (e.g., 'sfc', 'pl', etc.) for a given level type. + Determines the level category (e.g., 'sfc', 'pl', etc.) for a given level + type. Parameters: level_type (str): The level type string from the dataset. @@ -115,7 +117,8 @@ def _read_csv(csv_filepath: str) -> pd.DataFrame: csv_filepath (str): Path to the .rc file. Returns: - pd.DataFrame: Cleaned DataFrame with column names and filled missing values. + pd.DataFrame: Cleaned DataFrame with column names and filled missing + values. """ with open(csv_filepath, "r", encoding="utf-8") as f: lines: List[str] = f.readlines() @@ -136,8 +139,8 @@ def _get_mapping_source(cmip_val, source_table: str = "") -> str: Resolves the mapping origin for a variable (e.g., 'ECMWF', 'CMIP6'). 
The mapping goes as follows in order of priority: Obs4MIPs, CMIP6, CF, ECMWF When possible it looks at Obs4MIPs tables, then CMIP6, if not it relies on - the CF CV that was manually written in the csv file. Finally for new variables - we resort to ECMWF naming. + the CF CV that was manually written in the csv file. Finally for new + variables we resort to ECMWF naming. Parameters: cmip_val (int or str): Mapping indicator value. @@ -178,7 +181,6 @@ def _filter_level_grid(row: dict, level_type: str) -> dict: Returns: dict: Dictionary with keys 'level_type' and 'orig_grid'. """ - all_level_types: list[str] = [ lt.strip() for lt in row.get("LTYPE", "").split(",") if lt.strip() ] @@ -189,25 +191,20 @@ def _filter_level_grid(row: dict, level_type: str) -> dict: if level_type not in all_level_types: return {"level_type": "", "orig_grid": ""} + grid: GridType = "" + if level_type.startswith("sfc"): - grid: Literal["redGG-N1280"] | Literal["redGG-N320"] = ( - "redGG-N1280" if "land" in level_type else "redGG-N320" - ) + grid = "redGG-N1280" if "land" in level_type else "redGG-N320" elif level_type.startswith("ml"): if "specG-T639" in all_grids: grid = "specG-T639" elif "redGG-N320" in all_grids: grid = "redGG-N320" - else: - grid = "" elif level_type.startswith("pl"): grid = "redGG-N320" - else: - grid = "" - return {"level_type": level_type, "orig_grid": grid} @@ -247,8 +244,8 @@ def _get_dimensions( matched: Optional[Dict], frequency: str, level_type: str ) -> str: """ - Build dimensions string, resolving appropriate vertical level and ensuring no - duplicates. Replaces time-like variants (e.g. time1, time2) with + Build dimensions string, resolving appropriate vertical level and ensuring + no duplicates. Replaces time-like variants (e.g. time1, time2) with standard 'time'. 
""" default_dims: ( @@ -262,7 +259,7 @@ def _get_dimensions( matched.get("dimensions", default_dims) if matched else default_dims ) dims_parts = dims.split() - dims_parts: list[str] = [ + dims_parts = [ "time" if re.match(r"time\d*$", dim) else dim for dim in dims_parts ] dims_parts = ["longitude" if dim == "site" else dim for dim in dims_parts] @@ -317,9 +314,7 @@ def _get_cell_info( elif "min" in var_name.lower(): extreme = "minimum" if frequency == "mon": - time_method: str = ( - f"time: {extreme} within days time: mean over days" - ) + time_method = f"time: {extreme} within days time: mean over days" else: time_method = f"time: {extreme}" if "ocean" in realm_lc or "seaice" in realm_lc: @@ -485,7 +480,7 @@ def _ensure_list(val) -> Optional[List[str]]: def csv_to_cmor_json( - csv_filepath: Path = csv_file, + csv_filepath: str = csv_file, json_output_path: Path = json_output_path, var: Optional[Union[str, List[str]]] = None, freq: Optional[Union[str, List[str]]] = None, @@ -504,9 +499,9 @@ def csv_to_cmor_json( clean_output (bool): Whether to clean output directory before writing. """ - var: Optional[List[str]] = _ensure_list(var) - freq: Optional[List[str]] = _ensure_list(freq) - ltype: Optional[List[str]] = _ensure_list(ltype) + var = _ensure_list(var) + freq = _ensure_list(freq) + ltype = _ensure_list(ltype) if clean_output: for file in glob.glob(os.path.join(json_output_path, "*.json")): @@ -550,7 +545,7 @@ def csv_to_cmor_json( continue print(f"{'-'*28}{frequency}{'-'*28}") key: tuple[str, str] = (frequency, level_group) - matched: dict = _find_best_matching_variable( + matched: dict[Any, Any] | None = _find_best_matching_variable( var_name, frequency, search_dirs ) cmor_entry: dict = _build_cmor_entry(