From 07c0f7232241184f9bc95dcd1ee96b6537012ab9 Mon Sep 17 00:00:00 2001
From: Yen-Chen Chen <yen-chen.chen@kit.edu>
Date: Mon, 8 Apr 2024 12:01:20 +0000
Subject: [PATCH] Add typo check pipeline
 (icon-libraries/libfortran-support!83)

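## What is the new feature
Add a CI lint job ("Check Typo") that spell-checks the repository, and fix the typos it reports.
## How is it implemented
Run https://github.com/crate-ci/typos in the lint stage, configured through _typos.toml.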

Approved-by: Jonas Jucker <jonas.jucker@env.ethz.ch>
Merged-by: Jonas Jucker <jonas.jucker@env.ethz.ch>
Changelog: feature
---
 .gitlab-ci.yml                     | 24 +++++++++++++---------
 _typos.toml                        | 32 ++++++++++++++++++++++++++++++
 src/mo_expression.F90              |  2 +-
 src/mo_hash_table.F90              |  2 +-
 src/mo_io_units.F90                |  2 +-
 src/mo_util_rusage.F90             |  4 ++--
 src/mo_util_string.F90             | 12 +++++------
 src/util_hash.c                    |  8 ++++----
 src/util_stride.c                  |  2 +-
 src/util_sysinfo.c                 |  2 +-
 src/util_system.c                  |  2 +-
 src/util_timer.c                   |  2 +-
 test/c/ctest_util_string_parse.cpp |  8 ++++----
 test/fortran/test_util_string.f90  |  4 ++--
 14 files changed, 72 insertions(+), 34 deletions(-)
 create mode 100644 _typos.toml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6547245..e9e6df2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -42,7 +42,7 @@ nag:
     - ctest --output-on-failure
   tags:
     - levante-fake, hpc, dkrz
-  needs: ["Check OpenACC Style", "Check Style", "Check License"]
+  needs: ["Check Typo", "Check OpenACC Style", "Check Style", "Check License"]
 
 gcc11:
   stage: build_and_test
@@ -57,7 +57,7 @@ gcc11:
     - ctest --output-on-failure
   tags:
     - levante-fake, hpc, dkrz
-  needs: ["Check OpenACC Style", "Check Style", "Check License"]
+  needs: ["Check Typo", "Check OpenACC Style", "Check Style", "Check License"]
 
 intel22:
   stage: build_and_test
@@ -72,7 +72,7 @@ intel22:
     - ctest --output-on-failure
   tags:
     - levante-fake, hpc, dkrz
-  needs: ["Check OpenACC Style", "Check Style", "Check License"]
+  needs: ["Check Typo", "Check OpenACC Style", "Check Style", "Check License"]
 
 nvhpc:
   stage: build_and_test
@@ -87,7 +87,7 @@ nvhpc:
     - ctest --output-on-failure
   tags:
     - levante-fake, hpc, dkrz
-  needs: ["Check OpenACC Style", "Check Style", "Check License"]
+  needs: ["Check Typo", "Check OpenACC Style", "Check Style", "Check License"]
 
 OpenACC:
   stage: build_OpenACC
@@ -103,6 +103,17 @@ OpenACC:
     - levante-fake, hpc, dkrz
   needs: ["nvhpc"]
 
+Check Typo:
+  stage: lint
+  before_script:
+    # install the typos spell checker; --yes skips conda's confirmation prompt (CI has no TTY)
+    - conda install --yes typos
+  script: typos
+  tags:
+    # choose a runner that has conda available
+    - conda
+  needs: []
+
 Check OpenACC Style:
   extends: .colorized
   stage: lint
@@ -110,10 +121,8 @@ Check OpenACC Style:
     BEAUTIFIER_TAG: "v0.3.0"
     BEAUTIFIER_REPO: "https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.dkrz.de/dwd-sw/icon-openacc-beautifier.git"
     GIT_SUBMODULE_STRATEGY: none
-
   before_script:
     - git clone --branch "${BEAUTIFIER_TAG}" --depth 1 "${BEAUTIFIER_REPO}"
-
   script:
     # apply beautifier:
     - python3 icon-openacc-beautifier/main.py src/
@@ -127,11 +136,9 @@ Check OpenACC Style:
       (see artifacts).${DEFAULT}\n" >&2
         exit 1
       }
-
   tags:
     # choose python capable runner:
     - sphinx
-
   artifacts:
     paths:
       - acc_style.patch
@@ -140,7 +147,6 @@ Check OpenACC Style:
     when: on_failure
   needs: []
 
-
 Check Style:
   stage: lint
   before_script:
diff --git a/_typos.toml b/_typos.toml
new file mode 100644
index 0000000..1e213fa
--- /dev/null
+++ b/_typos.toml
@@ -0,0 +1,32 @@
+# ICON
+#
+# ---------------------------------------------------------------
+# Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+# Contact information: icon-model.org
+#
+# See AUTHORS.TXT for a list of authors
+# See LICENSES/ for license information
+# SPDX-License-Identifier: CC0-1.0
+# ---------------------------------------------------------------
+
+[files]
+extend-exclude = [
+	# Ragel generated files
+	"src/nml_annotate.c",
+	"src/util_arithmetic_expr.c",
+	"src/util_string_parse.c"
+]
+
+[default]
+extend-ignore-re = [
+	# "2nd" is flagged as a typo by the current typos version; this pattern ignores the whole line around _2nd_
+	".*_2nd_.*",
+]
+
+[default.extend-words]
+inout = "inout" # in and out
+nin = "nin" # number of inputs
+ans = "ans" # short for answer
+
+[default.extend-identifiers]
+lowcase = "lowcase" # ICON legacy subroutine name
diff --git a/src/mo_expression.F90 b/src/mo_expression.F90
index 4bcdf73..6a841c5 100644
--- a/src/mo_expression.F90
+++ b/src/mo_expression.F90
@@ -15,7 +15,7 @@
 ! Machine (FSM) and Dijkstra's shunting yard algorithm.
 ! It is possible to include mathematical functions, operators, and
 ! constants, see the LaTeX documentation for this module in the
-! appendix of the namelist documentaion. Besides, Fortran variables
+! appendix of the namelist documentation. Besides, Fortran variables
 ! can be linked to the expression and used in the evaluation. The
 ! implementation supports scalar input variables as well as 2D and 3D
 ! fields, where it is implicitly assumed that 2D fields are embedded
diff --git a/src/mo_hash_table.F90 b/src/mo_hash_table.F90
index fdeb48d..ff13319 100644
--- a/src/mo_hash_table.F90
+++ b/src/mo_hash_table.F90
@@ -70,7 +70,7 @@ MODULE mo_hash_table
     TYPE(t_HashEntry), POINTER :: curEntry => NULL()
   CONTAINS
     PROCEDURE :: init => hashIterator_init
-    PROCEDURE :: nextEntry => hashIterator_nextEntry ! returns .TRUE. IF the operation was successfull
+    PROCEDURE :: nextEntry => hashIterator_nextEntry ! returns .TRUE. IF the operation was successful
     PROCEDURE :: reset => hashIterator_reset
   END TYPE
 
diff --git a/src/mo_io_units.F90 b/src/mo_io_units.F90
index af5f881..72ab57b 100644
--- a/src/mo_io_units.F90
+++ b/src/mo_io_units.F90
@@ -25,7 +25,7 @@ MODULE mo_io_units
 
   PUBLIC
 
-! This paramter is taken from /usr/include/stdio.h (ANSI C standard). If problems
+! This parameter is taken from /usr/include/stdio.h (ANSI C standard). If problems
 ! with filename length appear, check the before mentioned file.
 
   INTEGER, PARAMETER :: filename_max = 1024
diff --git a/src/mo_util_rusage.F90 b/src/mo_util_rusage.F90
index fd441d4..0d2f39f 100644
--- a/src/mo_util_rusage.F90
+++ b/src/mo_util_rusage.F90
@@ -93,7 +93,7 @@ CONTAINS
     CHARACTER(len=10), INTENT(INOUT), OPTIONAL :: tag
 
     TYPE(rss_list), ALLOCATABLE :: tmp_rss_lists(:)
-    INTEGER :: ist
+    INTEGER :: iostat
 
     IF (.NOT. ALLOCATED(rss_lists)) THEN
       ALLOCATE (rss_lists(max_lists))
@@ -118,7 +118,7 @@ CONTAINS
     IF (PRESENT(tag)) THEN
       rss_lists(idx)%filename = TRIM(name)//'_'//TRIM(tag)//'.log'
       rss_lists(idx)%fileunit = find_next_free_unit(10, 999)
-      OPEN (UNIT=rss_lists(idx)%fileunit, FILE=rss_lists(idx)%filename, IOSTAT=ist, Recl=line_length)
+      OPEN (UNIT=rss_lists(idx)%fileunit, FILE=rss_lists(idx)%filename, IOSTAT=iostat, Recl=line_length)
       WRITE (rss_lists(idx)%fileunit, '(1x,a)') 'idx        maxrss      majflt      minflt       nvcsw      nivcsw'
     ELSE
       rss_lists(idx)%filename = ''
diff --git a/src/mo_util_string.F90 b/src/mo_util_string.F90
index a17d59d..3fcdd45 100644
--- a/src/mo_util_string.F90
+++ b/src/mo_util_string.F90
@@ -130,7 +130,7 @@ CONTAINS
     CHARACTER(len=*), INTENT(IN) :: uppercase
     CHARACTER(len=LEN_TRIM(uppercase)) :: tolower
     !
-    INTEGER, PARAMETER :: idel = ICHAR('a') - ICHAR('A')
+    INTEGER, PARAMETER :: idiff = ICHAR('a') - ICHAR('A')
     INTEGER, PARAMETER :: ia = ICHAR('A')
     INTEGER, PARAMETER :: iz = ICHAR('Z')
     INTEGER :: i, ic
@@ -138,7 +138,7 @@ CONTAINS
     DO i = 1, LEN_TRIM(uppercase)
       ic = ICHAR(uppercase(i:i))
       IF (ic >= ia .AND. ic <= iz) THEN
-        tolower(i:i) = CHAR(ic + idel)
+        tolower(i:i) = CHAR(ic + idiff)
       ELSE
         tolower(i:i) = uppercase(i:i)
       END IF
@@ -151,7 +151,7 @@ CONTAINS
   !!
   ELEMENTAL SUBROUTINE lowcase(s)
     CHARACTER(len=*), INTENT(INOUT) :: s
-    INTEGER, PARAMETER :: idel = ICHAR('a') - ICHAR('A')
+    INTEGER, PARAMETER :: idiff = ICHAR('a') - ICHAR('A')
     INTEGER, PARAMETER :: ia = ICHAR('A')
     INTEGER, PARAMETER :: iz = ICHAR('Z')
     INTEGER :: i, ic, n
@@ -159,7 +159,7 @@ CONTAINS
     n = LEN_TRIM(s)
     DO i = 1, n
       ic = ICHAR(s(i:i))
-      s(i:i) = CHAR(ic + MERGE(idel, 0, ic >= ia .AND. ic <= iz))
+      s(i:i) = CHAR(ic + MERGE(idiff, 0, ic >= ia .AND. ic <= iz))
     END DO
   END SUBROUTINE lowcase
   !------------------------------------------------------------------------------------------------
@@ -170,7 +170,7 @@ CONTAINS
     CHARACTER(len=*), INTENT(IN) :: lowercase
     CHARACTER(len=LEN_TRIM(lowercase)) :: toupper
     !
-    INTEGER, PARAMETER :: idel = ICHAR('A') - ICHAR('a')
+    INTEGER, PARAMETER :: idiff = ICHAR('A') - ICHAR('a')
     INTEGER, PARAMETER :: ia = ICHAR('a')
     INTEGER, PARAMETER :: iz = ICHAR('z')
     INTEGER :: i, ic
@@ -178,7 +178,7 @@ CONTAINS
     DO i = 1, LEN_TRIM(lowercase)
       ic = ICHAR(lowercase(i:i))
       IF (ic >= ia .AND. ic <= iz) THEN
-        toupper(i:i) = CHAR(ic + idel)
+        toupper(i:i) = CHAR(ic + idiff)
       ELSE
         toupper(i:i) = lowercase(i:i)
       END IF
diff --git a/src/util_hash.c b/src/util_hash.c
index 217ced2..85d1d0c 100644
--- a/src/util_hash.c
+++ b/src/util_hash.c
@@ -9,7 +9,7 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-/* lookup3 by Bob Jekins, May 2006, Public Domain.
+/* lookup3 by Bob Jenkins, May 2006, Public Domain.
  * Original version downloaded from: http://burtleburtle.net/bob/
  * -------------------------------------------------------------------------------
  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
@@ -93,7 +93,7 @@
 
 #else
 
-#error "Couldn't determine endianess."
+#error "Couldn't determine endianness."
 
 #endif
 
@@ -273,7 +273,7 @@ uint32_t util_hashword(const void *key, size_t length, uint32_t initval) {
          * rest of the string.  Every machine with memory protection I've seen
          * does it on word boundaries, so is OK with this.  But VALGRIND will
          * still catch it and complain.  The masking trick does make the hash
-         * noticably faster for short strings (like English words).
+         * noticeably faster for short strings (like English words).
          */
 #ifndef VALGRIND
 
@@ -487,7 +487,7 @@ uint32_t util_hashword(const void *key, size_t length, uint32_t initval) {
          * rest of the string.  Every machine with memory protection I've seen
          * does it on word boundaries, so is OK with this.  But VALGRIND will
          * still catch it and complain.  The masking trick does make the hash
-         * noticably faster for short strings (like English words).
+         * noticeably faster for short strings (like English words).
          */
 #ifndef VALGRIND
 
diff --git a/src/util_stride.c b/src/util_stride.c
index 9437af3..98f6fec 100644
--- a/src/util_stride.c
+++ b/src/util_stride.c
@@ -9,7 +9,7 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-// Fortan interface to the following functions is
+// Fortran interface to the following functions is
 // implemented in ../src/shared/mo_util_stride.f90
 
 #include <stddef.h>
diff --git a/src/util_sysinfo.c b/src/util_sysinfo.c
index 37296d9..851083e 100644
--- a/src/util_sysinfo.c
+++ b/src/util_sysinfo.c
@@ -29,7 +29,7 @@
 #include <sys/systemcfg.h>
 #endif
 
-/* funcion implemetations */
+/* function implementations */
 
 void util_user_name(char *name, int *actual_len) {
     struct passwd *current;
diff --git a/src/util_system.c b/src/util_system.c
index 578befd..14695aa 100644
--- a/src/util_system.c
+++ b/src/util_system.c
@@ -9,7 +9,7 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-// Fortan interface to the following functions is
+// Fortran interface to the following functions is
 // implemented in ../src/shared/mo_util_system.f90
 
 #include <stdlib.h>
diff --git a/src/util_timer.c b/src/util_timer.c
index 983a593..6677d53 100644
--- a/src/util_timer.c
+++ b/src/util_timer.c
@@ -11,7 +11,7 @@
 
 /* Portable CPU-timer (User + Sys); also WALL CLOCK-timer */
 
-// Fortan interface to the following functions is
+// Fortran interface to the following functions is
 // implemented in ../src/shared/mo_util_timer.f90
 
 #include <unistd.h>
diff --git a/test/c/ctest_util_string_parse.cpp b/test/c/ctest_util_string_parse.cpp
index cf5792a..4fdbc7d 100644
--- a/test/c/ctest_util_string_parse.cpp
+++ b/test/c/ctest_util_string_parse.cpp
@@ -46,7 +46,7 @@ TEST_F(UtilStringParseTest, ParseIntListIsCorrect) {
     std::string parse_line = "1,2,3;nlev";
     std::vector<int> result = { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1 };
 
-    // One extra index [0] unused becuase Fortran index starts from 1
+    // One extra index [0] unused because Fortran index starts from 1
     std::vector<int> output(nlev + 1);
     int ierr;
 
@@ -64,7 +64,7 @@ TEST_F(UtilStringParseTest, ParseIntListIsCorrect2) {
     std::string parse_line = "1;3,4...7";
     std::vector<int> result = { 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0 };
 
-    // One extra index [0] unused becuase Fortran index starts from 1
+    // One extra index [0] unused because Fortran index starts from 1
     std::vector<int> output(nlev + 1);
     int ierr;
 
@@ -86,7 +86,7 @@ TEST_F(UtilStringParseTest, ParseIntListIsCorrect3) {
                                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
     // clang-format on
 
-    // One extra index [0] unused becuase Fortran index starts from 1
+    // One extra index [0] unused because Fortran index starts from 1
     std::vector<int> output(nlev + 1);
     int ierr;
 
@@ -108,7 +108,7 @@ TEST_F(UtilStringParseTest, ParseIntListIsCorrect4) {
                                    1, 1, 0, 0, 0, 0, 0, 1, 0, 0 };
     // clang-format on
 
-    // One extra index [0] unused becuase Fortran index starts from 1
+    // One extra index [0] unused because Fortran index starts from 1
     std::vector<int> output(nlev + 1);
     int ierr;
 
diff --git a/test/fortran/test_util_string.f90 b/test/fortran/test_util_string.f90
index 812f46a..6429d9b 100644
--- a/test/fortran/test_util_string.f90
+++ b/test/fortran/test_util_string.f90
@@ -24,9 +24,9 @@ CONTAINS
     CALL STRING_CONTAINS('allcapital', lowercase)
   END SUBROUTINE
 
-  SUBROUTINE TEST_string_lowcase
+  SUBROUTINE TEST_string_low_case
     CHARACTER(len=10) :: testcase
-    CALL TAG_TEST("TEST_lowcase")
+    CALL TAG_TEST("TEST_low_case")
     testcase = 'ALLCAPITAL'
     CALL lowcase(testcase)
     CALL STRING_CONTAINS('allcapital', testcase)
-- 
GitLab