From 01786a998c10367b28126cc217f4603c406797ea Mon Sep 17 00:00:00 2001
From: Roland Wirth <roland.wirth@dwd.de>
Date: Mon, 13 May 2024 06:48:59 +0000
Subject: [PATCH] util_string: dynamically allocate and expand string lists
 (icon-libraries/libfortran-support!88)

String lists grow dynamically, removing the need for preallocated arrays. This simplifies
handling of large variable groups in the icon initialization phase.

The output lists of `add_to_list` are now `ALLOCATABLE`. They may be passed an unallocated
argument, which is treated like an empty list and allocated using the new `new_list`
subroutine. That subroutine initially allocates space for 8 strings. Every time the list
needs to grow, the allocated size doubles. This ensures constant amortized runtime for
adding to the list. No provisions are made for shrinking the list size, because they are
usually short-lived.

The tests for `add_to_list` are adapted for the new interface, checking list expansion.
A new test for `new_list` is added.

Approved-by: Yen-Chen Chen <yen-chen.chen@kit.edu>
Merged-by: Yen-Chen Chen <yen-chen.chen@kit.edu>
Changelog: feature
---
 src/mo_util_string.F90            | 60 +++++++++++++++++++++++++++----
 test/fortran/test_util_string.f90 | 45 ++++++++++++++++-------
 2 files changed, 86 insertions(+), 19 deletions(-)

diff --git a/src/mo_util_string.F90 b/src/mo_util_string.F90
index 3fcdd45..52bfc69 100644
--- a/src/mo_util_string.F90
+++ b/src/mo_util_string.F90
@@ -77,6 +77,8 @@ MODULE mo_util_string
     MODULE PROCEDURE charArray_equal_char
   END INTERFACE charArray_equal
 
+  PUBLIC :: new_list
+
   INTERFACE add_to_list
     MODULE PROCEDURE add_to_list, add_to_list_1
   END INTERFACE add_to_list
@@ -575,6 +577,20 @@ CONTAINS
     END DO
   END SUBROUTINE difference
 
+  !==============================================================================
+  !> Create a new empty string list
+  !!
+  !! Allocates space for 8 strings initially. Size doubles every time the list
+  !! has to be expanded, so adding to the list is constant amortized time.
+  SUBROUTINE new_list(str_list, nitems)
+    CHARACTER(len=*), ALLOCATABLE, INTENT(OUT) :: str_list(:)
+    INTEGER, INTENT(OUT) :: nitems
+
+    ALLOCATE (str_list(8))
+    nitems = 0
+
+  END SUBROUTINE new_list
+
   !==============================================================================
   !+ Add entries from list 2 to list 1, if they are not already present
   !+ in list 1.
@@ -582,20 +598,38 @@ CONTAINS
   ! This is a very crude implementation, quadratic complexity.
   !
   SUBROUTINE add_to_list(str_list1, nitems1, str_list2, nitems2)
-    CHARACTER(len=*), INTENT(INOUT) :: str_list1(:)
+    CHARACTER(len=*), ALLOCATABLE, INTENT(INOUT) :: str_list1(:)
     INTEGER, INTENT(INOUT) :: nitems1
-    CHARACTER(len=*), INTENT(IN)    :: str_list2(:)
-    INTEGER, INTENT(IN)    :: nitems2
+    CHARACTER(len=*), INTENT(IN) :: str_list2(:)
+    INTEGER, OPTIONAL, INTENT(IN) :: nitems2
     ! local variables
-    INTEGER :: iread, i
+    INTEGER :: iread, i, nitems2_
+    CHARACTER(len=LEN(str_list1)), ALLOCATABLE :: tmp(:)
+
+    IF (.NOT. ALLOCATED(str_list1)) THEN
+      CALL new_list(str_list1, nitems1)
+    END IF
+
+    IF (PRESENT(nitems2)) THEN
+      nitems2_ = nitems2
+    ELSE
+      nitems2_ = SIZE(str_list2)
+    END IF
 
     ! Loop over all items that should potentially be added
-    item_add_loop: DO iread = 1, nitems2
+    item_add_loop: DO iread = 1, nitems2_
       ! Loop over all items in the target list (list 1) to
       ! check if item is already in string list 1:
       DO i = 1, nitems1
         IF (str_list1(i) == str_list2(iread)) CYCLE item_add_loop
       END DO
+
+      IF (nitems1 >= SIZE(str_list1)) THEN
+        ALLOCATE (tmp(2*SIZE(str_list1)))
+        tmp(1:nitems1) = str_list1(1:nitems1)
+        CALL MOVE_ALLOC(tmp, str_list1)
+      END IF
+
       nitems1 = nitems1 + 1
       str_list1(nitems1) = str_list2(iread)
     END DO item_add_loop
@@ -603,17 +637,29 @@ CONTAINS
   END SUBROUTINE add_to_list
 
   SUBROUTINE add_to_list_1(str_list, nitems, str)
-    CHARACTER(len=*), INTENT(INOUT) :: str_list(:)
+    CHARACTER(len=*), ALLOCATABLE, INTENT(INOUT) :: str_list(:)
     INTEGER, INTENT(INOUT) :: nitems
     CHARACTER(len=*), INTENT(IN)    :: str
     ! local variables
-    INTEGER :: iread, i
+    INTEGER :: i
+    CHARACTER(len=LEN(str_list)), ALLOCATABLE :: tmp(:)
+
+    IF (.NOT. ALLOCATED(str_list)) THEN
+      CALL new_list(str_list, nitems)
+    END IF
 
     ! Loop over all items in the target list to
     ! check if item is already in that list
     DO i = 1, nitems
       IF (str_list(i) == str) RETURN
     END DO
+
+    IF (nitems >= SIZE(str_list)) THEN
+      ALLOCATE (tmp(2*SIZE(str_list)))
+      tmp(1:nitems) = str_list(1:nitems)
+      CALL MOVE_ALLOC(tmp, str_list)
+    END IF
+
     nitems = nitems + 1
     str_list(nitems) = str
 
diff --git a/test/fortran/test_util_string.f90 b/test/fortran/test_util_string.f90
index 6429d9b..3839e9a 100644
--- a/test/fortran/test_util_string.f90
+++ b/test/fortran/test_util_string.f90
@@ -111,33 +111,55 @@ CONTAINS
                        .AND. str_list1(3) == 'word5'), .TRUE.)
   END SUBROUTINE TEST_difference
 
+  SUBROUTINE TEST_new_list
+    CHARACTER(len=30), ALLOCATABLE :: str_list(:)
+    INTEGER :: nitems
+    CALL TAG_TEST("TEST_new_list")
+    CALL new_list(str_list, nitems)
+    CALL ASSERT_EQUAL(nitems == 0 .AND. ALLOCATED(str_list), .TRUE.)
+  END SUBROUTINE TEST_new_list
+
   SUBROUTINE TEST_add_to_list
-    CHARACTER(len=30), DIMENSION(5) :: str_list1
+    CHARACTER(len=30), ALLOCATABLE, DIMENSION(:) :: str_list1
     CHARACTER(len=30), DIMENSION(2) :: str_list2
     INTEGER :: nitems1, nitems2
-    CALL TAG_TEST("TEST_add_to_list")
-    str_list1 = ['word1', 'word2', 'word3', '     ', '     ']
+    CALL add_to_list(str_list1, nitems1, ['word1', 'word2', 'word3']) ! Implicit allocation
+    CALL TAG_TEST("TEST_add_to_list (1)")
+    CALL ASSERT_EQUAL(nitems1 == 3 .AND. ALLOCATED(str_list1) .AND. SIZE(str_list1) >= nitems1, .TRUE.)
     str_list2 = ['word2', 'word4']
-    nitems1 = 3
     nitems2 = SIZE(str_list2)
     CALL add_to_list(str_list1, nitems1, str_list2, nitems2)
+    CALL TAG_TEST("TEST_add_to_list (2)")
     CALL ASSERT_EQUAL((nitems1 == 4 .AND. str_list1(1) == 'word1' .AND. &
                        str_list1(2) == 'word2' .AND. str_list1(3) == 'word3' &
                        .AND. str_list1(4) == 'word4'), .TRUE.)
+    CALL add_to_list(str_list1, nitems1, ['5', '6', '7', '8', '9'])
+    CALL TAG_TEST("TEST_add_to_list (3)")
+    CALL ASSERT_EQUAL(nitems1 == 9 .AND. ALLOCATED(str_list1) .AND. SIZE(str_list1) >= nitems1 .AND. &
+                      ALL(str_list1(1:nitems1) == [ &
+                          'word1', 'word2', 'word3', 'word4', '5    ', '6    ', '7    ', '8    ', '9    ']), .TRUE.)
   END SUBROUTINE TEST_add_to_list
 
   SUBROUTINE TEST_add_to_list_1
-    CHARACTER(len=30), DIMENSION(5) :: str_list
+    CHARACTER(len=30), ALLOCATABLE, DIMENSION(:) :: str_list
     INTEGER :: nitems
-    CHARACTER(len=30) :: str
-    CALL TAG_TEST("TEST_add_to_list_1")
-    str_list = ['word1', 'word2', 'word3', '     ', '     ']
-    nitems = 3
-    str = 'word4'
-    CALL add_to_list(str_list, nitems, str)
+    CALL add_to_list(str_list, nitems, 'word1') ! Implicit allocation
+    CALL TAG_TEST("TEST_add_to_list_1 (1)")
+    CALL ASSERT_EQUAL(nitems == 1 .AND. ALLOCATED(str_list) .AND. SIZE(str_list) >= nitems, .TRUE.)
+    CALL add_to_list(str_list, nitems, 'word2')
+    CALL add_to_list(str_list, nitems, 'word3')
+    CALL add_to_list(str_list, nitems, 'word4')
+    CALL add_to_list(str_list, nitems, 'word2') ! Try to add duplicate
+    CALL TAG_TEST("TEST_add_to_list_1 (2)")
     CALL ASSERT_EQUAL((nitems == 4 .AND. str_list(1) == 'word1' &
                        .AND. str_list(2) == 'word2' .AND. str_list(3) == 'word3' &
                        .AND. str_list(4) == 'word4'), .TRUE.)
+    CALL add_to_list(str_list, nitems, ['5', '6', '7', '8']) ! Fill to allocated size
+    CALL add_to_list(str_list, nitems, '9') ! Expand list
+    CALL TAG_TEST("TEST_add_to_list_1 (3)")
+    CALL ASSERT_EQUAL(nitems == 9 .AND. ALLOCATED(str_list) .AND. SIZE(str_list) >= nitems .AND. &
+                      ALL(str_list(1:nitems) == [ &
+                          'word1', 'word2', 'word3', 'word4', '5    ', '6    ', '7    ', '8    ', '9    ']), .TRUE.)
   END SUBROUTINE TEST_add_to_list_1
 
   SUBROUTINE TEST_remove_whitespace
@@ -265,4 +287,3 @@ CONTAINS
   END SUBROUTINE TEST_associate_keyword_with_keywords
 
 END MODULE TEST_STRING
-
-- 
GitLab