diff --git a/_quarto.yml b/_quarto.yml index f0fbb58e4abfb671b2c26cce5744483b73b37880..a196151a9b7a993f8fee7f03566c53ca8acbbea8 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -31,7 +31,7 @@ website: - "lectures/programming-paradigms/slides.qmd" - "lectures/data-structures/slides.qmd" - "lectures/complexity/slides.qmd" - # - "lectures/debugging/slides.qmd" + - "lectures/debugging-strategies/slides.qmd" # - "lectures/good-scientific-practice/slides.qmd" # - "lectures/user-experience/slides.qmd" # - "lectures/testing/slides.qmd" @@ -47,7 +47,7 @@ website: - "exercises/programming_paradigms.qmd" - "exercises/data_structures.qmd" - "exercises/complexity.qmd" - # - "exercises/debugging.qmd" + - "exercises/debugging-strategies.qmd" # - "exercises/good_scientific_practice.qmd" # - "exercises/user_experience.qmd" # - "exercises/testing.qmd" diff --git a/exercises/debugging-strategies.qmd b/exercises/debugging-strategies.qmd new file mode 100644 index 0000000000000000000000000000000000000000..0b62253b8997a46650234ac4dcd71a5785e71723 --- /dev/null +++ b/exercises/debugging-strategies.qmd @@ -0,0 +1,45 @@ +--- +title: "Debugging strategies" +--- + +### 1. Approach on locating Python bugs + +_The tasks should be done using the test repository <https://github.com/gweis/isodate>, +specifically commit 8856fdf0 (HEAD of `master` branch at the moment of writing this)._ + +_It is a Python library to parse dates and times according to the ISO 8601 standard. +Although it works great in many use cases, there are some documented bugs in the code. +But the linked repository was not chosen to cast a bad light on it in any way. +In fact, it does a good job providing tests and stating which problems still occur in the code._ + +**Tasks** + +1. Check out the code and run the test suite. This can typically be done by issuing +`python -m unittest` in the `src/isodate` directory (not within the `tests` directory). +You should find that it reports two failures and one error. 
+Document the actual output stating that. + +2. Pick one of the three issues and investigate it. Narrow down the code area, where the issue occurs. +Document your strategy of narrowing it down step by step. +**This task is NOT about fixing any of the issues^[You can try, if you want. But this is NOT part of the homework]!** + + +### 2. Approach on locating Fortran bugs + +For this exercise, check out the Fortran code [schnecke_flt.f90](../lectures/debugging-strategies/static/schnecke_flt.f90). +You can compile it on Levante e.g. by loading GCC 11.2.0 compilers with + +```bash +module load gcc/11.2.0-gcc-11.2.0 +``` + +and then run + +```bash +gfortran schnecke_flt.f90 +``` + +**Task** + +- Use additional compiler flags and `gdb` to locate the error. Document your approach. + diff --git a/lectures/custom.scss b/lectures/custom.scss index d09613149e5861343d7041a189a8708b98d5eae2..31184f94364a323c47038e59f9a522abdf33e52b 100644 --- a/lectures/custom.scss +++ b/lectures/custom.scss @@ -95,7 +95,7 @@ kbd, .kbd { background-position: right top; } -.sourceCode > pre.fortran > code.fortran { +.sourceCode > pre.fortranfree > code.fortranfree { background: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACXBIWXMAAAHaAAAB2gGFomX7AAAAGXRFWHRTb2Z0d2FyZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAAAxZJREFUWIW9lltsDGEUx3/fN1Nsu922mxZFXUoqrdIUiUuDelCJKE1DSLxIPLhL3N49iAcVJJ4kJIggQYi6JnhAkEqoPiDrmm6FrW5aY9de2p3xUmm3s+2sYfyfZr7vnO/85pwzM0fQqz01DVm6K/OAQKwHPDijKHBbT4htR65v/gwgAPbWHZ9sGIlGA0odCjxQn6Uwljdc3dosdq4+4ZXx+BOg5D8F75Vo04dlVKgiHj/WP/is6hImlRU6EjLg7+RhY0vvnTFOxOIHVaCuv1FxWSFzasocAfA1t/UDACFYIwVkOhItPbnVgSu3zjZx//ILsnJcFE7wUr9pIYoik2ya7r7m3sXnSWsj3MMRgJqhkFeQzczqEkpnjbckMAGEtAghLUIwoNHqC7BkzWxy891JNpFwnGBAS3YM9F1+evOV5kdvWbV5kWU55ZC7QCKhW5mklGHA9TNPiUW6h7QzZWCg9DQB5i0twzvKQ0+Pzp1zTQBEQjF8zX6mzyse1M8yA3rCSAugur6SxfWVLFoxI2n9w6svQ/qlAWCdgey8TLwjU3+9O758/zuAdHpg+txihEi9F/w6NICtHlAzFLJzXXi8WUwuH0vN2tmD+n/vCNP2/hsAXR0hOwDmHqhaVk7VsnIrVwBi0W6O7r406L5lCZyWLYBgQKP1bTthLfrXAJYlSKWWx++5cfopAEVTCli3awn5Y3JsAVhnIEV3S9nn5n/3jTvnn9kKnh5AKieZTOV76XcOQKR4wftnACCsRQlpEVsAtnpAKmYov6+dkUV5dMd6nAdQVHPiTu6/aeeoNEogzU+rqIqtYLYAPHnmic3lHv7PAEwlKBibS35hDm6Pi4mlo03TEMDUyiJWbqiiva2TrmCYsBal1Rcw2dkCqF5ZYTlGKYpkQW3ffz/g76Rh+wVbABLQLK2cU0Q1ME4JxI7fK29e+PkZiv3RKT+67H0DgAdiX+3xzB8y0QRMs3uKTUUNKefKfY0bfwo9USvg9X8M3iUMVh2+sumlBDjUuP1jj4jNB3EKMI8t/05hEJdUXak4dG3LDYBfenr5XAJ1sX8AAAAASUVORK5CYII='); background-repeat: no-repeat; background-attachment: fixed; diff --git a/lectures/debugging-strategies/slides.qmd b/lectures/debugging-strategies/slides.qmd new file mode 100644 index 0000000000000000000000000000000000000000..ada31330097fcf7f4a113042004e6ee78f476fbd --- /dev/null +++ b/lectures/debugging-strategies/slides.qmd @@ -0,0 +1,880 @@ +--- +title: "Debugging Strategies" +author: "Dominik Zobel and René Redler" +--- + +# Debugging vs. 
Testing {.leftalign} + +**Testing:** Cover as many use cases as possible to make a program more robust + +**Debugging:** Identify issue(s) and fix a broken program + + +## Different kinds of errors + +```{dot} +digraph { + node [ shape="box" ]; + + compile_ask [ label="Compile program\nSuccessful?", shape="invhouse", style="filled", fillcolor="#ccccff" ]; + compile_fail [ label="Fix compile-time error(s)" ]; + compile_ok [ label="Run it!\nDid everything\nwork as intended?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + run_fail [ label="Fix run-time error(s)" ]; + run_ok [ label="Great!", shape="house", style="filled", fillcolor="#ccccff" ]; + + compile_ask -> compile_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + compile_ask -> compile_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + compile_ok -> run_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + compile_ok -> run_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; +} +``` + +## Strategies covered in this lecture + + - Understand compiler usage and messages + - Include debug output/logging mechanisms + - Narrow down the code where the error occurred (divide and conquer) + - Use a debugger + + +## Other strategies + + - Consider the last working state and focus on what changed since + - If changes are committed in a repo, + find the last valid commit (`git bisect`) + - Search online for this or similar issues + - Ask colleagues working with the same code + - Call for vendor support + + + +# Compile-time errors {.leftalign} + + - Make the compiler say what you need to know + - Learn to understand what the compiler tries to tell you + + +## GCC C++ compiler examples {auto-animate=true} + +::::::::{.columns} + +:::{.column width=45%} + +```cpp +int main() { + return 0 +} + + + + + +``` + +Compiling with + +`gcc test.cpp -o test` + +::: + +:::{.column width=55% .smaller} + + +``` +test.cpp: In function ‘int main()’: +test.cpp:2:12: error: expected ‘;’ before ‘}’ 
token + 2 | return 0 + | ^ + | ; + 3 | } + | ~ + + + + + + + + + + + + + +``` + +::: + +:::::::: + + +## GCC C++ compiler examples {auto-animate=true} + +::::::::{.columns} + +:::{.column width=45%} + +```cpp +int main() { + using namespace std; + cout << "Hello" << endl; + return 0; +} + + + +``` + +Compiling with + +`gcc test.cpp -o test` + +::: + +:::{.column width=55% .smaller} + + +``` +test.cpp: In function ‘int main()’: +test.cpp:3:4: error: ‘cout’ was not declared in this scope + 3 | cout << "Hello" << endl; + | ^~~~ +test.cpp:1:1: note: ‘std::cout’ is defined in header ‘<iostream>’; did you forget to ‘#include <iostream>’? + +++ |+#include <iostream> + 1 | int main() { +test.cpp:3:23: error: ‘endl’ was not declared in this scope + 3 | cout << "Hello" << endl; + | ^~~~ +test.cpp:1:1: note: ‘std::endl’ is defined in header ‘<ostream>’; did you forget to ‘#include <ostream>’? + +++ |+#include <ostream> + 1 | int main() { + + + + + + +``` + +::: + +:::::::: + + +## GCC C++ compiler examples {auto-animate=true} + +::::::::{.columns} + +:::{.column width=45%} + +```cpp +#include <iostream> + +int main { + using namespace std; + cout << "Hello" << endl; + return 0; +} +``` + +Compiling with + +`gcc test.cpp -o test` + +::: + +:::{.column width=55% .smaller} + + +``` +test.cpp:3:5: error: cannot declare ‘::main’ to be a global variable + 3 | int main { + | ^~~~ +test.cpp:4:4: error: expected primary-expression before ‘using’ + 4 | using namespace std; + | ^~~~~ +test.cpp:4:4: error: expected ‘}’ before ‘using’ +test.cpp:3:10: note: to match this ‘{’ + 3 | int main { + | ^ +test.cpp:5:4: error: ‘cout’ does not name a type + 5 | cout << "Hello" << endl; + | ^~~~ +test.cpp:6:4: error: expected unqualified-id before ‘return’ + 6 | return 0; + | ^~~~~~ +test.cpp:7:1: error: expected declaration before ‘}’ token + 7 | } + | ^ +``` + +::: + +:::::::: + + +## GCC C++ compiler examples {auto-animate=true} + +::::::::{.columns} + +:::{.column width=45%} + +```cpp +#include 
<iostream> + +int main() { + using namespace std; + cout << "Hello" << endl; + return 0; +} +``` + +Compiling with + +`gcc test.cpp -o test` + +::: + +:::{.column width=55% .smaller} + + +``` +/usr/bin/ld: /tmp/ccrbNm7k.o: warning: relocation against `_ZSt4cout' in read-only section `.text' +/usr/bin/ld: /tmp/ccrbNm7k.o: in function `main': +test.cpp:(.text+0x15): undefined reference to `std::cout' +/usr/bin/ld: test.cpp:(.text+0x1d): undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)' +/usr/bin/ld: test.cpp:(.text+0x24): undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)' +/usr/bin/ld: test.cpp:(.text+0x2f): undefined reference to `std::ostream::operator<<(std::ostream& (*)(std::ostream&))' +/usr/bin/ld: /tmp/ccrbNm7k.o: in function `__static_initialization_and_destruction_0(int, int)': +test.cpp:(.text+0x66): undefined reference to `std::ios_base::Init::Init()' +/usr/bin/ld: test.cpp:(.text+0x81): undefined reference to `std::ios_base::Init::~Init()' +/usr/bin/ld: warning: creating DT_TEXTREL in a PIE +collect2: error: ld returned 1 exit status +``` + +::: + +:::::::: + + +## GCC C++ compiler examples {auto-animate=true} + +::::::::{.columns} + +:::{.column width=45%} + +```cpp +#include <iostream> + +int main() { + using namespace std; + cout << "Hello" << endl; + return 0; +} +``` + +Compiling with + +`g++ test.cpp -o test` + +::: + +:::{.column width=55% .smaller} + + +Compilation succeeded + +::: + +:::::::: + + + +## Remarks + + - The issue is always at or before the first error + - Most compilers give good feedback by default + + + +## Compiler flags for compilation output {.leftalign} + +_Setting compiler flags for compiled languages like Fortran and C_ + +Check the manuals + + - [GCC 
manuals](https://gcc.gnu.org/onlinedocs/) + - [Intel Fortran compiler options](https://www.intel.com/content/www/us/en/docs/fortran-compiler/developer-guide-reference/2024-1/compiler-options-001.html) + +Also gcc compile time checks and options like + + - `-Wall` and + - `-Wextra` + + + +# Run-time errors {.leftalign} + + - Let the compiler help you if something goes wrong + - Understand where to look for issues + + + +## Look out for compiler warnings {.leftalign auto-animate=true} + +::::::::{.columns} + +:::{.column width=48%} + +```c +#include <stdlib.h> + +int main() { + int number = 1; + int* ptr; + ptr = &number; + + free(ptr); + // ERROR: Trying to free + // memory from stack + + return 0; +} +``` + +Compiling with + +`gcc inv_ptr.c -o inv_ptr` + +::: + +:::{.column width=52% .fragment} + +Compiler output + +``` +inv_ptr.c: In function ‘main’: +inv_ptr.c:8:4: warning: ‘free’ called on unallocated object ‘number’ [-Wfree-nonheap-object] + 8 | free(ptr); + | ^~~~~~~~~ +inv_ptr.c:4:8: note: declared here + 4 | int number = 1; + | ^~~~~~ +``` + +Run output e.g. 
+ +``` +Segmentation fault +``` + +::: + +:::::::: + + +## Look out for compiler warnings {.leftalign auto-animate=true} + +::::::::{.columns} + +:::{.column width=48%} + +```c +#include <stdlib.h> + +int main() { + int number = 1; + int* ptr; + ptr = &number; + + free(ptr); + // ERROR: Trying to free + // memory from stack + + return 0; +} +``` + +Compiling with + +`gcc inv_ptr.c -Werror -o inv_ptr` + +::: + +:::{.column width=52%} + +Compiler output + +``` +inv_ptr.c: In function ‘main’: +inv_ptr.c:8:4: error: ‘free’ called on unallocated object ‘number’ [-Werror=free-nonheap-object] + 8 | free(ptr); + | ^~~~~~~~~ +inv_ptr.c:4:8: note: declared here + 4 | int number = 1; + | ^~~~~~ +cc1: all warnings being treated as errors +``` + +::: + +:::::::: + + + +## Compiler flags for run-time output (1/2) {.leftalign} + +_Setting compiler flags for compiled languages like Fortran and C_ + +Typically using `-g` for Intel, GCC and many other compilers + +In the Fortran world, also + +- gfortran: `-fbacktrace`, `-fbounds-check` +- intel (ifort): `-traceback`, `-check bounds`, `-check all` + + +## Compiler flags for run-time output (2/2) {.leftalign} + +Try with more than just one compiler + +$\Rightarrow$ HPC systems usually provide a native compiler plus gcc/gfortran. Try both variants. 
+ + + +## Locating the right error message {.leftalign} + +::::::::{.columns .smaller} + +:::{.column width=50%} + +```python +def _extend_number(num): + return 10*num + (num % 10) -1 + +def extend_number(num): + try: + for idx in range(num): + num = _extend_number(num) + except: + num = extend_number(num) + + return num + +print(extend_number('6')) +``` + +Expected output: + +``` +6543210 +``` + +::: + +:::{.column width=50% .fragment} + + +Actual output (last 17 lines) + +``` +Traceback (most recent call last): + File "number_extension.py", line 6, in extend_number + for idx in range(num): +RecursionError: maximum recursion depth exceeded while calling a Python object + +During handling of the above exception, another exception occurred: +Traceback (most recent call last): + File "number_extension.py", line 13, in <module> + print(extend_number('6')) + File "number_extension.py", line 9, in extend_number + num = extend_number(num) + File "number_extension.py", line 9, in extend_number + num = extend_number(num) + File "number_extension.py", line 9, in extend_number + num = extend_number(num) + [Previous line repeated 996 more times] +RecursionError: maximum recursion depth exceeded +``` + +<!-- +issues in line 9 and 13 +--> + +::: + +:::::::: + + +## Hands-On! (5-10min) {.handson} + +1. What are the actual issues in the code from the previous slide based on the error messages? +2. How to rectify them? + + +## Fixing run-time errors + + - Produce sensible debug messages to determine code area with the issue + - Use proper compiler flags for debug output + - Check prerequisites/environment + - Create minimal working example + - Use debugger + + +## Prevent errors + + - Testing! 
+ - Using linters (static code analysis) like [ruff](https://github.com/astral-sh/ruff) for Python + - Even ensure proper formatting with tools like [black](https://github.com/psf/black) for Python + - Consider using an IDE with its tools + + +# Errors due to data access and communication + +## Accessing data {.leftalign} + + - Data has to be available and accessible + - Typical errors are out-of-bounds accesses + (resulting in a segmentation fault in languages like C) + - Usually manual checks are needed + + +## Postprocessing example (1/4) + + - Extract data from netCDF file and calculate interpolation weights + - Python script located [here](static/postprocessing_example.py), netCDF file [here](static/vmro3_input4MIPs_ozone_1850-1855_rev.nc) + - Works for the active time stamps, but not for the ones in the comment + +```python{.python startFrom="46"} +filename = 'vmro3_input4MIPs_ozone_1850-1855_rev.nc' +timestamp_start = '1851-01-01T00:00:00.000' # '1850-01-01T00:00:00.000' +timestamp_end = '1851-03-31T00:00:00.000' # '1850-03-31T00:00:00.000' +timestep_days = 10 + +Calculate_Weights(filename, timestamp_start, timestamp_end, timestep_days) +``` + + +## Postprocessing example (2/4) + +:::{.smaller} + +```{.python startFrom="23"} +def Calculate_Weights(filename, timestamp_start, timestamp_end, timestep_days): + start_date, end_date, timestep = Get_Time_Objects( + timestamp_start=timestamp_start, timestamp_end=timestamp_end, + timestep_days=10) + + ds = Open_File(filename=filename) + + model_date = start_date + while ( model_date < end_date ): + o3_prev_date = Select_Date(ds=ds, model_date=model_date, method='ffill') + o3_next_date = Select_Date(ds=ds, model_date=model_date, method='bfill') + + if ( o3_next_date == o3_prev_date ): + prev_weight = 0.5 + else : + delta_step_sec = (o3_next_date - o3_prev_date).total_seconds() + delta_sec = (model_date - o3_prev_date).total_seconds() + prev_weight = 1.0 - delta_sec/delta_step_sec + + next_weight = 1.0 - prev_weight + 
print('weights for', model_date, 'are', prev_weight, 'and', next_weight) + model_date = model_date + timestep +``` + +::: + + +## Postprocessing example (3/4) + + +```python +def Open_File(filename): + import xarray as xr + + return xr.open_dataset(filename)\ + .convert_calendar('standard', use_cftime=True) + +def Select_Date(ds, model_date, method): + import datetime as dt + + ds_elem = ds.sel(time=model_date, method=method) + return dt.datetime.strptime(str(ds_elem['time'].values), + '%Y-%m-%d %H:%M:%S') +``` + +<!-- +```python +def Select_Date(ds, model_date, method='ffill'): + import datetime as dt + + if ( model_date < dt.datetime(1850, 1, 16, 12, 0, 0) ) \ + or ( model_date > dt.datetime(1854, 12, 31, 12, 0, 0) ): + ds_elem = ds.sel(time=model_date, method='nearest') + else : + ds_elem = ds.sel(time=model_date, method=method) + + return dt.datetime.strptime(str(ds_elem['time'].values), + '%Y-%m-%d %H:%M:%S') +``` +--> + + +## Postprocessing example (4/4) + + +```{.python startFrom="14"} +def Get_Time_Objects(timestamp_start, timestamp_end, timestep_days): + import datetime as dt + + time_format = '%Y-%m-%dT%H:%M:%S.%f' + start_date = dt.datetime.strptime(timestamp_start, time_format) + end_date = dt.datetime.strptime(timestamp_end, time_format) + timestep = dt.timedelta(days=timestep_days) + return [start_date, end_date, timestep] +``` + + +## Hands-On! (~15min) {.handson} + +1. Which command in the code causes the problem? +2. Describe what you would need to do to fix it. + (you can inspect the data e.g. 
with `print(ds['time'])` + or in your terminal with `cdo infon <file>.nc`) + +<!-- +``` +ncdump -h +``` +--> + +## Parallel programs + +Possible issues due to mishandled communication or data access + + - "Just" wrong results + - Deadlock + - Race conditions + + +## Remarks {.leftalign} + +If you found a bug in someone's code: + + - Communicate with the developer(s) + - Try to write a reproducer + +If you also managed to solve the bug + + - Share your solution (developers, colleagues, support) + - Write a test for it + + + +# Debugging broken code + +## Logging (1/2) + +_Make debugging easier with proper logging_ + + - Very minimal logger for Python + +```Python +import logging + +logging.warning('Example warning message') +logging.error('Example error message') +``` + +## Logging (2/2) {.leftalign} + +Optionally: Adjust the logger to your needs + + - Set output parameters + - Set log level and formatting + +Example: + +```python +logging.basicConfig(filename='output.log', level=logging.WARNING, + datefmt='%H:%M:%S', format='[%(asctime)s] %(levelname)-8s in ' \ + + '%(pathname)s:%(lineno)d %(message)s') +``` + + +## Hands-On! (~10min) {.handson .leftalign} + +Use the Python code for the Postprocessing example and add logging output at appropriate points. +Afterwards your code should log + + - if a file is opened and which, + - what date is being processed, and + - the calculated weights. 
+ + +## Using debuggers {.leftalign} + +::::::::{.columns} + +:::{.column width=50%} + +Idea of debuggers: + + - Investigate code before/at the issue interactively + - Set breakpoints to check actual values at specific code positions + +::: + +:::{.column width=50%} + +Some debuggers: + + - `pdb` (Python debugger) + - `gdb` (GNU debugger) + - commercial debuggers (like `ddt` on Levante, see also [Tech Talk](https://www.youtube.com/watch?v=q-JTpxsT0wA)) + +::: + +:::::::: + + +_Debuggers can also be used in IDEs and JupyterHub_ + + + +## gdb {.leftalign} + +Using gdb to inspect a core dump + + - to get a core dump + +```bash +ulimit -c unlimited +a.out +``` + +:::{.smaller} + +For core dumps, also `man 5 core` + +::: + + - to inspect the `core` dump + +```bash +gdb a.out core +``` + + +## gdb example + + +::::::::{.columns .smaller} + +:::{.column width=50%} + +```fortranfree +program loop_count_2d + implicit none + integer, parameter :: leni = 3 + integer, parameter :: lenj = 5 + integer, parameter :: len = leni*leni + ! 
ERROR: leni used twice for len + + integer :: my_ij(2,len) + integer :: i, j, n, ii, jj + + n = 0 + do j = 1, lenj + do i = 1, leni + n = n+1 + my_ij(1,n) = i + my_ij(2,n) = j + enddo + enddo +``` + +::: + +:::{.column width=50%} + +```{.fortranfree startFrom="20"} + do n = 1, len + jj = ( n - 1) / leni + 1 + ii = n - (jj-1) * leni + + if ( ii /= my_ij(1,n) ) then + print *, ' wrong i ', n, & + ii, my_ij(1,n) + end if + + if ( jj /= my_ij(2,n) ) then + print *, ' wrong j ', n, & + jj, my_ij(2,n) + end if + enddo +end program loop_count_2d +``` + +::: + +:::::::: + + +## Some gdb Commands + +::::::::{.columns .smaller} + +:::{.column width=50%} + + - `b <line>`: Add breakpoint at given line number + - `info b`: Show current breakpoints + - `clear`: Clear current breakpoint + - `d`: Clear all breakpoints + - `run`: Run the program + - `l`: Print code + - `info locals`: Print value of local variables + +::: + +:::{.column width=50%} + + - `p <var>`: Print value of variable + - `up`, `down` or `frame <num>`: Navigate stack + - `n`: Next line (without descending) + - `step`: Next line (descending into function calls) + - `h`: Show help + - `bt`: Create backtrace + - `c`: Continue execution + - `q`: Quit gdb + +::: + +:::::::: + + +## gdb Live Demo + + +# Debugging Flowchart + +::::::::{.columns} + +:::{.column width=35%} + +[![](static/debug-flow.png)](static/debug-flow.png) + +::: + +:::{.column width=60%} + + - Click on flowchart to enlarge + - Intended as a guideline, not a strict rule + +::: + +:::::::: + + diff --git a/lectures/debugging-strategies/static/debug-flow.dot b/lectures/debugging-strategies/static/debug-flow.dot new file mode 100644 index 0000000000000000000000000000000000000000..74300618d7f3ed481a8d885f08f19aeceaeb1bc7 --- /dev/null +++ b/lectures/debugging-strategies/static/debug-flow.dot @@ -0,0 +1,99 @@ +digraph { + node [ shape="box" ]; + + compile_stmt [ label="Compile program", shape="invhouse", style="filled", fillcolor="#ccccff" ]; + compile_ask [ label="Compilation\nsuccessfull?", shape="hexagon", 
style="filled", fillcolor="#dddddd" ]; + compile_fail [ label="Locate first\nerror message" ]; + compile_ok [ label="Nice! Try running\nthe program" ]; + + compile_stmt -> compile_ask; + compile_ask -> compile_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + compile_ask -> compile_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + run_ask [ label="Did everything\nwork as intended?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + run_ok [ label="Great! That's how\nthings should be", shape="house", style="filled", fillcolor="#ccccff" ]; + run_fail [ label="Was there an error\nmessage/stack trace?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + + compile_ok -> run_ask; + run_ask -> run_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + run_ask -> run_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + fix_first_ask [ label="Do you know\nhow to fix\nthis error?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + fix_first_ok [ label="Fix it and\nrecompile" ]; + fix_first_fail [ label="Search online for the error\nmessage (stackoverflow, ...).\nSee if anybody has had a\nsimilar issue" ]; + + compile_fail -> fix_first_ask; + fix_first_ask -> fix_first_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + fix_first_ok -> compile_ask; + fix_first_ask -> fix_first_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + online_ask [ label="Is this a known issue\nand you can apply the fix\nfor your situation?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + online_ok [ label="Apply the fix and recompile" ]; + online_fail [ label="Do you have read the\ndocumentation and FAQs?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + + fix_first_fail -> online_ask; + online_ask -> online_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + online_ok -> compile_ask; + online_ask -> online_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + trace_ok [ label="Does 
it provide\nenough information\nto fix this error?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + trace_fail [ label="Recompile program with\nproper debugging support\nand rerun it" ]; + + run_fail -> trace_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + run_fail -> trace_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + trace_fail -> compile_ask; + + docread_ok [ label="Try running a simpler\nversion of that program\n(deactivate components,\ncomment out code) or\nincrease output verbosity" ]; + docread_fail [ label="Locate documentation\nand FAQ of this program\n(and for machine/site if needed)" ]; + + online_fail -> doc_fail [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + online_fail -> docread_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + doc_ask [ label="Do you see\ndifferences which\nmight be related\nto this error?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + doc_ok [ label="Follow closer\nto the official\ndocumentation and\nrecompile" ]; + doc_fail [ label="Ask your colleagues or\nworking group if anybody\nknows about that issue" ]; + + docread_fail -> doc_ask; + doc_ask -> doc_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + doc_ok -> compile_ask; + doc_ask -> doc_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + cmphint_ask [ label="Are there any suggestions\nfor what to do in your\nsituation?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + cmphint_ok [ label="Try to adapt them\nto your situation and\nrecompile" ]; + + doc_fail -> cmphint_ask; + cmphint_ask -> cmphint_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + cmphint_ok -> compile_ask; + cmphint_ask -> docread_ok [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + autofix_ask [ label="Can you fix it\nby iteratively using\nthis approach?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + autofix_ok [ label="Well done! 
If the issue is\nnot specific to your situation,\nconsider reporting it with\nyour solution how to fix it", shape="house", style="filled", fillcolor="#ccccff" ]; + autofix_fail [ label="Try to create a minimal\nreproducer, i.e. a very\nsmall program which (still)\nhas the same issue" ]; + autofix_ask -> docread_ok; + + docread_ok -> autofix_ask; + autofix_ask -> autofix_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + autofix_ask -> autofix_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + debug_ok [ label="Fix it and recompile/rerun" ]; + debug_fail [ label="Use a debugger or\nincrease output verbosity\nto locate the issue" ]; + + trace_ok -> debug_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + debug_ok -> compile_ask; + trace_ok -> debug_fail [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + pinpoint_ask [ label="Can you pinpoint the\nissue to a specific part\nor region in the code?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + pinpoint_ok [ label="Fix it and recompile/rerun" ]; + + debug_fail -> pinpoint_ask + pinpoint_ask -> pinpoint_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + pinpoint_ok -> compile_ask; + pinpoint_ask -> docread_ok [ label="no", color="#ff0000", fontcolor="#ff0000" ]; + + reproducer_ask [ label="Did creating the reproducer\nprovide new insights which can\nbe used to solve the issue?", shape="hexagon", style="filled", fillcolor="#dddddd" ]; + support [ label="Gather relevant information\n(error message and log files,\nprogram version, what you tried,\nminimal reproducible expample,\ncontext and job IDs) and reach\nout for support. 
If supported by\nDKRZ, contact support@dkrz.de", shape="house", style="filled", fillcolor="#ccccff" ]; + + autofix_fail -> reproducer_ask; + reproducer_ask -> autofix_ok [ label="yes", color="#32dd32", fontcolor="#32dd32" ]; + reproducer_ask -> support [ label="no", color="#ff0000", fontcolor="#ff0000" ]; +} diff --git a/lectures/debugging-strategies/static/debug-flow.png b/lectures/debugging-strategies/static/debug-flow.png new file mode 100644 index 0000000000000000000000000000000000000000..c0c45f2ba7e50ae5672848a76f66662e3af245e5 Binary files /dev/null and b/lectures/debugging-strategies/static/debug-flow.png differ diff --git a/lectures/debugging-strategies/static/postprocessing_example.py b/lectures/debugging-strategies/static/postprocessing_example.py new file mode 100644 index 0000000000000000000000000000000000000000..54700831451e2f89a9fafbbc138b0a98c7f01cb0 --- /dev/null +++ b/lectures/debugging-strategies/static/postprocessing_example.py @@ -0,0 +1,53 @@ +def Open_File(filename): + import xarray as xr + + return xr.open_dataset(filename).convert_calendar("standard", use_cftime=True) + + +def Select_Date(ds, model_date, method): + import datetime as dt + + ds_elem = ds.sel(time=model_date, method=method) + return dt.datetime.strptime(str(ds_elem["time"].values), "%Y-%m-%d %H:%M:%S") + + +def Get_Time_Objects(timestamp_start, timestamp_end, timestep_days): + import datetime as dt + + time_format = "%Y-%m-%dT%H:%M:%S.%f" + start_date = dt.datetime.strptime(timestamp_start, time_format) + end_date = dt.datetime.strptime(timestamp_end, time_format) + timestep = dt.timedelta(days=timestep_days) + return [start_date, end_date, timestep] + + +def Calculate_Weights(filename, timestamp_start, timestamp_end, timestep_days): + start_date, end_date, timestep = Get_Time_Objects( + timestamp_start=timestamp_start, timestamp_end=timestamp_end, timestep_days=10 + ) + + ds = Open_File(filename=filename) + + model_date = start_date + while model_date < end_date: + 
o3_prev_date = Select_Date(ds=ds, model_date=model_date, method="ffill") + o3_next_date = Select_Date(ds=ds, model_date=model_date, method="bfill") + + if o3_next_date == o3_prev_date: + prev_weight = 0.5 + else: + delta_step_sec = (o3_next_date - o3_prev_date).total_seconds() + delta_sec = (model_date - o3_prev_date).total_seconds() + prev_weight = 1.0 - delta_sec / delta_step_sec + + next_weight = 1.0 - prev_weight + print("weights for", model_date, "are", prev_weight, "and", next_weight) + model_date = model_date + timestep + + +filename = "vmro3_input4MIPs_ozone_1850-1855_rev.nc" +timestamp_start = "1851-01-01T00:00:00.000" # '1850-01-01T00:00:00.000' +timestamp_end = "1851-03-31T00:00:00.000" # '1850-03-31T00:00:00.000' +timestep_days = 10 + +Calculate_Weights(filename, timestamp_start, timestamp_end, timestep_days) diff --git a/lectures/debugging-strategies/static/schnecke_flt.f90 b/lectures/debugging-strategies/static/schnecke_flt.f90 new file mode 100644 index 0000000000000000000000000000000000000000..584d56f7eacbed6ccd63374b90ee172fed231d19 --- /dev/null +++ b/lectures/debugging-strategies/static/schnecke_flt.f90 @@ -0,0 +1,211 @@ +program schnecke + + ! The code flags 42 pairs of (i,j) indices as True + ! and relocates these in a larger domain by starting + ! 
at index (is,js) + + + implicit none + + Integer, parameter :: m = 1 + Integer, parameter :: idim = 128 + Integer, parameter :: jdim = 64 + + Type memo + Integer :: i + Integer :: j + Integer :: dir + End Type + + Integer :: i, j + Integer :: index, nn + Integer :: is, js + + Integer :: stacksize + Integer :: current_stacksize + + Integer :: nbr_cells_tot + Integer :: nbr_cells_inc + + Type (memo), Pointer :: stack(:) + Type (memo), Pointer :: new_stack(:) + + Integer :: nbr_cells(m) + + Integer, Pointer :: tmp_neighcells_3d(:,:) + Integer, Pointer :: new_neighcells_3d(:,:) + + Logical :: cyclic(2) + + Logical :: flagged(idim,jdim) + Integer :: visited(idim,jdim) + + Integer :: direction(2,4) + + data direction / -1, 0, 0, -1, 1, 0, 0, 1 / + +! +! Initialise variables for test case +!----------------------------------- + + cyclic = .false. + + current_stacksize = 0 + + flagged = .false. + + do j = 45, 50 + do i = 30, 36 + flagged(i,j) = .true. + enddo + enddo + + is = 34 + js = 48 +! +! ---------------------------------- +! + stacksize = 4 + nbr_cells_inc = 4 + nbr_cells_tot = nbr_cells_inc + + allocate(stack(stacksize)) + + allocate(tmp_neighcells_3d(2,nbr_cells_tot)) + + visited = 0 + nbr_cells = 0 + + ! + ! Initialise first stack entry + ! + index = 1 + stack(index)%i = is + stack(index)%j = js + stack(index)%dir = 0 + ! + ! ... maybe we should test first whether our initial guess is really good + ! + visited(is,js) = m + + nn = 1 + + tmp_neighcells_3d(1,nn) = stack(index)%i + tmp_neighcells_3d(2,nn) = stack(index)%j + + nbr_cells(1) = 1 + ! + ! Andreas Algorithmus + ! + do while ( index > 0 ) + ! + ! Change direction + ! + stack(index)%dir = stack(index)%dir + 1 + + if ( stack(index)%dir > 4 ) then + ! + ! All 4 directions are already tested + ! for this cell so we have to go back + ! + index = index - 1 + cycle + endif + ! + ! Go to the next cell + ! + is = stack(index)%i + direction(1,stack(index)%dir) + js = stack(index)%j + direction(2,stack(index)%dir) + ! 
+ ! Apply cyclic boundary conditions if required + ! + if ( cyclic(1) ) is = mod(is-1,idim)+1 + if ( cyclic(2) ) js = mod(js-1,jdim)+1 + ! + ! Check that indices are in the allowed range + ! + if ( is < 1 .or. is > idim ) cycle + if ( js < 1 .or. js > jdim ) cycle + ! + ! When cell is already checked for this target ... + ! + if ( visited(is,js) == m ) cycle + ! + ! Test current cell + ! + if ( .not. flagged(is,js) ) then + ! + ! Mark cell as checked for this cycle + ! + visited(is,js) = m + cycle + + else + ! + ! We found a new source cell + ! + nbr_cells(m) = nbr_cells(m) + 1 + ! + ! ... allocate new memory if necessary + ! + if ( nn + 1 > nbr_cells_tot ) then + nbr_cells_tot = nbr_cells_tot + nbr_cells_inc + allocate(new_neighcells_3d(2,nbr_cells_tot)) + new_neighcells_3d(1:2,1:nn) = tmp_neighcells_3d(1:2,1:nn) + deallocate(tmp_neighcells_3d) + tmp_neighcells_3d => new_neighcells_3d + endif + + nn = nn + 1 + + tmp_neighcells_3d(1,nn) = is + tmp_neighcells_3d(2,nn) = js + ! + ! Prepare for the next source cell to test + ! + ! ... allocate new memory for the stack if necessary + ! + if ( index + 1 > current_stacksize ) then + current_stacksize = stacksize + allocate(new_stack(current_stacksize)) + new_stack(1:index) = stack(1:index) + deallocate(stack) + stack => new_stack + endif + + index = index + 1 + stack(index)%i = is + stack(index)%j = js + stack(index)%dir = 0 + + visited(is,js) = m + + endif + + enddo + ! + ! Finally shrink tmp_neighcells_3d to required size + ! + if ( nn < nbr_cells_tot ) then + nbr_cells_tot = nn + allocate(new_neighcells_3d(2,nbr_cells_tot)) + new_neighcells_3d(1:2,1:nn) = tmp_neighcells_3d(1:2,1:nn) + deallocate(tmp_neighcells_3d) + tmp_neighcells_3d => new_neighcells_3d + endif + ! + ! Test + ! + do i = 1, nbr_cells_tot + if ( tmp_neighcells_3d(2,i) < 45 .or. tmp_neighcells_3d(2,i) > 50 .or. & + tmp_neighcells_3d(1,i) < 30 .or. 
tmp_neighcells_3d(1,i) > 36 ) then + print *, 'WARNING at i ', i, tmp_neighcells_3d(1,i), tmp_neighcells_3d(2,i) + endif + print *, ' Found ', tmp_neighcells_3d(1,i), tmp_neighcells_3d(2,i) + enddo + + print *, ' Found ', nbr_cells, ' cells.' + +end program schnecke + + diff --git a/lectures/debugging-strategies/static/vmro3_input4MIPs_ozone_1850-1855_rev.nc b/lectures/debugging-strategies/static/vmro3_input4MIPs_ozone_1850-1855_rev.nc new file mode 100644 index 0000000000000000000000000000000000000000..b102115868e3b14a465796935fb8d74ac573921e Binary files /dev/null and b/lectures/debugging-strategies/static/vmro3_input4MIPs_ozone_1850-1855_rev.nc differ