diff --git a/doc/quickstart/get-started.rst b/doc/quickstart/get-started.rst index 376e483e2f88ebdf4cde85a8891a6d4884e2efe6..3e9780f8bb048b7cbb1591bedf40a8c5ab859079 100644 --- a/doc/quickstart/get-started.rst +++ b/doc/quickstart/get-started.rst @@ -186,6 +186,10 @@ Running Veros through MPI requires some addititonal dependencies: After you have installed everything, you can start Veros on multiple processes like so::: + $ python my_setup.py -n 2 2 + +Or, by explicitly using mpirun (might be required on some architectures):: + $ mpirun -n 4 python my_setup.py -n 2 2 In this case, Veros would run on 4 processes, each process computing one-quarter of the domain. The arguments of the `-n` flag specify the number of chunks in x and y-direction, respectively. diff --git a/veros/restart.py b/veros/restart.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/veros/tools/cli.py b/veros/tools/cli.py index 4266fa6e83efe7782c6698379487bceacd80904f..4518f6f3faa557b8946c233f152068672dd5535d 100644 --- a/veros/tools/cli.py +++ b/veros/tools/cli.py @@ -76,8 +76,9 @@ def cli(run): @click.option('-p', '--profile-mode', is_flag=True, default=False, type=click.BOOL, envvar='VEROS_PROFILE', help='Write a performance profile for debugging (default: false)') @click.option('-n', '--num-proc', nargs=2, default=[1, 1], type=click.INT, - help='Number of processes in x and y dimension (requires execution via mpirun)') - @click.option('--slave', default=False, is_flag=True, hidden=True) + help='Number of processes in x and y dimension') + @click.option('--slave', default=False, is_flag=True, hidden=True, + help='Indicates that this process is an MPI worker (for internal use)') @functools.wraps(run) def wrapped(*args, slave, **kwargs): from veros import runtime_settings, runtime_state @@ -94,7 +95,15 @@ def cli(run): ) futures = [comm.irecv(source=p) for p in range(total_proc)] - while not all(f.test()[0] for f in futures): + while True: + done, success = zip(*(f.test() for f in futures)) + + if any(s is False for s in success): + raise RuntimeError('An MPI worker encountered an error') + + if all(done): + break + time.sleep(0.1) return @@ -106,8 +115,13 @@ def cli(run): try: run(*args, **kwargs) + except: # noqa: E722 + status = False + raise + else: + status = True finally: if slave: - runtime_settings.mpi_comm.Get_parent().send(None, dest=0) + runtime_settings.mpi_comm.Get_parent().send(status, dest=0) return wrapped