
pyiron / executorlib / build 13089731186

01 Feb 2025 03:00PM UTC, coverage: 95.552% (-1.0%) from 96.536%
Pull Request #548: [major] Refactor the Executor interface
Merge 8f01dfac5 into 2a5c10963 (GitHub, web-flow)

118 of 137 new or added lines in 4 files covered (86.13%)
1074 of 1124 relevant lines covered (95.55%)
0.96 hits per line

Source File: /executorlib/interfaces/slurm.py (80.95% covered)
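
"""
SLURM interfaces for executorlib: SlurmSubmissionExecutor submits each task to the
queuing system through the pysqa-based file executor, while SlurmAllocationExecutor
executes tasks inside an existing SLURM allocation by spawning srun calls.
"""
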
from typing import Callable, Optional, Union

from executorlib.interactive.executor import ExecutorWithDependencies
from executorlib.interactive.shared import (
    InteractiveExecutor,
    InteractiveStepExecutor,
)
from executorlib.interactive.slurm import (
    SrunSpawner,
    validate_max_workers as validate_max_workers_slurm,
)
from executorlib.standalone.inputcheck import (
    check_init_function,
    check_plot_dependency_graph,
    check_refresh_rate,
    validate_number_of_cores,
)


class SlurmSubmissionExecutor:
    """
    The executorlib.Executor leverages either the message passing interface (MPI), the SLURM workload manager or,
    preferably, the flux framework for distributing python functions within a given resource allocation. In contrast
    to the mpi4py.futures.MPIPoolExecutor, the executorlib.Executor can be executed in a serial python process and
    does not require the python script to be executed with MPI. It is even possible to execute the
    executorlib.Executor directly in an interactive Jupyter notebook.

    Args:
        max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number
                           of cores which can be used in parallel - just like the max_cores parameter. Using max_cores
                           is recommended, as computers have a limited number of compute cores.
        cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
        max_cores (int): defines the number of cores which can be used in parallel
        resource_dict (dict): A dictionary of resources required by the task, with the following keys:
                              - cores (int): number of MPI cores to be used for each function call
                              - threads_per_core (int): number of OpenMP threads to be used for each function call
                              - gpus_per_core (int): number of GPUs per worker - defaults to 0
                              - cwd (str/None): current working directory where the parallel python task is executed
                              - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                              SLURM only) - default False
                              - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
        pysqa_config_directory (str, optional): path to the pysqa config directory (only for the pysqa-based backend).
        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                      context of an HPC cluster this is essential for communicating with an Executor
                                      running on a different compute node within the same allocation. In principle any
                                      computer should be able to resolve its own hostname to the same address as
                                      localhost, but macOS >= 12 seems to disable this lookup for security reasons, so
                                      on macOS this option has to be set to true.
        block_allocation (boolean): To accelerate the submission of a series of python functions with the same resource
                                    requirements, executorlib supports block allocation. In this case all resources have
                                    to be defined on the executor, rather than during the submission of the individual
                                    function.
        init_function (Callable, optional): function to preset arguments for functions which are submitted later
        disable_dependencies (boolean): Disable resolving future objects during the submission.
        refresh_rate (float): Set the refresh rate in seconds, i.e. how frequently the input queue is checked.
        plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                      debugging purposes and to get an overview of the specified dependencies.
        plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.

    Examples:
        ```
        >>> import numpy as np
        >>> from executorlib.interfaces.slurm import SlurmSubmissionExecutor
        >>>
        >>> def calc(i, j, k):
        >>>     from mpi4py import MPI
        >>>     size = MPI.COMM_WORLD.Get_size()
        >>>     rank = MPI.COMM_WORLD.Get_rank()
        >>>     return np.array([i, j, k]), size, rank
        >>>
        >>> def init_k():
        >>>     return {"k": 3}
        >>>
        >>> with SlurmSubmissionExecutor(
        >>>     resource_dict={"cores": 2}, block_allocation=True, init_function=init_k
        >>> ) as p:
        >>>     fs = p.submit(calc, 2, j=4)
        >>>     print(fs.result())
        [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)]
        ```
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        cache_directory: Optional[str] = None,
        max_cores: Optional[int] = None,
        resource_dict: Optional[dict] = None,
        pysqa_config_directory: Optional[str] = None,
        hostname_localhost: Optional[bool] = None,
        block_allocation: bool = False,
        init_function: Optional[Callable] = None,
        disable_dependencies: bool = False,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
        plot_dependency_graph_filename: Optional[str] = None,
    ):
        # Use __new__() instead of __init__(); this stub exists only to enable
        # auto-completion of the constructor arguments in IDEs.
        pass

    def __new__(
        cls,
        max_workers: Optional[int] = None,
        cache_directory: Optional[str] = None,
        max_cores: Optional[int] = None,
        resource_dict: Optional[dict] = None,
        pysqa_config_directory: Optional[str] = None,
        hostname_localhost: Optional[bool] = None,
        block_allocation: bool = False,
        init_function: Optional[Callable] = None,
        disable_dependencies: bool = False,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
        plot_dependency_graph_filename: Optional[str] = None,
    ):
        """
        Instead of returning an executorlib.Executor object, this function returns either an
        executorlib.mpi.PyMPIExecutor, an executorlib.slurm.PySlurmExecutor or an executorlib.flux.PyFluxExecutor,
        depending on which backend is available. The executorlib.flux.PyFluxExecutor is the preferred choice, while
        the executorlib.mpi.PyMPIExecutor is primarily used for development and testing. The
        executorlib.flux.PyFluxExecutor requires flux-core from the flux-framework to be installed, and in addition
        flux-sched to enable GPU scheduling. Finally, the executorlib.slurm.PySlurmExecutor requires the SLURM
        workload manager to be installed on the system.

        Args:
            max_workers (int): for backwards compatibility with the standard library, max_workers also defines the
                               number of cores which can be used in parallel - just like the max_cores parameter. Using
                               max_cores is recommended, as computers have a limited number of compute cores.
            cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
            max_cores (int): defines the number of cores which can be used in parallel
            resource_dict (dict): A dictionary of resources required by the task, with the following keys:
                                  - cores (int): number of MPI cores to be used for each function call
                                  - threads_per_core (int): number of OpenMP threads to be used for each function call
                                  - gpus_per_core (int): number of GPUs per worker - defaults to 0
                                  - cwd (str/None): current working directory where the parallel python task is executed
                                  - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI
                                                                  and SLURM only) - default False
                                  - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                                                           only)
            pysqa_config_directory (str, optional): path to the pysqa config directory (only for the pysqa-based
                                                    backend).
            hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                          context of an HPC cluster this is essential for communicating with an
                                          Executor running on a different compute node within the same allocation. In
                                          principle any computer should be able to resolve its own hostname to the same
                                          address as localhost, but macOS >= 12 seems to disable this lookup for
                                          security reasons, so on macOS this option has to be set to true.
            block_allocation (boolean): To accelerate the submission of a series of python functions with the same
                                        resource requirements, executorlib supports block allocation. In this case all
                                        resources have to be defined on the executor, rather than during the submission
                                        of the individual function.
            init_function (Callable, optional): function to preset arguments for functions which are submitted later
            disable_dependencies (boolean): Disable resolving future objects during the submission.
            refresh_rate (float): Set the refresh rate in seconds, i.e. how frequently the input queue is checked.
            plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                          debugging purposes and to get an overview of the specified dependencies.
            plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.

        """
        default_resource_dict: dict = {
            "cores": 1,
            "threads_per_core": 1,
            "gpus_per_core": 0,
            "cwd": None,
            "openmpi_oversubscribe": False,
            "slurm_cmd_args": [],
        }
        if resource_dict is None:
            resource_dict = {}
        resource_dict.update(
            {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
        )
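        # Default path: hand the tasks to the file-based cache executor, which
        # serializes them to the cache directory and submits them to SLURM through
        # the pysqa-based backend. The plot_dependency_graph path only builds and
        # renders the task graph without executing anything.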
        if not plot_dependency_graph:
            from executorlib.cache.executor import create_file_executor

            return create_file_executor(
                max_workers=max_workers,
                backend="slurm_submission",
                max_cores=max_cores,
                cache_directory=cache_directory,
                resource_dict=resource_dict,
                flux_executor=None,
                flux_executor_pmi_mode=None,
                flux_executor_nesting=False,
                flux_log_files=False,
                pysqa_config_directory=pysqa_config_directory,
                hostname_localhost=hostname_localhost,
                block_allocation=block_allocation,
                init_function=init_function,
                disable_dependencies=disable_dependencies,
            )
        else:
            return ExecutorWithDependencies(
                executor=create_slurm_executor(
                    max_workers=max_workers,
                    cache_directory=cache_directory,
                    max_cores=max_cores,
                    resource_dict=resource_dict,
                    hostname_localhost=hostname_localhost,
                    block_allocation=block_allocation,
                    init_function=init_function,
                ),
                max_cores=max_cores,
                refresh_rate=refresh_rate,
                plot_dependency_graph=plot_dependency_graph,
                plot_dependency_graph_filename=plot_dependency_graph_filename,
            )


class SlurmAllocationExecutor:
    """
    The executorlib.Executor leverages either the message passing interface (MPI), the SLURM workload manager or,
    preferably, the flux framework for distributing python functions within a given resource allocation. In contrast
    to the mpi4py.futures.MPIPoolExecutor, the executorlib.Executor can be executed in a serial python process and
    does not require the python script to be executed with MPI. It is even possible to execute the
    executorlib.Executor directly in an interactive Jupyter notebook.

    Args:
        max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number
                           of cores which can be used in parallel - just like the max_cores parameter. Using max_cores
                           is recommended, as computers have a limited number of compute cores.
        cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
        max_cores (int): defines the number of cores which can be used in parallel
        resource_dict (dict): A dictionary of resources required by the task, with the following keys:
                              - cores (int): number of MPI cores to be used for each function call
                              - threads_per_core (int): number of OpenMP threads to be used for each function call
                              - gpus_per_core (int): number of GPUs per worker - defaults to 0
                              - cwd (str/None): current working directory where the parallel python task is executed
                              - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                              SLURM only) - default False
                              - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                      context of an HPC cluster this is essential for communicating with an Executor
                                      running on a different compute node within the same allocation. In principle any
                                      computer should be able to resolve its own hostname to the same address as
                                      localhost, but macOS >= 12 seems to disable this lookup for security reasons, so
                                      on macOS this option has to be set to true.
        block_allocation (boolean): To accelerate the submission of a series of python functions with the same resource
                                    requirements, executorlib supports block allocation. In this case all resources have
                                    to be defined on the executor, rather than during the submission of the individual
                                    function.
        init_function (Callable, optional): function to preset arguments for functions which are submitted later
        disable_dependencies (boolean): Disable resolving future objects during the submission.
        refresh_rate (float): Set the refresh rate in seconds, i.e. how frequently the input queue is checked.
        plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                      debugging purposes and to get an overview of the specified dependencies.
        plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.

    Examples:
        ```
        >>> import numpy as np
        >>> from executorlib.interfaces.slurm import SlurmAllocationExecutor
        >>>
        >>> def calc(i, j, k):
        >>>     from mpi4py import MPI
        >>>     size = MPI.COMM_WORLD.Get_size()
        >>>     rank = MPI.COMM_WORLD.Get_rank()
        >>>     return np.array([i, j, k]), size, rank
        >>>
        >>> def init_k():
        >>>     return {"k": 3}
        >>>
        >>> with SlurmAllocationExecutor(
        >>>     resource_dict={"cores": 2}, block_allocation=True, init_function=init_k
        >>> ) as p:
        >>>     fs = p.submit(calc, 2, j=4)
        >>>     print(fs.result())
        [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)]
        ```
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        cache_directory: Optional[str] = None,
        max_cores: Optional[int] = None,
        resource_dict: Optional[dict] = None,
        hostname_localhost: Optional[bool] = None,
        block_allocation: bool = False,
        init_function: Optional[Callable] = None,
        disable_dependencies: bool = False,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
        plot_dependency_graph_filename: Optional[str] = None,
    ):
        # Use __new__() instead of __init__(); this stub exists only to enable
        # auto-completion of the constructor arguments in IDEs.
        pass

    def __new__(
        cls,
        max_workers: Optional[int] = None,
        cache_directory: Optional[str] = None,
        max_cores: Optional[int] = None,
        resource_dict: Optional[dict] = None,
        hostname_localhost: Optional[bool] = None,
        block_allocation: bool = False,
        init_function: Optional[Callable] = None,
        disable_dependencies: bool = False,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
        plot_dependency_graph_filename: Optional[str] = None,
    ):
        """
        Instead of returning an executorlib.Executor object, this function returns either an
        executorlib.mpi.PyMPIExecutor, an executorlib.slurm.PySlurmExecutor or an executorlib.flux.PyFluxExecutor,
        depending on which backend is available. The executorlib.flux.PyFluxExecutor is the preferred choice, while
        the executorlib.mpi.PyMPIExecutor is primarily used for development and testing. The
        executorlib.flux.PyFluxExecutor requires flux-core from the flux-framework to be installed, and in addition
        flux-sched to enable GPU scheduling. Finally, the executorlib.slurm.PySlurmExecutor requires the SLURM
        workload manager to be installed on the system.

        Args:
            max_workers (int): for backwards compatibility with the standard library, max_workers also defines the
                               number of cores which can be used in parallel - just like the max_cores parameter. Using
                               max_cores is recommended, as computers have a limited number of compute cores.
            cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
            max_cores (int): defines the number of cores which can be used in parallel
            resource_dict (dict): A dictionary of resources required by the task, with the following keys:
                                  - cores (int): number of MPI cores to be used for each function call
                                  - threads_per_core (int): number of OpenMP threads to be used for each function call
                                  - gpus_per_core (int): number of GPUs per worker - defaults to 0
                                  - cwd (str/None): current working directory where the parallel python task is executed
                                  - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI
                                                                  and SLURM only) - default False
                                  - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                                                           only)
            hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                          context of an HPC cluster this is essential for communicating with an
                                          Executor running on a different compute node within the same allocation. In
                                          principle any computer should be able to resolve its own hostname to the same
                                          address as localhost, but macOS >= 12 seems to disable this lookup for
                                          security reasons, so on macOS this option has to be set to true.
            block_allocation (boolean): To accelerate the submission of a series of python functions with the same
                                        resource requirements, executorlib supports block allocation. In this case all
                                        resources have to be defined on the executor, rather than during the submission
                                        of the individual function.
            init_function (Callable, optional): function to preset arguments for functions which are submitted later
            disable_dependencies (boolean): Disable resolving future objects during the submission.
            refresh_rate (float): Set the refresh rate in seconds, i.e. how frequently the input queue is checked.
            plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                          debugging purposes and to get an overview of the specified dependencies.
            plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.

        """
        default_resource_dict: dict = {
            "cores": 1,
            "threads_per_core": 1,
            "gpus_per_core": 0,
            "cwd": None,
            "openmpi_oversubscribe": False,
            "slurm_cmd_args": [],
        }
        if resource_dict is None:
            resource_dict = {}
        resource_dict.update(
            {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
        )
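        # Default path: wrap the executor so that Future objects passed as
        # arguments are resolved before submission. With dependency resolution
        # disabled, verify that the graph-plotting options are unset and return
        # the plain executor.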
        if not disable_dependencies:
            return ExecutorWithDependencies(
                executor=create_slurm_executor(
                    max_workers=max_workers,
                    cache_directory=cache_directory,
                    max_cores=max_cores,
                    resource_dict=resource_dict,
                    hostname_localhost=hostname_localhost,
                    block_allocation=block_allocation,
                    init_function=init_function,
                ),
                max_cores=max_cores,
                refresh_rate=refresh_rate,
                plot_dependency_graph=plot_dependency_graph,
                plot_dependency_graph_filename=plot_dependency_graph_filename,
            )
        else:
            check_plot_dependency_graph(plot_dependency_graph=plot_dependency_graph)
            check_refresh_rate(refresh_rate=refresh_rate)
            return create_slurm_executor(
                max_workers=max_workers,
                cache_directory=cache_directory,
                max_cores=max_cores,
                resource_dict=resource_dict,
                hostname_localhost=hostname_localhost,
                block_allocation=block_allocation,
                init_function=init_function,
            )


def create_slurm_executor(
    max_workers: Optional[int] = None,
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    # Avoid a shared mutable default: the dictionary is modified below, so a
    # `resource_dict: dict = {}` default would leak state between calls.
    if resource_dict is None:
        resource_dict = {}
    cores_per_worker = resource_dict.get("cores", 1)
    resource_dict["cache_directory"] = cache_directory
    resource_dict["hostname_localhost"] = hostname_localhost
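    # Block allocation keeps a fixed pool of long-lived workers that are reused
    # across tasks, so the worker count has to be validated up front; the step
    # executor instead spawns a fresh srun call for every submitted task.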
    if block_allocation:
        resource_dict["init_function"] = init_function
        max_workers = validate_number_of_cores(
            max_cores=max_cores,
            max_workers=max_workers,
            cores_per_worker=cores_per_worker,
            set_local_cores=False,
        )
        validate_max_workers_slurm(
            max_workers=max_workers,
            cores=cores_per_worker,
            threads_per_core=resource_dict.get("threads_per_core", 1),
        )
        return InteractiveExecutor(
            max_workers=max_workers,
            executor_kwargs=resource_dict,
            spawner=SrunSpawner,
        )
    else:
        return InteractiveStepExecutor(
            max_cores=max_cores,
            max_workers=max_workers,
            executor_kwargs=resource_dict,
            spawner=SrunSpawner,
        )