• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pyiron / executorlib / 12308492040

13 Dec 2024 02:32AM UTC coverage: 95.286% (-0.4%) from 95.639%
12308492040

Pull #519

github

web-flow
Merge 48a8e680f into f1c4ffa9c
Pull Request #519: Add option to write flux log files

7 of 11 new or added lines in 4 files covered. (63.64%)

6 existing lines in 3 files now uncovered.

950 of 997 relevant lines covered (95.29%)

0.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.3
/executorlib/__init__.py
1
from typing import Optional
1✔
2

3
from executorlib._version import get_versions as _get_versions
1✔
4
from executorlib.interactive.executor import (
1✔
5
    ExecutorWithDependencies as _ExecutorWithDependencies,
6
)
7
from executorlib.interactive.executor import create_executor as _create_executor
1✔
8
from executorlib.standalone.inputcheck import (
1✔
9
    check_plot_dependency_graph as _check_plot_dependency_graph,
10
)
11
from executorlib.standalone.inputcheck import (
1✔
12
    check_pysqa_config_directory as _check_pysqa_config_directory,
13
)
14
from executorlib.standalone.inputcheck import (
1✔
15
    check_refresh_rate as _check_refresh_rate,
16
)
17

18
# Package version string, provided by versioneer via executorlib._version.
__version__ = _get_versions()["version"]
# NOTE(review): the public Executor class defined below is not listed in
# __all__ — confirm that keeping `from executorlib import *` empty is intentional.
__all__ = []
20

21

22
class Executor:
    """
    The executorlib.Executor leverages either the message passing interface (MPI), the SLURM workload manager or
    preferable the flux framework for distributing python functions within a given resource allocation. In contrast to
    the mpi4py.futures.MPIPoolExecutor the executorlib.Executor can be executed in a serial python process and does not
    require the python script to be executed with MPI. It is even possible to execute the executorlib.Executor directly
    in an interactive Jupyter notebook.

    Args:
        max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
                           cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
                           recommended, as computers have a limited number of compute cores.
        backend (str): Switch between the different backends "flux", "local" or "slurm". The default is "local".
        cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
        max_cores (int): defines the number of cores which can be used in parallel
        resource_dict (dict): A dictionary of resources required by the task. With the following keys:
                              - cores_per_worker (int): number of MPI cores to be used for each function call
                              - threads_per_core (int): number of OpenMP threads to be used for each function call
                              - gpus_per_worker (int): number of GPUs per worker - defaults to 0
                              - cwd (str/None): current working directory where the parallel python task is executed
                              - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                              SLURM only) - default False
                              - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
        flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
        flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
        flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
        flux_log_files (bool): Write flux stdout and stderr log files - default False (Flux only).
        pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                      context of an HPC cluster this is essential to be able to communicate to an
                                      Executor running on a different compute node within the same allocation. And
                                      in principle any computer should be able to resolve that their own hostname
                                      points to the same address as localhost. Still MacOS >= 12 seems to disable
                                      this look up for security reasons. So on MacOS it is required to set this
                                      option to true
        block_allocation (boolean): To accelerate the submission of a series of python functions with the same resource
                                    requirements, executorlib supports block allocation. In this case all resources have
                                    to be defined on the executor, rather than during the submission of the individual
                                    function.
        init_function (None): optional function to preset arguments for functions which are submitted later
        disable_dependencies (boolean): Disable resolving future objects during the submission.
        refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
        plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                      debugging purposes and to get an overview of the specified dependencies.

    Examples:
        ```
        >>> import numpy as np
        >>> from executorlib import Executor
        >>>
        >>> def calc(i, j, k):
        >>>     from mpi4py import MPI
        >>>     size = MPI.COMM_WORLD.Get_size()
        >>>     rank = MPI.COMM_WORLD.Get_rank()
        >>>     return np.array([i, j, k]), size, rank
        >>>
        >>> def init_k():
        >>>     return {"k": 3}
        >>>
        >>> with Executor(max_cores=2, init_function=init_k) as p:
        >>>     fs = p.submit(calc, 2, j=4)
        >>>     print(fs.result())
        [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)]
        ```
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        backend: str = "local",
        cache_directory: Optional[str] = None,
        max_cores: Optional[int] = None,
        resource_dict: Optional[dict] = None,
        flux_executor=None,
        flux_executor_pmi_mode: Optional[str] = None,
        flux_executor_nesting: bool = False,
        flux_log_files: bool = False,
        pysqa_config_directory: Optional[str] = None,
        hostname_localhost: Optional[bool] = None,
        block_allocation: bool = False,
        init_function: Optional[callable] = None,
        disable_dependencies: bool = False,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
    ):
        # Use __new__() instead of __init__(). This function is only implemented to enable auto-completion.
        pass

    def __new__(
        cls,
        max_workers: Optional[int] = None,
        backend: str = "local",
        cache_directory: Optional[str] = None,
        max_cores: Optional[int] = None,
        resource_dict: Optional[dict] = None,
        flux_executor=None,
        flux_executor_pmi_mode: Optional[str] = None,
        flux_executor_nesting: bool = False,
        flux_log_files: bool = False,
        pysqa_config_directory: Optional[str] = None,
        hostname_localhost: Optional[bool] = None,
        block_allocation: bool = False,
        init_function: Optional[callable] = None,
        disable_dependencies: bool = False,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
    ):
        """
        Instead of returning an executorlib.Executor object this function returns either an
        executorlib.mpi.PyMPIExecutor, executorlib.slurm.PySlurmExecutor or executorlib.flux.PyFluxExecutor depending on
        which backend is available. The executorlib.flux.PyFluxExecutor is the preferred choice while the
        executorlib.mpi.PyMPIExecutor is primarily used for development and testing. The
        executorlib.flux.PyFluxExecutor requires flux-core from the flux-framework to be installed and in addition
        flux-sched to enable GPU scheduling. Finally, the executorlib.slurm.PySlurmExecutor requires the SLURM workload
        manager to be installed on the system.

        Args:
            max_workers (int): for backwards compatibility with the standard library, max_workers also defines the
                               number of cores which can be used in parallel - just like the max_cores parameter. Using
                               max_cores is recommended, as computers have a limited number of compute cores.
            backend (str): Switch between the different backends "flux", "local" or "slurm". The default is "local".
            cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
            max_cores (int): defines the number of cores which can be used in parallel
            resource_dict (dict): A dictionary of resources required by the task. With the following keys:
                                  - cores (int): number of MPI cores to be used for each function call
                                  - threads_per_core (int): number of OpenMP threads to be used for each function call
                                  - gpus_per_core (int): number of GPUs per worker - defaults to 0
                                  - cwd (str/None): current working directory where the parallel python task is executed
                                  - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI
                                                                  and SLURM only) - default False
                                  - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                                                           only)
            flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
            flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
            flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
            flux_log_files (bool): Write flux stdout and stderr log files - default False (Flux only).
            pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
            hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                      context of an HPC cluster this is essential to be able to communicate to an
                                      Executor running on a different compute node within the same allocation. And
                                      in principle any computer should be able to resolve that their own hostname
                                      points to the same address as localhost. Still MacOS >= 12 seems to disable
                                      this look up for security reasons. So on MacOS it is required to set this
                                      option to true
            block_allocation (boolean): To accelerate the submission of a series of python functions with the same
                                        resource requirements, executorlib supports block allocation. In this case all
                                        resources have to be defined on the executor, rather than during the submission
                                        of the individual function.
            init_function (None): optional function to preset arguments for functions which are submitted later
            disable_dependencies (boolean): Disable resolving future objects during the submission.
            refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
            plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                          debugging purposes and to get an overview of the specified dependencies.

        """
        default_resource_dict = {
            "cores": 1,
            "threads_per_core": 1,
            "gpus_per_core": 0,
            "cwd": None,
            "openmpi_oversubscribe": False,
            "slurm_cmd_args": [],
        }
        if resource_dict is None:
            resource_dict = {}
        # Merge the defaults under the user-supplied values without mutating the
        # caller's dictionary in place (the previous .update() modified the argument).
        resource_dict = {**default_resource_dict, **resource_dict}
        if "_submission" in backend and not plot_dependency_graph:
            # File-based executor for pysqa-style submission backends; imported
            # lazily to keep the optional cache dependencies out of the default path.
            from executorlib.cache.executor import create_file_executor

            return create_file_executor(
                max_workers=max_workers,
                backend=backend,
                max_cores=max_cores,
                cache_directory=cache_directory,
                resource_dict=resource_dict,
                flux_executor=flux_executor,
                flux_executor_pmi_mode=flux_executor_pmi_mode,
                flux_executor_nesting=flux_executor_nesting,
                flux_log_files=flux_log_files,
                pysqa_config_directory=pysqa_config_directory,
                hostname_localhost=hostname_localhost,
                block_allocation=block_allocation,
                init_function=init_function,
                disable_dependencies=disable_dependencies,
            )
        elif not disable_dependencies:
            # pysqa_config_directory is only valid for the pysqa submission path above.
            _check_pysqa_config_directory(pysqa_config_directory=pysqa_config_directory)
            return _ExecutorWithDependencies(
                max_workers=max_workers,
                backend=backend,
                cache_directory=cache_directory,
                max_cores=max_cores,
                resource_dict=resource_dict,
                flux_executor=flux_executor,
                flux_executor_pmi_mode=flux_executor_pmi_mode,
                flux_executor_nesting=flux_executor_nesting,
                flux_log_files=flux_log_files,
                hostname_localhost=hostname_localhost,
                block_allocation=block_allocation,
                init_function=init_function,
                refresh_rate=refresh_rate,
                plot_dependency_graph=plot_dependency_graph,
            )
        else:
            # Dependency resolution disabled: refresh_rate and plot_dependency_graph
            # are meaningless here, so reject non-default values explicitly.
            _check_pysqa_config_directory(pysqa_config_directory=pysqa_config_directory)
            _check_plot_dependency_graph(plot_dependency_graph=plot_dependency_graph)
            _check_refresh_rate(refresh_rate=refresh_rate)
            return _create_executor(
                max_workers=max_workers,
                backend=backend,
                cache_directory=cache_directory,
                max_cores=max_cores,
                resource_dict=resource_dict,
                flux_executor=flux_executor,
                flux_executor_pmi_mode=flux_executor_pmi_mode,
                flux_executor_nesting=flux_executor_nesting,
                flux_log_files=flux_log_files,
                hostname_localhost=hostname_localhost,
                block_allocation=block_allocation,
                init_function=init_function,
            )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc