• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nvidia-holoscan / holoscan-cli / 12230940701

09 Dec 2024 07:30AM UTC coverage: 69.278%. First build
12230940701

Pull #4

github

web-flow
Merge c95cf46b9 into fc05764ac
Pull Request #4: Integrate Test App into Actions

56 of 129 new or added lines in 22 files covered. (43.41%)

1353 of 1953 relevant lines covered (69.28%)

0.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

18.86
/src/holoscan_cli/common/dockerutils.py
1
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
# SPDX-License-Identifier: Apache-2.0
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
import json
1✔
16
import logging
1✔
17
import os
1✔
18
import posixpath
1✔
19
import re
1✔
20
import subprocess
1✔
21
from pathlib import Path
1✔
22
from typing import Optional
1✔
23

24
from python_on_whales import docker
1✔
25

26
from ..common.utils import run_cmd_output
1✔
27
from .constants import DefaultValues, EnvironmentVariables
1✔
28
from .enum_types import PlatformConfiguration, SdkType
1✔
29
from .exceptions import GpuResourceError, InvalidManifestError, RunContainerError
1✔
30
from .utils import get_gpu_count, get_requested_gpus
1✔
31

32
# Module-level logger shared by the helpers in this file.
logger = logging.getLogger("common")
33

34

35
def parse_docker_image_name_and_tag(
    image_name: str,
) -> tuple[Optional[str], Optional[str]]:
    """Split a Docker image reference into its name and optional tag.

    Args:
        image_name (str): Docker image name, optionally suffixed with `:tag`

    Returns:
        Tuple[Optional[str], Optional[str]]: (name, tag) pair; both items are
        None when the input is not a valid image reference, and the tag is
        None when no tag was supplied.
    """
    result = re.search(
        r"^(?P<name>([\w.\-_]+((:\d+|)(?=/[a-z0-9._-]+/[a-z0-9._-]+))|)(/?)([a-z0-9.\-_/]+(/[a-z0-9.\-_]+|)))(:(?P<tag>[\w.\-_]{1,127})|)$",
        image_name,
    )

    # No match, or a match without a usable name, means an invalid reference.
    if result is None or result.group("name") is None:
        return None, None

    # Normalize a missing/empty tag group to None.
    return result.group("name"), (result.group("tag") or None)
59

60

61
def create_or_use_network(network: Optional[str], image_name: Optional[str]) -> str:
    """Return an existing Docker network's name, creating a bridge network if needed.

    Args:
        network (Optional[str]): name of the network to look up or create; when
            None, a name is derived from `image_name`
        image_name (Optional[str]): image name used to derive a network name

    Raises:
        RunContainerError: when unable to retrieve the specified network or failed to create one.

    Returns:
        str: network name
    """
    # Derive "<image-without-tag>-network" when no explicit network was given.
    if network is None and image_name is not None:
        network = f"{image_name.split(':')[0]}-network"

    assert network is not None

    # Reuse an existing network whose name matches exactly.
    try:
        matches = docker.network.list(filters={"name": f"^{network}$"})
        if matches:
            return matches[0].name
    except Exception as ex:
        raise RunContainerError(f"error retrieving network information: {ex}") from ex

    # Nothing matched: create a new bridge network with the requested name.
    try:
        return docker.network.create(network, driver="bridge").name
    except Exception as ex:
        raise RunContainerError(f"error creating Docker network: {ex}") from ex
91

92

93
def image_exists(image_name: str) -> bool:
    """Checks whether a Docker image is available, pulling it when not found locally.

    Args:
        image_name (str): name of the Docker image

    Returns:
        bool: whether the image exists or not.
    """
    if image_name is None:
        return False
    try:
        # Try a pull only when the image is not already present locally.
        if not docker.image.exists(image_name):
            logger.info(f"Attempting to pull image {image_name}..")
            docker.image.pull(image_name)
        # Re-check after the (possible) pull; this is the authoritative answer.
        return docker.image.exists(image_name)
    except Exception as err:
        # Any docker failure (daemon down, pull denied, ...) reports "not available".
        logger.error(str(err))
        return False
112

113

114
def docker_export_tarball(file: str, tag: str) -> None:
    """Exports the docker image to a tarball file on disk.

    Args:
        file (str): path of the tarball file to write
        tag (str): source Docker image tag to export
    """
    docker.image.save(tag, file)
122

123

124
def create_and_get_builder(builder_name: str):
    """Returns a Docker BuildX builder by name, creating it if it does not exist.

    Args:
        builder_name (str): name of the builder to look up or create

    Returns:
        _type_: name of the builder found or created
    """
    # Reuse an already-registered builder with the same name, if any.
    if any(b.name == builder_name for b in docker.buildx.list()):
        logger.info(f"Using existing Docker BuildKit builder `{builder_name}`")
        return builder_name

    logger.info(
        f"Creating Docker BuildKit builder `{builder_name}` using `docker-container`"
    )
    # Host networking lets the build container reach services on the host.
    new_builder = docker.buildx.create(
        name=builder_name, driver="docker-container", driver_options={"network": "host"}
    )
    return new_builder.name
146

147

148
def build_docker_image(**kwargs):
    """Builds a Docker image.

    Args:
        **kwargs: keyword arguments forwarded verbatim to `docker.buildx.build`.
    """
    _ = docker.buildx.build(**kwargs)
151

152

153
def docker_run(
    name: str,
    image_name: str,
    input_path: Optional[Path],
    output_path: Optional[Path],
    app_info: dict,
    pkg_info: dict,
    quiet: bool,
    commands: list[str],
    health_check: bool,
    network: str,
    network_interface: Optional[str],
    use_all_nics: bool,
    gpu_enum: Optional[str],
    config: Optional[Path],
    render: bool,
    user: str,
    terminal: bool,
    devices: list[str],
    platform_config: str,
    shared_memory_size: str = "1GB",
    is_root: bool = False,
):
    """Creates and runs a Docker container

    `HOLOSCAN_HOSTING_SERVICE` environment variable is used for hiding the help message
    inside the tools.sh when the users run the container using holoscan run.

    Args:
        name (str): name to assign to the container (also used as its hostname)
        image_name (str): Docker image name
        input_path (Optional[Path]): input data path
        output_path (Optional[Path]): output data path
        app_info (dict): app manifest
        pkg_info (dict): package manifest
        quiet (bool): prints only stderr when True, otherwise, prints all logs
        commands (List[str]): list of arguments to provide to the container
        health_check (bool): whether or not to enable the gRPC health check service
        network (str): Docker network to associate the container with
        network_interface (Optional[str]): Name of the network interface for setting
            UCX_NET_DEVICES
        use_all_nics (bool): Sets UCX_CM_USE_ALL_DEVICES to 'y' if True
        gpu_enum (Optional[str]): user-requested value for NVIDIA_VISIBLE_DEVICES
        config (Optional[Path]): optional configuration file for overriding the embedded one
        render (bool): whether or not to enable graphic rendering
        user (str): UID and GID to associate with the container
        terminal (bool): whether or not to enter bash terminal
        devices (List[str]): list of devices to be mapped into the container
        platform_config (str): platform configuration value used when packaging the application
        shared_memory_size (str): size of /dev/shm
        is_root (bool): whether the user is root (UID = 0) or not

    Raises:
        GpuResourceError: when fewer GPUs are available than the package requests.
        InvalidManifestError: when `config` is given but the app manifest defines no
            HOLOSCAN_CONFIG_PATH environment variable.
    """

    volumes = []
    environment_variables = {
        "NVIDIA_DRIVER_CAPABILITIES": "all",
        "HOLOSCAN_HOSTING_SERVICE": "HOLOSCAN_RUN",
        "UCX_CM_USE_ALL_DEVICES": "y" if use_all_nics else "n",
    }

    if network_interface is not None:
        environment_variables["UCX_NET_DEVICES"] = network_interface

    if health_check:
        environment_variables["HOLOSCAN_ENABLE_HEALTH_CHECK"] = "true"

    # Surface UCX/Vulkan diagnostics when the CLI itself runs in debug mode.
    if logger.root.level == logging.DEBUG:
        environment_variables["UCX_LOG_LEVEL"] = "DEBUG"
        environment_variables["VK_LOADER_DEBUG"] = "all"

    if render:
        # Forward the host's display stack (X11 and/or Wayland) into the container.
        volumes.append(("/tmp/.X11-unix", "/tmp/.X11-unix"))
        display = os.environ.get("DISPLAY", None)
        if display is not None:
            environment_variables["DISPLAY"] = display
        xdg_session_type = os.environ.get("XDG_SESSION_TYPE", None)
        if xdg_session_type is not None:
            environment_variables["XDG_SESSION_TYPE"] = xdg_session_type
        xdg_runtime_dir = os.environ.get("XDG_RUNTIME_DIR", None)
        if xdg_runtime_dir is not None:
            volumes.append((xdg_runtime_dir, xdg_runtime_dir))
            environment_variables["XDG_RUNTIME_DIR"] = xdg_runtime_dir
        wayland_display = os.environ.get("WAYLAND_DISPLAY", None)
        if wayland_display is not None:
            environment_variables["WAYLAND_DISPLAY"] = wayland_display

    # Use user-specified --gpu values
    if gpu_enum is not None:
        environment_variables["NVIDIA_VISIBLE_DEVICES"] = gpu_enum
    # If the image was built for iGPU but the system is configured for dGPU, attempt
    # targeting the system's iGPU using the CDI spec
    elif (
        platform_config == PlatformConfiguration.iGPU.value
        and not _host_is_native_igpu()
    ):
        environment_variables["NVIDIA_VISIBLE_DEVICES"] = "nvidia.com/igpu=0"
        logger.info(
            "Attempting to run an image for iGPU (integrated GPU) on a system configured "
            "with a dGPU (discrete GPU). If this is correct (ex: IGX Orin developer kit), "
            "make sure to enable iGPU on dGPU support as described in your developer kit "
            "user guide. If not, either rebuild the image for dGPU or run this image on a "
            "system configured for iGPU only (ex: Jetson AGX, Nano...)."
        )
    # Otherwise, read specs from package manifest
    else:
        requested_gpus = get_requested_gpus(pkg_info)
        available_gpus = get_gpu_count()

        if available_gpus < requested_gpus:
            raise GpuResourceError(
                f"Available GPUs ({available_gpus}) are less than required ({requested_gpus}). "
            )

        if requested_gpus == 0:
            environment_variables["NVIDIA_VISIBLE_DEVICES"] = "all"
        else:
            # Expose GPUs 0..requested_gpus-1 as a comma-separated index list.
            environment_variables["NVIDIA_VISIBLE_DEVICES"] = ",".join(
                map(str, range(0, requested_gpus))
            )

    if "path" in app_info["input"]:
        mapped_input = Path(app_info["input"]["path"]).as_posix()
    else:
        mapped_input = DefaultValues.INPUT_DIR

    # Relative manifest paths are resolved against the app's working directory.
    if not posixpath.isabs(mapped_input):
        mapped_input = posixpath.join(app_info["workingDirectory"], mapped_input)
    if input_path is not None:
        volumes.append((str(input_path), mapped_input))

    if "path" in app_info["output"]:
        mapped_output = Path(app_info["output"]["path"]).as_posix()
    else:
        # Bug fix: this previously fell back to DefaultValues.INPUT_DIR, which mapped
        # the output volume onto the input directory whenever the app manifest omits
        # `output.path` (mirror of the input branch above).
        mapped_output = DefaultValues.OUTPUT_DIR

    if not posixpath.isabs(mapped_output):
        mapped_output = posixpath.join(app_info["workingDirectory"], mapped_output)
    if output_path is not None:
        volumes.append((str(output_path), mapped_output))

    # Copy the manifest's environment, overriding the input/output path variables
    # with the values mapped above.
    for env in app_info["environment"]:
        if env == EnvironmentVariables.HOLOSCAN_INPUT_PATH:
            environment_variables[env] = mapped_input
        elif env == EnvironmentVariables.HOLOSCAN_OUTPUT_PATH:
            environment_variables[env] = mapped_output
        else:
            environment_variables[env] = app_info["environment"][env]

        # always pass path to config file for Holoscan apps
        if (
            "sdk" in app_info
            and app_info["sdk"] == SdkType.Holoscan.value
            and env == EnvironmentVariables.HOLOSCAN_CONFIG_PATH
        ):
            commands.append("--config")
            commands.append(environment_variables[env])

    if config is not None:
        # A user-provided config can only be mounted over the embedded one when the
        # manifest says where the embedded config lives.
        if EnvironmentVariables.HOLOSCAN_CONFIG_PATH not in app_info["environment"]:
            raise InvalidManifestError(
                "The application manifest does not contain a required "
                f"environment variable: '{EnvironmentVariables.HOLOSCAN_CONFIG_PATH}'"
            )
        volumes.append(
            (
                str(config),
                app_info["environment"][EnvironmentVariables.HOLOSCAN_CONFIG_PATH],
            )
        )
        logger.info(f"Using user provided configuration file: {config}")

    logger.debug(
        f"Environment variables: {json.dumps(environment_variables, indent=4, sort_keys=True)}"
    )
    logger.debug(f"Volumes: {json.dumps(volumes, indent=4, sort_keys=True)}")
    logger.debug(f"Shared memory size: {shared_memory_size}")

    # Share the host's IPC namespace only when no explicit shm size was requested.
    ipc_mode = "host" if shared_memory_size is None else None
    ulimits = [
        "memlock=-1",
        "stack=67108864",
    ]
    additional_devices, group_adds = _additional_devices_to_mount(is_root)
    devices.extend(additional_devices)

    # Non-root users need the host's `video` group ID to access GPU/display devices.
    video_group = run_cmd_output(
        '/usr/bin/cat /etc/group | grep "video" | cut -d: -f3'
    ).strip()
    if not is_root and video_group not in group_adds:
        group_adds.append(video_group)

    if terminal:
        _enter_terminal(
            name,
            image_name,
            app_info,
            network,
            user,
            volumes,
            environment_variables,
            shared_memory_size,
            ipc_mode,
            ulimits,
            devices,
            group_adds,
        )
    else:
        _start_container(
            name,
            image_name,
            app_info,
            quiet,
            commands,
            network,
            user,
            volumes,
            environment_variables,
            shared_memory_size,
            ipc_mode,
            ulimits,
            devices,
            group_adds,
        )
374

375

376
def _start_container(
    name,
    image_name,
    app_info,
    quiet,
    commands,
    network,
    user,
    volumes,
    environment_variables,
    shared_memory_size,
    ipc_mode,
    ulimits,
    devices,
    group_adds,
):
    """Create a Docker container from `image_name`, run it attached, and stream its logs.

    The container is created with `remove=True` so it is deleted once it exits.
    stdout from the container is printed only when `quiet` is False; stderr is
    always printed. Blocks until the container exits.
    """
    container = docker.container.create(
        image_name,
        command=commands,
        envs=environment_variables,
        hostname=name,
        name=name,
        networks=[network],
        remove=True,
        shm_size=shared_memory_size,
        user=user,
        volumes=volumes,
        workdir=app_info["workingDirectory"],
        ipc=ipc_mode,
        # CAP_SYS_PTRACE: allows debuggers/profilers to attach inside the container.
        cap_add=["CAP_SYS_PTRACE"],
        ulimit=ulimits,
        devices=devices,
        groups_add=group_adds,
        runtime="nvidia",
    )
    container_name = container.name
    # Short (12-char) container ID, matching the `docker ps` display format.
    container_id = container.id[:12]

    # Render the effective ulimits as "name=soft:hard" for the launch summary.
    ulimit_str = ", ".join(
        f"{ulimit.name}={ulimit.soft}:{ulimit.hard}"
        for ulimit in container.host_config.ulimits
    )
    logger.info(
        f"Launching container ({container_id}) using image '{image_name}'..."
        f"\n    container name:      {container_name}"
        f"\n    host name:           {container.config.hostname}"
        f"\n    network:             {network}"
        f"\n    user:                {container.config.user}"
        f"\n    ulimits:             {ulimit_str}"
        f"\n    cap_add:             {', '.join(container.host_config.cap_add)}"
        f"\n    ipc mode:            {container.host_config.ipc_mode}"
        f"\n    shared memory size:  {container.host_config.shm_size}"
        f"\n    devices:             {', '.join(devices)}"
        f"\n    group_add:           {', '.join(group_adds)}"
    )
    # Attached + streamed start yields an iterator of (stream-name, bytes) tuples.
    logs = container.start(
        attach=True,
        stream=True,
    )

    for log in logs:
        if log[0] == "stdout":
            if not quiet:
                print(log[1].decode("utf-8"))
        elif log[0] == "stderr":
            try:
                print(str(log[1].decode("utf-8")))
            except Exception:
                # Fall back to the raw bytes repr if stderr is not valid UTF-8.
                print(str(log[1]))

    logger.info(f"Container '{container_name}'({container_id}) exited.")
447

448

449
def _enter_terminal(
    name,
    image_name,
    app_info,
    network,
    user,
    volumes,
    environment_variables,
    shared_memory_size,
    ipc_mode,
    ulimits,
    devices,
    group_adds,
):
    """Run the container with an interactive /bin/bash entrypoint instead of the app.

    Prints the environment variables that will be set inside the container, then
    blocks in an interactive TTY session until the user exits the shell. The
    container is removed on exit (`remove=True`).
    """
    print("\n\nEntering terminal...")
    # Show the container's environment, sorted by variable name, one per line.
    print(
        "\n".join(
            f"\t{k:25s}\t{v}"
            for k, v in sorted(environment_variables.items(), key=lambda t: str(t[0]))
        )
    )
    print("\n\n")
    docker.container.run(
        image_name,
        detach=False,
        entrypoint="/bin/bash",
        envs=environment_variables,
        hostname=name,
        interactive=True,
        name=name,
        networks=[network],
        remove=True,
        shm_size=shared_memory_size,
        tty=True,
        user=user,
        volumes=volumes,
        workdir=app_info["workingDirectory"],
        ipc=ipc_mode,
        # CAP_SYS_PTRACE: allows debuggers/profilers to attach inside the container.
        cap_add=["CAP_SYS_PTRACE"],
        ulimit=ulimits,
        devices=devices,
        groups_add=group_adds,
        runtime="nvidia",
    )
    logger.info("Container exited.")
494

495

496
def _additional_devices_to_mount(is_root: bool):
    """Returns additional devices and group IDs to use when running the container.

    On iGPU, the /dev/dri/* devices (mounted by the NV container runtime) permissions
    require root privilege or to be part of the `video` and `render` groups. The ID
    for these group names might differ on the host system and in the container, so
    the host's numeric group IDs (not names) are returned for docker's group-add.

    Returns:
        tuple[list, list]: (devices, group_adds)
    """
    extra_devices = []
    group_ids = []

    # Tegra-specific paths identify an iGPU (Jetson/IGX) host.
    running_on_igpu = os.path.exists(
        "/sys/devices/platform/gpu.0/load"
    ) and os.path.exists("/usr/bin/tegrastats")

    if running_on_igpu and not is_root:
        for group_name in ("video", "render"):
            gid = run_cmd_output(
                f'/usr/bin/cat /etc/group | grep "{group_name}" | cut -d: -f3'
            ).strip()
            group_ids.append(gid)

    return (extra_devices, group_ids)
519

520

521
def _host_is_native_igpu() -> bool:
1✔
522
    proc = subprocess.run(
×
523
        ["nvidia-smi --query-gpu name --format=csv,noheader | grep nvgpu -q"],
524
        shell=True,
525
    )
526
    result = proc.returncode
×
527
    return result == 0
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc