• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nvidia-holoscan / holoscan-cli / 12230940701

09 Dec 2024 07:30AM UTC coverage: 69.278%. First build
12230940701

Pull #4

github

web-flow
Merge c95cf46b9 into fc05764ac
Pull Request #4: Integrate Test App into Actions

56 of 129 new or added lines in 22 files covered. (43.41%)

1353 of 1953 relevant lines covered (69.28%)

0.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

18.86
/src/holoscan_cli/common/dockerutils.py
1
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
# SPDX-License-Identifier: Apache-2.0
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
import json
1✔
16
import logging
1✔
17
import os
1✔
18
import posixpath
1✔
19
import re
1✔
20
import subprocess
1✔
21
from pathlib import Path
1✔
22
from typing import Optional
1✔
23

24
from python_on_whales import docker
1✔
25

26
from ..common.utils import run_cmd_output
1✔
27
from .constants import DefaultValues, EnvironmentVariables
1✔
28
from .enum_types import PlatformConfiguration, SdkType
1✔
29
from .exceptions import GpuResourceError, InvalidManifestError, RunContainerError
1✔
30
from .utils import get_gpu_count, get_requested_gpus
1✔
31

32
# Module-level logger shared by the helpers in this file.
logger = logging.getLogger("common")
33

34

35
def parse_docker_image_name_and_tag(
    image_name: str,
) -> tuple[Optional[str], Optional[str]]:
    """Split a Docker image reference into its name and optional tag.

    Args:
        image_name (str): Docker image name, optionally suffixed with `:tag`

    Returns:
        Tuple[Optional[str], Optional[str]]: (name, tag) pair; both items are
        None when the input is not a valid image reference, and the tag is
        None when no tag was supplied.
    """
    result = re.search(
        r"^(?P<name>([\w.\-_]+((:\d+|)(?=/[a-z0-9._-]+/[a-z0-9._-]+))|)(/?)([a-z0-9.\-_/]+(/[a-z0-9.\-_]+|)))(:(?P<tag>[\w.\-_]{1,127})|)$",
        image_name,
    )

    # No match, or a match without a usable name, means an invalid reference.
    if result is None or result.group("name") is None:
        return None, None

    # Normalize a missing/empty tag group to None.
    return result.group("name"), (result.group("tag") or None)
59

60

61
def create_or_use_network(network: Optional[str], image_name: Optional[str]) -> str:
    """Return an existing Docker network's name, creating a bridge network if needed.

    Args:
        network (Optional[str]): name of the network to look up or create; when
            None, a name is derived from `image_name`
        image_name (Optional[str]): image name used to derive a network name

    Raises:
        RunContainerError: when unable to retrieve the specified network or failed to create one.

    Returns:
        str: network name
    """
    # Derive "<image-without-tag>-network" when no explicit network was given.
    if network is None and image_name is not None:
        network = f"{image_name.split(':')[0]}-network"

    assert network is not None

    # Reuse an existing network whose name matches exactly.
    try:
        matches = docker.network.list(filters={"name": f"^{network}$"})
        if matches:
            return matches[0].name
    except Exception as ex:
        raise RunContainerError(f"error retrieving network information: {ex}") from ex

    # Nothing matched: create a new bridge network with the requested name.
    try:
        return docker.network.create(network, driver="bridge").name
    except Exception as ex:
        raise RunContainerError(f"error creating Docker network: {ex}") from ex
91

92

93
def image_exists(image_name: str) -> bool:
    """Checks whether a Docker image is available, pulling it when not found locally.

    Args:
        image_name (str): name of the Docker image

    Returns:
        bool: whether the image exists or not.
    """
    if image_name is None:
        return False
    try:
        # Try a pull only when the image is not already present locally.
        if not docker.image.exists(image_name):
            logger.info(f"Attempting to pull image {image_name}..")
            docker.image.pull(image_name)
        # Re-check after the (possible) pull; this is the authoritative answer.
        return docker.image.exists(image_name)
    except Exception as err:
        # Any docker failure (daemon down, pull denied, ...) reports "not available".
        logger.error(str(err))
        return False
112

113

114
def docker_export_tarball(file: str, tag: str) -> None:
    """Exports the docker image to a tarball file on disk.

    Args:
        file (str): path of the tarball file to write
        tag (str): source Docker image tag to export
    """
    docker.image.save(tag, file)
122

123

124
def create_and_get_builder(builder_name: str):
    """Returns a Docker BuildX builder by name, creating it if it does not exist.

    Args:
        builder_name (str): name of the builder to look up or create

    Returns:
        _type_: name of the builder found or created
    """
    # Reuse an already-registered builder with the same name, if any.
    if any(b.name == builder_name for b in docker.buildx.list()):
        logger.info(f"Using existing Docker BuildKit builder `{builder_name}`")
        return builder_name

    logger.info(
        f"Creating Docker BuildKit builder `{builder_name}` using `docker-container`"
    )
    # Host networking lets the build container reach services on the host.
    new_builder = docker.buildx.create(
        name=builder_name, driver="docker-container", driver_options={"network": "host"}
    )
    return new_builder.name
146

147

148
def build_docker_image(**kwargs):
    """Builds a Docker image.

    Args:
        **kwargs: keyword arguments forwarded verbatim to `docker.buildx.build`.
    """
    _ = docker.buildx.build(**kwargs)
151

152

153
def docker_run(
    name: str,
    image_name: str,
    input_path: Optional[Path],
    output_path: Optional[Path],
    app_info: dict,
    pkg_info: dict,
    quiet: bool,
    commands: list[str],
    health_check: bool,
    network: str,
    network_interface: Optional[str],
    use_all_nics: bool,
    gpu_enum: Optional[str],
    config: Optional[Path],
    render: bool,
    user: str,
    terminal: bool,
    devices: list[str],
    platform_config: str,
    shared_memory_size: str = "1GB",
    is_root: bool = False,
):
    """Creates and runs a Docker container

    `HOLOSCAN_HOSTING_SERVICE` environment variable is used for hiding the help message
    inside the tools.sh when the users run the container using holoscan run.

    Args:
        name (str): name to assign to the container (also used as its hostname)
        image_name (str): Docker image name
        input_path (Optional[Path]): input data path
        output_path (Optional[Path]): output data path
        app_info (dict): app manifest
        pkg_info (dict): package manifest
        quiet (bool): prints only stderr when True, otherwise, prints all logs
        commands (List[str]): list of arguments to provide to the container
        health_check (bool): whether or not to enable the gRPC health check service
        network (str): Docker network to associate the container with
        network_interface (Optional[str]): Name of the network interface for setting
            UCX_NET_DEVICES
        use_all_nics (bool): Sets UCX_CM_USE_ALL_DEVICES to 'y' if True
        gpu_enum (Optional[str]): user-requested value for NVIDIA_VISIBLE_DEVICES
        config (Optional[Path]): optional configuration file for overriding the embedded one
        render (bool): whether or not to enable graphic rendering
        user (str): UID and GID to associate with the container
        terminal (bool): whether or not to enter bash terminal
        devices (List[str]): list of devices to be mapped into the container
        platform_config (str): platform configuration value used when packaging the application
        shared_memory_size (str): size of /dev/shm
        is_root (bool): whether the user is root (UID = 0) or not

    Raises:
        GpuResourceError: when fewer GPUs are available than the package requests.
        InvalidManifestError: when `config` is given but the app manifest defines no
            HOLOSCAN_CONFIG_PATH environment variable.
    """

    volumes = []
    environment_variables = {
        "NVIDIA_DRIVER_CAPABILITIES": "all",
        "HOLOSCAN_HOSTING_SERVICE": "HOLOSCAN_RUN",
        "UCX_CM_USE_ALL_DEVICES": "y" if use_all_nics else "n",
    }

    if network_interface is not None:
        environment_variables["UCX_NET_DEVICES"] = network_interface

    if health_check:
        environment_variables["HOLOSCAN_ENABLE_HEALTH_CHECK"] = "true"

    # Surface UCX/Vulkan diagnostics when the CLI itself runs in debug mode.
    if logger.root.level == logging.DEBUG:
        environment_variables["UCX_LOG_LEVEL"] = "DEBUG"
        environment_variables["VK_LOADER_DEBUG"] = "all"

    if render:
        # Forward the host's display stack (X11 and/or Wayland) into the container.
        volumes.append(("/tmp/.X11-unix", "/tmp/.X11-unix"))
        display = os.environ.get("DISPLAY", None)
        if display is not None:
            environment_variables["DISPLAY"] = display
        xdg_session_type = os.environ.get("XDG_SESSION_TYPE", None)
        if xdg_session_type is not None:
            environment_variables["XDG_SESSION_TYPE"] = xdg_session_type
        xdg_runtime_dir = os.environ.get("XDG_RUNTIME_DIR", None)
        if xdg_runtime_dir is not None:
            volumes.append((xdg_runtime_dir, xdg_runtime_dir))
            environment_variables["XDG_RUNTIME_DIR"] = xdg_runtime_dir
        wayland_display = os.environ.get("WAYLAND_DISPLAY", None)
        if wayland_display is not None:
            environment_variables["WAYLAND_DISPLAY"] = wayland_display

    # Use user-specified --gpu values
    if gpu_enum is not None:
        environment_variables["NVIDIA_VISIBLE_DEVICES"] = gpu_enum
    # If the image was built for iGPU but the system is configured for dGPU, attempt
    # targeting the system's iGPU using the CDI spec
    elif (
        platform_config == PlatformConfiguration.iGPU.value
        and not _host_is_native_igpu()
    ):
        environment_variables["NVIDIA_VISIBLE_DEVICES"] = "nvidia.com/igpu=0"
        logger.info(
            "Attempting to run an image for iGPU (integrated GPU) on a system configured "
            "with a dGPU (discrete GPU). If this is correct (ex: IGX Orin developer kit), "
            "make sure to enable iGPU on dGPU support as described in your developer kit "
            "user guide. If not, either rebuild the image for dGPU or run this image on a "
            "system configured for iGPU only (ex: Jetson AGX, Nano...)."
        )
    # Otherwise, read specs from package manifest
    else:
        requested_gpus = get_requested_gpus(pkg_info)
        available_gpus = get_gpu_count()

        if available_gpus < requested_gpus:
            raise GpuResourceError(
                f"Available GPUs ({available_gpus}) are less than required ({requested_gpus}). "
            )

        if requested_gpus == 0:
            environment_variables["NVIDIA_VISIBLE_DEVICES"] = "all"
        else:
            # Expose GPUs 0..requested_gpus-1 as a comma-separated index list.
            environment_variables["NVIDIA_VISIBLE_DEVICES"] = ",".join(
                map(str, range(0, requested_gpus))
            )

    if "path" in app_info["input"]:
        mapped_input = Path(app_info["input"]["path"]).as_posix()
    else:
        mapped_input = DefaultValues.INPUT_DIR

    # Relative manifest paths are resolved against the app's working directory.
    if not posixpath.isabs(mapped_input):
        mapped_input = posixpath.join(app_info["workingDirectory"], mapped_input)
    if input_path is not None:
        volumes.append((str(input_path), mapped_input))

    if "path" in app_info["output"]:
        mapped_output = Path(app_info["output"]["path"]).as_posix()
    else:
        # Bug fix: this previously fell back to DefaultValues.INPUT_DIR, which mapped
        # the output volume onto the input directory whenever the app manifest omits
        # `output.path` (mirror of the input branch above).
        mapped_output = DefaultValues.OUTPUT_DIR

    if not posixpath.isabs(mapped_output):
        mapped_output = posixpath.join(app_info["workingDirectory"], mapped_output)
    if output_path is not None:
        volumes.append((str(output_path), mapped_output))

    # Copy the manifest's environment, overriding the input/output path variables
    # with the values mapped above.
    for env in app_info["environment"]:
        if env == EnvironmentVariables.HOLOSCAN_INPUT_PATH:
            environment_variables[env] = mapped_input
        elif env == EnvironmentVariables.HOLOSCAN_OUTPUT_PATH:
            environment_variables[env] = mapped_output
        else:
            environment_variables[env] = app_info["environment"][env]

        # always pass path to config file for Holoscan apps
        if (
            "sdk" in app_info
            and app_info["sdk"] == SdkType.Holoscan.value
            and env == EnvironmentVariables.HOLOSCAN_CONFIG_PATH
        ):
            commands.append("--config")
            commands.append(environment_variables[env])

    if config is not None:
        # A user-provided config can only be mounted over the embedded one when the
        # manifest says where the embedded config lives.
        if EnvironmentVariables.HOLOSCAN_CONFIG_PATH not in app_info["environment"]:
            raise InvalidManifestError(
                "The application manifest does not contain a required "
                f"environment variable: '{EnvironmentVariables.HOLOSCAN_CONFIG_PATH}'"
            )
        volumes.append(
            (
                str(config),
                app_info["environment"][EnvironmentVariables.HOLOSCAN_CONFIG_PATH],
            )
        )
        logger.info(f"Using user provided configuration file: {config}")

    logger.debug(
        f"Environment variables: {json.dumps(environment_variables, indent=4, sort_keys=True)}"
    )
    logger.debug(f"Volumes: {json.dumps(volumes, indent=4, sort_keys=True)}")
    logger.debug(f"Shared memory size: {shared_memory_size}")

    # Share the host's IPC namespace only when no explicit shm size was requested.
    ipc_mode = "host" if shared_memory_size is None else None
    ulimits = [
        "memlock=-1",
        "stack=67108864",
    ]
    additional_devices, group_adds = _additional_devices_to_mount(is_root)
    devices.extend(additional_devices)

    # Non-root users need the host's `video` group ID to access GPU/display devices.
    video_group = run_cmd_output(
        '/usr/bin/cat /etc/group | grep "video" | cut -d: -f3'
    ).strip()
    if not is_root and video_group not in group_adds:
        group_adds.append(video_group)

    if terminal:
        _enter_terminal(
            name,
            image_name,
            app_info,
            network,
            user,
            volumes,
            environment_variables,
            shared_memory_size,
            ipc_mode,
            ulimits,
            devices,
            group_adds,
        )
    else:
        _start_container(
            name,
            image_name,
            app_info,
            quiet,
            commands,
            network,
            user,
            volumes,
            environment_variables,
            shared_memory_size,
            ipc_mode,
            ulimits,
            devices,
            group_adds,
        )
374

375

376
def _start_container(
    name,
    image_name,
    app_info,
    quiet,
    commands,
    network,
    user,
    volumes,
    environment_variables,
    shared_memory_size,
    ipc_mode,
    ulimits,
    devices,
    group_adds,
):
    """Create a Docker container from `image_name`, run it attached, and stream its logs.

    The container is created with `remove=True` so it is deleted once it exits.
    stdout from the container is printed only when `quiet` is False; stderr is
    always printed. Blocks until the container exits.
    """
    container = docker.container.create(
        image_name,
        command=commands,
        envs=environment_variables,
        hostname=name,
        name=name,
        networks=[network],
        remove=True,
        shm_size=shared_memory_size,
        user=user,
        volumes=volumes,
        workdir=app_info["workingDirectory"],
        ipc=ipc_mode,
        # CAP_SYS_PTRACE: allows debuggers/profilers to attach inside the container.
        cap_add=["CAP_SYS_PTRACE"],
        ulimit=ulimits,
        devices=devices,
        groups_add=group_adds,
        runtime="nvidia",
    )
    container_name = container.name
    # Short (12-char) container ID, matching the `docker ps` display format.
    container_id = container.id[:12]

    # Render the effective ulimits as "name=soft:hard" for the launch summary.
    ulimit_str = ", ".join(
        f"{ulimit.name}={ulimit.soft}:{ulimit.hard}"
        for ulimit in container.host_config.ulimits
    )
    logger.info(
        f"Launching container ({container_id}) using image '{image_name}'..."
        f"\n    container name:      {container_name}"
        f"\n    host name:           {container.config.hostname}"
        f"\n    network:             {network}"
        f"\n    user:                {container.config.user}"
        f"\n    ulimits:             {ulimit_str}"
        f"\n    cap_add:             {', '.join(container.host_config.cap_add)}"
        f"\n    ipc mode:            {container.host_config.ipc_mode}"
        f"\n    shared memory size:  {container.host_config.shm_size}"
        f"\n    devices:             {', '.join(devices)}"
        f"\n    group_add:           {', '.join(group_adds)}"
    )
    # Attached + streamed start yields an iterator of (stream-name, bytes) tuples.
    logs = container.start(
        attach=True,
        stream=True,
    )

    for log in logs:
        if log[0] == "stdout":
            if not quiet:
                print(log[1].decode("utf-8"))
        elif log[0] == "stderr":
            try:
                print(str(log[1].decode("utf-8")))
            except Exception:
                # Fall back to the raw bytes repr if stderr is not valid UTF-8.
                print(str(log[1]))

    logger.info(f"Container '{container_name}'({container_id}) exited.")
447

448

449
def _enter_terminal(
    name,
    image_name,
    app_info,
    network,
    user,
    volumes,
    environment_variables,
    shared_memory_size,
    ipc_mode,
    ulimits,
    devices,
    group_adds,
):
    """Run the container with an interactive /bin/bash entrypoint instead of the app.

    Prints the environment variables that will be set inside the container, then
    blocks in an interactive TTY session until the user exits the shell. The
    container is removed on exit (`remove=True`).
    """
    print("\n\nEntering terminal...")
    # Show the container's environment, sorted by variable name, one per line.
    print(
        "\n".join(
            f"\t{k:25s}\t{v}"
            for k, v in sorted(environment_variables.items(), key=lambda t: str(t[0]))
        )
    )
    print("\n\n")
    docker.container.run(
        image_name,
        detach=False,
        entrypoint="/bin/bash",
        envs=environment_variables,
        hostname=name,
        interactive=True,
        name=name,
        networks=[network],
        remove=True,
        shm_size=shared_memory_size,
        tty=True,
        user=user,
        volumes=volumes,
        workdir=app_info["workingDirectory"],
        ipc=ipc_mode,
        # CAP_SYS_PTRACE: allows debuggers/profilers to attach inside the container.
        cap_add=["CAP_SYS_PTRACE"],
        ulimit=ulimits,
        devices=devices,
        groups_add=group_adds,
        runtime="nvidia",
    )
    logger.info("Container exited.")
494

495

496
def _additional_devices_to_mount(is_root: bool):
    """Returns additional devices and group IDs to use when running the container.

    On iGPU, the /dev/dri/* devices (mounted by the NV container runtime) permissions
    require root privilege or to be part of the `video` and `render` groups. The ID
    for these group names might differ on the host system and in the container, so
    the host's numeric group IDs (not names) are returned for docker's group-add.

    Returns:
        tuple[list, list]: (devices, group_adds)
    """
    extra_devices = []
    group_ids = []

    # Tegra-specific paths identify an iGPU (Jetson/IGX) host.
    running_on_igpu = os.path.exists(
        "/sys/devices/platform/gpu.0/load"
    ) and os.path.exists("/usr/bin/tegrastats")

    if running_on_igpu and not is_root:
        for group_name in ("video", "render"):
            gid = run_cmd_output(
                f'/usr/bin/cat /etc/group | grep "{group_name}" | cut -d: -f3'
            ).strip()
            group_ids.append(gid)

    return (extra_devices, group_ids)
519

520

521
def _host_is_native_igpu() -> bool:
1✔
522
    proc = subprocess.run(
×
523
        ["nvidia-smi --query-gpu name --format=csv,noheader | grep nvgpu -q"],
524
        shell=True,
525
    )
526
    result = proc.returncode
×
527
    return result == 0
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc