• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NVIDIA / nvrc / 20373283300

19 Dec 2025 02:40PM UTC coverage: 79.822% (+51.2%) from 28.618%
20373283300

Pull #84

github

web-flow
Merge 3ab21c41c into 5b8b670d9
Pull Request #84: NVRC complete code coverage

67 of 85 new or added lines in 11 files covered. (78.82%)

4 existing lines in 4 files now uncovered.

269 of 337 relevant lines covered (79.82%)

1.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.59
/src/daemon.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// Copyright (c) NVIDIA CORPORATION
3

4
use anyhow::{Context, Result};
5

6
use crate::execute::background;
7
use crate::nvrc::NVRC;
8
use std::fs;
9

10
/// Builds the nvidia-persistenced command line.
///
/// `--verbose` is always passed. `--uvm-persistence-mode` is appended only when
/// `uvm_enabled` is true; UVM persistence keeps unified memory mappings alive
/// between kernel launches, avoiding expensive page migrations.
fn persistenced_args(uvm_enabled: bool) -> Vec<&'static str> {
    let mut flags = vec!["--verbose"];
    if uvm_enabled {
        flags.push("--uvm-persistence-mode");
    }
    flags
}
19

20
/// Fixed command line for nv-hostengine.
///
/// A dedicated service account keeps the daemon from running as root, and the
/// home directory is /tmp because the rootfs is read-only once init completes.
fn hostengine_args() -> &'static [&'static str] {
    const HOSTENGINE_ARGS: &[&str] = &["--service-account", "nvidia-dcgm", "--home-dir", "/tmp"];
    HOSTENGINE_ARGS
}
25

26
/// Fixed command line for dcgm-exporter.
///
/// `-k` selects Kubernetes mode, which disables the standalone HTTP server
/// (we sit behind kata-agent); `-f` points at the standard counters config
/// shipped with the container image.
fn dcgm_exporter_args() -> &'static [&'static str] {
    const EXPORTER_ARGS: &[&str] = &["-k", "-f", "/etc/dcgm-exporter/default-counters.csv"];
    EXPORTER_ARGS
}
31

32
/// Fixed command line for nv-fabricmanager.
///
/// The config path is passed explicitly because fabricmanager does not search
/// the standard locations when it runs as a subprocess of init.
fn fabricmanager_args() -> &'static [&'static str] {
    const FABRICMANAGER_ARGS: &[&str] = &["-c", "/usr/share/nvidia/nvswitch/fabricmanager.cfg"];
    FABRICMANAGER_ARGS
}
37

38
/// Configurable path parameters allow testing with /bin/true instead of real
39
/// NVIDIA binaries that don't exist in the test environment.
40
impl NVRC {
41
    /// nvidia-persistenced keeps GPU state warm between container invocations,
42
    /// reducing cold-start latency. UVM persistence mode enables unified memory
43
    /// optimizations. Enabled by default since most workloads benefit from it.
UNCOV
44
    pub fn nvidia_persistenced(&mut self) -> Result<()> {
×
NEW
45
        self.spawn_persistenced("/var/run/nvidia-persistenced", "/bin/nvidia-persistenced")
×
46
    }
47

48
    fn spawn_persistenced(&mut self, run_dir: &str, bin: &str) -> Result<()> {
1✔
49
        fs::create_dir_all(run_dir).with_context(|| format!("create_dir_all {}", run_dir))?;
1✔
50
        let uvm_enabled = self.uvm_persistence_mode.unwrap_or(true);
1✔
51
        let args = persistenced_args(uvm_enabled);
1✔
52
        let child = background(bin, &args)?;
3✔
53
        self.track_daemon("nvidia-persistenced", child);
1✔
54
        Ok(())
1✔
55
    }
56

57
    /// nv-hostengine is the DCGM backend daemon. Only started when DCGM monitoring
58
    /// is explicitly requested - not needed for basic GPU workloads.
59
    pub fn nv_hostengine(&mut self) -> Result<()> {
1✔
60
        self.spawn_hostengine("/bin/nv-hostengine")
1✔
61
    }
62

63
    fn spawn_hostengine(&mut self, bin: &str) -> Result<()> {
1✔
64
        if !self.dcgm_enabled.unwrap_or(false) {
1✔
65
            return Ok(());
1✔
66
        }
67
        let child = background(bin, hostengine_args())?;
2✔
68
        self.track_daemon("nv-hostengine", child);
1✔
69
        Ok(())
1✔
70
    }
71

72
    /// dcgm-exporter exposes GPU metrics for Prometheus. Only started when DCGM
73
    /// is enabled - adds overhead so disabled by default.
74
    pub fn dcgm_exporter(&mut self) -> Result<()> {
1✔
75
        self.spawn_dcgm_exporter("/bin/dcgm-exporter")
1✔
76
    }
77

78
    fn spawn_dcgm_exporter(&mut self, bin: &str) -> Result<()> {
1✔
79
        if !self.dcgm_enabled.unwrap_or(false) {
1✔
80
            return Ok(());
1✔
81
        }
82
        let child = background(bin, dcgm_exporter_args())?;
2✔
83
        self.track_daemon("dcgm-exporter", child);
1✔
84
        Ok(())
1✔
85
    }
86

87
    /// NVSwitch fabric manager is only needed for multi-GPU NVLink topologies.
88
    /// Disabled by default since most VMs have single GPUs.
89
    pub fn nv_fabricmanager(&mut self) -> Result<()> {
1✔
90
        self.spawn_fabricmanager("/bin/nv-fabricmanager")
1✔
91
    }
92

93
    fn spawn_fabricmanager(&mut self, bin: &str) -> Result<()> {
1✔
94
        if !self.fabricmanager_enabled.unwrap_or(false) {
1✔
95
            return Ok(());
1✔
96
        }
97
        let child = background(bin, fabricmanager_args())?;
2✔
98
        self.track_daemon("nv-fabricmanager", child);
1✔
99
        Ok(())
1✔
100
    }
101
}
102

103
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Path of a not-yet-created persistenced run directory inside `tmp`.
    fn run_dir_in(tmp: &TempDir) -> std::path::PathBuf {
        tmp.path().join("nvidia-persistenced")
    }

    // === Args builder tests ===

    #[test]
    fn test_persistenced_args_with_uvm() {
        assert_eq!(
            persistenced_args(true),
            vec!["--verbose", "--uvm-persistence-mode"]
        );
    }

    #[test]
    fn test_persistenced_args_without_uvm() {
        assert_eq!(persistenced_args(false), vec!["--verbose"]);
    }

    #[test]
    fn test_hostengine_args() {
        assert_eq!(
            hostengine_args(),
            &["--service-account", "nvidia-dcgm", "--home-dir", "/tmp"]
        );
    }

    #[test]
    fn test_dcgm_exporter_args() {
        assert_eq!(
            dcgm_exporter_args(),
            &["-k", "-f", "/etc/dcgm-exporter/default-counters.csv"]
        );
    }

    #[test]
    fn test_fabricmanager_args() {
        assert_eq!(
            fabricmanager_args(),
            &["-c", "/usr/share/nvidia/nvswitch/fabricmanager.cfg"]
        );
    }

    // === Skip path tests ===

    #[test]
    fn test_nv_hostengine_skipped_by_default() {
        // With default settings DCGM is off, so the call is a no-op and
        // nothing is spawned.
        let mut rc = NVRC::default();
        assert!(rc.nv_hostengine().is_ok());
        assert!(rc.check_daemons().is_ok());
    }

    #[test]
    fn test_dcgm_exporter_skipped_by_default() {
        let mut rc = NVRC::default();
        assert!(rc.dcgm_exporter().is_ok());
    }

    #[test]
    fn test_nv_fabricmanager_skipped_by_default() {
        let mut rc = NVRC::default();
        assert!(rc.nv_fabricmanager().is_ok());
    }

    // === Spawn path tests (using /bin/true as a stand-in binary) ===

    #[test]
    fn test_spawn_persistenced_success() {
        let tmp = TempDir::new().unwrap();
        let dir = run_dir_in(&tmp);

        let mut rc = NVRC::default();
        let outcome = rc.spawn_persistenced(dir.to_str().unwrap(), "/bin/true");
        assert!(outcome.is_ok());

        // The run directory must have been created.
        assert!(dir.exists());

        // The daemon must be tracked and exit cleanly.
        assert!(rc.check_daemons().is_ok());
    }

    #[test]
    fn test_spawn_persistenced_uvm_disabled() {
        let tmp = TempDir::new().unwrap();
        let dir = run_dir_in(&tmp);

        // Explicitly disabling UVM exercises the argument list without the UVM flag.
        let mut rc = NVRC::default();
        rc.uvm_persistence_mode = Some(false);
        let outcome = rc.spawn_persistenced(dir.to_str().unwrap(), "/bin/true");
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_spawn_hostengine_success() {
        let mut rc = NVRC::default();
        rc.dcgm_enabled = Some(true);
        let outcome = rc.spawn_hostengine("/bin/true");
        assert!(outcome.is_ok());
        assert!(rc.check_daemons().is_ok());
    }

    #[test]
    fn test_spawn_dcgm_exporter_success() {
        let mut rc = NVRC::default();
        rc.dcgm_enabled = Some(true);
        let outcome = rc.spawn_dcgm_exporter("/bin/true");
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_spawn_fabricmanager_success() {
        let mut rc = NVRC::default();
        rc.fabricmanager_enabled = Some(true);
        let outcome = rc.spawn_fabricmanager("/bin/true");
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_spawn_persistenced_binary_not_found() {
        let tmp = TempDir::new().unwrap();
        let dir = run_dir_in(&tmp);

        let mut rc = NVRC::default();
        let outcome = rc.spawn_persistenced(dir.to_str().unwrap(), "/nonexistent/binary");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_check_daemons_empty() {
        let mut rc = NVRC::default();
        assert!(rc.check_daemons().is_ok());
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc