• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NVIDIA / gpu-operator / 21259452260

22 Jan 2026 06:06PM UTC coverage: 25.465% (+1.5%) from 23.951%
21259452260

push

github

rajathagasthya
Add tests for MIG Manager dynamic config

Test the conditional ConfigMap mounting logic for custom MIG config:
* Unit tests verify volume, volumeMount, and CONFIG_FILE env var
* Integration tests verify end-to-end daemonset creation

Signed-off-by: Rajath Agasthya <ragasthya@nvidia.com>

3024 of 11875 relevant lines covered (25.47%)

0.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/cmd/gpu-operator/main.go
1
/*
2
Copyright 2021.
3

4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7

8
    http://www.apache.org/licenses/LICENSE-2.0
9

10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
*/
16

17
package main
18

19
import (
20
        "flag"
21
        "fmt"
22
        "os"
23
        "strings"
24
        "time"
25

26
        // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
27
        // to ensure that exec-entrypoint and run can make use of them.
28
        "go.uber.org/zap/zapcore"
29
        _ "k8s.io/client-go/plugin/pkg/client/auth"
30
        "sigs.k8s.io/controller-runtime/pkg/cache"
31

32
        "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
33
        apiconfigv1 "github.com/openshift/api/config/v1"
34
        apiimagev1 "github.com/openshift/api/image/v1"
35
        secv1 "github.com/openshift/api/security/v1"
36
        promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
37
        corev1 "k8s.io/api/core/v1"
38
        apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
39
        "k8s.io/apimachinery/pkg/runtime"
40
        utilruntime "k8s.io/apimachinery/pkg/util/runtime"
41
        clientgoscheme "k8s.io/client-go/kubernetes/scheme"
42
        ctrl "sigs.k8s.io/controller-runtime"
43
        "sigs.k8s.io/controller-runtime/pkg/healthz"
44
        "sigs.k8s.io/controller-runtime/pkg/log/zap"
45
        metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
46
        "sigs.k8s.io/controller-runtime/pkg/webhook"
47

48
        clusterpolicyv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
49
        nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
50
        "github.com/NVIDIA/gpu-operator/controllers"
51
        "github.com/NVIDIA/gpu-operator/controllers/clusterinfo"
52
        "github.com/NVIDIA/gpu-operator/internal/consts"
53
        "github.com/NVIDIA/gpu-operator/internal/info"
54
        // +kubebuilder:scaffold:imports
55
)
56

57
var (
	// scheme aggregates every API type the manager's client can
	// encode/decode; populated by init() below.
	scheme = runtime.NewScheme()
	// setupLog is the logger used while wiring up controllers in main().
	setupLog = ctrl.Log.WithName("setup")
)
61

62
// init registers all API groups the operator interacts with into the shared
// scheme: core Kubernetes types, the operator's own CRDs (ClusterPolicy v1
// and the nvidia.com v1alpha1 types), CustomResourceDefinitions themselves,
// Prometheus Operator monitoring types, and the OpenShift security, config,
// and image APIs. utilruntime.Must panics on registration failure, which is
// acceptable here since any failure is a programming error at startup.
func init() {
	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
	utilruntime.Must(clusterpolicyv1.AddToScheme(scheme))
	utilruntime.Must(apiextensionsv1.AddToScheme(scheme))
	utilruntime.Must(nvidiav1alpha1.AddToScheme(scheme))
	utilruntime.Must(promv1.AddToScheme(scheme))
	utilruntime.Must(secv1.Install(scheme))
	utilruntime.Must(apiconfigv1.Install(scheme))
	utilruntime.Must(apiimagev1.Install(scheme))
}
×
72

73
func main() {
×
74
        var metricsAddr string
×
75
        var enableLeaderElection bool
×
76
        var probeAddr string
×
77
        var renewDeadline time.Duration
×
78

×
79
        flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
×
80
        flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
×
81
        flag.BoolVar(&enableLeaderElection, "leader-elect", false,
×
82
                "Enable leader election for controller manager. "+
×
83
                        "Enabling this will ensure there is only one active controller manager.")
×
84
        flag.DurationVar(&renewDeadline, "leader-lease-renew-deadline", 0,
×
85
                "Set the leader lease renew deadline duration (e.g. \"10s\") of the controller manager. "+
×
86
                        "Only enabled when the --leader-elect flag is set. "+
×
87
                        "If undefined, the renew deadline defaults to the controller-runtime manager's default RenewDeadline. "+
×
88
                        "By setting this option, the LeaseDuration is also set as RenewDealine + 5s.")
×
89

×
90
        opts := zap.Options{
×
91
                StacktraceLevel: zapcore.PanicLevel,
×
92
        }
×
93
        opts.BindFlags(flag.CommandLine)
×
94
        flag.Parse()
×
95

×
96
        logger := zap.New(zap.UseFlagOptions(&opts))
×
97
        ctrl.SetLogger(logger)
×
98

×
99
        ctrl.Log.Info(fmt.Sprintf("version: %s", info.GetVersionString()))
×
100

×
101
        metricsOptions := metricsserver.Options{
×
102
                BindAddress: metricsAddr,
×
103
        }
×
104

×
105
        webhookServer := webhook.NewServer(webhook.Options{
×
106
                Port: 9443,
×
107
        })
×
108

×
109
        operatorNamespace := os.Getenv("OPERATOR_NAMESPACE")
×
110

×
111
        if operatorNamespace == "" {
×
112
                logger.Error(nil, "OPERATOR_NAMESPACE environment variable not set, cannot proceed")
×
113
                // we cannot do anything without the operator namespace,
×
114
                // let the operator Pod run into `CrashloopBackOff`
×
115

×
116
                os.Exit(1)
×
117
        }
×
118

119
        openshiftNamespace := consts.OpenshiftNamespace
×
120
        cacheOptions := cache.Options{
×
121
                DefaultNamespaces: map[string]cache.Config{
×
122
                        operatorNamespace: {},
×
123
                        // Also cache resources in the openshift namespace to retrieve ImageStreams when on an openshift  cluster
×
124
                        openshiftNamespace: {},
×
125
                },
×
126
        }
×
127

×
128
        options := ctrl.Options{
×
129
                Scheme:                 scheme,
×
130
                Metrics:                metricsOptions,
×
131
                HealthProbeBindAddress: probeAddr,
×
132
                LeaderElection:         enableLeaderElection,
×
133
                LeaderElectionID:       "53822513.nvidia.com",
×
134
                WebhookServer:          webhookServer,
×
135
                Cache:                  cacheOptions,
×
136
        }
×
137

×
138
        if enableLeaderElection && int(renewDeadline) != 0 {
×
139
                leaseDuration := renewDeadline + 5*time.Second
×
140

×
141
                options.RenewDeadline = &renewDeadline
×
142
                options.LeaseDuration = &leaseDuration
×
143
        }
×
144

145
        mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), options)
×
146
        if err != nil {
×
147
                setupLog.Error(err, "unable to start manager")
×
148
                os.Exit(1)
×
149
        }
×
150

151
        ctx := ctrl.SetupSignalHandler()
×
152
        if err = (&controllers.ClusterPolicyReconciler{
×
153
                Namespace: operatorNamespace,
×
154
                Client:    mgr.GetClient(),
×
155
                Log:       ctrl.Log.WithName("controllers").WithName("ClusterPolicy"),
×
156
                Scheme:    mgr.GetScheme(),
×
157
        }).SetupWithManager(ctx, mgr); err != nil {
×
158
                setupLog.Error(err, "unable to create controller", "controller", "ClusterPolicy")
×
159
                os.Exit(1)
×
160
        }
×
161

162
        // setup upgrade controller
163
        upgrade.SetDriverName("gpu")
×
164
        upgradeLogger := ctrl.Log.WithName("controllers").WithName("Upgrade")
×
165
        clusterUpgradeStateManager, err := upgrade.NewClusterUpgradeStateManager(
×
166
                upgradeLogger,
×
167
                mgr.GetConfig(),
×
168
                // nolint:staticcheck
×
169
                // TODO: update k8s-operator-libs to leverage events.EventRecorder instead
×
170
                mgr.GetEventRecorderFor("nvidia-gpu-operator"),
×
171
                upgrade.StateOptions{},
×
172
        )
×
173
        if err != nil {
×
174
                setupLog.Error(err, "unable to create new ClusterUpdateStateManager", "controller", "Upgrade")
×
175
                os.Exit(1)
×
176
        }
×
177
        clusterUpgradeStateManager = clusterUpgradeStateManager.WithPodDeletionEnabled(gpuPodSpecFilter).WithValidationEnabled("app=nvidia-operator-validator")
×
178

×
179
        if err = (&controllers.UpgradeReconciler{
×
180
                Client:       mgr.GetClient(),
×
181
                Log:          upgradeLogger,
×
182
                Scheme:       mgr.GetScheme(),
×
183
                StateManager: clusterUpgradeStateManager,
×
184
        }).SetupWithManager(ctx, mgr); err != nil {
×
185
                setupLog.Error(err, "unable to create controller", "controller", "Upgrade")
×
186
                os.Exit(1)
×
187
        }
×
188

189
        clusterInfo, err := clusterinfo.New(
×
190
                ctx,
×
191
                clusterinfo.WithKubernetesConfig(mgr.GetConfig()),
×
192
                clusterinfo.WithOneShot(false),
×
193
        )
×
194
        if err != nil {
×
195
                setupLog.Error(err, "failed to get cluster wide information needed by controllers")
×
196
                os.Exit(1)
×
197
        }
×
198

199
        if err = (&controllers.NVIDIADriverReconciler{
×
200
                Namespace:   operatorNamespace,
×
201
                Client:      mgr.GetClient(),
×
202
                Scheme:      mgr.GetScheme(),
×
203
                ClusterInfo: clusterInfo,
×
204
        }).SetupWithManager(ctx, mgr); err != nil {
×
205
                setupLog.Error(err, "unable to create controller", "controller", "NVIDIADriver")
×
206
                os.Exit(1)
×
207
        }
×
208
        // +kubebuilder:scaffold:builder
209
        if err := mgr.AddHealthzCheck("health", healthz.Ping); err != nil {
×
210
                setupLog.Error(err, "unable to set up health check")
×
211
                os.Exit(1)
×
212
        }
×
213
        if err := mgr.AddReadyzCheck("check", healthz.Ping); err != nil {
×
214
                setupLog.Error(err, "unable to set up ready check")
×
215
                os.Exit(1)
×
216
        }
×
217

218
        setupLog.Info("starting manager")
×
219
        if err := mgr.Start(ctx); err != nil {
×
220
                setupLog.Error(err, "problem running manager")
×
221
                os.Exit(1)
×
222
        }
×
223
}
224

225
func gpuPodSpecFilter(pod corev1.Pod) bool {
×
226
        gpuInResourceList := func(rl corev1.ResourceList) bool {
×
227
                for resourceName := range rl {
×
228
                        str := string(resourceName)
×
229
                        if strings.HasPrefix(str, "nvidia.com/gpu") || strings.HasPrefix(str, "nvidia.com/mig-") {
×
230
                                return true
×
231
                        }
×
232
                }
233
                return false
×
234
        }
235

236
        //  ignore pods other than in running and pending state
237
        if pod.Status.Phase != corev1.PodRunning && pod.Status.Phase != corev1.PodPending {
×
238
                return false
×
239
        }
×
240

241
        for _, c := range pod.Spec.Containers {
×
242
                if gpuInResourceList(c.Resources.Limits) || gpuInResourceList(c.Resources.Requests) {
×
243
                        return true
×
244
                }
×
245
        }
246
        return false
×
247
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc