• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

k8snetworkplumbingwg / sriov-network-operator / 4371184613

pending completion
4371184613

Pull #416

github

GitHub
Merge f947aa404 into eb3d22a45
Pull Request #416: Support Graceful Shutdown

26 of 26 new or added lines in 1 file covered. (100.0%)

1944 of 7582 relevant lines covered (25.64%)

0.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

40.81
/pkg/daemon/daemon.go
1
package daemon
2

3
import (
4
        "bytes"
5
        "context"
6
        "encoding/json"
7
        "flag"
8
        "fmt"
9
        "io/ioutil"
10
        "math/rand"
11
        "os"
12
        "os/exec"
13
        "path"
14
        "strconv"
15
        "strings"
16
        "sync"
17
        "time"
18

19
        "github.com/golang/glog"
20
        mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
21
        daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
22
        mcfginformers "github.com/openshift/machine-config-operator/pkg/generated/informers/externalversions"
23
        "golang.org/x/time/rate"
24
        corev1 "k8s.io/api/core/v1"
25
        "k8s.io/apimachinery/pkg/api/errors"
26
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27
        "k8s.io/apimachinery/pkg/labels"
28
        "k8s.io/apimachinery/pkg/types"
29
        utilruntime "k8s.io/apimachinery/pkg/util/runtime"
30
        "k8s.io/apimachinery/pkg/util/strategicpatch"
31
        "k8s.io/apimachinery/pkg/util/wait"
32
        "k8s.io/client-go/informers"
33
        "k8s.io/client-go/kubernetes"
34
        listerv1 "k8s.io/client-go/listers/core/v1"
35
        "k8s.io/client-go/tools/cache"
36
        "k8s.io/client-go/tools/leaderelection"
37
        "k8s.io/client-go/tools/leaderelection/resourcelock"
38
        "k8s.io/client-go/util/workqueue"
39
        "k8s.io/kubectl/pkg/drain"
40

41
        sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
42
        snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
43
        sninformer "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/informers/externalversions"
44
        plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins"
45
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
46
)
47

48
const (
	// updateDelay is the baseline speed at which we react to changes.  We don't
	// need to react in milliseconds as any change would involve rebooting the node.
	updateDelay = 5 * time.Second
	// maxUpdateBackoff is the maximum time to react to a change as we back off
	// in the face of errors.
	maxUpdateBackoff = 60 * time.Second

	// the presence of this file indicates that the sriov shutdown should be delayed
	// (host path mounted under /host; removed in completeDrain once draining is done)
	delayShutdownPath = "/host/tmp/sriov-delay-shutdown"
)
59

60
// Message carries one sync result from the daemon's handlers over refreshCh;
// a separate status writer (not shown in this file) presumably mirrors it
// into the SriovNetworkNodeState status — senders block on syncCh until it
// has done so.
type Message struct {
	// syncStatus is one of syncStatusSucceeded/syncStatusFailed/syncStatusInProgress.
	syncStatus string
	// lastSyncError holds the error text of a failed sync, empty otherwise.
	lastSyncError string
}
64

65
// Daemon reconciles the SriovNetworkNodeState of the node it runs on,
// draining/rebooting the node when a configuration change requires it.
type Daemon struct {
	// name is the node name.
	name string

	// platform distinguishes baremetal from virtual (e.g. OpenStack) environments.
	platform utils.PlatformType

	// client talks to the sriovnetwork API group (node states, operator config).
	client snclientset.Interface
	// kubeClient allows interaction with Kubernetes, including the node we are running on.
	kubeClient kubernetes.Interface

	openshiftContext *utils.OpenshiftContext

	// nodeState is the last successfully synced state; its generation is
	// compared against the latest object to detect changes.
	nodeState *sriovnetworkv1.SriovNetworkNodeState

	// enabledPlugins maps plugin name to vendor plugin; loaded lazily on first sync.
	enabledPlugins map[string]plugin.VendorPlugin

	// channel used by callbacks to signal Run() of an error
	exitCh chan<- error

	// channel used to ensure all spawned goroutines exit when we exit.
	stopCh <-chan struct{}

	// syncCh is received from after each refreshCh send, to wait for the
	// status writer to persist the update.
	syncCh <-chan struct{}

	// refreshCh publishes sync status updates (see Message).
	refreshCh chan<- Message

	// mu serializes device plugin pod restarts (see restartDevicePluginPod).
	mu *sync.Mutex

	// drainer drives cordon/drain of this node via the kubectl drain helper.
	drainer *drain.Helper

	// node is a deep copy of this node's object, refreshed by nodeUpdateHandler.
	node *corev1.Node

	// drainable is true when no OTHER node is currently annotated as
	// draining (computed in nodeUpdateHandler).
	drainable bool

	// disableDrain mirrors SriovOperatorConfig.Spec.DisableDrain.
	disableDrain bool

	nodeLister listerv1.NodeLister

	// workqueue holds node-state generations pending reconciliation.
	workqueue workqueue.RateLimitingInterface

	// mcpName is this node's MachineConfigPool name; used when pausing and
	// resuming the MCP on OpenShift clusters.
	mcpName string
}
107

108
const (
	// helper scripts shipped inside the daemon image
	rdmaScriptsPath = "/bindata/scripts/enable-rdma.sh"
	udevScriptsPath = "/bindata/scripts/load-udev.sh"
	// annoKey is the node annotation used to coordinate draining across nodes.
	annoKey = "sriovnetwork.openshift.io/state"
	// annotation values: idle, draining, or draining with the MCP paused
	annoIdle      = "Idle"
	annoDraining  = "Draining"
	annoMcpPaused = "Draining_MCP_Paused"
	// values reported through Message.syncStatus
	syncStatusSucceeded  = "Succeeded"
	syncStatusFailed     = "Failed"
	syncStatusInProgress = "InProgress"
)
119

120
// namespace is the operator namespace, taken from the NAMESPACE env var.
var namespace = os.Getenv("NAMESPACE")

// used by test to mock interactions with filesystem
var filesystemRoot string = ""
124

125
// writer implements io.Writer interface as a pass-through for glog.
126
// writer adapts a glog logging function to the io.Writer interface so it can
// be plugged into APIs (such as drain.Helper) that expect a Writer.
type writer struct {
	logFunc func(args ...interface{})
}

// Write forwards the buffer to the configured log function and reports the
// whole buffer as written; it never returns an error.
func (w writer) Write(p []byte) (int, error) {
	msg := string(p)
	w.logFunc(msg)
	return len(msg), nil
}
135

136
// New builds a Daemon for nodeName wired to the given clients and channels.
// It preconfigures the drain helper (force, ignore DaemonSets, delete
// emptyDir data, 90s timeout, progress logged through glog) and a
// rate-limited workqueue combining a steady pace with per-item exponential
// backoff capped at maxUpdateBackoff.
func New(
	nodeName string,
	client snclientset.Interface,
	kubeClient kubernetes.Interface,
	openshiftContext *utils.OpenshiftContext,
	exitCh chan<- error,
	stopCh <-chan struct{},
	syncCh <-chan struct{},
	refreshCh chan<- Message,
	platformType utils.PlatformType,
) *Daemon {
	return &Daemon{
		name:             nodeName,
		platform:         platformType,
		client:           client,
		kubeClient:       kubeClient,
		openshiftContext: openshiftContext,
		exitCh:           exitCh,
		stopCh:           stopCh,
		syncCh:           syncCh,
		refreshCh:        refreshCh,
		nodeState:        &sriovnetworkv1.SriovNetworkNodeState{},
		drainer: &drain.Helper{
			Client:              kubeClient,
			Force:               true,
			IgnoreAllDaemonSets: true,
			DeleteEmptyDirData:  true,
			GracePeriodSeconds:  -1, // -1: use each pod's own grace period
			Timeout:             90 * time.Second,
			OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) {
				verbStr := "Deleted"
				if usingEviction {
					verbStr = "Evicted"
				}
				glog.Info(fmt.Sprintf("%s pod from Node %s/%s", verbStr, pod.Namespace, pod.Name))
			},
			Out:    writer{glog.Info},
			ErrOut: writer{glog.Error},
			Ctx:    context.Background(),
		},
		// NOTE(review): rate.Limit(updateDelay) converts a Duration
		// (nanoseconds) directly into events/second — confirm this produces
		// the pacing intended by the updateDelay comment.
		workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.NewMaxOfRateLimiter(
			&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(updateDelay), 1)},
			workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, maxUpdateBackoff)), "SriovNetworkNodeState"),
	}
}
181

182
// tryCreateUdevRuleWrapper refreshes the udev rules the daemon maintains:
// the switchdev rule derived from this node's SriovNetworkNodeState and, on
// baremetal platforms, the NetworkManager rule. Fetching the state or
// writing the switchdev rule is best-effort (failures are only logged);
// only a failure from tryCreateNMUdevRule is returned to the caller.
func (dn *Daemon) tryCreateUdevRuleWrapper() error {
	ns, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(
		context.Background(),
		dn.name,
		metav1.GetOptions{},
	)
	if nodeStateErr != nil {
		glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr)
	} else {
		err := tryCreateSwitchdevUdevRule(ns)
		if err != nil {
			glog.Warningf("Failed to create switchdev udev rules: %v", err)
		}
	}

	// update udev rule only if we are on a BM environment
	// for virtual environments we don't disable the vfs as they may be used by the platform/host
	if dn.platform != utils.VirtualOpenStack {
		err := tryCreateNMUdevRule()
		if err != nil {
			return err
		}
	}

	return nil
}
208

209
// Run starts the config daemon: it prepares the host (rdma/tun/vhost-net,
// udev rules), wires informers for this node's SriovNetworkNodeState, the
// "default" SriovOperatorConfig and the Node objects, launches one worker,
// and then blocks until stopCh closes or an error arrives on exitCh,
// refreshing the udev rules every 30 seconds in the meantime.
func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error {
	if utils.ClusterType == utils.ClusterTypeOpenshift {
		glog.V(0).Infof("Run(): start daemon. openshiftFlavor: %s", dn.openshiftContext.OpenshiftFlavor)
	} else {
		glog.V(0).Infof("Run(): start daemon.")
	}
	// Only watch own SriovNetworkNodeState CR
	defer utilruntime.HandleCrash()
	defer dn.workqueue.ShutDown()

	// Best-effort host preparation; failures inside these helpers are not fatal here.
	tryEnableRdma()
	tryEnableTun()
	tryEnableVhostNet()

	if err := dn.tryCreateUdevRuleWrapper(); err != nil {
		return err
	}

	var timeout int64 = 5
	dn.mu = &sync.Mutex{}
	// Informer filtered down to this node's own SriovNetworkNodeState object.
	informerFactory := sninformer.NewFilteredSharedInformerFactory(dn.client,
		time.Second*15,
		namespace,
		func(lo *metav1.ListOptions) {
			lo.FieldSelector = "metadata.name=" + dn.name
			lo.TimeoutSeconds = &timeout
		},
	)

	informer := informerFactory.Sriovnetwork().V1().SriovNetworkNodeStates().Informer()
	informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: dn.enqueueNodeState,
		UpdateFunc: func(old, new interface{}) {
			dn.enqueueNodeState(new)
		},
	})

	// Informer for the cluster-wide "default" SriovOperatorConfig.
	cfgInformerFactory := sninformer.NewFilteredSharedInformerFactory(dn.client,
		time.Second*30,
		namespace,
		func(lo *metav1.ListOptions) {
			lo.FieldSelector = "metadata.name=" + "default"
		},
	)

	cfgInformer := cfgInformerFactory.Sriovnetwork().V1().SriovOperatorConfigs().Informer()
	cfgInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dn.operatorConfigAddHandler,
		UpdateFunc: dn.operatorConfigChangeHandler,
	})

	rand.Seed(time.Now().UnixNano())
	// Node informer feeding nodeLister / the drainable computation.
	nodeInformerFactory := informers.NewSharedInformerFactory(dn.kubeClient,
		time.Second*15,
	)
	dn.nodeLister = nodeInformerFactory.Core().V1().Nodes().Lister()
	nodeInformer := nodeInformerFactory.Core().V1().Nodes().Informer()
	nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dn.nodeAddHandler,
		UpdateFunc: dn.nodeUpdateHandler,
	})
	go cfgInformer.Run(dn.stopCh)
	go nodeInformer.Run(dn.stopCh)
	// NOTE(review): fixed sleep appears to give the cfg/node informers a head
	// start before node-state events are delivered — confirm whether the
	// WaitForCacheSync below makes this delay redundant.
	time.Sleep(5 * time.Second)
	go informer.Run(dn.stopCh)
	if ok := cache.WaitForCacheSync(stopCh, cfgInformer.HasSynced, nodeInformer.HasSynced, informer.HasSynced); !ok {
		return fmt.Errorf("failed to wait for caches to sync")
	}

	glog.Info("Starting workers")
	// Launch one worker to process the node-state workqueue.
	go wait.Until(dn.runWorker, time.Second, stopCh)
	glog.Info("Started workers")

	for {
		select {
		case <-stopCh:
			glog.V(0).Info("Run(): stop daemon")
			return nil
		case err, more := <-exitCh:
			glog.Warningf("Got an error: %v", err)
			if more {
				dn.refreshCh <- Message{
					syncStatus:    syncStatusFailed,
					lastSyncError: err.Error(),
				}
			}
			return err
		case <-time.After(30 * time.Second):
			// Periodic best-effort refresh of the udev rules.
			glog.V(2).Info("Run(): period refresh")
			if err := dn.tryCreateUdevRuleWrapper(); err != nil {
				glog.V(2).Info("Could not create udev rule: ", err)
			}
		}
	}
}
306

307
func (dn *Daemon) runWorker() {
1✔
308
        for dn.processNextWorkItem() {
2✔
309
        }
1✔
310
}
311

312
func (dn *Daemon) enqueueNodeState(obj interface{}) {
1✔
313
        var ns *sriovnetworkv1.SriovNetworkNodeState
1✔
314
        var ok bool
1✔
315
        if ns, ok = obj.(*sriovnetworkv1.SriovNetworkNodeState); !ok {
1✔
316
                utilruntime.HandleError(fmt.Errorf("expected SriovNetworkNodeState but got %#v", obj))
×
317
                return
×
318
        }
×
319
        key := ns.GetGeneration()
1✔
320
        dn.workqueue.Add(key)
1✔
321
}
322

323
// processNextWorkItem pops one generation number off the workqueue and runs
// nodeStateSyncHandler. On handler failure it publishes a Failed status,
// waits for the status writer, and requeues the item with rate limiting.
// It returns false only when the queue has been shut down.
func (dn *Daemon) processNextWorkItem() bool {
	glog.V(2).Infof("worker queue size: %d", dn.workqueue.Len())
	obj, shutdown := dn.workqueue.Get()
	if shutdown {
		return false
	}

	glog.V(2).Infof("get item: %d", obj.(int64))

	// We wrap this block in a func so we can defer c.workqueue.Done.
	err := func(obj interface{}) error {
		// We call Done here so the workqueue knows we have finished
		// processing this item.
		defer dn.workqueue.Done(obj)
		var key int64
		var ok bool
		if key, ok = obj.(int64); !ok {
			// As the item in the workqueue is actually invalid, we call
			// Forget here.
			dn.workqueue.Forget(obj)
			utilruntime.HandleError(fmt.Errorf("expected workItem in workqueue but got %#v", obj))
			return nil
		}

		err := dn.nodeStateSyncHandler()
		if err != nil {
			// Report the error and put the item back in the work queue for retry.
			dn.refreshCh <- Message{
				syncStatus:    syncStatusFailed,
				lastSyncError: err.Error(),
			}
			// wait for the status writer before requeueing
			<-dn.syncCh
			dn.workqueue.AddRateLimited(key)
			return fmt.Errorf("error syncing: %s, requeuing", err.Error())
		}
		// Finally, if no error occurs we Forget this item so it does not
		// get queued again until another change happens.
		dn.workqueue.Forget(obj)
		glog.Infof("Successfully synced")
		return nil
	}(obj)

	if err != nil {
		utilruntime.HandleError(err)
	}

	return true
}
371

372
func (dn *Daemon) nodeAddHandler(obj interface{}) {
1✔
373
        dn.nodeUpdateHandler(nil, obj)
1✔
374
}
1✔
375

376
func (dn *Daemon) nodeUpdateHandler(old, new interface{}) {
1✔
377
        node, err := dn.nodeLister.Get(dn.name)
1✔
378
        if errors.IsNotFound(err) {
1✔
379
                glog.V(2).Infof("nodeUpdateHandler(): node %v has been deleted", dn.name)
×
380
                return
×
381
        }
×
382
        dn.node = node.DeepCopy()
1✔
383
        nodes, err := dn.nodeLister.List(labels.Everything())
1✔
384
        if err != nil {
1✔
385
                return
×
386
        }
×
387
        for _, node := range nodes {
2✔
388
                if node.GetName() != dn.name && (node.Annotations[annoKey] == annoDraining || node.Annotations[annoKey] == annoMcpPaused) {
1✔
389
                        dn.drainable = false
×
390
                        return
×
391
                }
×
392
        }
393
        dn.drainable = true
1✔
394
}
395

396
func (dn *Daemon) operatorConfigAddHandler(obj interface{}) {
×
397
        dn.operatorConfigChangeHandler(&sriovnetworkv1.SriovOperatorConfig{}, obj)
×
398
}
×
399

400
func (dn *Daemon) operatorConfigChangeHandler(old, new interface{}) {
×
401
        newCfg := new.(*sriovnetworkv1.SriovOperatorConfig)
×
402
        var level = glog.Level(newCfg.Spec.LogLevel)
×
403
        if level != flag.Lookup("v").Value.(flag.Getter).Get() {
×
404
                glog.Infof("Set log verbose level to: %d", level)
×
405
                flag.Set("v", level.String())
×
406
        }
×
407
        newDisableDrain := newCfg.Spec.DisableDrain
×
408
        if dn.disableDrain != newDisableDrain {
×
409
                dn.disableDrain = newDisableDrain
×
410
                glog.Infof("Set Disable Drain to: %t", dn.disableDrain)
×
411
        }
×
412
}
413

414
func (dn *Daemon) nodeStateSyncHandler() error {
1✔
415
        var err error
1✔
416
        // Get the latest NodeState
1✔
417
        var latestState *sriovnetworkv1.SriovNetworkNodeState
1✔
418
        latestState, err = dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), dn.name, metav1.GetOptions{})
1✔
419
        if err != nil {
1✔
420
                glog.Warningf("nodeStateSyncHandler(): Failed to fetch node state %s: %v", dn.name, err)
×
421
                return err
×
422
        }
×
423
        latest := latestState.GetGeneration()
1✔
424
        glog.V(0).Infof("nodeStateSyncHandler(): new generation is %d", latest)
1✔
425

1✔
426
        if dn.nodeState.GetGeneration() == latest {
1✔
427
                glog.V(0).Infof("nodeStateSyncHandler(): Interface not changed")
×
428
                if latestState.Status.LastSyncError != "" ||
×
429
                        latestState.Status.SyncStatus != syncStatusSucceeded {
×
430
                        dn.refreshCh <- Message{
×
431
                                syncStatus:    syncStatusSucceeded,
×
432
                                lastSyncError: "",
×
433
                        }
×
434
                        // wait for writer to refresh the status
×
435
                        <-dn.syncCh
×
436
                }
×
437

438
                return nil
×
439
        }
440

441
        if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 {
1✔
442
                glog.V(0).Infof("nodeStateSyncHandler(): Name: %s, Interface policy spec not yet set by controller", latestState.Name)
×
443
                if latestState.Status.SyncStatus != "Succeeded" {
×
444
                        dn.refreshCh <- Message{
×
445
                                syncStatus:    "Succeeded",
×
446
                                lastSyncError: "",
×
447
                        }
×
448
                        // wait for writer to refresh status
×
449
                        <-dn.syncCh
×
450
                }
×
451
                return nil
×
452
        }
453

454
        dn.refreshCh <- Message{
1✔
455
                syncStatus:    syncStatusInProgress,
1✔
456
                lastSyncError: "",
1✔
457
        }
1✔
458

1✔
459
        // load plugins if has not loaded
1✔
460
        if len(dn.enabledPlugins) == 0 {
1✔
461
                dn.enabledPlugins, err = enablePlugins(dn.platform, latestState)
×
462
                if err != nil {
×
463
                        glog.Errorf("nodeStateSyncHandler(): failed to enable vendor plugins error: %v", err)
×
464
                        return err
×
465
                }
×
466
        }
467

468
        reqReboot := false
1✔
469
        reqDrain := false
1✔
470
        for k, p := range dn.enabledPlugins {
2✔
471
                d, r := false, false
1✔
472
                if dn.nodeState.GetName() == "" {
2✔
473
                        glog.V(0).Infof("nodeStateSyncHandler(): calling OnNodeStateChange for a new node state")
1✔
474
                } else {
1✔
475
                        glog.V(0).Infof("nodeStateSyncHandler(): calling OnNodeStateChange for an updated node state")
×
476
                }
×
477
                d, r, err = p.OnNodeStateChange(latestState)
1✔
478
                if err != nil {
1✔
479
                        glog.Errorf("nodeStateSyncHandler(): plugin %s error: %v", k, err)
×
480
                        return err
×
481
                }
×
482
                glog.V(0).Infof("nodeStateSyncHandler(): plugin %s: reqDrain %v, reqReboot %v", k, d, r)
1✔
483
                reqDrain = reqDrain || d
1✔
484
                reqReboot = reqReboot || r
1✔
485
        }
486
        glog.V(0).Infof("nodeStateSyncHandler(): reqDrain %v, reqReboot %v disableDrain %v", reqDrain, reqReboot, dn.disableDrain)
1✔
487

1✔
488
        for k, p := range dn.enabledPlugins {
2✔
489
                if k != GenericPluginName {
1✔
490
                        err := p.Apply()
×
491
                        if err != nil {
×
492
                                glog.Errorf("nodeStateSyncHandler(): plugin %s fail to apply: %v", k, err)
×
493
                                return err
×
494
                        }
×
495
                }
496
        }
497
        if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() {
1✔
498
                if err = dn.getNodeMachinePool(); err != nil {
×
499
                        return err
×
500
                }
×
501
        }
502
        if reqDrain {
1✔
503
                if !dn.isNodeDraining() {
×
504
                        if !dn.disableDrain {
×
505
                                ctx, cancel := context.WithCancel(context.TODO())
×
506
                                defer cancel()
×
507

×
508
                                glog.Infof("nodeStateSyncHandler(): get drain lock for sriov daemon")
×
509
                                done := make(chan bool)
×
510
                                go dn.getDrainLock(ctx, done)
×
511
                                <-done
×
512
                        }
×
513

514
                        if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() {
×
515
                                glog.Infof("nodeStateSyncHandler(): pause MCP")
×
516
                                if err := dn.pauseMCP(); err != nil {
×
517
                                        return err
×
518
                                }
×
519
                        }
520
                }
521

522
                glog.Info("nodeStateSyncHandler(): drain node")
×
523
                if err := dn.drainNode(); err != nil {
×
524
                        return err
×
525
                }
×
526
        }
527

528
        if !reqReboot {
2✔
529
                selectedPlugin, ok := dn.enabledPlugins[GenericPluginName]
1✔
530
                if ok {
2✔
531
                        // Apply generic_plugin last
1✔
532
                        err = selectedPlugin.Apply()
1✔
533
                        if err != nil {
1✔
534
                                glog.Errorf("nodeStateSyncHandler(): generic_plugin fail to apply: %v", err)
×
535
                                return err
×
536
                        }
×
537
                }
538
        }
539

540
        if reqReboot {
1✔
541
                glog.Info("nodeStateSyncHandler(): reboot node")
×
542
                rebootNode()
×
543
                return nil
×
544
        }
×
545

546
        // restart device plugin pod
547
        glog.Info("nodeStateSyncHandler(): restart device plugin pod")
1✔
548
        if err := dn.restartDevicePluginPod(); err != nil {
1✔
549
                glog.Errorf("nodeStateSyncHandler(): fail to restart device plugin pod: %v", err)
×
550
                return err
×
551
        }
×
552
        if dn.isNodeDraining() {
1✔
553
                if err := dn.completeDrain(); err != nil {
×
554
                        glog.Errorf("nodeStateSyncHandler(): failed to complete draining: %v", err)
×
555
                        return err
×
556
                }
×
557
        } else {
1✔
558
                if !dn.nodeHasAnnotation(annoKey, annoIdle) {
2✔
559
                        if err := dn.annotateNode(dn.name, annoIdle); err != nil {
1✔
560
                                glog.Errorf("nodeStateSyncHandler(): failed to annotate node: %v", err)
×
561
                                return err
×
562
                        }
×
563
                }
564
        }
565
        glog.Info("nodeStateSyncHandler(): sync succeeded")
1✔
566
        dn.nodeState = latestState.DeepCopy()
1✔
567
        dn.refreshCh <- Message{
1✔
568
                syncStatus:    syncStatusSucceeded,
1✔
569
                lastSyncError: "",
1✔
570
        }
1✔
571
        // wait for writer to refresh the status
1✔
572
        <-dn.syncCh
1✔
573
        return nil
1✔
574
}
575

576
func (dn *Daemon) nodeHasAnnotation(annoKey string, value string) bool {
1✔
577
        // Check if node already contains annotation
1✔
578
        if anno, ok := dn.node.Annotations[annoKey]; ok && (anno == value) {
1✔
579
                return true
×
580
        }
×
581
        return false
1✔
582
}
583

584
func (dn *Daemon) isNodeDraining() bool {
1✔
585
        if anno, ok := dn.node.Annotations[annoKey]; ok && (anno == annoDraining || anno == annoMcpPaused) {
1✔
586
                return true
×
587
        }
×
588
        return false
1✔
589
}
590

591
// completeDrain reverses the drain once the new configuration is applied:
// it uncordons the node (unless draining is disabled), resumes the
// MachineConfigPool on OpenShift, re-annotates the node as Idle, and removes
// the delay-shutdown marker file if present.
func (dn *Daemon) completeDrain() error {
	if !dn.disableDrain {
		if err := drain.RunCordonOrUncordon(dn.drainer, dn.node, false); err != nil {
			return err
		}
	}

	if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() {
		glog.Infof("completeDrain(): resume MCP %s", dn.mcpName)
		pausePatch := []byte("{\"spec\":{\"paused\":false}}")
		if _, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}); err != nil {
			glog.Errorf("completeDrain(): failed to resume MCP %s: %v", dn.mcpName, err)
			return err
		}
	}

	if err := dn.annotateNode(dn.name, annoIdle); err != nil {
		glog.Errorf("completeDrain(): failed to annotate node: %v", err)
		return err
	}

	// Clean up the graceful-shutdown marker so later shutdowns are not delayed.
	if _, err := os.Stat(delayShutdownPath); err == nil {
		if err := os.Remove(delayShutdownPath); err != nil {
			glog.Errorf("completeDrain(): failed to remove file %v: %v", delayShutdownPath, err)
			return err
		}
	} else if !os.IsNotExist(err) { // error is not "not exist"
		glog.Errorf("completeDrain(): error checking file status %v: %v", delayShutdownPath, err)
		return err
	}

	return nil
}
624

625
// restartDevicePluginPod deletes the sriov-device-plugin pod scheduled on
// this node so it gets recreated with the new device configuration, then
// waits until the deleted pod is gone. A pod that is already absent is
// treated as success. Serialized by dn.mu.
func (dn *Daemon) restartDevicePluginPod() error {
	dn.mu.Lock()
	defer dn.mu.Unlock()
	glog.V(2).Infof("restartDevicePluginPod(): try to restart device plugin pod")

	var podToDelete string
	// Locate the device plugin pod running on this node.
	pods, err := dn.kubeClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
		LabelSelector: "app=sriov-device-plugin",
		FieldSelector: "spec.nodeName=" + dn.name,
	})
	if err != nil {
		if errors.IsNotFound(err) {
			glog.Info("restartDevicePluginPod(): device plugin pod exited")
			return nil
		}
		glog.Warningf("restartDevicePluginPod(): Failed to list device plugin pod: %s, retrying", err)
		return err
	}

	if len(pods.Items) == 0 {
		glog.Info("restartDevicePluginPod(): device plugin pod exited")
		return nil
	}
	podToDelete = pods.Items[0].Name

	glog.V(2).Infof("restartDevicePluginPod(): Found device plugin pod %s, deleting it", podToDelete)
	err = dn.kubeClient.CoreV1().Pods(namespace).Delete(context.Background(), podToDelete, metav1.DeleteOptions{})
	if errors.IsNotFound(err) {
		glog.Info("restartDevicePluginPod(): pod to delete not found")
		return nil
	}
	if err != nil {
		glog.Errorf("restartDevicePluginPod(): Failed to delete device plugin pod: %s, retrying", err)
		return err
	}

	// Poll every 3s until the pod object disappears, or the daemon stops.
	if err := wait.PollImmediateUntil(3*time.Second, func() (bool, error) {
		_, err := dn.kubeClient.CoreV1().Pods(namespace).Get(context.Background(), podToDelete, metav1.GetOptions{})
		if errors.IsNotFound(err) {
			glog.Info("restartDevicePluginPod(): device plugin pod exited")
			return true, nil
		}

		if err != nil {
			glog.Warningf("restartDevicePluginPod(): Failed to check for device plugin exit: %s, retrying", err)
		} else {
			glog.Infof("restartDevicePluginPod(): waiting for device plugin %s to exit", podToDelete)
		}
		return false, nil
	}, dn.stopCh); err != nil {
		glog.Errorf("restartDevicePluginPod(): failed to wait for checking pod deletion: %v", err)
		return err
	}

	return nil
}
681

682
func rebootNode() {
×
683
        glog.Infof("rebootNode(): trigger node reboot")
×
684
        exit, err := utils.Chroot("/host")
×
685
        if err != nil {
×
686
                glog.Errorf("rebootNode(): %v", err)
×
687
        }
×
688
        defer exit()
×
689
        // creates a new transient systemd unit to reboot the system that
×
690
        // reboots the system using `systemctl rooboot``
×
691
        // by shutting down the system this way instead via `reboot`,
×
692
        // when kubelet is configured with a shutdownGracePeriod, then it will
×
693
        // be give some time to pods to run their preStop scripts and respond to
×
694
        // SIGTERM by terminating gracefully before being forcefully killed via
×
695
        // SIGKILL. stopping the kubelet service and then immediately running
×
696
        // `reboot` just results in all pods being immediately killed
×
697
        cmd := exec.Command("systemd-run", "--unit", "sriov-network-config-daemon-reboot",
×
698
                "--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl reboot")
×
699

×
700
        if err := cmd.Run(); err != nil {
×
701
                glog.Errorf("failed to reboot node: %v", err)
×
702
        }
×
703
}
704

705
func (dn *Daemon) annotateNode(node, value string) error {
1✔
706
        glog.Infof("annotateNode(): Annotate node %s with: %s", node, value)
1✔
707

1✔
708
        oldNode, err := dn.kubeClient.CoreV1().Nodes().Get(context.Background(), dn.name, metav1.GetOptions{})
1✔
709
        if err != nil {
1✔
710
                glog.Infof("annotateNode(): Failed to get node %s %v, retrying", node, err)
×
711
                return err
×
712
        }
×
713
        oldData, err := json.Marshal(oldNode)
1✔
714
        if err != nil {
1✔
715
                return err
×
716
        }
×
717

718
        newNode := oldNode.DeepCopy()
1✔
719
        if newNode.Annotations == nil {
2✔
720
                newNode.Annotations = map[string]string{}
1✔
721
        }
1✔
722
        if newNode.Annotations[annoKey] != value {
2✔
723
                newNode.Annotations[annoKey] = value
1✔
724
                newData, err := json.Marshal(newNode)
1✔
725
                if err != nil {
1✔
726
                        return err
×
727
                }
×
728
                patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, corev1.Node{})
1✔
729
                if err != nil {
1✔
730
                        return err
×
731
                }
×
732
                _, err = dn.kubeClient.CoreV1().Nodes().Patch(context.Background(),
1✔
733
                        dn.name,
1✔
734
                        types.StrategicMergePatchType,
1✔
735
                        patchBytes,
1✔
736
                        metav1.PatchOptions{})
1✔
737
                if err != nil {
1✔
738
                        glog.Infof("annotateNode(): Failed to patch node %s: %v", node, err)
×
739
                        return err
×
740
                }
×
741
        }
742
        return nil
1✔
743
}
744

745
func (dn *Daemon) getNodeMachinePool() error {
×
746
        desiredConfig, ok := dn.node.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey]
×
747
        if !ok {
×
748
                glog.Errorf("getNodeMachinePool(): Failed to find the the desiredConfig Annotation")
×
749
                return fmt.Errorf("getNodeMachinePool(): Failed to find the the desiredConfig Annotation")
×
750
        }
×
751
        mc, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), desiredConfig, metav1.GetOptions{})
×
752
        if err != nil {
×
753
                glog.Errorf("getNodeMachinePool(): Failed to get the desired Machine Config: %v", err)
×
754
                return err
×
755
        }
×
756
        for _, owner := range mc.OwnerReferences {
×
757
                if owner.Kind == "MachineConfigPool" {
×
758
                        dn.mcpName = owner.Name
×
759
                        return nil
×
760
                }
×
761
        }
762
        glog.Error("getNodeMachinePool(): Failed to find the MCP of the node")
×
763
        return fmt.Errorf("getNodeMachinePool(): Failed to find the MCP of the node")
×
764
}
765

766
// getDrainLock competes for a cluster-wide draining lease so that only one
// config daemon drains its node at a time. While holding the lease it polls
// until this node is allowed to drain, annotates the node as draining, and
// signals the caller via done. Blocks until done is signaled or ctx is
// cancelled (cancellation releases the lease, see ReleaseOnCancel).
func (dn *Daemon) getDrainLock(ctx context.Context, done chan bool) {
	var err error

	lock := &resourcelock.LeaseLock{
		LeaseMeta: metav1.ObjectMeta{
			Name:      "config-daemon-draining-lock",
			Namespace: namespace,
		},
		Client: dn.kubeClient.CoordinationV1(),
		LockConfig: resourcelock.ResourceLockConfig{
			// the lease holder identity is this node's name
			Identity: dn.name,
		},
	}

	// start the leader election
	leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
		Lock:            lock,
		ReleaseOnCancel: true,
		LeaseDuration:   5 * time.Second,
		RenewDeadline:   3 * time.Second,
		RetryPeriod:     1 * time.Second,
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: func(ctx context.Context) {
				glog.V(2).Info("getDrainLock(): started leading")
				// Poll every 3s while holding the lease until this node may
				// proceed with draining.
				for {
					time.Sleep(3 * time.Second)
					if dn.node.Annotations[annoKey] == annoMcpPaused {
						// The node in Draining_MCP_Paused state, no other node is draining. Skip drainable checking
						done <- true
						return
					}
					if dn.drainable {
						glog.V(2).Info("getDrainLock(): no other node is draining")
						err = dn.annotateNode(dn.name, annoDraining)
						if err != nil {
							// annotation failed; keep the lease and retry on
							// the next tick
							glog.Errorf("getDrainLock(): Failed to annotate node: %v", err)
							continue
						}
						done <- true
						return
					}
					glog.V(2).Info("getDrainLock(): other node is draining, wait...")
				}
			},
			OnStoppedLeading: func() {
				glog.V(2).Info("getDrainLock(): stopped leading")
			},
		},
	})
}
816

817
// pauseMCP pauses this node's MachineConfigPool (MCP) so the Machine Config
// Operator does not roll out a competing update while the daemon is
// draining/rebooting the node. It watches the MCP through an informer and
// blocks until the pause has been applied, or returns immediately when the
// pause was already applied in a previous round (recorded by the
// Draining_MCP_Paused node annotation).
func (dn *Daemon) pauseMCP() error {
	glog.Info("pauseMCP(): pausing MCP")
	var err error

	mcpInformerFactory := mcfginformers.NewSharedInformerFactory(dn.openshiftContext.McClient,
		time.Second*30,
	)
	mcpInformer := mcpInformerFactory.Machineconfiguration().V1().MachineConfigPools().Informer()

	// cancel() stops the informer and unblocks the <-ctx.Done() wait below.
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	// paused tracks whether this daemon currently holds the MCP pause; it is
	// seeded from the node annotation so the state survives a daemon restart.
	paused := dn.node.Annotations[annoKey] == annoMcpPaused

	mcpEventHandler := func(obj interface{}) {
		mcp := obj.(*mcfgv1.MachineConfigPool)
		// only react to events for the MCP that owns this node
		if mcp.GetName() != dn.mcpName {
			return
		}
		// Always get the latest object
		newMcp, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, dn.mcpName, metav1.GetOptions{})
		if err != nil {
			glog.V(2).Infof("pauseMCP(): Failed to get MCP %s: %v", dn.mcpName, err)
			return
		}
		// "ready" = not degraded, fully updated, and not mid-update
		if mcfgv1.IsMachineConfigPoolConditionFalse(newMcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) &&
			mcfgv1.IsMachineConfigPoolConditionTrue(newMcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) &&
			mcfgv1.IsMachineConfigPoolConditionFalse(newMcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) {
			glog.V(2).Infof("pauseMCP(): MCP %s is ready", dn.mcpName)
			if paused {
				// pause already in place — stop watching and return
				glog.V(2).Info("pauseMCP(): stop MCP informer")
				cancel()
				return
			}
			if newMcp.Spec.Paused {
				// someone else paused the pool; don't claim it as ours
				glog.V(2).Infof("pauseMCP(): MCP %s was paused by other, wait...", dn.mcpName)
				return
			}
			glog.Infof("pauseMCP(): pause MCP %s", dn.mcpName)
			pausePatch := []byte("{\"spec\":{\"paused\":true}}")
			_, err = dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{})
			if err != nil {
				glog.V(2).Infof("pauseMCP(): Failed to pause MCP %s: %v", dn.mcpName, err)
				return
			}
			// record the pause on the node so it survives a daemon restart
			err = dn.annotateNode(dn.name, annoMcpPaused)
			if err != nil {
				glog.V(2).Infof("pauseMCP(): Failed to annotate node: %v", err)
				return
			}
			paused = true
			return
		}
		if paused {
			// MCP started processing while we hold the pause: release it so
			// MCO can progress, and fall back to the plain Draining state.
			glog.Infof("pauseMCP(): MCP is processing, resume MCP %s", dn.mcpName)
			pausePatch := []byte("{\"spec\":{\"paused\":false}}")
			_, err = dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{})
			if err != nil {
				glog.V(2).Infof("pauseMCP(): fail to resume MCP %s: %v", dn.mcpName, err)
				return
			}
			err = dn.annotateNode(dn.name, annoDraining)
			if err != nil {
				glog.V(2).Infof("pauseMCP(): Failed to annotate node: %v", err)
				return
			}
			paused = false
		}
		glog.Infof("pauseMCP():MCP %s is not ready: %v, wait...", newMcp.GetName(), newMcp.Status.Conditions)
	}

	mcpInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: mcpEventHandler,
		UpdateFunc: func(old, new interface{}) {
			mcpEventHandler(new)
		},
	})

	// The Draining_MCP_Paused state means the MCP work has been paused by the config daemon in previous round.
	// Only check MCP state if the node is not in Draining_MCP_Paused state
	if !paused {
		mcpInformerFactory.Start(ctx.Done())
		mcpInformerFactory.WaitForCacheSync(ctx.Done())
		<-ctx.Done()
	}

	return err
}
904

905
func (dn *Daemon) drainNode() error {
×
906
        if dn.disableDrain {
×
907
                glog.Info("drainNode(): disable drain is true skipping drain")
×
908
                return nil
×
909
        }
×
910

911
        glog.Info("drainNode(): Update prepared")
×
912
        var err error
×
913

×
914
        backoff := wait.Backoff{
×
915
                Steps:    5,
×
916
                Duration: 10 * time.Second,
×
917
                Factor:   2,
×
918
        }
×
919
        var lastErr error
×
920

×
921
        glog.Info("drainNode(): Start draining")
×
922
        if err = wait.ExponentialBackoff(backoff, func() (bool, error) {
×
923
                err := drain.RunCordonOrUncordon(dn.drainer, dn.node, true)
×
924
                if err != nil {
×
925
                        lastErr = err
×
926
                        glog.Infof("Cordon failed with: %v, retrying", err)
×
927
                        return false, nil
×
928
                }
×
929
                err = drain.RunNodeDrain(dn.drainer, dn.name)
×
930
                if err == nil {
×
931
                        return true, nil
×
932
                }
×
933
                lastErr = err
×
934
                glog.Infof("Draining failed with: %v, retrying", err)
×
935
                return false, nil
×
936
        }); err != nil {
×
937
                if err == wait.ErrWaitTimeout {
×
938
                        glog.Errorf("drainNode(): failed to drain node (%d tries): %v :%v", backoff.Steps, err, lastErr)
×
939
                }
×
940
                glog.Errorf("drainNode(): failed to drain node: %v", err)
×
941
                return err
×
942
        }
943
        glog.Info("drainNode(): drain complete")
×
944

×
945
        file, err := os.Create(delayShutdownPath)
×
946
        if err != nil {
×
947
                glog.Errorf("drainNode(): failed to create file %v %v", delayShutdownPath, err)
×
948
                return err
×
949
        }
×
950
        defer file.Close()
×
951

×
952
        return nil
×
953
}
954

955
func tryEnableTun() {
1✔
956
        if err := utils.LoadKernelModule("tun"); err != nil {
2✔
957
                glog.Errorf("tryEnableTun(): TUN kernel module not loaded: %v", err)
1✔
958
        }
1✔
959
}
960

961
func tryEnableVhostNet() {
1✔
962
        if err := utils.LoadKernelModule("vhost_net"); err != nil {
2✔
963
                glog.Errorf("tryEnableVhostNet(): VHOST_NET kernel module not loaded: %v", err)
1✔
964
        }
1✔
965
}
966

967
func tryEnableRdma() (bool, error) {
1✔
968
        glog.V(2).Infof("tryEnableRdma()")
1✔
969
        var stdout, stderr bytes.Buffer
1✔
970

1✔
971
        cmd := exec.Command("/bin/bash", path.Join(filesystemRoot, rdmaScriptsPath))
1✔
972
        cmd.Stdout = &stdout
1✔
973
        cmd.Stderr = &stderr
1✔
974
        if err := cmd.Run(); err != nil {
1✔
975
                glog.Errorf("tryEnableRdma(): fail to enable rdma %v: %v", err, cmd.Stderr)
×
976
                return false, err
×
977
        }
×
978
        glog.V(2).Infof("tryEnableRdma(): %v", cmd.Stdout)
1✔
979

1✔
980
        i, err := strconv.Atoi(strings.TrimSpace(stdout.String()))
1✔
981
        if err == nil {
1✔
982
                if i == 0 {
×
983
                        glog.V(2).Infof("tryEnableRdma(): RDMA kernel modules loaded")
×
984
                        return true, nil
×
985
                } else {
×
986
                        glog.V(2).Infof("tryEnableRdma(): RDMA kernel modules not loaded")
×
987
                        return false, nil
×
988
                }
×
989
        }
990
        return false, err
1✔
991
}
992

993
func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) error {
1✔
994
        glog.V(2).Infof("tryCreateSwitchdevUdevRule()")
1✔
995
        var newContent string
1✔
996
        filePath := path.Join(filesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules")
1✔
997

1✔
998
        for _, ifaceStatus := range nodeState.Status.Interfaces {
2✔
999
                if ifaceStatus.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev {
1✔
1000
                        switchID, err := utils.GetPhysSwitchID(ifaceStatus.Name)
×
1001
                        if err != nil {
×
1002
                                return err
×
1003
                        }
×
1004
                        portName, err := utils.GetPhysPortName(ifaceStatus.Name)
×
1005
                        if err != nil {
×
1006
                                return err
×
1007
                        }
×
1008
                        newContent = newContent + fmt.Sprintf("SUBSYSTEM==\"net\", ACTION==\"add|move\", ATTRS{phys_switch_id}==\"%s\", ATTR{phys_port_name}==\"pf%svf*\", IMPORT{program}=\"/etc/udev/switchdev-vf-link-name.sh $attr{phys_port_name}\", NAME=\"%s_$env{NUMBER}\"\n", switchID, strings.TrimPrefix(portName, "p"), ifaceStatus.Name)
×
1009
                }
1010
        }
1011

1012
        oldContent, err := ioutil.ReadFile(filePath)
1✔
1013
        // if oldContent = newContent, don't do anything
1✔
1014
        if err == nil && newContent == string(oldContent) {
1✔
1015
                return nil
×
1016
        }
×
1017

1018
        glog.V(2).Infof("Old udev content '%v' and new content '%v' differ. Writing to file %v.",
1✔
1019
                strings.TrimSuffix(string(oldContent), "\n"),
1✔
1020
                strings.TrimSuffix(newContent, "\n"),
1✔
1021
                filePath)
1✔
1022

1✔
1023
        // if the file does not exist or if oldContent != newContent
1✔
1024
        // write to file and create it if it doesn't exist
1✔
1025
        err = ioutil.WriteFile(filePath, []byte(newContent), 0664)
1✔
1026
        if err != nil {
2✔
1027
                glog.Errorf("tryCreateSwitchdevUdevRule(): fail to write file: %v", err)
1✔
1028
                return err
1✔
1029
        }
1✔
1030

1031
        var stdout, stderr bytes.Buffer
×
1032
        cmd := exec.Command("/bin/bash", path.Join(filesystemRoot, udevScriptsPath))
×
1033
        cmd.Stdout = &stdout
×
1034
        cmd.Stderr = &stderr
×
1035
        if err := cmd.Run(); err != nil {
×
1036
                return err
×
1037
        }
×
1038
        glog.V(2).Infof("tryCreateSwitchdevUdevRule(): %v", cmd.Stdout)
×
1039

×
1040
        i, err := strconv.Atoi(strings.TrimSpace(stdout.String()))
×
1041
        if err == nil {
×
1042
                if i == 0 {
×
1043
                        glog.V(2).Infof("tryCreateSwitchdevUdevRule(): switchdev udev rules loaded")
×
1044
                } else {
×
1045
                        glog.V(2).Infof("tryCreateSwitchdevUdevRule(): switchdev udev rules not loaded")
×
1046
                }
×
1047
        }
1048
        return nil
×
1049
}
1050

1051
func tryCreateNMUdevRule() error {
1✔
1052
        glog.V(2).Infof("tryCreateNMUdevRule()")
1✔
1053
        dirPath := path.Join(filesystemRoot, "/host/etc/udev/rules.d")
1✔
1054
        filePath := path.Join(dirPath, "10-nm-unmanaged.rules")
1✔
1055

1✔
1056
        newContent := fmt.Sprintf("ACTION==\"add|change|move\", ATTRS{device}==\"%s\", ENV{NM_UNMANAGED}=\"1\"\n", strings.Join(sriovnetworkv1.GetSupportedVfIds(), "|"))
1✔
1057

1✔
1058
        // add NM udev rules for renaming VF rep
1✔
1059
        newContent = newContent + "SUBSYSTEM==\"net\", ACTION==\"add|move\", ATTRS{phys_switch_id}!=\"\", ATTR{phys_port_name}==\"pf*vf*\", ENV{NM_UNMANAGED}=\"1\"\n"
1✔
1060

1✔
1061
        oldContent, err := ioutil.ReadFile(filePath)
1✔
1062
        // if oldContent = newContent, don't do anything
1✔
1063
        if err == nil && newContent == string(oldContent) {
1✔
1064
                return nil
×
1065
        }
×
1066

1067
        glog.V(2).Infof("Old udev content '%v' and new content '%v' differ. Writing to file %v.",
1✔
1068
                strings.TrimSuffix(string(oldContent), "\n"),
1✔
1069
                strings.TrimSuffix(newContent, "\n"),
1✔
1070
                filePath)
1✔
1071

1✔
1072
        err = os.MkdirAll(dirPath, os.ModePerm)
1✔
1073
        if err != nil && !os.IsExist(err) {
1✔
1074
                glog.Errorf("tryCreateNMUdevRule(): failed to create dir %s: %v", dirPath, err)
×
1075
                return err
×
1076
        }
×
1077

1078
        // if the file does not exist or if oldContent != newContent
1079
        // write to file and create it if it doesn't exist
1080
        err = ioutil.WriteFile(filePath, []byte(newContent), 0666)
1✔
1081
        if err != nil {
1✔
1082
                glog.Errorf("tryCreateNMUdevRule(): fail to write file: %v", err)
×
1083
                return err
×
1084
        }
×
1085
        return nil
1✔
1086
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc