• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

k8snetworkplumbingwg / sriov-network-operator / 3751025296

pending completion
3751025296

Pull #365

github

GitHub
Merge 421284b55 into 788d76f7e
Pull Request #365: Implementation for new systemd configuration method

958 of 958 new or added lines in 18 files covered. (100.0%)

1971 of 8330 relevant lines covered (23.66%)

0.27 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

39.22
/pkg/daemon/daemon.go
1
package daemon
2

3
import (
4
        "bytes"
5
        "context"
6
        "encoding/json"
7
        "flag"
8
        "fmt"
9
        "io/ioutil"
10
        "math/rand"
11
        "os"
12
        "os/exec"
13
        "path"
14
        "strconv"
15
        "strings"
16
        "sync"
17
        "time"
18

19
        "github.com/golang/glog"
20
        mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
21
        daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
22
        mcfginformers "github.com/openshift/machine-config-operator/pkg/generated/informers/externalversions"
23
        "golang.org/x/time/rate"
24
        corev1 "k8s.io/api/core/v1"
25
        "k8s.io/apimachinery/pkg/api/errors"
26
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27
        "k8s.io/apimachinery/pkg/labels"
28
        "k8s.io/apimachinery/pkg/types"
29
        utilruntime "k8s.io/apimachinery/pkg/util/runtime"
30
        "k8s.io/apimachinery/pkg/util/strategicpatch"
31
        "k8s.io/apimachinery/pkg/util/wait"
32
        "k8s.io/client-go/informers"
33
        "k8s.io/client-go/kubernetes"
34
        listerv1 "k8s.io/client-go/listers/core/v1"
35
        "k8s.io/client-go/tools/cache"
36
        "k8s.io/client-go/tools/leaderelection"
37
        "k8s.io/client-go/tools/leaderelection/resourcelock"
38
        "k8s.io/client-go/util/workqueue"
39
        "k8s.io/kubectl/pkg/drain"
40

41
        sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
42
        snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
43
        sninformer "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/informers/externalversions"
44
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host"
45
        plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins"
46
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/service"
47
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/systemd"
48
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
49
)
50

51
// Workqueue rate-limiter bounds: changes are debounced at updateDelay and
// retries back off exponentially up to maxUpdateBackoff (see New).
const (
	// updateDelay is the baseline speed at which we react to changes.  We don't
	// need to react in milliseconds as any change would involve rebooting the node.
	updateDelay = 5 * time.Second
	// maxUpdateBackoff is the maximum time to react to a change as we back off
	// in the face of errors.
	maxUpdateBackoff = 60 * time.Second
)
59

60
// Message carries one sync outcome from the daemon to the status writer,
// which consumes it from refreshCh and publishes it on the node state.
type Message struct {
	// syncStatus is the outcome to report, e.g. syncStatusSucceeded,
	// syncStatusFailed or "InProgress".
	syncStatus    string
	// lastSyncError holds the error text for a failed sync; empty otherwise.
	lastSyncError string
}
64

65
// Daemon reconciles the SriovNetworkNodeState for the node it runs on:
// it consults vendor plugins, writes systemd configuration when enabled,
// and drains/reboots the node when a plugin requires it.
type Daemon struct {
	// name is the node name.
	name string

	// platform distinguishes baremetal from virtual (e.g. OpenStack) hosts.
	platform utils.PlatformType

	// useSystemdService selects the systemd-based configuration flow instead
	// of applying SR-IOV configuration directly from the daemon.
	useSystemdService bool

	// devMode is forwarded to systemd.WriteConfFile — presumably a
	// development/testing toggle; confirm against that function.
	devMode bool

	// client accesses the sriovnetwork CRDs (node states, operator config).
	client snclientset.Interface
	// kubeClient allows interaction with Kubernetes, including the node we are running on.
	kubeClient kubernetes.Interface

	// openshiftContext carries OpenShift-specific clients and flavor info.
	openshiftContext utils.OpenshiftContext

	// nodeState is the last successfully synced node state; its generation is
	// compared against the latest one to detect changes.
	nodeState *sriovnetworkv1.SriovNetworkNodeState

	// enabledPlugins maps plugin name to the loaded vendor plugin.
	enabledPlugins map[string]plugin.VendorPlugin

	// serviceManager inspects systemd services on the host (rooted at /host).
	serviceManager service.ServiceManager

	// channel used by callbacks to signal Run() of an error
	exitCh chan<- error

	// channel used to ensure all spawned goroutines exit when we exit.
	stopCh <-chan struct{}

	// syncCh is received from after each refreshCh send, waiting for the
	// status writer to publish the new status.
	syncCh <-chan struct{}

	// refreshCh delivers sync status messages to the status writer.
	refreshCh chan<- Message

	// mu serializes device-plugin pod restarts (see restartDevicePluginPod).
	mu *sync.Mutex

	// drainer performs node cordon/uncordon and pod eviction.
	drainer *drain.Helper

	// node is a deep copy of this node as last seen by the node informer.
	node *corev1.Node

	// drainable is true when no other node is annotated as draining or
	// MCP-paused (see nodeUpdateHandler).
	drainable bool

	// disableDrain mirrors SriovOperatorConfig.Spec.DisableDrain.
	disableDrain bool

	// nodeLister reads nodes from the shared informer cache.
	nodeLister listerv1.NodeLister

	// workqueue rate-limits processing of node state generations.
	workqueue workqueue.RateLimitingInterface

	// mcpName is the MachineConfigPool this node belongs to (OpenShift only).
	mcpName string
}
113

114
const (
	// udevScriptsPath is the helper script used to load udev rules.
	udevScriptsPath     = "/bindata/scripts/load-udev.sh"
	// annoKey is the node annotation key recording the daemon's drain state.
	annoKey             = "sriovnetwork.openshift.io/state"
	// annoIdle marks a node that is not draining.
	annoIdle            = "Idle"
	// annoDraining marks a node currently being drained.
	annoDraining        = "Draining"
	// annoMcpPaused marks a draining node whose MachineConfigPool is paused.
	annoMcpPaused       = "Draining_MCP_Paused"
	// syncStatusSucceeded/syncStatusFailed are the values reported on the
	// SriovNetworkNodeState status via Message.
	syncStatusSucceeded = "Succeeded"
	syncStatusFailed    = "Failed"
)
123

124
// namespace is the operator namespace, injected via the NAMESPACE
// environment variable; it scopes all CRD lookups in this file.
var namespace = os.Getenv("NAMESPACE")

// used by test to mock interactions with filesystem
var filesystemRoot string = ""
128

129
// writer adapts a glog logging function (glog.Info, glog.Error, ...) to the
// io.Writer interface so components that expect a Writer — such as the drain
// helper's Out/ErrOut — can log through glog.
type writer struct {
	logFunc func(args ...interface{})
}

// Write forwards the buffer contents to the configured log function.
// It never fails and always reports the full length as consumed.
func (w writer) Write(p []byte) (n int, err error) {
	msg := string(p)
	w.logFunc(msg)
	n = len(p)
	return n, nil
}
139

140
// New constructs a Daemon for nodeName. It wires the clients and channels,
// prepares the drain helper (force, ignore DaemonSets, 90s timeout), and
// builds a rate-limited workqueue so node state changes are debounced and
// retried with exponential backoff capped at maxUpdateBackoff.
func New(
	nodeName string,
	client snclientset.Interface,
	kubeClient kubernetes.Interface,
	openshiftContext utils.OpenshiftContext,
	exitCh chan<- error,
	stopCh <-chan struct{},
	syncCh <-chan struct{},
	refreshCh chan<- Message,
	platformType utils.PlatformType,
	useSystemdService bool,
	devMode bool,
) *Daemon {
	return &Daemon{
		name:              nodeName,
		platform:          platformType,
		useSystemdService: useSystemdService,
		devMode:           devMode,
		client:            client,
		kubeClient:        kubeClient,
		openshiftContext:  openshiftContext,
		serviceManager:    service.NewServiceManager("/host"),
		exitCh:            exitCh,
		stopCh:            stopCh,
		syncCh:            syncCh,
		refreshCh:         refreshCh,
		nodeState:         &sriovnetworkv1.SriovNetworkNodeState{},
		drainer: &drain.Helper{
			Client:              kubeClient,
			Force:               true,
			IgnoreAllDaemonSets: true,
			DeleteEmptyDirData:  true,
			GracePeriodSeconds:  -1,
			Timeout:             90 * time.Second,
			// Log every pod removed during a drain so operators can follow progress.
			OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) {
				verbStr := "Deleted"
				if usingEviction {
					verbStr = "Evicted"
				}
				glog.Info(fmt.Sprintf("%s pod from Node %s/%s", verbStr, pod.Namespace, pod.Name))
			},
			// Route the drain helper's own output through glog.
			Out:    writer{glog.Info},
			ErrOut: writer{glog.Error},
			Ctx:    context.Background(),
		},
		// NOTE(review): rate.Limit(updateDelay) converts a Duration (nanoseconds)
		// directly into events/second, which is effectively unlimited; the intent
		// was presumably rate.Every(updateDelay) — confirm before changing.
		workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.NewMaxOfRateLimiter(
			&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(updateDelay), 1)},
			workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, maxUpdateBackoff)), "SriovNetworkNodeState"),
	}
}
190

191
func (dn *Daemon) tryCreateUdevRuleWrapper() error {
1✔
192
        ns, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(
1✔
193
                context.Background(),
1✔
194
                dn.name,
1✔
195
                metav1.GetOptions{},
1✔
196
        )
1✔
197
        if nodeStateErr != nil {
2✔
198
                glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr)
1✔
199
        } else {
2✔
200
                err := tryCreateSwitchdevUdevRule(ns)
1✔
201
                if err != nil {
2✔
202
                        glog.Warningf("Failed to create switchdev udev rules: %v", err)
1✔
203
                }
1✔
204
        }
205

206
        // update udev rule only if we are on a BM environment
207
        // for virtual environments we don't disable the vfs as they may be used by the platform/host
208
        if dn.platform != utils.VirtualOpenStack {
2✔
209
                err := tryCreateNMUdevRule()
1✔
210
                if err != nil {
1✔
211
                        return err
×
212
                }
×
213
        }
214

215
        return nil
1✔
216
}
217

218
// Run the config daemon: start informers for this node's own
// SriovNetworkNodeState, the "default" SriovOperatorConfig and the Node
// objects, launch one worker goroutine, then block until stopCh closes or
// a fatal error arrives on exitCh.
func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error {
	if utils.ClusterType == utils.ClusterTypeOpenshift {
		glog.V(0).Infof("Run(): start daemon. openshiftFlavor: %s", dn.openshiftContext.OpenshiftFlavor)
	} else {
		glog.V(0).Infof("Run(): start daemon.")
	}

	if dn.useSystemdService {
		glog.V(0).Info("Run(): daemon running in systemd mode")
	}
	// Only watch own SriovNetworkNodeState CR
	defer utilruntime.HandleCrash()
	defer dn.workqueue.ShutDown()

	if !dn.useSystemdService {
		// In daemon mode the config-daemon owns host configuration: enable
		// optional kernel features and remove leftovers from a previous
		// systemd-mode deployment (best effort, only logged on failure).
		hostManager := host.NewHostManager(dn.useSystemdService)
		hostManager.TryEnableRdma()
		hostManager.TryEnableTun()
		hostManager.TryEnableVhostNet()
		err := systemd.CleanSriovFilesFromHost()
		if err != nil {
			glog.Warningf("failed to remove all the systemd sriov files error: %v", err)
		}
	}

	if err := dn.tryCreateUdevRuleWrapper(); err != nil {
		return err
	}

	var timeout int64 = 5
	dn.mu = &sync.Mutex{}
	// Field-select on our own name so we receive events only for this node's
	// state object.
	informerFactory := sninformer.NewFilteredSharedInformerFactory(dn.client,
		time.Second*15,
		namespace,
		func(lo *metav1.ListOptions) {
			lo.FieldSelector = "metadata.name=" + dn.name
			lo.TimeoutSeconds = &timeout
		},
	)

	informer := informerFactory.Sriovnetwork().V1().SriovNetworkNodeStates().Informer()
	informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: dn.enqueueNodeState,
		UpdateFunc: func(old, new interface{}) {
			dn.enqueueNodeState(new)
		},
	})

	// Watch the cluster-wide "default" SriovOperatorConfig for log level and
	// drain settings.
	cfgInformerFactory := sninformer.NewFilteredSharedInformerFactory(dn.client,
		time.Second*30,
		namespace,
		func(lo *metav1.ListOptions) {
			lo.FieldSelector = "metadata.name=" + "default"
		},
	)

	cfgInformer := cfgInformerFactory.Sriovnetwork().V1().SriovOperatorConfigs().Informer()
	cfgInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dn.operatorConfigAddHandler,
		UpdateFunc: dn.operatorConfigChangeHandler,
	})

	rand.Seed(time.Now().UnixNano())
	nodeInformerFactory := informers.NewSharedInformerFactory(dn.kubeClient,
		time.Second*15,
	)
	dn.nodeLister = nodeInformerFactory.Core().V1().Nodes().Lister()
	nodeInformer := nodeInformerFactory.Core().V1().Nodes().Informer()
	nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dn.nodeAddHandler,
		UpdateFunc: dn.nodeUpdateHandler,
	})
	go cfgInformer.Run(dn.stopCh)
	go nodeInformer.Run(dn.stopCh)
	// NOTE(review): the sleep presumably gives the config/node informers a
	// head start before node state events fire; WaitForCacheSync below already
	// guarantees synced caches — confirm whether this delay is still needed.
	time.Sleep(5 * time.Second)
	go informer.Run(dn.stopCh)
	if ok := cache.WaitForCacheSync(stopCh, cfgInformer.HasSynced, nodeInformer.HasSynced, informer.HasSynced); !ok {
		return fmt.Errorf("failed to wait for caches to sync")
	}

	glog.Info("Starting workers")
	// Launch one worker to process
	go wait.Until(dn.runWorker, time.Second, stopCh)
	glog.Info("Started workers")

	for {
		select {
		case <-stopCh:
			glog.V(0).Info("Run(): stop daemon")
			return nil
		case err, more := <-exitCh:
			glog.Warningf("Got an error: %v", err)
			if more {
				dn.refreshCh <- Message{
					syncStatus:    syncStatusFailed,
					lastSyncError: err.Error(),
				}
			}
			return err
		case <-time.After(30 * time.Second):
			// Periodically re-apply the udev rules in case they were removed
			// out-of-band; failures here are non-fatal.
			glog.V(2).Info("Run(): period refresh")
			if err := dn.tryCreateUdevRuleWrapper(); err != nil {
				glog.V(2).Info("Could not create udev rule: ", err)
			}
		}
	}
}
326

327
func (dn *Daemon) runWorker() {
1✔
328
        for dn.processNextWorkItem() {
2✔
329
        }
1✔
330
}
331

332
func (dn *Daemon) enqueueNodeState(obj interface{}) {
1✔
333
        var ns *sriovnetworkv1.SriovNetworkNodeState
1✔
334
        var ok bool
1✔
335
        if ns, ok = obj.(*sriovnetworkv1.SriovNetworkNodeState); !ok {
1✔
336
                utilruntime.HandleError(fmt.Errorf("expected SriovNetworkNodeState but got %#v", obj))
×
337
                return
×
338
        }
×
339
        key := ns.GetGeneration()
1✔
340
        dn.workqueue.Add(key)
1✔
341
}
342

343
// processNextWorkItem takes one generation key off the workqueue and runs
// nodeStateSyncHandler for it. It returns false only when the queue has been
// shut down; errors are reported to the status writer and the item is
// requeued with rate limiting.
func (dn *Daemon) processNextWorkItem() bool {
	glog.V(2).Infof("worker queue size: %d", dn.workqueue.Len())
	obj, shutdown := dn.workqueue.Get()
	if shutdown {
		return false
	}

	glog.V(2).Infof("get item: %d", obj.(int64))

	// We wrap this block in a func so we can defer c.workqueue.Done.
	err := func(obj interface{}) error {
		// We call Done here so the workqueue knows we have finished
		// processing this item.
		defer dn.workqueue.Done(obj)
		var key int64
		var ok bool
		if key, ok = obj.(int64); !ok {
			// As the item in the workqueue is actually invalid, we call
			// Forget here.
			dn.workqueue.Forget(obj)
			utilruntime.HandleError(fmt.Errorf("expected workItem in workqueue but got %#v", obj))
			return nil
		}

		err := dn.nodeStateSyncHandler()
		if err != nil {
			// Report the error message, and put the item back to the work
			// queue for retry.
			dn.refreshCh <- Message{
				syncStatus:    syncStatusFailed,
				lastSyncError: err.Error(),
			}
			// Block until the status writer has consumed the message before
			// requeueing, so status updates stay ordered.
			<-dn.syncCh
			dn.workqueue.AddRateLimited(key)
			return fmt.Errorf("error syncing: %s, requeuing", err.Error())
		}
		// Finally, if no error occurs we Forget this item so it does not
		// get queued again until another change happens.
		dn.workqueue.Forget(obj)
		glog.Infof("Successfully synced")
		return nil
	}(obj)

	if err != nil {
		utilruntime.HandleError(err)
	}

	return true
}
391

392
// nodeAddHandler handles the informer Add event for Node objects by
// delegating to the update handler (the old object is nil on Add).
func (dn *Daemon) nodeAddHandler(obj interface{}) {
	dn.nodeUpdateHandler(nil, obj)
}
1✔
395

396
func (dn *Daemon) nodeUpdateHandler(old, new interface{}) {
1✔
397
        node, err := dn.nodeLister.Get(dn.name)
1✔
398
        if errors.IsNotFound(err) {
1✔
399
                glog.V(2).Infof("nodeUpdateHandler(): node %v has been deleted", dn.name)
×
400
                return
×
401
        }
×
402
        dn.node = node.DeepCopy()
1✔
403
        nodes, err := dn.nodeLister.List(labels.Everything())
1✔
404
        if err != nil {
1✔
405
                return
×
406
        }
×
407
        for _, node := range nodes {
2✔
408
                if node.GetName() != dn.name && (node.Annotations[annoKey] == annoDraining || node.Annotations[annoKey] == annoMcpPaused) {
1✔
409
                        dn.drainable = false
×
410
                        return
×
411
                }
×
412
        }
413
        dn.drainable = true
1✔
414
}
415

416
// operatorConfigAddHandler handles the informer Add event for the
// SriovOperatorConfig by treating it as a change from an empty config.
func (dn *Daemon) operatorConfigAddHandler(obj interface{}) {
	dn.operatorConfigChangeHandler(&sriovnetworkv1.SriovOperatorConfig{}, obj)
}
×
419

420
func (dn *Daemon) operatorConfigChangeHandler(old, new interface{}) {
×
421
        newCfg := new.(*sriovnetworkv1.SriovOperatorConfig)
×
422
        var level = glog.Level(newCfg.Spec.LogLevel)
×
423
        if level != flag.Lookup("v").Value.(flag.Getter).Get() {
×
424
                glog.Infof("Set log verbose level to: %d", level)
×
425
                flag.Set("v", level.String())
×
426
        }
×
427
        newDisableDrain := newCfg.Spec.DisableDrain
×
428
        if dn.disableDrain != newDisableDrain {
×
429
                dn.disableDrain = newDisableDrain
×
430
                glog.Infof("Set Disable Drain to: %t", dn.disableDrain)
×
431
        }
×
432
}
433

434
func (dn *Daemon) nodeStateSyncHandler() error {
1✔
435
        var err error
1✔
436
        // Get the latest NodeState
1✔
437
        var latestState *sriovnetworkv1.SriovNetworkNodeState
1✔
438
        var sriovResult *systemd.SriovResult
1✔
439
        latestState, err = dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), dn.name, metav1.GetOptions{})
1✔
440
        if err != nil {
1✔
441
                glog.Warningf("nodeStateSyncHandler(): Failed to fetch node state %s: %v", dn.name, err)
×
442
                return err
×
443
        }
×
444
        latest := latestState.GetGeneration()
1✔
445
        glog.V(0).Infof("nodeStateSyncHandler(): new generation is %d", latest)
1✔
446

1✔
447
        reqReboot := false
1✔
448
        reqDrain := false
1✔
449

1✔
450
        if utils.ClusterType == utils.ClusterTypeOpenshift && !dn.openshiftContext.IsHypershift() {
1✔
451
                if err = dn.getNodeMachinePool(); err != nil {
×
452
                        return err
×
453
                }
×
454
        }
455

456
        if dn.nodeState.GetGeneration() == latest {
1✔
457
                glog.V(0).Infof("nodeStateSyncHandler(): Interface not changed")
×
458
                if latestState.Status.LastSyncError != "" ||
×
459
                        latestState.Status.SyncStatus != syncStatusSucceeded {
×
460
                        dn.refreshCh <- Message{
×
461
                                syncStatus:    syncStatusSucceeded,
×
462
                                lastSyncError: "",
×
463
                        }
×
464
                        // wait for writer to refresh the status
×
465
                        <-dn.syncCh
×
466
                }
×
467

468
                return nil
×
469
        }
470

471
        if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 {
1✔
472
                glog.V(0).Infof("nodeStateSyncHandler(): Name: %s, Interface policy spec not yet set by controller", latestState.Name)
×
473
                if latestState.Status.SyncStatus != "Succeeded" {
×
474
                        dn.refreshCh <- Message{
×
475
                                syncStatus:    "Succeeded",
×
476
                                lastSyncError: "",
×
477
                        }
×
478
                        // wait for writer to refresh status
×
479
                        <-dn.syncCh
×
480
                }
×
481
                return nil
×
482
        }
483

484
        if dn.useSystemdService {
1✔
485
                serviceExist, err := dn.serviceManager.IsServiceExist(systemd.SriovServicePath)
×
486
                if err != nil {
×
487
                        glog.Errorf("nodeStateSyncHandler(): failed to check if sriov-config service exist on host: %v", err)
×
488
                        return err
×
489
                }
×
490

491
                // if the service doesn't exist we should continue to let the k8s plugin to create the service files
492
                // this is only for k8s base environments, for openshift the sriov-operator creates a machine config to will apply
493
                // the system service and reboot the node the config-daemon doesn't need to do anything.
494
                if !serviceExist {
×
495
                        sriovResult = &systemd.SriovResult{SyncStatus: syncStatusFailed, LastSyncError: "sriov-config systemd service doesn't exist on node"}
×
496
                } else {
×
497
                        sriovResult, err = systemd.ReadSriovResult()
×
498
                        if err != nil {
×
499
                                glog.Errorf("nodeStateSyncHandler(): failed to load sriov result file from host: %v", err)
×
500
                                return err
×
501
                        }
×
502

503
                        if dn.nodeState.GetGeneration() == latest {
×
504
                                if sriovResult.LastSyncError != "" || sriovResult.SyncStatus != syncStatusSucceeded {
×
505
                                        glog.Infof("nodeStateSyncHandler(): sync failed systemd service error: %s", sriovResult.LastSyncError)
×
506
                                        dn.nodeState = latestState.DeepCopy()
×
507

×
508
                                        // add the error but don't requeue
×
509
                                        dn.refreshCh <- Message{
×
510
                                                syncStatus:    syncStatusFailed,
×
511
                                                lastSyncError: sriovResult.LastSyncError,
×
512
                                        }
×
513
                                        <-dn.syncCh
×
514
                                        return nil
×
515
                                }
×
516
                        }
517
                }
518
        }
519

520
        dn.refreshCh <- Message{
1✔
521
                syncStatus:    "InProgress",
1✔
522
                lastSyncError: "",
1✔
523
        }
1✔
524

1✔
525
        // load plugins if it has not loaded
1✔
526
        if len(dn.enabledPlugins) == 0 {
1✔
527
                dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, latestState)
×
528
                if err != nil {
×
529
                        glog.Errorf("nodeStateSyncHandler(): failed to enable vendor plugins error: %v", err)
×
530
                        return err
×
531
                }
×
532
        }
533

534
        // check if any of the plugins required to drain or reboot the node
535
        for k, p := range dn.enabledPlugins {
2✔
536
                d, r := false, false
1✔
537
                if dn.nodeState.GetName() == "" {
2✔
538
                        glog.V(0).Infof("nodeStateSyncHandler(): calling OnNodeStateChange for a new node state")
1✔
539
                } else {
1✔
540
                        glog.V(0).Infof("nodeStateSyncHandler(): calling OnNodeStateChange for an updated node state")
×
541
                }
×
542
                d, r, err = p.OnNodeStateChange(latestState)
1✔
543
                if err != nil {
1✔
544
                        glog.Errorf("nodeStateSyncHandler(): plugin %s error: %v", k, err)
×
545
                        return err
×
546
                }
×
547
                glog.V(0).Infof("nodeStateSyncHandler(): plugin %s: reqDrain %v, reqReboot %v", k, d, r)
1✔
548
                reqDrain = reqDrain || d
1✔
549
                reqReboot = reqReboot || r
1✔
550
        }
551

552
        // When running using systemd check if the applied configuration is the latest one
553
        // or there is a new config we need to apply
554
        // When using systemd configuration we write the file
555
        if dn.useSystemdService {
1✔
556
                r, err := systemd.WriteConfFile(latestState, dn.devMode, dn.platform)
×
557
                if err != nil {
×
558
                        glog.Errorf("nodeStateSyncHandler(): failed to write configuration file for systemd mode: %v", err)
×
559
                        return err
×
560
                }
×
561
                reqDrain = reqDrain || r
×
562
                reqReboot = reqReboot || r
×
563
                glog.V(0).Infof("nodeStateSyncHandler(): systemd mode reqDrain %v, reqReboot %v disableDrain %v", r, r, dn.disableDrain)
×
564

×
565
                err = systemd.WriteSriovSupportedNics()
×
566
                if err != nil {
×
567
                        glog.Errorf("nodeStateSyncHandler(): failed to write supported nic ids file for systemd mode: %v", err)
×
568
                        return err
×
569
                }
×
570
        }
571
        glog.V(0).Infof("nodeStateSyncHandler(): aggregated daemon reqDrain %v, reqReboot %v disableDrain %v", reqDrain, reqReboot, dn.disableDrain)
1✔
572

1✔
573
        for k, p := range dn.enabledPlugins {
2✔
574
                // Skip both the general and virtual plugin apply them last
1✔
575
                if k != GenericPluginName && k != VirtualPluginName {
1✔
576
                        err := p.Apply()
×
577
                        if err != nil {
×
578
                                glog.Errorf("nodeStateSyncHandler(): plugin %s fail to apply: %v", k, err)
×
579
                                return err
×
580
                        }
×
581
                }
582
        }
583

584
        if reqDrain {
1✔
585
                if !dn.isNodeDraining() {
×
586
                        if !dn.disableDrain {
×
587
                                ctx, cancel := context.WithCancel(context.TODO())
×
588
                                defer cancel()
×
589

×
590
                                glog.Infof("nodeStateSyncHandler(): get drain lock for sriov daemon")
×
591
                                done := make(chan bool)
×
592
                                go dn.getDrainLock(ctx, done)
×
593
                                <-done
×
594
                        }
×
595

596
                        if utils.ClusterType == utils.ClusterTypeOpenshift && !dn.openshiftContext.IsHypershift() {
×
597
                                glog.Infof("nodeStateSyncHandler(): pause MCP")
×
598
                                if err := dn.pauseMCP(); err != nil {
×
599
                                        return err
×
600
                                }
×
601
                        }
602
                }
603

604
                glog.Info("nodeStateSyncHandler(): drain node")
×
605
                if err := dn.drainNode(); err != nil {
×
606
                        return err
×
607
                }
×
608
        }
609

610
        if !reqReboot && !dn.useSystemdService {
2✔
611
                // For BareMetal machines apply the generic plugin
1✔
612
                selectedPlugin, ok := dn.enabledPlugins[GenericPluginName]
1✔
613
                if ok {
2✔
614
                        // Apply generic_plugin last
1✔
615
                        err = selectedPlugin.Apply()
1✔
616
                        if err != nil {
1✔
617
                                glog.Errorf("nodeStateSyncHandler(): generic_plugin fail to apply: %v", err)
×
618
                                return err
×
619
                        }
×
620
                }
621

622
                // For Virtual machines apply the virtual plugin
623
                selectedPlugin, ok = dn.enabledPlugins[VirtualPluginName]
1✔
624
                if ok {
1✔
625
                        // Apply virtual_plugin last
×
626
                        err = selectedPlugin.Apply()
×
627
                        if err != nil {
×
628
                                glog.Errorf("nodeStateSyncHandler(): generic_plugin fail to apply: %v", err)
×
629
                                return err
×
630
                        }
×
631
                }
632
        }
633

634
        if reqReboot {
1✔
635
                glog.Info("nodeStateSyncHandler(): reboot node")
×
636
                rebootNode()
×
637
                return nil
×
638
        }
×
639

640
        // restart device plugin pod
641
        glog.Info("nodeStateSyncHandler(): restart device plugin pod")
1✔
642
        if err := dn.restartDevicePluginPod(); err != nil {
1✔
643
                glog.Errorf("nodeStateSyncHandler(): fail to restart device plugin pod: %v", err)
×
644
                return err
×
645
        }
×
646
        if dn.isNodeDraining() {
1✔
647
                if err := dn.completeDrain(); err != nil {
×
648
                        glog.Errorf("nodeStateSyncHandler(): failed to complete draining: %v", err)
×
649
                        return err
×
650
                }
×
651
        } else {
1✔
652
                if !dn.nodeHasAnnotation(annoKey, annoIdle) {
2✔
653
                        if err := dn.annotateNode(dn.name, annoIdle); err != nil {
1✔
654
                                glog.Errorf("nodeStateSyncHandler(): failed to annotate node: %v", err)
×
655
                                return err
×
656
                        }
×
657
                }
658
        }
659
        glog.Info("nodeStateSyncHandler(): sync succeeded")
1✔
660
        dn.nodeState = latestState.DeepCopy()
1✔
661
        if dn.useSystemdService {
1✔
662
                dn.refreshCh <- Message{
×
663
                        syncStatus:    sriovResult.SyncStatus,
×
664
                        lastSyncError: sriovResult.LastSyncError,
×
665
                }
×
666
        } else {
1✔
667
                dn.refreshCh <- Message{
1✔
668
                        syncStatus:    syncStatusSucceeded,
1✔
669
                        lastSyncError: "",
1✔
670
                }
1✔
671
        }
1✔
672
        // wait for writer to refresh the status
673
        <-dn.syncCh
1✔
674
        return nil
1✔
675
}
676

677
func (dn *Daemon) nodeHasAnnotation(annoKey string, value string) bool {
1✔
678
        // Check if node already contains annotation
1✔
679
        if anno, ok := dn.node.Annotations[annoKey]; ok && (anno == value) {
1✔
680
                return true
×
681
        }
×
682
        return false
1✔
683
}
684

685
func (dn *Daemon) isNodeDraining() bool {
1✔
686
        if anno, ok := dn.node.Annotations[annoKey]; ok && (anno == annoDraining || anno == annoMcpPaused) {
1✔
687
                return true
×
688
        }
×
689
        return false
1✔
690
}
691

692
func (dn *Daemon) completeDrain() error {
×
693
        if !dn.disableDrain {
×
694
                if err := drain.RunCordonOrUncordon(dn.drainer, dn.node, false); err != nil {
×
695
                        return err
×
696
                }
×
697
        }
698

699
        if utils.ClusterType == utils.ClusterTypeOpenshift && !dn.openshiftContext.IsHypershift() {
×
700
                glog.Infof("completeDrain(): resume MCP %s", dn.mcpName)
×
701
                pausePatch := []byte("{\"spec\":{\"paused\":false}}")
×
702
                if _, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}); err != nil {
×
703
                        glog.Errorf("completeDrain(): failed to resume MCP %s: %v", dn.mcpName, err)
×
704
                        return err
×
705
                }
×
706
        }
707

708
        if err := dn.annotateNode(dn.name, annoIdle); err != nil {
×
709
                glog.Errorf("completeDrain(): failed to annotate node: %v", err)
×
710
                return err
×
711
        }
×
712
        return nil
×
713
}
714

715
func (dn *Daemon) restartDevicePluginPod() error {
1✔
716
        dn.mu.Lock()
1✔
717
        defer dn.mu.Unlock()
1✔
718
        glog.V(2).Infof("restartDevicePluginPod(): try to restart device plugin pod")
1✔
719

1✔
720
        var podToDelete string
1✔
721
        pods, err := dn.kubeClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
1✔
722
                LabelSelector: "app=sriov-device-plugin",
1✔
723
                FieldSelector: "spec.nodeName=" + dn.name,
1✔
724
        })
1✔
725
        if err != nil {
1✔
726
                if errors.IsNotFound(err) {
×
727
                        glog.Info("restartDevicePluginPod(): device plugin pod exited")
×
728
                        return nil
×
729
                }
×
730
                glog.Warningf("restartDevicePluginPod(): Failed to list device plugin pod: %s, retrying", err)
×
731
                return err
×
732
        }
733

734
        if len(pods.Items) == 0 {
1✔
735
                glog.Info("restartDevicePluginPod(): device plugin pod exited")
×
736
                return nil
×
737
        }
×
738
        podToDelete = pods.Items[0].Name
1✔
739

1✔
740
        glog.V(2).Infof("restartDevicePluginPod(): Found device plugin pod %s, deleting it", podToDelete)
1✔
741
        err = dn.kubeClient.CoreV1().Pods(namespace).Delete(context.Background(), podToDelete, metav1.DeleteOptions{})
1✔
742
        if errors.IsNotFound(err) {
1✔
743
                glog.Info("restartDevicePluginPod(): pod to delete not found")
×
744
                return nil
×
745
        }
×
746
        if err != nil {
1✔
747
                glog.Errorf("restartDevicePluginPod(): Failed to delete device plugin pod: %s, retrying", err)
×
748
                return err
×
749
        }
×
750

751
        if err := wait.PollImmediateUntil(3*time.Second, func() (bool, error) {
2✔
752
                _, err := dn.kubeClient.CoreV1().Pods(namespace).Get(context.Background(), podToDelete, metav1.GetOptions{})
1✔
753
                if errors.IsNotFound(err) {
2✔
754
                        glog.Info("restartDevicePluginPod(): device plugin pod exited")
1✔
755
                        return true, nil
1✔
756
                }
1✔
757

758
                if err != nil {
×
759
                        glog.Warningf("restartDevicePluginPod(): Failed to check for device plugin exit: %s, retrying", err)
×
760
                } else {
×
761
                        glog.Infof("restartDevicePluginPod(): waiting for device plugin %s to exit", podToDelete)
×
762
                }
×
763
                return false, nil
×
764
        }, dn.stopCh); err != nil {
×
765
                glog.Errorf("restartDevicePluginPod(): failed to wait for checking pod deletion: %v", err)
×
766
                return err
×
767
        }
×
768

769
        return nil
1✔
770
}
771

772
func rebootNode() {
×
773
        glog.Infof("rebootNode(): trigger node reboot")
×
774
        exit, err := utils.Chroot("/host")
×
775
        if err != nil {
×
776
                glog.Errorf("rebootNode(): %v", err)
×
777
        }
×
778
        defer exit()
×
779
        // creates a new transient systemd unit to reboot the system.
×
780
        // We explictily try to stop kubelet.service first, before anything else; this
×
781
        // way we ensure the rest of system stays running, because kubelet may need
×
782
        // to do "graceful" shutdown by e.g. de-registering with a load balancer.
×
783
        // However note we use `;` instead of `&&` so we keep rebooting even
×
784
        // if kubelet failed to shutdown - that way the machine will still eventually reboot
×
785
        // as systemd will time out the stop invocation.
×
786
        cmd := exec.Command("systemd-run", "--unit", "sriov-network-config-daemon-reboot",
×
787
                "--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl stop kubelet.service; reboot")
×
788

×
789
        if err := cmd.Run(); err != nil {
×
790
                glog.Errorf("failed to reboot node: %v", err)
×
791
        }
×
792
}
793

794
func (dn *Daemon) annotateNode(node, value string) error {
1✔
795
        glog.Infof("annotateNode(): Annotate node %s with: %s", node, value)
1✔
796

1✔
797
        oldNode, err := dn.kubeClient.CoreV1().Nodes().Get(context.Background(), dn.name, metav1.GetOptions{})
1✔
798
        if err != nil {
1✔
799
                glog.Infof("annotateNode(): Failed to get node %s %v, retrying", node, err)
×
800
                return err
×
801
        }
×
802
        oldData, err := json.Marshal(oldNode)
1✔
803
        if err != nil {
1✔
804
                return err
×
805
        }
×
806

807
        newNode := oldNode.DeepCopy()
1✔
808
        if newNode.Annotations == nil {
2✔
809
                newNode.Annotations = map[string]string{}
1✔
810
        }
1✔
811
        if newNode.Annotations[annoKey] != value {
2✔
812
                newNode.Annotations[annoKey] = value
1✔
813
                newData, err := json.Marshal(newNode)
1✔
814
                if err != nil {
1✔
815
                        return err
×
816
                }
×
817
                patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, corev1.Node{})
1✔
818
                if err != nil {
1✔
819
                        return err
×
820
                }
×
821
                _, err = dn.kubeClient.CoreV1().Nodes().Patch(context.Background(),
1✔
822
                        dn.name,
1✔
823
                        types.StrategicMergePatchType,
1✔
824
                        patchBytes,
1✔
825
                        metav1.PatchOptions{})
1✔
826
                if err != nil {
1✔
827
                        glog.Infof("annotateNode(): Failed to patch node %s: %v", node, err)
×
828
                        return err
×
829
                }
×
830
        }
831
        return nil
1✔
832
}
833

834
func (dn *Daemon) getNodeMachinePool() error {
×
835
        desiredConfig, ok := dn.node.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey]
×
836
        if !ok {
×
837
                glog.Errorf("getNodeMachinePool(): Failed to find the the desiredConfig Annotation")
×
838
                return fmt.Errorf("getNodeMachinePool(): Failed to find the the desiredConfig Annotation")
×
839
        }
×
840
        mc, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), desiredConfig, metav1.GetOptions{})
×
841
        if err != nil {
×
842
                glog.Errorf("getNodeMachinePool(): Failed to get the desired Machine Config: %v", err)
×
843
                return err
×
844
        }
×
845
        for _, owner := range mc.OwnerReferences {
×
846
                if owner.Kind == "MachineConfigPool" {
×
847
                        dn.mcpName = owner.Name
×
848
                        return nil
×
849
                }
×
850
        }
851
        glog.Error("getNodeMachinePool(): Failed to find the MCP of the node")
×
852
        return fmt.Errorf("getNodeMachinePool(): Failed to find the MCP of the node")
×
853
}
854

855
// getDrainLock acquires the cluster-wide "config-daemon-draining-lock" lease
// and, while holding it, waits until this node is cleared to drain. When the
// node is cleared (it is annotated Draining, or was already left in
// Draining_MCP_Paused from a previous round), a single `true` is sent on done
// and leadership is released (ReleaseOnCancel) once ctx is cancelled.
func (dn *Daemon) getDrainLock(ctx context.Context, done chan bool) {
	var err error

	// Lease shared by every config-daemon instance; the holder identity is
	// this node's name, so only one node coordinates draining at a time.
	lock := &resourcelock.LeaseLock{
		LeaseMeta: metav1.ObjectMeta{
			Name:      "config-daemon-draining-lock",
			Namespace: namespace,
		},
		Client: dn.kubeClient.CoordinationV1(),
		LockConfig: resourcelock.ResourceLockConfig{
			Identity: dn.name,
		},
	}

	// start the leader election
	leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
		Lock:            lock,
		ReleaseOnCancel: true,
		LeaseDuration:   5 * time.Second,
		RenewDeadline:   3 * time.Second,
		RetryPeriod:     1 * time.Second,
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: func(ctx context.Context) {
				glog.V(2).Info("getDrainLock(): started leading")
				// Poll every 3s while leading until this node may proceed.
				for {
					time.Sleep(3 * time.Second)
					if dn.node.Annotations[annoKey] == annoMcpPaused {
						// The node in Draining_MCP_Paused state, no other node is draining. Skip drainable checking
						done <- true
						return
					}
					if dn.drainable {
						glog.V(2).Info("getDrainLock(): no other node is draining")
						// Mark this node as Draining before signalling done;
						// on annotation failure, retry on the next tick.
						err = dn.annotateNode(dn.name, annoDraining)
						if err != nil {
							glog.Errorf("getDrainLock(): Failed to annotate node: %v", err)
							continue
						}
						done <- true
						return
					}
					glog.V(2).Info("getDrainLock(): other node is draining, wait...")
				}
			},
			OnStoppedLeading: func() {
				glog.V(2).Info("getDrainLock(): stopped leading")
			},
		},
	})
}
905

906
// pauseMCP pauses this node's MachineConfigPool so the MachineConfigOperator
// does not reboot the node while sriov reconfiguration is in progress. It
// watches the MCP through an informer, pauses it once the pool is quiescent
// (not degraded, updated, not updating), annotates the node Draining_MCP_Paused,
// and then stops the informer. If the node was already left in
// Draining_MCP_Paused from a previous round, the watch is skipped entirely.
func (dn *Daemon) pauseMCP() error {
	glog.Info("pauseMCP(): pausing MCP")
	var err error

	mcpInformerFactory := mcfginformers.NewSharedInformerFactory(dn.openshiftContext.McClient,
		time.Second*30,
	)
	mcpInformer := mcpInformerFactory.Machineconfiguration().V1().MachineConfigPools().Informer()

	// cancel() both stops the informer factory and unblocks the <-ctx.Done()
	// wait at the bottom of this function.
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	// paused tracks whether WE paused the MCP in this or a previous round.
	paused := dn.node.Annotations[annoKey] == annoMcpPaused

	mcpEventHandler := func(obj interface{}) {
		mcp := obj.(*mcfgv1.MachineConfigPool)
		// Only react to events for the MCP that owns this node.
		if mcp.GetName() != dn.mcpName {
			return
		}
		// Always get the latest object
		newMcp, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, dn.mcpName, metav1.GetOptions{})
		if err != nil {
			glog.V(2).Infof("pauseMCP(): Failed to get MCP %s: %v", dn.mcpName, err)
			return
		}
		// Quiescent pool: safe to pause (or, if already paused by us, to stop).
		if mcfgv1.IsMachineConfigPoolConditionFalse(newMcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) &&
			mcfgv1.IsMachineConfigPoolConditionTrue(newMcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) &&
			mcfgv1.IsMachineConfigPoolConditionFalse(newMcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) {
			glog.V(2).Infof("pauseMCP(): MCP %s is ready", dn.mcpName)
			if paused {
				glog.V(2).Info("pauseMCP(): stop MCP informer")
				cancel()
				return
			}
			if newMcp.Spec.Paused {
				// Another actor paused the pool; wait until it is released.
				glog.V(2).Infof("pauseMCP(): MCP %s was paused by other, wait...", dn.mcpName)
				return
			}
			glog.Infof("pauseMCP(): pause MCP %s", dn.mcpName)
			pausePatch := []byte("{\"spec\":{\"paused\":true}}")
			_, err = dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{})
			if err != nil {
				glog.V(2).Infof("pauseMCP(): Failed to pause MCP %s: %v", dn.mcpName, err)
				return
			}
			// Record the paused state on the node so a daemon restart can
			// resume from here.
			err = dn.annotateNode(dn.name, annoMcpPaused)
			if err != nil {
				glog.V(2).Infof("pauseMCP(): Failed to annotate node: %v", err)
				return
			}
			paused = true
			return
		}
		// Pool became busy while we held the pause: resume it and fall back
		// to plain Draining so MCO can make progress first.
		if paused {
			glog.Infof("pauseMCP(): MCP is processing, resume MCP %s", dn.mcpName)
			pausePatch := []byte("{\"spec\":{\"paused\":false}}")
			_, err = dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{})
			if err != nil {
				glog.V(2).Infof("pauseMCP(): fail to resume MCP %s: %v", dn.mcpName, err)
				return
			}
			err = dn.annotateNode(dn.name, annoDraining)
			if err != nil {
				glog.V(2).Infof("pauseMCP(): Failed to annotate node: %v", err)
				return
			}
			paused = false
		}
		glog.Infof("pauseMCP():MCP %s is not ready: %v, wait...", newMcp.GetName(), newMcp.Status.Conditions)
	}

	mcpInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: mcpEventHandler,
		UpdateFunc: func(old, new interface{}) {
			mcpEventHandler(new)
		},
	})

	// The Draining_MCP_Paused state means the MCP work has been paused by the config daemon in previous round.
	// Only check MCP state if the node is not in Draining_MCP_Paused state
	if !paused {
		mcpInformerFactory.Start(ctx.Done())
		mcpInformerFactory.WaitForCacheSync(ctx.Done())
		<-ctx.Done()
	}

	return err
}
993

994
func (dn *Daemon) drainNode() error {
×
995
        if dn.disableDrain {
×
996
                glog.Info("drainNode(): disable drain is true skipping drain")
×
997
                return nil
×
998
        }
×
999

1000
        glog.Info("drainNode(): Update prepared")
×
1001
        var err error
×
1002

×
1003
        backoff := wait.Backoff{
×
1004
                Steps:    5,
×
1005
                Duration: 10 * time.Second,
×
1006
                Factor:   2,
×
1007
        }
×
1008
        var lastErr error
×
1009

×
1010
        glog.Info("drainNode(): Start draining")
×
1011
        if err = wait.ExponentialBackoff(backoff, func() (bool, error) {
×
1012
                err := drain.RunCordonOrUncordon(dn.drainer, dn.node, true)
×
1013
                if err != nil {
×
1014
                        lastErr = err
×
1015
                        glog.Infof("Cordon failed with: %v, retrying", err)
×
1016
                        return false, nil
×
1017
                }
×
1018
                err = drain.RunNodeDrain(dn.drainer, dn.name)
×
1019
                if err == nil {
×
1020
                        return true, nil
×
1021
                }
×
1022
                lastErr = err
×
1023
                glog.Infof("Draining failed with: %v, retrying", err)
×
1024
                return false, nil
×
1025
        }); err != nil {
×
1026
                if err == wait.ErrWaitTimeout {
×
1027
                        glog.Errorf("drainNode(): failed to drain node (%d tries): %v :%v", backoff.Steps, err, lastErr)
×
1028
                }
×
1029
                glog.Errorf("drainNode(): failed to drain node: %v", err)
×
1030
                return err
×
1031
        }
1032
        glog.Info("drainNode(): drain complete")
×
1033
        return nil
×
1034
}
1035

1036
func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) error {
1✔
1037
        glog.V(2).Infof("tryCreateSwitchdevUdevRule()")
1✔
1038
        var newContent string
1✔
1039
        filePath := path.Join(filesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules")
1✔
1040

1✔
1041
        for _, ifaceStatus := range nodeState.Status.Interfaces {
2✔
1042
                if ifaceStatus.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev {
1✔
1043
                        switchID, err := utils.GetPhysSwitchID(ifaceStatus.Name)
×
1044
                        if err != nil {
×
1045
                                return err
×
1046
                        }
×
1047
                        portName, err := utils.GetPhysPortName(ifaceStatus.Name)
×
1048
                        if err != nil {
×
1049
                                return err
×
1050
                        }
×
1051
                        newContent = newContent + fmt.Sprintf("SUBSYSTEM==\"net\", ACTION==\"add|move\", ATTRS{phys_switch_id}==\"%s\", ATTR{phys_port_name}==\"pf%svf*\", IMPORT{program}=\"/etc/udev/switchdev-vf-link-name.sh $attr{phys_port_name}\", NAME=\"%s_$env{NUMBER}\"\n", switchID, strings.TrimPrefix(portName, "p"), ifaceStatus.Name)
×
1052
                }
1053
        }
1054

1055
        oldContent, err := ioutil.ReadFile(filePath)
1✔
1056
        // if oldContent = newContent, don't do anything
1✔
1057
        if err == nil && newContent == string(oldContent) {
1✔
1058
                return nil
×
1059
        }
×
1060

1061
        glog.V(2).Infof("Old udev content '%v' and new content '%v' differ. Writing to file %v.",
1✔
1062
                strings.TrimSuffix(string(oldContent), "\n"),
1✔
1063
                strings.TrimSuffix(newContent, "\n"),
1✔
1064
                filePath)
1✔
1065

1✔
1066
        // if the file does not exist or if oldContent != newContent
1✔
1067
        // write to file and create it if it doesn't exist
1✔
1068
        err = ioutil.WriteFile(filePath, []byte(newContent), 0664)
1✔
1069
        if err != nil {
2✔
1070
                glog.Errorf("tryCreateSwitchdevUdevRule(): fail to write file: %v", err)
1✔
1071
                return err
1✔
1072
        }
1✔
1073

1074
        var stdout, stderr bytes.Buffer
×
1075
        cmd := exec.Command("/bin/bash", path.Join(filesystemRoot, udevScriptsPath))
×
1076
        cmd.Stdout = &stdout
×
1077
        cmd.Stderr = &stderr
×
1078
        if err := cmd.Run(); err != nil {
×
1079
                return err
×
1080
        }
×
1081
        glog.V(2).Infof("tryCreateSwitchdevUdevRule(): %v", cmd.Stdout)
×
1082

×
1083
        i, err := strconv.Atoi(strings.TrimSpace(stdout.String()))
×
1084
        if err == nil {
×
1085
                if i == 0 {
×
1086
                        glog.V(2).Infof("tryCreateSwitchdevUdevRule(): switchdev udev rules loaded")
×
1087
                } else {
×
1088
                        glog.V(2).Infof("tryCreateSwitchdevUdevRule(): switchdev udev rules not loaded")
×
1089
                }
×
1090
        }
1091
        return nil
×
1092
}
1093

1094
func tryCreateNMUdevRule() error {
1✔
1095
        glog.V(2).Infof("tryCreateNMUdevRule()")
1✔
1096
        dirPath := path.Join(filesystemRoot, "/host/etc/udev/rules.d")
1✔
1097
        filePath := path.Join(dirPath, "10-nm-unmanaged.rules")
1✔
1098

1✔
1099
        newContent := fmt.Sprintf("ACTION==\"add|change|move\", ATTRS{device}==\"%s\", ENV{NM_UNMANAGED}=\"1\"\n", strings.Join(sriovnetworkv1.GetSupportedVfIds(), "|"))
1✔
1100

1✔
1101
        // add NM udev rules for renaming VF rep
1✔
1102
        newContent = newContent + "SUBSYSTEM==\"net\", ACTION==\"add|move\", ATTRS{phys_switch_id}!=\"\", ATTR{phys_port_name}==\"pf*vf*\", ENV{NM_UNMANAGED}=\"1\"\n"
1✔
1103

1✔
1104
        oldContent, err := ioutil.ReadFile(filePath)
1✔
1105
        // if oldContent = newContent, don't do anything
1✔
1106
        if err == nil && newContent == string(oldContent) {
1✔
1107
                return nil
×
1108
        }
×
1109

1110
        glog.V(2).Infof("Old udev content '%v' and new content '%v' differ. Writing to file %v.",
1✔
1111
                strings.TrimSuffix(string(oldContent), "\n"),
1✔
1112
                strings.TrimSuffix(newContent, "\n"),
1✔
1113
                filePath)
1✔
1114

1✔
1115
        err = os.MkdirAll(dirPath, os.ModePerm)
1✔
1116
        if err != nil && !os.IsExist(err) {
1✔
1117
                glog.Errorf("tryCreateNMUdevRule(): failed to create dir %s: %v", dirPath, err)
×
1118
                return err
×
1119
        }
×
1120

1121
        // if the file does not exist or if oldContent != newContent
1122
        // write to file and create it if it doesn't exist
1123
        err = ioutil.WriteFile(filePath, []byte(newContent), 0666)
1✔
1124
        if err != nil {
1✔
1125
                glog.Errorf("tryCreateNMUdevRule(): fail to write file: %v", err)
×
1126
                return err
×
1127
        }
×
1128
        return nil
1✔
1129
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc