k8snetworkplumbingwg / sriov-network-operator / 18002387940

25 Sep 2025 08:59AM UTC coverage: 62.008% (-0.03%) from 62.036%

Pull Request #903: add validation in e2e for multiple resource injection
Merge 1638ca7cd into c49a32c97 (github / web-flow)

8691 of 14016 relevant lines covered (62.01%)
0.69 hits per line

Source file: /controllers/drain_controller_helper.go (67.43% of lines covered)
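
As a quick consistency check on the figures above: 8691 / 14016 ≈ 0.62008, i.e. the reported build coverage of 62.008% (rounded to 62.01%), and the change from the previous build is 62.008% - 62.036% = -0.028%, shown as -0.03%.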
package controllers

import (
        "context"
        "fmt"

        "github.com/go-logr/logr"
        corev1 "k8s.io/api/core/v1"
        "k8s.io/apimachinery/pkg/api/errors"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/apimachinery/pkg/labels"
        ctrl "sigs.k8s.io/controller-runtime"
        "sigs.k8s.io/controller-runtime/pkg/client"
        "sigs.k8s.io/controller-runtime/pkg/reconcile"

        sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
        constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
)

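// handleNodeIdleNodeStateDrainingOrCompleted handles a node that has requested to go back to
// idle while its SriovNetworkNodeState is still annotated as draining or drain-complete: it
// completes the drain (uncordon) on the node, re-queues the request if that has not finished
// yet, and finally moves the node state annotation back to idle.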
func (dr *DrainReconcile) handleNodeIdleNodeStateDrainingOrCompleted(ctx context.Context,
        node *corev1.Node,
        nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState) (ctrl.Result, error) {
        reqLogger := ctx.Value("logger").(logr.Logger).WithName("handleNodeIdleNodeStateDrainingOrCompleted")
        completed, err := dr.drainer.CompleteDrainNode(ctx, node)
        if err != nil {
                reqLogger.Error(err, "failed to complete drain on node")
                dr.recorder.Event(nodeNetworkState,
                        corev1.EventTypeWarning,
                        "DrainController",
                        "failed to drain node")
                return ctrl.Result{}, err
        }

        // if we didn't manage to complete the undrain of the node we retry
        if !completed {
                reqLogger.Info("complete drain was not completed, re-queueing the request")
                dr.recorder.Event(nodeNetworkState,
                        corev1.EventTypeWarning,
                        "DrainController",
                        "node complete drain was not completed")
                // TODO: make this time configurable
                return reconcile.Result{RequeueAfter: constants.DrainControllerRequeueTime}, nil
        }

        // move the node state back to idle
        err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client)
        if err != nil {
                reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainIdle)
                return ctrl.Result{}, err
        }

        reqLogger.Info("completed the undrain for node")
        dr.recorder.Event(nodeNetworkState,
                corev1.EventTypeWarning,
                "DrainController",
                "node undrain completed")
        return ctrl.Result{}, nil
}

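// handleNodeDrainOrReboot handles a drain (or reboot) request for a node: it returns early if
// the drain is already complete, otherwise verifies that another drain may start (respecting
// the pool's parallel-drain limit), performs the drain (a full drain when a reboot is required,
// with special handling for single-node clusters), and annotates the node state as
// drain-complete on success.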
func (dr *DrainReconcile) handleNodeDrainOrReboot(ctx context.Context,
        node *corev1.Node,
        nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState,
        nodeDrainAnnotation,
        nodeStateDrainAnnotationCurrent string) (ctrl.Result, error) {
        reqLogger := ctx.Value("logger").(logr.Logger).WithName("handleNodeDrainOrReboot")
        // nothing to do here, we need to wait for the node to move back to idle
        if nodeStateDrainAnnotationCurrent == constants.DrainComplete {
                reqLogger.Info("node requested a drain and nodeState is on drain completed, nothing to do")
                return ctrl.Result{}, nil
        }

        // we need to start the drain, but first we need to check that we can drain the node
        if nodeStateDrainAnnotationCurrent == constants.DrainIdle {
                result, err := dr.tryDrainNode(ctx, node)
                if err != nil {
                        reqLogger.Error(err, "failed to check if we can drain the node")
                        return ctrl.Result{}, err
                }

                // in case we need to wait because we just reached the max number of draining nodes
                if result != nil {
                        return *result, nil
                }
        }

        // check if we are on a single node and a reboot/full-drain is required
        fullNodeDrain := nodeDrainAnnotation == constants.RebootRequired
        singleNode := false
        if fullNodeDrain {
                nodeList := &corev1.NodeList{}
                err := dr.Client.List(ctx, nodeList)
                if err != nil {
                        reqLogger.Error(err, "failed to list nodes")
                        return ctrl.Result{}, err
                }
                if len(nodeList.Items) == 1 {
                        reqLogger.Info("drainNode(): FullNodeDrain requested and we are on a single node")
                        singleNode = true
                }
        }

        // call the drain function that will also call drain on other platform providers like openshift
        drained, err := dr.drainer.DrainNode(ctx, node, fullNodeDrain, singleNode)
        if err != nil {
                reqLogger.Error(err, "error trying to drain the node")
                dr.recorder.Event(nodeNetworkState,
                        corev1.EventTypeWarning,
                        "DrainController",
                        "failed to drain node")
                return reconcile.Result{}, err
        }

        // if we didn't manage to complete the drain of the node we retry
        if !drained {
                reqLogger.Info("the node was not drained, re-queueing the request")
                dr.recorder.Event(nodeNetworkState,
                        corev1.EventTypeWarning,
                        "DrainController",
                        "node drain operation was not completed")
                return reconcile.Result{RequeueAfter: constants.DrainControllerRequeueTime}, nil
        }

        // if we manage to drain we annotate the node state with drain completed and finish
        err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client)
        if err != nil {
                reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainComplete)
                return ctrl.Result{}, err
        }

        reqLogger.Info("node drained successfully")
        dr.recorder.Event(nodeNetworkState,
                corev1.EventTypeWarning,
                "DrainController",
                "node drain completed")
        return ctrl.Result{}, nil
}

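// tryDrainNode checks, under the drain-check mutex, whether the given node may start draining:
// it finds the node's pool, computes the pool's MaxUnavailable, and counts the nodes that are
// already draining or drain-complete. It returns a non-nil Result to re-queue the request when
// the limit is reached; otherwise it annotates the node's SriovNetworkNodeState as draining.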
func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (*reconcile.Result, error) {
        reqLogger := ctx.Value("logger").(logr.Logger).WithName("tryDrainNode")

        // critical section: we need to check if we can start the draining
        dr.drainCheckMutex.Lock()
        defer dr.drainCheckMutex.Unlock()

        // find the relevant node pool
        nodePool, nodeList, err := dr.findNodePoolConfig(ctx, node)
        if err != nil {
                reqLogger.Error(err, "failed to find the pool for the requested node")
                return nil, err
        }

        // check how many nodes we can drain in parallel for the specific pool
        maxUnv, err := nodePool.MaxUnavailable(len(nodeList))
        if err != nil {
                reqLogger.Error(err, "failed to calculate max unavailable")
                return nil, err
        }

        current := 0
        snns := &sriovnetworkv1.SriovNetworkNodeState{}

        var currentSnns *sriovnetworkv1.SriovNetworkNodeState
        for _, nodeObj := range nodeList {
                err = dr.Get(ctx, client.ObjectKey{Name: nodeObj.GetName(), Namespace: vars.Namespace}, snns)
                if err != nil {
                        if errors.IsNotFound(err) {
                                reqLogger.V(2).Info("node doesn't have a sriovNetworkNodeState, skipping")
                                continue
                        }
                        return nil, err
                }

                if snns.GetName() == node.GetName() {
                        currentSnns = snns.DeepCopy()
                }

                if utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.Draining) ||
                        utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete) {
                        current++
                }
        }
        reqLogger.Info("Max nodes allowed to be draining at the same time", "MaxParallelNodeConfiguration", maxUnv)
        reqLogger.Info("Count of draining nodes", "drainingNodes", current)

        // if maxUnv is -1 this means we drain all the nodes in parallel without a limit
        if maxUnv == -1 {
                reqLogger.Info("draining all the nodes in parallel")
        } else if current >= maxUnv {
                // the node requested to be drained, but we are at the limit so we re-enqueue the request
                reqLogger.Info("MaxParallelNodeConfiguration limit reached for draining nodes, re-enqueueing the request")
                // TODO: make this time configurable
                return &reconcile.Result{RequeueAfter: constants.DrainControllerRequeueTime}, nil
        }

        if currentSnns == nil {
                return nil, fmt.Errorf("failed to find sriov network node state for requested node")
        }

        err = utils.AnnotateObject(ctx, currentSnns, constants.NodeStateDrainAnnotationCurrent, constants.Draining, dr.Client)
        if err != nil {
                reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.Draining)
                return nil, err
        }

        return nil, nil
}

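// findNodePoolConfig returns the SriovNetworkPoolConfig that matches the given node together
// with all the nodes belonging to that pool. It returns an error when the node matches more
// than one pool, and falls back to the default pool configuration (covering every node that is
// not part of any pool) when it matches none.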
func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
        logger := ctx.Value("logger").(logr.Logger).WithName("findNodePoolConfig")
        // get all the sriov network pool configs
        npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
        err := dr.List(ctx, npcl)
        if err != nil {
                logger.Error(err, "failed to list sriovNetworkPoolConfig")
                return nil, nil, err
        }

        selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{}
        nodesInPools := map[string]interface{}{}

        for _, npc := range npcl.Items {
                // we skip hw offload objects
                if npc.Spec.OvsHardwareOffloadConfig.Name != "" {
                        continue
                }

                if npc.Spec.NodeSelector == nil {
                        npc.Spec.NodeSelector = &metav1.LabelSelector{}
                }

                selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector)
                if err != nil {
                        logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector)
                        return nil, nil, err
                }

                if selector.Matches(labels.Set(node.Labels)) {
                        selectedNpcl = append(selectedNpcl, npc.DeepCopy())
                }

                nodeList := &corev1.NodeList{}
                err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
                if err != nil {
                        logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
                                "machineConfigPoolName", npc,
                                "nodeSelector", npc.Spec.NodeSelector)
                        return nil, nil, err
                }

                for _, nodeName := range nodeList.Items {
                        nodesInPools[nodeName.Name] = nil
                }
        }

        if len(selectedNpcl) > 1 {
                // don't allow the node to be part of multiple pools
                err = fmt.Errorf("node is part of more than one pool")
                logger.Error(err, "multiple pools found for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
                return nil, nil, err
        } else if len(selectedNpcl) == 1 {
                // found one pool for our node
                logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
                selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
                if err != nil {
                        logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
                        return nil, nil, err
                }

                // list all the nodes that are also part of this pool and return them
                nodeList := &corev1.NodeList{}
                err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
                if err != nil {
                        logger.Error(err, "failed to list nodes with label selector", "labelSelector", selector)
                        return nil, nil, err
                }

                return selectedNpcl[0], nodeList.Items, nil
        } else {
                // in this case we get all the nodes and remove the ones that are already part of any pool
                logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig)
                nodeList := &corev1.NodeList{}
                err = dr.List(ctx, nodeList)
                if err != nil {
                        logger.Error(err, "failed to list all the nodes")
                        return nil, nil, err
                }

                defaultNodeLists := []corev1.Node{}
                for _, nodeObj := range nodeList.Items {
                        if _, exist := nodesInPools[nodeObj.Name]; !exist {
                                defaultNodeLists = append(defaultNodeLists, nodeObj)
                        }
                }
                return defaultPoolConfig, defaultNodeLists, nil
        }
}
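
A note on the logging pattern used throughout this file: every helper retrieves its logger with ctx.Value("logger").(logr.Logger), so the caller is expected to have stored a logr.Logger in the context under the key "logger" before invoking these functions (the type assertion would panic otherwise). Below is a minimal caller-side sketch of that assumption; the helper name withDrainLogger is hypothetical and not part of this file, and the string key simply mirrors the one the helpers above already use.

// Hypothetical illustration (not part of the file above): place a logr.Logger in the
// context under the key "logger", matching the key the drain helpers read.
package controllers

import (
        "context"

        ctrl "sigs.k8s.io/controller-runtime"
)

func withDrainLogger(ctx context.Context) context.Context {
        // ctrl.Log is the controller-runtime root logger; the helpers only require
        // that the stored value satisfies logr.Logger.
        return context.WithValue(ctx, "logger", ctrl.Log.WithName("drain-controller"))
}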