• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

k8snetworkplumbingwg / sriov-network-operator / 13856261115

14 Mar 2025 12:09PM UTC coverage: 48.251% (-0.6%) from 48.875%
13856261115

Pull #856

github

web-flow
Merge 2cb0a44e6 into 230f50baa
Pull Request #856: Reach MetaData service over IPv6

0 of 52 new or added lines in 1 file covered. (0.0%)

673 existing lines in 11 files now uncovered.

7350 of 15233 relevant lines covered (48.25%)

0.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.84
/controllers/drain_controller_helper.go
1
package controllers
2

3
import (
4
        "context"
5
        "fmt"
6

7
        "github.com/go-logr/logr"
8
        corev1 "k8s.io/api/core/v1"
9
        "k8s.io/apimachinery/pkg/api/errors"
10
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11
        "k8s.io/apimachinery/pkg/labels"
12
        ctrl "sigs.k8s.io/controller-runtime"
13
        "sigs.k8s.io/controller-runtime/pkg/client"
14
        "sigs.k8s.io/controller-runtime/pkg/log"
15
        "sigs.k8s.io/controller-runtime/pkg/reconcile"
16

17
        sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
18
        constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
19
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
20
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
21
)
22

23
func (dr *DrainReconcile) handleNodeIdleNodeStateDrainingOrCompleted(ctx context.Context,
24
        reqLogger *logr.Logger,
25
        node *corev1.Node,
26
        nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState) (ctrl.Result, error) {
1✔
27
        completed, err := dr.drainer.CompleteDrainNode(ctx, node)
1✔
28
        if err != nil {
1✔
29
                reqLogger.Error(err, "failed to complete drain on node")
×
30
                dr.recorder.Event(nodeNetworkState,
×
31
                        corev1.EventTypeWarning,
×
32
                        "DrainController",
×
33
                        "failed to drain node")
×
34
                return ctrl.Result{}, err
×
35
        }
×
36

37
        // if we didn't manage to complete the un drain of the node we retry
38
        if !completed {
1✔
39
                reqLogger.Info("complete drain was not completed re queueing the request")
×
40
                dr.recorder.Event(nodeNetworkState,
×
41
                        corev1.EventTypeWarning,
×
42
                        "DrainController",
×
43
                        "node complete drain was not completed")
×
44
                // TODO: make this time configurable
×
45
                return reconcile.Result{RequeueAfter: constants.DrainControllerRequeueTime}, nil
×
46
        }
×
47

48
        // move the node state back to idle
49
        err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client)
1✔
50
        if err != nil {
1✔
51
                reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainIdle)
×
52
                return ctrl.Result{}, err
×
53
        }
×
54

55
        reqLogger.Info("completed the un drain for node")
1✔
56
        dr.recorder.Event(nodeNetworkState,
1✔
57
                corev1.EventTypeWarning,
1✔
58
                "DrainController",
1✔
59
                "node un drain completed")
1✔
60
        return ctrl.Result{}, nil
1✔
61
}
62

63
func (dr *DrainReconcile) handleNodeDrainOrReboot(ctx context.Context,
64
        reqLogger *logr.Logger,
65
        node *corev1.Node,
66
        nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState,
67
        nodeDrainAnnotation,
68
        nodeStateDrainAnnotationCurrent string) (ctrl.Result, error) {
1✔
69
        // nothing to do here we need to wait for the node to move back to idle
1✔
70
        if nodeStateDrainAnnotationCurrent == constants.DrainComplete {
2✔
71
                reqLogger.Info("node requested a drain and nodeState is on drain completed nothing todo")
1✔
72
                return ctrl.Result{}, nil
1✔
73
        }
1✔
74

75
        // we need to start the drain, but first we need to check that we can drain the node
76
        if nodeStateDrainAnnotationCurrent == constants.DrainIdle {
2✔
77
                result, err := dr.tryDrainNode(ctx, node)
1✔
78
                if err != nil {
1✔
79
                        reqLogger.Error(err, "failed to check if we can drain the node")
×
80
                        return ctrl.Result{}, err
×
81
                }
×
82

83
                // in case we need to wait because we just to the max number of draining nodes
84
                if result != nil {
2✔
85
                        return *result, nil
1✔
86
                }
1✔
87
        }
88

89
        // Check if we are on a single node, and we require a reboot/full-drain we just return
90
        fullNodeDrain := nodeDrainAnnotation == constants.RebootRequired
1✔
91
        singleNode := false
1✔
92
        if fullNodeDrain {
2✔
93
                nodeList := &corev1.NodeList{}
1✔
94
                err := dr.Client.List(ctx, nodeList)
1✔
95
                if err != nil {
1✔
96
                        reqLogger.Error(err, "failed to list nodes")
×
97
                        return ctrl.Result{}, err
×
98
                }
×
99
                if len(nodeList.Items) == 1 {
2✔
100
                        reqLogger.Info("drainNode(): FullNodeDrain requested and we are on Single node")
1✔
101
                        singleNode = true
1✔
102
                }
1✔
103
        }
104

105
        // call the drain function that will also call drain to other platform providers like openshift
106
        drained, err := dr.drainer.DrainNode(ctx, node, fullNodeDrain, singleNode)
1✔
107
        if err != nil {
1✔
108
                reqLogger.Error(err, "error trying to drain the node")
×
UNCOV
109
                dr.recorder.Event(nodeNetworkState,
×
UNCOV
110
                        corev1.EventTypeWarning,
×
UNCOV
111
                        "DrainController",
×
UNCOV
112
                        "failed to drain node")
×
UNCOV
113
                return reconcile.Result{}, err
×
UNCOV
114
        }
×
115

116
        // if we didn't manage to complete the drain of the node we retry
117
        if !drained {
1✔
UNCOV
118
                reqLogger.Info("the nodes was not drained re queueing the request")
×
UNCOV
119
                dr.recorder.Event(nodeNetworkState,
×
UNCOV
120
                        corev1.EventTypeWarning,
×
UNCOV
121
                        "DrainController",
×
UNCOV
122
                        "node drain operation was not completed")
×
UNCOV
123
                return reconcile.Result{RequeueAfter: constants.DrainControllerRequeueTime}, nil
×
UNCOV
124
        }
×
125

126
        // if we manage to drain we label the node state with drain completed and finish
127
        err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client)
1✔
128
        if err != nil {
2✔
129
                reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainComplete)
1✔
130
                return ctrl.Result{}, err
1✔
131
        }
1✔
132

133
        reqLogger.Info("node drained successfully")
1✔
134
        dr.recorder.Event(nodeNetworkState,
1✔
135
                corev1.EventTypeWarning,
1✔
136
                "DrainController",
1✔
137
                "node drain completed")
1✔
138
        return ctrl.Result{}, nil
1✔
139
}
140

141
// tryDrainNode checks, under the drainCheckMutex critical section, whether the
// requested node is allowed to start draining given its pool's MaxUnavailable
// budget. If the budget is exhausted it returns a non-nil Result asking the
// caller to requeue; otherwise it annotates the node's
// SriovNetworkNodeState with the Draining state and returns (nil, nil).
func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (*reconcile.Result, error) {
	// configure logs
	reqLogger := log.FromContext(ctx)
	reqLogger.Info("checkForNodeDrain():")

	//critical section we need to check if we can start the draining
	dr.drainCheckMutex.Lock()
	defer dr.drainCheckMutex.Unlock()

	// find the relevant node pool
	nodePool, nodeList, err := dr.findNodePoolConfig(ctx, node)
	if err != nil {
		reqLogger.Error(err, "failed to find the pool for the requested node")
		return nil, err
	}

	// check how many nodes we can drain in parallel for the specific pool
	maxUnv, err := nodePool.MaxUnavailable(len(nodeList))
	if err != nil {
		reqLogger.Error(err, "failed to calculate max unavailable")
		return nil, err
	}

	current := 0
	// snns is reused as the Get target on every loop iteration; the per-node
	// copy we keep (currentSnns) is taken with DeepCopy below.
	snns := &sriovnetworkv1.SriovNetworkNodeState{}

	var currentSnns *sriovnetworkv1.SriovNetworkNodeState
	for _, nodeObj := range nodeList {
		err = dr.Get(ctx, client.ObjectKey{Name: nodeObj.GetName(), Namespace: vars.Namespace}, snns)
		if err != nil {
			// a node without a nodeState simply has no SR-IOV policy; skip it
			if errors.IsNotFound(err) {
				reqLogger.V(2).Info("node doesn't have a sriovNetworkNodePolicy")
				continue
			}
			return nil, err
		}

		if snns.GetName() == node.GetName() {
			currentSnns = snns.DeepCopy()
		}

		// count every node in the pool that is currently draining or has
		// finished draining but was not yet returned to idle
		if utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.Draining) ||
			utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete) {
			current++
		}
	}
	reqLogger.Info("Max node allowed to be draining at the same time", "MaxParallelNodeConfiguration", maxUnv)
	reqLogger.Info("Count of draining", "drainingNodes", current)

	// a maxUnv of -1 means we drain all the nodes in parallel without a limit
	if maxUnv == -1 {
		reqLogger.Info("draining all the nodes in parallel")
	} else if current >= maxUnv {
		// the node requested to be drained, but we are at the limit so we re-enqueue the request
		reqLogger.Info("MaxParallelNodeConfiguration limit reached for draining nodes re-enqueue the request")
		// TODO: make this time configurable
		return &reconcile.Result{RequeueAfter: constants.DrainControllerRequeueTime}, nil
	}

	// the requested node must have been seen in the loop above
	if currentSnns == nil {
		return nil, fmt.Errorf("failed to find sriov network node state for requested node")
	}

	err = utils.AnnotateObject(ctx, currentSnns, constants.NodeStateDrainAnnotationCurrent, constants.Draining, dr.Client)
	if err != nil {
		reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.Draining)
		return nil, err
	}

	return nil, nil
}
212

213
func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
1✔
214
        logger := log.FromContext(ctx)
1✔
215
        logger.Info("findNodePoolConfig():")
1✔
216
        // get all the sriov network pool configs
1✔
217
        npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
1✔
218
        err := dr.List(ctx, npcl)
1✔
219
        if err != nil {
1✔
UNCOV
220
                logger.Error(err, "failed to list sriovNetworkPoolConfig")
×
UNCOV
221
                return nil, nil, err
×
UNCOV
222
        }
×
223

224
        selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{}
1✔
225
        nodesInPools := map[string]interface{}{}
1✔
226

1✔
227
        for _, npc := range npcl.Items {
2✔
228
                // we skip hw offload objects
1✔
229
                if npc.Spec.OvsHardwareOffloadConfig.Name != "" {
2✔
230
                        continue
1✔
231
                }
232

233
                if npc.Spec.NodeSelector == nil {
2✔
234
                        npc.Spec.NodeSelector = &metav1.LabelSelector{}
1✔
235
                }
1✔
236

237
                selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector)
1✔
238
                if err != nil {
1✔
UNCOV
239
                        logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector)
×
UNCOV
240
                        return nil, nil, err
×
UNCOV
241
                }
×
242

243
                if selector.Matches(labels.Set(node.Labels)) {
2✔
244
                        selectedNpcl = append(selectedNpcl, npc.DeepCopy())
1✔
245
                }
1✔
246

247
                nodeList := &corev1.NodeList{}
1✔
248
                err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
1✔
249
                if err != nil {
1✔
UNCOV
250
                        logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
×
UNCOV
251
                                "machineConfigPoolName", npc,
×
UNCOV
252
                                "nodeSelector", npc.Spec.NodeSelector)
×
UNCOV
253
                        return nil, nil, err
×
UNCOV
254
                }
×
255

256
                for _, nodeName := range nodeList.Items {
2✔
257
                        nodesInPools[nodeName.Name] = nil
1✔
258
                }
1✔
259
        }
260

261
        if len(selectedNpcl) > 1 {
1✔
UNCOV
262
                // don't allow the node to be part of multiple pools
×
263
                err = fmt.Errorf("node is part of more then one pool")
×
264
                logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
×
265
                return nil, nil, err
×
266
        } else if len(selectedNpcl) == 1 {
2✔
267
                // found one pool for our node
1✔
268
                logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
1✔
269
                selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
1✔
270
                if err != nil {
1✔
UNCOV
271
                        logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
×
UNCOV
272
                        return nil, nil, err
×
UNCOV
273
                }
×
274

275
                // list all the nodes that are also part of this pool and return them
276
                nodeList := &corev1.NodeList{}
1✔
277
                err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
1✔
278
                if err != nil {
1✔
UNCOV
279
                        logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector)
×
UNCOV
280
                        return nil, nil, err
×
UNCOV
281
                }
×
282

283
                return selectedNpcl[0], nodeList.Items, nil
1✔
284
        } else {
1✔
285
                // in this case we get all the nodes and remove the ones that already part of any pool
1✔
286
                logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig)
1✔
287
                nodeList := &corev1.NodeList{}
1✔
288
                err = dr.List(ctx, nodeList)
1✔
289
                if err != nil {
1✔
UNCOV
290
                        logger.Error(err, "failed to list all the nodes")
×
UNCOV
291
                        return nil, nil, err
×
UNCOV
292
                }
×
293

294
                defaultNodeLists := []corev1.Node{}
1✔
295
                for _, nodeObj := range nodeList.Items {
2✔
296
                        if _, exist := nodesInPools[nodeObj.Name]; !exist {
2✔
297
                                defaultNodeLists = append(defaultNodeLists, nodeObj)
1✔
298
                        }
1✔
299
                }
300
                return defaultPoolConfig, defaultNodeLists, nil
1✔
301
        }
302
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc