k8snetworkplumbingwg / sriov-network-operator / build 19227470377

10 Nov 2025 09:48AM UTC · coverage: 62.151% (-0.2%) from 62.366%

Pull Request #902: Create platform and orchestrator packages
Merge f9637c189 into 3d1a472a6 (committed via the GitHub web-flow user)

319 of 659 new or added lines in 25 files covered (48.41%).
41 existing lines in 9 files are now uncovered.
8772 of 14114 relevant lines covered (62.15%).
0.69 hits per line.
Source File (76.19% covered): /pkg/orchestrator/openshift/openshift.go

package openshift

import (
        "context"
        "fmt"
        "sync"
        "time"

        "github.com/go-logr/logr"
        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/apimachinery/pkg/types"
        "sigs.k8s.io/controller-runtime/pkg/client"

        configv1 "github.com/openshift/api/config/v1"
        mcv1 "github.com/openshift/api/machineconfiguration/v1"
        mcoconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"

        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
)

const (
        // infraResourceName is the default Infrastructure resource name on OpenShift
        infraResourceName = "cluster"
)

// OpenshiftOrchestrator implements the orchestrator.Interface for OpenShift clusters.
// It contains metadata and structs utilized to interact with OpenShift clusters,
// including MachineConfigPool management for safe node draining.
type OpenshiftOrchestrator struct {
        // openshiftFlavor holds metadata about the type of OpenShift environment the operator is in.
        openshiftFlavor consts.ClusterFlavor

        // kubeClient is a generic client
        kubeClient client.Client

        // mcpPauseMutex holds the mutex used to change the machine config pool pause state
        mcpPauseMutex sync.Mutex
}

// New creates a new OpenshiftOrchestrator instance.
// It detects whether the cluster is Hypershift or standard OpenShift by checking the Infrastructure resource.
// Returns a configured OpenshiftOrchestrator or an error if initialization fails.
func New() (*OpenshiftOrchestrator, error) {
        kubeClient, err := client.New(vars.Config, client.Options{Scheme: vars.Scheme})
        if err != nil {
                return nil, err
        }

        openshiftFlavor := consts.ClusterFlavorDefault
        isHypershift, err := isExternalControlPlaneCluster(kubeClient)
        if err != nil {
                return nil, err
        }

        if isHypershift {
                openshiftFlavor = consts.ClusterFlavorHypershift
        }

        return &OpenshiftOrchestrator{openshiftFlavor, kubeClient, sync.Mutex{}}, nil
}

// Name returns the name of the OpenShift orchestrator.
func (c *OpenshiftOrchestrator) Name() string {
        return "OpenShift"
}

// ClusterType returns the cluster type for OpenShift.
func (c *OpenshiftOrchestrator) ClusterType() consts.ClusterType {
        return consts.ClusterTypeOpenshift
}

// Flavor returns the OpenShift cluster flavor (standard or Hypershift).
func (c *OpenshiftOrchestrator) Flavor() consts.ClusterFlavor {
        return c.openshiftFlavor
}

// BeforeDrainNode pauses the node's MachineConfigPool before draining to prevent automatic reboots.
// For Hypershift clusters it returns true immediately, as there is no MachineConfigOperator.
// Returns true if the MCP was successfully paused and the drain can proceed, false if more time is needed.
func (c *OpenshiftOrchestrator) BeforeDrainNode(ctx context.Context, node *corev1.Node) (bool, error) {
        // if the operator is running on the Hypershift variant of OpenShift there is no machine config operator,
        // so just return true here
        if c.Flavor() == consts.ClusterFlavorHypershift {
                return true, nil
        }

        // get the machine config pool name for the requested node
        mcpName, err := c.GetNodeMachinePoolName(ctx, node)
        if err != nil {
                return false, err
        }

        // lock the critical section where we check whether the machine config pool is already paused
        // and then act based on that
        c.mcpPauseMutex.Lock()
        defer c.mcpPauseMutex.Unlock()

        // get the machine config pool that handles the specific node we want to drain
        mcp := &mcv1.MachineConfigPool{}
        err = c.kubeClient.Get(ctx, client.ObjectKey{Name: mcpName}, mcp)
        if err != nil {
                return false, err
        }

        // check if the machine config pool was already paused by the operator
        if utils.ObjectHasAnnotation(mcp,
                consts.MachineConfigPoolPausedAnnotation,
                consts.MachineConfigPoolPausedAnnotationPaused) {
                // check if the machine config pool is really paused;
                // if not, check whether the machine config operator is doing something for this pool
                if !mcp.Spec.Paused {
                        // if the machine config pool still needs to update, return false;
                        // if the configurations are equal we can pause the pool
                        if mcp.Spec.Configuration.Name == "" || mcp.Status.Configuration.Name == "" ||
                                mcp.Spec.Configuration.Name != mcp.Status.Configuration.Name {
                                return false, nil
                        } else {
                                err = c.ChangeMachineConfigPoolPause(ctx, mcp, true)
                                if err != nil {
                                        return false, err
                                }
                        }
                }
                return true, nil
        }

        // check if the machine config operator is doing something;
        // to be sure we can just check that the desired and current configurations are the same
        if mcp.Spec.Configuration.Name != mcp.Status.Configuration.Name {
                // return false as the machine config operator is applying changes
                return false, nil
        }

        // annotate the machine config pool as paused and then pause it,
        // in that order, to avoid edge cases where we pause but didn't add our annotation
        err = utils.AnnotateObject(ctx, mcp,
                consts.MachineConfigPoolPausedAnnotation,
                consts.MachineConfigPoolPausedAnnotationPaused,
                c.kubeClient)
        if err != nil {
                return false, err
        }

        err = c.ChangeMachineConfigPoolPause(ctx, mcp, true)
        if err != nil {
                return false, err
        }

        // re-fetch the object to see whether we need to revert the pause
        mcp = &mcv1.MachineConfigPool{}
        err = c.kubeClient.Get(ctx, client.ObjectKey{Name: mcpName}, mcp)
        if err != nil {
                return false, err
        }

        // the machine config operator started updating the nodes, so we just remove the pause
        if mcp.Spec.Configuration.Name != mcp.Status.Configuration.Name {
                err = c.ChangeMachineConfigPoolPause(ctx, mcp, false)
                if err != nil {
                        return false, err
                }

                // after we remove the pause we reset the annotation
                err = utils.AnnotateObject(ctx, mcp, consts.MachineConfigPoolPausedAnnotation, consts.MachineConfigPoolPausedAnnotationIdle, c.kubeClient)
                if err != nil {
                        return false, err
                }

                return false, nil
        }

        // we managed to pause the requested machine config pool
        return true, nil
}

// AfterCompleteDrainNode unpauses the node's MachineConfigPool after the drain is complete.
// For Hypershift clusters it returns true immediately, as there is no MachineConfigOperator.
// It only unpauses the MCP if this is the last node in the pool completing the drain.
// Returns true if the MCP was successfully unpaused or no unpause was needed, false if more time is needed.
func (c *OpenshiftOrchestrator) AfterCompleteDrainNode(ctx context.Context, node *corev1.Node) (bool, error) {
        // if the operator is running on the Hypershift variant of OpenShift, there is no machine config operator,
        // so just return true here
        if c.Flavor() == consts.ClusterFlavorHypershift {
                return true, nil
        }

        // get the machine config pool name for the requested node
        mcpName, err := c.GetNodeMachinePoolName(ctx, node)
        if err != nil {
                return false, err
        }

        // lock the critical section where we check whether the machine config pool is already paused
        // and then act based on that
        c.mcpPauseMutex.Lock()
        defer c.mcpPauseMutex.Unlock()

        // get the machine config pool that handles the specific node we want to drain
        mcp := &mcv1.MachineConfigPool{}
        err = c.kubeClient.Get(ctx, client.ObjectKey{Name: mcpName}, mcp)
        if err != nil {
                return false, err
        }

        value, exist := mcp.Annotations[consts.MachineConfigPoolPausedAnnotation]
        // if the annotation doesn't exist we just return true here;
        // this can happen when the node was moved to another MCP after we started the drain
        if !exist {
                return true, nil
        }
        // check if the sriov annotation on the MCP is idle;
        // if the value is idle we just return here
        if value == consts.MachineConfigPoolPausedAnnotationIdle {
                return true, nil
        }

        // get all the nodes that belong to this machine config pool to validate that this is the last node
        // requesting to complete the drain
        nodesInPool := &corev1.NodeList{}
        selector, err := metav1.LabelSelectorAsSelector(mcp.Spec.NodeSelector)
        if err != nil {
                return false, err
        }

        err = c.kubeClient.List(ctx, nodesInPool, &client.ListOptions{LabelSelector: selector})
        if err != nil {
                return false, err
        }

        for _, nodeInPool := range nodesInPool.Items {
                // we skip our node
                if nodeInPool.GetName() == node.Name {
                        continue
                }

                // if there is an annotation here we check whether it's something other than idle
                if utils.ObjectHasAnnotationKey(&nodeInPool, consts.NodeDrainAnnotation) &&
                        nodeInPool.GetAnnotations()[consts.NodeDrainAnnotation] != consts.DrainIdle {
                        // other nodes from the machine config pool are still under configuration, so we just return;
                        // only the last node in the machine config pool that finishes the drain should remove the pause
                        return true, nil
                }
        }

        // if we get here, we are the last node from this machine config pool to complete the drain,
        // so we unpause the pool and then reset the annotation, in that order, to avoid any race issues
        err = c.ChangeMachineConfigPoolPause(ctx, mcp, false)
        if err != nil {
                return false, err
        }

        // reset the annotation now that we unpaused the machine config pool
        err = utils.AnnotateObject(ctx, mcp, consts.MachineConfigPoolPausedAnnotation, consts.MachineConfigPoolPausedAnnotationIdle, c.kubeClient)
        if err != nil {
                return false, err
        }

        return true, nil
}

// GetNodeMachinePoolName returns the name of the MachineConfigPool that owns the node's desired MachineConfig.
func (c *OpenshiftOrchestrator) GetNodeMachinePoolName(ctx context.Context, node *corev1.Node) (string, error) {
        // a Hypershift cluster doesn't have machine configs
        if c.Flavor() == consts.ClusterFlavorHypershift {
                return "", fmt.Errorf("hypershift doesn't have machineConfig")
        }

        desiredConfig, ok := node.Annotations[mcoconsts.DesiredMachineConfigAnnotationKey]
        if !ok {
                return "", fmt.Errorf("failed to find the annotation [%s] on node [%s]", mcoconsts.DesiredMachineConfigAnnotationKey, node.Name)
        }

        mc := &mcv1.MachineConfig{}
        err := c.kubeClient.Get(ctx, client.ObjectKey{Name: desiredConfig}, mc)
        if err != nil {
                return "", fmt.Errorf("failed to get the desired MachineConfig [%s] for node [%s]: %w", desiredConfig, node.Name, err)
        }
        for _, owner := range mc.OwnerReferences {
                if owner.Kind == "MachineConfigPool" {
                        return owner.Name, nil
                }
        }

        return "", fmt.Errorf("failed to find the MCP of the node")
}

// ChangeMachineConfigPoolPause patches the MachineConfigPool's spec.paused field to the requested value.
func (c *OpenshiftOrchestrator) ChangeMachineConfigPoolPause(ctx context.Context, mcp *mcv1.MachineConfigPool, pause bool) error {
        logger := ctx.Value("logger").(logr.Logger).WithName("ChangeMachineConfigPoolPause")
        logger.Info("change machine config pool state", "pause", pause, "mcp", mcp.Name)

        patchString := []byte(fmt.Sprintf(`{"spec":{"paused":%t}}`, pause))
        patch := client.RawPatch(types.MergePatchType, patchString)
        err := c.kubeClient.Patch(ctx, mcp, patch)
        if err != nil {
                return err
        }

        return nil
}

// isExternalControlPlaneCluster detects the control plane location of the cluster.
// On OpenShift, the control plane topology is configured in the configv1.Infrastructure struct.
// On Kubernetes, it is determined by which node the sriov operator is scheduled on: if the operator
// pod is scheduled on a worker node, the cluster is considered to have an external control plane.
func isExternalControlPlaneCluster(c client.Client) (bool, error) {
        ctx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
        defer cancelFunc()

        infra := &configv1.Infrastructure{}
        err := c.Get(ctx, types.NamespacedName{Name: infraResourceName}, infra)
        if err != nil {
                return false, fmt.Errorf("isExternalControlPlaneCluster(): failed to get Infrastructure (name: %s): %w", infraResourceName, err)
        }

        if infra.Status.ControlPlaneTopology == "External" {
                return true, nil
        }
        return false, nil
}
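
The orchestrator above is meant to be called around a node drain: BeforeDrainNode is polled until the MachineConfigPool is paused, the drain itself runs, and AfterCompleteDrainNode unpauses the pool once the last node in it finishes. The sketch below shows one plausible way a drain controller could wire these hooks together; it is not code from this PR. The drainexample package, the drainNode helper, and the 10-second requeue interval are assumptions, while openshift.New, BeforeDrainNode, and AfterCompleteDrainNode are the functions defined in the file above. Note that the context must carry a logr.Logger under the "logger" key, because ChangeMachineConfigPoolPause retrieves it that way, and vars.Config/vars.Scheme must already be initialized before New() is called.

// Hypothetical usage sketch, not part of the sriov-network-operator source.
package drainexample

import (
        "context"
        "fmt"
        "time"

        "github.com/go-logr/logr"
        corev1 "k8s.io/api/core/v1"

        "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/orchestrator/openshift"
)

// drainWithMCPPause pauses the node's MachineConfigPool, runs the drain, and unpauses the pool.
func drainWithMCPPause(logger logr.Logger, node *corev1.Node) error {
        orch, err := openshift.New()
        if err != nil {
                return err
        }

        // ChangeMachineConfigPoolPause looks the logger up via ctx.Value("logger"),
        // so the context passed to the hooks must carry one under that key.
        ctx := context.WithValue(context.Background(), "logger", logger)

        // Poll until the MachineConfigPool is paused and the drain may start.
        for {
                ready, err := orch.BeforeDrainNode(ctx, node)
                if err != nil {
                        return err
                }
                if ready {
                        break
                }
                time.Sleep(10 * time.Second) // assumed requeue interval
        }

        // Placeholder for the operator's actual cordon/evict logic.
        if err := drainNode(ctx, node); err != nil {
                return err
        }

        // Unpause the pool; only the last node in the pool actually removes the pause.
        done, err := orch.AfterCompleteDrainNode(ctx, node)
        if err != nil {
                return err
        }
        if !done {
                return fmt.Errorf("MCP for node %s not yet unpaused, requeue", node.Name)
        }
        return nil
}

// drainNode is a hypothetical stand-in for the real drain implementation.
func drainNode(ctx context.Context, node *corev1.Node) error {
        // cordon the node and evict its pods here
        return nil
}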