• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NVIDIA / skyhook / 22637387869

03 Mar 2026 06:32PM UTC coverage: 80.777% (+0.06%) from 80.713%
22637387869

push

github

lockwobr
chore: update k8s version, fix chainsaw install

6862 of 8495 relevant lines covered (80.78%)

3.76 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.32
/operator/internal/wrapper/node.go
1
/*
2
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
 * SPDX-License-Identifier: Apache-2.0
4
 *
5
 *
6
 * Licensed under the Apache License, Version 2.0 (the "License");
7
 * you may not use this file except in compliance with the License.
8
 * You may obtain a copy of the License at
9
 *
10
 * http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing, software
13
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 * See the License for the specific language governing permissions and
16
 * limitations under the License.
17
 */
18

19
package wrapper
20

21
import (
22
        "encoding/json"
23
        "fmt"
24
        "sort"
25
        "strings"
26

27
        "github.com/NVIDIA/skyhook/operator/api/v1alpha1"
28
        "github.com/NVIDIA/skyhook/operator/internal/graph"
29
        "github.com/NVIDIA/skyhook/operator/internal/version"
30
        "github.com/go-logr/logr"
31
        corev1 "k8s.io/api/core/v1"
32
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
33
)
34

35
// there are 2 interfaces to reflect functions that need a skyhook and node
36
// and ones that just need a node
37

38
// SkyhookNode wraps a node with a supporting skyhook
39
type SkyhookNode interface {
40
        SkyhookNodeOnly
41
        GetSkyhook() *Skyhook
42
        GetComplete() []string
43
        SetStatus(status v1alpha1.Status)
44
        IsComplete() bool
45
        ProgressSkipped()
46
        IsPackageComplete(_package v1alpha1.Package) bool
47
        RunNext() ([]*v1alpha1.Package, error)
48
        NextStage(_package *v1alpha1.Package) *v1alpha1.Stage
49
        HasInterrupt(_package v1alpha1.Package) bool
50
        UpdateCondition()
51
        HasSkyhookAnnotations() bool
52
}
53

54
// SkyhookNodeOnly wraps the node with just a skyhook name
55
type SkyhookNodeOnly interface {
56
        Status() v1alpha1.Status
57
        // SetStatus is in both interfaces, does more if skyhook is not nil
58
        SetStatus(status v1alpha1.Status)
59
        PackageStatus(name string) (*v1alpha1.PackageStatus, bool)
60
        SetVersion()
61
        GetVersion() string
62
        Migrate(logger logr.Logger) error
63
        State() (v1alpha1.NodeState, error)
64
        SetState(state v1alpha1.NodeState) error
65
        RemoveState(_package v1alpha1.PackageRef) error
66
        Upsert(_package v1alpha1.PackageRef, image string, state v1alpha1.State, stage v1alpha1.Stage, restarts int32, containerSHA string) error
67
        GetNode() *corev1.Node
68
        Taint(key string)
69
        RemoveTaint(key string)
70
        Cordon()
71
        Uncordon()
72
        Reset()
73
        Changed() bool
74
}
75

76
// compile-time check that *skyhookNode satisfies SkyhookNode
var _ SkyhookNode = (*skyhookNode)(nil)
77

78
// NewSkyhookNodeOnly most of use cases for the wrapper just needs name, so this stub is for making helpers for those use cases,
79
// should help reduce calls to api, and not leak stubbed skyhooks with just name set
80
func NewSkyhookNodeOnly(node *corev1.Node, skyhookName string) (SkyhookNodeOnly, error) {
7✔
81
        ret := &skyhookNode{
7✔
82
                Node:        node,
7✔
83
                skyhookName: skyhookName,
7✔
84
        }
7✔
85
        state, err := ret.State()
7✔
86
        if err != nil {
7✔
87
                return nil, fmt.Errorf("error creating skyhookNode: %w", err)
×
88
        }
×
89
        ret.nodeState = state
7✔
90
        return ret, nil
7✔
91
}
92

93
// Convert will upgrade this to be the full interface if you have a skyhook
94
func Convert(node SkyhookNodeOnly, skyhook *v1alpha1.Skyhook) (SkyhookNode, error) {
7✔
95
        ret := node.(*skyhookNode)
7✔
96
        ret.skyhook = &Skyhook{Skyhook: skyhook}
7✔
97

7✔
98
        graph, err := skyhook.Spec.BuildGraph()
7✔
99
        if err != nil {
7✔
100
                return nil, err
×
101
        }
×
102

103
        ret.graph = graph
7✔
104

7✔
105
        return ret, nil
7✔
106
}
107

108
func NewSkyhookNode(node *corev1.Node, skyhook *v1alpha1.Skyhook) (SkyhookNode, error) {
7✔
109

7✔
110
        t, err := NewSkyhookNodeOnly(node, skyhook.Name)
7✔
111
        if err != nil {
7✔
112
                return nil, err
×
113
        }
×
114

115
        return Convert(t, skyhook)
7✔
116
}
117

118
type skyhookNode struct {
119
        *corev1.Node
120
        skyhookName string
121
        skyhook     *Skyhook
122
        nodeState   v1alpha1.NodeState
123
        graph       graph.DependencyGraph[*v1alpha1.Package]
124
        updated     bool
125
}
126

127
// GetSkyhook implements sskyhookNode.
128
func (node *skyhookNode) GetSkyhook() *Skyhook {
6✔
129
        return node.skyhook
6✔
130
}
6✔
131

132
// GetNode implements sskyhookNode.
133
func (node *skyhookNode) GetNode() *corev1.Node {
6✔
134
        return node.Node
6✔
135
}
6✔
136

137
func (node *skyhookNode) SetStatus(status v1alpha1.Status) {
6✔
138

6✔
139
        s, ok := node.Annotations[fmt.Sprintf("%s/status_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)]
6✔
140
        if !ok || s != string(status) {
12✔
141
                if node.Annotations == nil {
6✔
142
                        node.Annotations = make(map[string]string)
×
143
                }
×
144
                if node.Labels == nil {
6✔
145
                        node.Labels = make(map[string]string)
×
146
                }
×
147
                node.updated = true
6✔
148
                node.Annotations[fmt.Sprintf("%s/status_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)] = string(status)
6✔
149
                node.Labels[fmt.Sprintf("%s/status_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)] = string(status)
6✔
150
        }
151

152
        if status == v1alpha1.StatusComplete {
12✔
153
                node.Uncordon()
6✔
154
        }
6✔
155

156
        if node.skyhook != nil {
12✔
157
                node.skyhook.SetNodeStatus(node.Node.Name, status)
6✔
158
                node.skyhook.SetNodeState(node.Node.Name, node.nodeState)
6✔
159
        }
6✔
160
}
161

162
func (node *skyhookNode) Status() v1alpha1.Status {
6✔
163
        status, ok := node.Annotations[fmt.Sprintf("%s/status_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)]
6✔
164
        if !ok {
12✔
165
                return v1alpha1.StatusUnknown
6✔
166
        }
6✔
167
        return v1alpha1.GetStatus(status)
6✔
168
}
169

170
func (node *skyhookNode) State() (v1alpha1.NodeState, error) {
7✔
171

7✔
172
        if node.nodeState != nil {
13✔
173
                return node.nodeState, nil
6✔
174
        }
6✔
175

176
        if node == nil {
7✔
177
                return nil, nil
×
178
        }
×
179
        s, ok := node.Annotations[fmt.Sprintf("%s/nodeState_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)]
7✔
180
        if !ok {
14✔
181
                return nil, nil
7✔
182
        }
7✔
183

184
        ret := v1alpha1.NodeState{}
6✔
185
        err := json.Unmarshal([]byte(s), &ret)
6✔
186
        if err != nil {
6✔
187
                return nil, fmt.Errorf("error unmarshalling node state: %w", err)
×
188
        }
×
189

190
        return ret, nil
6✔
191
}
192

193
func (node *skyhookNode) PackageStatus(name string) (*v1alpha1.PackageStatus, bool) {
6✔
194
        packageStatus := node.nodeState.Get(name)
6✔
195
        if packageStatus != nil {
12✔
196
                return packageStatus, true
6✔
197
        }
6✔
198

199
        return nil, false
6✔
200
}
201

202
func (node *skyhookNode) SetVersion() {
6✔
203

6✔
204
        current := node.GetVersion()
6✔
205
        if current == version.VERSION { // if has not changed, do nothing and not set updated
6✔
206
                return
×
207
        }
×
208

209
        if version.VERSION == "" { // was not compiled with version, so do nothing
6✔
210
                return
×
211
        }
×
212

213
        if node.Annotations == nil {
6✔
214
                node.Annotations = map[string]string{}
×
215
        }
×
216

217
        node.Annotations[fmt.Sprintf("%s/version_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)] = version.VERSION
6✔
218
        node.updated = true
6✔
219
}
220

221
func (node *skyhookNode) GetVersion() string {
6✔
222
        version, ok := node.Annotations[fmt.Sprintf("%s/version_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)]
6✔
223
        if !ok {
12✔
224
                return ""
6✔
225
        }
6✔
226
        return version
6✔
227
}
228

229
func (node *skyhookNode) Migrate(logger logr.Logger) error {
6✔
230

6✔
231
        from := node.GetVersion()
6✔
232
        to := version.VERSION
6✔
233

6✔
234
        if from == to { // already migrated
6✔
235
                return nil
×
236
        }
×
237

238
        mm := version.MajorMinor(from)
6✔
239
        switch mm {
6✔
240
        // because there was a bug in versioning, this same migration needs to be run for more then just the v0.5 releases
241
        // empty string is for before versioning was added
242
        case "", "v0.5", "v0.6", "v0.7":
6✔
243
                err := migrateNodeTo_0_5_0(node, logger)
6✔
244
                if err != nil {
6✔
245
                        return err
×
246
                }
×
247
                node.SetVersion()
6✔
248
                return nil
6✔
249
        }
250

251
        return nil
×
252
}
253

254
func (node *skyhookNode) SetState(state v1alpha1.NodeState) error {
7✔
255
        if node == nil || state == nil {
7✔
256
                return nil
×
257
        }
×
258

259
        data, err := json.Marshal(state)
7✔
260
        if err != nil {
7✔
261
                return fmt.Errorf("error marshalling node state: %w", err)
×
262
        }
×
263

264
        if node.Annotations == nil {
8✔
265
                node.Annotations = map[string]string{}
1✔
266
        }
1✔
267

268
        s, ok := node.Annotations[fmt.Sprintf("%s/nodeState_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)]
7✔
269
        if !ok || s != string(data) {
14✔
270
                node.Annotations[fmt.Sprintf("%s/nodeState_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)] = string(data)
7✔
271
                node.nodeState = state
7✔
272
                node.updated = true
7✔
273
        }
7✔
274

275
        return nil
7✔
276
}
277

278
func (node *skyhookNode) RemoveState(_package v1alpha1.PackageRef) error {
4✔
279
        changed := node.nodeState.RemoveState(_package)
4✔
280
        if changed {
8✔
281
                return node.SetState(node.nodeState)
4✔
282
        }
4✔
283

284
        return nil
×
285
}
286

287
func (node *skyhookNode) Upsert(_package v1alpha1.PackageRef, image string, state v1alpha1.State, stage v1alpha1.Stage, restarts int32, containerSHA string) error {
7✔
288
        changed := node.nodeState.Upsert(_package, image, state, stage, restarts, containerSHA)
7✔
289
        if changed {
14✔
290
                if node.skyhook != nil {
14✔
291
                        node.skyhook.Updated = true
7✔
292
                }
7✔
293

294
                return node.SetState(node.nodeState)
7✔
295
        }
296
        return nil
5✔
297
}
298

299
func (node *skyhookNode) IsPackageComplete(_package v1alpha1.Package) bool {
4✔
300
        return node.nodeState.IsPackageComplete(_package, node.skyhook.GetConfigInterrupts(), node.skyhook.GetConfigUpdates())
4✔
301
}
4✔
302

303
func (node *skyhookNode) IsComplete() bool {
6✔
304
        return node.nodeState.IsComplete(node.skyhook.Spec.Packages, node.skyhook.GetConfigInterrupts(), node.skyhook.GetConfigUpdates())
6✔
305
}
6✔
306

307
func (node *skyhookNode) GetComplete() []string {
7✔
308
        return node.nodeState.GetComplete(node.skyhook.Spec.Packages, node.skyhook.GetConfigInterrupts(), node.skyhook.GetConfigUpdates())
7✔
309
}
7✔
310

311
func (node *skyhookNode) ProgressSkipped() {
4✔
312
        if node.nodeState.ProgressSkipped(node.skyhook.Spec.Packages, node.skyhook.GetConfigInterrupts(), node.skyhook.GetConfigUpdates()) {
8✔
313
                node.skyhook.Updated = true
4✔
314
                node.updated = true
4✔
315
        }
4✔
316
}
317

318
func (node *skyhookNode) RunNext() ([]*v1alpha1.Package, error) {
7✔
319
        complete := node.GetComplete()
7✔
320

7✔
321
        // Get next available nodes based on completed dependencies
7✔
322
        next, err := node.graph.Next(complete...)
7✔
323
        if err != nil {
7✔
324
                return nil, err
×
325
        }
×
326

327
        toRun := node.graph.Get(next...)
7✔
328

7✔
329
        // Sort for deterministic ordering
7✔
330
        sort.Slice(toRun, func(i, j int) bool {
12✔
331
                return toRun[i].Name < toRun[j].Name
5✔
332
        })
5✔
333

334
        return toRun, nil
7✔
335
}
336

337
func (node *skyhookNode) NextStage(_package *v1alpha1.Package) *v1alpha1.Stage {
6✔
338
        return node.nodeState.NextStage(_package, node.skyhook.GetConfigInterrupts(), node.skyhook.GetConfigUpdates())
6✔
339
}
6✔
340

341
func (node *skyhookNode) Changed() bool {
6✔
342
        return node.updated
6✔
343
}
6✔
344

345
func (node *skyhookNode) HasInterrupt(_package v1alpha1.Package) bool {
6✔
346
        return node.nodeState.HasInterrupt(_package, node.skyhook.GetConfigInterrupts(), node.skyhook.GetConfigUpdates())
6✔
347
}
6✔
348

349
func (node *skyhookNode) Taint(key string) {
×
350

×
351
        // dont add it if it exists already, dups will error
×
352
        for _, t := range node.Spec.Taints {
×
353
                if t.Key == key {
×
354
                        return
×
355
                }
×
356
        }
357

358
        if node.Spec.Taints == nil {
×
359
                node.Spec.Taints = make([]corev1.Taint, 0)
×
360
        }
×
361

362
        node.Spec.Taints = append(node.Spec.Taints, corev1.Taint{
×
363
                Key:    key,
×
364
                Value:  node.GetSkyhook().Name,
×
365
                Effect: corev1.TaintEffectNoSchedule,
×
366
        })
×
367
        node.updated = true
×
368
}
369

370
func (node *skyhookNode) RemoveTaint(key string) {
×
371

×
372
        if len(node.Spec.Taints) == 0 {
×
373
                return
×
374
        }
×
375

376
        temp := node.Spec.Taints[:0]
×
377
        for _, t := range node.Spec.Taints {
×
378
                if t.Key != key {
×
379
                        temp = append(temp, t)
×
380
                }
×
381
        }
382

383
        if len(temp) < len(node.Spec.Taints) {
×
384
                node.Spec.Taints = temp
×
385
                node.updated = true
×
386
        }
×
387
}
388

389
// HasSkyhookAnnotations returns true if the node has any annotation with the
390
// skyhook.nvidia.com/ prefix, indicating it has been previously touched by the Skyhook operator.
391
func (node *skyhookNode) HasSkyhookAnnotations() bool {
5✔
392
        for key := range node.Annotations {
10✔
393
                if strings.HasPrefix(key, v1alpha1.METADATA_PREFIX+"/") {
10✔
394
                        return true
5✔
395
                }
5✔
396
        }
397
        return false
5✔
398
}
399

400
func (node *skyhookNode) Cordon() {
4✔
401
        _, ok := node.Annotations[fmt.Sprintf("%s/cordon_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)]
4✔
402
        if !node.Spec.Unschedulable || !ok {
8✔
403
                node.Spec.Unschedulable = true
4✔
404
                node.Annotations[fmt.Sprintf("%s/cordon_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)] = "true"
4✔
405
                node.updated = true
4✔
406
        }
4✔
407
}
408

409
func (node *skyhookNode) Uncordon() {
6✔
410

6✔
411
        // if we hold a cordon remove it, also we dont want to remove a cordon if we dont have one...
6✔
412
        _, ok := node.Annotations[fmt.Sprintf("%s/cordon_%s", v1alpha1.METADATA_PREFIX, node.skyhookName)]
6✔
413
        if ok {
10✔
414
                node.Spec.Unschedulable = false
4✔
415
                delete(node.Annotations, fmt.Sprintf("%s/cordon_%s", v1alpha1.METADATA_PREFIX, node.skyhookName))
4✔
416
                node.updated = true
4✔
417
        }
4✔
418
}
419

420
func (node *skyhookNode) Reset() {
×
421

×
422
        delete(node.skyhook.Status.NodeState, node.Name)
×
423
        delete(node.skyhook.Status.NodeStatus, node.Name)
×
424
        node.skyhook.Status.Status = v1alpha1.StatusUnknown
×
425
        node.skyhook.Updated = true
×
426

×
427
        delete(node.Annotations, fmt.Sprintf("%s/cordon_", v1alpha1.METADATA_PREFIX))
×
428
        delete(node.Annotations, fmt.Sprintf("%s/nodeState_%s", v1alpha1.METADATA_PREFIX, node.skyhook.Name))
×
429
        delete(node.Annotations, fmt.Sprintf("%s/status_%s", v1alpha1.METADATA_PREFIX, node.skyhook.Name))
×
430

×
431
        delete(node.Labels, fmt.Sprintf("%s/status_%s", v1alpha1.METADATA_PREFIX, node.skyhook.Name))
×
432
        node.updated = true
×
433
}
×
434

435
func (node *skyhookNode) UpdateCondition() {
6✔
436
        readyReason, errorReason := "Incomplete", "Not Erroring"
6✔
437
        errorCondFound, condFound := false, false
6✔
438

6✔
439
        if node.Node.Status.Conditions == nil {
6✔
440
                node.Node.Status.Conditions = make([]corev1.NodeCondition, 0)
×
441
        }
×
442

443
        errorStatus, condStatus := corev1.ConditionFalse, corev1.ConditionTrue
6✔
444
        if node.IsComplete() {
12✔
445
                readyReason = "Complete"
6✔
446
                condStatus = corev1.ConditionFalse
6✔
447
        }
6✔
448

449
        for _, packageStatus := range node.nodeState {
12✔
450
                switch packageStatus.State {
6✔
451
                case v1alpha1.StateErroring, v1alpha1.StateUnknown:
4✔
452
                        errorReason = "Package(s) Erroring or Unknown"
4✔
453
                        errorStatus = corev1.ConditionTrue
4✔
454
                }
455
        }
456

457
        cond := corev1.NodeCondition{
6✔
458
                Type:               corev1.NodeConditionType(fmt.Sprintf("%s/%s/NotReady", v1alpha1.METADATA_PREFIX, node.skyhookName)),
6✔
459
                Status:             condStatus,
6✔
460
                LastHeartbeatTime:  metav1.Now(),
6✔
461
                LastTransitionTime: metav1.Now(),
6✔
462
                Reason:             readyReason,
6✔
463
                Message:            fmt.Sprintf("Skyhook %s Ready", node.skyhookName),
6✔
464
        }
6✔
465

6✔
466
        errorCond := corev1.NodeCondition{
6✔
467
                Type:               corev1.NodeConditionType(fmt.Sprintf("%s/%s/Erroring", v1alpha1.METADATA_PREFIX, node.skyhookName)),
6✔
468
                Status:             errorStatus,
6✔
469
                LastHeartbeatTime:  metav1.Now(),
6✔
470
                LastTransitionTime: metav1.Now(),
6✔
471
                Reason:             errorReason,
6✔
472
                Message:            fmt.Sprintf("Package Erroring or Unknown for %s", node.skyhookName),
6✔
473
        }
6✔
474

6✔
475
        for i, condition := range node.Node.Status.Conditions {
12✔
476
                switch condition.Type {
6✔
477
                case errorCond.Type:
6✔
478
                        errorCondFound = true
6✔
479
                        if condition.Reason != errorCond.Reason && condition.Message == errorCond.Message {
10✔
480
                                node.Node.Status.Conditions[i] = errorCond // update it with the new condition
4✔
481
                                node.updated = true
4✔
482
                        }
4✔
483
                case cond.Type:
6✔
484
                        condFound = true
6✔
485
                        if condition.Reason != cond.Reason && condition.Message == cond.Message {
12✔
486
                                node.Node.Status.Conditions[i] = cond // update it with the new condition
6✔
487
                                node.updated = true
6✔
488
                        }
6✔
489
                }
490
        }
491

492
        if !errorCondFound {
12✔
493
                node.Node.Status.Conditions = append([]corev1.NodeCondition{errorCond}, node.Node.Status.Conditions...)
6✔
494
                node.updated = true
6✔
495
        }
6✔
496
        if !condFound {
12✔
497
                node.Node.Status.Conditions = append([]corev1.NodeCondition{cond}, node.Node.Status.Conditions...)
6✔
498
                node.updated = true
6✔
499
        }
6✔
500
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc