• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NVIDIA / skyhook / 20320280516

17 Dec 2025 11:15PM UTC coverage: 75.452% (+0.5%) from 74.903%
20320280516

push

github

web-flow
feat(cli): add package and node management commands with lifecycle controls (#123)

Add comprehensive CLI commands for managing Skyhook packages and nodes:

Package Commands:
- `package rerun`: Force re-execution of packages on specific nodes
  - Support for stage-specific re-runs (apply, config, interrupt, post-interrupt)
  - Node matching via exact names or regex patterns
- `package status`: Query package status across the cluster
- `package logs`: Retrieve package execution logs with follow/tail support

Node Commands:
- `node list`: List all nodes with Skyhook status
- `node status`: Display detailed status for specific nodes
- `node ignore`: Add/remove ignore label to pause operations on nodes
- `node reset`: Reset node state for a Skyhook

Lifecycle Commands:
- `pause`: Pause Skyhook reconciliation temporarily
- `resume`: Resume paused Skyhook operations
- `disable`: Disable a Skyhook completely
- `enable`: Re-enable a disabled Skyhook

Also includes:
- Comprehensive unit tests with K8s dynamic client mocks
- CLI e2e test suite using chainsaw (lifecycle, node, package tests)
- CI integration for CLI tests in operator-ci workflow
- Shared utilities for node matching, label management, and patch-based updates

1142 of 1535 new or added lines in 16 files covered. (74.4%)

2 existing lines in 1 file now uncovered.

5803 of 7691 relevant lines covered (75.45%)

1.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.73
/operator/internal/cli/node/node_status.go
1
/*
2
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
 * SPDX-License-Identifier: Apache-2.0
4
 *
5
 *
6
 * Licensed under the Apache License, Version 2.0 (the "License");
7
 * you may not use this file except in compliance with the License.
8
 * You may obtain a copy of the License at
9
 *
10
 * http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing, software
13
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 * See the License for the specific language governing permissions and
16
 * limitations under the License.
17
 */
18

19
package node
20

21
import (
22
        "context"
23
        "encoding/json"
24
        "fmt"
25
        "io"
26
        "sort"
27
        "strings"
28

29
        "github.com/spf13/cobra"
30
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31

32
        "github.com/NVIDIA/skyhook/operator/api/v1alpha1"
33
        "github.com/NVIDIA/skyhook/operator/internal/cli/client"
34
        cliContext "github.com/NVIDIA/skyhook/operator/internal/cli/context"
35
        "github.com/NVIDIA/skyhook/operator/internal/cli/utils"
36
)
37

38
// nodeStateAnnotationPrefix is the leading part of the node annotation keys that
// this command scans. The remainder of a matching key is treated as the Skyhook
// name, and the annotation value is decoded as a JSON-encoded v1alpha1.NodeState.
const nodeStateAnnotationPrefix = v1alpha1.METADATA_PREFIX + "/nodeState_"
39

40
// nodeStatusOptions carries the flag values accepted by the node status command.
type nodeStatusOptions struct {
	// skyhookName, when non-empty, restricts results to the Skyhook with this name.
	skyhookName string

	// output names the requested output format (table, json, yaml, or wide).
	output string
}
45

46
// BindToCmd binds the options to the command flags
47
func (o *nodeStatusOptions) BindToCmd(cmd *cobra.Command) {
1✔
48
        cmd.Flags().StringVar(&o.skyhookName, "skyhook", "", "Filter by Skyhook name")
1✔
49
        cmd.Flags().StringVarP(&o.output, "output", "o", "table", "Output format: table, json, yaml, wide")
1✔
50
}
1✔
51

52
// NewStatusCmd creates the node status command
53
func NewStatusCmd(ctx *cliContext.CLIContext) *cobra.Command {
1✔
54
        opts := &nodeStatusOptions{}
1✔
55

1✔
56
        cmd := &cobra.Command{
1✔
57
                Use:   "status [node-name...]",
1✔
58
                Short: "Show all Skyhook activity on specific node(s)",
1✔
59
                Long: `Show all Skyhook activity on specific node(s) by reading node annotations.
1✔
60

1✔
61
This command displays a summary of all Skyhook CRs that have activity on the 
1✔
62
specified node(s), including overall status and package completion counts.
1✔
63

1✔
64
If no node name is provided, all nodes with Skyhook annotations are shown.
1✔
65
Node names can be exact matches or regex patterns.`,
1✔
66
                Example: `  # Show all Skyhook activity on a specific node
1✔
67
  kubectl skyhook node status worker-1
1✔
68

1✔
69
  # Show Skyhook activity on multiple nodes
1✔
70
  kubectl skyhook node status worker-1 worker-2 worker-3
1✔
71

1✔
72
  # Show Skyhook activity on nodes matching a pattern
1✔
73
  kubectl skyhook node status "worker-.*"
1✔
74

1✔
75
  # Filter by specific Skyhook
1✔
76
  kubectl skyhook node status worker-1 --skyhook gpu-init
1✔
77

1✔
78
  # View all nodes with Skyhook activity
1✔
79
  kubectl skyhook node status
1✔
80

1✔
81
  # Output as JSON
1✔
82
  kubectl skyhook node status worker-1 -o json
1✔
83

1✔
84
  # Output with package details
1✔
85
  kubectl skyhook node status worker-1 -o wide`,
1✔
86
                RunE: func(cmd *cobra.Command, args []string) error {
1✔
NEW
87
                        clientFactory := client.NewFactory(ctx.GlobalFlags.ConfigFlags)
×
NEW
88
                        kubeClient, err := clientFactory.Client()
×
NEW
89
                        if err != nil {
×
NEW
90
                                return fmt.Errorf("initializing kubernetes client: %w", err)
×
NEW
91
                        }
×
92

NEW
93
                        return runNodeStatus(cmd.Context(), cmd.OutOrStdout(), kubeClient, args, opts)
×
94
                },
95
        }
96

97
        opts.BindToCmd(cmd)
1✔
98

1✔
99
        return cmd
1✔
100
}
101

102
// nodeSkyhookSummary represents a summary of Skyhook activity on a node
103
type nodeSkyhookSummary struct {
104
        NodeName         string                 `json:"nodeName"`
105
        SkyhookName      string                 `json:"skyhookName"`
106
        Status           string                 `json:"status"`
107
        PackagesComplete int                    `json:"packagesComplete"`
108
        PackagesTotal    int                    `json:"packagesTotal"`
109
        Packages         []nodeSkyhookPkgStatus `json:"packages,omitempty"`
110
}
111

112
// nodeSkyhookPkgStatus is the printable form of one package entry taken from a
// node state annotation.
type nodeSkyhookPkgStatus struct {
	// Name is the package name.
	Name string `json:"name"`

	// Version is the package version.
	Version string `json:"version"`

	// Stage is the package's recorded stage, rendered as a string.
	Stage string `json:"stage"`

	// State is the package's recorded state, rendered as a string.
	State string `json:"state"`

	// Restarts is the restart count recorded for the package.
	Restarts int32 `json:"restarts"`

	// Image is the package image; it is omitted from JSON when empty.
	Image string `json:"image,omitempty"`
}
121

122
func runNodeStatus(ctx context.Context, out io.Writer, kubeClient *client.Client, nodePatterns []string, opts *nodeStatusOptions) error {
1✔
123
        // Get all nodes
1✔
124
        nodeList, err := kubeClient.Kubernetes().CoreV1().Nodes().List(ctx, metav1.ListOptions{})
1✔
125
        if err != nil {
1✔
NEW
126
                return fmt.Errorf("listing nodes: %w", err)
×
NEW
127
        }
×
128

129
        // Collect all node names for pattern matching
130
        allNodeNames := make([]string, 0, len(nodeList.Items))
1✔
131
        for _, node := range nodeList.Items {
2✔
132
                allNodeNames = append(allNodeNames, node.Name)
1✔
133
        }
1✔
134

135
        // Filter nodes by pattern if specified
136
        var targetNodes []string
1✔
137
        if len(nodePatterns) > 0 {
2✔
138
                targetNodes, err = utils.MatchNodes(nodePatterns, allNodeNames)
1✔
139
                if err != nil {
1✔
NEW
140
                        return fmt.Errorf("matching nodes: %w", err)
×
NEW
141
                }
×
142
                if len(targetNodes) == 0 {
1✔
NEW
143
                        _, _ = fmt.Fprintf(out, "No nodes matched the specified patterns\n")
×
NEW
144
                        return nil
×
NEW
145
                }
×
146
        } else {
1✔
147
                targetNodes = allNodeNames
1✔
148
        }
1✔
149

150
        targetNodeSet := make(map[string]bool)
1✔
151
        for _, n := range targetNodes {
2✔
152
                targetNodeSet[n] = true
1✔
153
        }
1✔
154

155
        // Collect status from all nodes with Skyhook annotations
156
        var summaries []nodeSkyhookSummary
1✔
157

1✔
158
        for _, node := range nodeList.Items {
2✔
159
                if !targetNodeSet[node.Name] {
1✔
NEW
160
                        continue
×
161
                }
162

163
                // Find all Skyhook annotations on this node
164
                for annotationKey, annotationValue := range node.Annotations {
2✔
165
                        if !strings.HasPrefix(annotationKey, nodeStateAnnotationPrefix) {
1✔
NEW
166
                                continue
×
167
                        }
168

169
                        skyhookName := strings.TrimPrefix(annotationKey, nodeStateAnnotationPrefix)
1✔
170

1✔
171
                        // Filter by skyhook name if specified
1✔
172
                        if opts.skyhookName != "" && skyhookName != opts.skyhookName {
2✔
173
                                continue
1✔
174
                        }
175

176
                        var nodeState v1alpha1.NodeState
1✔
177
                        if err := json.Unmarshal([]byte(annotationValue), &nodeState); err != nil {
1✔
NEW
178
                                continue // Skip invalid annotations
×
179
                        }
180

181
                        packages := make([]nodeSkyhookPkgStatus, 0, len(nodeState))
1✔
182
                        completeCount := 0
1✔
183
                        hasError := false
1✔
184
                        hasInProgress := false
1✔
185

1✔
186
                        for _, pkgStatus := range nodeState {
2✔
187
                                packages = append(packages, nodeSkyhookPkgStatus{
1✔
188
                                        Name:     pkgStatus.Name,
1✔
189
                                        Version:  pkgStatus.Version,
1✔
190
                                        Stage:    string(pkgStatus.Stage),
1✔
191
                                        State:    string(pkgStatus.State),
1✔
192
                                        Restarts: pkgStatus.Restarts,
1✔
193
                                        Image:    pkgStatus.Image,
1✔
194
                                })
1✔
195

1✔
196
                                switch pkgStatus.State {
1✔
197
                                case v1alpha1.StateComplete:
1✔
198
                                        completeCount++
1✔
199
                                case v1alpha1.StateErroring:
1✔
200
                                        hasError = true
1✔
201
                                case v1alpha1.StateInProgress:
1✔
202
                                        hasInProgress = true
1✔
203
                                }
204
                        }
205

206
                        // Determine overall status
207
                        status := string(v1alpha1.StateUnknown)
1✔
208
                        if hasError {
2✔
209
                                status = string(v1alpha1.StateErroring)
1✔
210
                        } else if completeCount == len(packages) && len(packages) > 0 {
3✔
211
                                status = string(v1alpha1.StateComplete)
1✔
212
                        } else if hasInProgress || completeCount > 0 {
3✔
213
                                status = string(v1alpha1.StateInProgress)
1✔
214
                        }
1✔
215

216
                        // Sort packages by name
217
                        sort.Slice(packages, func(i, j int) bool {
2✔
218
                                return packages[i].Name < packages[j].Name
1✔
219
                        })
1✔
220

221
                        summaries = append(summaries, nodeSkyhookSummary{
1✔
222
                                NodeName:         node.Name,
1✔
223
                                SkyhookName:      skyhookName,
1✔
224
                                Status:           status,
1✔
225
                                PackagesComplete: completeCount,
1✔
226
                                PackagesTotal:    len(packages),
1✔
227
                                Packages:         packages,
1✔
228
                        })
1✔
229
                }
230
        }
231

232
        // Sort by node name, then skyhook name
233
        sort.Slice(summaries, func(i, j int) bool {
2✔
234
                if summaries[i].NodeName != summaries[j].NodeName {
2✔
235
                        return summaries[i].NodeName < summaries[j].NodeName
1✔
236
                }
1✔
237
                return summaries[i].SkyhookName < summaries[j].SkyhookName
1✔
238
        })
239

240
        if len(summaries) == 0 {
2✔
241
                _, _ = fmt.Fprintf(out, "No Skyhook activity found on specified nodes\n")
1✔
242
                return nil
1✔
243
        }
1✔
244

245
        // Output based on format
246
        switch opts.output {
1✔
247
        case "json":
1✔
248
                return utils.OutputJSON(out, summaries)
1✔
NEW
249
        case "yaml":
×
NEW
250
                return utils.OutputYAML(out, summaries)
×
NEW
251
        case "wide":
×
NEW
252
                return outputNodeStatusWide(out, summaries)
×
253
        default:
1✔
254
                return outputNodeStatusTable(out, summaries)
1✔
255
        }
256
}
257

258
// nodeStatusTableConfig returns the table configuration for node status output
259
func nodeStatusTableConfig() utils.TableConfig[nodeSkyhookSummary] {
1✔
260
        return utils.TableConfig[nodeSkyhookSummary]{
1✔
261
                Headers: []string{"NODE", "SKYHOOK", "STATUS", "PACKAGES"},
1✔
262
                Extract: func(s nodeSkyhookSummary) []string {
2✔
263
                        return []string{
1✔
264
                                s.NodeName,
1✔
265
                                s.SkyhookName,
1✔
266
                                s.Status,
1✔
267
                                fmt.Sprintf("%d/%d", s.PackagesComplete, s.PackagesTotal),
1✔
268
                        }
1✔
269
                },
1✔
270
                WideHeaders: []string{"COMPLETE", "TOTAL"},
NEW
271
                WideExtract: func(s nodeSkyhookSummary) []string {
×
NEW
272
                        return []string{
×
NEW
273
                                fmt.Sprintf("%d", s.PackagesComplete),
×
NEW
274
                                fmt.Sprintf("%d", s.PackagesTotal),
×
NEW
275
                        }
×
NEW
276
                },
×
277
        }
278
}
279

280
func outputNodeStatusTable(out io.Writer, summaries []nodeSkyhookSummary) error {
1✔
281
        return utils.OutputTable(out, nodeStatusTableConfig(), summaries)
1✔
282
}
1✔
283

284
// nodeStatusWideEntry represents a flattened entry for wide output (one row per package)
285
type nodeStatusWideEntry struct {
286
        NodeName    string
287
        SkyhookName string
288
        Package     nodeSkyhookPkgStatus
289
}
290

291
func outputNodeStatusWide(out io.Writer, summaries []nodeSkyhookSummary) error {
1✔
292
        // Wide output shows one row per package, not per summary
1✔
293
        cfg := utils.TableConfig[nodeStatusWideEntry]{
1✔
294
                Headers: []string{"NODE", "SKYHOOK", "PACKAGE", "VERSION", "STAGE", "STATE"},
1✔
295
                Extract: func(e nodeStatusWideEntry) []string {
2✔
296
                        return []string{
1✔
297
                                e.NodeName,
1✔
298
                                e.SkyhookName,
1✔
299
                                e.Package.Name,
1✔
300
                                e.Package.Version,
1✔
301
                                e.Package.Stage,
1✔
302
                                e.Package.State,
1✔
303
                        }
1✔
304
                },
1✔
305
                WideHeaders: []string{"RESTARTS", "IMAGE"},
306
                WideExtract: func(e nodeStatusWideEntry) []string {
1✔
307
                        return []string{fmt.Sprintf("%d", e.Package.Restarts), e.Package.Image}
1✔
308
                },
1✔
309
        }
310

311
        // Flatten summaries to per-package entries
312
        var entries []nodeStatusWideEntry
1✔
313
        for _, s := range summaries {
2✔
314
                for _, pkg := range s.Packages {
2✔
315
                        entries = append(entries, nodeStatusWideEntry{
1✔
316
                                NodeName:    s.NodeName,
1✔
317
                                SkyhookName: s.SkyhookName,
1✔
318
                                Package:     pkg,
1✔
319
                        })
1✔
320
                }
1✔
321
        }
322

323
        return utils.OutputWide(out, cfg, entries)
1✔
324
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc