• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NVIDIA / skyhook / 20320280516

17 Dec 2025 11:15PM UTC coverage: 75.452% (+0.5%) from 74.903%
20320280516

push

github

web-flow
feat(cli): add package and node management commands with lifecycle controls (#123)

Add comprehensive CLI commands for managing Skyhook packages and nodes:

Package Commands:
- `package rerun`: Force re-execution of packages on specific nodes
  - Support for stage-specific re-runs (apply, config, interrupt, post-interrupt)
  - Node matching via exact names or regex patterns
- `package status`: Query package status across the cluster
- `package logs`: Retrieve package execution logs with follow/tail support

Node Commands:
- `node list`: List all nodes with Skyhook status
- `node status`: Display detailed status for specific nodes
- `node ignore`: Add/remove ignore label to pause operations on nodes
- `node reset`: Reset node state for a Skyhook

Lifecycle Commands:
- `pause`: Pause Skyhook reconciliation temporarily
- `resume`: Resume paused Skyhook operations
- `disable`: Disable a Skyhook completely
- `enable`: Re-enable a disabled Skyhook

Also includes:
- Comprehensive unit tests with K8s dynamic client mocks
- CLI e2e test suite using chainsaw (lifecycle, node, package tests)
- CI integration for CLI tests in operator-ci workflow
- Shared utilities for node matching, label management, and patch-based updates

1142 of 1535 new or added lines in 16 files covered. (74.4%)

2 existing lines in 1 file now uncovered.

5803 of 7691 relevant lines covered (75.45%)

1.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.3
/operator/internal/cli/node/node_list.go
1
/*
2
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
 * SPDX-License-Identifier: Apache-2.0
4
 *
5
 *
6
 * Licensed under the Apache License, Version 2.0 (the "License");
7
 * you may not use this file except in compliance with the License.
8
 * You may obtain a copy of the License at
9
 *
10
 * http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing, software
13
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 * See the License for the specific language governing permissions and
16
 * limitations under the License.
17
 */
18

19
package node
20

21
import (
22
        "context"
23
        "encoding/json"
24
        "fmt"
25
        "io"
26
        "sort"
27
        "strings"
28

29
        "github.com/spf13/cobra"
30
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31

32
        "github.com/NVIDIA/skyhook/operator/api/v1alpha1"
33
        "github.com/NVIDIA/skyhook/operator/internal/cli/client"
34
        cliContext "github.com/NVIDIA/skyhook/operator/internal/cli/context"
35
        "github.com/NVIDIA/skyhook/operator/internal/cli/utils"
36
)
37

38
// nodeListOptions carries the flag values accepted by the node list command.
type nodeListOptions struct {
	// skyhookName is the value of --skyhook: the Skyhook CR whose nodes are listed.
	// output is the value of --output/-o: table, json, yaml or wide.
	skyhookName, output string
}
43

44
// BindToCmd binds the options to the command flags
45
func (o *nodeListOptions) BindToCmd(cmd *cobra.Command) {
1✔
46
        cmd.Flags().StringVar(&o.skyhookName, "skyhook", "", "Name of the Skyhook CR (required)")
1✔
47
        cmd.Flags().StringVarP(&o.output, "output", "o", "table", "Output format: table, json, yaml, wide")
1✔
48

1✔
49
        _ = cmd.MarkFlagRequired("skyhook")
1✔
50
}
1✔
51

52
// NewListCmd creates the node list command
53
func NewListCmd(ctx *cliContext.CLIContext) *cobra.Command {
1✔
54
        opts := &nodeListOptions{}
1✔
55

1✔
56
        cmd := &cobra.Command{
1✔
57
                Use:   "list",
1✔
58
                Short: "List all nodes targeted by a Skyhook",
1✔
59
                Long: `List all nodes that have activity for a specific Skyhook.
1✔
60

1✔
61
This command shows all nodes that have Skyhook state annotations for the
1✔
62
specified Skyhook CR, along with a summary of package completion status.`,
1✔
63
                Example: `  # List all nodes targeted by gpu-init Skyhook
1✔
64
  kubectl skyhook node list --skyhook gpu-init
1✔
65

1✔
66
  # Output as JSON
1✔
67
  kubectl skyhook node list --skyhook gpu-init -o json`,
1✔
68
                RunE: func(cmd *cobra.Command, args []string) error {
1✔
NEW
69
                        if opts.skyhookName == "" {
×
NEW
70
                                return fmt.Errorf("--skyhook flag is required")
×
NEW
71
                        }
×
72

NEW
73
                        clientFactory := client.NewFactory(ctx.GlobalFlags.ConfigFlags)
×
NEW
74
                        kubeClient, err := clientFactory.Client()
×
NEW
75
                        if err != nil {
×
NEW
76
                                return fmt.Errorf("initializing kubernetes client: %w", err)
×
NEW
77
                        }
×
78

NEW
79
                        return runNodeList(cmd.Context(), cmd.OutOrStdout(), kubeClient, opts)
×
80
                },
81
        }
82

83
        opts.BindToCmd(cmd)
1✔
84

1✔
85
        return cmd
1✔
86
}
87

88
// nodeListEntry is one row of the node list output: a node together with the
// aggregated state of its packages for the selected Skyhook.
//
// Every field carries both a json and a yaml tag with the same key so that
// structured output uses identical field names in either format, matching
// the tagging already used on nodeListOutput.
type nodeListEntry struct {
	// NodeName is the Kubernetes node name.
	NodeName string `json:"nodeName" yaml:"nodeName"`
	// Status is the overall status derived from the node's package states.
	Status string `json:"status" yaml:"status"`
	// PackagesComplete is the number of packages in the complete state.
	PackagesComplete int `json:"packagesComplete" yaml:"packagesComplete"`
	// PackagesTotal is the number of packages recorded in the node state.
	PackagesTotal int `json:"packagesTotal" yaml:"packagesTotal"`
	// Restarts is the sum of the restart counters of all packages.
	Restarts int32 `json:"restarts" yaml:"restarts"`
}
96

97
func runNodeList(ctx context.Context, out io.Writer, kubeClient *client.Client, opts *nodeListOptions) error {
1✔
98
        // Get all nodes
1✔
99
        nodeList, err := kubeClient.Kubernetes().CoreV1().Nodes().List(ctx, metav1.ListOptions{})
1✔
100
        if err != nil {
1✔
NEW
101
                return fmt.Errorf("listing nodes: %w", err)
×
NEW
102
        }
×
103

104
        annotationKey := nodeStateAnnotationPrefix + opts.skyhookName
1✔
105
        entries := make([]nodeListEntry, 0, len(nodeList.Items))
1✔
106

1✔
107
        for _, node := range nodeList.Items {
2✔
108
                annotation, ok := node.Annotations[annotationKey]
1✔
109
                if !ok {
2✔
110
                        continue
1✔
111
                }
112

113
                var nodeState v1alpha1.NodeState
1✔
114
                if err := json.Unmarshal([]byte(annotation), &nodeState); err != nil {
1✔
NEW
115
                        continue
×
116
                }
117

118
                completeCount := 0
1✔
119
                hasError := false
1✔
120
                hasInProgress := false
1✔
121
                var totalRestarts int32
1✔
122

1✔
123
                for _, pkgStatus := range nodeState {
2✔
124
                        totalRestarts += pkgStatus.Restarts
1✔
125
                        switch pkgStatus.State {
1✔
126
                        case v1alpha1.StateComplete:
1✔
127
                                completeCount++
1✔
128
                        case v1alpha1.StateErroring:
1✔
129
                                hasError = true
1✔
130
                        case v1alpha1.StateInProgress:
1✔
131
                                hasInProgress = true
1✔
132
                        }
133
                }
134

135
                // Determine overall status
136
                status := string(v1alpha1.StateUnknown)
1✔
137
                if hasError {
2✔
138
                        status = string(v1alpha1.StateErroring)
1✔
139
                } else if completeCount == len(nodeState) && len(nodeState) > 0 {
3✔
140
                        status = string(v1alpha1.StateComplete)
1✔
141
                } else if hasInProgress || completeCount > 0 {
3✔
142
                        status = string(v1alpha1.StateInProgress)
1✔
143
                }
1✔
144

145
                entries = append(entries, nodeListEntry{
1✔
146
                        NodeName:         node.Name,
1✔
147
                        Status:           status,
1✔
148
                        PackagesComplete: completeCount,
1✔
149
                        PackagesTotal:    len(nodeState),
1✔
150
                        Restarts:         totalRestarts,
1✔
151
                })
1✔
152
        }
153

154
        // Sort by node name
155
        sort.Slice(entries, func(i, j int) bool {
2✔
156
                return entries[i].NodeName < entries[j].NodeName
1✔
157
        })
1✔
158

159
        if len(entries) == 0 {
2✔
160
                _, _ = fmt.Fprintf(out, "No nodes found for Skyhook %q\n", opts.skyhookName)
1✔
161
                return nil
1✔
162
        }
1✔
163

164
        // Output based on format
165
        output := nodeListOutput{SkyhookName: opts.skyhookName, Nodes: entries}
1✔
166
        switch opts.output {
1✔
167
        case "json":
1✔
168
                return utils.OutputJSON(out, output)
1✔
NEW
169
        case "yaml":
×
NEW
170
                return utils.OutputYAML(out, output)
×
NEW
171
        case "wide":
×
NEW
172
                return outputNodeListTableOrWide(out, opts.skyhookName, entries, true)
×
173
        default:
1✔
174
                return outputNodeListTableOrWide(out, opts.skyhookName, entries, false)
1✔
175
        }
176
}
177

178
// nodeListOutput is the structured output for JSON/YAML
179
type nodeListOutput struct {
180
        SkyhookName string          `json:"skyhookName" yaml:"skyhookName"`
181
        Nodes       []nodeListEntry `json:"nodes" yaml:"nodes"`
182
}
183

184
// nodeListTableConfig returns the table configuration for node list output
185
func nodeListTableConfig() utils.TableConfig[nodeListEntry] {
1✔
186
        return utils.TableConfig[nodeListEntry]{
1✔
187
                Headers: []string{"NODE", "STATUS", "PACKAGES"},
1✔
188
                Extract: func(e nodeListEntry) []string {
2✔
189
                        status := e.Status
1✔
190
                        if e.Status == string(v1alpha1.StateErroring) {
2✔
191
                                status = strings.ToUpper(status)
1✔
192
                        }
1✔
193
                        return []string{e.NodeName, status, fmt.Sprintf("%d/%d", e.PackagesComplete, e.PackagesTotal)}
1✔
194
                },
195
                WideHeaders: []string{"RESTARTS"},
196
                WideExtract: func(e nodeListEntry) []string {
1✔
197
                        return []string{fmt.Sprintf("%d", e.Restarts)}
1✔
198
                },
1✔
199
        }
200
}
201

202
func formatNodeListSummary(entries []nodeListEntry) string {
1✔
203
        totalNodes := len(entries)
1✔
204
        completeNodes := 0
1✔
205
        errorNodes := 0
1✔
206
        for _, e := range entries {
2✔
207
                switch e.Status {
1✔
208
                case string(v1alpha1.StateComplete):
1✔
209
                        completeNodes++
1✔
210
                case string(v1alpha1.StateErroring):
1✔
211
                        errorNodes++
1✔
212
                }
213
        }
214
        return fmt.Sprintf("Summary: %d nodes (%d complete, %d erroring, %d in progress)",
1✔
215
                totalNodes, completeNodes, errorNodes, totalNodes-completeNodes-errorNodes)
1✔
216
}
217

218
func outputNodeListTableOrWide(out io.Writer, skyhookName string, entries []nodeListEntry, wide bool) error {
1✔
219
        headerLine := fmt.Sprintf("Skyhook: %s\n\n%s", skyhookName, formatNodeListSummary(entries))
1✔
220
        if wide {
2✔
221
                return utils.OutputWideWithHeader(out, headerLine, nodeListTableConfig(), entries)
1✔
222
        }
1✔
223
        return utils.OutputTableWithHeader(out, headerLine, nodeListTableConfig(), entries)
1✔
224
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc