• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NVIDIA / skyhook / 20353746630

18 Dec 2025 10:50PM UTC coverage: 75.716% (-0.2%) from 75.958%
20353746630

Pull #133

github

web-flow
Merge a731af90a into 19dce4787
Pull Request #133: fix: cleanup cli code

29 of 63 new or added lines in 9 files covered. (46.03%)

11 existing lines in 6 files now uncovered.

5818 of 7684 relevant lines covered (75.72%)

1.12 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.92
/operator/cmd/cli/app/node/node_reset.go
1
/*
2
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
 * SPDX-License-Identifier: Apache-2.0
4
 *
5
 *
6
 * Licensed under the Apache License, Version 2.0 (the "License");
7
 * you may not use this file except in compliance with the License.
8
 * You may obtain a copy of the License at
9
 *
10
 * http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing, software
13
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 * See the License for the specific language governing permissions and
16
 * limitations under the License.
17
 */
18

19
package node
20

21
import (
	"bufio"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strings"

	"github.com/spf13/cobra"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/NVIDIA/skyhook/operator/api/v1alpha1"
	"github.com/NVIDIA/skyhook/operator/internal/cli/client"
	cliContext "github.com/NVIDIA/skyhook/operator/internal/cli/context"
	"github.com/NVIDIA/skyhook/operator/internal/cli/utils"
)
36

37
// nodeResetOptions holds the options for the node reset command.
type nodeResetOptions struct {
	// skyhookName is bound to the --skyhook flag and names the Skyhook CR
	// whose per-node state is reset.
	skyhookName string
	// confirm is bound to --confirm / -y; when true the interactive
	// confirmation prompt is skipped.
	confirm bool
}
42

43
// BindToCmd binds the options to the command flags
44
func (o *nodeResetOptions) BindToCmd(cmd *cobra.Command) {
1✔
45
        cmd.Flags().StringVar(&o.skyhookName, "skyhook", "", "Name of the Skyhook CR (required)")
1✔
46
        cmd.Flags().BoolVarP(&o.confirm, "confirm", "y", false, "Skip confirmation prompt")
1✔
47

1✔
48
        _ = cmd.MarkFlagRequired("skyhook")
1✔
49
}
1✔
50

51
// NewResetCmd creates the node reset command
52
func NewResetCmd(ctx *cliContext.CLIContext) *cobra.Command {
1✔
53
        opts := &nodeResetOptions{}
1✔
54

1✔
55
        cmd := &cobra.Command{
1✔
56
                Use:   "reset <node-name...>",
1✔
57
                Short: "Reset all package state on node(s) for a Skyhook",
1✔
58
                Long: `Reset all package state on node(s) for a specific Skyhook, forcing a complete re-run.
1✔
59

1✔
60
This command removes all Skyhook state from the specified node(s), causing
1✔
61
the operator to re-execute all packages from the beginning.
1✔
62

1✔
63
Unlike 'package rerun' which resets a single package, 'node reset' clears
1✔
64
ALL package state for a Skyhook on the specified node(s).
1✔
65

1✔
66
Node names can be exact matches or regex patterns.`,
1✔
67
                Example: `  # Reset all packages on worker-1 for gpu-init Skyhook
1✔
68
  kubectl skyhook node reset worker-1 --skyhook gpu-init --confirm
1✔
69

1✔
70
  # Reset multiple nodes
1✔
71
  kubectl skyhook node reset worker-1 worker-2 worker-3 --skyhook gpu-init --confirm
1✔
72

1✔
73
  # Reset all nodes matching a pattern
1✔
74
  kubectl skyhook node reset "gpu-node-.*" --skyhook gpu-init --confirm
1✔
75

1✔
76
  # Preview changes without applying (dry-run)
1✔
77
  kubectl skyhook node reset worker-1 --skyhook gpu-init --dry-run`,
1✔
78
                Args: cobra.MinimumNArgs(1),
1✔
79
                RunE: func(cmd *cobra.Command, args []string) error {
1✔
80
                        if opts.skyhookName == "" {
×
81
                                return fmt.Errorf("--skyhook flag is required")
×
82
                        }
×
83

84
                        clientFactory := client.NewFactory(ctx.GlobalFlags.ConfigFlags)
×
85
                        kubeClient, err := clientFactory.Client()
×
86
                        if err != nil {
×
87
                                return fmt.Errorf("initializing kubernetes client: %w", err)
×
88
                        }
×
89

90
                        return runNodeReset(cmd.Context(), cmd, kubeClient, args, opts, ctx)
×
91
                },
92
        }
93

94
        opts.BindToCmd(cmd)
1✔
95

1✔
96
        return cmd
1✔
97
}
98

99
func runNodeReset(ctx context.Context, cmd *cobra.Command, kubeClient *client.Client, nodePatterns []string, opts *nodeResetOptions, cliCtx *cliContext.CLIContext) error {
1✔
100
        // Get all nodes
1✔
101
        nodeList, err := kubeClient.Kubernetes().CoreV1().Nodes().List(ctx, metav1.ListOptions{})
1✔
102
        if err != nil {
1✔
103
                return fmt.Errorf("listing nodes: %w", err)
×
104
        }
×
105

106
        // Collect all node names for pattern matching
107
        allNodeNames := make([]string, 0, len(nodeList.Items))
1✔
108
        nodeMap := make(map[string]int) // node name -> index in nodeList.Items
1✔
109
        for i, node := range nodeList.Items {
2✔
110
                allNodeNames = append(allNodeNames, node.Name)
1✔
111
                nodeMap[node.Name] = i
1✔
112
        }
1✔
113

114
        // Match nodes
115
        matchedNodes, err := utils.MatchNodes(nodePatterns, allNodeNames)
1✔
116
        if err != nil {
1✔
117
                return fmt.Errorf("matching nodes: %w", err)
×
118
        }
×
119

120
        if len(matchedNodes) == 0 {
2✔
121
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "No nodes matched the specified patterns\n")
1✔
122
                return nil
1✔
123
        }
1✔
124

125
        // Find nodes that have the specified Skyhook annotation
126
        annotationKey := nodeStateAnnotationPrefix + opts.skyhookName
1✔
127
        nodesToReset := make([]string, 0, len(matchedNodes))
1✔
128
        nodeStates := make(map[string]v1alpha1.NodeState)
1✔
129

1✔
130
        for _, nodeName := range matchedNodes {
2✔
131
                idx := nodeMap[nodeName]
1✔
132
                node := &nodeList.Items[idx]
1✔
133

1✔
134
                annotation, ok := node.Annotations[annotationKey]
1✔
135
                if !ok {
2✔
136
                        continue
1✔
137
                }
138

139
                var nodeState v1alpha1.NodeState
1✔
140
                if err := json.Unmarshal([]byte(annotation), &nodeState); err != nil {
1✔
NEW
141
                        if cliCtx.GlobalFlags.Verbose {
×
NEW
142
                                _, _ = fmt.Fprintf(cmd.ErrOrStderr(), "Warning: skipping node %q - invalid annotation: %v\n", nodeName, err)
×
NEW
143
                        }
×
UNCOV
144
                        continue
×
145
                }
146

147
                nodesToReset = append(nodesToReset, nodeName)
1✔
148
                nodeStates[nodeName] = nodeState
1✔
149
        }
150

151
        if len(nodesToReset) == 0 {
2✔
152
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "No nodes have state for Skyhook %q\n", opts.skyhookName)
1✔
153
                return nil
1✔
154
        }
1✔
155

156
        // Print summary
157
        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Skyhook: %s\n", opts.skyhookName)
1✔
158
        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Nodes to reset (%d):\n", len(nodesToReset))
1✔
159
        for _, nodeName := range nodesToReset {
2✔
160
                nodeState := nodeStates[nodeName]
1✔
161
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "  - %s (%d packages)\n", nodeName, len(nodeState))
1✔
162
        }
1✔
163

164
        // Dry run check
165
        if cliCtx.GlobalFlags.DryRun {
2✔
166
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\n[dry-run] No changes applied\n")
1✔
167
                return nil
1✔
168
        }
1✔
169

170
        // Confirmation
171
        if !opts.confirm {
2✔
172
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\nThis will remove ALL package state for Skyhook %q on these nodes.\n", opts.skyhookName)
1✔
173
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "All packages will re-run from the beginning.\n")
1✔
174
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Continue? [y/N]: ")
1✔
175

1✔
176
                reader := bufio.NewReader(cmd.InOrStdin())
1✔
177
                response, err := reader.ReadString('\n')
1✔
178
                if err != nil {
1✔
179
                        return fmt.Errorf("reading confirmation: %w", err)
×
180
                }
×
181

182
                response = strings.ToLower(strings.TrimSpace(response))
1✔
183
                if response != "y" && response != "yes" {
2✔
184
                        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Aborted\n")
1✔
185
                        return nil
1✔
186
                }
1✔
187
        }
188

189
        // Apply changes
190
        var updateErrors []string
1✔
191
        successCount := 0
1✔
192

1✔
193
        for _, nodeName := range nodesToReset {
2✔
194
                if err := utils.RemoveNodeAnnotation(ctx, kubeClient.Kubernetes(), nodeName, annotationKey); err != nil {
1✔
195
                        updateErrors = append(updateErrors, fmt.Sprintf("%s: %v", nodeName, err))
×
196
                        continue
×
197
                }
198
                successCount++
1✔
199
        }
200

201
        // Print results
202
        if len(updateErrors) > 0 {
1✔
203
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\nErrors resetting some nodes:\n")
×
204
                for _, e := range updateErrors {
×
205
                        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "  - %s\n", e)
×
206
                }
×
207
        }
208

209
        if successCount > 0 {
2✔
210
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\nSuccessfully reset %d node(s) for Skyhook %q\n", successCount, opts.skyhookName)
1✔
211
        }
1✔
212

213
        return nil
1✔
214
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc