• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NVIDIA / skyhook / 20150581929

11 Dec 2025 11:16PM UTC coverage: 77.153%. First build
20150581929

Pull #125

github

t0mmylam
feat: Consolidate CLI e2e tests with proper assertions and CI integration
Pull Request #125: feat: Create CLI e2e tests with assertions and CI integration

1085 of 1296 new or added lines in 13 files covered. (83.72%)

5724 of 7419 relevant lines covered (77.15%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.7
/operator/internal/cli/node/node_reset.go
1
/*
2
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
 * SPDX-License-Identifier: Apache-2.0
4
 *
5
 *
6
 * Licensed under the Apache License, Version 2.0 (the "License");
7
 * you may not use this file except in compliance with the License.
8
 * You may obtain a copy of the License at
9
 *
10
 * http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing, software
13
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 * See the License for the specific language governing permissions and
16
 * limitations under the License.
17
 */
18

19
package node
20

21
import (
22
        "bufio"
23
        "context"
24
        "encoding/json"
25
        "fmt"
26
        "strings"
27

28
        "github.com/spf13/cobra"
29
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30

31
        "github.com/NVIDIA/skyhook/operator/api/v1alpha1"
32
        "github.com/NVIDIA/skyhook/operator/internal/cli/client"
33
        cliContext "github.com/NVIDIA/skyhook/operator/internal/cli/context"
34
        "github.com/NVIDIA/skyhook/operator/internal/cli/utils"
35
)
36

37
// nodeResetOptions carries the flag values accepted by the "node reset"
// command. The fields are populated by cobra through BindToCmd.
type nodeResetOptions struct {
	// skyhookName is the value of the --skyhook flag: the name of the
	// Skyhook CR whose state should be reset.
	skyhookName string
	// confirm is the value of the --confirm / -y flag; when true the
	// interactive confirmation prompt is skipped.
	confirm bool
}
42

43
// BindToCmd binds the options to the command flags
44
func (o *nodeResetOptions) BindToCmd(cmd *cobra.Command) {
1✔
45
        cmd.Flags().StringVar(&o.skyhookName, "skyhook", "", "Name of the Skyhook CR (required)")
1✔
46
        cmd.Flags().BoolVarP(&o.confirm, "confirm", "y", false, "Skip confirmation prompt")
1✔
47

1✔
48
        _ = cmd.MarkFlagRequired("skyhook")
1✔
49
}
1✔
50

51
// NewResetCmd creates the node reset command
52
func NewResetCmd(ctx *cliContext.CLIContext) *cobra.Command {
1✔
53
        opts := &nodeResetOptions{}
1✔
54

1✔
55
        cmd := &cobra.Command{
1✔
56
                Use:   "reset <node-name...>",
1✔
57
                Short: "Reset all package state on node(s) for a Skyhook",
1✔
58
                Long: `Reset all package state on node(s) for a specific Skyhook, forcing a complete re-run.
1✔
59

1✔
60
This command removes all Skyhook state from the specified node(s), causing
1✔
61
the operator to re-execute all packages from the beginning.
1✔
62

1✔
63
Unlike 'package rerun' which resets a single package, 'node reset' clears
1✔
64
ALL package state for a Skyhook on the specified node(s).
1✔
65

1✔
66
Node names can be exact matches or regex patterns.`,
1✔
67
                Example: `  # Reset all packages on worker-1 for gpu-init Skyhook
1✔
68
  kubectl skyhook node reset worker-1 --skyhook gpu-init --confirm
1✔
69

1✔
70
  # Reset multiple nodes
1✔
71
  kubectl skyhook node reset worker-1 worker-2 worker-3 --skyhook gpu-init --confirm
1✔
72

1✔
73
  # Reset all nodes matching a pattern
1✔
74
  kubectl skyhook node reset "gpu-node-.*" --skyhook gpu-init --confirm
1✔
75

1✔
76
  # Preview changes without applying (dry-run)
1✔
77
  kubectl skyhook node reset worker-1 --skyhook gpu-init --dry-run`,
1✔
78
                Args: cobra.MinimumNArgs(1),
1✔
79
                RunE: func(cmd *cobra.Command, args []string) error {
1✔
NEW
80
                        if opts.skyhookName == "" {
×
NEW
81
                                return fmt.Errorf("--skyhook flag is required")
×
NEW
82
                        }
×
83

NEW
84
                        clientFactory := client.NewFactory(ctx.GlobalFlags.ConfigFlags)
×
NEW
85
                        kubeClient, err := clientFactory.Client()
×
NEW
86
                        if err != nil {
×
NEW
87
                                return fmt.Errorf("initializing kubernetes client: %w", err)
×
NEW
88
                        }
×
89

NEW
90
                        return runNodeReset(cmd.Context(), cmd, kubeClient, args, opts, ctx)
×
91
                },
92
        }
93

94
        opts.BindToCmd(cmd)
1✔
95

1✔
96
        return cmd
1✔
97
}
98

99
func runNodeReset(ctx context.Context, cmd *cobra.Command, kubeClient *client.Client, nodePatterns []string, opts *nodeResetOptions, cliCtx *cliContext.CLIContext) error {
1✔
100
        // Get all nodes
1✔
101
        nodeList, err := kubeClient.Kubernetes().CoreV1().Nodes().List(ctx, metav1.ListOptions{})
1✔
102
        if err != nil {
1✔
NEW
103
                return fmt.Errorf("listing nodes: %w", err)
×
NEW
104
        }
×
105

106
        // Collect all node names for pattern matching
107
        allNodeNames := make([]string, 0, len(nodeList.Items))
1✔
108
        nodeMap := make(map[string]int) // node name -> index in nodeList.Items
1✔
109
        for i, node := range nodeList.Items {
2✔
110
                allNodeNames = append(allNodeNames, node.Name)
1✔
111
                nodeMap[node.Name] = i
1✔
112
        }
1✔
113

114
        // Match nodes
115
        matchedNodes, err := utils.MatchNodes(nodePatterns, allNodeNames)
1✔
116
        if err != nil {
1✔
NEW
117
                return fmt.Errorf("matching nodes: %w", err)
×
NEW
118
        }
×
119

120
        if len(matchedNodes) == 0 {
2✔
121
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "No nodes matched the specified patterns\n")
1✔
122
                return nil
1✔
123
        }
1✔
124

125
        // Find nodes that have the specified Skyhook annotation
126
        annotationKey := nodeStateAnnotationPrefix + opts.skyhookName
1✔
127
        nodesToReset := make([]string, 0, len(matchedNodes))
1✔
128
        nodeStates := make(map[string]v1alpha1.NodeState)
1✔
129

1✔
130
        for _, nodeName := range matchedNodes {
2✔
131
                idx := nodeMap[nodeName]
1✔
132
                node := &nodeList.Items[idx]
1✔
133

1✔
134
                annotation, ok := node.Annotations[annotationKey]
1✔
135
                if !ok {
2✔
136
                        continue
1✔
137
                }
138

139
                var nodeState v1alpha1.NodeState
1✔
140
                if err := json.Unmarshal([]byte(annotation), &nodeState); err != nil {
1✔
NEW
141
                        continue
×
142
                }
143

144
                nodesToReset = append(nodesToReset, nodeName)
1✔
145
                nodeStates[nodeName] = nodeState
1✔
146
        }
147

148
        if len(nodesToReset) == 0 {
2✔
149
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "No nodes have state for Skyhook %q\n", opts.skyhookName)
1✔
150
                return nil
1✔
151
        }
1✔
152

153
        // Print summary
154
        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Skyhook: %s\n", opts.skyhookName)
1✔
155
        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Nodes to reset (%d):\n", len(nodesToReset))
1✔
156
        for _, nodeName := range nodesToReset {
2✔
157
                nodeState := nodeStates[nodeName]
1✔
158
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "  - %s (%d packages)\n", nodeName, len(nodeState))
1✔
159
        }
1✔
160

161
        // Dry run check
162
        if cliCtx.GlobalFlags.DryRun {
2✔
163
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\n[dry-run] No changes applied\n")
1✔
164
                return nil
1✔
165
        }
1✔
166

167
        // Confirmation
168
        if !opts.confirm {
2✔
169
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\nThis will remove ALL package state for Skyhook %q on these nodes.\n", opts.skyhookName)
1✔
170
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "All packages will re-run from the beginning.\n")
1✔
171
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Continue? [y/N]: ")
1✔
172

1✔
173
                reader := bufio.NewReader(cmd.InOrStdin())
1✔
174
                response, err := reader.ReadString('\n')
1✔
175
                if err != nil {
1✔
NEW
176
                        return fmt.Errorf("reading confirmation: %w", err)
×
NEW
177
                }
×
178

179
                response = strings.ToLower(strings.TrimSpace(response))
1✔
180
                if response != "y" && response != "yes" {
2✔
181
                        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "Aborted\n")
1✔
182
                        return nil
1✔
183
                }
1✔
184
        }
185

186
        // Apply changes
187
        var updateErrors []string
1✔
188
        successCount := 0
1✔
189

1✔
190
        for _, nodeName := range nodesToReset {
2✔
191
                idx := nodeMap[nodeName]
1✔
192
                node := nodeList.Items[idx].DeepCopy()
1✔
193

1✔
194
                // Remove the Skyhook annotation
1✔
195
                delete(node.Annotations, annotationKey)
1✔
196

1✔
197
                _, err := kubeClient.Kubernetes().CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
1✔
198
                if err != nil {
1✔
NEW
199
                        updateErrors = append(updateErrors, fmt.Sprintf("%s: %v", nodeName, err))
×
NEW
200
                        continue
×
201
                }
202
                successCount++
1✔
203
        }
204

205
        // Print results
206
        if len(updateErrors) > 0 {
1✔
NEW
207
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\nErrors resetting some nodes:\n")
×
NEW
208
                for _, e := range updateErrors {
×
NEW
209
                        _, _ = fmt.Fprintf(cmd.OutOrStdout(), "  - %s\n", e)
×
NEW
210
                }
×
211
        }
212

213
        if successCount > 0 {
2✔
214
                _, _ = fmt.Fprintf(cmd.OutOrStdout(), "\nSuccessfully reset %d node(s) for Skyhook %q\n", successCount, opts.skyhookName)
1✔
215
        }
1✔
216

217
        return nil
1✔
218
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc