• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kubevirt / containerized-data-importer / #4810

25 Jul 2024 09:15PM UTC coverage: 59.028% (-0.03%) from 59.062%
#4810

push

travis-ci

web-flow
Fix race condition in multi-stage import logic (#3348)

This commit attempts to fix a race condition in the multi-stage import logic when updating the checkpoint annotations for a PVC.

Once a multi-stage import pod succeeds and the current checkpoint is copied, we usually proceed to update the checkpoint and pod annotations to manually trigger a new importer pod creation with the updated data. This operation is racy as the existing logic requires the pod to be succeeded and deleted before updating the pod annotations. This could lead to cases where we update the checkpoint annotations but the pod ones remain succeeded.

Signed-off-by: Alvaro Romero <alromero@redhat.com>

0 of 9 new or added lines in 1 file covered. (0.0%)

6 existing lines in 2 files now uncovered.

16470 of 27902 relevant lines covered (59.03%)

0.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/pkg/controller/common/checkpoint-util.go
1
/*
2
Copyright 2023 The CDI Authors.
3

4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7

8
        http://www.apache.org/licenses/LICENSE-2.0
9

10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
*/
16

17
package common
18

19
import (
20
        "context"
21
        "fmt"
22
        "reflect"
23
        "strconv"
24
        "strings"
25

26
        "github.com/go-logr/logr"
27

28
        corev1 "k8s.io/api/core/v1"
29
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30

31
        "sigs.k8s.io/controller-runtime/pkg/client"
32

33
        cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
34
)
35

36
const (
37
        // ImportPaused provides a const to indicate that a multistage import is waiting for the next stage
38
        ImportPaused = "ImportPaused"
39
        // MessageImportPaused provides a const for a "multistage import paused" message
40
        MessageImportPaused = "Multistage import into PVC %s is paused"
41
)
42

43
// CheckpointRecord is set after comparing the list of checkpoints in the DataVolume/VolumeImportSource
44
// spec with the annotations on the PVC indicating which checkpoints have already been copied.
45
// Return the first checkpoint that does not have this annotation, meaning the first checkpoint that has not yet been copied.
46
type CheckpointRecord struct {
47
        cdiv1.DataVolumeCheckpoint
48
        IsFinal bool
49
}
50

51
// CheckpointArgs is a struct used to store all checkpoint-related arguments to simplify passing them.
52
type CheckpointArgs struct {
53
        Client client.Client
54
        Log    logr.Logger
55
        // Checkpoints is a list of DataVolumeCheckpoints, representing stages in a multistage import.
56
        Checkpoints []cdiv1.DataVolumeCheckpoint
57
        // IsFinal indicates whether the current DataVolumeCheckpoint is the final checkpoint.
58
        IsFinal bool
59
}
60

61
// UpdatesMultistageImportSucceeded handles multi-stage annotations when the importer pod is succeeded
62
func UpdatesMultistageImportSucceeded(pvc *corev1.PersistentVolumeClaim, args *CheckpointArgs) error {
×
63
        if multiStageImport := metav1.HasAnnotation(pvc.ObjectMeta, AnnCurrentCheckpoint); !multiStageImport {
×
64
                return nil
×
65
        }
×
66

67
        // The presence of the current checkpoint annotation indicates it is a stage in a multistage import.
68
        // If all the checkpoints have been copied, then we need to remove the annotations from the PVC.
69
        // Otherwise, we need to change the annotations to advance to the next checkpoint.
70
        currentCheckpoint := pvc.Annotations[AnnCurrentCheckpoint]
×
71
        alreadyCopied := checkpointAlreadyCopied(pvc, currentCheckpoint)
×
72
        finalCheckpoint, _ := strconv.ParseBool(pvc.Annotations[AnnFinalCheckpoint])
×
73

×
74
        if finalCheckpoint && alreadyCopied {
×
75
                // Last checkpoint done, so clean up
×
76
                if err := deleteMultistageImportAnnotations(pvc, args); err != nil {
×
77
                        return err
×
78
                }
×
79
        } else {
×
80
                // Advances annotations to next checkpoint
×
81
                if err := setPvcMultistageImportAnnotations(pvc, args); err != nil {
×
82
                        return err
×
83
                }
×
84
        }
85
        return nil
×
86
}
87

88
// MaybeSetPvcMultiStageAnnotation sets the annotation if pvc needs it, and does not have it yet
89
func MaybeSetPvcMultiStageAnnotation(pvc *corev1.PersistentVolumeClaim, args *CheckpointArgs) error {
×
90
        if pvc.Status.Phase == corev1.ClaimBound {
×
91
                // If a PVC already exists with no multi-stage annotations, check if it
×
92
                // needs them set (if not already finished with an import).
×
93
                multiStageImport := (len(args.Checkpoints) > 0)
×
94
                multiStageAnnotationsSet := metav1.HasAnnotation(pvc.ObjectMeta, AnnCurrentCheckpoint)
×
95
                multiStageAlreadyDone := metav1.HasAnnotation(pvc.ObjectMeta, AnnMultiStageImportDone)
×
96
                if multiStageImport && !multiStageAnnotationsSet && !multiStageAlreadyDone {
×
97
                        err := setPvcMultistageImportAnnotations(pvc, args)
×
98
                        if err != nil {
×
99
                                return err
×
100
                        }
×
101
                }
102
        }
103
        return nil
×
104
}
105

106
// Set the PVC annotations related to multi-stage imports so that they point to the next checkpoint to copy.
107
func setPvcMultistageImportAnnotations(pvc *corev1.PersistentVolumeClaim, args *CheckpointArgs) error {
×
108
        pvcCopy := pvc.DeepCopy()
×
109

×
110
        // Only mark this checkpoint complete if it was completed by the current pod.
×
111
        // This keeps us from skipping over checkpoints when a reconcile fails at a bad time.
×
112
        uuidAlreadyUsed := false
×
113
        for key, value := range pvcCopy.Annotations {
×
114
                if strings.HasPrefix(key, getCheckpointCopiedKey("")) { // Blank checkpoint name to get just the prefix
×
115
                        if value == pvcCopy.Annotations[AnnCurrentPodID] {
×
116
                                uuidAlreadyUsed = true
×
117
                                break
×
118
                        }
119
                }
120
        }
121
        if !uuidAlreadyUsed {
×
122
                // Mark checkpoint complete by saving UID of current pod to a
×
123
                // PVC annotation specific to this checkpoint.
×
124
                currentCheckpoint := pvcCopy.Annotations[AnnCurrentCheckpoint]
×
125
                if currentCheckpoint != "" {
×
126
                        currentPodID := pvcCopy.Annotations[AnnCurrentPodID]
×
127
                        annotation := getCheckpointCopiedKey(currentCheckpoint)
×
128
                        pvcCopy.ObjectMeta.Annotations[annotation] = currentPodID
×
129
                        args.Log.V(1).Info("UUID not already used, marking checkpoint completed by current pod ID.", "checkpoint", currentCheckpoint, "podId", currentPodID)
×
130
                } else {
×
131
                        args.Log.Info("Cannot mark empty checkpoint complete. Check spec for empty checkpoints.")
×
132
                }
×
133
        }
134
        // else: If the UID was already used for another transfer, then we are
135
        // just waiting for a new pod to start up to transfer the next checkpoint.
136

137
        // Set multi-stage PVC annotations so further reconcile loops will create new pods as needed.
138
        checkpoint := GetNextCheckpoint(pvcCopy, args)
×
139
        if checkpoint != nil { // Only move to the next checkpoint if there is a next checkpoint to move to
×
140
                pvcCopy.ObjectMeta.Annotations[AnnCurrentCheckpoint] = checkpoint.Current
×
141
                pvcCopy.ObjectMeta.Annotations[AnnPreviousCheckpoint] = checkpoint.Previous
×
142
                pvcCopy.ObjectMeta.Annotations[AnnFinalCheckpoint] = strconv.FormatBool(checkpoint.IsFinal)
×
143

×
144
                // Check to see if there is a running pod for this PVC. If there are
×
145
                // more checkpoints to copy but the PVC is stopped in Succeeded,
×
146
                // reset the phase to get another pod started for the next checkpoint.
×
147
                podNamespace := pvc.Namespace
×
148

×
149
                phase := pvcCopy.ObjectMeta.Annotations[AnnPodPhase]
×
150
                pod, _ := GetPodFromPvc(args.Client, podNamespace, pvcCopy)
×
NEW
151
                if phase == string(corev1.PodSucceeded) {
×
NEW
152
                        if pod != nil {
×
NEW
153
                                // Once pod succeeds we can safely assume it's either being deleted or
×
NEW
154
                                // intentionally retained for debugging purposes.
×
NEW
155
                                // TODO: This pod should always have a deletion timestamp (GetPodFromPvc ignores multi-stage pods with AnnPodRetainAfterCompletion).
×
NEW
156
                                //       Consider handling the scenario where it lacks one.
×
NEW
157
                                args.Log.V(3).Info(fmt.Sprintf("Pod %s still exists but it's being deleted, continuing with multi-stage import", pod.Name))
×
NEW
158
                        }
×
159
                        // Reset PVC phase so importer will create a new pod
160
                        pvcCopy.ObjectMeta.Annotations[AnnPodPhase] = string(corev1.PodUnknown)
×
161
                        delete(pvcCopy.ObjectMeta.Annotations, AnnImportPod)
×
162
                }
163
                // else: There's a pod already running, no need to try to start a new one.
164
        }
165
        // else: There aren't any checkpoints ready to be copied over.
166

167
        // only update if something has changed
168
        if !reflect.DeepEqual(pvc, pvcCopy) {
×
NEW
169
                args.Log.V(1).Info("Updating PVC with new checkpoint info.", "current checkpoint:", pvcCopy.Annotations[AnnCurrentCheckpoint], "is final:", pvcCopy.Annotations[AnnFinalCheckpoint])
×
170
                return args.Client.Update(context.TODO(), pvcCopy)
×
171
        }
×
172
        return nil
×
173
}
174

175
// Clean up PVC annotations after a multi-stage import.
176
func deleteMultistageImportAnnotations(pvc *corev1.PersistentVolumeClaim, args *CheckpointArgs) error {
×
177
        pvcCopy := pvc.DeepCopy()
×
178
        delete(pvcCopy.Annotations, AnnCurrentCheckpoint)
×
179
        delete(pvcCopy.Annotations, AnnPreviousCheckpoint)
×
180
        delete(pvcCopy.Annotations, AnnFinalCheckpoint)
×
181
        delete(pvcCopy.Annotations, AnnCurrentPodID)
×
182

×
183
        prefix := getCheckpointCopiedKey("")
×
184
        for key := range pvcCopy.Annotations {
×
185
                if strings.HasPrefix(key, prefix) {
×
186
                        delete(pvcCopy.Annotations, key)
×
187
                }
×
188
        }
189

190
        pvcCopy.ObjectMeta.Annotations[AnnMultiStageImportDone] = "true"
×
191

×
192
        // only update if something has changed
×
193
        if !reflect.DeepEqual(pvc, pvcCopy) {
×
194
                return args.Client.Update(context.TODO(), pvcCopy)
×
195
        }
×
196
        return nil
×
197
}
198

199
// Single place to hold the scheme for annotations that indicate a checkpoint
200
// has already been copied. Currently storage.checkpoint.copied.[checkpoint] = ID,
201
// where ID is the UID of the pod that successfully transferred that checkpoint.
202
func getCheckpointCopiedKey(checkpoint string) string {
×
203
        return AnnCheckpointsCopied + "." + checkpoint
×
204
}
×
205

206
// Find out if this checkpoint has already been copied by looking for an annotation
207
// like storage.checkpoint.copied.[checkpoint]. If it exists, then this checkpoint
208
// was already copied.
209
func checkpointAlreadyCopied(pvc *corev1.PersistentVolumeClaim, checkpoint string) bool {
×
210
        annotation := getCheckpointCopiedKey(checkpoint)
×
211
        return metav1.HasAnnotation(pvc.ObjectMeta, annotation)
×
212
}
×
213

214
// GetNextCheckpoint returns the appropriate checkpoint according to multistage annotations
215
func GetNextCheckpoint(pvc *corev1.PersistentVolumeClaim, args *CheckpointArgs) *CheckpointRecord {
×
216
        numCheckpoints := len(args.Checkpoints)
×
217
        if numCheckpoints < 1 {
×
218
                return nil
×
219
        }
×
220

221
        // If there are no annotations, get the first checkpoint from the spec
222
        if pvc.ObjectMeta.Annotations[AnnCurrentCheckpoint] == "" {
×
223
                checkpoint := &CheckpointRecord{
×
224
                        cdiv1.DataVolumeCheckpoint{
×
225
                                Current:  args.Checkpoints[0].Current,
×
226
                                Previous: args.Checkpoints[0].Previous,
×
227
                        },
×
228
                        (numCheckpoints == 1) && args.IsFinal,
×
229
                }
×
230
                return checkpoint
×
231
        }
×
232

233
        // If there are annotations, keep checking the spec checkpoint list for an existing "copied.X" annotation until the first one not found
234
        for count, specCheckpoint := range args.Checkpoints {
×
235
                if specCheckpoint.Current == "" {
×
236
                        args.Log.Info(fmt.Sprintf("DataVolume spec has a blank 'current' entry in checkpoint %d", count))
×
237
                        continue
×
238
                }
239
                if !checkpointAlreadyCopied(pvc, specCheckpoint.Current) {
×
240
                        checkpoint := &CheckpointRecord{
×
241
                                cdiv1.DataVolumeCheckpoint{
×
242
                                        Current:  specCheckpoint.Current,
×
243
                                        Previous: specCheckpoint.Previous,
×
244
                                },
×
245
                                (numCheckpoints == (count + 1)) && args.IsFinal,
×
246
                        }
×
247
                        return checkpoint
×
248
                }
×
249
        }
250

251
        return nil
×
252
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc