• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zalando / postgres-operator / 11294828962

11 Oct 2024 03:11PM UTC coverage: 44.821% (-0.09%) from 44.914%
11294828962

push

github

web-flow
Add major upgrade prechecks (#2772)

Don't fail major upgrade (don't set annotation) if replica(s) are not
(yet) streaming or replication lag is too high

0 of 34 new or added lines in 1 file covered. (0.0%)

1 existing line in 1 file now uncovered.

6729 of 15013 relevant lines covered (44.82%)

26.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.07
/pkg/cluster/majorversionupgrade.go
1
package cluster
2

3
import (
4
        "context"
5
        "encoding/json"
6
        "fmt"
7
        "strings"
8

9
        "github.com/Masterminds/semver"
10
        "github.com/zalando/postgres-operator/pkg/spec"
11
        "github.com/zalando/postgres-operator/pkg/util"
12
        v1 "k8s.io/api/core/v1"
13
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14
        "k8s.io/apimachinery/pkg/types"
15
)
16

17
// VersionMap translates a supported Postgres major version string into the
// integer form used for comparisons in this package (major version * 10000).
// Versions that are not listed resolve to the map's zero value, 0.
var VersionMap = map[string]int{
	"12": 120000,
	"13": 130000,
	"14": 140000,
	"15": 150000,
	"16": 160000,
}
25

26
// Annotation keys written to the postgresql resource to record the outcome of
// the most recent major version upgrade attempt.
const (
	// majorVersionUpgradeSuccessAnnotation holds the timestamp of the last successful upgrade.
	majorVersionUpgradeSuccessAnnotation = "last-major-upgrade-success"
	// majorVersionUpgradeFailureAnnotation holds the timestamp of the last failed upgrade.
	majorVersionUpgradeFailureAnnotation = "last-major-upgrade-failure"
)
30

31
// IsBiggerPostgresVersion Compare two Postgres version numbers
32
func IsBiggerPostgresVersion(old string, new string) bool {
1✔
33
        oldN := VersionMap[old]
1✔
34
        newN := VersionMap[new]
1✔
35
        return newN > oldN
1✔
36
}
1✔
37

38
// GetDesiredMajorVersionAsInt Convert string to comparable integer of PG version
39
func (c *Cluster) GetDesiredMajorVersionAsInt() int {
6✔
40
        return VersionMap[c.GetDesiredMajorVersion()]
6✔
41
}
6✔
42

43
// GetDesiredMajorVersion returns major version to use, incl. potential auto upgrade
44
func (c *Cluster) GetDesiredMajorVersion() string {
7✔
45

7✔
46
        if c.Config.OpConfig.MajorVersionUpgradeMode == "full" {
7✔
47
                // e.g. current is 12, minimal is 12 allowing 12 to 16 clusters, everything below is upgraded
×
48
                if IsBiggerPostgresVersion(c.Spec.PgVersion, c.Config.OpConfig.MinimalMajorVersion) {
×
49
                        c.logger.Infof("overwriting configured major version %s to %s", c.Spec.PgVersion, c.Config.OpConfig.TargetMajorVersion)
×
50
                        return c.Config.OpConfig.TargetMajorVersion
×
51
                }
×
52
        }
53

54
        return c.Spec.PgVersion
7✔
55
}
56

57
func (c *Cluster) isUpgradeAllowedForTeam(owningTeam string) bool {
×
58
        allowedTeams := c.OpConfig.MajorVersionUpgradeTeamAllowList
×
59

×
60
        if len(allowedTeams) == 0 {
×
61
                return false
×
62
        }
×
63

64
        return util.SliceContains(allowedTeams, owningTeam)
×
65
}
66

67
func (c *Cluster) annotatePostgresResource(isSuccess bool) error {
×
68
        annotations := make(map[string]string)
×
69
        currentTime := metav1.Now().Format("2006-01-02T15:04:05Z")
×
70
        if isSuccess {
×
71
                annotations[majorVersionUpgradeSuccessAnnotation] = currentTime
×
72
        } else {
×
73
                annotations[majorVersionUpgradeFailureAnnotation] = currentTime
×
74
        }
×
75
        patchData, err := metaAnnotationsPatch(annotations)
×
76
        if err != nil {
×
77
                c.logger.Errorf("could not form patch for %s postgresql resource: %v", c.Name, err)
×
78
                return err
×
79
        }
×
80
        _, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.MergePatchType, patchData, metav1.PatchOptions{})
×
81
        if err != nil {
×
82
                c.logger.Errorf("failed to patch annotations to postgresql resource: %v", err)
×
83
                return err
×
84
        }
×
85
        return nil
×
86
}
87

88
func (c *Cluster) removeFailuresAnnotation() error {
×
89
        annotationToRemove := []map[string]string{
×
90
                {
×
91
                        "op":   "remove",
×
92
                        "path": fmt.Sprintf("/metadata/annotations/%s", majorVersionUpgradeFailureAnnotation),
×
93
                },
×
94
        }
×
95
        removePatch, err := json.Marshal(annotationToRemove)
×
96
        if err != nil {
×
97
                c.logger.Errorf("could not form removal patch for %s postgresql resource: %v", c.Name, err)
×
98
                return err
×
99
        }
×
100
        _, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.JSONPatchType, removePatch, metav1.PatchOptions{})
×
101
        if err != nil {
×
102
                c.logger.Errorf("failed to remove annotations from postgresql resource: %v", err)
×
103
                return err
×
104
        }
×
105
        return nil
×
106
}
107

108
/*
109
Execute upgrade when mode is set to manual or full or when the owning team is allowed for upgrade (and mode is "off").
110

111
Manual upgrade means, it is triggered by the user via manifest version change
112
Full upgrade means, operator also determines the minimal version used accross all clusters and upgrades violators.
113
*/
114
func (c *Cluster) majorVersionUpgrade() error {
6✔
115

6✔
116
        if c.OpConfig.MajorVersionUpgradeMode == "off" && !c.isUpgradeAllowedForTeam(c.Spec.TeamID) {
6✔
117
                return nil
×
118
        }
×
119

120
        desiredVersion := c.GetDesiredMajorVersionAsInt()
6✔
121

6✔
122
        if c.currentMajorVersion >= desiredVersion {
12✔
123
                if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it
6✔
124
                        c.removeFailuresAnnotation()
×
125
                        c.logger.Infof("removing failure annotation as the cluster is already up to date")
×
126
                }
×
127
                c.logger.Infof("cluster version up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion)
6✔
128
                return nil
6✔
129
        }
130

131
        if !isInMainternanceWindow(c.Spec.MaintenanceWindows) {
×
132
                c.logger.Infof("skipping major version upgrade, not in maintenance window")
×
133
                return nil
×
134
        }
×
135

136
        pods, err := c.listPods()
×
137
        if err != nil {
×
138
                return err
×
139
        }
×
140

141
        allRunning := true
×
142

×
143
        var masterPod *v1.Pod
×
144

×
145
        for i, pod := range pods {
×
146
                ps, _ := c.patroni.GetMemberData(&pod)
×
147

×
148
                if ps.State != "running" {
×
149
                        allRunning = false
×
150
                        c.logger.Infof("identified non running pod, potentially skipping major version upgrade")
×
151
                }
×
152

153
                if ps.Role == "master" {
×
154
                        masterPod = &pods[i]
×
155
                        c.currentMajorVersion = ps.ServerVersion
×
156
                }
×
157
        }
158

159
        // Recheck version with newest data from Patroni
160
        if c.currentMajorVersion >= desiredVersion {
×
161
                if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it
×
162
                        c.removeFailuresAnnotation()
×
163
                        c.logger.Infof("removing failure annotation as the cluster is already up to date")
×
164
                }
×
165
                c.logger.Infof("recheck cluster version is already up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion)
×
166
                return nil
×
167
        }
168

169
        if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists {
×
170
                c.logger.Infof("last major upgrade failed, skipping upgrade")
×
171
                return nil
×
172
        }
×
173

NEW
174
        members, err := c.patroni.GetClusterMembers(masterPod)
×
NEW
175
        if err != nil {
×
NEW
176
                c.logger.Error("could not get cluster members data from Patroni API, skipping major version upgrade")
×
NEW
177
                return err
×
NEW
178
        }
×
NEW
179
        patroniData, err := c.patroni.GetMemberData(masterPod)
×
NEW
180
        if err != nil {
×
NEW
181
                c.logger.Error("could not get members data from Patroni API, skipping major version upgrade")
×
NEW
182
                return err
×
NEW
183
        }
×
NEW
184
        patroniVer, err := semver.NewVersion(patroniData.Patroni.Version)
×
NEW
185
        if err != nil {
×
NEW
186
                c.logger.Error("error parsing Patroni version")
×
NEW
187
                patroniVer, _ = semver.NewVersion("3.0.4")
×
NEW
188
        }
×
NEW
189
        verConstraint, _ := semver.NewConstraint(">= 3.0.4")
×
NEW
190
        checkStreaming, _ := verConstraint.Validate(patroniVer)
×
NEW
191

×
NEW
192
        for _, member := range members {
×
NEW
193
                if PostgresRole(member.Role) == Leader {
×
NEW
194
                        continue
×
195
                }
NEW
196
                if checkStreaming && member.State != "streaming" {
×
NEW
197
                        c.logger.Infof("skipping major version upgrade, replica %s is not streaming from primary", member.Name)
×
NEW
198
                        return nil
×
NEW
199
                }
×
NEW
200
                if member.Lag > 16*1024*1024 {
×
NEW
201
                        c.logger.Infof("skipping major version upgrade, replication lag on member %s is too high", member.Name)
×
NEW
202
                        return nil
×
NEW
203
                }
×
204
        }
205

206
        isUpgradeSuccess := true
×
207
        numberOfPods := len(pods)
×
208
        if allRunning && masterPod != nil {
×
209
                c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion)
×
210
                if c.currentMajorVersion < desiredVersion {
×
211
                        podName := &spec.NamespacedName{Namespace: masterPod.Namespace, Name: masterPod.Name}
×
212
                        c.logger.Infof("triggering major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
×
213
                        c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "starting major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
×
214
                        upgradeCommand := fmt.Sprintf("set -o pipefail && /usr/bin/python3 /scripts/inplace_upgrade.py %d 2>&1 | tee last_upgrade.log", numberOfPods)
×
215

×
216
                        c.logger.Debug("checking if the spilo image runs with root or non-root (check for user id=0)")
×
217
                        resultIdCheck, errIdCheck := c.ExecCommand(podName, "/bin/bash", "-c", "/usr/bin/id -u")
×
218
                        if errIdCheck != nil {
×
219
                                c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "checking user id to run upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, errIdCheck)
×
220
                        }
×
221

222
                        resultIdCheck = strings.TrimSuffix(resultIdCheck, "\n")
×
NEW
223
                        var result, scriptErrMsg string
×
224
                        if resultIdCheck != "0" {
×
225
                                c.logger.Infof("user id was identified as: %s, hence default user is non-root already", resultIdCheck)
×
226
                                result, err = c.ExecCommand(podName, "/bin/bash", "-c", upgradeCommand)
×
NEW
227
                                scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log")
×
228
                        } else {
×
229
                                c.logger.Infof("user id was identified as: %s, using su to reach the postgres user", resultIdCheck)
×
230
                                result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand)
×
NEW
231
                                scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log")
×
232
                        }
×
233
                        if err != nil {
×
234
                                isUpgradeSuccess = false
×
235
                                c.annotatePostgresResource(isUpgradeSuccess)
×
NEW
236
                                c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, scriptErrMsg)
×
NEW
237
                                return fmt.Errorf(scriptErrMsg)
×
UNCOV
238
                        }
×
239

240
                        c.annotatePostgresResource(isUpgradeSuccess)
×
241
                        c.logger.Infof("upgrade action triggered and command completed: %s", result[:100])
×
242
                        c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion)
×
243
                }
244
        }
245

246
        return nil
×
247
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc