• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kubevirt / hyperconverged-cluster-operator / 11310572661

13 Oct 2024 02:08AM UTC coverage: 72.006% (-0.2%) from 72.24%
11310572661

Pull #3106

github

dasionov
pkg/monitoring/metrics: add alert for VMs using outdated machine type

- Introduce new alert for VMs using an outdated machine type.

- Machine types are considered outdated if they are no longer compatible
  due to changes in the virt-launcher OS version. These VMs must be
  updated with supported machine types to ensure compatibility and avoid
  potential issues.

- Add a functional test to verify the alert is triggered when VMs with
  outdated machine types are detected.

Signed-off-by: Daniel Sionov <dsionov@redhat.com>
Pull Request #3106: pkg/monitoring/metrics: add new alert for vms using outdated machine type

10 of 40 new or added lines in 1 file covered. (25.0%)

5970 of 8291 relevant lines covered (72.01%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.87
/pkg/monitoring/rules/alerts/operator_alerts.go
1
package alerts
2

3
import (
4
        "fmt"
5
        "os"
6
        "strconv"
7
        "strings"
8

9
        promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
10
        "k8s.io/apimachinery/pkg/util/intstr"
11
        "k8s.io/utils/ptr"
12
        logf "sigs.k8s.io/controller-runtime/pkg/log"
13
)
14

15
// Alert names and shared label keys used by the operator alert rules.
//
// The *Alert constants are the values placed in the Alert field of each rule.
// minSupportedVirtLauncherOSVersion is the version that
// VIRT_LAUNCHER_OS_VERSION must exceed before the VMHasOutdatedMachineType
// alert is created; it is also the first version passed to the machine-type
// expression builder. severityAlertLabelKey and healthImpactAlertLabelKey are
// the label keys set on every rule.
const (
	outOfBandUpdateAlert              = "KubeVirtCRModified"
	unsafeModificationAlert           = "UnsupportedHCOModification"
	installationNotCompletedAlert     = "HCOInstallationIncomplete"
	singleStackIPv6Alert              = "SingleStackIPv6Unsupported"
	MisconfiguredDeschedulerAlert     = "HCOMisconfiguredDescheduler"
	VMOutdatedMachineTypeAlert        = "VMHasOutdatedMachineType"
	minSupportedVirtLauncherOSVersion = 8
	severityAlertLabelKey             = "severity"
	healthImpactAlertLabelKey         = "operator_health_impact"
)
26

27
func operatorAlerts() []promv1.Rule {
1✔
28
        rules := []promv1.Rule{
1✔
29
                {
1✔
30
                        Alert: outOfBandUpdateAlert,
1✔
31
                        Expr:  intstr.FromString("sum by(component_name) ((round(increase(kubevirt_hco_out_of_band_modifications_total[10m]))>0 and kubevirt_hco_out_of_band_modifications_total offset 10m) or (kubevirt_hco_out_of_band_modifications_total != 0 unless kubevirt_hco_out_of_band_modifications_total offset 10m))"),
1✔
32
                        Annotations: map[string]string{
1✔
33
                                "description": "Out-of-band modification for {{ $labels.component_name }}.",
1✔
34
                                "summary":     "{{ $value }} out-of-band CR modifications were detected in the last 10 minutes.",
1✔
35
                        },
1✔
36
                        Labels: map[string]string{
1✔
37
                                severityAlertLabelKey:     "warning",
1✔
38
                                healthImpactAlertLabelKey: "warning",
1✔
39
                        },
1✔
40
                },
1✔
41
                {
1✔
42
                        Alert: unsafeModificationAlert,
1✔
43
                        Expr:  intstr.FromString("sum by(annotation_name, namespace) ((kubevirt_hco_unsafe_modifications)>0)"),
1✔
44
                        Annotations: map[string]string{
1✔
45
                                "description": "unsafe modification for the {{ $labels.annotation_name }} annotation in the HyperConverged resource.",
1✔
46
                                "summary":     "{{ $value }} unsafe modifications were detected in the HyperConverged resource.",
1✔
47
                        },
1✔
48
                        Labels: map[string]string{
1✔
49
                                severityAlertLabelKey:     "info",
1✔
50
                                healthImpactAlertLabelKey: "none",
1✔
51
                        },
1✔
52
                },
1✔
53
                {
1✔
54
                        Alert: installationNotCompletedAlert,
1✔
55
                        Expr:  intstr.FromString("kubevirt_hco_hyperconverged_cr_exists == 0"),
1✔
56
                        For:   ptr.To(promv1.Duration("1h")),
1✔
57
                        Annotations: map[string]string{
1✔
58
                                "description": "the installation was not completed; the HyperConverged custom resource is missing. In order to complete the installation of the Hyperconverged Cluster Operator you should create the HyperConverged custom resource.",
1✔
59
                                "summary":     "the installation was not completed; to complete the installation, create a HyperConverged custom resource.",
1✔
60
                        },
1✔
61
                        Labels: map[string]string{
1✔
62
                                severityAlertLabelKey:     "info",
1✔
63
                                healthImpactAlertLabelKey: "critical",
1✔
64
                        },
1✔
65
                },
1✔
66
                {
1✔
67
                        Alert: singleStackIPv6Alert,
1✔
68
                        Expr:  intstr.FromString("kubevirt_hco_single_stack_ipv6 == 1"),
1✔
69
                        Annotations: map[string]string{
1✔
70
                                "description": "KubeVirt Hyperconverged is not supported on a single stack IPv6 cluster",
1✔
71
                                "summary":     "KubeVirt Hyperconverged is not supported on a single stack IPv6 cluster",
1✔
72
                        },
1✔
73
                        Labels: map[string]string{
1✔
74
                                severityAlertLabelKey:     "critical",
1✔
75
                                healthImpactAlertLabelKey: "critical",
1✔
76
                        },
1✔
77
                },
1✔
78
                {
1✔
79
                        Alert: MisconfiguredDeschedulerAlert,
1✔
80
                        Expr:  intstr.FromString("kubevirt_hco_misconfigured_descheduler == 1"),
1✔
81
                        Annotations: map[string]string{
1✔
82
                                "description": "Kube Descheduler is not correctly configured for KubeVirt",
1✔
83
                                "summary":     "Kube Descheduler is not correctly configured for KubeVirt",
1✔
84
                        },
1✔
85
                        Labels: map[string]string{
1✔
86
                                severityAlertLabelKey:     "critical",
1✔
87
                                healthImpactAlertLabelKey: "critical",
1✔
88
                        },
1✔
89
                },
1✔
90
        }
1✔
91

1✔
92
        if rule, created := createVMOutdatedMachineTypeRule(); created {
1✔
NEW
93
                rules = append(rules, rule)
×
NEW
94
        }
×
95

96
        return rules
1✔
97
}
98

99
func createVMOutdatedMachineTypeRule() (promv1.Rule, bool) {
1✔
100
        logger := logf.Log.WithName("operator-alerts")
1✔
101
        rhelVersion, exists := os.LookupEnv("VIRT_LAUNCHER_OS_VERSION")
1✔
102
        if !exists {
2✔
103
                return promv1.Rule{}, false
1✔
104
        }
1✔
105

NEW
106
        virtLauncherOSVersion, err := strconv.Atoi(rhelVersion)
×
NEW
107
        if err != nil {
×
NEW
108
                logger.Error(err, "Error parsing VIRT_LAUNCHER_OS_VERSION")
×
NEW
109
                return promv1.Rule{}, false
×
NEW
110
        }
×
111

NEW
112
        if virtLauncherOSVersion > minSupportedVirtLauncherOSVersion {
×
NEW
113
                rule := promv1.Rule{
×
NEW
114
                        Alert: VMOutdatedMachineTypeAlert,
×
NEW
115
                        Expr:  intstr.FromString(getMachineTypeVersionExpr(minSupportedVirtLauncherOSVersion, virtLauncherOSVersion)),
×
NEW
116
                        Annotations: map[string]string{
×
NEW
117
                                "description": "There are virtual machines using an outdated machine type that need to be patched.",
×
NEW
118
                                "summary":     "{{ $value }} virtual machines are using an outdated machine type.",
×
NEW
119
                        },
×
NEW
120
                        Labels: map[string]string{
×
NEW
121
                                severityAlertLabelKey:     "warning",
×
NEW
122
                                healthImpactAlertLabelKey: "none",
×
NEW
123
                        },
×
NEW
124
                }
×
NEW
125
                return rule, true
×
NEW
126
        }
×
127

NEW
128
        return promv1.Rule{}, false
×
129
}
130

NEW
131
// getMachineTypeVersionExpr returns the PromQL expression used by the
// outdated-machine-type alert.
//
// For every version v in the half-open range [mn, mx) it adds the pattern
// ".*rhel<v>.*" to an alternation that is matched against the
// guest_os_machine label of kubevirt_vmi_info. The result counts the matching
// series that also have a kubevirt_vm_info series with the same name and
// namespace whose status is Running or Stopped, and is non-empty only when
// that count is above zero.
//
// When the range is empty (mn >= mx) it returns an expression that never
// produces a result, so that no alert can fire.
func getMachineTypeVersionExpr(mn, mx int) string {
	if mn >= mx {
		// With no versions the alternation would be the empty string, giving
		// guest_os_machine=~"". A PromQL matcher that matches the empty string
		// also selects series that do not have the label at all, so the alert
		// would count unrelated VMs instead of none.
		return "vector(0) > 0"
	}

	versions := make([]string, 0, mx-mn)
	for v := mn; v < mx; v++ {
		versions = append(versions, fmt.Sprintf(".*rhel%d.*", v))
	}
	vers := strings.Join(versions, "|")

	// %q wraps the alternation in double quotes and escapes it, so it lands in
	// the expression as a quoted string literal.
	return fmt.Sprintf(`count(kubevirt_vmi_info{guest_os_machine=~%q} and on(name, namespace) kubevirt_vm_info{status=~"Running|Stopped"}) > 0`, vers)
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc