• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tensorchord / openmodelz / 6448015943

08 Oct 2023 01:47PM UTC coverage: 27.12% (-0.03%) from 27.15%
6448015943

push

github

web-flow
chore: Fix can not get metrics from BYOC deployment (#185)

Signed-off-by: xieydd <xieydd@gmail.com>

4 of 4 new or added lines in 2 files covered. (100.0%)

982 of 3621 relevant lines covered (27.12%)

1.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/agent/pkg/server/server_run.go
1
package server
2

3
import (
4
        "context"
5
        "errors"
6
        "fmt"
7
        "net/http"
8
        "os"
9
        "os/signal"
10
        "syscall"
11
        "time"
12

13
        "github.com/sirupsen/logrus"
14
        "github.com/tensorchord/openmodelz/agent/api/types"
15
        "k8s.io/apimachinery/pkg/util/wait"
16
)
17

18
func (s *Server) Run() error {
×
19
        srv := &http.Server{
×
20
                Addr:         fmt.Sprintf(":%d", s.config.Server.ServerPort),
×
21
                Handler:      s.router,
×
22
                WriteTimeout: s.config.Server.WriteTimeout,
×
23
                ReadTimeout:  s.config.Server.ReadTimeout,
×
24
        }
×
25

×
26
        go func() {
×
27
                if err := srv.ListenAndServe(); err != nil &&
×
28
                        !errors.Is(err, http.ErrServerClosed) {
×
29
                        logrus.Errorf("listen on port %d error: %v", s.config.Server.ServerPort, err)
×
30
                }
×
31
        }()
32

33
        metricsSrv := &http.Server{
×
34
                Addr:         fmt.Sprintf(":%d", s.config.Metrics.ServerPort),
×
35
                Handler:      s.metricsRouter,
×
36
                ReadTimeout:  s.config.Metrics.PollingInterval,
×
37
                WriteTimeout: s.config.Metrics.PollingInterval,
×
38
        }
×
39
        go func() {
×
40
                if err := metricsSrv.ListenAndServe(); err != nil &&
×
41
                        !errors.Is(err, http.ErrServerClosed) {
×
42
                        logrus.Errorf("listen on port %d error: %v",
×
43
                                s.config.Metrics.ServerPort, err)
×
44
                }
×
45
        }()
46

47
        logrus.WithField("port", s.config.Server.ServerPort).
×
48
                Info("server is running...")
×
49
        logrus.WithField("metrics-port", s.config.Metrics.ServerPort).
×
50
                Info("metrics server is running...")
×
51

×
52
        if s.config.ModelZCloud.Enabled {
×
53
                // check apiserver is ready
×
54
                apiServerReady := make(chan struct{})
×
55
                go func() {
×
56
                        if err := s.modelzCloudClient.WaitForAPIServerReady(); err != nil {
×
57
                                logrus.Fatalf("failed to wait for apiserver ready: %v", err)
×
58
                        }
×
59
                        close(apiServerReady)
×
60
                }()
61
                // websocket
62
                // build websocket
63
                go s.connect(apiServerReady)
×
64

×
65
                // heartbeat with apiserver
×
66
                go wait.UntilWithContext(context.Background(), func(ctx context.Context) {
×
67
                        cluster := types.ManagedCluster{
×
68
                                Name:      s.config.ModelZCloud.Name,
×
69
                                ID:        s.config.ModelZCloud.ID,
×
70
                                Status:    types.ClusterStatusActive,
×
71
                                UpdatedAt: time.Now().UTC(),
×
72
                                TokenID:   s.config.ModelZCloud.TokenID,
×
73
                                Region:    s.config.ModelZCloud.Region,
×
74
                                PrometheusURL: fmt.Sprintf("%s:%d", s.config.Metrics.PrometheusHost,
×
75
                                        s.config.Metrics.PrometheusPort),
×
76
                        }
×
77
                        err := s.runtime.GetClusterInfo(&cluster)
×
78
                        if err != nil {
×
79
                                logrus.Errorf("failed to get managed cluster info: %v", err)
×
80
                        }
×
81

82
                        err = s.modelzCloudClient.UpdateAgentStatus(ctx, apiServerReady, s.config.ModelZCloud.AgentToken, cluster)
×
83
                        if err != nil {
×
84
                                logrus.Errorf("failed to update agent status: %v", err)
×
85
                        }
×
86
                        logrus.Debugf("update agent status: %v", cluster)
×
87
                }, s.config.ModelZCloud.HeartbeatInterval)
88

89
                go wait.UntilWithContext(context.Background(), func(ctx context.Context) {
×
90
                        apikeys, err := s.modelzCloudClient.GetAPIKeys(ctx, apiServerReady, s.config.ModelZCloud.AgentToken, s.config.ModelZCloud.ID)
×
91
                        if err != nil {
×
92
                                logrus.Errorf("failed to get apikeys: %v", err)
×
93
                        }
×
94

95
                        s.config.ModelZCloud.APIKeys = apikeys
×
96
                        logrus.Debugf("update apikeys")
×
97
                }, s.config.ModelZCloud.HeartbeatInterval) // default 1min update, TODO(xieydd) make it configurable
98

99
                go wait.UntilWithContext(context.Background(), func(ctx context.Context) {
×
100
                        namespaces, err := s.modelzCloudClient.GetNamespaces(ctx, apiServerReady, s.config.ModelZCloud.AgentToken, s.config.ModelZCloud.ID)
×
101
                        if err != nil {
×
102
                                logrus.Errorf("failed to get namespaces: %v", err)
×
103
                        }
×
104

105
                        for _, ns := range namespaces.Items {
×
106
                                if ContainString(ns, s.config.ModelZCloud.UserNamespaces) {
×
107
                                        continue
×
108
                                }
109
                                err = s.runtime.NamespaceCreate(ctx, ns)
×
110
                                if err != nil {
×
111
                                        logrus.Errorf("failed to create namespace %s: %v", ns, err)
×
112
                                        continue
×
113
                                }
114
                                s.config.ModelZCloud.UserNamespaces = append(s.config.ModelZCloud.UserNamespaces, ns)
×
115
                                logrus.Debugf("update namespaces")
×
116
                        }
117
                }, s.config.ModelZCloud.HeartbeatInterval) // default 1h update, make it configurable
118
        }
119

120
        quit := make(chan os.Signal, 1)
×
121
        signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
×
122
        <-quit
×
123
        logrus.Info("shutdown server")
×
124

×
125
        ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
×
126
        defer cancel()
×
127

×
128
        return srv.Shutdown(ctx)
×
129
}
130

131
// ContainString reports whether target is equal to at least one element of
// strs. A nil or empty strs yields false.
func ContainString(target string, strs []string) bool {
	for i := 0; i < len(strs); i++ {
		if strs[i] != target {
			continue
		}
		return true
	}
	return false
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc