• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #5074

17 May 2026 01:15AM UTC coverage: 73.309% (-0.008%) from 73.317%
#5074

push

travis-ci

web-flow
feat (TDgpt): Dynamic Model Synchronization Enhancements (#35344)

* refactor: do some internal refactor.

* fix: fix multiprocess sync issue.

* feat: add dynamic anomaly detection and forecasting services

* fix: log error message for undeploying model in exception handling

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix: handle undeploy when model exists only on disk

Agent-Logs-Url: https://github.com/taosdata/TDengine/sessions/286aafa0-c3ce-4c27-b803-2707571e9dc1

Co-authored-by: hjxilinx <8252296+hjxilinx@users.noreply.github.com>

* fix: guard dynamic registry concurrent access

Agent-Logs-Url: https://github.com/taosdata/TDengine/sessions/5e4db858-6458-40f4-ac28-d1b1b7f97c18

Co-authored-by: hjxilinx <8252296+hjxilinx@users.noreply.github.com>

* fix: tighten service list locking scope

Agent-Logs-Url: https://github.com/taosdata/TDengine/sessions/5e4db858-6458-40f4-ac28-d1b1b7f97c18

Co-authored-by: hjxilinx <8252296+hjxilinx@users.noreply.github.com>

* fix: restore prophet support and update tests per review feedback

Agent-Logs-Url: https://github.com/taosdata/TDengine/sessions/92298ae1-7da6-4d07-b20e-101c7cd0b26b

Co-authored-by: hjxilinx <8252296+hjxilinx@users.noreply.github.com>

* fix: improve test name and move copy inside lock scope

Agent-Logs-Url: https://github.com/taosdata/TDengine/sessions/92298ae1-7da6-4d07-b20e-101c7cd0b26b

Co-authored-by: hjxilinx <8252296+hjxilinx@users.noreply.github.com>

* Potential fix for pull request finding

Co-au... (continued)

281356 of 383795 relevant lines covered (73.31%)

138311536.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.17
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndBnode.h"
21
#include "mndCluster.h"
22
#include "mndCompact.h"
23
#include "mndCompactDetail.h"
24
#include "mndConfig.h"
25
#include "mndConsumer.h"
26
#include "mndDb.h"
27
#include "mndDnode.h"
28
#include "mndEncryptAlgr.h"
29
#include "mndFunc.h"
30
#include "mndGrant.h"
31
#include "mndIndex.h"
32
#include "mndInfoSchema.h"
33
#include "mndInstance.h"
34
#include "mndMnode.h"
35
#include "mndMount.h"
36
#include "mndPerfSchema.h"
37
#include "mndPrivilege.h"
38
#include "mndProfile.h"
39
#include "mndQnode.h"
40
#include "mndQuery.h"
41
#include "mndRetention.h"
42
#include "mndRetentionDetail.h"
43
#include "mndRole.h"
44
#include "mndRsma.h"
45
#include "mndScan.h"
46
#include "mndScanDetail.h"
47
#include "mndSecurityPolicy.h"
48
#include "mndShow.h"
49
#include "mndSma.h"
50
#include "mndSnode.h"
51
#include "mndSsMigrate.h"
52
#include "mndStb.h"
53
#include "mndStream.h"
54
#include "mndSubscribe.h"
55
#include "mndSync.h"
56
#include "mndTelem.h"
57
#include "mndToken.h"
58
#include "mndTopic.h"
59
#include "mndTrans.h"
60
#include "mndUser.h"
61
#include "mndVgroup.h"
62
#include "mndView.h"
63
#include "mndXnode.h"
64
#include "tencrypt.h"
65

66
#define UPGRADE_INTERVAL 10
67
/*
 * Take one RPC reference on the mnode.
 *
 * The check and the increment run under the mnode read lock. Returns
 * TSDB_CODE_APP_IS_STOPPING when the mnode is stopped, 1 when this node is
 * not the leader, and 0 after rpcRef has been incremented.
 */
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t result = 0;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    result = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
  } else {
    result = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(result);
}
85

86
/* Drop one RPC reference; rpcRef is decremented while holding the mnode read lock. */
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
306,858,319✔
96

97
static void *mndBuildTimerMsg(int32_t *pContLen) {
66,341,893✔
98
  terrno = 0;
66,341,893✔
99
  SMTimerReq timerReq = {0};
66,341,893✔
100

101
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
66,341,893✔
102
  if (contLen <= 0) return NULL;
66,341,893✔
103
  void *pReq = rpcMallocCont(contLen);
66,341,893✔
104
  if (pReq == NULL) return NULL;
66,341,893✔
105

106
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
66,341,893✔
107
    mError("failed to serialize timer msg since %s", terrstr());
×
108
  }
109
  *pContLen = contLen;
66,340,909✔
110
  return pReq;
66,341,097✔
111
}
112

113
/* Post a TDMT_MND_TRANS_TIMER message to the write queue; a queueing failure is only logged. */
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
13,308,266✔
125

126
/*
 * Bring the sdb up to TSDB_MNODE_BUILTIN_DATA_VERSION.
 *
 * If the sdb already reports itself upgraded, only pMnode->version is synced.
 * Otherwise the upgrade is attempted when the recorded version is older and
 * this node is leader; pMnode->version is bumped once sdbIsUpgraded() agrees.
 */
static void mndPullupUpgradeSdb(SMnode *pMnode) {
  if (sdbIsUpgraded(pMnode->pSdb)) {
    pMnode->version = TSDB_MNODE_BUILTIN_DATA_VERSION;
    return;
  }

  // Nothing to do unless the data is old and we are the leader.
  if (pMnode->version >= TSDB_MNODE_BUILTIN_DATA_VERSION || !mndIsLeader(pMnode)) {
    return;
  }

  if (sdbUpgrade(pMnode->pSdb, pMnode->version) != 0) {
    mError("failed to upgrade sdb while start mnode");
    return;
  }

  if (sdbIsUpgraded(pMnode->pSdb)) {
    pMnode->version = TSDB_MNODE_BUILTIN_DATA_VERSION;
  }
}
142

143
/* Post a TDMT_MND_COMPACT_TIMER message to the write queue; a queueing failure is only logged. */
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2,669,060✔
155

156
/* Post a TDMT_MND_SCAN_TIMER message to the write queue; a queueing failure is only logged. */
static void mndPullupScans(SMnode *pMnode) {
  mTrace("pullup scan timer msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_SCAN_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2,669,060✔
168

169
/* Post a TDMT_MND_INSTANCE_TIMER message to the write queue; a queueing failure is only logged. */
static void mndPullupInstances(SMnode *pMnode) {
  mTrace("pullup instance timer msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_INSTANCE_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,329,670✔
180

181
/*
 * Post a TDMT_MND_TTL_TIMER message to the write queue.
 *
 * Nothing is queued when the timer payload cannot be built; a queueing
 * failure is only logged.
 */
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // mndBuildTimerMsg() returns NULL on failure; never enqueue a message without a payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2,792,660✔
191

192
/*
 * Post a TDMT_MND_TRIM_DB_TIMER message to the write queue.
 *
 * Nothing is queued when the timer payload cannot be built; a queueing
 * failure is only logged.
 */
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // mndBuildTimerMsg() returns NULL on failure; never enqueue a message without a payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,723✔
202

203
/*
 * Post a TDMT_MND_QUERY_TRIM_TIMER message to the write queue.
 *
 * Nothing is queued when the timer payload cannot be built; a queueing
 * failure is only logged.
 */
static void mndPullupQueryTrimDb(SMnode *pMnode) {
  mTrace("pullup trim query");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // mndBuildTimerMsg() returns NULL on failure; never enqueue a message without a payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_QUERY_TRIM_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2,885,380✔
212

213
/*
 * Post a TDMT_MND_SSMIGRATE_DB_TIMER message to the write queue.
 *
 * Does nothing unless grantCheck(TSDB_GRANT_SHARED_STORAGE) succeeds.
 * Nothing is queued when the timer payload cannot be built; a queueing
 * failure is only logged.
 */
static void mndPullupSsMigrateDb(SMnode *pMnode) {
  if (grantCheck(TSDB_GRANT_SHARED_STORAGE) != TSDB_CODE_SUCCESS) {
    return;
  }

  mTrace("pullup ssmigrate db");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // mndBuildTimerMsg() returns NULL on failure; never enqueue a message without a payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_SSMIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
226

227
/*
 * Post a TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER message to the write queue.
 *
 * Nothing is queued when the timer payload cannot be built; a queueing
 * failure is only logged.
 */
static void mndPullupUpdateSsMigrateProgress(SMnode *pMnode) {
  mTrace("pullup update ssmigrate progress");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // mndBuildTimerMsg() returns NULL on failure; never enqueue a message without a payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
173,225✔
236

237
/*
 * Post a TDMT_MND_ARB_HEARTBEAT_TIMER message (noResp set) to the arb queue.
 *
 * Returns the result of tmsgPutToQueue(). When the timer payload cannot be
 * built, nothing is queued and terrno is returned, or
 * TSDB_CODE_MND_RETURN_VALUE_NULL if terrno is still 0.
 */
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure to the caller instead of enqueueing a message without a payload.
    return terrno != 0 ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
244

245
/*
 * Post a TDMT_MND_ARB_CHECK_SYNC_TIMER message (noResp set) to the arb queue.
 *
 * Returns the result of tmsgPutToQueue(). When the timer payload cannot be
 * built, nothing is queued and terrno is returned, or
 * TSDB_CODE_MND_RETURN_VALUE_NULL if terrno is still 0.
 */
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure to the caller instead of enqueueing a message without a payload.
    return terrno != 0 ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
252

253
/* Post a TDMT_MND_TMQ_TIMER message to the write queue; a queueing failure is only logged. */
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
13,308,070✔
263

264
/* Post a TDMT_MND_TELEM_TIMER message to the read queue; a queueing failure is only logged. */
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
246✔
276

277
/*
 * Post a TDMT_MND_GRANT_HB_TIMER message to the write queue.
 * The message carries notFreeAhandle = 1 and a zero ahandle; a queueing
 * failure is only logged.
 */
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {
      .msgType = TDMT_MND_GRANT_HB_TIMER, .pCont = pMsg, .contLen = msgLen, .info.notFreeAhandle = 1, .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,832,478✔
293

294
/*
 * Post a TDMT_MND_AUTH_HB_TIMER message to the write queue.
 * The message carries notFreeAhandle = 1 and a zero ahandle; a queueing
 * failure is only logged.
 */
static void mndPullupAuth(SMnode *pMnode) {
  mTrace("pullup auth msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {
      .msgType = TDMT_MND_AUTH_HB_TIMER, .pCont = pMsg, .contLen = msgLen, .info.notFreeAhandle = 1, .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
306

307
/*
 * Post a TDMT_MND_CLS_HB_TIMER message to the write queue.
 * The message carries notFreeAhandle = 1 and a zero ahandle; a queueing
 * failure is only logged.
 */
static void mndPullupCls(SMnode *pMnode) {
  mTrace("pullup cls msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {
      .msgType = TDMT_MND_CLS_HB_TIMER, .pCont = pMsg, .contLen = msgLen, .info.notFreeAhandle = 1, .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
318

319
/*
 * Post a TDMT_MND_UPTIME_TIMER message to the write queue.
 * The message carries notFreeAhandle = 1 and a zero ahandle; a queueing
 * failure is only logged.
 */
static void mndIncreaseUpTime(SMnode *pMnode) {
  // NOTE(review): "increate" looks like a typo for "increase"; kept as-is so log output does not change.
  mTrace("increate uptime");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {
      .msgType = TDMT_MND_UPTIME_TIMER, .pCont = pMsg, .contLen = msgLen, .info.notFreeAhandle = 1, .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
81,409✔
335

336
/*
 * Mark every vgroup replica hosted on `dnodeId` as offline.
 *
 * Walks all SDB_VGROUP objects. For the first replica of each vgroup that
 * lives on `dnodeId` and is not already offline, the sync state is reset
 * (offline, not restored, not readable, start time / learner progress
 * cleared, snapSeq = -1) and the owning db's stateTs is set to `curMs` if it
 * differs.
 */
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;
  void *cursor = NULL;

  for (;;) {
    SVgObj *pVgroup = NULL;
    cursor = sdbFetch(pSdb, SDB_VGROUP, cursor, (void **)&pVgroup);
    if (cursor == NULL) break;

    // Locate the (first) replica of this vgroup that is placed on the dnode.
    SVnodeGid *pGid = NULL;
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
      if (pVgroup->vnodeGid[i].dnodeId == dnodeId) {
        pGid = &pVgroup->vnodeGid[i];
        break;
      }
    }

    if (pGid != NULL && pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
      mInfo(
          "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline "
          "restored:0 "
          "canRead:0",
          pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
      pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
      pGid->syncRestore = 0;
      pGid->syncCanRead = 0;
      pGid->startTimeMs = 0;
      pGid->learnerProgress = 0;
      pGid->snapSeq = -1;

      // The replica state changed, so refresh the owning db's state timestamp.
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, old newTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
106,754✔
380

381
/*
 * Scan all dnodes and push the vgroup replicas of every offline one into the
 * offline state. The scan runs between mndAcquireRpc() and mndReleaseRpc()
 * and is skipped entirely when the RPC reference cannot be acquired.
 */
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb   *pSdb = pMnode->pSdb;
  int64_t nowMs = taosGetTimestampMs();
  void   *cursor = NULL;

  for (;;) {
    SDnodeObj *pDnode = NULL;
    cursor = sdbFetch(pSdb, SDB_DNODE, cursor, (void **)&pDnode);
    if (cursor == NULL) break;

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
405

406
/*
 * Report whether this mnode must NOT run leader-only timer work.
 *
 * Returns true when syncGetState() leaves terrno non-zero (terrno is kept),
 * when the sync state is not leader (terrno = TSDB_CODE_SYN_NOT_LEADER), or
 * when either the sync layer or the mnode is not restored yet
 * (terrno = TSDB_CODE_SYN_RESTORING). Returns false otherwise.
 * The state is sampled under the mnode read lock.
 */
static bool mnodeIsNotLeader(SMnode *pMnode) {
  terrno = 0;

  bool queryFailed = false;
  bool notLeader = false;
  bool restoring = false;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    queryFailed = true;
  } else if (state.state != TAOS_SYNC_STATE_LEADER) {
    notLeader = true;
  } else if (!state.restored || !pMnode->restored) {
    restoring = true;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  // terrno is assigned only after the lock has been released, as before.
  if (queryFailed) return true;
  if (notLeader) {
    terrno = TSDB_CODE_SYN_NOT_LEADER;
    return true;
  }
  if (restoring) {
    terrno = TSDB_CODE_SYN_RESTORING;
    return true;
  }
  return false;
}
428

429
static int32_t minCronTime() {
×
430
  int32_t min = INT32_MAX;
×
431
  min = TMIN(min, tsTtlPushIntervalSec);
×
432
  min = TMIN(min, tsTrimVDbIntervalSec);
×
433
  min = TMIN(min, tsSsAutoMigrateIntervalSec);
×
434
  min = TMIN(min, tsTransPullupInterval);
×
435
  min = TMIN(min, tsCompactPullupInterval);
×
436
  min = TMIN(min, tsMqRebalanceInterval);
×
437

438
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
439
  min = TMIN(min, telemInt);
×
440
  min = TMIN(min, tsGrantHBInterval);
×
441
  min = TMIN(min, tsUptimeInterval);
×
442
#ifdef TD_ENTERPRISE
443
  if (tsClsEnabled) min = TMIN(min, tsClsRefreshInterval);
×
444
#endif
445

446
  return min <= 1 ? 2 : min;
×
447
}
448
/*
 * True when `interval` is non-zero and `sec` is an exact multiple of it.
 * The zero check keeps a zero-valued interval from reaching `%`, which would
 * be a division by zero.
 */
static inline bool mndTimerIsDue(int64_t sec, int64_t interval) {
  return interval != 0 && (sec % interval) == 0;
}

/*
 * Fire every periodic mnode task whose interval divides `sec`.
 *
 * pMnode: mnode whose queues receive the timer messages.
 * sec:    current time in seconds, as supplied by the caller.
 *
 * Each task is triggered by its own ts*Interval setting; a task whose
 * interval is zero is skipped. Which groups are compiled in depends on
 * TD_ASTRA, USE_SHARED_STORAGE, TD_ENTERPRISE and USE_TOPIC.
 */
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
#ifndef TD_ASTRA
  if (mndTimerIsDue(sec, tsGrantHBInterval)) {  // put in the 1st place as to take effect ASAP
    mndPullupGrant(pMnode);
  }
  if (mndTimerIsDue(sec, tsTtlPushIntervalSec)) {
    mndPullupTtl(pMnode);
  }

  if (mndTimerIsDue(sec, tsTrimVDbIntervalSec)) {
    mndPullupTrimDb(pMnode);
  }

  if (mndTimerIsDue(sec, tsQueryTrimIntervalSec)) {
    mndPullupQueryTrimDb(pMnode);
  }
#endif
#ifdef USE_SHARED_STORAGE
  if (tsSsEnabled) {
    if (mndTimerIsDue(sec, tsQuerySsMigrateIntervalSec)) {
      mndPullupUpdateSsMigrateProgress(pMnode);
    }
    if (tsSsEnabled == 2) {
      // By default, both tsTrimVDbIntervalSec and tsSsAutoMigrateIntervalSec are 3600 seconds,
      // so, delay half interval to do ss migrate to avoid conflict.
      //
      // NOTE: this solution is not perfect, there could still be conflict if user changes the
      // default value, but it is good enough as user is unlikely to change the default value.
      // The best solution is adding a new offset config to all cron tasks, but that would add
      // extra complexity.
      if (tsSsAutoMigrateIntervalSec != 0 &&
          (sec % tsSsAutoMigrateIntervalSec) == (tsSsAutoMigrateIntervalSec / 2)) {
        mndPullupSsMigrateDb(pMnode);
      }
    }
  }
#endif
#ifdef TD_ENTERPRISE
  if (tsAuthReq) {
    if (mndTimerIsDue(sec, tsAuthReqHBInterval)) {
      mndPullupAuth(pMnode);
    }
  }
  if (tsClsEnabled || tsClsRefreshInterval == GRANT_CLS_CLOSING || tsClsRefreshInterval == GRANT_CLS_OPENING) {
    if (mndTimerIsDue(sec, tsClsRefreshInterval)) {
      mndPullupCls(pMnode);
    }
  }
#endif
  if (mndTimerIsDue(sec, tsTransPullupInterval)) {
    mndPullupTrans(pMnode);
  }

  if (mndTimerIsDue(sec, tsCompactPullupInterval)) {
    mndPullupCompacts(pMnode);
  }

  if (mndTimerIsDue(sec, tsScanPullupInterval)) {
    mndPullupScans(pMnode);
  }
  if (tsInstancePullupInterval > 0 && sec % tsInstancePullupInterval == 0) {  // check instance expired
    mndPullupInstances(pMnode);
  }
#ifdef USE_TOPIC
  if (mndTimerIsDue(sec, tsMqRebalanceInterval)) {
    mndCalMqRebalance(pMnode);
  }
#endif
  if (tsTelemInterval > 0 && sec % tsTelemInterval == 0) {
    mndPullupTelem(pMnode);
  }
  if (mndTimerIsDue(sec, tsUptimeInterval)) {
    mndIncreaseUpTime(pMnode);
  }

  // Keep retrying the sdb upgrade every UPGRADE_INTERVAL seconds until the version catches up.
  if (pMnode->version < TSDB_MNODE_BUILTIN_DATA_VERSION && sec % UPGRADE_INTERVAL == 0) {
    mndPullupUpgradeSdb(pMnode);
  }
}
26,617,846✔
527

528
/*
 * Fire the arbitrator timers whose millisecond interval divides `ms`.
 * Failures to enqueue are logged. Compiled to a no-op body when TD_ASTRA is
 * defined.
 */
void mndDoArbTimerPullupTask(SMnode *pMnode, int64_t ms) {
  int32_t code = 0;
#ifndef TD_ASTRA
  if (ms % (tsArbHeartBeatIntervalMs) == 0) {
    code = mndPullupArbHeartbeat(pMnode);
    if (code != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
    }
  }

  if (ms % (tsArbCheckSyncIntervalMs) == 0) {
    code = mndPullupArbCheckSync(pMnode);
    if (code != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
    }
  }
#endif
}
260,995,193✔
544

545
/* Run the dnode offline check whenever `ms` is a multiple of tsStatusTimeoutMs. */
void mndDoTimerCheckStatus(SMnode *pMnode, int64_t ms) {
  if (ms % (tsStatusTimeoutMs) != 0) return;

  mndCheckDnodeOffline(pMnode);
}
260,995,193✔
550

551
/*
 * Periodic health checks driven by the seconds timer:
 *  - sync timeout check every MNODE_TIMEOUT_SEC / 2 seconds;
 *  - stream health check every MND_STREAM_HEALTH_CHECK_PERIOD_SEC seconds,
 *    unless tsDisableStream is set.
 */
void mndDoTimerCheckSync(SMnode *pMnode, int64_t sec) {
  bool syncCheckDue = (sec % (MNODE_TIMEOUT_SEC / 2) == 0);
  if (syncCheckDue) {
    mndSyncCheckTimeout(pMnode);
  }

  if (tsDisableStream) return;
  if (sec % MND_STREAM_HEALTH_CHECK_PERIOD_SEC == 0) {
    msmHealthCheck(pMnode);
  }
}
26,617,846✔
559

560
static void *mndThreadSecFp(void *param) {
529,167✔
561
  SMnode *pMnode = param;
529,167✔
562
  int64_t lastSec = 0;
529,167✔
563
  setThreadName("mnode-timer");
529,167✔
564
  taosSetCpuAffinity(THREAD_CAT_MANAGEMENT);
529,167✔
565

566
  while (1) {
271,952,077✔
567
    if (mndGetStop(pMnode)) break;
272,481,244✔
568

569
    int64_t nowSec = taosGetTimestampMs() / 1000;
271,952,077✔
570
    if (nowSec == lastSec) {
271,952,077✔
571
      taosMsleep(100);
244,233,527✔
572
      continue;
244,233,527✔
573
    }
574
    lastSec = nowSec;
27,718,550✔
575

576
    if (mnodeIsNotLeader(pMnode)) {
27,718,550✔
577
      taosMsleep(100);
1,100,704✔
578
      mTrace("timer not process since mnode is not leader");
1,100,704✔
579
      continue;
1,100,704✔
580
    }
581

582
    mndDoTimerCheckSync(pMnode, nowSec);
26,617,846✔
583

584
    mndDoTimerPullupTask(pMnode, nowSec);
26,617,846✔
585

586
    taosMsleep(100);
26,617,846✔
587
  }
588

589
  return NULL;
529,167✔
590
}
591

592
static void *mndThreadMsFp(void *param) {
529,167✔
593
  SMnode *pMnode = param;
529,167✔
594
  int64_t lastTime = 0;
529,167✔
595
  setThreadName("mnode-arb-timer");
529,167✔
596
  taosSetCpuAffinity(THREAD_CAT_MANAGEMENT);
529,167✔
597

598
  while (1) {
599
    lastTime += 100;
271,941,362✔
600
    taosMsleep(100);
271,941,362✔
601

602
    if (mndGetStop(pMnode)) break;
271,941,362✔
603
    if (lastTime % 10 != 0) continue;
271,412,195✔
604

605
    if (mnodeIsNotLeader(pMnode)) {
271,412,195✔
606
      mTrace("timer not process since mnode is not leader");
10,417,002✔
607
      continue;
10,417,002✔
608
    }
609

610
    mndDoTimerCheckStatus(pMnode, lastTime);
260,995,193✔
611

612
    mndDoArbTimerPullupTask(pMnode, lastTime);
260,995,193✔
613
  }
614

615
  return NULL;
529,167✔
616
}
617

618
/*
 * Start the two joinable timer threads of the mnode: the seconds timer
 * (mndThreadSecFp -> pMnode->thread) and the arbitrator timer
 * (mndThreadMsFp -> pMnode->arbThread).
 *
 * Returns 0 on success or the taosThreadCreate() error code. The thread
 * attribute objects are destroyed on every path, including failures.
 */
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadSecFp, pMnode);
  // The attribute object is no longer needed once creation has been attempted.
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");

  TdThreadAttr arbAttr;
  (void)taosThreadAttrInit(&arbAttr);
  (void)taosThreadAttrSetDetachState(&arbAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&arbAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->arbThread, &arbAttr, mndThreadMsFp, pMnode);
  (void)taosThreadAttrDestroy(&arbAttr);
  if (code != 0) {
    mError("failed to create arb timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  // NOTE(review): this reports "mnode-timer" a second time; possibly meant to name the arb timer — confirm.
  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
649

650
/* Join and clear both timer threads (seconds timer first, then arb timer) if their handles are valid. */
static void mndCleanupTimer(SMnode *pMnode) {
  bool secTimerValid = taosCheckPthreadValid(pMnode->thread);
  if (secTimerValid) {
    (void)taosThreadJoin(pMnode->thread, NULL);
    taosThreadClear(&pMnode->thread);
  }

  bool arbTimerValid = taosCheckPthreadValid(pMnode->arbThread);
  if (arbTimerValid) {
    (void)taosThreadJoin(pMnode->arbThread, NULL);
    taosThreadClear(&pMnode->arbThread);
  }
}
529,167✔
660

661
/*
 * Remember a private copy of `path` in pMnode->path and create that directory.
 * Returns 0 on success, otherwise terrno as left by the failing call.
 */
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);
  if (pMnode->path == NULL) {
    TAOS_RETURN(terrno);
  }

  if (taosMkDir(pMnode->path) != 0) {
    TAOS_RETURN(terrno);
  }

  TAOS_RETURN(0);
}
676

677
/*
 * Open the mnode WAL located at "<pMnode->path><TD_DIRSEP>wal".
 *
 * The WAL is opened as vgId 1 with TAOS_WAL_FSYNC level. In enterprise builds
 * the function first waits for the config key to be loaded and, when
 * tsMetaKey is non-empty, copies it into the WAL encryption key.
 * Returns 0 on success; on failure terrno, or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when walOpen() fails without setting terrno.
 */
static int32_t mndInitWal(SMnode *pMnode) {
  char walPath[PATH_MAX + 20] = {0};
  (void)snprintf(walPath, sizeof(walPath), "%s%swal", pMnode->path, TD_DIRSEP);

  SWalCfg cfg = {0};
  cfg.vgId = 1;
  cfg.fsyncPeriod = 0;
  cfg.rollPeriod = -1;
  cfg.segSize = -1;
  cfg.committed = -1;
  cfg.retentionPeriod = 0;
  cfg.retentionSize = 0;
  cfg.level = TAOS_WAL_FSYNC;
  cfg.encryptAlgr = 0;

#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
  if (taosWaitCfgKeyLoaded() != 0) {
    TAOS_RETURN(terrno);
  }
  if (tsMetaKey[0] != '\0') {
    tstrncpy(cfg.encryptData.encryptKey, tsMetaKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walPath, &cfg);
  if (pMnode->pWal != NULL) {
    TAOS_RETURN(0);
  }

  // Prefer the specific error left in terrno; fall back to the generic NULL-return code.
  int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  mError("failed to open wal since %s. wal:%s", tstrerror(code), walPath);
  TAOS_RETURN(code);
}
712

713
/* Close the mnode WAL if it is open and forget the handle. */
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) return;

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
529,262✔
719

720
// Forward declarations for mmFile.c functions
721
extern int32_t mmReadFile(const char *path, SMnodeOpt *pOption);
722
extern int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption);
723

724
// Callback function to persist encrypted flag to mnode.json
725
static int32_t mndPersistEncryptedFlag(void *param) {
8,011✔
726
  SMnode *pMnode = (SMnode *)param;
8,011✔
727
  if (pMnode == NULL) {
8,011✔
728
    return TSDB_CODE_INVALID_PARA;
×
729
  }
730
  
731
  mInfo("persisting encrypted flag to mnode.json");
8,011✔
732
  
733
  SMnodeOpt option = {0};
8,011✔
734
  int32_t code = mmReadFile(pMnode->path, &option);
8,011✔
735
  if (code != 0) {
8,011✔
736
    mError("failed to read mnode.json for persisting encrypted flag since %s", tstrerror(code));
×
737
    return code;
×
738
  }
739
  
740
  option.encrypted = true;
8,011✔
741
  code = mmWriteFile(pMnode->path, &option);
8,011✔
742
  if (code != 0) {
8,011✔
743
    mError("failed to write mnode.json for persisting encrypted flag since %s", tstrerror(code));
×
744
    return code;
×
745
  }
746
  
747
  // Also update mnode's encrypted flag
748
  pMnode->encrypted = true;
8,011✔
749
  
750
  mInfo("successfully persisted encrypted flag to mnode.json");
8,011✔
751
  return 0;
8,011✔
752
}
753

754
// Init step for "mnode-sdb": creates the sdb instance from the mnode's path
// and WAL. On failure returns terrno when it is set, otherwise
// TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt sdbOpt = {0};
  sdbOpt.pWal = pMnode->pWal;
  sdbOpt.pMnode = pMnode;
  sdbOpt.path = pMnode->path;

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(code);
}
770

771
// Init step that wires the sdb to this mnode and, unless the mnode is being
// freshly deployed, loads the sdb file. The applied index is published from
// the sdb commit index even when sdbReadFile fails; its code is returned.
static int32_t mndOpenSdb(SMnode *pMnode) {
  SSdb *pSdb = pMnode->pSdb;

  pSdb->encrypted = pMnode->encrypted;

  // Let the sdb call back into the mnode to persist the encrypted flag.
  pSdb->persistEncryptedFlagFp = mndPersistEncryptedFlag;
  pSdb->pMnodeForCallback = pMnode;

  int32_t code = 0;
  if (!pMnode->deploy) {
    code = sdbReadFile(pSdb);
  }

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pSdb->commitIndex);

  atomic_store_64(&pMnode->applied, pSdb->commitIndex);
  return code;
}
789

790
// Cleanup step for "mnode-sdb": releases the sdb instance if present and
// clears the pointer.
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) return;

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
529,262✔
796

797
// Appends one named init/cleanup pair to pMnode->pSteps.
// Returns 0 on success, or terrno when the array push fails.
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep newStep = {0};
  newStep.cleanupFp = cleanupFp;
  newStep.initFp = initFp;
  newStep.name = name;

  if (taosArrayPush(pMnode->pSteps, &newStep) != NULL) {
    TAOS_RETURN(0);
  }

  TAOS_RETURN(terrno);
}
808

809
// Registers every mnode start-up step, in execution order, into
// pMnode->pSteps. mndExecSteps later runs the init callbacks front to back
// and mndCleanupSteps runs the cleanup callbacks back to front, so the order
// of this table is significant. Returns 0, or the first mndAllocStep error.
static int32_t mndInitSteps(SMnode *pMnode) {
  const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } steps[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-security-policy", mndInitSecurityPolicy, mndCleanupSecurityPolicy},
      {"mnode-encrypt-algorithms", mndInitEncryptAlgr, mndCleanupEncryptAlgr},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-bnode", mndInitBnode, mndCleanupBnode},
      {"mnode-xnode", mndInitXnode, mndCleanupXnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-role", mndInitRole, mndCleanupRole},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-token", mndInitToken, mndCleanupToken},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-instance", mndInitInstance, mndCleanupInstance},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
#ifdef USE_MOUNT
      {"mnode-mount", mndInitMount, mndCleanupMount},
      {"mnode-mount-log", mndInitMountLog, mndCleanupMountLog},
#endif
      {"mnode-rsma", mndInitRsma, mndCleanupRsma},
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-scan", mndInitScan, mndCleanupScan},
      {"mnode-retention", mndInitRetention, mndCleanupRetention},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-scan-detail", mndInitScanDetail, mndCleanupScanDetail},
      {"mnode-retention-detail", mndInitRetentionDetail, mndCleanupRetentionDetail},
      {"mnode-ssmigrate", mndInitSsMigrate, mndCleanupSsMigrate},
      // Second "mnode-sdb" entry: opens/loads the sdb after the steps above have run.
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  const int32_t numOfSteps = (int32_t)(sizeof(steps) / sizeof(steps[0]));
  for (int32_t i = 0; i < numOfSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, steps[i].name, steps[i].initFp, steps[i].cleanupFp));
  }
  return 0;
}
865

866
// Runs the cleanup callbacks of the registered steps in reverse order,
// starting at index `pos` (or at the last step when pos == -1), then destroys
// the step array. Does nothing when the array is already gone.
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  int32_t idx = pos;
  if (idx == -1) {
    idx = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  while (idx >= 0) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    // Steps registered without a cleanup callback are skipped.
    if (pStep->cleanupFp != NULL) {
      (*pStep->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
885

886
// Executes the init callback of every registered step in order. On the first
// failure the steps up to and including the failed one are cleaned up (which
// also destroys the step array) and the error code is returned. On success
// the cluster id is cached on the mnode.
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numOfSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t i = 0; i < numOfSteps; ++i) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, i);
    if (pStep->initFp == NULL) continue;

    int32_t code = (*pStep->initFp)(pMnode);
    if (code == 0) {
      mInfo("%s is initialized", pStep->name);
      tmsgReportStartup(pStep->name, "initialized");
      continue;
    }

    mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
    mndCleanupSteps(pMnode, i);
    TAOS_RETURN(code);
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
906

907
// Copies the caller-supplied open options (message callbacks, dnode id,
// replica layout and encrypted flag) into the mnode and its sync manager.
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  SSyncMgmt *pMgmt = &pMnode->syncMgmt;

  pMnode->msgCb = pOption->msgCb;
  pMnode->selfDnodeId = pOption->dnodeId;
  pMnode->encrypted = pOption->encrypted;

  pMgmt->selfIndex = pOption->selfIndex;
  pMgmt->numOfReplicas = pOption->numOfReplicas;
  pMgmt->numOfTotalReplicas = pOption->numOfTotalReplicas;
  pMgmt->lastIndex = pOption->lastIndex;
  (void)memcpy(pMgmt->replicas, pOption->replicas, sizeof(pOption->replicas));
  (void)memcpy(pMgmt->nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
}
529,324✔
918

919
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
529,324✔
920
  terrno = 0;
529,324✔
921
  mInfo("start to open mnode in %s", path);
529,324✔
922

923
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
529,324✔
924
  if (pMnode == NULL) {
529,324✔
925
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
926
    mError("failed to open mnode in step 1, since %s", terrstr());
×
927
    return NULL;
×
928
  }
929
  (void)memset(pMnode, 0, sizeof(SMnode));
529,324✔
930

931
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
529,324✔
932
  if (code != 0) {
529,324✔
933
    taosMemoryFree(pMnode);
×
934
    mError("failed to open mnode in step 2, add lock, since %s", tstrerror(code));
×
935
    terrno = code;
×
936
    return NULL;
×
937
  }
938

939
  mInfo("vgId:1, mnode set options to syncMgmt, dnodeId:%d, numOfTotalReplicas:%d", pOption->selfIndex,
529,324✔
940
        pOption->numOfTotalReplicas);
941
  mndSetOptions(pMnode, pOption);
529,324✔
942

943
  pMnode->deploy = pOption->deploy;
529,324✔
944
  pMnode->version = pOption->version;
529,324✔
945
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
529,324✔
946
  if (pMnode->pSteps == NULL) {
529,324✔
947
    taosMemoryFree(pMnode);
×
948
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
949
    mError("failed to open mnode in step 4, since %s", terrstr());
×
950
    return NULL;
×
951
  }
952

953
  code = mndCreateDir(pMnode, path);
529,324✔
954
  if (code != 0) {
529,324✔
955
    mError("failed to open mnode in step 5, since %s", tstrerror(code));
×
956
    mndClose(pMnode);
×
957
    terrno = code;
×
958
    return NULL;
×
959
  }
960

961
  code = mndInitSteps(pMnode);
529,324✔
962
  if (code != 0) {
529,324✔
963
    mError("failed to open mnode in step 6, since %s", tstrerror(code));
×
964
    mndClose(pMnode);
×
965
    terrno = code;
×
966
    return NULL;
×
967
  }
968

969
  code = mndExecSteps(pMnode);
529,324✔
970
  if (code != 0) {
529,324✔
971
    mError("failed to open mnode in step 7, since %s", tstrerror(code));
×
972
    mndClose(pMnode);
×
973
    terrno = code;
×
974
    return NULL;
×
975
  }
976

977
  mInfo("mnode open successfully");
529,324✔
978
  return pMnode;
529,324✔
979
}
980

981
// Prepares a running mnode for shutdown: attempts a leader transfer, notifies
// sync of the upcoming stop and writes the sdb file. Failures are logged and
// otherwise ignored. A NULL mnode is a no-op.
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  // TODO check return value
  int32_t transferCode = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (transferCode < 0) {
    mError("failed to transfer leader since %s", tstrerror(transferCode));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  int32_t writeCode = sdbWriteFile(pMnode->pSdb, 0);
  if (writeCode < 0) {
    mError("failed to write sdb since %s", tstrerror(writeCode));
  }
}
529,167✔
996

997
// Tears down an mnode created by mndOpen: runs all remaining step cleanups in
// reverse order, destroys the rwlock and frees the object. A NULL mnode is a
// no-op.
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  // Pairs with taosThreadRwlockInit in mndOpen; the lock is always initialized
  // before any path can reach mndClose.
  (void)taosThreadRwlockDestroy(&pMnode->lock);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
529,262✔
1006

1007
// Starts a previously opened mnode: starts sync, deploys the sdb on first
// deployment, upgrades built-in data when the stored version is older than
// TSDB_MNODE_BUILTIN_DATA_VERSION, optionally enforces SoD (enterprise,
// leader only), resets grants and finally starts the timers.
//
// Returns the result of mndInitTimer on the normal path, -1 when sdb
// deploy/upgrade fails, or the SoD enforcement error when it is fatal.
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    if (sdbDeploy(pMnode->pSdb) != 0) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    mndSetRestored(pMnode, true);
  }

  if (sdbIsUpgraded(pMnode->pSdb)) {
    pMnode->version = TSDB_MNODE_BUILTIN_DATA_VERSION;
  } else if (pMnode->version < TSDB_MNODE_BUILTIN_DATA_VERSION) {
    if (sdbUpgrade(pMnode->pSdb, pMnode->version) != 0) {
      mError("failed to upgrade sdb while start mnode");
      return -1;
    }
    // Only bump the version once the sdb reports the upgrade as done.
    if (sdbIsUpgraded(pMnode->pSdb)) {
      pMnode->version = TSDB_MNODE_BUILTIN_DATA_VERSION;
    }
  }

#ifdef TD_ENTERPRISE
  if (mndIsLeader(pMnode) && tsSodEnforceMode) {
    int32_t code = mndProcessEnforceSod(pMnode);
    if (code == 0) {
      mndSetSoDPhase(pMnode, TSDB_SOD_PHASE_STABLE);
    } else if (code == TSDB_CODE_MND_ROLE_NO_VALID_SYSDBA || code == TSDB_CODE_MND_ROLE_NO_VALID_SYSSEC ||
               code == TSDB_CODE_MND_ROLE_NO_VALID_SYSAUDIT) {
      // Missing role holders are not fatal; start-up continues.
      mInfo("enter SoD pending mode. Enforce SoD by command line failed since %s", tstrerror(code));
    } else if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
      mInfo("enter SoD pending mode. Enforce SoD is in progress");
    } else {
      mError("failed to enforce SoD by command line since %s", tstrerror(code));
      TAOS_RETURN(code);
    }
  }
#endif
  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
1053

1054
// Returns true when the mnode's data version is newer than `version`.
bool mndNeedUpgrade(SMnode *pMnode, int32_t version) {
  return version < pMnode->version;
}
529,167✔
1055

1056
// Returns the mnode's current data version.
int32_t mndGetVersion(SMnode *pMnode) {
  return pMnode->version;
}
72,544✔
1057

1058
// Returns the mnode's in-memory encrypted flag.
int32_t mndGetEncryptedFlag(SMnode *pMnode) {
  return pMnode->encrypted;
}
72,544✔
1059

1060
// Forwards to syncIsCatchUp for the mnode's sync instance.
int32_t mndIsCatchUp(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncIsCatchUp(syncRid);
}
1064

1065
// Returns the sync role reported by syncGetRole for the mnode's sync instance.
ESyncRole mndGetRole(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncGetRole(syncRid);
}
1069

1070
// Returns the term reported by syncGetTerm for the mnode's sync instance.
int64_t mndGetTerm(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncGetTerm(syncRid);
}
1074

1075
// Forwards to syncGetArbToken, which writes into `outToken`; its return value
// is passed through unchanged.
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  return syncGetArbToken(pMnode->syncMgmt.sync, outToken);
}
21,405,808✔
1076

1077
// Stops a running mnode. The order matters only in that the stopped flag is
// raised first, then sync is stopped, then the timers are cleaned up.
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);       // mark as stopped (checked by mndCheckMnodeState)
  mndSyncStop(pMnode);      // stop sync
  mndCleanupTimer(pMnode);  // tear down timers
}
529,167✔
1082

1083
// Hands a sync message to the sync module of the mnode attached to the
// message and returns syncProcessMsg's result, logging failures.
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;

  // `trace` is not referenced directly; presumably the mG* logging macros
  // pick it up by name, so it must keep this name.
  const STraceId *trace = &pMsg->info.traceId;
  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  const int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
1098

1099
// Decides whether an incoming message may be processed by this mnode.
//
// Returns 0 when processing may proceed. For requests that passed the
// leader/restored checks, an rpc reference is taken (pMnode->rpcRef is
// incremented); responses and scheduler messages return 0 without taking one.
// Otherwise returns TSDB_CODE_APP_IS_STOPPING, the terrno set by
// syncGetState, TSDB_CODE_SYN_NOT_LEADER or TSDB_CODE_SYN_RESTORING. For the
// last two, non-timer messages get the mnode endpoint set serialized into
// pMsg->info.rsp so the sender can redirect; -1 is returned when no endpoint
// is known.
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;

  if (!IsReq(pMsg)) {
    TAOS_RETURN(code);
  }

  // Scheduler traffic bypasses the state check entirely.
  const bool isSchedulerMsg =
      pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH ||
      pMsg->msgType == TDMT_SCH_DROP_TASK || pMsg->msgType == TDMT_SCH_TASK_NOTIFY;
  if (isSchedulerMsg) {
    TAOS_RETURN(code);
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);

  if (pMnode->stopped) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_APP_IS_STOPPING;
    TAOS_RETURN(code);
  }

  // syncGetState reports failure through terrno, so clear it first.
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  if (state.state != TAOS_SYNC_STATE_LEADER) {
    code = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!state.restored || !pMnode->restored) {
    code = TSDB_CODE_SYN_RESTORING;
  } else {
    // Take the rpc reference while still holding the read lock.
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  if (code == 0) {
    TAOS_RETURN(code);
  }

  // Rejected: internal timer messages are dropped without a redirect reply.
  const bool isTimerMsg =
      pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
      pMsg->msgType == TDMT_MND_SSMIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER ||
      pMsg->msgType == TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER || pMsg->msgType == TDMT_MND_SCAN_TIMER ||
      pMsg->msgType == TDMT_MND_QUERY_TRIM_TIMER || pMsg->msgType == TDMT_MND_AUTH_HB_TIMER ||
      pMsg->msgType == TDMT_MND_CLS_HB_TIMER;
  if (isTimerMsg) {
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
           pMnode->stopped, state.restored, syncStr(state.state));
    TAOS_RETURN(code);
  }

  // Everything else is answered with the current mnode endpoint set.
  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) return -1;

  for (int32_t ep = 0; ep < epSet.numOfEps; ++ep) {
    mDebug("mnode index:%d, ep:%s:%u", ep, epSet.eps[ep].fqdn, epSet.eps[ep].port);
  }

  // First call sizes the buffer, second call fills it.
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
  pMsg->info.rsp = rpcMallocCont(contLen);
  if (pMsg->info.rsp == NULL) {
    TAOS_RETURN(code);
  }

  if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
    mError("failed to serialize ep set");
  }
  pMsg->info.hasEpSet = 1;
  pMsg->info.rspLen = contLen;

  TAOS_RETURN(code);
}
1191

1192
// Entry point for rpc messages addressed to the mnode.
//
// Steps: (enterprise only) validate token-authenticated connections, look up
// the handler registered for the message type, gate on mnode state via
// mndCheckMnodeState, invoke the handler, release the rpc reference and log
// the outcome. Returns the handler's code (with -1 replaced by terrno), or an
// earlier validation/state error.
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

#ifdef TD_ENTERPRISE
  if (pMsg->msgType != TDMT_MND_HEARTBEAT && pMsg->info.conn.isToken) {
    SCachedTokenInfo tokenInfo = {0};
    if (mndGetCachedTokenInfo(pMsg->info.conn.identifier, &tokenInfo) == NULL) {
      mGError("msg:%p, failed to get token info, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
      code = TSDB_CODE_MND_TOKEN_NOT_EXIST;
      TAOS_RETURN(code);
    }
    if (tokenInfo.enabled == 0) {
      mGError("msg:%p, token is disabled, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
      code = TSDB_CODE_MND_TOKEN_DISABLED;
      TAOS_RETURN(code);
    }
    // An expireTime of 0 (or less) is treated as "never expires".
    if (tokenInfo.expireTime > 0 && taosGetTimestampSec() > (tokenInfo.expireTime + TSDB_TOKEN_EXPIRY_LEEWAY)) {
      mGError("msg:%p, token is expired, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
      code = TSDB_CODE_MND_TOKEN_EXPIRED;
      TAOS_RETURN(code);
    }
    // From here on the request runs as the user the token belongs to.
    tstrncpy(pMsg->info.conn.user, tokenInfo.user, sizeof(pMsg->info.conn.user));
  }
#endif

  // The extended handler table is consulted only when no plain handler exists.
  MndMsgFp    fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)];
  MndMsgFpExt fpExt = (fp == NULL) ? pMnode->msgFpExt[TMSG_INDEX(pMsg->msgType)] : NULL;
  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp != NULL) {
    code = (*fp)(pMsg);
  } else {
    code = (*fpExt)(pMsg, pQueueInfo);
  }
  mndReleaseRpc(pMnode);

  if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
  } else if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
  } else {
    // TODO remove this wrong set code
    if (code == -1) {
      code = terrno;
    }
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
            TMSG_INFO(pMsg->msgType));
  }

  TAOS_RETURN(code);
}
1254

1255
// Registers `fp` as the handler for `msgType`. Message types whose index is
// not below TDMT_MAX are silently ignored.
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFp[slot] = fp;
}
128,096,408✔
1261

1262
// Registers `fp` as the extended handler (the variant that also receives the
// queue info) for `msgType`. Message types whose index is not below TDMT_MAX
// are silently ignored.
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFpExt[slot] = fp;
}
4,234,592✔
1268

1269
// Generates a positive, non-zero 64-bit uid for `name` (`len` bytes).
// Note: uid 0 is reserved, so a zero result is never returned.
//
// Bit layout before the sign is stripped:
//   bits 24..63  low 40 bits of the current timestamp in microseconds
//   bits  8..23  low 16 bits of MurmurHash3_32(name, len)
//   bits  0..7   low 8 bits of taosRand()
//
// The value is assembled in unsigned arithmetic: shifting a 40-bit quantity
// left by 24 in a signed type can overflow into the sign bit, which is
// undefined behavior. INT64_MIN is rejected as well because its magnitude is
// not representable (llabs(INT64_MIN) is undefined).
int64_t mndGenerateUid(const char *name, int32_t len) {
  const uint64_t hashBits = ((uint64_t)((uint32_t)MurmurHash3_32(name, len) & 0xFFFFu)) << 8;

  for (;;) {
    const uint64_t us = (uint64_t)taosGetTimestampUs();
    const uint64_t timeBits = (us & 0x000000FFFFFFFFFFULL) << 24;
    const uint64_t randBits = (uint64_t)taosRand() & 0xFFu;

    const int64_t uuid = (int64_t)(timeBits + hashBits + randBits);
    if (uuid == 0 || uuid == INT64_MIN) {
      continue;  // reserved / unrepresentable; draw again
    }
    return (uuid < 0) ? -uuid : uuid;
  }
}
1281

1282
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
82✔
1283
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1284
  int32_t code = mndAcquireRpc(pMnode);
82✔
1285
  if (code < 0) {
82✔
1286
    TAOS_RETURN(code);
×
1287
  } else if (code == 1) {
82✔
1288
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1289
  }
1290

1291
  SSdb   *pSdb = pMnode->pSdb;
82✔
1292
  int64_t ms = taosGetTimestampMs();
82✔
1293

1294
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
82✔
1295
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
82✔
1296
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
82✔
1297
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
82✔
1298
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
82✔
1299
      pStbInfo->stbs == NULL) {
82✔
1300
    mndReleaseRpc(pMnode);
×
1301
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1302
    if (terrno != 0) code = terrno;
×
1303
    TAOS_RETURN(code);
×
1304
  }
1305

1306
  // cluster info
1307
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
82✔
1308
  pClusterInfo->monitor_interval = tsMonitorInterval;
82✔
1309
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
82✔
1310
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
82✔
1311
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
82✔
1312
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
82✔
1313
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
82✔
1314

1315
  void *pIter = NULL;
82✔
1316
  while (1) {
82✔
1317
    SDnodeObj *pObj = NULL;
164✔
1318
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
164✔
1319
    if (pIter == NULL) break;
164✔
1320

1321
    SMonDnodeDesc desc = {0};
82✔
1322
    desc.dnode_id = pObj->id;
82✔
1323
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
82✔
1324
    if (mndIsDnodeOnline(pObj, ms)) {
82✔
1325
      tstrncpy(desc.status, "ready", sizeof(desc.status));
82✔
1326
    } else {
1327
      tstrncpy(desc.status, "offline", sizeof(desc.status));
×
1328
    }
1329
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
164✔
1330
      mError("failed put dnode into array, but continue at this monitor report")
×
1331
    }
1332
    sdbRelease(pSdb, pObj);
82✔
1333
  }
1334

1335
  pIter = NULL;
82✔
1336
  while (1) {
82✔
1337
    SMnodeObj *pObj = NULL;
164✔
1338
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
164✔
1339
    if (pIter == NULL) break;
164✔
1340

1341
    SMonMnodeDesc desc = {0};
82✔
1342
    desc.mnode_id = pObj->id;
82✔
1343
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
82✔
1344

1345
    if (pObj->id == pMnode->selfDnodeId) {
82✔
1346
      pClusterInfo->first_ep_dnode_id = pObj->id;
82✔
1347
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
82✔
1348
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1349
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
82✔
1350
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1351
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
82✔
1352
      desc.syncState = TAOS_SYNC_STATE_LEADER;
82✔
1353
    } else {
1354
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1355
      desc.syncState = pObj->syncState;
×
1356
    }
1357
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
164✔
1358
      mError("failed to put mnode into array, but continue at this monitor report");
×
1359
    }
1360
    sdbRelease(pSdb, pObj);
82✔
1361
  }
1362

1363
  // vgroup info
1364
  pIter = NULL;
82✔
1365
  while (1) {
492✔
1366
    SVgObj *pVgroup = NULL;
574✔
1367
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
574✔
1368
    if (pIter == NULL) break;
574✔
1369

1370
    if (pVgroup->mountVgId) {
492✔
1371
      sdbRelease(pSdb, pVgroup);
×
1372
      continue;
×
1373
    }
1374

1375
    pClusterInfo->vgroups_total++;
492✔
1376
    pClusterInfo->tbs_total += pVgroup->numOfTables;
492✔
1377

1378
    SMonVgroupDesc desc = {0};
492✔
1379
    desc.vgroup_id = pVgroup->vgId;
492✔
1380

1381
    SName name = {0};
492✔
1382
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
492✔
1383
    if (code < 0) {
492✔
1384
      mError("failed to get db name since %s", tstrerror(code));
×
1385
      sdbCancelFetch(pSdb, pIter);
×
1386
      sdbRelease(pSdb, pVgroup);
×
1387
      TAOS_RETURN(code);
×
1388
    }
1389
    (void)tNameGetDbName(&name, desc.database_name);
492✔
1390

1391
    desc.tables_num = pVgroup->numOfTables;
492✔
1392
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
492✔
1393
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
492✔
1394
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
984✔
1395
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
492✔
1396
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
492✔
1397
      pVnDesc->dnode_id = pVgid->dnodeId;
492✔
1398
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
492✔
1399
      pVnDesc->syncState = pVgid->syncState;
492✔
1400
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
492✔
1401
        tstrncpy(desc.status, "ready", sizeof(desc.status));
492✔
1402
        pClusterInfo->vgroups_alive++;
492✔
1403
      }
1404
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
492✔
1405
        pClusterInfo->vnodes_alive++;
492✔
1406
      }
1407
      pClusterInfo->vnodes_total++;
492✔
1408
    }
1409

1410
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
984✔
1411
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1412
    }
1413
    sdbRelease(pSdb, pVgroup);
492✔
1414
  }
1415

1416
  // stb info
1417
  pIter = NULL;
82✔
1418
  while (1) {
82✔
1419
    SStbObj *pStb = NULL;
164✔
1420
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
164✔
1421
    if (pIter == NULL) break;
164✔
1422

1423
    SMonStbDesc desc = {0};
82✔
1424

1425
    SName name1 = {0};
82✔
1426
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
82✔
1427
    if (code < 0) {
82✔
1428
      mError("failed to get db name since %s", tstrerror(code));
×
1429
      sdbRelease(pSdb, pStb);
×
1430
      TAOS_RETURN(code);
×
1431
    }
1432
    (void)tNameGetDbName(&name1, desc.database_name);
82✔
1433

1434
    SName name2 = {0};
82✔
1435
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
82✔
1436
    if (code < 0) {
82✔
1437
      mError("failed to get table name since %s", tstrerror(code));
×
1438
      sdbRelease(pSdb, pStb);
×
1439
      TAOS_RETURN(code);
×
1440
    }
1441
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
82✔
1442

1443
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
164✔
1444
      mError("failed to put stb into array, but continue at this monitor report");
×
1445
    }
1446
    sdbRelease(pSdb, pStb);
82✔
1447
  }
1448

1449
  // grant info
1450
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
82✔
1451
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
82✔
1452
  if (pMnode->grant.expireTimeMS == 0) {
82✔
1453
    pGrantInfo->expire_time = 0;
×
1454
    pGrantInfo->timeseries_total = 0;
×
1455
  }
1456

1457
  mndReleaseRpc(pMnode);
82✔
1458
  TAOS_RETURN(code);
82✔
1459
}
1460

1461
// Resets the sync timers of the mnode's sync instance using the configured
// mnode election and heartbeat intervals; returns syncResetTimer's result.
int32_t mndResetTimer(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncResetTimer(syncRid, tsMnodeElectIntervalMs, tsMnodeHeartbeatIntervalMs);
}
1464

1465
// Copies the current sync state (state, restored flag, term, role time) of
// the mnode into `pLoad`. Always returns 0.
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  const SSyncState syncState = syncGetState(pMnode->syncMgmt.sync);
  pLoad->syncState = syncState.state;
  pLoad->syncRestore = syncState.restored;
  pLoad->syncTerm = syncState.term;
  pLoad->roleTimeMs = syncState.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);
  return 0;
}
1476

1477
// Returns the roleTimeMs field of the mnode's current sync state.
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  const SSyncState syncState = syncGetState(pMnode->syncMgmt.sync);
  return syncState.roleTimeMs;
}
1481

1482
// Sets pMnode->restored to the given value while holding pMnode->lock for writing.
// When restored is false, the call additionally blocks (polling every 3 ms)
// until pMnode->rpcRef is <= 0 before returning.
void mndSetRestored(SMnode *pMnode, bool restored) {
529,167✔
1483
  if (restored) {
529,167✔
1484
    (void)taosThreadRwlockWrlock(&pMnode->lock);
529,167✔
1485
    pMnode->restored = true;
529,167✔
1486
    (void)taosThreadRwlockUnlock(&pMnode->lock);
529,167✔
1487
    mInfo("mnode set restored:%d", restored);
529,167✔
1488
  } else {
1489
    (void)taosThreadRwlockWrlock(&pMnode->lock);
×
1490
    pMnode->restored = false;
×
1491
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
1492
    mInfo("mnode set restored:%d", restored);
×
1493
    // Wait loop: rpcRef is read here after the write lock has been released.
    // NOTE(review): presumably rpcRef counts in-flight RPC users of the mnode and
    // this drains them — confirm against the acquire/release helpers.
    while (1) {
1494
      if (pMnode->rpcRef <= 0) break;
×
1495
      taosMsleep(3);
×
1496
    }
1497
  }
1498
}
529,167✔
1499

1500
// Returns the current value of pMnode->restored.
// The field is read directly, without taking pMnode->lock.
bool mndGetRestored(SMnode *pMnode) {
  const bool isRestored = pMnode->restored;
  return isRestored;
}
×
1501

1502
// Marks the mnode as stopped: sets pMnode->stopped to true while holding
// pMnode->lock for writing, then logs the change. There is no way to clear
// the flag through this function.
void mndSetStop(SMnode *pMnode) {
529,167✔
1503
  (void)taosThreadRwlockWrlock(&pMnode->lock);
529,167✔
1504
  pMnode->stopped = true;
529,167✔
1505
  (void)taosThreadRwlockUnlock(&pMnode->lock);
529,167✔
1506
  mInfo("mnode set stopped");
529,167✔
1507
}
529,167✔
1508

1509
// Returns the current value of pMnode->stopped.
// The field is read directly, without taking pMnode->lock.
bool mndGetStop(SMnode *pMnode) {
  const bool isStopped = pMnode->stopped;
  return isStopped;
}
544,375,823✔
1510

1511
// Stores phase into pMnode->sodPhase while holding pMnode->lock for writing.
// The value is stored as given; no range check is performed here.
void mndSetSoDPhase(SMnode *pMnode, int8_t phase) {
378✔
1512
  (void)taosThreadRwlockWrlock(&pMnode->lock);
378✔
1513
  pMnode->sodPhase = phase;
378✔
1514
  (void)taosThreadRwlockUnlock(&pMnode->lock);
378✔
1515
}
378✔
1516

1517
// Reads pMnode->sodPhase while holding pMnode->lock for reading and returns it.
// If the stored value lies outside [TSDB_SOD_PHASE_STABLE, TSDB_SOD_PHASE_ENFORCE],
// a warning is logged and TSDB_SOD_PHASE_STABLE is returned instead.
int8_t mndGetSoDPhase(SMnode *pMnode) {
82,237✔
1518
  int8_t result = TSDB_SOD_PHASE_STABLE;
82,237✔
1519
  (void)taosThreadRwlockRdlock(&pMnode->lock);
82,237✔
1520
  result = pMnode->sodPhase;
82,237✔
1521
  (void)taosThreadRwlockUnlock(&pMnode->lock);
82,237✔
1522
  // Only the returned copy is sanitized; pMnode->sodPhase itself is left untouched.
  if (result < TSDB_SOD_PHASE_STABLE || result > TSDB_SOD_PHASE_ENFORCE) {
82,237✔
1523
    mWarn("invalid SoD phase:%d, reset to stable", result);
×
1524
    result = TSDB_SOD_PHASE_STABLE;
×
1525
  }
1526
  return result;
82,237✔
1527
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc