• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #4754

25 Sep 2025 05:58AM UTC coverage: 57.946% (-1.0%) from 58.977%
#4754

push

travis-ci

web-flow
enh: taos command line support '-uroot' on windows (#33055)

133189 of 293169 branches covered (45.43%)

Branch coverage included in aggregate %.

201677 of 284720 relevant lines covered (70.83%)

5398749.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.48
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndSsMigrate.h"
24
#include "mndConfig.h"
25
#include "mndConsumer.h"
26
#include "mndDb.h"
27
#include "mndDnode.h"
28
#include "mndFunc.h"
29
#include "mndGrant.h"
30
#include "mndIndex.h"
31
#include "mndInfoSchema.h"
32
#include "mndMnode.h"
33
#include "mndMount.h"
34
#include "mndPerfSchema.h"
35
#include "mndPrivilege.h"
36
#include "mndProfile.h"
37
#include "mndQnode.h"
38
#include "mndQuery.h"
39
#include "mndShow.h"
40
#include "mndSma.h"
41
#include "mndSnode.h"
42
#include "mndStb.h"
43
#include "mndStream.h"
44
#include "mndSubscribe.h"
45
#include "mndSync.h"
46
#include "mndTelem.h"
47
#include "mndTopic.h"
48
#include "mndTrans.h"
49
#include "mndUser.h"
50
#include "mndVgroup.h"
51
#include "mndView.h"
52
#include "mndBnode.h"
53

54
// Take one RPC reference on the mnode.
//
// The stop flag and the leadership test are evaluated while pMnode->lock is
// held in read mode; rpcRef is incremented only when the mnode is neither
// stopped nor a non-leader.
//
// Result (handed to TAOS_RETURN): 0 on success, TSDB_CODE_APP_IS_STOPPING when
// the mnode is stopped, 1 when mndIsLeader() reports false.
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t ret = 0;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    ret = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
  } else {
    ret = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(ret);
}
72

73
// Drop one RPC reference on the mnode.
// The atomic decrement of rpcRef is performed with pMnode->lock held in read mode.
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
651,684✔
83

84
static void *mndBuildTimerMsg(int32_t *pContLen) {
119,136✔
85
  terrno = 0;
119,136✔
86
  SMTimerReq timerReq = {0};
119,136✔
87

88
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
119,136✔
89
  if (contLen <= 0) return NULL;
119,122!
90
  void *pReq = rpcMallocCont(contLen);
119,122✔
91
  if (pReq == NULL) return NULL;
119,115!
92

93
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
119,115!
94
    mError("failed to serialize timer msg since %s", terrstr());
×
95
  }
96
  *pContLen = contLen;
119,095✔
97
  return pReq;
119,095✔
98
}
99

100
// Post a TDMT_MND_TRANS_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
28,550✔
112

113
// Post a TDMT_MND_COMPACT_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,158✔
125

126
// Post a TDMT_MND_TTL_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  // Skip the enqueue instead of posting a message with a NULL body.
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,280✔
136

137
// Post a TDMT_MND_TRIM_DB_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  // Skip the enqueue instead of posting a message with a NULL body.
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
147

148
// Post a TDMT_MND_SSMIGRATE_DB_TIMER message to the mnode write queue.
// Does nothing unless grantCheck(TSDB_GRANT_SHARED_STORAGE) succeeds.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupSsMigrateDb(SMnode *pMnode) {
  if (grantCheck(TSDB_GRANT_SHARED_STORAGE) != TSDB_CODE_SUCCESS) {
    return;
  }

  mTrace("pullup ssmigrate db");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  // Skip the enqueue instead of posting a message with a NULL body.
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_SSMIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
161

162
// Post a TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupUpdateSsMigrateProgress(SMnode *pMnode) {
  mTrace("pullup update ssmigrate progress");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  // Skip the enqueue instead of posting a message with a NULL body.
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
171

172
// Post a TDMT_MND_ARB_HEARTBEAT_TIMER message (info.noResp set) to the arb queue.
//
// Returns the result of tmsgPutToQueue(). When the timer body cannot be built,
// nothing is posted and a non-zero code is returned: terrno if it is set,
// otherwise TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure rather than enqueueing a message with a NULL body.
    return (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
179

180
// Post a TDMT_MND_ARB_CHECK_SYNC_TIMER message (info.noResp set) to the arb queue.
//
// Returns the result of tmsgPutToQueue(). When the timer body cannot be built,
// nothing is posted and a non-zero code is returned: terrno if it is set,
// otherwise TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure rather than enqueueing a message with a NULL body.
    return (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
187

188
// Post a TDMT_MND_TMQ_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
28,546✔
198

199
// Post a TDMT_MND_TELEM_TIMER message to the mnode read queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2✔
211

212
// Post a TDMT_MND_GRANT_HB_TIMER message to the mnode write queue.
// The message carries info.notFreeAhandle = 1 and a zero info.ahandle.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {0};
  timerMsg.msgType = TDMT_MND_GRANT_HB_TIMER;
  timerMsg.pCont = pBody;
  timerMsg.contLen = bodyLen;
  timerMsg.info.notFreeAhandle = 1;
  timerMsg.info.ahandle = 0;

  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
4,257✔
228

229
// Post a TDMT_MND_UPTIME_TIMER message to the mnode write queue.
// The message carries info.notFreeAhandle = 1 and a zero info.ahandle.
// Nothing is posted when the timer body cannot be built; an enqueue failure is only logged.
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increate uptime");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {0};
  timerMsg.msgType = TDMT_MND_UPTIME_TIMER;
  timerMsg.pCont = pBody;
  timerMsg.contLen = bodyLen;
  timerMsg.info.notFreeAhandle = 1;
  timerMsg.info.ahandle = 0;

  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
8✔
245

246
// Mark every vgroup replica hosted on dnodeId as offline.
//
// Walks all SDB_VGROUP objects. For the first replica of a vgroup whose
// dnodeId matches and whose sync state is not already offline, the state is
// set to TAOS_SYNC_STATE_OFFLINE and syncRestore, syncCanRead and startTimeMs
// are zeroed. When a replica was changed, the stateTs of the owning db is set
// to curMs (if the db can be acquired and its stateTs differs).
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;
  void *pCursor = NULL;

  for (;;) {
    SVgObj *pVgroup = NULL;
    pCursor = sdbFetch(pSdb, SDB_VGROUP, pCursor, (void **)&pVgroup);
    if (pCursor == NULL) break;

    // Find the replica placed on the target dnode; only the first match is considered.
    SVnodeGid *pHit = NULL;
    for (int32_t idx = 0; idx < pVgroup->replica; ++idx) {
      if (pVgroup->vnodeGid[idx].dnodeId == dnodeId) {
        pHit = &pVgroup->vnodeGid[idx];
        break;
      }
    }

    if (pHit != NULL && pHit->syncState != TAOS_SYNC_STATE_OFFLINE) {
      mInfo(
          "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline "
          "restored:0 "
          "canRead:0",
          pVgroup->vgId, syncStr(pHit->syncState), pHit->syncRestore, pHit->syncCanRead);
      pHit->syncState = TAOS_SYNC_STATE_OFFLINE;
      pHit->syncRestore = 0;
      pHit->syncCanRead = 0;
      pHit->startTimeMs = 0;

      // The replica state changed, so refresh the state timestamp of the owning db.
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, old newTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
312✔
288

289
// Scan all SDB_DNODE objects and, for each one that mndIsDnodeOnline() reports
// as not online at the current timestamp, mark its vgroup replicas offline.
// The scan runs only while an RPC reference is held; if the reference cannot
// be acquired the function returns immediately.
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb   *pSdb = pMnode->pSdb;
  int64_t nowMs = taosGetTimestampMs();
  void   *pCursor = NULL;

  for (;;) {
    SDnodeObj *pDnode = NULL;
    pCursor = sdbFetch(pSdb, SDB_DNODE, pCursor, (void **)&pDnode);
    if (pCursor == NULL) break;

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
313

314
// Tell whether this mnode must skip leader-only timer work.
//
// The sync state and pMnode->restored are sampled with pMnode->lock held in
// read mode. Returns true when:
//   - terrno is non-zero after syncGetState() (terrno is not overwritten here),
//   - the sync state is not leader (terrno = TSDB_CODE_SYN_NOT_LEADER), or
//   - either the sync state or the mnode is not restored
//     (terrno = TSDB_CODE_SYN_RESTORING).
// Returns false otherwise.
static bool mnodeIsNotLeader(SMnode *pMnode) {
  terrno = 0;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  SSyncState    syncState = syncGetState(pMnode->syncMgmt.sync);
  const int32_t fetchErr = terrno;
  const bool    mnodeRestored = pMnode->restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  if (fetchErr != 0) {
    return true;
  }

  if (syncState.state != TAOS_SYNC_STATE_LEADER) {
    terrno = TSDB_CODE_SYN_NOT_LEADER;
    return true;
  }

  if (!(syncState.restored && mnodeRestored)) {
    terrno = TSDB_CODE_SYN_RESTORING;
    return true;
  }

  return false;
}
336

337
static int32_t minCronTime() {
×
338
  int32_t min = INT32_MAX;
×
339
  min = TMIN(min, tsTtlPushIntervalSec);
×
340
  min = TMIN(min, tsTrimVDbIntervalSec);
×
341
  min = TMIN(min, tsSsAutoMigrateIntervalSec);
×
342
  min = TMIN(min, tsTransPullupInterval);
×
343
  min = TMIN(min, tsCompactPullupInterval);
×
344
  min = TMIN(min, tsMqRebalanceInterval);
×
345

346
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
347
  min = TMIN(min, telemInt);
×
348
  min = TMIN(min, tsGrantHBInterval);
×
349
  min = TMIN(min, tsUptimeInterval);
×
350

351
  return min <= 1 ? 2 : min;
×
352
}
353
// Fire the periodic mnode timer messages that are due at tick `sec`.
//
// A task is due when `sec` is a multiple of its configured interval. Which
// tasks exist depends on the build flags (TD_ASTRA, USE_SHARED_STORAGE,
// USE_TOPIC) and, for shared storage and telemetry, on runtime settings.
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
#ifndef TD_ASTRA
  if (sec % tsGrantHBInterval == 0) {  // put in the 1st place as to take effect ASAP
    mndPullupGrant(pMnode);
  }
  if (sec % tsTtlPushIntervalSec == 0) {
    mndPullupTtl(pMnode);
  }

  if (sec % tsTrimVDbIntervalSec == 0) {
    mndPullupTrimDb(pMnode);
  }
#endif
#ifdef USE_SHARED_STORAGE
  if (tsSsEnabled) {
    if (sec % 10 == 0) {  // TODO: make 10 to be configurable
      mndPullupUpdateSsMigrateProgress(pMnode);
    }
    // Auto-migration is pulled up only when tsSsEnabled equals 2.
    if (tsSsEnabled == 2 && sec % tsSsAutoMigrateIntervalSec == 0) {
      mndPullupSsMigrateDb(pMnode);
    }
  }
#endif
  if (sec % tsTransPullupInterval == 0) {
    mndPullupTrans(pMnode);
  }

  if (sec % tsCompactPullupInterval == 0) {
    mndPullupCompacts(pMnode);
  }
#ifdef USE_TOPIC
  if (sec % tsMqRebalanceInterval == 0) {
    mndCalMqRebalance(pMnode);
  }
#endif
  // The telemetry pullup is skipped entirely when tsTelemInterval is not positive.
  if (tsTelemInterval > 0 && sec % tsTelemInterval == 0) {
    mndPullupTelem(pMnode);
  }
  if (sec % tsUptimeInterval == 0) {
    mndIncreaseUpTime(pMnode);
  }
}
58,017✔
396

397
// Fire the arbitrator timer messages that are due at tick `ms`.
// A task is due when `ms` is a multiple of its configured interval; failures
// are logged and otherwise ignored. Compiled to a no-op when TD_ASTRA is defined.
void mndDoArbTimerPullupTask(SMnode *pMnode, int64_t ms) {
#ifndef TD_ASTRA
  if (ms % (tsArbHeartBeatIntervalMs) == 0) {
    int32_t hbCode = mndPullupArbHeartbeat(pMnode);
    if (hbCode != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(hbCode));
    }
  }

  if (ms % (tsArbCheckSyncIntervalMs) == 0) {
    int32_t syncCode = mndPullupArbCheckSync(pMnode);
    if (syncCode != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(syncCode));
    }
  }
#endif
}
587,994✔
413

414
// Run the dnode offline check whenever `ms` is a multiple of tsStatusTimeoutMs.
void mndDoTimerCheckStatus(SMnode *pMnode, int64_t ms) {
  if (ms % (tsStatusTimeoutMs) != 0) {
    return;
  }
  mndCheckDnodeOffline(pMnode);
}
587,994✔
419

420
// Periodic checks driven by the seconds tick:
//   - mndSyncCheckTimeout() every MNODE_TIMEOUT_SEC / 2 ticks;
//   - msmHealthCheck() every MND_STREAM_HEALTH_CHECK_PERIOD_SEC ticks, unless
//     tsDisableStream is set.
void mndDoTimerCheckSync(SMnode *pMnode, int64_t sec) {
  if (sec % (MNODE_TIMEOUT_SEC / 2) == 0) {
    mndSyncCheckTimeout(pMnode);
  }

  if (!tsDisableStream) {
    if (sec % MND_STREAM_HEALTH_CHECK_PERIOD_SEC == 0) {
      msmHealthCheck(pMnode);
    }
  }
}
58,017✔
428

429
static void *mndThreadSecFp(void *param) {
1,919✔
430
  SMnode *pMnode = param;
1,919✔
431
  int64_t lastTime = 0;
1,919✔
432
  setThreadName("mnode-timer");
1,919✔
433

434
  while (1) {
659,363✔
435
    lastTime++;
661,282✔
436
    taosMsleep(100);
661,282✔
437

438
    if (mndGetStop(pMnode)) break;
661,282✔
439
    if (lastTime % 10 != 0) continue;
659,363✔
440

441
    if (mnodeIsNotLeader(pMnode)) {
65,025✔
442
      mTrace("timer not process since mnode is not leader");
7,008!
443
      continue;
7,008✔
444
    }
445

446
    int64_t sec = lastTime / 10;
58,017✔
447
    mndDoTimerCheckSync(pMnode, sec);
58,017✔
448

449
    mndDoTimerPullupTask(pMnode, sec);
58,017✔
450
  }
451

452
  return NULL;
1,919✔
453
}
454

455
static void *mndThreadMsFp(void *param) {
1,919✔
456
  SMnode *pMnode = param;
1,919✔
457
  int64_t lastTime = 0;
1,919✔
458
  setThreadName("mnode-arb-timer");
1,919✔
459

460
  while (1) {
461
    lastTime += 100;
661,263✔
462
    taosMsleep(100);
661,263✔
463

464
    if (mndGetStop(pMnode)) break;
661,263✔
465
    if (lastTime % 10 != 0) continue;
659,344!
466

467
    if (mnodeIsNotLeader(pMnode)) {
659,344✔
468
      mTrace("timer not process since mnode is not leader");
71,350!
469
      continue;
71,350✔
470
    }
471

472
    mndDoTimerCheckStatus(pMnode, lastTime);
587,994✔
473

474
    mndDoArbTimerPullupTask(pMnode, lastTime);
587,994✔
475
  }
476

477
  return NULL;
1,919✔
478
}
479

480
// Start the two joinable mnode timer threads: mndThreadSecFp (stored in
// pMnode->thread) and mndThreadMsFp (stored in pMnode->arbThread).
//
// Returns 0 on success or the taosThreadCreate() error code. Each thread
// attribute object is destroyed whether or not thread creation succeeded.
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadSecFp, pMnode);
  // Release the attribute object on both outcomes so the failure path does not leak it.
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");

  TdThreadAttr arbAttr;
  (void)taosThreadAttrInit(&arbAttr);
  (void)taosThreadAttrSetDetachState(&arbAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&arbAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->arbThread, &arbAttr, mndThreadMsFp, pMnode);
  (void)taosThreadAttrDestroy(&arbAttr);
  if (code != 0) {
    mError("failed to create arb timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  // NOTE(review): this reports "mnode-timer" a second time; possibly meant to name the arb timer - confirm.
  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
511

512
// Join and clear the two timer threads (pMnode->thread, then pMnode->arbThread).
// A thread handle that taosCheckPthreadValid() rejects is skipped.
static void mndCleanupTimer(SMnode *pMnode) {
  TdThread *timers[] = {&pMnode->thread, &pMnode->arbThread};

  for (size_t i = 0; i < sizeof(timers) / sizeof(timers[0]); ++i) {
    if (!taosCheckPthreadValid(*timers[i])) continue;

    (void)taosThreadJoin(*timers[i], NULL);
    taosThreadClear(timers[i]);
  }
}
1,919✔
522

523
// Store a copy of `path` in pMnode->path and create that directory.
// Returns 0 on success; on a failed copy or a failed taosMkDir() the current
// terrno is returned.
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);

  if (pMnode->path == NULL || taosMkDir(pMnode->path) != 0) {
    TAOS_RETURN(terrno);
  }

  TAOS_RETURN(0);
}
538

539
// Open the mnode WAL under "<pMnode->path><TD_DIRSEP>wal" and store the handle
// in pMnode->pWal.
//
// In builds with TD_ENTERPRISE or TD_ASTRA_TODO, when SM4 encryption covers the
// mnode WAL scope, the encryption settings are copied into the WAL config; an
// empty tsEncryptKey then fails with TSDB_CODE_DNODE_INVALID_ENCRYPTKEY.
// Returns 0 on success. If walOpen() returns NULL the result is terrno when it
// is non-zero, otherwise TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitWal(SMnode *pMnode) {
  char walDir[PATH_MAX + 20] = {0};
  (void)snprintf(walDir, sizeof(walDir), "%s%swal", pMnode->path, TD_DIRSEP);

  SWalCfg walCfg = {.vgId = 1,
                    .fsyncPeriod = 0,
                    .rollPeriod = -1,
                    .segSize = -1,
                    .committed = -1,
                    .retentionPeriod = 0,
                    .retentionSize = 0,
                    .level = TAOS_WAL_FSYNC,
                    .encryptAlgorithm = 0,
                    .encryptKey = {0}};

#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
    walCfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
    if (tsEncryptKey[0] == '\0') {
      TAOS_RETURN(TSDB_CODE_DNODE_INVALID_ENCRYPTKEY);
    }
    tstrncpy(walCfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walDir, &walCfg);
  if (pMnode->pWal != NULL) {
    TAOS_RETURN(0);
  }

  int32_t openErr = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  mError("failed to open wal since %s. wal:%s", tstrerror(openErr), walDir);
  TAOS_RETURN(openErr);
}
576

577
// Close the mnode WAL if it is open and reset pMnode->pWal to NULL.
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) return;

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
1,919✔
583

584
// Create the SDB for this mnode from its path, the mnode itself and its WAL,
// and store the handle in pMnode->pSdb.
// Returns 0 on success. If sdbInit() returns NULL the result is terrno when it
// is non-zero, otherwise TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt sdbOpt = {.path = pMnode->path, .pMnode = pMnode, .pWal = pMnode->pWal};

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t initErr = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(initErr);
}
600

601
// Load the SDB file unless this mnode is being deployed, then publish the
// SDB commit index as pMnode->applied.
// Returns the sdbReadFile() result, or 0 when no read was attempted. The
// applied index is stored regardless of that result.
static int32_t mndOpenSdb(SMnode *pMnode) {
  int32_t readCode = pMnode->deploy ? 0 : sdbReadFile(pMnode->pSdb);

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);
  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);

  return readCode;
}
612

613
// Release the SDB if present and reset pMnode->pSdb to NULL.
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) return;

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
1,919✔
619

620
// Append one startup step (name, init callback, cleanup callback) to pMnode->pSteps.
// Returns 0 on success, or terrno when the array push fails.
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep newStep = {.name = name, .initFp = initFp, .cleanupFp = cleanupFp};

  if (taosArrayPush(pMnode->pSteps, &newStep) != NULL) {
    TAOS_RETURN(0);
  }

  TAOS_RETURN(terrno);
}
631

632
// Register every mnode startup step, in execution order, into pMnode->pSteps.
//
// The steps are listed in a table and pushed one by one through
// mndAllocStep(); the first failing push aborts via TAOS_CHECK_RETURN.
// Returns 0 once all steps are registered. A NULL cleanup entry means the step
// has no cleanup callback.
static int32_t mndInitSteps(SMnode *pMnode) {
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } kSteps[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-bnode", mndInitBnode, mndCleanupBnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
#ifdef USE_MOUNT
      {"mnode-mount", mndInitMount, mndCleanupMount},
      {"mnode-mount-log", mndInitMountLog, mndCleanupMountLog},
#endif
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-ssmigrate", mndInitSsMigrate, mndCleanupSsMigrate},
      // Second "mnode-sdb" entry: this one opens the SDB (mndOpenSdb) and has no cleanup.
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  for (size_t i = 0; i < sizeof(kSteps) / sizeof(kSteps[0]); ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, kSteps[i].name, kSteps[i].initFp, kSteps[i].cleanupFp));
  }
  return 0;
}
677

678
// Run the cleanup callbacks of steps pos..0 in reverse order, then destroy the
// step array and reset pMnode->pSteps to NULL.
// pos == -1 means "start from the last registered step". Steps without a
// cleanup callback are only logged. No-op when pMnode->pSteps is NULL.
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  if (pos == -1) {
    pos = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  int32_t idx = pos;
  while (idx >= 0) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    if (pStep->cleanupFp != NULL) {
      (*pStep->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
697

698
// Execute the init callback of every registered step in order.
//
// On the first failure the steps from the failing index down to 0 are cleaned
// up via mndCleanupSteps() and the failing code is returned. After all steps
// succeed, pMnode->clusterId is refreshed from mndGetClusterId() and 0 is
// returned. Steps without an init callback are skipped.
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t idx = 0; idx < numSteps; idx++) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    if (pStep->initFp == NULL) continue;

    int32_t stepCode = (*pStep->initFp)(pMnode);
    if (stepCode == 0) {
      mInfo("%s is initialized", pStep->name);
      tmsgReportStartup(pStep->name, "initialized");
      continue;
    }

    mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(stepCode));
    mndCleanupSteps(pMnode, idx);
    TAOS_RETURN(stepCode);
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
718

719
// Copy the open options into the mnode: message callbacks, own dnode id, and
// the sync-management fields (indexes, replica counts, replica and role arrays).
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  // General settings.
  pMnode->selfDnodeId = pOption->dnodeId;
  pMnode->msgCb = pOption->msgCb;

  // Scalar sync-management settings.
  pMnode->syncMgmt.lastIndex = pOption->lastIndex;
  pMnode->syncMgmt.selfIndex = pOption->selfIndex;
  pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
  pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;

  // Array settings are copied wholesale, sized by the option arrays.
  (void)memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
  (void)memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
}
1,919✔
729

730
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
1,919✔
731
  terrno = 0;
1,919✔
732
  mInfo("start to open mnode in %s", path);
1,919!
733

734
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
1,919!
735
  if (pMnode == NULL) {
1,919!
736
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
737
    mError("failed to open mnode since %s", terrstr());
×
738
    return NULL;
×
739
  }
740
  (void)memset(pMnode, 0, sizeof(SMnode));
1,919✔
741

742
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
1,919✔
743
  if (code != 0) {
1,919!
744
    taosMemoryFree(pMnode);
×
745
    mError("failed to open mnode lock since %s", tstrerror(code));
×
746
    return NULL;
×
747
  }
748

749
  char timestr[24] = "1970-01-01 00:00:00.00";
1,919✔
750
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
1,919✔
751
  if (code < 0) {
1,919!
752
    mError("failed to parse time since %s", tstrerror(code));
×
753
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
754
    taosMemoryFree(pMnode);
×
755
    return NULL;
×
756
  }
757
  mndSetOptions(pMnode, pOption);
1,919✔
758

759
  pMnode->deploy = pOption->deploy;
1,919✔
760
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
1,919✔
761
  if (pMnode->pSteps == NULL) {
1,919!
762
    taosMemoryFree(pMnode);
×
763
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
764
    mError("failed to open mnode since %s", terrstr());
×
765
    return NULL;
×
766
  }
767

768
  code = mndCreateDir(pMnode, path);
1,919✔
769
  if (code != 0) {
1,919!
770
    code = terrno;
×
771
    mError("failed to open mnode since %s", tstrerror(code));
×
772
    mndClose(pMnode);
×
773
    terrno = code;
×
774
    return NULL;
×
775
  }
776

777
  code = mndInitSteps(pMnode);
1,919✔
778
  if (code != 0) {
1,919!
779
    code = terrno;
×
780
    mError("failed to open mnode since %s", tstrerror(code));
×
781
    mndClose(pMnode);
×
782
    terrno = code;
×
783
    return NULL;
×
784
  }
785

786
  code = mndExecSteps(pMnode);
1,919✔
787
  if (code != 0) {
1,919!
788
    code = terrno;
×
789
    mError("failed to open mnode since %s", tstrerror(code));
×
790
    mndClose(pMnode);
×
791
    terrno = code;
×
792
    return NULL;
×
793
  }
794

795
  mInfo("mnode open successfully");
1,919!
796
  return pMnode;
1,919✔
797
}
798

799
/**
 * Pre-close hook for the mnode.
 *
 * Calls syncLeaderTransfer(), syncPreStop() and sdbWriteFile() in that order.
 * A negative result from the transfer or the write is logged and otherwise
 * ignored. Does nothing when pMnode is NULL.
 */
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  // TODO check return value
  int32_t rc = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (rc < 0) {
    mError("failed to transfer leader since %s", tstrerror(rc));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  rc = sdbWriteFile(pMnode->pSdb, 0);
  if (rc < 0) {
    mError("failed to write sdb since %s", tstrerror(rc));
  }
}
1,919✔
814

815
/**
 * Tear down an mnode object and release its memory.
 *
 * Runs mndCleanupSteps(pMnode, -1), frees the path string, destroys the
 * rwlock that mndOpen() initialised, and finally frees the object itself.
 * Does nothing when pMnode is NULL.
 */
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  // The lock is initialised in mndOpen() before any path that reaches
  // mndClose(); destroy it here so the normal close path releases it too.
  (void)taosThreadRwlockDestroy(&pMnode->lock);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
1,919✔
824

825
/**
 * Start the mnode.
 *
 * Calls mndSyncStart(); when pMnode->deploy is set, deploys the sdb and marks
 * the mnode as restored; then resets grants and initialises the timer.
 *
 * @return -1 if sdbDeploy() fails, otherwise the result of mndInitTimer().
 */
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    const bool deployFailed = (sdbDeploy(pMnode->pSdb) != 0);
    if (deployFailed) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    mndSetRestored(pMnode, true);
  }

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
838

839
/** Return syncIsCatchUp() for this mnode's sync handle. */
int32_t mndIsCatchUp(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;

  return syncIsCatchUp(syncRid);
}
843

844
/** Return syncGetRole() for this mnode's sync handle. */
ESyncRole mndGetRole(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;

  return syncGetRole(syncRid);
}
848

849
/** Return syncGetTerm() for this mnode's sync handle. */
int64_t mndGetTerm(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;

  return syncGetTerm(syncRid);
}
853

854
/**
 * Forward to syncGetArbToken() using this mnode's sync handle.
 *
 * @param outToken buffer handed through unchanged to syncGetArbToken().
 * @return whatever syncGetArbToken() returns.
 */
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  return syncGetArbToken(pMnode->syncMgmt.sync, outToken);
}
47,497✔
855

856
/**
 * Stop the mnode: mark it stopped via mndSetStop(), then call mndSyncStop()
 * and mndCleanupTimer(), in that order.
 */
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);       // sets pMnode->stopped under the write lock
  mndSyncStop(pMnode);
  mndCleanupTimer(pMnode);
}
1,919✔
861

862
/**
 * Hand a sync message to syncProcessMsg() for this mnode's sync handle.
 *
 * @return the code from syncProcessMsg(); a non-zero code is logged first.
 */
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;  // presumably referenced by the mG* log macros

  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  const int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
877

878
/**
 * Decide whether this mnode may process pMsg right now.
 *
 * - Anything that is not a request (per IsReq) and the listed TDMT_SCH_*
 *   message types pass unchecked and return 0 without touching rpcRef.
 * - Otherwise, under the read lock: a stopped mnode yields
 *   TSDB_CODE_APP_IS_STOPPING; a terrno set by syncGetState() is returned
 *   as-is; a leader that is fully restored increments rpcRef and returns 0.
 * - A non-leader yields TSDB_CODE_SYN_NOT_LEADER and a leader that is not yet
 *   restored yields TSDB_CODE_SYN_RESTORING. For these two codes, timer
 *   messages return the code directly; every other message additionally gets
 *   the mnode ep set serialized into pMsg->info.rsp (when allocation succeeds).
 *
 * @return 0 when the message may proceed, -1 when the ep set is empty,
 *         otherwise one of the codes above.
 */
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;
  if (!IsReq(pMsg)) TAOS_RETURN(code);

  const bool bypassCheck =
      pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH ||
      pMsg->msgType == TDMT_SCH_DROP_TASK || pMsg->msgType == TDMT_SCH_TASK_NOTIFY;
  if (bypassCheck) {
    TAOS_RETURN(code);
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_APP_IS_STOPPING;
    TAOS_RETURN(code);
  }

  // Clear terrno first so a failure inside syncGetState() can be detected.
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  const bool isLeader = (state.state == TAOS_SYNC_STATE_LEADER);
  const bool isReady = isLeader && state.restored && pMnode->restored;

  if (isReady) {
    // Take the rpc reference while still holding the read lock.
#if 1
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
#else
    int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
    mTrace("mnode rpc is acquired, ref:%d", ref);
#endif

    (void)taosThreadRwlockUnlock(&pMnode->lock);
    TAOS_RETURN(code);
  }

  (void)taosThreadRwlockUnlock(&pMnode->lock);
  code = isLeader ? TSDB_CODE_SYN_RESTORING : TSDB_CODE_SYN_NOT_LEADER;

  // From here on the request is rejected; timer messages get no redirect.
  const bool isTimerMsg =
      pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
      pMsg->msgType == TDMT_MND_SSMIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER ||
      pMsg->msgType == TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER;
  if (isTimerMsg) {
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
           pMnode->stopped, state.restored, syncStr(state.state));
    TAOS_RETURN(code);
  }

  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) return -1;

  for (int32_t epIdx = 0; epIdx < epSet.numOfEps; ++epIdx) {
    mDebug("mnode index:%d, ep:%s:%u", epIdx, epSet.eps[epIdx].fqdn, epSet.eps[epIdx].port);
  }

  // First call sizes the buffer, second call fills it.
  int32_t rspLen = tSerializeSEpSet(NULL, 0, &epSet);
  pMsg->info.rsp = rpcMallocCont(rspLen);
  if (pMsg->info.rsp != NULL) {
    if (tSerializeSEpSet(pMsg->info.rsp, rspLen, &epSet) < 0) {
      mError("failed to serialize ep set");
    }
    pMsg->info.hasEpSet = 1;
    pMsg->info.rspLen = rspLen;
  }

  TAOS_RETURN(code);
}
968

969
/**
 * Dispatch an rpc message to the handler registered for its type.
 *
 * The handler is looked up in pMnode->msgFp first and in pMnode->msgFpExt as a
 * fallback. The message is then gated through mndCheckMnodeState(); on a
 * non-zero result that code is returned without invoking the handler.
 *
 * @param pMsg        message to process; pMsg->info.node is the SMnode.
 * @param pQueueInfo  passed through to extended (msgFpExt) handlers only.
 * @return TSDB_CODE_MSG_NOT_PROCESSED when no handler exists for the type,
 *         the mndCheckMnodeState() code when the check rejects the message,
 *         otherwise the handler's code (with -1 replaced by terrno).
 */
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

  // Apply the same `< TDMT_MAX` bound that mndSetMsgHandle()/mndSetMsgHandleExt()
  // use when writing these tables, so an unexpected type cannot index past them.
  const int32_t msgIndex = TMSG_INDEX(pMsg->msgType);
  MndMsgFp      fp = NULL;
  MndMsgFpExt   fpExt = NULL;
  if (msgIndex >= 0 && msgIndex < TDMT_MAX) {
    fp = pMnode->msgFp[msgIndex];
    if (fp == NULL) {
      fpExt = pMnode->msgFpExt[msgIndex];
    }
  }

  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp)
    code = (*fp)(pMsg);
  else
    code = (*fpExt)(pMsg, pQueueInfo);

  // NOTE(review): mndCheckMnodeState() returns success without incrementing
  // rpcRef for non-request and TDMT_SCH_* messages, yet the release below is
  // unconditional - confirm the reference count stays balanced.
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
  } else if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
  } else {
    // TODO remove this wrong set code
    if (code == -1) {
      code = terrno;
    }
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
            TMSG_INFO(pMsg->msgType));
  }

  TAOS_RETURN(code);
}
1009

1010
/**
 * Register fp as the handler for msgType in pMnode->msgFp.
 * The call is silently ignored when the type's index is not below TDMT_MAX.
 */
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFp[slot] = fp;
}
328,149✔
1016

1017
/**
 * Register fp as the extended handler for msgType in pMnode->msgFpExt.
 * The call is silently ignored when the type's index is not below TDMT_MAX.
 */
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFpExt[slot] = fp;
}
15,352✔
1023

1024
// Note: uid 0 is reserved
/**
 * Generate a strictly positive 64-bit uid for `name`.
 *
 * The raw 64-bit pattern is composed of three non-overlapping fields:
 *   bits 24..63  low 40 bits of taosGetTimestampUs()
 *   bits  8..23  low 16 bits of MurmurHash3_32(name, len)
 *   bits  0..7   low  8 bits of taosRand()
 * The pattern is then folded to its absolute value as a signed 64-bit number.
 * All arithmetic is done in uint64_t so that neither the shift into the top
 * bit nor the fold can overflow a signed type. A raw value of 0 (reserved) or
 * 2^63 (no positive counterpart) is rejected and a fresh value is drawn.
 *
 * @param name  bytes to hash.
 * @param len   number of bytes of `name` passed to the hash.
 * @return a uid in the range [1, INT64_MAX].
 */
int64_t mndGenerateUid(const char *name, int32_t len) {
  const uint64_t signBit = (uint64_t)1 << 63;
  const uint64_t hashBits = ((uint64_t)MurmurHash3_32(name, len) & 0xFFFFu) << 8;

  do {
    const uint64_t timeBits = ((uint64_t)taosGetTimestampUs() & 0x000000FFFFFFFFFFULL) << 24;
    const uint64_t randBits = (uint64_t)taosRand() & 0xFFu;
    const uint64_t raw = timeBits + hashBits + randBits;

    if (raw == 0 || raw == signBit) {
      continue;
    }

    // Two's-complement absolute value: negate when the sign bit is set.
    return (int64_t)((raw & signBit) ? (0 - raw) : raw);
  } while (true);
}
1036

1037
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
1✔
1038
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1039
  int32_t code = mndAcquireRpc(pMnode);
1✔
1040
  if (code < 0) {
1!
1041
    TAOS_RETURN(code);
×
1042
  } else if (code == 1) {
1!
1043
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1044
  }
1045

1046
  SSdb   *pSdb = pMnode->pSdb;
1✔
1047
  int64_t ms = taosGetTimestampMs();
1✔
1048

1049
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
1✔
1050
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
1✔
1051
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
1✔
1052
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
1✔
1053
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
1!
1054
      pStbInfo->stbs == NULL) {
1!
1055
    mndReleaseRpc(pMnode);
×
1056
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1057
    if (terrno != 0) code = terrno;
×
1058
    TAOS_RETURN(code);
×
1059
  }
1060

1061
  // cluster info
1062
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
1✔
1063
  pClusterInfo->monitor_interval = tsMonitorInterval;
1✔
1064
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
1✔
1065
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
1✔
1066
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
1✔
1067
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
1✔
1068
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
1✔
1069

1070
  void *pIter = NULL;
1✔
1071
  while (1) {
1✔
1072
    SDnodeObj *pObj = NULL;
2✔
1073
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
2✔
1074
    if (pIter == NULL) break;
2✔
1075

1076
    SMonDnodeDesc desc = {0};
1✔
1077
    desc.dnode_id = pObj->id;
1✔
1078
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
1✔
1079
    if (mndIsDnodeOnline(pObj, ms)) {
1!
1080
      tstrncpy(desc.status, "ready", sizeof(desc.status));
1✔
1081
    } else {
1082
      tstrncpy(desc.status, "offline", sizeof(desc.status));
×
1083
    }
1084
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
2!
1085
      mError("failed put dnode into array, but continue at this monitor report")
×
1086
    }
1087
    sdbRelease(pSdb, pObj);
1✔
1088
  }
1089

1090
  pIter = NULL;
1✔
1091
  while (1) {
1✔
1092
    SMnodeObj *pObj = NULL;
2✔
1093
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
2✔
1094
    if (pIter == NULL) break;
2✔
1095

1096
    SMonMnodeDesc desc = {0};
1✔
1097
    desc.mnode_id = pObj->id;
1✔
1098
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
1✔
1099

1100
    if (pObj->id == pMnode->selfDnodeId) {
1!
1101
      pClusterInfo->first_ep_dnode_id = pObj->id;
1✔
1102
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
1✔
1103
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1104
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
1✔
1105
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1106
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
1✔
1107
      desc.syncState = TAOS_SYNC_STATE_LEADER;
1✔
1108
    } else {
1109
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1110
      desc.syncState = pObj->syncState;
×
1111
    }
1112
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
2!
1113
      mError("failed to put mnode into array, but continue at this monitor report");
×
1114
    }
1115
    sdbRelease(pSdb, pObj);
1✔
1116
  }
1117

1118
  // vgroup info
1119
  pIter = NULL;
1✔
1120
  while (1) {
6✔
1121
    SVgObj *pVgroup = NULL;
7✔
1122
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
7✔
1123
    if (pIter == NULL) break;
7✔
1124

1125
    if (pVgroup->mountVgId) {
6!
1126
      sdbRelease(pSdb, pVgroup);
×
1127
      continue;
×
1128
    }
1129

1130
    pClusterInfo->vgroups_total++;
6✔
1131
    pClusterInfo->tbs_total += pVgroup->numOfTables;
6✔
1132

1133
    SMonVgroupDesc desc = {0};
6✔
1134
    desc.vgroup_id = pVgroup->vgId;
6✔
1135

1136
    SName name = {0};
6✔
1137
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
6✔
1138
    if (code < 0) {
6!
1139
      mError("failed to get db name since %s", tstrerror(code));
×
1140
      sdbRelease(pSdb, pVgroup);
×
1141
      TAOS_RETURN(code);
×
1142
    }
1143
    (void)tNameGetDbName(&name, desc.database_name);
6✔
1144

1145
    desc.tables_num = pVgroup->numOfTables;
6✔
1146
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
6✔
1147
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
6✔
1148
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
12✔
1149
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
6✔
1150
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
6✔
1151
      pVnDesc->dnode_id = pVgid->dnodeId;
6✔
1152
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
6✔
1153
      pVnDesc->syncState = pVgid->syncState;
6✔
1154
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
6!
1155
        tstrncpy(desc.status, "ready", sizeof(desc.status));
6✔
1156
        pClusterInfo->vgroups_alive++;
6✔
1157
      }
1158
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
6!
1159
        pClusterInfo->vnodes_alive++;
6✔
1160
      }
1161
      pClusterInfo->vnodes_total++;
6✔
1162
    }
1163

1164
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
12!
1165
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1166
    }
1167
    sdbRelease(pSdb, pVgroup);
6✔
1168
  }
1169

1170
  // stb info
1171
  pIter = NULL;
1✔
1172
  while (1) {
1✔
1173
    SStbObj *pStb = NULL;
2✔
1174
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
2✔
1175
    if (pIter == NULL) break;
2✔
1176

1177
    SMonStbDesc desc = {0};
1✔
1178

1179
    SName name1 = {0};
1✔
1180
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
1✔
1181
    if (code < 0) {
1!
1182
      mError("failed to get db name since %s", tstrerror(code));
×
1183
      sdbRelease(pSdb, pStb);
×
1184
      TAOS_RETURN(code);
×
1185
    }
1186
    (void)tNameGetDbName(&name1, desc.database_name);
1✔
1187

1188
    SName name2 = {0};
1✔
1189
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
1✔
1190
    if (code < 0) {
1!
1191
      mError("failed to get table name since %s", tstrerror(code));
×
1192
      sdbRelease(pSdb, pStb);
×
1193
      TAOS_RETURN(code);
×
1194
    }
1195
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
1✔
1196

1197
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
2!
1198
      mError("failed to put stb into array, but continue at this monitor report");
×
1199
    }
1200
    sdbRelease(pSdb, pStb);
1✔
1201
  }
1202

1203
  // grant info
1204
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
1✔
1205
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
1✔
1206
  if (pMnode->grant.expireTimeMS == 0) {
1!
1207
    pGrantInfo->expire_time = 0;
×
1208
    pGrantInfo->timeseries_total = 0;
×
1209
  }
1210

1211
  mndReleaseRpc(pMnode);
1✔
1212
  TAOS_RETURN(code);
1✔
1213
}
1214

1215
/**
 * Call syncResetTimer() on this mnode's sync handle with
 * tsMnodeElectIntervalMs and tsMnodeHeartbeatIntervalMs.
 *
 * @return whatever syncResetTimer() returns.
 */
int32_t mndResetTimer(SMnode *pMnode) {
  const int32_t rc = syncResetTimer(pMnode->syncMgmt.sync, tsMnodeElectIntervalMs, tsMnodeHeartbeatIntervalMs);
  return rc;
}
1218

1219
/**
 * Copy the current sync state of this mnode into pLoad.
 *
 * Fills syncState, syncRestore, syncTerm and roleTimeMs from the value
 * returned by syncGetState().
 *
 * @return always 0.
 */
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  const SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = snapshot.state;
  pLoad->syncRestore = snapshot.restored;
  pLoad->syncTerm = snapshot.term;
  pLoad->roleTimeMs = snapshot.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);

  return 0;
}
1230

1231
/** Return the roleTimeMs field of syncGetState() for this mnode's sync handle. */
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  const SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  return snapshot.roleTimeMs;
}
1235

1236
/**
 * Set pMnode->restored under the write lock and log the new value.
 *
 * When the flag is being cleared, this call additionally blocks, polling
 * every 3 ms, until pMnode->rpcRef is no longer positive.
 */
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) return;

  // Wait for outstanding rpc references to go away.
  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
1,919✔
1253

1254
/** Return pMnode->restored; the field is read without taking pMnode->lock. */
bool mndGetRestored(SMnode *pMnode) {
  return pMnode->restored;
}
×
1255

1256
/** Set pMnode->stopped to true while holding the write lock, then log it. */
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->stopped = true;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
1,919✔
1262

1263
/** Return pMnode->stopped; the field is read without taking pMnode->lock. */
bool mndGetStop(SMnode *pMnode) {
  return pMnode->stopped;
}
1,318,168✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc