• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #4661

08 Aug 2025 08:36AM UTC coverage: 59.883% (-0.2%) from 60.053%
#4661

push

travis-ci

web-flow
test: update cases desc (#32498)

137331 of 291923 branches covered (47.04%)

Branch coverage included in aggregate %.

207730 of 284307 relevant lines covered (73.07%)

4552406.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.97
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndSsMigrate.h"
24
#include "mndConfig.h"
25
#include "mndConsumer.h"
26
#include "mndDb.h"
27
#include "mndDnode.h"
28
#include "mndFunc.h"
29
#include "mndGrant.h"
30
#include "mndIndex.h"
31
#include "mndInfoSchema.h"
32
#include "mndMnode.h"
33
#include "mndMount.h"
34
#include "mndPerfSchema.h"
35
#include "mndPrivilege.h"
36
#include "mndProfile.h"
37
#include "mndQnode.h"
38
#include "mndQuery.h"
39
#include "mndShow.h"
40
#include "mndSma.h"
41
#include "mndSnode.h"
42
#include "mndStb.h"
43
#include "mndStream.h"
44
#include "mndSubscribe.h"
45
#include "mndSync.h"
46
#include "mndTelem.h"
47
#include "mndTopic.h"
48
#include "mndTrans.h"
49
#include "mndUser.h"
50
#include "mndVgroup.h"
51
#include "mndView.h"
52
#include "mndBnode.h"
53

54
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
10,835✔
55
  int32_t code = 0;
10,835✔
56
  (void)taosThreadRwlockRdlock(&pMnode->lock);
10,835✔
57
  if (pMnode->stopped) {
10,835!
58
    code = TSDB_CODE_APP_IS_STOPPING;
×
59
  } else if (!mndIsLeader(pMnode)) {
10,835!
60
    code = 1;
×
61
  } else {
62
#if 1
63
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
10,835✔
64
#else
65
    int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
66
    mTrace("mnode rpc is acquired, ref:%d", ref);
67
#endif
68
  }
69
  (void)taosThreadRwlockUnlock(&pMnode->lock);
10,835✔
70
  TAOS_RETURN(code);
10,835✔
71
}
72

73
static inline void mndReleaseRpc(SMnode *pMnode) {
664,355✔
74
  (void)taosThreadRwlockRdlock(&pMnode->lock);
664,355✔
75
#if 1
76
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
664,379✔
77
#else
78
  int32_t ref = atomic_sub_fetch_32(&pMnode->rpcRef, 1);
79
  mTrace("mnode rpc is released, ref:%d", ref);
80
#endif
81
  (void)taosThreadRwlockUnlock(&pMnode->lock);
664,370✔
82
}
664,377✔
83

84
static void *mndBuildTimerMsg(int32_t *pContLen) {
120,095✔
85
  terrno = 0;
120,095✔
86
  SMTimerReq timerReq = {0};
120,095✔
87

88
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
120,095✔
89
  if (contLen <= 0) return NULL;
120,095!
90
  void *pReq = rpcMallocCont(contLen);
120,095✔
91
  if (pReq == NULL) return NULL;
120,095!
92

93
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
120,095!
94
    mError("failed to serialize timer msg since %s", terrstr());
×
95
  }
96
  *pContLen = contLen;
120,095✔
97
  return pReq;
120,095✔
98
}
99

100
static void mndPullupTrans(SMnode *pMnode) {
28,701✔
101
  mTrace("pullup trans msg");
28,701✔
102
  int32_t contLen = 0;
28,701✔
103
  void   *pReq = mndBuildTimerMsg(&contLen);
28,701✔
104
  if (pReq != NULL) {
28,701!
105
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pReq, .contLen = contLen};
28,701✔
106
    // TODO check return value
107
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
28,701✔
108
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
1!
109
    }
110
  }
111
}
28,701✔
112

113
static void mndPullupCompacts(SMnode *pMnode) {
4,909✔
114
  mTrace("pullup compact timer msg");
4,909✔
115
  int32_t contLen = 0;
4,909✔
116
  void   *pReq = mndBuildTimerMsg(&contLen);
4,909✔
117
  if (pReq != NULL) {
4,909!
118
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pReq, .contLen = contLen};
4,909✔
119
    // TODO check return value
120
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
4,909!
121
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
122
    }
123
  }
124
}
4,909✔
125

126
static void mndPullupTtl(SMnode *pMnode) {
5,026✔
127
  mTrace("pullup ttl");
5,026✔
128
  int32_t contLen = 0;
5,026✔
129
  void   *pReq = mndBuildTimerMsg(&contLen);
5,026✔
130
  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
5,026✔
131
  // TODO check return value
132
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
5,026!
133
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
134
  }
135
}
5,026✔
136

137
static void mndPullupTrimDb(SMnode *pMnode) {
×
138
  mTrace("pullup trim");
×
139
  int32_t contLen = 0;
×
140
  void   *pReq = mndBuildTimerMsg(&contLen);
×
141
  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
×
142
  // TODO check return value
143
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
×
144
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
145
  }
146
}
×
147

148
static void mndPullupSsMigrateDb(SMnode *pMnode) {
×
149
  if (grantCheck(TSDB_GRANT_SHARED_STORAGE) != TSDB_CODE_SUCCESS) {
×
150
    return;
×
151
  }
152

153
  mTrace("pullup ssmigrate db");
×
154
  int32_t contLen = 0;
×
155
  void   *pReq = mndBuildTimerMsg(&contLen);
×
156
  SRpcMsg rpcMsg = {.msgType = TDMT_MND_SSMIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
×
157
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
×
158
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
159
  }
160
}
161

162
static void mndPullupQuerySsMigrateProgress(SMnode *pMnode) {
×
163
  mTrace("pullup query ssmigrate progress");
×
164
  int32_t contLen = 0;
×
165
  void   *pReq = mndBuildTimerMsg(&contLen);
×
166
  SRpcMsg rpcMsg = {.msgType = TDMT_MND_QUERY_SSMIGRATE_PROGRESS_TIMER, .pCont = pReq, .contLen = contLen};
×
167
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
×
168
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
169
  }
170
}
×
171

172
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
28,660✔
173
  mTrace("pullup arb hb");
28,660✔
174
  int32_t contLen = 0;
28,660✔
175
  void   *pReq = mndBuildTimerMsg(&contLen);
28,660✔
176
  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
28,660✔
177
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
28,660✔
178
}
179

180
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
18,735✔
181
  mTrace("pullup arb sync");
18,735✔
182
  int32_t contLen = 0;
18,735✔
183
  void   *pReq = mndBuildTimerMsg(&contLen);
18,735✔
184
  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
18,735✔
185
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
18,735✔
186
}
187

188
static void mndCalMqRebalance(SMnode *pMnode) {
28,646✔
189
  int32_t contLen = 0;
28,646✔
190
  void   *pReq = mndBuildTimerMsg(&contLen);
28,646✔
191
  if (pReq != NULL) {
28,646!
192
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen};
28,646✔
193
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
28,646✔
194
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
1!
195
    }
196
  }
197
}
28,646✔
198

199
static void mndPullupTelem(SMnode *pMnode) {
2✔
200
  mTrace("pullup telem msg");
2!
201
  int32_t contLen = 0;
2✔
202
  void   *pReq = mndBuildTimerMsg(&contLen);
2✔
203
  if (pReq != NULL) {
2!
204
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pReq, .contLen = contLen};
2✔
205
    // TODO check return value
206
    if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg) < 0) {
2!
207
      mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
×
208
    }
209
  }
210
}
2✔
211

212
static void mndPullupGrant(SMnode *pMnode) {
5,406✔
213
  mTrace("pullup grant msg");
5,406✔
214
  int32_t contLen = 0;
5,406✔
215
  void   *pReq = mndBuildTimerMsg(&contLen);
5,406✔
216
  if (pReq != NULL) {
5,406!
217
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_GRANT_HB_TIMER,
5,406✔
218
                      .pCont = pReq,
219
                      .contLen = contLen,
220
                      .info.notFreeAhandle = 1,
221
                      .info.ahandle = 0};
222
    // TODO check return value
223
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
5,406✔
224
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
1!
225
    }
226
  }
227
}
5,406✔
228

229
static void mndIncreaseUpTime(SMnode *pMnode) {
10✔
230
  mTrace("increate uptime");
10!
231
  int32_t contLen = 0;
10✔
232
  void   *pReq = mndBuildTimerMsg(&contLen);
10✔
233
  if (pReq != NULL) {
10!
234
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
10✔
235
                      .pCont = pReq,
236
                      .contLen = contLen,
237
                      .info.notFreeAhandle = 1,
238
                      .info.ahandle = 0};
239
    // TODO check return value
240
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
10!
241
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
242
    }
243
  }
244
}
10✔
245

246
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
318✔
247
  SSdb *pSdb = pMnode->pSdb;
318✔
248

249
  void *pIter = NULL;
318✔
250
  while (1) {
802✔
251
    SVgObj *pVgroup = NULL;
1,120✔
252
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
1,120✔
253
    if (pIter == NULL) break;
1,120✔
254

255
    bool stateChanged = false;
802✔
256
    for (int32_t vg = 0; vg < pVgroup->replica; ++vg) {
2,265✔
257
      SVnodeGid *pGid = &pVgroup->vnodeGid[vg];
1,755✔
258
      if (pGid->dnodeId == dnodeId) {
1,755✔
259
        if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
292✔
260
          mInfo(
55!
261
              "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:error restored:0 "
262
              "canRead:0",
263
              pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
264
          pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
55✔
265
          pGid->syncRestore = 0;
55✔
266
          pGid->syncCanRead = 0;
55✔
267
          pGid->startTimeMs = 0;
55✔
268
          stateChanged = true;
55✔
269
        }
270
        break;
292✔
271
      }
272
    }
273

274
    if (stateChanged) {
802✔
275
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
55✔
276
      if (pDb != NULL && pDb->stateTs != curMs) {
55!
277
        mInfo("db:%s, stateTs changed by offline check, old newTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
32!
278
              curMs);
279
        pDb->stateTs = curMs;
32✔
280
      }
281
      mndReleaseDb(pMnode, pDb);
55✔
282
    }
283

284
    sdbRelease(pSdb, pVgroup);
802✔
285
  }
286
}
318✔
287

288
static void mndCheckDnodeOffline(SMnode *pMnode) {
10,819✔
289
  mTrace("check dnode offline");
10,819✔
290
  if (mndAcquireRpc(pMnode) != 0) return;
10,819!
291

292
  SSdb   *pSdb = pMnode->pSdb;
10,819✔
293
  int64_t curMs = taosGetTimestampMs();
10,819✔
294

295
  void *pIter = NULL;
10,819✔
296
  while (1) {
18,627✔
297
    SDnodeObj *pDnode = NULL;
29,446✔
298
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode);
29,446✔
299
    if (pIter == NULL) break;
29,446✔
300

301
    bool online = mndIsDnodeOnline(pDnode, curMs);
18,627✔
302
    if (!online) {
18,627✔
303
      mInfo("dnode:%d, in offline state", pDnode->id);
318!
304
      mndSetVgroupOffline(pMnode, pDnode->id, curMs);
318✔
305
    }
306

307
    sdbRelease(pSdb, pDnode);
18,627✔
308
  }
309

310
  mndReleaseRpc(pMnode);
10,819✔
311
}
312

313
static bool mnodeIsNotLeader(SMnode *pMnode) {
64,190✔
314
  terrno = 0;
64,190✔
315
  (void)taosThreadRwlockRdlock(&pMnode->lock);
64,190✔
316
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
64,190✔
317
  if (terrno != 0) {
64,190!
318
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
319
    return true;
×
320
  }
321

322
  if (state.state != TAOS_SYNC_STATE_LEADER) {
64,190✔
323
    (void)taosThreadRwlockUnlock(&pMnode->lock);
5,779✔
324
    terrno = TSDB_CODE_SYN_NOT_LEADER;
5,779✔
325
    return true;
5,779✔
326
  }
327
  if (!state.restored || !pMnode->restored) {
58,411!
328
    (void)taosThreadRwlockUnlock(&pMnode->lock);
4✔
329
    terrno = TSDB_CODE_SYN_RESTORING;
4✔
330
    return true;
4✔
331
  }
332
  (void)taosThreadRwlockUnlock(&pMnode->lock);
58,407✔
333
  return false;
58,407✔
334
}
335

336
static int32_t minCronTime() {
×
337
  int32_t min = INT32_MAX;
×
338
  min = TMIN(min, tsTtlPushIntervalSec);
×
339
  min = TMIN(min, tsTrimVDbIntervalSec);
×
340
  min = TMIN(min, tsSsAutoMigrateIntervalSec);
×
341
  min = TMIN(min, tsTransPullupInterval);
×
342
  min = TMIN(min, tsCompactPullupInterval);
×
343
  min = TMIN(min, tsMqRebalanceInterval);
×
344
  min = TMIN(min, tsArbHeartBeatIntervalSec);
×
345
  min = TMIN(min, tsArbCheckSyncIntervalSec);
×
346

347
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
348
  min = TMIN(min, telemInt);
×
349
  min = TMIN(min, tsGrantHBInterval);
×
350
  min = TMIN(min, tsUptimeInterval);
×
351

352
  return min <= 1 ? 2 : min;
×
353
}
354
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
58,407✔
355
  int32_t code = 0;
58,407✔
356
#ifndef TD_ASTRA
357
  if (sec % tsGrantHBInterval == 0) {  // put in the 1st place as to take effect ASAP
58,407✔
358
    mndPullupGrant(pMnode);
5,406✔
359
  }
360
  if (sec % tsTtlPushIntervalSec == 0) {
58,407✔
361
    mndPullupTtl(pMnode);
5,026✔
362
  }
363

364
  if (sec % tsTrimVDbIntervalSec == 0) {
58,407!
365
    mndPullupTrimDb(pMnode);
×
366
  }
367
#endif
368
#ifdef USE_SHARED_STORAGE
369
  if (tsSsEnabled) {
58,407!
370
    if (sec % 10 == 0) { // TODO: make 10 to be configurable
×
371
      mndPullupQuerySsMigrateProgress(pMnode);
×
372
    }
373
    if (tsSsEnabled == 2 && sec % tsSsAutoMigrateIntervalSec == 0) {
×
374
      mndPullupSsMigrateDb(pMnode);
×
375
    }
376
  }
377
#endif
378
  if (sec % tsTransPullupInterval == 0) {
58,407✔
379
    mndPullupTrans(pMnode);
28,701✔
380
  }
381

382
  if (sec % tsCompactPullupInterval == 0) {
58,407✔
383
    mndPullupCompacts(pMnode);
4,909✔
384
  }
385
#ifdef USE_TOPIC
386
  if (sec % tsMqRebalanceInterval == 0) {
58,407✔
387
    mndCalMqRebalance(pMnode);
28,646✔
388
  }
389
#endif
390
  if (tsTelemInterval > 0 && sec % tsTelemInterval == 0) {
58,407!
391
    mndPullupTelem(pMnode);
2✔
392
  }
393
  if (sec % tsUptimeInterval == 0) {
58,407✔
394
    mndIncreaseUpTime(pMnode);
10✔
395
  }
396
#ifndef TD_ASTRA
397
  if (sec % (tsArbHeartBeatIntervalSec) == 0) {
58,407✔
398
    if ((code = mndPullupArbHeartbeat(pMnode)) != 0) {
28,660✔
399
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
1!
400
    }
401
  }
402

403
  if (sec % (tsArbCheckSyncIntervalSec) == 0) {
58,407✔
404
    if ((code = mndPullupArbCheckSync(pMnode)) != 0) {
18,735!
405
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
×
406
    }
407
  }
408
#endif
409
}
58,407✔
410

411
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
58,407✔
412
  if (sec % (tsStatusInterval * 5) == 0) {
58,407✔
413
    mndCheckDnodeOffline(pMnode);
10,819✔
414
  }
415
  if (sec % (MNODE_TIMEOUT_SEC / 2) == 0) {
58,407✔
416
    mndSyncCheckTimeout(pMnode);
1,292✔
417
  }
418
  if (!tsDisableStream && (sec % MND_STREAM_HEALTH_CHECK_PERIOD_SEC == 0)) {
58,407!
419
    msmHealthCheck(pMnode);
18,735✔
420
  }
421
}
58,407✔
422

423
static void *mndThreadFp(void *param) {
2,512✔
424
  SMnode *pMnode = param;
2,512✔
425
  int64_t lastTime = 0;
2,512✔
426
  setThreadName("mnode-timer");
2,512✔
427

428
  while (1) {
654,103✔
429
    lastTime++;
656,615✔
430
    taosMsleep(100);
656,615✔
431

432
    if (mndGetStop(pMnode)) break;
656,615✔
433
    if (lastTime % 10 != 0) continue;
654,103✔
434

435
    if (mnodeIsNotLeader(pMnode)) {
64,190✔
436
      mTrace("timer not process since mnode is not leader");
5,783!
437
      continue;
5,783✔
438
    }
439

440
    int64_t sec = lastTime / 10;
58,407✔
441
    mndDoTimerCheckTask(pMnode, sec);
58,407✔
442

443
    mndDoTimerPullupTask(pMnode, sec);
58,407✔
444
  }
445

446
  return NULL;
2,512✔
447
}
448

449
static int32_t mndInitTimer(SMnode *pMnode) {
2,512✔
450
  int32_t      code = 0;
2,512✔
451
  TdThreadAttr thAttr;
452
  (void)taosThreadAttrInit(&thAttr);
2,512✔
453
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
2,512✔
454
#ifdef TD_COMPACT_OS
455
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
456
#endif
457
  if ((code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadFp, pMnode)) != 0) {
2,512!
458
    mError("failed to create timer thread since %s", tstrerror(code));
×
459
    TAOS_RETURN(code);
×
460
  }
461

462
  (void)taosThreadAttrDestroy(&thAttr);
2,512✔
463
  tmsgReportStartup("mnode-timer", "initialized");
2,512✔
464
  TAOS_RETURN(code);
2,512✔
465
}
466

467
static void mndCleanupTimer(SMnode *pMnode) {
2,512✔
468
  if (taosCheckPthreadValid(pMnode->thread)) {
2,512!
469
    (void)taosThreadJoin(pMnode->thread, NULL);
2,512✔
470
    taosThreadClear(&pMnode->thread);
2,512✔
471
  }
472
}
2,512✔
473

474
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
2,512✔
475
  int32_t code = 0;
2,512✔
476
  pMnode->path = taosStrdup(path);
2,512!
477
  if (pMnode->path == NULL) {
2,512!
478
    code = terrno;
×
479
    TAOS_RETURN(code);
×
480
  }
481

482
  if (taosMkDir(pMnode->path) != 0) {
2,512!
483
    code = terrno;
×
484
    TAOS_RETURN(code);
×
485
  }
486

487
  TAOS_RETURN(code);
2,512✔
488
}
489

490
static int32_t mndInitWal(SMnode *pMnode) {
2,512✔
491
  int32_t code = 0;
2,512✔
492
  char    path[PATH_MAX + 20] = {0};
2,512✔
493
  (void)snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP);
2,512✔
494
  SWalCfg cfg = {.vgId = 1,
2,512✔
495
                 .fsyncPeriod = 0,
496
                 .rollPeriod = -1,
497
                 .segSize = -1,
498
                 .committed = -1,
499
                 .retentionPeriod = 0,
500
                 .retentionSize = 0,
501
                 .level = TAOS_WAL_FSYNC,
502
                 .encryptAlgorithm = 0,
503
                 .encryptKey = {0}};
504

505
#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
506
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
2,512!
507
    cfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
×
508
    if (tsEncryptKey[0] == '\0') {
×
509
      code = TSDB_CODE_DNODE_INVALID_ENCRYPTKEY;
×
510
      TAOS_RETURN(code);
×
511
    } else {
512
      tstrncpy(cfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
×
513
    }
514
  }
515
#endif
516

517
  pMnode->pWal = walOpen(path, &cfg);
2,512✔
518
  if (pMnode->pWal == NULL) {
2,512!
519
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
520
    if (terrno != 0) code = terrno;
×
521
    mError("failed to open wal since %s. wal:%s", tstrerror(code), path);
×
522
    TAOS_RETURN(code);
×
523
  }
524

525
  TAOS_RETURN(code);
2,512✔
526
}
527

528
static void mndCloseWal(SMnode *pMnode) {
2,512✔
529
  if (pMnode->pWal != NULL) {
2,512!
530
    walClose(pMnode->pWal);
2,512✔
531
    pMnode->pWal = NULL;
2,512✔
532
  }
533
}
2,512✔
534

535
static int32_t mndInitSdb(SMnode *pMnode) {
2,512✔
536
  int32_t code = 0;
2,512✔
537
  SSdbOpt opt = {0};
2,512✔
538
  opt.path = pMnode->path;
2,512✔
539
  opt.pMnode = pMnode;
2,512✔
540
  opt.pWal = pMnode->pWal;
2,512✔
541

542
  pMnode->pSdb = sdbInit(&opt);
2,512✔
543
  if (pMnode->pSdb == NULL) {
2,512!
544
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
545
    if (terrno != 0) code = terrno;
×
546
    TAOS_RETURN(code);
×
547
  }
548

549
  TAOS_RETURN(code);
2,512✔
550
}
551

552
static int32_t mndOpenSdb(SMnode *pMnode) {
2,512✔
553
  int32_t code = 0;
2,512✔
554
  if (!pMnode->deploy) {
2,512✔
555
    code = sdbReadFile(pMnode->pSdb);
642✔
556
  }
557

558
  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);
2,512!
559

560
  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);
2,512✔
561
  return code;
2,512✔
562
}
563

564
static void mndCleanupSdb(SMnode *pMnode) {
2,512✔
565
  if (pMnode->pSdb) {
2,512!
566
    sdbCleanup(pMnode->pSdb);
2,512✔
567
    pMnode->pSdb = NULL;
2,512✔
568
  }
569
}
2,512✔
570

571
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
100,480✔
572
  SMnodeStep step = {0};
100,480✔
573
  step.name = name;
100,480✔
574
  step.initFp = initFp;
100,480✔
575
  step.cleanupFp = cleanupFp;
100,480✔
576
  if (taosArrayPush(pMnode->pSteps, &step) == NULL) {
200,960!
577
    TAOS_RETURN(terrno);
×
578
  }
579

580
  TAOS_RETURN(0);
100,480✔
581
}
582

583
static int32_t mndInitSteps(SMnode *pMnode) {
2,512✔
584
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-wal", mndInitWal, mndCloseWal));
2,512!
585
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sdb", mndInitSdb, mndCleanupSdb));
2,512!
586
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-trans", mndInitTrans, mndCleanupTrans));
2,512!
587
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-cluster", mndInitCluster, mndCleanupCluster));
2,512!
588
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-mnode", mndInitMnode, mndCleanupMnode));
2,512!
589
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-qnode", mndInitQnode, mndCleanupQnode));
2,512!
590
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-snode", mndInitSnode, mndCleanupSnode));
2,512!
591
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-anode", mndInitAnode, mndCleanupAnode));
2,512!
592
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-bnode", mndInitBnode, mndCleanupBnode));
2,512!
593
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup));
2,512!
594
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-config", mndInitConfig, NULL));
2,512!
595
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-dnode", mndInitDnode, mndCleanupDnode));
2,512!
596
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-user", mndInitUser, mndCleanupUser));
2,512!
597
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-grant", mndInitGrant, mndCleanupGrant));
2,512!
598
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-privilege", mndInitPrivilege, mndCleanupPrivilege));
2,512!
599
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-acct", mndInitAcct, mndCleanupAcct));
2,512!
600
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-stream", mndInitStream, mndCleanupStream));
2,512!
601
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-topic", mndInitTopic, mndCleanupTopic));
2,512!
602
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-consumer", mndInitConsumer, mndCleanupConsumer));
2,512!
603
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe));
2,512!
604
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-vgroup", mndInitVgroup, mndCleanupVgroup));
2,512!
605
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-stb", mndInitStb, mndCleanupStb));
2,512!
606
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sma", mndInitSma, mndCleanupSma));
2,512!
607
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-idx", mndInitIdx, mndCleanupIdx));
2,512!
608
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-infos", mndInitInfos, mndCleanupInfos));
2,512!
609
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-perfs", mndInitPerfs, mndCleanupPerfs));
2,512!
610
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-db", mndInitDb, mndCleanupDb));
2,512!
611
#ifdef USE_MOUNT
612
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-mount", mndInitMount, mndCleanupMount));
2,512!
613
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-mount-log", mndInitMountLog, mndCleanupMountLog));
2,512!
614
#endif
615
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-func", mndInitFunc, mndCleanupFunc));
2,512!
616
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-view", mndInitView, mndCleanupView));
2,512!
617
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-compact", mndInitCompact, mndCleanupCompact));
2,512!
618
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail));
2,512!
619
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-ssmigrate", mndInitSsMigrate, mndCleanupSsMigrate));
2,512!
620
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sdb", mndOpenSdb, NULL));
2,512!
621
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-profile", mndInitProfile, mndCleanupProfile));
2,512!
622
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-show", mndInitShow, mndCleanupShow));
2,512!
623
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-query", mndInitQuery, mndCleanupQuery));
2,512!
624
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sync", mndInitSync, mndCleanupSync));
2,512!
625
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-telem", mndInitTelem, mndCleanupTelem));
2,512!
626
  return 0;
2,512✔
627
}
628

629
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
2,512✔
630
  if (pMnode->pSteps == NULL) return;
2,512!
631

632
  if (pos == -1) {
2,512!
633
    pos = taosArrayGetSize(pMnode->pSteps) - 1;
2,512✔
634
  }
635

636
  for (int32_t s = pos; s >= 0; s--) {
102,992✔
637
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, s);
100,480✔
638
    mInfo("%s will cleanup", pStep->name);
100,480!
639
    if (pStep->cleanupFp != NULL) {
100,480✔
640
      (*pStep->cleanupFp)(pMnode);
95,456✔
641
    }
642
  }
643

644
  taosArrayClear(pMnode->pSteps);
2,512✔
645
  taosArrayDestroy(pMnode->pSteps);
2,512✔
646
  pMnode->pSteps = NULL;
2,512✔
647
}
648

649
static int32_t mndExecSteps(SMnode *pMnode) {
2,512✔
650
  int32_t code = 0;
2,512✔
651
  int32_t size = taosArrayGetSize(pMnode->pSteps);
2,512✔
652
  for (int32_t pos = 0; pos < size; pos++) {
102,992✔
653
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, pos);
100,480✔
654
    if (pStep->initFp == NULL) continue;
100,480!
655

656
    if ((code = (*pStep->initFp)(pMnode)) != 0) {
100,480!
657
      mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
×
658
      mndCleanupSteps(pMnode, pos);
×
659
      TAOS_RETURN(code);
×
660
    } else {
661
      mInfo("%s is initialized", pStep->name);
100,480!
662
      tmsgReportStartup(pStep->name, "initialized");
100,480✔
663
    }
664
  }
665

666
  pMnode->clusterId = mndGetClusterId(pMnode);
2,512✔
667
  TAOS_RETURN(0);
2,512✔
668
}
669

670
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
2,512✔
671
  pMnode->msgCb = pOption->msgCb;
2,512✔
672
  pMnode->selfDnodeId = pOption->dnodeId;
2,512✔
673
  pMnode->syncMgmt.selfIndex = pOption->selfIndex;
2,512✔
674
  pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
2,512✔
675
  pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
2,512✔
676
  pMnode->syncMgmt.lastIndex = pOption->lastIndex;
2,512✔
677
  (void)memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
2,512✔
678
  (void)memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
2,512✔
679
}
2,512✔
680

681
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
2,512✔
682
  terrno = 0;
2,512✔
683
  mInfo("start to open mnode in %s", path);
2,512!
684

685
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
2,512!
686
  if (pMnode == NULL) {
2,512!
687
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
688
    mError("failed to open mnode since %s", terrstr());
×
689
    return NULL;
×
690
  }
691
  (void)memset(pMnode, 0, sizeof(SMnode));
2,512✔
692

693
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
2,512✔
694
  if (code != 0) {
2,512!
695
    taosMemoryFree(pMnode);
×
696
    mError("failed to open mnode lock since %s", tstrerror(code));
×
697
    return NULL;
×
698
  }
699

700
  char timestr[24] = "1970-01-01 00:00:00.00";
2,512✔
701
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
2,512✔
702
  if (code < 0) {
2,512!
703
    mError("failed to parse time since %s", tstrerror(code));
×
704
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
705
    taosMemoryFree(pMnode);
×
706
    return NULL;
×
707
  }
708
  mndSetOptions(pMnode, pOption);
2,512✔
709

710
  pMnode->deploy = pOption->deploy;
2,512✔
711
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
2,512✔
712
  if (pMnode->pSteps == NULL) {
2,512!
713
    taosMemoryFree(pMnode);
×
714
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
715
    mError("failed to open mnode since %s", terrstr());
×
716
    return NULL;
×
717
  }
718

719
  code = mndCreateDir(pMnode, path);
2,512✔
720
  if (code != 0) {
2,512!
721
    code = terrno;
×
722
    mError("failed to open mnode since %s", tstrerror(code));
×
723
    mndClose(pMnode);
×
724
    terrno = code;
×
725
    return NULL;
×
726
  }
727

728
  code = mndInitSteps(pMnode);
2,512✔
729
  if (code != 0) {
2,512!
730
    code = terrno;
×
731
    mError("failed to open mnode since %s", tstrerror(code));
×
732
    mndClose(pMnode);
×
733
    terrno = code;
×
734
    return NULL;
×
735
  }
736

737
  code = mndExecSteps(pMnode);
2,512✔
738
  if (code != 0) {
2,512!
739
    code = terrno;
×
740
    mError("failed to open mnode since %s", tstrerror(code));
×
741
    mndClose(pMnode);
×
742
    terrno = code;
×
743
    return NULL;
×
744
  }
745

746
  mInfo("mnode open successfully");
2,512!
747
  return pMnode;
2,512✔
748
}
749

750
void mndPreClose(SMnode *pMnode) {
2,512✔
751
  if (pMnode != NULL) {
2,512!
752
    int32_t code = 0;
2,512✔
753
    // TODO check return value
754
    code = syncLeaderTransfer(pMnode->syncMgmt.sync);
2,512✔
755
    if (code < 0) {
2,512!
756
      mError("failed to transfer leader since %s", tstrerror(code));
×
757
    }
758
    syncPreStop(pMnode->syncMgmt.sync);
2,512✔
759
    code = sdbWriteFile(pMnode->pSdb, 0);
2,512✔
760
    if (code < 0) {
2,512!
761
      mError("failed to write sdb since %s", tstrerror(code));
×
762
    }
763
  }
764
}
2,512✔
765

766
void mndClose(SMnode *pMnode) {
2,512✔
767
  if (pMnode != NULL) {
2,512!
768
    mInfo("start to close mnode");
2,512!
769
    mndCleanupSteps(pMnode, -1);
2,512✔
770
    taosMemoryFreeClear(pMnode->path);
2,512!
771
    taosMemoryFreeClear(pMnode);
2,512!
772
    mInfo("mnode is closed");
2,512!
773
  }
774
}
2,512✔
775

776
int32_t mndStart(SMnode *pMnode) {
2,512✔
777
  mndSyncStart(pMnode);
2,512✔
778
  if (pMnode->deploy) {
2,512✔
779
    if (sdbDeploy(pMnode->pSdb) != 0) {
1,870!
780
      mError("failed to deploy sdb while start mnode");
×
781
      return -1;
×
782
    }
783
    mndSetRestored(pMnode, true);
1,870✔
784
  }
785
  grantReset(pMnode, TSDB_GRANT_ALL, 0);
2,512✔
786

787
  return mndInitTimer(pMnode);
2,512✔
788
}
789

790
int32_t mndIsCatchUp(SMnode *pMnode) {
1,163✔
791
  int64_t rid = pMnode->syncMgmt.sync;
1,163✔
792
  return syncIsCatchUp(rid);
1,163✔
793
}
794

795
ESyncRole mndGetRole(SMnode *pMnode) {
1,163✔
796
  int64_t rid = pMnode->syncMgmt.sync;
1,163✔
797
  return syncGetRole(rid);
1,163✔
798
}
799

800
int64_t mndGetTerm(SMnode *pMnode) {
18,866✔
801
  int64_t rid = pMnode->syncMgmt.sync;
18,866✔
802
  return syncGetTerm(rid);
18,866✔
803
}
804

805
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) { return syncGetArbToken(pMnode->syncMgmt.sync, outToken); }
47,518✔
806

807
void mndStop(SMnode *pMnode) {
2,512✔
808
  mndSetStop(pMnode);
2,512✔
809
  mndSyncStop(pMnode);
2,512✔
810
  mndCleanupTimer(pMnode);
2,512✔
811
}
2,512✔
812

813
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
189,330✔
814
  SMnode    *pMnode = pMsg->info.node;
189,330✔
815
  SSyncMgmt *pMgmt = &pMnode->syncMgmt;
189,330✔
816

817
  const STraceId *trace = &pMsg->info.traceId;
189,330✔
818
  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));
189,330!
819

820
  int32_t code = syncProcessMsg(pMgmt->sync, pMsg);
189,330✔
821
  if (code != 0) {
189,330✔
822
    mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
7!
823
            tstrerror(code), code);
824
  }
825

826
  return code;
189,330✔
827
}
828

829
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
671,000✔
830
  int32_t code = 0;
671,000✔
831
  if (!IsReq(pMsg)) TAOS_RETURN(code);
671,000✔
832
  if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
590,719✔
833
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
571,046!
834
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK ||
557,073✔
835
      pMsg->msgType == TDMT_SCH_TASK_NOTIFY) {
517,913✔
836
    TAOS_RETURN(code);
72,817✔
837
  }
838

839
  SMnode *pMnode = pMsg->info.node;
517,902✔
840
  (void)taosThreadRwlockRdlock(&pMnode->lock);
517,902✔
841
  if (pMnode->stopped) {
518,407✔
842
    (void)taosThreadRwlockUnlock(&pMnode->lock);
52✔
843
    code = TSDB_CODE_APP_IS_STOPPING;
52✔
844
    TAOS_RETURN(code);
52✔
845
  }
846

847
  terrno = 0;
518,355✔
848
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
518,040✔
849
  if (terrno != 0) {
518,420!
850
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
851
    code = terrno;
×
852
    TAOS_RETURN(code);
×
853
  }
854

855
  if (state.state != TAOS_SYNC_STATE_LEADER) {
518,417✔
856
    (void)taosThreadRwlockUnlock(&pMnode->lock);
17,065✔
857
    code = TSDB_CODE_SYN_NOT_LEADER;
17,063✔
858
    goto _OVER;
17,063✔
859
  }
860

861
  if (!state.restored || !pMnode->restored) {
501,352✔
862
    (void)taosThreadRwlockUnlock(&pMnode->lock);
1,129✔
863
    code = TSDB_CODE_SYN_RESTORING;
1,122✔
864
    goto _OVER;
1,122✔
865
  }
866

867
#if 1
868
  (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
500,223✔
869
#else
870
  int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
871
  mTrace("mnode rpc is acquired, ref:%d", ref);
872
#endif
873

874
  (void)taosThreadRwlockUnlock(&pMnode->lock);
500,236✔
875
  TAOS_RETURN(code);
500,216✔
876

877
_OVER:
18,185✔
878
  if (pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
18,185!
879
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
18,182!
880
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
18,179!
881
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
18,180!
882
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
18,179!
883
      pMsg->msgType == TDMT_MND_SSMIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
18,178!
884
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER ||
18,178!
885
      pMsg->msgType == TDMT_MND_QUERY_SSMIGRATE_PROGRESS_TIMER) {
18,178!
886
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
7!
887
           pMnode->stopped, state.restored, syncStr(state.state));
888
    TAOS_RETURN(code);
7✔
889
  }
890

891
  const STraceId *trace = &pMsg->info.traceId;
18,178✔
892
  SEpSet          epSet = {0};
18,178✔
893
  mndGetMnodeEpSet(pMnode, &epSet);
18,178✔
894

895
  mGDebug(
18,178!
896
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
897
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
898
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
899
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));
900

901
  if (epSet.numOfEps <= 0) return -1;
18,178!
902

903
  for (int32_t i = 0; i < epSet.numOfEps; ++i) {
68,681✔
904
    mDebug("mnode index:%d, ep:%s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port);
50,503✔
905
  }
906

907
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
18,178✔
908
  pMsg->info.rsp = rpcMallocCont(contLen);
18,177✔
909
  if (pMsg->info.rsp != NULL) {
18,178✔
910
    if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
18,177!
911
      mError("failed to serialize ep set");
×
912
    }
913
    pMsg->info.hasEpSet = 1;
18,178✔
914
    pMsg->info.rspLen = contLen;
18,178✔
915
  }
916

917
  TAOS_RETURN(code);
18,179✔
918
}
919

920
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
670,965✔
921
  SMnode         *pMnode = pMsg->info.node;
670,965✔
922
  const STraceId *trace = &pMsg->info.traceId;
670,965✔
923
  int32_t         code = TSDB_CODE_SUCCESS;
670,965✔
924

925
  MndMsgFp    fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)];
670,965✔
926
  MndMsgFpExt fpExt = NULL;
670,965✔
927
  if (fp == NULL) {
670,965✔
928
    fpExt = pMnode->msgFpExt[TMSG_INDEX(pMsg->msgType)];
73,025✔
929
    if (fpExt == NULL) {
73,025!
930
      mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
×
931
      code = TSDB_CODE_MSG_NOT_PROCESSED;
×
932
      TAOS_RETURN(code);
×
933
    }
934
  }
935

936
  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));
670,965✔
937

938
  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
653,471!
939
  if (fp)
653,471✔
940
    code = (*fp)(pMsg);
580,447✔
941
  else
942
    code = (*fpExt)(pMsg, pQueueInfo);
73,024✔
943
  mndReleaseRpc(pMnode);
653,511✔
944

945
  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
653,541✔
946
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
102,072!
947
  } else if (code == 0) {
551,469✔
948
    mGTrace("msg:%p, successfully processed", pMsg);
543,632!
949
  } else {
950
    // TODO removve this wrong set code
951
    if (code == -1) {
7,837✔
952
      code = terrno;
13✔
953
    }
954
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
7,837!
955
            TMSG_INFO(pMsg->msgType));
956
  }
957

958
  TAOS_RETURN(code);
653,541✔
959
}
960

961
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
427,040✔
962
  tmsg_t type = TMSG_INDEX(msgType);
427,040✔
963
  if (type < TDMT_MAX) {
427,040!
964
    pMnode->msgFp[type] = fp;
427,040✔
965
  }
966
}
427,040✔
967

968
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
20,096✔
969
  tmsg_t type = TMSG_INDEX(msgType);
20,096✔
970
  if (type < TDMT_MAX) {
20,096!
971
    pMnode->msgFpExt[type] = fp;
20,096✔
972
  }
973
}
20,096✔
974

975
// Note: uid 0 is reserved
976
int64_t mndGenerateUid(const char *name, int32_t len) {
35,144✔
977
  int32_t hashval = MurmurHash3_32(name, len);
35,144✔
978
  do {
×
979
    int64_t us = taosGetTimestampUs();
35,144✔
980
    int64_t x = (us & 0x000000FFFFFFFFFF) << 24;
35,144✔
981
    int64_t uuid = x + ((hashval & ((1ul << 16) - 1ul)) << 8) + (taosRand() & ((1ul << 8) - 1ul));
35,144✔
982
    if (uuid) {
35,144!
983
      return llabs(uuid);
35,144✔
984
    }
985
  } while (true);
986
}
987

988
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
16✔
989
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
990
  int32_t code = mndAcquireRpc(pMnode);
16✔
991
  if (code < 0) {
16!
992
    TAOS_RETURN(code);
×
993
  } else if (code == 1) {
16!
994
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
995
  }
996

997
  SSdb   *pSdb = pMnode->pSdb;
16✔
998
  int64_t ms = taosGetTimestampMs();
16✔
999

1000
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
16✔
1001
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
16✔
1002
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
16✔
1003
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
16✔
1004
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
16!
1005
      pStbInfo->stbs == NULL) {
16!
1006
    mndReleaseRpc(pMnode);
×
1007
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1008
    if (terrno != 0) code = terrno;
×
1009
    TAOS_RETURN(code);
×
1010
  }
1011

1012
  // cluster info
1013
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
16✔
1014
  pClusterInfo->monitor_interval = tsMonitorInterval;
16✔
1015
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
16✔
1016
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
16✔
1017
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
16✔
1018
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
16✔
1019
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
16✔
1020

1021
  void *pIter = NULL;
16✔
1022
  while (1) {
16✔
1023
    SDnodeObj *pObj = NULL;
32✔
1024
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
32✔
1025
    if (pIter == NULL) break;
32✔
1026

1027
    SMonDnodeDesc desc = {0};
16✔
1028
    desc.dnode_id = pObj->id;
16✔
1029
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
16✔
1030
    if (mndIsDnodeOnline(pObj, ms)) {
16✔
1031
      tstrncpy(desc.status, "ready", sizeof(desc.status));
15✔
1032
    } else {
1033
      tstrncpy(desc.status, "offline", sizeof(desc.status));
1✔
1034
    }
1035
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
32!
1036
      mError("failed put dnode into array, but continue at this monitor report")
×
1037
    }
1038
    sdbRelease(pSdb, pObj);
16✔
1039
  }
1040

1041
  pIter = NULL;
16✔
1042
  while (1) {
16✔
1043
    SMnodeObj *pObj = NULL;
32✔
1044
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
32✔
1045
    if (pIter == NULL) break;
32✔
1046

1047
    SMonMnodeDesc desc = {0};
16✔
1048
    desc.mnode_id = pObj->id;
16✔
1049
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
16✔
1050

1051
    if (pObj->id == pMnode->selfDnodeId) {
16!
1052
      pClusterInfo->first_ep_dnode_id = pObj->id;
16✔
1053
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
16✔
1054
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1055
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
16✔
1056
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1057
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
16✔
1058
      desc.syncState = TAOS_SYNC_STATE_LEADER;
16✔
1059
    } else {
1060
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1061
      desc.syncState = pObj->syncState;
×
1062
    }
1063
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
32!
1064
      mError("failed to put mnode into array, but continue at this monitor report");
×
1065
    }
1066
    sdbRelease(pSdb, pObj);
16✔
1067
  }
1068

1069
  // vgroup info
1070
  pIter = NULL;
16✔
1071
  while (1) {
34✔
1072
    SVgObj *pVgroup = NULL;
50✔
1073
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
50✔
1074
    if (pIter == NULL) break;
50✔
1075

1076
    if (pVgroup->mountVgId) {
34!
1077
      sdbRelease(pSdb, pVgroup);
×
1078
      continue;
×
1079
    }
1080

1081
    pClusterInfo->vgroups_total++;
34✔
1082
    pClusterInfo->tbs_total += pVgroup->numOfTables;
34✔
1083

1084
    SMonVgroupDesc desc = {0};
34✔
1085
    desc.vgroup_id = pVgroup->vgId;
34✔
1086

1087
    SName name = {0};
34✔
1088
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
34✔
1089
    if (code < 0) {
34!
1090
      mError("failed to get db name since %s", tstrerror(code));
×
1091
      sdbRelease(pSdb, pVgroup);
×
1092
      TAOS_RETURN(code);
×
1093
    }
1094
    (void)tNameGetDbName(&name, desc.database_name);
34✔
1095

1096
    desc.tables_num = pVgroup->numOfTables;
34✔
1097
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
34✔
1098
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
34✔
1099
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
68✔
1100
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
34✔
1101
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
34✔
1102
      pVnDesc->dnode_id = pVgid->dnodeId;
34✔
1103
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
34✔
1104
      pVnDesc->syncState = pVgid->syncState;
34✔
1105
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
34!
1106
        tstrncpy(desc.status, "ready", sizeof(desc.status));
34✔
1107
        pClusterInfo->vgroups_alive++;
34✔
1108
      }
1109
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
34!
1110
        pClusterInfo->vnodes_alive++;
34✔
1111
      }
1112
      pClusterInfo->vnodes_total++;
34✔
1113
    }
1114

1115
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
68!
1116
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1117
    }
1118
    sdbRelease(pSdb, pVgroup);
34✔
1119
  }
1120

1121
  // stb info
1122
  pIter = NULL;
16✔
1123
  while (1) {
15✔
1124
    SStbObj *pStb = NULL;
31✔
1125
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
31✔
1126
    if (pIter == NULL) break;
31✔
1127

1128
    SMonStbDesc desc = {0};
15✔
1129

1130
    SName name1 = {0};
15✔
1131
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
15✔
1132
    if (code < 0) {
15!
1133
      mError("failed to get db name since %s", tstrerror(code));
×
1134
      sdbRelease(pSdb, pStb);
×
1135
      TAOS_RETURN(code);
×
1136
    }
1137
    (void)tNameGetDbName(&name1, desc.database_name);
15✔
1138

1139
    SName name2 = {0};
15✔
1140
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
15✔
1141
    if (code < 0) {
15!
1142
      mError("failed to get table name since %s", tstrerror(code));
×
1143
      sdbRelease(pSdb, pStb);
×
1144
      TAOS_RETURN(code);
×
1145
    }
1146
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
15✔
1147

1148
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
30!
1149
      mError("failed to put stb into array, but continue at this monitor report");
×
1150
    }
1151
    sdbRelease(pSdb, pStb);
15✔
1152
  }
1153

1154
  // grant info
1155
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
16✔
1156
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
16✔
1157
  if (pMnode->grant.expireTimeMS == 0) {
16!
1158
    pGrantInfo->expire_time = 0;
×
1159
    pGrantInfo->timeseries_total = 0;
×
1160
  }
1161

1162
  mndReleaseRpc(pMnode);
16✔
1163
  TAOS_RETURN(code);
16✔
1164
}
1165

1166
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
64,196✔
1167
  mTrace("mnode get load");
64,196✔
1168
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
64,196✔
1169
  pLoad->syncState = state.state;
64,196✔
1170
  pLoad->syncRestore = state.restored;
64,196✔
1171
  pLoad->syncTerm = state.term;
64,196✔
1172
  pLoad->roleTimeMs = state.roleTimeMs;
64,196✔
1173
  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
64,196✔
1174
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);
1175
  return 0;
64,196✔
1176
}
1177

1178
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
18,735✔
1179
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
18,735✔
1180
  return state.roleTimeMs;
18,735✔
1181
}
1182

1183
void mndSetRestored(SMnode *pMnode, bool restored) {
2,512✔
1184
  if (restored) {
2,512!
1185
    (void)taosThreadRwlockWrlock(&pMnode->lock);
2,512✔
1186
    pMnode->restored = true;
2,512✔
1187
    (void)taosThreadRwlockUnlock(&pMnode->lock);
2,512✔
1188
    mInfo("mnode set restored:%d", restored);
2,512!
1189
  } else {
1190
    (void)taosThreadRwlockWrlock(&pMnode->lock);
×
1191
    pMnode->restored = false;
×
1192
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
1193
    mInfo("mnode set restored:%d", restored);
×
1194
    while (1) {
1195
      if (pMnode->rpcRef <= 0) break;
×
1196
      taosMsleep(3);
×
1197
    }
1198
  }
1199
}
2,512✔
1200

1201
bool mndGetRestored(SMnode *pMnode) { return pMnode->restored; }
×
1202

1203
void mndSetStop(SMnode *pMnode) {
2,512✔
1204
  (void)taosThreadRwlockWrlock(&pMnode->lock);
2,512✔
1205
  pMnode->stopped = true;
2,512✔
1206
  (void)taosThreadRwlockUnlock(&pMnode->lock);
2,512✔
1207
  mInfo("mnode set stopped");
2,512!
1208
}
2,512✔
1209

1210
bool mndGetStop(SMnode *pMnode) { return pMnode->stopped; }
656,615✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc