• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #4488

12 Jul 2025 07:47AM UTC coverage: 62.207% (-0.7%) from 62.948%
#4488

push

travis-ci

web-flow
docs: update stream docs (#31822)

157961 of 324087 branches covered (48.74%)

Branch coverage included in aggregate %.

244465 of 322830 relevant lines covered (75.73%)

6561668.76 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

68.53
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndConfig.h"
24
#include "mndConsumer.h"
25
#include "mndDb.h"
26
#include "mndDnode.h"
27
#include "mndFunc.h"
28
#include "mndGrant.h"
29
#include "mndIndex.h"
30
#include "mndInfoSchema.h"
31
#include "mndMnode.h"
32
#include "mndMount.h"
33
#include "mndPerfSchema.h"
34
#include "mndPrivilege.h"
35
#include "mndProfile.h"
36
#include "mndQnode.h"
37
#include "mndQuery.h"
38
#include "mndShow.h"
39
#include "mndSma.h"
40
#include "mndSnode.h"
41
#include "mndStb.h"
42
#include "mndStream.h"
43
#include "mndSubscribe.h"
44
#include "mndSync.h"
45
#include "mndTelem.h"
46
#include "mndTopic.h"
47
#include "mndTrans.h"
48
#include "mndUser.h"
49
#include "mndVgroup.h"
50
#include "mndView.h"
51
#include "mndBnode.h"
52

53
// Try to take one reference on pMnode->rpcRef while holding the read lock.
// Returns 0 when the reference was taken, TSDB_CODE_APP_IS_STOPPING when the
// mnode is flagged as stopped, and 1 when mndIsLeader() reports false.
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t result = 0;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    result = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
  } else {
    result = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(result);
}
71

72
// Decrement pMnode->rpcRef by one; the decrement is performed while holding
// the mnode read lock.
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
703,531✔
82

83
static void *mndBuildTimerMsg(int32_t *pContLen) {
126,507✔
84
  terrno = 0;
126,507✔
85
  SMTimerReq timerReq = {0};
126,507✔
86

87
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
126,507✔
88
  if (contLen <= 0) return NULL;
126,507!
89
  void *pReq = rpcMallocCont(contLen);
126,507✔
90
  if (pReq == NULL) return NULL;
126,507!
91

92
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
126,507!
93
    mError("failed to serialize timer msg since %s", terrstr());
×
94
  }
95
  *pContLen = contLen;
126,507✔
96
  return pReq;
126,507✔
97
}
98

99
// Put a TDMT_MND_TRANS_TIMER message on the write queue. Nothing is queued
// when the timer message cannot be built; a queueing failure is only logged.
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
30,316✔
111

112
// Put a TDMT_MND_COMPACT_TIMER message on the write queue. Nothing is queued
// when the timer message cannot be built; a queueing failure is only logged.
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,086✔
124

125
// Put a TDMT_MND_TTL_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built (same guard as the
// other pullup helpers in this file); a queueing failure is only logged.
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;  // never enqueue a message without a body

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,674✔
135

136
// Put a TDMT_MND_TRIM_DB_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built; a queueing
// failure is only logged.
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");  // trace text now names this timer, not the s3migrate one
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;  // never enqueue a message without a body

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
146

147
// Put a TDMT_MND_S3MIGRATE_DB_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built; a queueing
// failure is only logged.
static void mndPullupS3MigrateDb(SMnode *pMnode) {
  mTrace("pullup s3migrate");  // trace text now names this timer, not the trim one
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;  // never enqueue a message without a body

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_S3MIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
157

158
// Put a no-response TDMT_MND_ARB_HEARTBEAT_TIMER message on the arb queue.
// Returns the result of tmsgPutToQueue(), or a non-zero error code (terrno
// when set, otherwise TSDB_CODE_MND_RETURN_VALUE_NULL) when the timer message
// cannot be built.
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of queuing a message without a body.
    int32_t code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    TAOS_RETURN(code);
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
165

166
// Put a no-response TDMT_MND_ARB_CHECK_SYNC_TIMER message on the arb queue.
// Returns the result of tmsgPutToQueue(), or a non-zero error code (terrno
// when set, otherwise TSDB_CODE_MND_RETURN_VALUE_NULL) when the timer message
// cannot be built.
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of queuing a message without a body.
    int32_t code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    TAOS_RETURN(code);
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
173

174
// Put a TDMT_MND_TMQ_TIMER message on the write queue. Nothing is queued when
// the timer message cannot be built; a queueing failure is only logged.
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
29,460✔
184

185
// Allocate an SMStreamDoCheckpointMsg with rpcMallocCont() and put it on the
// write queue as TDMT_MND_STREAM_BEGIN_CHECKPOINT. Nothing is queued when the
// allocation fails; a queueing failure is only logged.
static void mndStreamCheckpointTimer(SMnode *pMnode) {
  SMStreamDoCheckpointMsg *pChkptMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
  if (pChkptMsg == NULL) return;

  int32_t msgLen = sizeof(SMStreamDoCheckpointMsg);
  SRpcMsg chkptRpc = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pChkptMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &chkptRpc) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,357✔
196

197
// Put a TDMT_MND_NODECHECK_TIMER message on the read queue. Nothing is queued
// when the timer message cannot be built; a queueing failure is only logged.
static void mndStreamCheckNode(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
37✔
208

209
// Put a TDMT_MND_CHECK_STREAM_TIMER message on the write queue. Nothing is
// queued when the timer message cannot be built; a queueing failure is only
// logged.
static void mndStreamCheckStatus(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_CHECK_STREAM_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1✔
220

221
// Put a TDMT_MND_STREAM_CONSEN_TIMER message on the write queue. Nothing is
// queued when the timer message cannot be built; a queueing failure is only
// logged.
static void mndStreamConsensusChkpt(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_STREAM_CONSEN_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,357✔
232

233
// Put a TDMT_MND_TELEM_TIMER message on the read queue. Nothing is queued
// when the timer message cannot be built; a queueing failure is only logged.
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2✔
245

246
// Put a TDMT_MND_GRANT_HB_TIMER message on the write queue, with
// info.notFreeAhandle set and info.ahandle cleared. Nothing is queued when
// the timer message cannot be built; a queueing failure is only logged.
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_GRANT_HB_TIMER,
                      .pCont = pMsg,
                      .contLen = msgLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,778✔
262

263
// Put a TDMT_MND_UPTIME_TIMER message on the write queue, with
// info.notFreeAhandle set and info.ahandle cleared. Nothing is queued when
// the timer message cannot be built; a queueing failure is only logged.
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increate uptime");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
                      .pCont = pMsg,
                      .contLen = msgLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
18✔
279

280
// Walk every SDB_VGROUP object and, for the replica hosted on `dnodeId`,
// switch its sync state to TAOS_SYNC_STATE_OFFLINE (clearing syncRestore,
// syncCanRead and startTimeMs). When a replica state was changed, the owning
// database's stateTs is set to `curMs` if it differs.
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;
  void *pCursor = NULL;

  for (;;) {
    SVgObj *pVgroup = NULL;
    pCursor = sdbFetch(pSdb, SDB_VGROUP, pCursor, (void **)&pVgroup);
    if (pCursor == NULL) break;

    bool markedOffline = false;
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[i];
      if (pGid->dnodeId != dnodeId) continue;

      // Only the first replica on this dnode is examined.
      if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
        mInfo(
            "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:error restored:0 "
            "canRead:0",
            pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
        pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
        pGid->syncRestore = 0;
        pGid->syncCanRead = 0;
        pGid->startTimeMs = 0;
        markedOffline = true;
      }
      break;
    }

    if (markedOffline) {
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, old newTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
640✔
321

322
// Iterate over all SDB_DNODE objects and call mndSetVgroupOffline() for each
// one that mndIsDnodeOnline() reports as not online at the current time.
// Does nothing when mndAcquireRpc() fails; the RPC reference is released at
// the end.
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb   *pSdb = pMnode->pSdb;
  int64_t nowMs = taosGetTimestampMs();
  void   *pCursor = NULL;

  for (;;) {
    SDnodeObj *pDnode = NULL;
    pCursor = sdbFetch(pSdb, SDB_DNODE, pCursor, (void **)&pDnode);
    if (pCursor == NULL) break;

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
346

347
// Returns false only when the sync state is leader and both the sync module
// and the mnode report restored. Otherwise returns true with terrno set:
//   - left as set by syncGetState() when that call reported an error,
//   - TSDB_CODE_SYN_NOT_LEADER when the state is not leader,
//   - TSDB_CODE_SYN_RESTORING when either restored flag is false.
// The state is sampled while holding the mnode read lock.
static bool mnodeIsNotLeader(SMnode *pMnode) {
  bool    notLeader = true;
  int32_t reason = 0;

  terrno = 0;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    // keep the error reported by syncGetState
  } else if (state.state != TAOS_SYNC_STATE_LEADER) {
    reason = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!state.restored || !pMnode->restored) {
    reason = TSDB_CODE_SYN_RESTORING;
  } else {
    notLeader = false;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  if (reason != 0) terrno = reason;
  return notLeader;
}
369

370
static int32_t minCronTime() {
×
371
  int32_t min = INT32_MAX;
×
372
  min = TMIN(min, tsTtlPushIntervalSec);
×
373
  min = TMIN(min, tsTrimVDbIntervalSec);
×
374
  min = TMIN(min, tsS3MigrateIntervalSec);
×
375
  min = TMIN(min, tsTransPullupInterval);
×
376
  min = TMIN(min, tsCompactPullupInterval);
×
377
  min = TMIN(min, tsMqRebalanceInterval);
×
378
  min = TMIN(min, tsStreamCheckpointInterval);
×
379
  min = TMIN(min, tsStreamNodeCheckInterval);
×
380
  min = TMIN(min, tsArbHeartBeatIntervalSec);
×
381
  min = TMIN(min, tsArbCheckSyncIntervalSec);
×
382

383
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
384
  min = TMIN(min, telemInt);
×
385
  min = TMIN(min, tsGrantHBInterval);
×
386
  min = TMIN(min, tsUptimeInterval);
×
387

388
  return min <= 1 ? 2 : min;
×
389
}
390
// True when `interval` is positive and `sec` is a whole multiple of it.
// A zero or negative interval never fires, which avoids a modulo-by-zero.
static inline bool mndIsPullupDue(int64_t sec, int64_t interval) {
  return interval > 0 && sec % interval == 0;
}

// Fire every periodic "pullup" whose interval divides `sec`.
//
// pMnode: mnode whose queues receive the timer messages.
// sec:    second counter supplied by the caller.
//
// Which timers exist depends on the build flags (TD_ASTRA, USE_S3, USE_TOPIC,
// USE_STREAM). Failures of the arbitrator pullups are logged; the other
// helpers log their own failures.
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
  int32_t code = 0;
#ifndef TD_ASTRA
  if (mndIsPullupDue(sec, tsGrantHBInterval)) {  // put in the 1st place as to take effect ASAP
    mndPullupGrant(pMnode);
  }
  if (mndIsPullupDue(sec, tsTtlPushIntervalSec)) {
    mndPullupTtl(pMnode);
  }

  if (mndIsPullupDue(sec, tsTrimVDbIntervalSec)) {
    mndPullupTrimDb(pMnode);
  }
#endif
#ifdef USE_S3
  if (tsS3MigrateEnabled && mndIsPullupDue(sec, tsS3MigrateIntervalSec)) {
    mndPullupS3MigrateDb(pMnode);
  }
#endif
  if (mndIsPullupDue(sec, tsTransPullupInterval)) {
    mndPullupTrans(pMnode);
  }

  if (mndIsPullupDue(sec, tsCompactPullupInterval)) {
    mndPullupCompacts(pMnode);
  }
#ifdef USE_TOPIC
  if (mndIsPullupDue(sec, tsMqRebalanceInterval)) {
    mndCalMqRebalance(pMnode);
  }
#endif
#ifdef USE_STREAM
  if (mndIsPullupDue(sec, 30)) {  // send the checkpoint info every 30 sec
    mndStreamCheckpointTimer(pMnode);
  }

  if (mndIsPullupDue(sec, tsStreamNodeCheckInterval)) {
    mndStreamCheckNode(pMnode);
  }

  // The quotient is 0 for a timeout below 1000, in which case this never fires.
  if (mndIsPullupDue(sec, tsStreamFailedTimeout / 1000)) {
    mndStreamCheckStatus(pMnode);
  }

  if (mndIsPullupDue(sec, 30)) {
    mndStreamConsensusChkpt(pMnode);
  }

  if (mndIsPullupDue(sec, tsTelemInterval)) {
    mndPullupTelem(pMnode);
  }
#endif
  if (mndIsPullupDue(sec, tsUptimeInterval)) {
    mndIncreaseUpTime(pMnode);
  }
#ifndef TD_ASTRA
  if (mndIsPullupDue(sec, tsArbHeartBeatIntervalSec)) {
    if ((code = mndPullupArbHeartbeat(pMnode)) != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
    }
  }

  if (mndIsPullupDue(sec, tsArbCheckSyncIntervalSec)) {
    if ((code = mndPullupArbCheckSync(pMnode)) != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
    }
  }
#endif
}
60,056✔
459
// Run the periodic checks for second `sec`: the dnode offline check every
// tsStatusInterval * 5 seconds, and the sync timeout check every
// MNODE_TIMEOUT_SEC / 2 seconds.
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
  bool offlineCheckDue = (sec % (tsStatusInterval * 5) == 0);
  if (offlineCheckDue) {
    mndCheckDnodeOffline(pMnode);
  }

  bool syncCheckDue = (sec % (MNODE_TIMEOUT_SEC / 2) == 0);
  if (syncCheckDue) {
    mndSyncCheckTimeout(pMnode);
  }
}
60,056✔
467

468
static void *mndThreadFp(void *param) {
2,466✔
469
  SMnode *pMnode = param;
2,466✔
470
  int64_t lastTime = 0;
2,466✔
471
  setThreadName("mnode-timer");
2,466✔
472

473
  while (1) {
662,241✔
474
    lastTime++;
664,707✔
475
    taosMsleep(100);
664,707✔
476

477
    if (mndGetStop(pMnode)) break;
664,707✔
478
    if (lastTime % 10 != 0) continue;
662,241✔
479

480
    if (mnodeIsNotLeader(pMnode)) {
65,033✔
481
      mTrace("timer not process since mnode is not leader");
4,977!
482
      continue;
4,977✔
483
    }
484

485
    int64_t sec = lastTime / 10;
60,056✔
486
    mndDoTimerCheckTask(pMnode, sec);
60,056✔
487

488
    mndDoTimerPullupTask(pMnode, sec);
60,056✔
489
  }
490

491
  return NULL;
2,466✔
492
}
493

494
// Start the joinable "mnode-timer" thread running mndThreadFp() and store its
// handle in pMnode->thread.
// Returns 0 on success, or the non-zero code from taosThreadCreate().
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadFp, pMnode);
  // The attribute object is no longer needed once creation was attempted;
  // destroy it on the failure path as well.
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
511

512
// Join the timer thread and clear its handle, if the handle is valid.
static void mndCleanupTimer(SMnode *pMnode) {
  if (!taosCheckPthreadValid(pMnode->thread)) return;

  (void)taosThreadJoin(pMnode->thread, NULL);
  taosThreadClear(&pMnode->thread);
}
2,466✔
518

519
// Store a copy of `path` in pMnode->path and create that directory.
// Returns 0 on success, or terrno when the copy or the mkdir fails.
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);
  if (pMnode->path == NULL) {
    TAOS_RETURN(terrno);
  }

  if (taosMkDir(pMnode->path) != 0) {
    TAOS_RETURN(terrno);
  }

  TAOS_RETURN(0);
}
534

535
// Open the WAL located at "<pMnode->path><TD_DIRSEP>wal" and store it in
// pMnode->pWal.
// Returns 0 on success; TSDB_CODE_DNODE_INVALID_ENCRYPTKEY when WAL
// encryption is selected but tsEncryptKey is empty; otherwise terrno (or
// TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if walOpen() returns NULL.
static int32_t mndInitWal(SMnode *pMnode) {
  char walDir[PATH_MAX + 20] = {0};
  (void)snprintf(walDir, sizeof(walDir), "%s%swal", pMnode->path, TD_DIRSEP);

  // Fields not assigned below stay zero: fsyncPeriod, retentionPeriod,
  // retentionSize, encryptAlgorithm and encryptKey.
  SWalCfg walCfg = {0};
  walCfg.vgId = 1;
  walCfg.rollPeriod = -1;
  walCfg.segSize = -1;
  walCfg.committed = -1;
  walCfg.level = TAOS_WAL_FSYNC;

#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
    walCfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
    if (tsEncryptKey[0] == '\0') {
      TAOS_RETURN(TSDB_CODE_DNODE_INVALID_ENCRYPTKEY);
    }
    tstrncpy(walCfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walDir, &walCfg);
  if (pMnode->pWal == NULL) {
    int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    mError("failed to open wal since %s. wal:%s", tstrerror(code), walDir);
    TAOS_RETURN(code);
  }

  TAOS_RETURN(0);
}
572

573
// Close pMnode->pWal and reset the pointer; no-op when it is already NULL.
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) return;

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
2,466✔
579

580
// Create the sdb from the mnode's path and WAL and store it in pMnode->pSdb.
// Returns 0 on success; when sdbInit() returns NULL, returns terrno or
// TSDB_CODE_MND_RETURN_VALUE_NULL if terrno is 0.
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt sdbOpt = {.path = pMnode->path, .pMnode = pMnode, .pWal = pMnode->pWal};

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(code);
}
596

597
// Read the sdb file unless the mnode is being deployed, then publish the
// sdb's commitIndex as pMnode->applied.
// Returns the result of sdbReadFile(), or 0 when no read was attempted. The
// log line and the store happen regardless of that result.
static int32_t mndOpenSdb(SMnode *pMnode) {
  int32_t readCode = pMnode->deploy ? 0 : sdbReadFile(pMnode->pSdb);

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);

  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);
  return readCode;
}
608

609
// Release pMnode->pSdb via sdbCleanup() and reset the pointer; no-op when it
// is already NULL.
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) return;

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
2,466✔
615

616
// Append one step (name, init callback, cleanup callback) to pMnode->pSteps.
// Returns 0 on success, or terrno when the array push fails.
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep newStep = {.name = name, .initFp = initFp, .cleanupFp = cleanupFp};

  if (taosArrayPush(pMnode->pSteps, &newStep) != NULL) {
    TAOS_RETURN(0);
  }

  TAOS_RETURN(terrno);
}
627

628
// Register every mnode start-up step, in execution order, via mndAllocStep().
// Returns 0 when all steps were registered; stops at and returns the first
// non-zero code from mndAllocStep().
static int32_t mndInitSteps(SMnode *pMnode) {
  // Order matters: mndExecSteps() runs the steps front to back and
  // mndCleanupSteps() runs the cleanups back to front.
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } stepTable[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-bnode", mndInitBnode, mndCleanupBnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
#ifdef USE_MOUNT
      {"mnode-mount", mndInitMount, mndCleanupMount},
      {"mnode-mount-log", mndInitMountLog, mndCleanupMountLog},
#endif
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  int32_t numOfSteps = (int32_t)(sizeof(stepTable) / sizeof(stepTable[0]));
  for (int32_t i = 0; i < numOfSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, stepTable[i].name, stepTable[i].initFp, stepTable[i].cleanupFp));
  }
  return 0;
}
672

673
// Run the cleanup callbacks of steps [0, pos] in reverse order, then destroy
// the step array and set pMnode->pSteps to NULL. pos == -1 means "start from
// the last registered step". No-op when the step array is already NULL.
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  int32_t idx = pos;
  if (idx == -1) {
    idx = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  while (idx >= 0) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    if (pStep->cleanupFp != NULL) {
      (*pStep->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
692

693
// Run the init callback of every registered step in order. On the first
// failure, clean up from the failing step backwards and return its code.
// After all steps succeed, cache the cluster id in pMnode->clusterId and
// return 0.
static int32_t mndExecSteps(SMnode *pMnode) {
  int32_t numOfSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t pos = 0; pos < numOfSteps; pos++) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, pos);
    if (pStep->initFp == NULL) continue;

    int32_t code = (*pStep->initFp)(pMnode);
    if (code == 0) {
      mInfo("%s is initialized", pStep->name);
      tmsgReportStartup(pStep->name, "initialized");
      continue;
    }

    mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
    mndCleanupSteps(pMnode, pos);
    TAOS_RETURN(code);
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
713

714
// Copy the message callbacks, the dnode id and the sync-related settings from
// pOption into pMnode.
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  pMnode->selfDnodeId = pOption->dnodeId;
  pMnode->msgCb = pOption->msgCb;

  // sync management settings
  pMnode->syncMgmt.lastIndex = pOption->lastIndex;
  pMnode->syncMgmt.selfIndex = pOption->selfIndex;
  pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
  pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
  (void)memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
  (void)memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
}
2,467✔
724

725
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
2,467✔
726
  terrno = 0;
2,467✔
727
  mInfo("start to open mnode in %s", path);
2,467!
728

729
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
2,467!
730
  if (pMnode == NULL) {
2,467!
731
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
732
    mError("failed to open mnode since %s", terrstr());
×
733
    return NULL;
×
734
  }
735
  (void)memset(pMnode, 0, sizeof(SMnode));
2,467✔
736

737
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
2,467✔
738
  if (code != 0) {
2,467!
739
    taosMemoryFree(pMnode);
×
740
    mError("failed to open mnode lock since %s", tstrerror(code));
×
741
    return NULL;
×
742
  }
743

744
  char timestr[24] = "1970-01-01 00:00:00.00";
2,467✔
745
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
2,467✔
746
  if (code < 0) {
2,467!
747
    mError("failed to parse time since %s", tstrerror(code));
×
748
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
749
    taosMemoryFree(pMnode);
×
750
    return NULL;
×
751
  }
752
  mndSetOptions(pMnode, pOption);
2,467✔
753

754
  pMnode->deploy = pOption->deploy;
2,467✔
755
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
2,467✔
756
  if (pMnode->pSteps == NULL) {
2,467!
757
    taosMemoryFree(pMnode);
×
758
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
759
    mError("failed to open mnode since %s", terrstr());
×
760
    return NULL;
×
761
  }
762

763
  code = mndCreateDir(pMnode, path);
2,467✔
764
  if (code != 0) {
2,467!
765
    code = terrno;
×
766
    mError("failed to open mnode since %s", tstrerror(code));
×
767
    mndClose(pMnode);
×
768
    terrno = code;
×
769
    return NULL;
×
770
  }
771

772
  code = mndInitSteps(pMnode);
2,467✔
773
  if (code != 0) {
2,467!
774
    code = terrno;
×
775
    mError("failed to open mnode since %s", tstrerror(code));
×
776
    mndClose(pMnode);
×
777
    terrno = code;
×
778
    return NULL;
×
779
  }
780

781
  code = mndExecSteps(pMnode);
2,467✔
782
  if (code != 0) {
2,467!
783
    code = terrno;
×
784
    mError("failed to open mnode since %s", tstrerror(code));
×
785
    mndClose(pMnode);
×
786
    terrno = code;
×
787
    return NULL;
×
788
  }
789

790
  mInfo("mnode open successfully");
2,467!
791
  return pMnode;
2,467✔
792
}
793

794
/*
 * Pre-shutdown hook for the mnode. Calls syncLeaderTransfer(), then
 * syncPreStop(), then writes the sdb with sdbWriteFile(). Negative results
 * from the transfer and from the sdb write are only logged. A NULL pMnode is
 * a no-op.
 */
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  // TODO check return value
  int32_t code = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (code < 0) {
    mError("failed to transfer leader since %s", tstrerror(code));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  code = sdbWriteFile(pMnode->pSdb, 0);
  if (code < 0) {
    mError("failed to write sdb since %s", tstrerror(code));
  }
}
2,466✔
809

810
/*
 * Tear down an mnode object: run the cleanup steps (mndCleanupSteps with -1),
 * then free the path string and the object itself. A NULL pMnode is a no-op.
 */
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
2,466✔
819

820
/*
 * Start the mnode: start sync, deploy the sdb when the mnode was opened in
 * deploy mode (marking it restored on success), reset grants, and finally
 * start the timer.
 *
 * Returns -1 if sdbDeploy() fails, otherwise the result of mndInitTimer().
 */
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    const bool deployFailed = (sdbDeploy(pMnode->pSdb) != 0);
    if (deployFailed) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    mndSetRestored(pMnode, true);
  }

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
833

834
/* Forward to syncIsCatchUp() using this mnode's sync handle. */
int32_t mndIsCatchUp(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;

  return syncIsCatchUp(syncRid);
}
838

839
/* Forward to syncGetRole() using this mnode's sync handle. */
ESyncRole mndGetRole(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;

  return syncGetRole(syncRid);
}
843

844
/* Forward to syncGetTerm() using this mnode's sync handle. */
int64_t mndGetTerm(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;

  return syncGetTerm(syncRid);
}
848

849
/* Forward to syncGetArbToken() using this mnode's sync handle; outToken is passed through unchanged. */
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  const int64_t syncRid = pMnode->syncMgmt.sync;

  return syncGetArbToken(syncRid, outToken);
}
48,871✔
850

851
/*
 * Stop the mnode. The three calls run in this fixed order:
 *   1. mndSetStop()      - set the stopped flag
 *   2. mndSyncStop()     - stop sync
 *   3. mndCleanupTimer() - clean up the timer
 */
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);

  mndSyncStop(pMnode);

  mndCleanupTimer(pMnode);
}
2,466✔
856

857
/*
 * Hand a sync message to syncProcessMsg() for the mnode stored in
 * pMsg->info.node. A non-zero result is logged and returned unchanged.
 */
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;

  // Not used directly below; presumably picked up by the mG* logging macros.
  const STraceId *trace = &pMsg->info.traceId;
  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
872

873
/*
 * Decide whether an incoming rpc message may be processed by this mnode.
 *
 * Returns 0 when the message may proceed. For messages that passed the full
 * state check, pMnode->rpcRef has been incremented by one. Non-request
 * messages and the TDMT_SCH_* messages listed below return 0 without touching
 * the reference count.
 *
 * Failure results:
 *   - TSDB_CODE_APP_IS_STOPPING  the mnode is flagged as stopped
 *   - terrno from syncGetState() when that call reports an error
 *   - TSDB_CODE_SYN_NOT_LEADER   the sync state is not leader
 *   - TSDB_CODE_SYN_RESTORING    sync or the mnode is not restored yet
 *   - -1                         the mnode ep set is empty
 *
 * For not-leader / restoring failures of non-timer messages, the mnode ep set
 * is serialized into pMsg->info.rsp so it is returned with the response.
 */
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;

  if (!IsReq(pMsg)) TAOS_RETURN(code);

  // These scheduler messages skip the state check entirely.
  const bool isSchedulerMsg =
      pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH ||
      pMsg->msgType == TDMT_SCH_DROP_TASK || pMsg->msgType == TDMT_SCH_TASK_NOTIFY;
  if (isSchedulerMsg) {
    TAOS_RETURN(code);
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);

  if (pMnode->stopped) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_APP_IS_STOPPING;
    TAOS_RETURN(code);
  }

  // terrno is cleared first so that a non-zero value afterwards can be attributed to syncGetState().
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  // Classify the sync / restore state while the read lock is still held.
  if (state.state != TAOS_SYNC_STATE_LEADER) {
    code = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!state.restored || !pMnode->restored) {
    code = TSDB_CODE_SYN_RESTORING;
  }

  if (code == 0) {
    // The rpc reference is taken before the lock is dropped.
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    TAOS_RETURN(code);
  }

  (void)taosThreadRwlockUnlock(&pMnode->lock);

  // Timer messages are rejected with a trace log only; no ep set is attached.
  const bool isTimerMsg =
      pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
      pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER;
  if (isTimerMsg) {
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
           pMnode->stopped, state.restored, syncStr(state.state));
    TAOS_RETURN(code);
  }

  // Not used directly below; presumably picked up by the mG* logging macros.
  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) return -1;

  for (int32_t epIdx = 0; epIdx < epSet.numOfEps; ++epIdx) {
    mDebug("mnode index:%d, ep:%s:%u", epIdx, epSet.eps[epIdx].fqdn, epSet.eps[epIdx].port);
  }

  // First call with a NULL buffer yields the length, second call fills the response buffer.
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
  pMsg->info.rsp = rpcMallocCont(contLen);
  if (pMsg->info.rsp == NULL) {
    TAOS_RETURN(code);
  }

  if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
    mError("failed to serialize ep set");
  }
  pMsg->info.hasEpSet = 1;
  pMsg->info.rspLen = contLen;

  TAOS_RETURN(code);
}
962

963
/*
 * Dispatch an rpc message to the handler registered for its type.
 *
 * The plain handler table (msgFp) is consulted first; the extended table
 * (msgFpExt, whose handlers also receive pQueueInfo) is used only when no
 * plain handler exists. If neither has an entry, TSDB_CODE_MSG_NOT_PROCESSED
 * is returned. Otherwise the message must pass mndCheckMnodeState(); a failure
 * there is returned immediately. After the handler runs, mndReleaseRpc() is
 * called and the handler result is logged and returned. A handler result of
 * -1 is replaced by terrno.
 */
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode *pMnode = pMsg->info.node;
  // Not used directly below; presumably picked up by the mG* logging macros.
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

  MndMsgFp    fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)];
  MndMsgFpExt fpExt = (fp == NULL) ? pMnode->msgFpExt[TMSG_INDEX(pMsg->msgType)] : NULL;
  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp != NULL) {
    code = (*fp)(pMsg);
  } else {
    code = (*fpExt)(pMsg, pQueueInfo);
  }
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
    TAOS_RETURN(code);
  }

  if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
    TAOS_RETURN(code);
  }

  // TODO remove this wrong set code
  if (code == -1) {
    code = terrno;
  }
  mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
          TMSG_INFO(pMsg->msgType));

  TAOS_RETURN(code);
}
1003

1004
/*
 * Register fp as the plain handler for msgType. The request is silently
 * ignored when the table index derived from msgType is not below TDMT_MAX.
 */
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFp[slot] = fp;
}
476,131✔
1010

1011
/*
 * Register fp as the extended handler for msgType. The request is silently
 * ignored when the table index derived from msgType is not below TDMT_MAX.
 */
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFpExt[slot] = fp;
}
19,736✔
1017

1018
/*
 * Generate a non-zero, positive 64-bit uid for `name`.
 *
 * Bit layout of the raw value (before the sign is folded away):
 *   bits 24..63  low 40 bits of the current timestamp in microseconds
 *   bits  8..23  low 16 bits of MurmurHash3_32(name, len)
 *   bits  0..7   low 8 bits of taosRand()
 * The three fields are added, so they can only carry upwards.
 *
 * Note: uid 0 is reserved, so a raw value of 0 is never returned; the loop
 * simply draws again.
 *
 * All arithmetic is done on uint64_t. The previous implementation shifted a
 * signed 64-bit value into the sign bit and could call llabs(INT64_MIN), both
 * of which are undefined behaviour. The results are unchanged for every input
 * for which the old code was well defined; the single bit pattern whose
 * absolute value is not representable (only the sign bit set) is retried.
 */
int64_t mndGenerateUid(const char *name, int32_t len) {
  const uint64_t signBit = (uint64_t)1 << 63;
  const uint64_t hashBits = ((uint64_t)(uint32_t)MurmurHash3_32(name, len) & 0xFFFFu) << 8;

  for (;;) {
    const uint64_t us = (uint64_t)taosGetTimestampUs();
    const uint64_t timeBits = (us & 0x000000FFFFFFFFFFull) << 24;
    const uint64_t randBits = (uint64_t)taosRand() & 0xFFu;
    const uint64_t raw = timeBits + hashBits + randBits;

    // 0 is reserved; signBit alone would be INT64_MIN, which has no absolute value.
    if (raw == 0 || raw == signBit) continue;

    // Two's-complement negation in unsigned space is the absolute value of a negative int64_t.
    const uint64_t magnitude = (raw & signBit) ? ((uint64_t)0 - raw) : raw;
    return (int64_t)magnitude;
  }
}
1030

1031
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
16✔
1032
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1033
  int32_t code = mndAcquireRpc(pMnode);
16✔
1034
  if (code < 0) {
16!
1035
    TAOS_RETURN(code);
×
1036
  } else if (code == 1) {
16!
1037
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1038
  }
1039

1040
  SSdb   *pSdb = pMnode->pSdb;
16✔
1041
  int64_t ms = taosGetTimestampMs();
16✔
1042

1043
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
16✔
1044
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
16✔
1045
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
16✔
1046
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
16✔
1047
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
16!
1048
      pStbInfo->stbs == NULL) {
16!
1049
    mndReleaseRpc(pMnode);
×
1050
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1051
    if (terrno != 0) code = terrno;
×
1052
    TAOS_RETURN(code);
×
1053
  }
1054

1055
  // cluster info
1056
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
16✔
1057
  pClusterInfo->monitor_interval = tsMonitorInterval;
16✔
1058
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
16✔
1059
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
16✔
1060
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
16✔
1061
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
16✔
1062
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
16✔
1063

1064
  void *pIter = NULL;
16✔
1065
  while (1) {
16✔
1066
    SDnodeObj *pObj = NULL;
32✔
1067
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
32✔
1068
    if (pIter == NULL) break;
32✔
1069

1070
    SMonDnodeDesc desc = {0};
16✔
1071
    desc.dnode_id = pObj->id;
16✔
1072
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
16✔
1073
    if (mndIsDnodeOnline(pObj, ms)) {
16✔
1074
      tstrncpy(desc.status, "ready", sizeof(desc.status));
15✔
1075
    } else {
1076
      tstrncpy(desc.status, "offline", sizeof(desc.status));
1✔
1077
    }
1078
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
32!
1079
      mError("failed put dnode into array, but continue at this monitor report")
×
1080
    }
1081
    sdbRelease(pSdb, pObj);
16✔
1082
  }
1083

1084
  pIter = NULL;
16✔
1085
  while (1) {
16✔
1086
    SMnodeObj *pObj = NULL;
32✔
1087
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
32✔
1088
    if (pIter == NULL) break;
32✔
1089

1090
    SMonMnodeDesc desc = {0};
16✔
1091
    desc.mnode_id = pObj->id;
16✔
1092
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
16✔
1093

1094
    if (pObj->id == pMnode->selfDnodeId) {
16!
1095
      pClusterInfo->first_ep_dnode_id = pObj->id;
16✔
1096
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
16✔
1097
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1098
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
16✔
1099
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1100
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
16✔
1101
      desc.syncState = TAOS_SYNC_STATE_LEADER;
16✔
1102
    } else {
1103
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1104
      desc.syncState = pObj->syncState;
×
1105
    }
1106
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
32!
1107
      mError("failed to put mnode into array, but continue at this monitor report");
×
1108
    }
1109
    sdbRelease(pSdb, pObj);
16✔
1110
  }
1111

1112
  // vgroup info
1113
  pIter = NULL;
16✔
1114
  while (1) {
38✔
1115
    SVgObj *pVgroup = NULL;
54✔
1116
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
54✔
1117
    if (pIter == NULL) break;
54✔
1118

1119
    if (pVgroup->mountVgId) {
38!
1120
      sdbRelease(pSdb, pVgroup);
×
1121
      continue;
×
1122
    }
1123

1124
    pClusterInfo->vgroups_total++;
38✔
1125
    pClusterInfo->tbs_total += pVgroup->numOfTables;
38✔
1126

1127
    SMonVgroupDesc desc = {0};
38✔
1128
    desc.vgroup_id = pVgroup->vgId;
38✔
1129

1130
    SName name = {0};
38✔
1131
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
38✔
1132
    if (code < 0) {
38!
1133
      mError("failed to get db name since %s", tstrerror(code));
×
1134
      sdbRelease(pSdb, pVgroup);
×
1135
      TAOS_RETURN(code);
×
1136
    }
1137
    (void)tNameGetDbName(&name, desc.database_name);
38✔
1138

1139
    desc.tables_num = pVgroup->numOfTables;
38✔
1140
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
38✔
1141
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
38✔
1142
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
76✔
1143
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
38✔
1144
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
38✔
1145
      pVnDesc->dnode_id = pVgid->dnodeId;
38✔
1146
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
38✔
1147
      pVnDesc->syncState = pVgid->syncState;
38✔
1148
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
38!
1149
        tstrncpy(desc.status, "ready", sizeof(desc.status));
38✔
1150
        pClusterInfo->vgroups_alive++;
38✔
1151
      }
1152
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
38!
1153
        pClusterInfo->vnodes_alive++;
38✔
1154
      }
1155
      pClusterInfo->vnodes_total++;
38✔
1156
    }
1157

1158
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
76!
1159
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1160
    }
1161
    sdbRelease(pSdb, pVgroup);
38✔
1162
  }
1163

1164
  // stb info
1165
  pIter = NULL;
16✔
1166
  while (1) {
15✔
1167
    SStbObj *pStb = NULL;
31✔
1168
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
31✔
1169
    if (pIter == NULL) break;
31✔
1170

1171
    SMonStbDesc desc = {0};
15✔
1172

1173
    SName name1 = {0};
15✔
1174
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
15✔
1175
    if (code < 0) {
15!
1176
      mError("failed to get db name since %s", tstrerror(code));
×
1177
      sdbRelease(pSdb, pStb);
×
1178
      TAOS_RETURN(code);
×
1179
    }
1180
    (void)tNameGetDbName(&name1, desc.database_name);
15✔
1181

1182
    SName name2 = {0};
15✔
1183
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
15✔
1184
    if (code < 0) {
15!
1185
      mError("failed to get table name since %s", tstrerror(code));
×
1186
      sdbRelease(pSdb, pStb);
×
1187
      TAOS_RETURN(code);
×
1188
    }
1189
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
15✔
1190

1191
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
30!
1192
      mError("failed to put stb into array, but continue at this monitor report");
×
1193
    }
1194
    sdbRelease(pSdb, pStb);
15✔
1195
  }
1196

1197
  // grant info
1198
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
16✔
1199
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
16✔
1200
  if (pMnode->grant.expireTimeMS == 0) {
16!
1201
    pGrantInfo->expire_time = 0;
×
1202
    pGrantInfo->timeseries_total = 0;
×
1203
  }
1204

1205
  mndReleaseRpc(pMnode);
16✔
1206
  TAOS_RETURN(code);
16✔
1207
}
1208

1209
/*
 * Copy the current sync state, restored flag, term and roleTimeMs of this
 * mnode (as returned by syncGetState()) into pLoad. Always returns 0.
 */
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  const SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = snapshot.state;
  pLoad->syncRestore = snapshot.restored;
  pLoad->syncTerm = snapshot.term;
  pLoad->roleTimeMs = snapshot.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);
  return 0;
}
1220

1221
/* Return the roleTimeMs field of the state reported by syncGetState() for this mnode. */
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  const SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  return snapshot.roleTimeMs;
}
1225

1226
/*
 * Set the mnode's restored flag under the write lock and log the new value.
 *
 * When the flag is being cleared, the call additionally blocks, polling every
 * 3 ms, until pMnode->rpcRef is no longer positive.
 */
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) return;

  // Wait for outstanding rpc references to drain.
  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
2,466✔
1243

1244
/* Return the mnode's restored flag. The field is read directly, without taking pMnode->lock. */
bool mndGetRestored(SMnode *pMnode) {
  return pMnode->restored;
}
×
1245

1246
/* Set the mnode's stopped flag to true while holding the write lock, then log it. */
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);

  pMnode->stopped = true;

  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
2,466✔
1252

1253
bool mndGetStop(SMnode *pMnode) { return pMnode->stopped; }
664,707✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc