• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #4720

08 Sep 2025 08:43AM UTC coverage: 58.139% (-0.6%) from 58.762%
#4720

push

travis-ci

web-flow
Merge pull request #32881 from taosdata/enh/add-new-windows-ci

fix(ci): update workflow reference to use new Windows CI YAML

133181 of 292179 branches covered (45.58%)

Branch coverage included in aggregate %.

201691 of 283811 relevant lines covered (71.07%)

5442780.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.01
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndSsMigrate.h"
24
#include "mndConfig.h"
25
#include "mndConsumer.h"
26
#include "mndDb.h"
27
#include "mndDnode.h"
28
#include "mndFunc.h"
29
#include "mndGrant.h"
30
#include "mndIndex.h"
31
#include "mndInfoSchema.h"
32
#include "mndMnode.h"
33
#include "mndMount.h"
34
#include "mndPerfSchema.h"
35
#include "mndPrivilege.h"
36
#include "mndProfile.h"
37
#include "mndQnode.h"
38
#include "mndQuery.h"
39
#include "mndShow.h"
40
#include "mndSma.h"
41
#include "mndSnode.h"
42
#include "mndStb.h"
43
#include "mndStream.h"
44
#include "mndSubscribe.h"
45
#include "mndSync.h"
46
#include "mndTelem.h"
47
#include "mndTopic.h"
48
#include "mndTrans.h"
49
#include "mndUser.h"
50
#include "mndVgroup.h"
51
#include "mndView.h"
52
#include "mndBnode.h"
53

54
// Take one rpc reference on the mnode, under the mnode read lock.
//
// Returns 0 after incrementing pMnode->rpcRef. Returns
// TSDB_CODE_APP_IS_STOPPING when pMnode->stopped is set, and 1 when
// mndIsLeader() reports false; in both failure cases no reference is taken.
// A successful call is expected to be paired with mndReleaseRpc().
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t result;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    result = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    // only the success path bumps the reference counter
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
    result = 0;
  } else {
    result = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(result);
}
72

73
// Drop one rpc reference previously taken with mndAcquireRpc().
// The decrement of pMnode->rpcRef is atomic and done under the mnode read lock.
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
644,212✔
83

84
static void *mndBuildTimerMsg(int32_t *pContLen) {
117,331✔
85
  terrno = 0;
117,331✔
86
  SMTimerReq timerReq = {0};
117,334✔
87

88
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
117,334✔
89
  if (contLen <= 0) return NULL;
117,294!
90
  void *pReq = rpcMallocCont(contLen);
117,294✔
91
  if (pReq == NULL) return NULL;
117,264!
92

93
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
117,264!
94
    mError("failed to serialize timer msg since %s", terrstr());
×
95
  }
96
  *pContLen = contLen;
117,322✔
97
  return pReq;
117,322✔
98
}
99

100
// Enqueue a TDMT_MND_TRANS_TIMER message on the mnode write queue.
// Nothing is enqueued when the timer message cannot be built; an enqueue
// failure is only logged.
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
28,122✔
112

113
// Enqueue a TDMT_MND_COMPACT_TIMER message on the mnode write queue.
// Nothing is enqueued when the timer message cannot be built; an enqueue
// failure is only logged.
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,038✔
125

126
// Enqueue a TDMT_MND_TTL_TIMER message on the mnode write queue.
// Nothing is enqueued when the timer message cannot be built (same policy as
// the other pullup helpers in this file); an enqueue failure is only logged.
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,158✔
136

137
// Enqueue a TDMT_MND_TRIM_DB_TIMER message on the mnode write queue.
// Nothing is enqueued when the timer message cannot be built (same policy as
// the other pullup helpers in this file); an enqueue failure is only logged.
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
147

148
// Enqueue a TDMT_MND_SSMIGRATE_DB_TIMER message on the mnode write queue.
// Does nothing unless grantCheck(TSDB_GRANT_SHARED_STORAGE) succeeds, and
// nothing is enqueued when the timer message cannot be built. An enqueue
// failure is only logged.
static void mndPullupSsMigrateDb(SMnode *pMnode) {
  if (grantCheck(TSDB_GRANT_SHARED_STORAGE) != TSDB_CODE_SUCCESS) {
    return;
  }

  mTrace("pullup ssmigrate db");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_SSMIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
161

162
// Enqueue a TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER message on the mnode
// write queue. Nothing is enqueued when the timer message cannot be built
// (same policy as the other pullup helpers in this file); an enqueue failure
// is only logged.
static void mndPullupUpdateSsMigrateProgress(SMnode *pMnode) {
  mTrace("pullup update ssmigrate progress");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
171

172
// Enqueue a TDMT_MND_ARB_HEARTBEAT_TIMER message (noResp set) on the arb queue.
//
// Returns the result of tmsgPutToQueue(). When the timer message cannot be
// built nothing is enqueued and a non-zero code is returned: terrno if it is
// set, otherwise TSDB_CODE_OUT_OF_MEMORY.
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return (terrno != 0) ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
179

180
// Enqueue a TDMT_MND_ARB_CHECK_SYNC_TIMER message (noResp set) on the arb queue.
//
// Returns the result of tmsgPutToQueue(). When the timer message cannot be
// built nothing is enqueued and a non-zero code is returned: terrno if it is
// set, otherwise TSDB_CODE_OUT_OF_MEMORY.
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return (terrno != 0) ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
187

188
// Enqueue a TDMT_MND_TMQ_TIMER message on the mnode write queue.
// Nothing is enqueued when the timer message cannot be built; an enqueue
// failure is only logged.
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
28,118✔
198

199
// Enqueue a TDMT_MND_TELEM_TIMER message on the mnode read queue.
// Nothing is enqueued when the timer message cannot be built; an enqueue
// failure is only logged.
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pMsg, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2✔
211

212
// Enqueue a TDMT_MND_GRANT_HB_TIMER message on the mnode write queue.
// The message carries no ahandle and is flagged notFreeAhandle. Nothing is
// enqueued when the timer message cannot be built; an enqueue failure is only
// logged.
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_GRANT_HB_TIMER,
                      .pCont = pMsg,
                      .contLen = msgLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
4,236✔
228

229
// Enqueue a TDMT_MND_UPTIME_TIMER message on the mnode write queue.
// The message carries no ahandle and is flagged notFreeAhandle. Nothing is
// enqueued when the timer message cannot be built; an enqueue failure is only
// logged.
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increate uptime");

  int32_t msgLen = 0;
  void   *pMsg = mndBuildTimerMsg(&msgLen);
  if (pMsg == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
                      .pCont = pMsg,
                      .contLen = msgLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
9✔
245

246
// Mark the replica hosted on dnode `dnodeId` as offline in every vgroup.
//
// For each vgroup in the sdb, the first vnodeGid entry whose dnodeId matches
// is looked up. If that entry is not already TAOS_SYNC_STATE_OFFLINE its sync
// state is set to offline, its restore/canRead/startTimeMs fields are cleared,
// and the owning db's stateTs is set to curMs (when the db can be acquired and
// its stateTs differs).
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;
  void *pCursor = NULL;

  for (;;) {
    SVgObj *pVgroup = NULL;
    pCursor = sdbFetch(pSdb, SDB_VGROUP, pCursor, (void **)&pVgroup);
    if (pCursor == NULL) break;

    // locate the replica living on the given dnode, if any
    SVnodeGid *pMatch = NULL;
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
      if (pVgroup->vnodeGid[i].dnodeId == dnodeId) {
        pMatch = &pVgroup->vnodeGid[i];
        break;
      }
    }

    if (pMatch != NULL && pMatch->syncState != TAOS_SYNC_STATE_OFFLINE) {
      mInfo(
          "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline "
          "restored:0 "
          "canRead:0",
          pVgroup->vgId, syncStr(pMatch->syncState), pMatch->syncRestore, pMatch->syncCanRead);
      pMatch->syncState = TAOS_SYNC_STATE_OFFLINE;
      pMatch->syncRestore = 0;
      pMatch->syncCanRead = 0;
      pMatch->startTimeMs = 0;

      // a state change also refreshes the owning db's state timestamp
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, old newTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
293✔
288

289
// Walk all dnodes and mark the vgroup replicas of every dnode that
// mndIsDnodeOnline() reports as not online at the current time.
// Does nothing when mndAcquireRpc() fails; the reference it takes is released
// before returning.
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb         *pSdb = pMnode->pSdb;
  const int64_t nowMs = taosGetTimestampMs();

  void *pCursor = NULL;
  for (;;) {
    SDnodeObj *pDnode = NULL;
    pCursor = sdbFetch(pSdb, SDB_DNODE, pCursor, (void **)&pDnode);
    if (pCursor == NULL) break;

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
313

314
// Tell whether timer work must be skipped on this mnode.
//
// Returns false only when the sync state is leader and both the sync state and
// the mnode itself are restored. Otherwise returns true with terrno describing
// the reason: the error left by syncGetState(), TSDB_CODE_SYN_NOT_LEADER, or
// TSDB_CODE_SYN_RESTORING. The state is sampled under the mnode read lock and
// terrno is updated after the lock is released.
static bool mnodeIsNotLeader(SMnode *pMnode) {
  bool    rejected = true;
  bool    publishErr = false;
  int32_t err = 0;

  terrno = 0;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);

  if (terrno != 0) {
    // syncGetState() failed; its terrno is left untouched
  } else if (state.state != TAOS_SYNC_STATE_LEADER) {
    publishErr = true;
    err = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!state.restored || !pMnode->restored) {
    publishErr = true;
    err = TSDB_CODE_SYN_RESTORING;
  } else {
    rejected = false;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  if (publishErr) terrno = err;
  return rejected;
}
336

337
static int32_t minCronTime() {
×
338
  int32_t min = INT32_MAX;
×
339
  min = TMIN(min, tsTtlPushIntervalSec);
×
340
  min = TMIN(min, tsTrimVDbIntervalSec);
×
341
  min = TMIN(min, tsSsAutoMigrateIntervalSec);
×
342
  min = TMIN(min, tsTransPullupInterval);
×
343
  min = TMIN(min, tsCompactPullupInterval);
×
344
  min = TMIN(min, tsMqRebalanceInterval);
×
345

346
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
347
  min = TMIN(min, telemInt);
×
348
  min = TMIN(min, tsGrantHBInterval);
×
349
  min = TMIN(min, tsUptimeInterval);
×
350

351
  return min <= 1 ? 2 : min;
×
352
}
353
// Fire every periodic pullup whose interval divides `sec`.
//
// pMnode: mnode whose queues receive the timer messages.
// sec:    second counter supplied by the caller; a task runs when
//         `sec % interval == 0`.
// NOTE(review): apart from tsTelemInterval, the interval settings are used as
// divisors without a zero check — presumably the configuration layer keeps
// them positive; confirm.
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
#ifndef TD_ASTRA
  if (sec % tsGrantHBInterval == 0) {  // put in the 1st place as to take effect ASAP
    mndPullupGrant(pMnode);
  }
  if (sec % tsTtlPushIntervalSec == 0) {
    mndPullupTtl(pMnode);
  }

  if (sec % tsTrimVDbIntervalSec == 0) {
    mndPullupTrimDb(pMnode);
  }
#endif
#ifdef USE_SHARED_STORAGE
  if (tsSsEnabled) {
    if (sec % 10 == 0) {  // TODO: make 10 to be configurable
      mndPullupUpdateSsMigrateProgress(pMnode);
    }
    if (tsSsEnabled == 2 && sec % tsSsAutoMigrateIntervalSec == 0) {
      mndPullupSsMigrateDb(pMnode);
    }
  }
#endif
  if (sec % tsTransPullupInterval == 0) {
    mndPullupTrans(pMnode);
  }

  if (sec % tsCompactPullupInterval == 0) {
    mndPullupCompacts(pMnode);
  }
#ifdef USE_TOPIC
  if (sec % tsMqRebalanceInterval == 0) {
    mndCalMqRebalance(pMnode);
  }
#endif
  // telemetry is the only task that is skipped when its interval is not positive
  if (tsTelemInterval > 0 && sec % tsTelemInterval == 0) {
    mndPullupTelem(pMnode);
  }
  if (sec % tsUptimeInterval == 0) {
    mndIncreaseUpTime(pMnode);
  }
}
57,200✔
396

397
// Fire the arbitrator heartbeat and check-sync pullups whose interval divides
// `ms`. Failures to enqueue are logged and otherwise ignored. Compiled to a
// no-op body when TD_ASTRA is defined.
void mndDoArbTimerPullupTask(SMnode *pMnode, int64_t ms) {
  int32_t code = 0;
#ifndef TD_ASTRA
  if (ms % (tsArbHeartBeatIntervalMs) == 0) {
    code = mndPullupArbHeartbeat(pMnode);
    if (code != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
    }
  }

  if (ms % (tsArbCheckSyncIntervalMs) == 0) {
    code = mndPullupArbCheckSync(pMnode);
    if (code != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
    }
  }
#endif
}
579,891✔
413

414
// Run the dnode offline check whenever `ms` is a multiple of tsStatusTimeoutMs.
void mndDoTimerCheckStatus(SMnode *pMnode, int64_t ms) {
  if (ms % (tsStatusTimeoutMs) != 0) return;
  mndCheckDnodeOffline(pMnode);
}
579,891✔
419

420
// Periodic health checks driven by the second counter `sec`:
// the sync timeout check every MNODE_TIMEOUT_SEC / 2, and, unless
// tsDisableStream is set, the stream health check every
// MND_STREAM_HEALTH_CHECK_PERIOD_SEC.
void mndDoTimerCheckSync(SMnode *pMnode, int64_t sec) {
  if (sec % (MNODE_TIMEOUT_SEC / 2) == 0) {
    mndSyncCheckTimeout(pMnode);
  }

  if (tsDisableStream) return;
  if (sec % MND_STREAM_HEALTH_CHECK_PERIOD_SEC == 0) {
    msmHealthCheck(pMnode);
  }
}
57,200✔
428

429
static void *mndThreadSecFp(void *param) {
1,929✔
430
  SMnode *pMnode = param;
1,929✔
431
  int64_t lastTime = 0;
1,929✔
432
  setThreadName("mnode-timer");
1,929✔
433

434
  while (1) {
644,900✔
435
    lastTime++;
646,829✔
436
    taosMsleep(100);
646,829✔
437

438
    if (mndGetStop(pMnode)) break;
646,829✔
439
    if (lastTime % 10 != 0) continue;
644,900✔
440

441
    if (mnodeIsNotLeader(pMnode)) {
63,573✔
442
      mTrace("timer not process since mnode is not leader");
6,373!
443
      continue;
6,373✔
444
    }
445

446
    int64_t sec = lastTime / 10;
57,200✔
447
    mndDoTimerCheckSync(pMnode, sec);
57,200✔
448

449
    mndDoTimerPullupTask(pMnode, sec);
57,200✔
450
  }
451

452
  return NULL;
1,929✔
453
}
454

455
static void *mndThreadMsFp(void *param) {
1,929✔
456
  SMnode *pMnode = param;
1,929✔
457
  int64_t lastTime = 0;
1,929✔
458
  setThreadName("mnode-arb-timer");
1,929✔
459

460
  while (1) {
461
    lastTime += 100;
646,807✔
462
    taosMsleep(100);
646,807✔
463

464
    if (mndGetStop(pMnode)) break;
646,807✔
465
    if (lastTime % 10 != 0) continue;
644,878!
466

467
    if (mnodeIsNotLeader(pMnode)) {
644,878✔
468
      mTrace("timer not process since mnode is not leader");
64,987!
469
      continue;
64,987✔
470
    }
471

472
    mndDoTimerCheckStatus(pMnode, lastTime);
579,891✔
473

474
    mndDoArbTimerPullupTask(pMnode, lastTime);
579,891✔
475
  }
476

477
  return NULL;
1,929✔
478
}
479

480
// Start the two joinable timer threads of the mnode: the second-granularity
// timer (mndThreadSecFp -> pMnode->thread) and the arb timer
// (mndThreadMsFp -> pMnode->arbThread).
//
// Returns 0 on success, or the code reported by taosThreadCreate() for the
// first thread that could not be created. Each thread attribute object is
// destroyed right after its creation attempt, on success and failure alike.
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadSecFp, pMnode);
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }
  tmsgReportStartup("mnode-timer", "initialized");

  TdThreadAttr arbAttr;
  (void)taosThreadAttrInit(&arbAttr);
  (void)taosThreadAttrSetDetachState(&arbAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&arbAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->arbThread, &arbAttr, mndThreadMsFp, pMnode);
  (void)taosThreadAttrDestroy(&arbAttr);
  if (code != 0) {
    mError("failed to create arb timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }
  // report under the arb thread's own name rather than repeating "mnode-timer"
  tmsgReportStartup("mnode-arb-timer", "initialized");

  TAOS_RETURN(code);
}
511

512
// Join and clear the two timer threads, each only if its handle passes
// taosCheckPthreadValid(). The second timer is handled before the arb timer.
static void mndCleanupTimer(SMnode *pMnode) {
  const bool hasSecTimer = taosCheckPthreadValid(pMnode->thread);
  if (hasSecTimer) {
    (void)taosThreadJoin(pMnode->thread, NULL);
    taosThreadClear(&pMnode->thread);
  }

  const bool hasArbTimer = taosCheckPthreadValid(pMnode->arbThread);
  if (hasArbTimer) {
    (void)taosThreadJoin(pMnode->arbThread, NULL);
    taosThreadClear(&pMnode->arbThread);
  }
}
1,929✔
522

523
// Store a copy of `path` in pMnode->path and create that directory.
// Returns 0 on success; when the copy or the directory creation fails the
// current terrno is returned.
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);

  // the directory is only attempted once the path copy succeeded
  if (pMnode->path == NULL || taosMkDir(pMnode->path) != 0) {
    int32_t failure = terrno;
    TAOS_RETURN(failure);
  }

  TAOS_RETURN(0);
}
538

539
// Open the mnode WAL located at "<pMnode->path><TD_DIRSEP>wal" and store the
// handle in pMnode->pWal.
//
// In enterprise builds, when SM4 encryption covers the mnode WAL scope, the
// encryption algorithm and key are copied into the WAL configuration; an empty
// key yields TSDB_CODE_DNODE_INVALID_ENCRYPTKEY.
// Returns 0 on success. When walOpen() fails the result is terrno if set,
// otherwise TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitWal(SMnode *pMnode) {
  char walPath[PATH_MAX + 20] = {0};
  (void)snprintf(walPath, sizeof(walPath), "%s%swal", pMnode->path, TD_DIRSEP);

  SWalCfg walCfg = {.vgId = 1,
                    .fsyncPeriod = 0,
                    .rollPeriod = -1,
                    .segSize = -1,
                    .committed = -1,
                    .retentionPeriod = 0,
                    .retentionSize = 0,
                    .level = TAOS_WAL_FSYNC,
                    .encryptAlgorithm = 0,
                    .encryptKey = {0}};

#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
    walCfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
    if (tsEncryptKey[0] == '\0') {
      TAOS_RETURN(TSDB_CODE_DNODE_INVALID_ENCRYPTKEY);
    }
    tstrncpy(walCfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walPath, &walCfg);
  if (pMnode->pWal == NULL) {
    int32_t failure = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    mError("failed to open wal since %s. wal:%s", tstrerror(failure), walPath);
    TAOS_RETURN(failure);
  }

  TAOS_RETURN(0);
}
576

577
// Close the mnode WAL if it is open and reset pMnode->pWal to NULL.
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) return;

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
1,929✔
583

584
// Create the sdb for this mnode from its path, itself and its WAL, and store
// it in pMnode->pSdb.
// Returns 0 on success. When sdbInit() fails the result is terrno if set,
// otherwise TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt sdbOpt = {.path = pMnode->path, .pMnode = pMnode, .pWal = pMnode->pWal};

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t failure = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(failure);
}
600

601
// Load the sdb from file unless the mnode is being deployed, then publish the
// sdb commit index as the mnode's applied index.
// Returns the result of sdbReadFile(), or 0 when no read was needed. The
// applied index is stored regardless of that result.
static int32_t mndOpenSdb(SMnode *pMnode) {
  int32_t readCode = pMnode->deploy ? 0 : sdbReadFile(pMnode->pSdb);

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);

  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);
  return readCode;
}
612

613
// Tear down the sdb if present and reset pMnode->pSdb to NULL.
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) return;

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
1,929✔
619

620
// Append one init/cleanup step to pMnode->pSteps.
//
// name:      label used when logging the step.
// initFp:    function run by mndExecSteps(); may be NULL.
// cleanupFp: function run by mndCleanupSteps(); may be NULL.
// Returns 0, or terrno when the step cannot be pushed onto the array.
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep newStep = {.name = name, .initFp = initFp, .cleanupFp = cleanupFp};

  if (taosArrayPush(pMnode->pSteps, &newStep) != NULL) {
    TAOS_RETURN(0);
  }

  TAOS_RETURN(terrno);
}
631

632
// Register every mnode start-up step, in execution order, on pMnode->pSteps.
//
// The table order is the order in which mndExecSteps() runs the init
// functions, and the reverse of the order in which mndCleanupSteps() runs the
// cleanup functions. Returns 0, or the first non-zero code reported by
// mndAllocStep().
static int32_t mndInitSteps(SMnode *pMnode) {
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } stepTable[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-bnode", mndInitBnode, mndCleanupBnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
#ifdef USE_MOUNT
      {"mnode-mount", mndInitMount, mndCleanupMount},
      {"mnode-mount-log", mndInitMountLog, mndCleanupMountLog},
#endif
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-ssmigrate", mndInitSsMigrate, mndCleanupSsMigrate},
      // second "mnode-sdb" entry: opens the sdb after all tables are registered
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  const size_t numSteps = sizeof(stepTable) / sizeof(stepTable[0]);
  for (size_t i = 0; i < numSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, stepTable[i].name, stepTable[i].initFp, stepTable[i].cleanupFp));
  }
  return 0;
}
677

678
// Run the cleanup function of steps [pos .. 0], in descending order, then
// destroy the step array and set pMnode->pSteps to NULL.
//
// pos: index of the last step to clean up; -1 means "start from the last
//      registered step". Steps without a cleanup function are only logged.
// Does nothing when pMnode->pSteps is already NULL.
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  int32_t last = pos;
  if (last == -1) {
    last = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  for (int32_t idx = last; idx >= 0; --idx) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    if (pStep->cleanupFp == NULL) continue;
    (*pStep->cleanupFp)(pMnode);
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
697

698
// Run the init function of every registered step in registration order.
//
// On the first failing step, the steps up to and including that one are
// cleaned up via mndCleanupSteps() and the failing code is returned. On
// success pMnode->clusterId is refreshed from mndGetClusterId() and 0 is
// returned. Steps without an init function are skipped.
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t idx = 0; idx < numSteps; ++idx) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    if (pStep->initFp == NULL) continue;

    int32_t code = (*pStep->initFp)(pMnode);
    if (code != 0) {
      mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
      mndCleanupSteps(pMnode, idx);
      TAOS_RETURN(code);
    }

    mInfo("%s is initialized", pStep->name);
    tmsgReportStartup(pStep->name, "initialized");
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
718

719
// Copy the message callbacks, the dnode id and the sync-related settings
// (self index, replica counts, last index, replica and node-role arrays) from
// pOption into pMnode.
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  // identity and messaging
  pMnode->selfDnodeId = pOption->dnodeId;
  pMnode->msgCb = pOption->msgCb;

  // sync scalars
  pMnode->syncMgmt.lastIndex = pOption->lastIndex;
  pMnode->syncMgmt.selfIndex = pOption->selfIndex;
  pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
  pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;

  // sync arrays; the copy length is the size of the source array
  (void)memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
  (void)memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
}
1,929✔
729

730
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
1,929✔
731
  terrno = 0;
1,929✔
732
  mInfo("start to open mnode in %s", path);
1,929!
733

734
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
1,929!
735
  if (pMnode == NULL) {
1,929!
736
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
737
    mError("failed to open mnode since %s", terrstr());
×
738
    return NULL;
×
739
  }
740
  (void)memset(pMnode, 0, sizeof(SMnode));
1,929✔
741

742
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
1,929✔
743
  if (code != 0) {
1,929!
744
    taosMemoryFree(pMnode);
×
745
    mError("failed to open mnode lock since %s", tstrerror(code));
×
746
    return NULL;
×
747
  }
748

749
  char timestr[24] = "1970-01-01 00:00:00.00";
1,929✔
750
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
1,929✔
751
  if (code < 0) {
1,929!
752
    mError("failed to parse time since %s", tstrerror(code));
×
753
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
754
    taosMemoryFree(pMnode);
×
755
    return NULL;
×
756
  }
757
  mndSetOptions(pMnode, pOption);
1,929✔
758

759
  pMnode->deploy = pOption->deploy;
1,929✔
760
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
1,929✔
761
  if (pMnode->pSteps == NULL) {
1,929!
762
    taosMemoryFree(pMnode);
×
763
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
764
    mError("failed to open mnode since %s", terrstr());
×
765
    return NULL;
×
766
  }
767

768
  code = mndCreateDir(pMnode, path);
1,929✔
769
  if (code != 0) {
1,929!
770
    code = terrno;
×
771
    mError("failed to open mnode since %s", tstrerror(code));
×
772
    mndClose(pMnode);
×
773
    terrno = code;
×
774
    return NULL;
×
775
  }
776

777
  code = mndInitSteps(pMnode);
1,929✔
778
  if (code != 0) {
1,929!
779
    code = terrno;
×
780
    mError("failed to open mnode since %s", tstrerror(code));
×
781
    mndClose(pMnode);
×
782
    terrno = code;
×
783
    return NULL;
×
784
  }
785

786
  code = mndExecSteps(pMnode);
1,929✔
787
  if (code != 0) {
1,929!
788
    code = terrno;
×
789
    mError("failed to open mnode since %s", tstrerror(code));
×
790
    mndClose(pMnode);
×
791
    terrno = code;
×
792
    return NULL;
×
793
  }
794

795
  mInfo("mnode open successfully");
1,929!
796
  return pMnode;
1,929✔
797
}
798

799
// Runs the pre-shutdown sequence for the mnode: calls syncLeaderTransfer,
// then syncPreStop, then sdbWriteFile(pSdb, 0). A negative result from
// the transfer or the write is logged and otherwise ignored.
// A NULL pMnode is a no-op.
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  int32_t transferCode = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (transferCode < 0) {
    mError("failed to transfer leader since %s", tstrerror(transferCode));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  int32_t writeCode = sdbWriteFile(pMnode->pSdb, 0);
  if (writeCode < 0) {
    mError("failed to write sdb since %s", tstrerror(writeCode));
  }
}
1,929✔
814

815
// Tears down an mnode: runs mndCleanupSteps(pMnode, -1), then frees the
// path string and the SMnode object itself. A NULL pMnode is a no-op.
// The pointer must not be used by the caller afterwards.
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
1,929✔
824

825
// Starts the mnode: starts sync, deploys the sdb when the node was opened
// with the deploy flag (marking the mnode restored on success), resets
// grants and finally initialises the timer.
// Returns -1 if sdbDeploy fails, otherwise the result of mndInitTimer.
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    const bool deployed = (sdbDeploy(pMnode->pSdb) == 0);
    if (!deployed) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    mndSetRestored(pMnode, true);
  }

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
838

839
// Forwards to syncIsCatchUp for this mnode's sync handle and returns its result.
int32_t mndIsCatchUp(SMnode *pMnode) {
  return syncIsCatchUp(pMnode->syncMgmt.sync);
}
843

844
// Forwards to syncGetRole for this mnode's sync handle and returns its result.
ESyncRole mndGetRole(SMnode *pMnode) {
  return syncGetRole(pMnode->syncMgmt.sync);
}
848

849
// Forwards to syncGetTerm for this mnode's sync handle and returns its result.
int64_t mndGetTerm(SMnode *pMnode) {
  return syncGetTerm(pMnode->syncMgmt.sync);
}
853

854
// Forwards to syncGetArbToken for this mnode's sync handle.
// outToken is passed through unchanged; the result of syncGetArbToken is returned.
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  int64_t rid = pMnode->syncMgmt.sync;
  return syncGetArbToken(rid, outToken);
}
46,805✔
855

856
// Stops the mnode in three ordered steps:
//   1. mndSetStop      - set the stopped flag under the write lock
//   2. mndSyncStop     - stop the sync module
//   3. mndCleanupTimer - clean up the timer set up by mndStart
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);
  mndSyncStop(pMnode);
  mndCleanupTimer(pMnode);
}
1,929✔
861

862
// Hands a sync message to syncProcessMsg for the mnode referenced by
// pMsg->info.node. A non-zero result is logged and returned unchanged.
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;

  // Not referenced directly below; presumably picked up by name inside the mG* log macros.
  const STraceId *trace = &pMsg->info.traceId;
  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
877

878
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
649,177✔
879
  int32_t code = 0;
649,177✔
880
  if (!IsReq(pMsg)) TAOS_RETURN(code);
649,177✔
881
  if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
570,637✔
882
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
551,975!
883
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK ||
538,937✔
884
      pMsg->msgType == TDMT_SCH_TASK_NOTIFY) {
501,768!
885
    TAOS_RETURN(code);
68,863✔
886
  }
887

888
  SMnode *pMnode = pMsg->info.node;
501,774✔
889
  (void)taosThreadRwlockRdlock(&pMnode->lock);
501,774✔
890
  if (pMnode->stopped) {
501,840✔
891
    (void)taosThreadRwlockUnlock(&pMnode->lock);
22✔
892
    code = TSDB_CODE_APP_IS_STOPPING;
22✔
893
    TAOS_RETURN(code);
22✔
894
  }
895

896
  terrno = 0;
501,818✔
897
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
501,794✔
898
  if (terrno != 0) {
501,826!
899
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
900
    code = terrno;
×
901
    TAOS_RETURN(code);
×
902
  }
903

904
  if (state.state != TAOS_SYNC_STATE_LEADER) {
501,817✔
905
    (void)taosThreadRwlockUnlock(&pMnode->lock);
15,351✔
906
    code = TSDB_CODE_SYN_NOT_LEADER;
15,350✔
907
    goto _OVER;
15,350✔
908
  }
909

910
  if (!state.restored || !pMnode->restored) {
486,466✔
911
    (void)taosThreadRwlockUnlock(&pMnode->lock);
464✔
912
    code = TSDB_CODE_SYN_RESTORING;
467✔
913
    goto _OVER;
467✔
914
  }
915

916
#if 1
917
  (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
486,002✔
918
#else
919
  int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
920
  mTrace("mnode rpc is acquired, ref:%d", ref);
921
#endif
922

923
  (void)taosThreadRwlockUnlock(&pMnode->lock);
485,998✔
924
  TAOS_RETURN(code);
486,007✔
925

926
_OVER:
15,817✔
927
  if (pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
15,817!
928
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
15,816!
929
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
15,818!
930
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
15,819!
931
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
15,819!
932
      pMsg->msgType == TDMT_MND_SSMIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
15,819!
933
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER ||
15,819!
934
      pMsg->msgType == TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER) {
15,819!
935
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
×
936
           pMnode->stopped, state.restored, syncStr(state.state));
937
    TAOS_RETURN(code);
×
938
  }
939

940
  const STraceId *trace = &pMsg->info.traceId;
15,819✔
941
  SEpSet          epSet = {0};
15,819✔
942
  mndGetMnodeEpSet(pMnode, &epSet);
15,819✔
943

944
  mGDebug(
15,819!
945
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
946
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
947
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
948
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));
949

950
  if (epSet.numOfEps <= 0) return -1;
15,819!
951

952
  for (int32_t i = 0; i < epSet.numOfEps; ++i) {
61,236✔
953
    mDebug("mnode index:%d, ep:%s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port);
45,417✔
954
  }
955

956
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
15,819✔
957
  pMsg->info.rsp = rpcMallocCont(contLen);
15,819✔
958
  if (pMsg->info.rsp != NULL) {
15,819!
959
    if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
15,819!
960
      mError("failed to serialize ep set");
×
961
    }
962
    pMsg->info.hasEpSet = 1;
15,819✔
963
    pMsg->info.rspLen = contLen;
15,819✔
964
  }
965

966
  TAOS_RETURN(code);
15,819✔
967
}
968

969
// Dispatches an rpc message to its registered mnode handler.
//
// The handler is looked up in pMnode->msgFp first and in pMnode->msgFpExt
// (which also receives pQueueInfo) second. If neither is set the message is
// rejected with TSDB_CODE_MSG_NOT_PROCESSED. Before the handler runs,
// mndCheckMnodeState must accept the message; its error is returned as-is.
// Returns the handler's result, with a raw -1 replaced by terrno.
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

  MndMsgFp    fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)];
  MndMsgFpExt fpExt = (fp == NULL) ? pMnode->msgFpExt[TMSG_INDEX(pMsg->msgType)] : NULL;
  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp != NULL) {
    code = (*fp)(pMsg);
  } else {
    code = (*fpExt)(pMsg, pQueueInfo);
  }
  // Balances the rpc reference taken during the state check.
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
    TAOS_RETURN(code);
  }
  if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
    TAOS_RETURN(code);
  }

  // TODO remove this wrong set code
  if (code == -1) {
    code = terrno;
  }
  mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
          TMSG_INFO(pMsg->msgType));

  TAOS_RETURN(code);
}
1009

1010
// Registers fp as the handler for msgType in pMnode->msgFp.
// Silently ignored when TMSG_INDEX(msgType) is not below TDMT_MAX.
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFp[slot] = fp;
}
329,859✔
1016

1017
// Registers fp as the extended handler for msgType in pMnode->msgFpExt.
// Silently ignored when TMSG_INDEX(msgType) is not below TDMT_MAX.
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFpExt[slot] = fp;
}
15,432✔
1023

1024
// Note: uid 0 is reserved
1025
// Generates a positive, non-zero 64-bit uid for `name` (`len` bytes).
//
// Layout of the raw value before the sign is normalised:
//   bits 24..63 : low 40 bits of the current microsecond timestamp
//   bits  8..23 : low 16 bits of MurmurHash3_32(name, len)
//   bits  0..7  : low 8 bits of taosRand()
//
// The value is composed in unsigned arithmetic because shifting a 40-bit
// quantity left by 24 can reach the sign bit, which is undefined for signed
// operands. Candidates equal to 0 or INT64_MIN (for which llabs is
// undefined) are discarded and a new one is drawn.
int64_t mndGenerateUid(const char *name, int32_t len) {
  int32_t hashval = MurmurHash3_32(name, len);
  do {
    int64_t  us = taosGetTimestampUs();
    uint64_t tsBits = ((uint64_t)us & 0x000000FFFFFFFFFFULL) << 24;
    uint64_t hashBits = ((uint64_t)hashval & 0xFFFFULL) << 8;
    uint64_t randBits = (uint64_t)taosRand() & 0xFFULL;
    int64_t  uuid = (int64_t)(tsBits + hashBits + randBits);
    if (uuid != 0 && uuid != INT64_MIN) {
      return llabs(uuid);
    }
  } while (true);
}
1036

1037
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
1✔
1038
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1039
  int32_t code = mndAcquireRpc(pMnode);
1✔
1040
  if (code < 0) {
1!
1041
    TAOS_RETURN(code);
×
1042
  } else if (code == 1) {
1!
1043
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1044
  }
1045

1046
  SSdb   *pSdb = pMnode->pSdb;
1✔
1047
  int64_t ms = taosGetTimestampMs();
1✔
1048

1049
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
1✔
1050
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
1✔
1051
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
1✔
1052
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
1✔
1053
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
1!
1054
      pStbInfo->stbs == NULL) {
1!
1055
    mndReleaseRpc(pMnode);
×
1056
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1057
    if (terrno != 0) code = terrno;
×
1058
    TAOS_RETURN(code);
×
1059
  }
1060

1061
  // cluster info
1062
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
1✔
1063
  pClusterInfo->monitor_interval = tsMonitorInterval;
1✔
1064
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
1✔
1065
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
1✔
1066
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
1✔
1067
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
1✔
1068
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
1✔
1069

1070
  void *pIter = NULL;
1✔
1071
  while (1) {
1✔
1072
    SDnodeObj *pObj = NULL;
2✔
1073
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
2✔
1074
    if (pIter == NULL) break;
2✔
1075

1076
    SMonDnodeDesc desc = {0};
1✔
1077
    desc.dnode_id = pObj->id;
1✔
1078
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
1✔
1079
    if (mndIsDnodeOnline(pObj, ms)) {
1!
1080
      tstrncpy(desc.status, "ready", sizeof(desc.status));
1✔
1081
    } else {
1082
      tstrncpy(desc.status, "offline", sizeof(desc.status));
×
1083
    }
1084
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
2!
1085
      mError("failed put dnode into array, but continue at this monitor report")
×
1086
    }
1087
    sdbRelease(pSdb, pObj);
1✔
1088
  }
1089

1090
  pIter = NULL;
1✔
1091
  while (1) {
1✔
1092
    SMnodeObj *pObj = NULL;
2✔
1093
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
2✔
1094
    if (pIter == NULL) break;
2✔
1095

1096
    SMonMnodeDesc desc = {0};
1✔
1097
    desc.mnode_id = pObj->id;
1✔
1098
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
1✔
1099

1100
    if (pObj->id == pMnode->selfDnodeId) {
1!
1101
      pClusterInfo->first_ep_dnode_id = pObj->id;
1✔
1102
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
1✔
1103
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1104
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
1✔
1105
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1106
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
1✔
1107
      desc.syncState = TAOS_SYNC_STATE_LEADER;
1✔
1108
    } else {
1109
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1110
      desc.syncState = pObj->syncState;
×
1111
    }
1112
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
2!
1113
      mError("failed to put mnode into array, but continue at this monitor report");
×
1114
    }
1115
    sdbRelease(pSdb, pObj);
1✔
1116
  }
1117

1118
  // vgroup info
1119
  pIter = NULL;
1✔
1120
  while (1) {
6✔
1121
    SVgObj *pVgroup = NULL;
7✔
1122
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
7✔
1123
    if (pIter == NULL) break;
7✔
1124

1125
    if (pVgroup->mountVgId) {
6!
1126
      sdbRelease(pSdb, pVgroup);
×
1127
      continue;
×
1128
    }
1129

1130
    pClusterInfo->vgroups_total++;
6✔
1131
    pClusterInfo->tbs_total += pVgroup->numOfTables;
6✔
1132

1133
    SMonVgroupDesc desc = {0};
6✔
1134
    desc.vgroup_id = pVgroup->vgId;
6✔
1135

1136
    SName name = {0};
6✔
1137
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
6✔
1138
    if (code < 0) {
6!
1139
      mError("failed to get db name since %s", tstrerror(code));
×
1140
      sdbRelease(pSdb, pVgroup);
×
1141
      TAOS_RETURN(code);
×
1142
    }
1143
    (void)tNameGetDbName(&name, desc.database_name);
6✔
1144

1145
    desc.tables_num = pVgroup->numOfTables;
6✔
1146
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
6✔
1147
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
6✔
1148
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
12✔
1149
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
6✔
1150
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
6✔
1151
      pVnDesc->dnode_id = pVgid->dnodeId;
6✔
1152
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
6✔
1153
      pVnDesc->syncState = pVgid->syncState;
6✔
1154
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
6!
1155
        tstrncpy(desc.status, "ready", sizeof(desc.status));
6✔
1156
        pClusterInfo->vgroups_alive++;
6✔
1157
      }
1158
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
6!
1159
        pClusterInfo->vnodes_alive++;
6✔
1160
      }
1161
      pClusterInfo->vnodes_total++;
6✔
1162
    }
1163

1164
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
12!
1165
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1166
    }
1167
    sdbRelease(pSdb, pVgroup);
6✔
1168
  }
1169

1170
  // stb info
1171
  pIter = NULL;
1✔
1172
  while (1) {
1✔
1173
    SStbObj *pStb = NULL;
2✔
1174
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
2✔
1175
    if (pIter == NULL) break;
2✔
1176

1177
    SMonStbDesc desc = {0};
1✔
1178

1179
    SName name1 = {0};
1✔
1180
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
1✔
1181
    if (code < 0) {
1!
1182
      mError("failed to get db name since %s", tstrerror(code));
×
1183
      sdbRelease(pSdb, pStb);
×
1184
      TAOS_RETURN(code);
×
1185
    }
1186
    (void)tNameGetDbName(&name1, desc.database_name);
1✔
1187

1188
    SName name2 = {0};
1✔
1189
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
1✔
1190
    if (code < 0) {
1!
1191
      mError("failed to get table name since %s", tstrerror(code));
×
1192
      sdbRelease(pSdb, pStb);
×
1193
      TAOS_RETURN(code);
×
1194
    }
1195
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
1✔
1196

1197
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
2!
1198
      mError("failed to put stb into array, but continue at this monitor report");
×
1199
    }
1200
    sdbRelease(pSdb, pStb);
1✔
1201
  }
1202

1203
  // grant info
1204
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
1✔
1205
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
1✔
1206
  if (pMnode->grant.expireTimeMS == 0) {
1!
1207
    pGrantInfo->expire_time = 0;
×
1208
    pGrantInfo->timeseries_total = 0;
×
1209
  }
1210

1211
  mndReleaseRpc(pMnode);
1✔
1212
  TAOS_RETURN(code);
1✔
1213
}
1214

1215
// Forwards to syncResetTimer for this mnode's sync handle, passing the
// configured tsMnodeElectIntervalMs and tsMnodeHeartbeatIntervalMs.
// Returns the result of syncResetTimer.
int32_t mndResetTimer(SMnode *pMnode) {
  int64_t rid = pMnode->syncMgmt.sync;
  return syncResetTimer(rid, tsMnodeElectIntervalMs, tsMnodeHeartbeatIntervalMs);
}
1218

1219
// Copies the current sync state of the mnode (state, restored flag, term
// and roleTimeMs) into pLoad. Always returns 0.
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  SSyncState syncState = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = syncState.state;
  pLoad->syncRestore = syncState.restored;
  pLoad->syncTerm = syncState.term;
  pLoad->roleTimeMs = syncState.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);

  return 0;
}
1230

1231
// Returns the roleTimeMs field of the sync state reported by syncGetState.
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  return syncGetState(pMnode->syncMgmt.sync).roleTimeMs;
}
1235

1236
// Sets pMnode->restored under the write lock and logs the new value.
// When the flag is cleared, the call additionally blocks, polling every
// 3 ms, until pMnode->rpcRef has dropped to zero or below.
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) return;

  // Wait for outstanding rpc references to be released.
  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
1,929✔
1253

1254
// Returns the current value of pMnode->restored (read without taking the lock).
bool mndGetRestored(SMnode *pMnode) {
  return pMnode->restored;
}
×
1255

1256
// Marks the mnode as stopped. The flag is written under the write lock;
// mndCheckMnodeState reads it under the read lock of the same rwlock.
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->stopped = true;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
1,929✔
1262

1263
// Returns the current value of pMnode->stopped (read without taking the lock).
bool mndGetStop(SMnode *pMnode) {
  return pMnode->stopped;
}
1,291,358✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc