• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #4506

15 Jul 2025 12:33AM UTC coverage: 62.026% (-0.7%) from 62.706%
#4506

push

travis-ci

web-flow
docs: update stream docs (#31874)

155391 of 320094 branches covered (48.55%)

Branch coverage included in aggregate %.

240721 of 318525 relevant lines covered (75.57%)

6529048.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.29
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndSsMigrate.h"
24
#include "mndConfig.h"
25
#include "mndConsumer.h"
26
#include "mndDb.h"
27
#include "mndDnode.h"
28
#include "mndFunc.h"
29
#include "mndGrant.h"
30
#include "mndIndex.h"
31
#include "mndInfoSchema.h"
32
#include "mndMnode.h"
33
#include "mndMount.h"
34
#include "mndPerfSchema.h"
35
#include "mndPrivilege.h"
36
#include "mndProfile.h"
37
#include "mndQnode.h"
38
#include "mndQuery.h"
39
#include "mndShow.h"
40
#include "mndSma.h"
41
#include "mndSnode.h"
42
#include "mndStb.h"
43
#include "mndStream.h"
44
#include "mndSubscribe.h"
45
#include "mndSync.h"
46
#include "mndTelem.h"
47
#include "mndTopic.h"
48
#include "mndTrans.h"
49
#include "mndUser.h"
50
#include "mndVgroup.h"
51
#include "mndView.h"
52
#include "mndBnode.h"
53

54
// Takes one RPC reference on the mnode while holding its read lock.
// Returns 0 when the reference was taken, TSDB_CODE_APP_IS_STOPPING when the
// mnode is stopped, and 1 when this mnode is not the leader.
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t result;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    result = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
    result = 0;
  } else {
    result = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(result);
}
72

73
// Drops one RPC reference on the mnode; the decrement happens under the read lock.
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
690,843✔
83

84
// Builds a serialized, zero-initialized SMTimerReq in a buffer obtained from rpcMallocCont.
//
// pContLen: receives the serialized length; written only on success.
// Returns the buffer on success, or NULL when the length query, the allocation
// or the serialization fails. terrno is reset to 0 on entry.
static void *mndBuildTimerMsg(int32_t *pContLen) {
  terrno = 0;
  SMTimerReq timerReq = {0};

  // first pass with a NULL buffer only computes the required length
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
  if (contLen <= 0) return NULL;

  void *pReq = rpcMallocCont(contLen);
  if (pReq == NULL) return NULL;

  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
    mError("failed to serialize timer msg since %s", terrstr());
    // do not hand an unserialized buffer to the callers
    rpcFreeCont(pReq);
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}
99

100
// Puts a TDMT_MND_TRANS_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built.
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
30,571✔
112

113
// Puts a TDMT_MND_COMPACT_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built.
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,138✔
125

126
// Puts a TDMT_MND_TTL_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built, so a message
// with a NULL payload is never enqueued.
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,727✔
136

137
// Puts a TDMT_MND_TRIM_DB_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built, so a message
// with a NULL payload is never enqueued.
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
147

148
// Puts a TDMT_MND_SSMIGRATE_DB_TIMER message on the write queue.
// Returns without doing anything when grantCheck(TSDB_GRANT_SHARED_STORAGE)
// does not succeed, or when the timer message cannot be built.
static void mndPullupSsMigrateDb(SMnode *pMnode) {
  if (grantCheck(TSDB_GRANT_SHARED_STORAGE) != TSDB_CODE_SUCCESS) {
    return;
  }

  mTrace("pullup ssmigrate db");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;  // never enqueue a message with a NULL payload

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_SSMIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
161

162
// Puts a TDMT_MND_QUERY_SSMIGRATE_PROGRESS_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built, so a message
// with a NULL payload is never enqueued.
static void mndPullupQuerySsMigrateProgress(SMnode *pMnode) {
  mTrace("pullup query ssmigrate progress");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_QUERY_SSMIGRATE_PROGRESS_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
171

172
// Puts a TDMT_MND_ARB_HEARTBEAT_TIMER message (flagged noResp) on the arb queue.
// Returns the result of tmsgPutToQueue. When the timer message cannot be built,
// returns terrno if it is set, otherwise TSDB_CODE_OUT_OF_MEMORY, instead of
// enqueueing a message with a NULL payload.
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return (terrno != 0) ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
179

180
// Puts a TDMT_MND_ARB_CHECK_SYNC_TIMER message (flagged noResp) on the arb queue.
// Returns the result of tmsgPutToQueue. When the timer message cannot be built,
// returns terrno if it is set, otherwise TSDB_CODE_OUT_OF_MEMORY, instead of
// enqueueing a message with a NULL payload.
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return (terrno != 0) ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
187

188
// Puts a TDMT_MND_TMQ_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built.
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
29,739✔
198

199
// Allocates an SMStreamDoCheckpointMsg-sized payload and puts it on the write
// queue as TDMT_MND_STREAM_BEGIN_CHECKPOINT. Does nothing if allocation fails.
static void mndStreamCheckpointTimer(SMnode *pMnode) {
  SMStreamDoCheckpointMsg *pCheckpointMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
  if (pCheckpointMsg == NULL) return;

  int32_t msgSize = sizeof(SMStreamDoCheckpointMsg);
  SRpcMsg msg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pCheckpointMsg, .contLen = msgSize};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,363✔
210

211
// Puts a TDMT_MND_NODECHECK_TIMER message on the read queue.
// Nothing is queued when the timer message cannot be built.
static void mndStreamCheckNode(SMnode *pMnode) {
  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &msg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
38✔
222

223
// Puts a TDMT_MND_CHECK_STREAM_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built.
static void mndStreamCheckStatus(SMnode *pMnode) {
  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {.msgType = TDMT_MND_CHECK_STREAM_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1✔
234

235
// Puts a TDMT_MND_STREAM_CONSEN_TIMER message on the write queue.
// Nothing is queued when the timer message cannot be built.
static void mndStreamConsensusChkpt(SMnode *pMnode) {
  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {.msgType = TDMT_MND_STREAM_CONSEN_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,363✔
246

247
// Puts a TDMT_MND_TELEM_TIMER message on the read queue.
// Nothing is queued when the timer message cannot be built.
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &msg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2✔
259

260
// Puts a TDMT_MND_GRANT_HB_TIMER message on the write queue, with
// info.notFreeAhandle set and a zero ahandle.
// Nothing is queued when the timer message cannot be built.
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {0};
  msg.msgType = TDMT_MND_GRANT_HB_TIMER;
  msg.pCont = pCont;
  msg.contLen = len;
  msg.info.notFreeAhandle = 1;
  msg.info.ahandle = 0;

  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
5,834✔
276

277
// Puts a TDMT_MND_UPTIME_TIMER message on the write queue, with
// info.notFreeAhandle set and a zero ahandle.
// Nothing is queued when the timer message cannot be built.
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increate uptime");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) return;

  SRpcMsg msg = {0};
  msg.msgType = TDMT_MND_UPTIME_TIMER;
  msg.pCont = pCont;
  msg.contLen = len;
  msg.info.notFreeAhandle = 1;
  msg.info.ahandle = 0;

  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
19✔
293

294
// Walks every vgroup in the sdb and marks the replica hosted on `dnodeId`
// as TAOS_SYNC_STATE_OFFLINE (clearing syncRestore, syncCanRead and startTimeMs).
// When a replica actually changed state, the owning db's stateTs is set to
// `curMs` unless it already has that value.
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb   *pSdb = pMnode->pSdb;
  void   *pIter = NULL;
  SVgObj *pVgroup = NULL;

  while ((pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup)) != NULL) {
    bool changed = false;

    // at most one replica is examined per vgroup: the first one on this dnode
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[i];
      if (pGid->dnodeId != dnodeId) continue;

      if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
        mInfo(
            "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:error restored:0 "
            "canRead:0",
            pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
        pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
        pGid->syncRestore = 0;
        pGid->syncCanRead = 0;
        pGid->startTimeMs = 0;
        changed = true;
      }
      break;
    }

    if (changed) {
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, old newTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
    pVgroup = NULL;  // start every fetch with a cleared output pointer
  }
}
599✔
335

336
// Iterates all dnodes and, for each one mndIsDnodeOnline reports as not
// online, marks its vgroup replicas offline. The whole scan runs while
// holding an RPC reference; it is skipped if the reference cannot be taken.
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb      *pSdb = pMnode->pSdb;
  int64_t    nowMs = taosGetTimestampMs();
  void      *pIter = NULL;
  SDnodeObj *pDnode = NULL;

  while ((pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode)) != NULL) {
    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
    pDnode = NULL;  // start every fetch with a cleared output pointer
  }

  mndReleaseRpc(pMnode);
}
360

361
// Reports whether the timer thread should skip this round.
// Returns true when syncGetState left terrno set, when the sync state is not
// leader (terrno = TSDB_CODE_SYN_NOT_LEADER), or when either the sync state or
// the mnode is not yet restored (terrno = TSDB_CODE_SYN_RESTORING).
// Returns false otherwise. The state is sampled under the mnode read lock.
static bool mnodeIsNotLeader(SMnode *pMnode) {
  bool    notLeader = true;
  int32_t reason = 0;  // error to publish through terrno after unlocking; 0 keeps terrno untouched

  terrno = 0;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);

  if (terrno != 0) {
    // syncGetState failed; its terrno is left as is
  } else if (state.state != TAOS_SYNC_STATE_LEADER) {
    reason = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!state.restored || !pMnode->restored) {
    reason = TSDB_CODE_SYN_RESTORING;
  } else {
    notLeader = false;
  }

  (void)taosThreadRwlockUnlock(&pMnode->lock);
  if (reason != 0) {
    terrno = reason;
  }
  return notLeader;
}
383

384
static int32_t minCronTime() {
×
385
  int32_t min = INT32_MAX;
×
386
  min = TMIN(min, tsTtlPushIntervalSec);
×
387
  min = TMIN(min, tsTrimVDbIntervalSec);
×
388
  min = TMIN(min, tsSsAutoMigrateIntervalSec);
×
389
  min = TMIN(min, tsTransPullupInterval);
×
390
  min = TMIN(min, tsCompactPullupInterval);
×
391
  min = TMIN(min, tsMqRebalanceInterval);
×
392
  min = TMIN(min, tsStreamCheckpointInterval);
×
393
  min = TMIN(min, tsStreamNodeCheckInterval);
×
394
  min = TMIN(min, tsArbHeartBeatIntervalSec);
×
395
  min = TMIN(min, tsArbCheckSyncIntervalSec);
×
396

397
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
398
  min = TMIN(min, telemInt);
×
399
  min = TMIN(min, tsGrantHBInterval);
×
400
  min = TMIN(min, tsUptimeInterval);
×
401

402
  return min <= 1 ? 2 : min;
×
403
}
404
// Fires every periodic "pullup" task whose interval divides `sec`.
// `sec` is the caller's second counter; each task below is triggered when
// `sec` is a multiple of its interval. The order of the checks is the order
// in which the timer messages are queued.
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
  int32_t code = 0;

#ifndef TD_ASTRA
  // grant heartbeat goes first so that it takes effect as soon as possible
  if ((sec % tsGrantHBInterval) == 0) {
    mndPullupGrant(pMnode);
  }

  if ((sec % tsTtlPushIntervalSec) == 0) {
    mndPullupTtl(pMnode);
  }

  if ((sec % tsTrimVDbIntervalSec) == 0) {
    mndPullupTrimDb(pMnode);
  }
#endif

#ifdef USE_SHARED_STORAGE
  if (tsSsEnabled) {
    if ((sec % 10) == 0) {  // TODO: make 10 to be configurable
      mndPullupQuerySsMigrateProgress(pMnode);
    }
    if (tsSsEnabled == 2 && (sec % tsSsAutoMigrateIntervalSec) == 0) {
      mndPullupSsMigrateDb(pMnode);
    }
  }
#endif

  if ((sec % tsTransPullupInterval) == 0) {
    mndPullupTrans(pMnode);
  }

  if ((sec % tsCompactPullupInterval) == 0) {
    mndPullupCompacts(pMnode);
  }

#ifdef USE_TOPIC
  if ((sec % tsMqRebalanceInterval) == 0) {
    mndCalMqRebalance(pMnode);
  }
#endif

#ifdef USE_STREAM
  // checkpoint info is sent every 30 seconds
  if ((sec % 30) == 0) {
    mndStreamCheckpointTimer(pMnode);
  }

  if ((sec % tsStreamNodeCheckInterval) == 0) {
    mndStreamCheckNode(pMnode);
  }

  if ((sec % (tsStreamFailedTimeout / 1000)) == 0) {
    mndStreamCheckStatus(pMnode);
  }

  if ((sec % 30) == 0) {
    mndStreamConsensusChkpt(pMnode);
  }

  if (tsTelemInterval > 0 && (sec % tsTelemInterval) == 0) {
    mndPullupTelem(pMnode);
  }
#endif

  if ((sec % tsUptimeInterval) == 0) {
    mndIncreaseUpTime(pMnode);
  }

#ifndef TD_ASTRA
  if ((sec % (tsArbHeartBeatIntervalSec)) == 0) {
    code = mndPullupArbHeartbeat(pMnode);
    if (code != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
    }
  }

  if ((sec % (tsArbCheckSyncIntervalSec)) == 0) {
    code = mndPullupArbCheckSync(pMnode);
    if (code != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
    }
  }
#endif
}
60,637✔
478
// Runs the periodic checks: the dnode offline check every
// (tsStatusInterval * 5) seconds and the sync timeout check every
// (MNODE_TIMEOUT_SEC / 2) seconds, based on the second counter `sec`.
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
  const bool dnodeCheckDue = (sec % (tsStatusInterval * 5)) == 0;
  if (dnodeCheckDue) {
    mndCheckDnodeOffline(pMnode);
  }

  const bool syncCheckDue = (sec % (MNODE_TIMEOUT_SEC / 2)) == 0;
  if (syncCheckDue) {
    mndSyncCheckTimeout(pMnode);
  }
}
60,637✔
486

487
static void *mndThreadFp(void *param) {
2,475✔
488
  SMnode *pMnode = param;
2,475✔
489
  int64_t lastTime = 0;
2,475✔
490
  setThreadName("mnode-timer");
2,475✔
491

492
  while (1) {
670,747✔
493
    lastTime++;
673,222✔
494
    taosMsleep(100);
673,222✔
495

496
    if (mndGetStop(pMnode)) break;
673,222✔
497
    if (lastTime % 10 != 0) continue;
670,747✔
498

499
    if (mnodeIsNotLeader(pMnode)) {
65,888✔
500
      mTrace("timer not process since mnode is not leader");
5,251!
501
      continue;
5,251✔
502
    }
503

504
    int64_t sec = lastTime / 10;
60,637✔
505
    mndDoTimerCheckTask(pMnode, sec);
60,637✔
506

507
    mndDoTimerPullupTask(pMnode, sec);
60,637✔
508
  }
509

510
  return NULL;
2,475✔
511
}
512

513
// Starts the joinable "mnode-timer" thread running mndThreadFp.
// Returns 0 on success or the error code from taosThreadCreate.
// The thread attribute object is destroyed on both the success and the
// failure path.
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif

  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadFp, pMnode);
  // the attribute object is no longer needed once creation has been attempted
  (void)taosThreadAttrDestroy(&thAttr);

  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
530

531
// Joins the timer thread and clears its handle, if the handle is valid.
static void mndCleanupTimer(SMnode *pMnode) {
  if (!taosCheckPthreadValid(pMnode->thread)) {
    return;
  }

  (void)taosThreadJoin(pMnode->thread, NULL);
  taosThreadClear(&pMnode->thread);
}
2,475✔
537

538
// Stores a copy of `path` in pMnode->path and creates that directory.
// Returns 0 on success, otherwise terrno as left by taosStrdup / taosMkDir.
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);
  if (pMnode->path == NULL) {
    TAOS_RETURN(terrno);
  }

  if (taosMkDir(pMnode->path) != 0) {
    TAOS_RETURN(terrno);
  }

  TAOS_RETURN(0);
}
553

554
// Opens the mnode WAL under "<pMnode->path><TD_DIRSEP>wal" and stores the
// handle in pMnode->pWal.
// Returns 0 on success; TSDB_CODE_DNODE_INVALID_ENCRYPTKEY when WAL encryption
// is selected but tsEncryptKey is empty; otherwise terrno if set, or
// TSDB_CODE_MND_RETURN_VALUE_NULL, when walOpen fails.
static int32_t mndInitWal(SMnode *pMnode) {
  char walPath[PATH_MAX + 20] = {0};
  (void)snprintf(walPath, sizeof(walPath), "%s%swal", pMnode->path, TD_DIRSEP);

  SWalCfg walCfg = {.vgId = 1,
                    .fsyncPeriod = 0,
                    .rollPeriod = -1,
                    .segSize = -1,
                    .committed = -1,
                    .retentionPeriod = 0,
                    .retentionSize = 0,
                    .level = TAOS_WAL_FSYNC,
                    .encryptAlgorithm = 0,
                    .encryptKey = {0}};

#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
    walCfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
    if (tsEncryptKey[0] == '\0') {
      TAOS_RETURN(TSDB_CODE_DNODE_INVALID_ENCRYPTKEY);
    }
    tstrncpy(walCfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walPath, &walCfg);
  if (pMnode->pWal == NULL) {
    // prefer the specific error left in terrno, if any
    int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    mError("failed to open wal since %s. wal:%s", tstrerror(code), walPath);
    TAOS_RETURN(code);
  }

  TAOS_RETURN(0);
}
591

592
// Closes the mnode WAL if it is open and clears the handle.
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) {
    return;
  }

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
2,475✔
598

599
// Creates the sdb for this mnode from its path and WAL and stores it in
// pMnode->pSdb.
// Returns 0 on success; on failure returns terrno if set, otherwise
// TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt sdbOpt = {0};
  sdbOpt.path = pMnode->path;
  sdbOpt.pMnode = pMnode;
  sdbOpt.pWal = pMnode->pWal;

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(code);
}
615

616
// Loads the sdb from file unless the mnode is being deployed, then publishes
// the sdb commit index as the applied index.
// Returns the result of sdbReadFile, or 0 when no read was needed.
static int32_t mndOpenSdb(SMnode *pMnode) {
  int32_t code = pMnode->deploy ? 0 : sdbReadFile(pMnode->pSdb);

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);

  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);
  return code;
}
627

628
// Releases the sdb if it exists and clears the pointer.
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) {
    return;
  }

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
2,475✔
634

635
// Appends one init/cleanup step to pMnode->pSteps.
// Returns 0 on success, or terrno when the array push fails.
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep newStep = {.name = name, .initFp = initFp, .cleanupFp = cleanupFp};

  if (taosArrayPush(pMnode->pSteps, &newStep) != NULL) {
    TAOS_RETURN(0);
  }

  TAOS_RETURN(terrno);
}
646

647
// Registers every mnode init/cleanup step, in execution order, by calling
// mndAllocStep for each table entry. Stops at and returns the first non-zero
// code from mndAllocStep; returns 0 when all steps were registered.
static int32_t mndInitSteps(SMnode *pMnode) {
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } stepTable[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-bnode", mndInitBnode, mndCleanupBnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
#ifdef USE_MOUNT
      {"mnode-mount", mndInitMount, mndCleanupMount},
      {"mnode-mount-log", mndInitMountLog, mndCleanupMountLog},
#endif
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-ssmigrate", mndInitSsMigrate, mndCleanupSsMigrate},
      // second "mnode-sdb" entry: opens the sdb after the steps above are registered
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  const int32_t numOfSteps = (int32_t)(sizeof(stepTable) / sizeof(stepTable[0]));
  for (int32_t i = 0; i < numOfSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, stepTable[i].name, stepTable[i].initFp, stepTable[i].cleanupFp));
  }
  return 0;
}
692

693
// Runs the cleanup callbacks of steps [pos .. 0] in reverse order, then
// destroys the step array. A `pos` of -1 means "start from the last step".
// Does nothing when the step array has already been released.
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  if (pos == -1) {
    pos = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  int32_t idx = pos;
  while (idx >= 0) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    if (pStep->cleanupFp != NULL) {
      (*pStep->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
712

713
// Executes the init callback of every registered step in order.
// On the first failure, cleans up the steps from the failing one backwards
// and returns that step's error code. On success, caches the cluster id in
// pMnode->clusterId and returns 0.
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numOfSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t idx = 0; idx < numOfSteps; ++idx) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    if (pStep->initFp == NULL) continue;

    int32_t code = (*pStep->initFp)(pMnode);
    if (code != 0) {
      mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
      mndCleanupSteps(pMnode, idx);
      TAOS_RETURN(code);
    }

    mInfo("%s is initialized", pStep->name);
    tmsgReportStartup(pStep->name, "initialized");
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
733

734
// Copies the message callbacks, the dnode id and the sync replica settings
// from `pOption` into the mnode.
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  pMnode->msgCb = pOption->msgCb;
  pMnode->selfDnodeId = pOption->dnodeId;

  // sync settings; the copy sizes are those of the option's arrays
  pMnode->syncMgmt.selfIndex = pOption->selfIndex;
  pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
  pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
  pMnode->syncMgmt.lastIndex = pOption->lastIndex;
  (void)memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
  (void)memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
}
2,476✔
744

745
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
2,476✔
746
  terrno = 0;
2,476✔
747
  mInfo("start to open mnode in %s", path);
2,476!
748

749
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
2,476!
750
  if (pMnode == NULL) {
2,476!
751
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
752
    mError("failed to open mnode since %s", terrstr());
×
753
    return NULL;
×
754
  }
755
  (void)memset(pMnode, 0, sizeof(SMnode));
2,476✔
756

757
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
2,476✔
758
  if (code != 0) {
2,476!
759
    taosMemoryFree(pMnode);
×
760
    mError("failed to open mnode lock since %s", tstrerror(code));
×
761
    return NULL;
×
762
  }
763

764
  char timestr[24] = "1970-01-01 00:00:00.00";
2,476✔
765
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
2,476✔
766
  if (code < 0) {
2,476!
767
    mError("failed to parse time since %s", tstrerror(code));
×
768
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
769
    taosMemoryFree(pMnode);
×
770
    return NULL;
×
771
  }
772
  mndSetOptions(pMnode, pOption);
2,476✔
773

774
  pMnode->deploy = pOption->deploy;
2,476✔
775
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
2,476✔
776
  if (pMnode->pSteps == NULL) {
2,476!
777
    taosMemoryFree(pMnode);
×
778
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
779
    mError("failed to open mnode since %s", terrstr());
×
780
    return NULL;
×
781
  }
782

783
  code = mndCreateDir(pMnode, path);
2,476✔
784
  if (code != 0) {
2,476!
785
    code = terrno;
×
786
    mError("failed to open mnode since %s", tstrerror(code));
×
787
    mndClose(pMnode);
×
788
    terrno = code;
×
789
    return NULL;
×
790
  }
791

792
  code = mndInitSteps(pMnode);
2,476✔
793
  if (code != 0) {
2,476!
794
    code = terrno;
×
795
    mError("failed to open mnode since %s", tstrerror(code));
×
796
    mndClose(pMnode);
×
797
    terrno = code;
×
798
    return NULL;
×
799
  }
800

801
  code = mndExecSteps(pMnode);
2,476✔
802
  if (code != 0) {
2,476!
803
    code = terrno;
×
804
    mError("failed to open mnode since %s", tstrerror(code));
×
805
    mndClose(pMnode);
×
806
    terrno = code;
×
807
    return NULL;
×
808
  }
809

810
  mInfo("mnode open successfully");
2,476!
811
  return pMnode;
2,476✔
812
}
813

814
/*
 * Pre-shutdown hook for the mnode.
 *
 * Asks the sync module to transfer leadership, notifies it that a stop is
 * imminent, and then writes the sdb file. Failures of the transfer and of the
 * sdb write are only logged; the function never reports an error.
 * A NULL pMnode is a no-op.
 */
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  // TODO check return value
  int32_t ret = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (ret < 0) {
    mError("failed to transfer leader since %s", tstrerror(ret));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  ret = sdbWriteFile(pMnode->pSdb, 0);
  if (ret < 0) {
    mError("failed to write sdb since %s", tstrerror(ret));
  }
}
2,475✔
829

830
/*
 * Close an mnode: run the cleanup steps, then free the stored path and the
 * SMnode object itself. The caller must not touch pMnode afterwards.
 * A NULL pMnode is a no-op.
 */
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
2,475✔
839

840
/*
 * Start the mnode.
 *
 * Starts sync, and when the mnode was opened with the deploy flag, deploys
 * the sdb and marks the mnode as restored. Finally resets the grant state and
 * initializes the timer.
 *
 * Returns -1 if the sdb deploy fails, otherwise the result of mndInitTimer().
 */
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  const bool needDeploy = pMnode->deploy;
  if (needDeploy && sdbDeploy(pMnode->pSdb) != 0) {
    mError("failed to deploy sdb while start mnode");
    return -1;
  }
  if (needDeploy) {
    mndSetRestored(pMnode, true);
  }

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
853

854
/* Forwards to syncIsCatchUp() for this mnode's sync handle and returns its result. */
int32_t mndIsCatchUp(SMnode *pMnode) {
  return syncIsCatchUp(pMnode->syncMgmt.sync);
}
858

859
/* Returns the sync role reported by syncGetRole() for this mnode's sync handle. */
ESyncRole mndGetRole(SMnode *pMnode) {
  return syncGetRole(pMnode->syncMgmt.sync);
}
863

864
/* Returns the term reported by syncGetTerm() for this mnode's sync handle. */
int64_t mndGetTerm(SMnode *pMnode) {
  return syncGetTerm(pMnode->syncMgmt.sync);
}
868

869
/*
 * Fetches the arbitration token of this mnode's sync handle into outToken.
 * Returns whatever syncGetArbToken() returns; the required size of outToken
 * is defined by that function and is not checked here.
 */
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  int64_t rid = pMnode->syncMgmt.sync;
  return syncGetArbToken(rid, outToken);
}
49,331✔
870

871
/*
 * Stop the mnode. The order matters as written: first flag the mnode as
 * stopped, then stop sync, and only then tear down the timer.
 */
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);       // raise the stopped flag first
  mndSyncStop(pMnode);      // then stop sync
  mndCleanupTimer(pMnode);  // finally clean up the timer
}
2,475✔
876

877
/*
 * Hand a sync message to the sync module of the mnode referenced by
 * pMsg->info.node.
 *
 * Returns the code from syncProcessMsg(); a non-zero code is logged.
 */
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;

  // Not referenced directly below; presumably picked up by name by the mG*
  // logging macros, so it must keep the name `trace`.
  const STraceId *trace = &pMsg->info.traceId;
  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  int32_t ret = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (ret == 0) {
    return ret;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(ret), ret);
  return ret;
}
892

893
/*
 * Decide whether this mnode may process pMsg right now.
 *
 * - Non-request messages and the TDMT_SCH_* query/fetch/drop/notify messages
 *   are always accepted (return 0) without touching the mnode state.
 * - Otherwise, under the mnode read lock:
 *     stopped                      -> TSDB_CODE_APP_IS_STOPPING
 *     syncGetState() sets terrno   -> that terrno
 *     not leader                   -> TSDB_CODE_SYN_NOT_LEADER
 *     sync or mnode not restored   -> TSDB_CODE_SYN_RESTORING
 *     else                         -> rpcRef is incremented and 0 is returned
 * - For the not-leader / restoring cases, internal timer messages are rejected
 *   with the code only; every other message additionally gets the mnode epSet
 *   serialized into pMsg->info.rsp so the sender can be redirected.
 *
 * Returns 0 when the message may be processed, -1 when no mnode endpoint is
 * known, otherwise one of the codes above.
 */
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;
  if (!IsReq(pMsg)) TAOS_RETURN(code);
  if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK ||
      pMsg->msgType == TDMT_SCH_TASK_NOTIFY) {
    TAOS_RETURN(code);
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_APP_IS_STOPPING;
    TAOS_RETURN(code);
  }

  // syncGetState() reports failure through terrno, so clear it first.
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  if (state.state != TAOS_SYNC_STATE_LEADER) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_SYN_NOT_LEADER;
    goto _OVER;
  }

  if (!state.restored || !pMnode->restored) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_SYN_RESTORING;
    goto _OVER;
  }

  // Accepted: take an rpc reference while still holding the read lock.
  (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);

  (void)taosThreadRwlockUnlock(&pMnode->lock);
  TAOS_RETURN(code);

_OVER:
  // Internal timer messages have no remote sender to redirect.
  if (pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
      pMsg->msgType == TDMT_MND_SSMIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER ||
      pMsg->msgType == TDMT_MND_QUERY_SSMIGRATE_PROGRESS_TIMER) {
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
           pMnode->stopped, state.restored, syncStr(state.state));
    TAOS_RETURN(code);
  }

  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) return -1;

  for (int32_t i = 0; i < epSet.numOfEps; ++i) {
    mDebug("mnode index:%d, ep:%s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port);
  }

  // First call only sizes the payload. A negative result means the epSet
  // cannot be encoded, so skip the redirect payload instead of passing a
  // negative length to rpcMallocCont() and rspLen.
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
  if (contLen < 0) {
    mError("failed to serialize ep set");
    TAOS_RETURN(code);
  }

  pMsg->info.rsp = rpcMallocCont(contLen);
  if (pMsg->info.rsp != NULL) {
    // NOTE(review): on failure here the (unfilled) buffer is still attached as
    // the redirect response, as before — confirm whether it should be dropped.
    if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
      mError("failed to serialize ep set");
    }
    pMsg->info.hasEpSet = 1;
    pMsg->info.rspLen = contLen;
  }

  TAOS_RETURN(code);
}
983

984
/*
 * Dispatch an rpc message to its registered mnode handler.
 *
 * Looks up the handler for pMsg->msgType (plain handler first, extended
 * handler second), verifies via mndCheckMnodeState() that the mnode may serve
 * the message, runs the handler and releases the rpc reference.
 *
 * Returns TSDB_CODE_MSG_NOT_PROCESSED when no handler is registered (or the
 * message type maps outside the handler tables), the mndCheckMnodeState()
 * code when the mnode cannot serve the message, otherwise the handler's code
 * (a handler result of -1 is replaced by terrno).
 *
 * NOTE(review): mndReleaseRpc() is called for every message that passes the
 * state check, while mndCheckMnodeState() returns 0 without incrementing
 * rpcRef for non-request and TDMT_SCH_* messages — confirm the ref counting
 * is balanced.
 */
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

  // Bound the table index the same way mndSetMsgHandle()/mndSetMsgHandleExt()
  // do, so an unexpected msgType cannot read past the handler arrays.
  int32_t     msgIdx = TMSG_INDEX(pMsg->msgType);
  MndMsgFp    fp = NULL;
  MndMsgFpExt fpExt = NULL;
  if (msgIdx >= 0 && msgIdx < TDMT_MAX) {
    fp = pMnode->msgFp[msgIdx];
    if (fp == NULL) {
      fpExt = pMnode->msgFpExt[msgIdx];
    }
  }
  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp) {
    code = (*fp)(pMsg);
  } else {
    code = (*fpExt)(pMsg, pQueueInfo);
  }
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
  } else if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
  } else {
    // TODO remove this wrong set code
    if (code == -1) {
      code = terrno;
    }
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
            TMSG_INFO(pMsg->msgType));
  }

  TAOS_RETURN(code);
}
1024

1025
/*
 * Register fp as the handler for msgType. Message types whose table index is
 * not below TDMT_MAX are silently ignored.
 */
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFp[slot] = fp;
}
490,248✔
1031

1032
/*
 * Register fp as the extended handler (the variant that also receives the
 * queue info) for msgType. Message types whose table index is not below
 * TDMT_MAX are silently ignored.
 */
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) return;

  pMnode->msgFpExt[slot] = fp;
}
19,808✔
1038

1039
// Note: uid 0 is reserved
/*
 * Generate a positive, non-zero 64-bit uid for `name`.
 *
 * Bit layout before the sign is folded away:
 *   [63..24] low 40 bits of the current microsecond timestamp
 *   [23..8]  low 16 bits of the 32-bit murmur hash of name[0..len)
 *   [7..0]   low 8 bits of taosRand()
 *
 * The pattern is reinterpreted as int64_t and its absolute value is returned.
 * The composition is done in uint64_t so that shifting into bit 63 is well
 * defined, and the two patterns that cannot yield a valid result (0, which is
 * reserved, and INT64_MIN, for which llabs() is undefined) are retried.
 */
int64_t mndGenerateUid(const char *name, int32_t len) {
  int32_t hashval = MurmurHash3_32(name, len);
  do {
    int64_t  us = taosGetTimestampUs();
    uint64_t tsBits = ((uint64_t)us & 0x000000FFFFFFFFFFULL) << 24;
    uint64_t hashBits = ((uint64_t)hashval & 0xFFFFULL) << 8;
    uint64_t randBits = (uint64_t)taosRand() & 0xFFULL;
    int64_t  uuid = (int64_t)(tsBits + hashBits + randBits);
    if (uuid != 0 && uuid != INT64_MIN) {
      return llabs(uuid);
    }
  } while (true);
}
1051

1052
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
16✔
1053
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1054
  int32_t code = mndAcquireRpc(pMnode);
16✔
1055
  if (code < 0) {
16!
1056
    TAOS_RETURN(code);
×
1057
  } else if (code == 1) {
16!
1058
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1059
  }
1060

1061
  SSdb   *pSdb = pMnode->pSdb;
16✔
1062
  int64_t ms = taosGetTimestampMs();
16✔
1063

1064
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
16✔
1065
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
16✔
1066
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
16✔
1067
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
16✔
1068
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
16!
1069
      pStbInfo->stbs == NULL) {
16!
1070
    mndReleaseRpc(pMnode);
×
1071
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1072
    if (terrno != 0) code = terrno;
×
1073
    TAOS_RETURN(code);
×
1074
  }
1075

1076
  // cluster info
1077
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
16✔
1078
  pClusterInfo->monitor_interval = tsMonitorInterval;
16✔
1079
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
16✔
1080
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
16✔
1081
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
16✔
1082
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
16✔
1083
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
16✔
1084

1085
  void *pIter = NULL;
16✔
1086
  while (1) {
16✔
1087
    SDnodeObj *pObj = NULL;
32✔
1088
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
32✔
1089
    if (pIter == NULL) break;
32✔
1090

1091
    SMonDnodeDesc desc = {0};
16✔
1092
    desc.dnode_id = pObj->id;
16✔
1093
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
16✔
1094
    if (mndIsDnodeOnline(pObj, ms)) {
16✔
1095
      tstrncpy(desc.status, "ready", sizeof(desc.status));
15✔
1096
    } else {
1097
      tstrncpy(desc.status, "offline", sizeof(desc.status));
1✔
1098
    }
1099
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
32!
1100
      mError("failed put dnode into array, but continue at this monitor report")
×
1101
    }
1102
    sdbRelease(pSdb, pObj);
16✔
1103
  }
1104

1105
  pIter = NULL;
16✔
1106
  while (1) {
16✔
1107
    SMnodeObj *pObj = NULL;
32✔
1108
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
32✔
1109
    if (pIter == NULL) break;
32✔
1110

1111
    SMonMnodeDesc desc = {0};
16✔
1112
    desc.mnode_id = pObj->id;
16✔
1113
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
16✔
1114

1115
    if (pObj->id == pMnode->selfDnodeId) {
16!
1116
      pClusterInfo->first_ep_dnode_id = pObj->id;
16✔
1117
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
16✔
1118
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1119
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
16✔
1120
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1121
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
16✔
1122
      desc.syncState = TAOS_SYNC_STATE_LEADER;
16✔
1123
    } else {
1124
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1125
      desc.syncState = pObj->syncState;
×
1126
    }
1127
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
32!
1128
      mError("failed to put mnode into array, but continue at this monitor report");
×
1129
    }
1130
    sdbRelease(pSdb, pObj);
16✔
1131
  }
1132

1133
  // vgroup info
1134
  pIter = NULL;
16✔
1135
  while (1) {
38✔
1136
    SVgObj *pVgroup = NULL;
54✔
1137
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
54✔
1138
    if (pIter == NULL) break;
54✔
1139

1140
    if (pVgroup->mountVgId) {
38!
1141
      sdbRelease(pSdb, pVgroup);
×
1142
      continue;
×
1143
    }
1144

1145
    pClusterInfo->vgroups_total++;
38✔
1146
    pClusterInfo->tbs_total += pVgroup->numOfTables;
38✔
1147

1148
    SMonVgroupDesc desc = {0};
38✔
1149
    desc.vgroup_id = pVgroup->vgId;
38✔
1150

1151
    SName name = {0};
38✔
1152
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
38✔
1153
    if (code < 0) {
38!
1154
      mError("failed to get db name since %s", tstrerror(code));
×
1155
      sdbRelease(pSdb, pVgroup);
×
1156
      TAOS_RETURN(code);
×
1157
    }
1158
    (void)tNameGetDbName(&name, desc.database_name);
38✔
1159

1160
    desc.tables_num = pVgroup->numOfTables;
38✔
1161
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
38✔
1162
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
38✔
1163
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
76✔
1164
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
38✔
1165
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
38✔
1166
      pVnDesc->dnode_id = pVgid->dnodeId;
38✔
1167
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
38✔
1168
      pVnDesc->syncState = pVgid->syncState;
38✔
1169
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
38!
1170
        tstrncpy(desc.status, "ready", sizeof(desc.status));
38✔
1171
        pClusterInfo->vgroups_alive++;
38✔
1172
      }
1173
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
38!
1174
        pClusterInfo->vnodes_alive++;
38✔
1175
      }
1176
      pClusterInfo->vnodes_total++;
38✔
1177
    }
1178

1179
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
76!
1180
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1181
    }
1182
    sdbRelease(pSdb, pVgroup);
38✔
1183
  }
1184

1185
  // stb info
1186
  pIter = NULL;
16✔
1187
  while (1) {
15✔
1188
    SStbObj *pStb = NULL;
31✔
1189
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
31✔
1190
    if (pIter == NULL) break;
31✔
1191

1192
    SMonStbDesc desc = {0};
15✔
1193

1194
    SName name1 = {0};
15✔
1195
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
15✔
1196
    if (code < 0) {
15!
1197
      mError("failed to get db name since %s", tstrerror(code));
×
1198
      sdbRelease(pSdb, pStb);
×
1199
      TAOS_RETURN(code);
×
1200
    }
1201
    (void)tNameGetDbName(&name1, desc.database_name);
15✔
1202

1203
    SName name2 = {0};
15✔
1204
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
15✔
1205
    if (code < 0) {
15!
1206
      mError("failed to get table name since %s", tstrerror(code));
×
1207
      sdbRelease(pSdb, pStb);
×
1208
      TAOS_RETURN(code);
×
1209
    }
1210
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
15✔
1211

1212
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
30!
1213
      mError("failed to put stb into array, but continue at this monitor report");
×
1214
    }
1215
    sdbRelease(pSdb, pStb);
15✔
1216
  }
1217

1218
  // grant info
1219
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
16✔
1220
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
16✔
1221
  if (pMnode->grant.expireTimeMS == 0) {
16!
1222
    pGrantInfo->expire_time = 0;
×
1223
    pGrantInfo->timeseries_total = 0;
×
1224
  }
1225

1226
  mndReleaseRpc(pMnode);
16✔
1227
  TAOS_RETURN(code);
16✔
1228
}
1229

1230
/*
 * Copy the current sync state of the mnode (state, restored flag, term and
 * role time) into pLoad. Always returns 0.
 */
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = snapshot.state;
  pLoad->syncRestore = snapshot.restored;
  pLoad->syncTerm = snapshot.term;
  pLoad->roleTimeMs = snapshot.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);

  return 0;
}
1241

1242
/* Returns the roleTimeMs field of the current sync state of this mnode. */
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  return syncGetState(pMnode->syncMgmt.sync).roleTimeMs;
}
1246

1247
/*
 * Set the mnode's restored flag under the write lock.
 *
 * When the flag is cleared, the call additionally blocks (polling every 3 ms)
 * until rpcRef has dropped to zero or below. rpcRef is modified with atomic
 * operations elsewhere, so it is read with an atomic load here rather than a
 * plain access.
 */
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (!restored) {
    while (atomic_load_32(&pMnode->rpcRef) > 0) {
      taosMsleep(3);
    }
  }
}
2,475✔
1264

1265
/* Returns the mnode's restored flag. The flag is read without taking the mnode lock. */
bool mndGetRestored(SMnode *pMnode) {
  return pMnode->restored;
}
×
1266

1267
/*
 * Mark the mnode as stopped. The flag is set under the write lock; the info
 * log line is emitted after the lock has been released.
 */
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->stopped = true;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
2,475✔
1273

1274
/* Returns the mnode's stopped flag. The flag is read without taking the mnode lock. */
bool mndGetStop(SMnode *pMnode) {
  return pMnode->stopped;
}
673,222✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc