• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In
Build has been canceled!

taosdata / TDengine / #3847

11 Apr 2025 06:14AM UTC coverage: 62.612% (+0.2%) from 62.398%
#3847

push

travis-ci

web-flow
Merge pull request #30758 from taosdata/merge/mainto3.0

merge: from main to 3.0 branch

154571 of 315259 branches covered (49.03%)

Branch coverage included in aggregate %.

63 of 80 new or added lines in 9 files covered. (78.75%)

946 existing lines in 106 files now uncovered.

240135 of 315138 relevant lines covered (76.2%)

19768383.08 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.46
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndConfig.h"
24
#include "mndConsumer.h"
25
#include "mndDb.h"
26
#include "mndDnode.h"
27
#include "mndFunc.h"
28
#include "mndGrant.h"
29
#include "mndIndex.h"
30
#include "mndInfoSchema.h"
31
#include "mndMnode.h"
32
#include "mndPerfSchema.h"
33
#include "mndPrivilege.h"
34
#include "mndProfile.h"
35
#include "mndQnode.h"
36
#include "mndQuery.h"
37
#include "mndShow.h"
38
#include "mndSma.h"
39
#include "mndSnode.h"
40
#include "mndStb.h"
41
#include "mndStream.h"
42
#include "mndSubscribe.h"
43
#include "mndSync.h"
44
#include "mndTelem.h"
45
#include "mndTopic.h"
46
#include "mndTrans.h"
47
#include "mndUser.h"
48
#include "mndVgroup.h"
49
#include "mndView.h"
50

51
// Takes one reference on pMnode->rpcRef while holding the mnode read lock.
// Returns 0 when the reference was taken, TSDB_CODE_APP_IS_STOPPING when the
// mnode is flagged as stopped, and 1 when this mnode is not the leader.
// No reference is taken on either failure path.
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t result;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    result = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
    result = 0;
  } else {
    result = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(result);
}
69

70
// Drops one reference from pMnode->rpcRef; the decrement is performed while
// the mnode read lock is held.
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
4,909,478✔
80

81
static void *mndBuildTimerMsg(int32_t *pContLen) {
188,035✔
82
  terrno = 0;
188,035✔
83
  SMTimerReq timerReq = {0};
188,035✔
84

85
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
188,035✔
86
  if (contLen <= 0) return NULL;
188,035!
87
  void *pReq = rpcMallocCont(contLen);
188,035✔
88
  if (pReq == NULL) return NULL;
188,035!
89

90
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
188,035!
91
    mError("failed to serialize timer msg since %s", terrstr());
×
92
  }
93
  *pContLen = contLen;
188,035✔
94
  return pReq;
188,035✔
95
}
96

97
// Posts a TDMT_MND_TRANS_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
41,102✔
109

110
// Posts a TDMT_MND_COMPACT_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
7,609✔
122

123
// Posts a TDMT_MND_TTL_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built (mndBuildTimerMsg
// returned NULL); an enqueue failure is only logged.
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  // Skip the enqueue instead of posting a message with a NULL body.
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
8,302✔
133

134
// Posts a TDMT_MND_TRIM_DB_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built (mndBuildTimerMsg
// returned NULL); an enqueue failure is only logged.
static void mndPullupTrimDb(SMnode *pMnode) {
  // The trace text names the trim timer, which is what this function posts.
  mTrace("pullup trim");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  // Skip the enqueue instead of posting a message with a NULL body.
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
6✔
144

145
// Posts a TDMT_MND_S3MIGRATE_DB_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built (mndBuildTimerMsg
// returned NULL); an enqueue failure is only logged.
static void mndPullupS3MigrateDb(SMnode *pMnode) {
  // The trace text names the s3migrate timer, which is what this function posts.
  mTrace("pullup s3migrate");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  // Skip the enqueue instead of posting a message with a NULL body.
  if (pReq == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_S3MIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
155

156
// Posts a TDMT_MND_ARB_HEARTBEAT_TIMER message (flagged noResp) to the ARB queue.
// Returns the result of tmsgPutToQueue(), or a non-zero code when the timer
// body cannot be built: terrno if it is set, otherwise
// TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of enqueueing a message with a NULL body.
    int32_t code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    return code;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
163

164
// Posts a TDMT_MND_ARB_CHECK_SYNC_TIMER message (flagged noResp) to the ARB queue.
// Returns the result of tmsgPutToQueue(), or a non-zero code when the timer
// body cannot be built: terrno if it is set, otherwise
// TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of enqueueing a message with a NULL body.
    int32_t code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    return code;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
171

172
// Posts a TDMT_MND_TMQ_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
40,334✔
182

183
// Allocates an SMStreamDoCheckpointMsg-sized body and posts it to the mnode
// write queue as TDMT_MND_STREAM_BEGIN_CHECKPOINT.
// Nothing is posted when the allocation fails; an enqueue failure is only logged.
static void mndStreamCheckpointTimer(SMnode *pMnode) {
  SMStreamDoCheckpointMsg *pBody = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
  if (pBody == NULL) return;

  int32_t bodyLen = sizeof(SMStreamDoCheckpointMsg);
  SRpcMsg chkptMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &chkptMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2,284✔
194

195
// Posts a TDMT_MND_NODECHECK_TIMER message to the mnode read queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndStreamCheckNode(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,583✔
206

207
// Posts a TDMT_MND_CHECK_STREAM_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndStreamCheckStatus(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_CHECK_STREAM_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
17✔
218

219
// Posts a TDMT_MND_STREAM_CONSEN_TIMER message to the mnode write queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndStreamConsensusChkpt(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_STREAM_CONSEN_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
15,688✔
230

231
// Posts a TDMT_MND_TELEM_TIMER message to the mnode read queue.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pBody, .contLen = bodyLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2✔
243

244
// Posts a TDMT_MND_GRANT_HB_TIMER message to the mnode write queue, with
// info.notFreeAhandle set and a zero ahandle.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg timerMsg = {
      .msgType = TDMT_MND_GRANT_HB_TIMER,
      .pCont = pBody,
      .contLen = bodyLen,
      .info.notFreeAhandle = 1,
      .info.ahandle = 0,
  };
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
4,257✔
260

261
// Posts a TDMT_MND_UPTIME_TIMER message to the mnode write queue, with
// info.notFreeAhandle set and a zero ahandle.
// Nothing is posted when the timer body cannot be built; an enqueue failure
// is only logged.
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increase uptime");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq != NULL) {
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
                      .pCont = pReq,
                      .contLen = contLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = 0};
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
    }
  }
}
132✔
277

278
// Walks every vgroup in the sdb and, for the replica hosted on `dnodeId`,
// forces its sync state to TAOS_SYNC_STATE_OFFLINE and clears syncRestore,
// syncCanRead and startTimeMs. When a replica actually changed state, the
// owning database's stateTs is set to `curMs`.
//
// pMnode:  mnode whose sdb is scanned.
// dnodeId: id of the dnode considered offline.
// curMs:   timestamp written into SDbObj.stateTs for affected databases.
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;

  void *pIter = NULL;
  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    bool stateChanged = false;
    for (int32_t vg = 0; vg < pVgroup->replica; ++vg) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[vg];
      if (pGid->dnodeId == dnodeId) {
        if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
          // The new state written below is OFFLINE, so the log says so.
          mInfo(
              "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline "
              "restored:0 canRead:0",
              pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
          pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
          pGid->syncRestore = 0;
          pGid->syncCanRead = 0;
          pGid->startTimeMs = 0;
          stateChanged = true;
        }
        // Only the first replica entry matching the dnode is examined.
        break;
      }
    }

    if (stateChanged) {
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL) {
        if (pDb->stateTs != curMs) {
          mInfo("db:%s, stateTs changed by offline check, oldTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
                curMs);
          pDb->stateTs = curMs;
        }
        // Release only what was actually acquired.
        mndReleaseDb(pMnode, pDb);
      }
    }

    sdbRelease(pSdb, pVgroup);
  }
}
248✔
319

320
// Scans all dnodes in the sdb and, for each one that mndIsDnodeOnline()
// reports as not online at the current time, marks its vgroup replicas
// offline via mndSetVgroupOffline().
// The scan is skipped when an RPC reference cannot be acquired; the reference
// is released once the scan finishes.
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb   *pSdb = pMnode->pSdb;
  int64_t nowMs = taosGetTimestampMs();

  void *pCursor = NULL;
  for (;;) {
    SDnodeObj *pDnode = NULL;
    pCursor = sdbFetch(pSdb, SDB_DNODE, pCursor, (void **)&pDnode);
    if (pCursor == NULL) break;

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
344

345
// Reports whether this mnode must skip leader-only work.
// Returns true when syncGetState() leaves terrno non-zero (terrno is not
// overwritten here), when the sync state is not LEADER (terrno is set to
// TSDB_CODE_SYN_NOT_LEADER), or when either the sync state or the mnode is not
// yet restored (terrno is set to TSDB_CODE_SYN_RESTORING).
// Returns false otherwise. The state is sampled under the mnode read lock.
static bool mnodeIsNotLeader(SMnode *pMnode) {
  terrno = 0;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  SSyncState syncState = syncGetState(pMnode->syncMgmt.sync);
  const bool queryFailed = (terrno != 0);
  const bool notLeader = !queryFailed && (syncState.state != TAOS_SYNC_STATE_LEADER);
  const bool restoring = !queryFailed && !notLeader && (!syncState.restored || !pMnode->restored);
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  if (queryFailed) return true;

  if (notLeader) {
    terrno = TSDB_CODE_SYN_NOT_LEADER;
    return true;
  }

  if (restoring) {
    terrno = TSDB_CODE_SYN_RESTORING;
    return true;
  }

  return false;
}
367

368
static int32_t minCronTime() {
×
369
  int32_t min = INT32_MAX;
×
370
  min = TMIN(min, tsTtlPushIntervalSec);
×
371
  min = TMIN(min, tsTrimVDbIntervalSec);
×
372
  min = TMIN(min, tsS3MigrateIntervalSec);
×
373
  min = TMIN(min, tsTransPullupInterval);
×
374
  min = TMIN(min, tsCompactPullupInterval);
×
375
  min = TMIN(min, tsMqRebalanceInterval);
×
376
  min = TMIN(min, tsStreamCheckpointInterval);
×
377
  min = TMIN(min, tsStreamNodeCheckInterval);
×
378
  min = TMIN(min, tsArbHeartBeatIntervalSec);
×
379
  min = TMIN(min, tsArbCheckSyncIntervalSec);
×
380

381
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
382
  min = TMIN(min, telemInt);
×
383
  min = TMIN(min, tsGrantHBInterval);
×
384
  min = TMIN(min, tsUptimeInterval);
×
385

386
  return min <= 1 ? 2 : min;
×
387
}
388
// Returns true when `sec` is an exact multiple of `interval`.
// A zero interval never fires; this avoids the undefined `sec % 0`.
static bool mndIsTimerDue(int64_t sec, int64_t interval) { return interval != 0 && (sec % interval) == 0; }

// Fires every periodic "pullup" task whose interval divides `sec`.
//
// pMnode: mnode whose queues receive the timer messages.
// sec:    tick counter in seconds supplied by the caller.
//
// Each task is compiled in or out by the same feature macros as before
// (TD_ASTRA, USE_S3, USE_TOPIC, USE_STREAM). A configured interval of 0
// disables the corresponding task instead of dividing by zero.
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
  int32_t code = 0;
#ifndef TD_ASTRA
  if (mndIsTimerDue(sec, tsTtlPushIntervalSec)) {
    mndPullupTtl(pMnode);
  }

  if (mndIsTimerDue(sec, tsTrimVDbIntervalSec)) {
    mndPullupTrimDb(pMnode);
  }
#endif
#ifdef USE_S3
  if (tsS3MigrateEnabled && mndIsTimerDue(sec, tsS3MigrateIntervalSec)) {
    mndPullupS3MigrateDb(pMnode);
  }
#endif
  if (mndIsTimerDue(sec, tsTransPullupInterval)) {
    mndPullupTrans(pMnode);
  }

  if (mndIsTimerDue(sec, tsCompactPullupInterval)) {
    mndPullupCompacts(pMnode);
  }
#ifdef USE_TOPIC
  if (mndIsTimerDue(sec, tsMqRebalanceInterval)) {
    mndCalMqRebalance(pMnode);
  }
#endif
#ifdef USE_STREAM
  if (sec % 30 == 0) {  // send the checkpoint info every 30 sec
    mndStreamCheckpointTimer(pMnode);
  }

  if (mndIsTimerDue(sec, tsStreamNodeCheckInterval)) {
    mndStreamCheckNode(pMnode);
  }

  // tsStreamFailedTimeout / 1000 is 0 for any value below 1000.
  if (mndIsTimerDue(sec, tsStreamFailedTimeout / 1000)) {
    mndStreamCheckStatus(pMnode);
  }

  if (sec % 5 == 0) {
    mndStreamConsensusChkpt(pMnode);
  }

  if (tsTelemInterval > 0 && sec % tsTelemInterval == 0) {
    mndPullupTelem(pMnode);
  }
#endif
#ifndef TD_ASTRA
  if (mndIsTimerDue(sec, tsGrantHBInterval)) {
    mndPullupGrant(pMnode);
  }
#endif
  if (mndIsTimerDue(sec, tsUptimeInterval)) {
    mndIncreaseUpTime(pMnode);
  }
#ifndef TD_ASTRA
  if (mndIsTimerDue(sec, tsArbHeartBeatIntervalSec)) {
    if ((code = mndPullupArbHeartbeat(pMnode)) != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
    }
  }

  if (mndIsTimerDue(sec, tsArbCheckSyncIntervalSec)) {
    if ((code = mndPullupArbCheckSync(pMnode)) != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
    }
  }
#endif
}
81,420✔
459
// Runs the periodic checks that are due at tick `sec`: the dnode offline
// check every (tsStatusInterval * 5) seconds and the sync timeout check every
// (MNODE_TIMEOUT_SEC / 2) seconds.
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
  const bool offlineCheckDue = (sec % (tsStatusInterval * 5)) == 0;
  const bool syncTimeoutDue = (sec % (MNODE_TIMEOUT_SEC / 2)) == 0;

  if (offlineCheckDue) {
    mndCheckDnodeOffline(pMnode);
  }

  if (syncTimeoutDue) {
    mndSyncCheckTimeout(pMnode);
  }
}
81,420✔
467

468
static void *mndThreadFp(void *param) {
1,766✔
469
  SMnode *pMnode = param;
1,766✔
470
  int64_t lastTime = 0;
1,766✔
471
  setThreadName("mnode-timer");
1,766✔
472

473
  while (1) {
865,413✔
474
    lastTime++;
867,179✔
475
    taosMsleep(100);
867,179✔
476

477
    if (mndGetStop(pMnode)) break;
867,179✔
478
    if (lastTime % 10 != 0) continue;
865,413✔
479

480
    if (mnodeIsNotLeader(pMnode)) {
85,684✔
481
      mTrace("timer not process since mnode is not leader");
4,264!
482
      continue;
4,264✔
483
    }
484

485
    int64_t sec = lastTime / 10;
81,420✔
486
    mndDoTimerCheckTask(pMnode, sec);
81,420✔
487

488
    mndDoTimerPullupTask(pMnode, sec);
81,420✔
489
  }
490

491
  return NULL;
1,766✔
492
}
493

494
// Starts the joinable "mnode-timer" thread running mndThreadFp and stores its
// handle in pMnode->thread.
// Returns 0 on success, or the non-zero code from taosThreadCreate().
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadFp, pMnode);
  // The attribute object is no longer needed once creation has been attempted;
  // destroy it on the failure path as well.
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
511

512
// Joins the timer thread and clears its handle; does nothing when
// taosCheckPthreadValid() rejects the stored handle.
static void mndCleanupTimer(SMnode *pMnode) {
  if (!taosCheckPthreadValid(pMnode->thread)) return;

  (void)taosThreadJoin(pMnode->thread, NULL);
  taosThreadClear(&pMnode->thread);
}
1,766✔
518

519
// Stores a copy of `path` in pMnode->path and creates that directory.
// Returns 0 on success, or terrno when the copy or the mkdir fails.
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);
  if (pMnode->path == NULL || taosMkDir(pMnode->path) != 0) {
    int32_t failure = terrno;
    TAOS_RETURN(failure);
  }

  TAOS_RETURN(0);
}
534

535
// Opens the WAL located at "<pMnode->path><TD_DIRSEP>wal" and stores the
// handle in pMnode->pWal.
// Returns 0 on success; TSDB_CODE_DNODE_INVALID_ENCRYPTKEY when WAL encryption
// is selected but no key is configured (enterprise builds only); otherwise
// terrno, or TSDB_CODE_MND_RETURN_VALUE_NULL if terrno is unset, when
// walOpen() fails.
static int32_t mndInitWal(SMnode *pMnode) {
  char walDir[PATH_MAX + 20] = {0};
  (void)snprintf(walDir, sizeof(walDir), "%s%swal", pMnode->path, TD_DIRSEP);

  // Every field not assigned below stays zero.
  SWalCfg walCfg = {0};
  walCfg.vgId = 1;
  walCfg.rollPeriod = -1;
  walCfg.segSize = -1;
  walCfg.committed = -1;
  walCfg.level = TAOS_WAL_FSYNC;

#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
    if (tsEncryptKey[0] == '\0') {
      TAOS_RETURN(TSDB_CODE_DNODE_INVALID_ENCRYPTKEY);
    }
    walCfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
    tstrncpy(walCfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walDir, &walCfg);
  if (pMnode->pWal != NULL) {
    TAOS_RETURN(0);
  }

  int32_t failure = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  mError("failed to open wal since %s. wal:%s", tstrerror(failure), walDir);
  TAOS_RETURN(failure);
}
572

573
// Closes the mnode WAL if one is open and resets pMnode->pWal to NULL.
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) return;

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
1,766✔
579

580
// Creates the sdb for this mnode from its path and WAL, storing the handle in
// pMnode->pSdb.
// Returns 0 on success; otherwise terrno, or TSDB_CODE_MND_RETURN_VALUE_NULL
// if terrno is unset.
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt sdbOpt = {.path = pMnode->path, .pMnode = pMnode, .pWal = pMnode->pWal};

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t failure = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(failure);
}
596

597
// Loads the sdb from file unless this mnode is being deployed, then publishes
// the sdb commit index as pMnode->applied.
// Returns the result of sdbReadFile(), or 0 when no read was performed. The
// applied index is stored regardless of that result.
static int32_t mndOpenSdb(SMnode *pMnode) {
  int32_t readCode = pMnode->deploy ? 0 : sdbReadFile(pMnode->pSdb);

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);
  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);

  return readCode;
}
608

609
// Releases the sdb if one exists and resets pMnode->pSdb to NULL.
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) return;

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
1,766✔
615

616
// Appends one init/cleanup step to pMnode->pSteps.
// name:      label used in log and startup messages.
// initFp:    function run by mndExecSteps().
// cleanupFp: function run by mndCleanupSteps(); may be NULL.
// Returns 0 on success, or terrno when the array push fails.
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep newStep = {.name = name, .initFp = initFp, .cleanupFp = cleanupFp};

  if (taosArrayPush(pMnode->pSteps, &newStep) != NULL) {
    TAOS_RETURN(0);
  }

  TAOS_RETURN(terrno);
}
627

628
// Registers every mnode init/cleanup step, in execution order, by calling
// mndAllocStep() once per table row. Stops at the first registration failure
// (via TAOS_CHECK_RETURN); returns 0 when all steps were registered.
// Note that "mnode-sdb" appears twice: once to create the sdb and once, after
// the tables are registered, to open it.
static int32_t mndInitSteps(SMnode *pMnode) {
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } stepTable[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };
  const int32_t numSteps = (int32_t)(sizeof(stepTable) / sizeof(stepTable[0]));

  for (int32_t i = 0; i < numSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, stepTable[i].name, stepTable[i].initFp, stepTable[i].cleanupFp));
  }

  return 0;
}
667

668
// Runs the cleanup functions of the registered steps in reverse order,
// starting at index `pos` (or at the last step when `pos` is -1), then
// destroys the step array and sets pMnode->pSteps to NULL.
// Steps without a cleanup function are only logged. No-op when there is no
// step array.
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  int32_t idx = pos;
  if (idx == -1) {
    idx = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  while (idx >= 0) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    if (pStep->cleanupFp != NULL) {
      (*pStep->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
687

688
// Runs the init function of every registered step in order.
// On the first failure it logs the error, calls mndCleanupSteps() from the
// failing index downwards and returns the failing code. When all steps
// succeed it caches the cluster id in pMnode->clusterId and returns 0.
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t idx = 0; idx < numSteps; idx++) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    if (pStep->initFp == NULL) continue;

    int32_t rc = (*pStep->initFp)(pMnode);
    if (rc == 0) {
      mInfo("%s is initialized", pStep->name);
      tmsgReportStartup(pStep->name, "initialized");
      continue;
    }

    mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(rc));
    mndCleanupSteps(pMnode, idx);
    TAOS_RETURN(rc);
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
708

709
// Copies the message callbacks, the dnode id and the sync membership settings
// (indices, replica counts, replica list, node roles) from `pOption` into
// `pMnode`.
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  // Sync membership.
  (void)memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
  (void)memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
  pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
  pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
  pMnode->syncMgmt.selfIndex = pOption->selfIndex;
  pMnode->syncMgmt.lastIndex = pOption->lastIndex;

  // Identity and messaging.
  pMnode->selfDnodeId = pOption->dnodeId;
  pMnode->msgCb = pOption->msgCb;
}
1,767✔
719

720
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
1,767✔
721
  terrno = 0;
1,767✔
722
  mInfo("start to open mnode in %s", path);
1,767!
723

724
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
1,767!
725
  if (pMnode == NULL) {
1,767!
726
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
727
    mError("failed to open mnode since %s", terrstr());
×
728
    return NULL;
×
729
  }
730
  (void)memset(pMnode, 0, sizeof(SMnode));
1,767✔
731

732
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
1,767✔
733
  if (code != 0) {
1,767!
734
    taosMemoryFree(pMnode);
×
735
    mError("failed to open mnode lock since %s", tstrerror(code));
×
736
    return NULL;
×
737
  }
738

739
  char timestr[24] = "1970-01-01 00:00:00.00";
1,767✔
740
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
1,767✔
741
  if (code < 0) {
1,767!
742
    mError("failed to parse time since %s", tstrerror(code));
×
743
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
744
    taosMemoryFree(pMnode);
×
745
    return NULL;
×
746
  }
747
  mndSetOptions(pMnode, pOption);
1,767✔
748

749
  pMnode->deploy = pOption->deploy;
1,767✔
750
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
1,767✔
751
  if (pMnode->pSteps == NULL) {
1,767!
752
    taosMemoryFree(pMnode);
×
753
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
754
    mError("failed to open mnode since %s", terrstr());
×
755
    return NULL;
×
756
  }
757

758
  code = mndCreateDir(pMnode, path);
1,767✔
759
  if (code != 0) {
1,767!
760
    code = terrno;
×
761
    mError("failed to open mnode since %s", tstrerror(code));
×
762
    mndClose(pMnode);
×
763
    terrno = code;
×
764
    return NULL;
×
765
  }
766

767
  code = mndInitSteps(pMnode);
1,767✔
768
  if (code != 0) {
1,767!
769
    code = terrno;
×
770
    mError("failed to open mnode since %s", tstrerror(code));
×
771
    mndClose(pMnode);
×
772
    terrno = code;
×
773
    return NULL;
×
774
  }
775

776
  code = mndExecSteps(pMnode);
1,767✔
777
  if (code != 0) {
1,767!
778
    code = terrno;
×
779
    mError("failed to open mnode since %s", tstrerror(code));
×
780
    mndClose(pMnode);
×
781
    terrno = code;
×
782
    return NULL;
×
783
  }
784

785
  mInfo("mnode open successfully");
1,767!
786
  return pMnode;
1,767✔
787
}
788

789
// Pre-shutdown hook for the mnode.
// Requests a sync leader transfer, calls syncPreStop() and then writes the sdb
// file via sdbWriteFile(pSdb, 0). Failures of the transfer or the write are
// only logged; nothing is reported to the caller. A NULL pMnode is a no-op.
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  // TODO check return value
  int32_t rc = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (rc < 0) {
    mError("failed to transfer leader since %s", tstrerror(rc));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  rc = sdbWriteFile(pMnode->pSdb, 0);
  if (rc < 0) {
    mError("failed to write sdb since %s", tstrerror(rc));
  }
}
1,766✔
804

805
// Close an mnode object.
// Runs mndCleanupSteps(pMnode, -1), then frees the stored path and finally the
// SMnode structure itself. Passing NULL does nothing.
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
1,766✔
814

815
// Start the mnode.
// Starts sync first. When the deploy flag is set, the sdb is deployed and the
// mnode is marked restored; a deploy failure is logged and reported as -1.
// Afterwards the grant is reset and the result of mndInitTimer() is returned.
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    const bool deployFailed = (sdbDeploy(pMnode->pSdb) != 0);
    if (deployFailed) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    mndSetRestored(pMnode, true);
  }

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
828

829
// Forwards to syncIsCatchUp() using this mnode's sync handle.
int32_t mndIsCatchUp(SMnode *pMnode) { return syncIsCatchUp(pMnode->syncMgmt.sync); }
833

834
// Forwards to syncGetRole() using this mnode's sync handle.
ESyncRole mndGetRole(SMnode *pMnode) { return syncGetRole(pMnode->syncMgmt.sync); }
838

839
// Forwards to syncGetTerm() using this mnode's sync handle.
int64_t mndGetTerm(SMnode *pMnode) { return syncGetTerm(pMnode->syncMgmt.sync); }
843

844
// Forwards to syncGetArbToken() using this mnode's sync handle; the token is
// written into outToken by the callee and its return code is passed through.
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  int64_t rid = pMnode->syncMgmt.sync;
  return syncGetArbToken(rid, outToken);
}
67,070✔
845

846
// Stop the mnode. The three steps run in this fixed order:
//   1. mndSetStop()      - flag the mnode as stopped
//   2. mndSyncStop()     - stop sync
//   3. mndCleanupTimer() - clean up the timer
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);

  mndSyncStop(pMnode);

  mndCleanupTimer(pMnode);
}
1,766✔
851

852
// Hand a sync message to syncProcessMsg() for this mnode's sync handle.
// Returns the code from syncProcessMsg(); a non-zero code is logged first.
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;

  // NOTE(review): `trace` has no explicit use below; presumably the mG* logging
  // macros pick it up by name, so the identifier must not be renamed.
  const STraceId *trace = &pMsg->info.traceId;
  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
867

868
// Decide whether a message may be processed by this mnode right now.
//
// Returns 0 without further checks for non-request messages and for the
// TDMT_SCH_* query/fetch/drop/notify message types. For everything else the
// mnode state is inspected under the read lock:
//   - stopped                       -> TSDB_CODE_APP_IS_STOPPING
//   - syncGetState() set terrno     -> that terrno value
//   - sync state is not leader      -> TSDB_CODE_SYN_NOT_LEADER
//   - sync or mnode not restored    -> TSDB_CODE_SYN_RESTORING
//   - otherwise                     -> rpcRef is incremented and 0 is returned
//
// For the not-leader / restoring cases, timer messages just return the code.
// Other messages get the mnode ep set serialized into pMsg->info.rsp (with
// hasEpSet set) before the code is returned; if the ep set is empty the
// function returns -1 instead.
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;
  if (!IsReq(pMsg)) TAOS_RETURN(code);

  const bool isSchMsg = pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
                        pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
                        pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH ||
                        pMsg->msgType == TDMT_SCH_DROP_TASK || pMsg->msgType == TDMT_SCH_TASK_NOTIFY;
  if (isSchMsg) {
    TAOS_RETURN(code);
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    code = TSDB_CODE_APP_IS_STOPPING;
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    TAOS_RETURN(code);
  }

  // terrno is cleared first so that a non-zero value afterwards can be
  // attributed to syncGetState().
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  // Happy path: restored leader. Take an rpc reference while still holding the lock.
  if (state.state == TAOS_SYNC_STATE_LEADER && state.restored && pMnode->restored) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    TAOS_RETURN(code);
  }

  code = (state.state != TAOS_SYNC_STATE_LEADER) ? TSDB_CODE_SYN_NOT_LEADER : TSDB_CODE_SYN_RESTORING;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  const bool isTimerMsg =
      pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
      pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER;
  if (isTimerMsg) {
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
           pMnode->stopped, state.restored, syncStr(state.state));
    TAOS_RETURN(code);
  }

  // NOTE(review): `trace` is not referenced explicitly; presumably mGDebug uses it by name.
  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) return -1;

  for (int32_t epIdx = 0; epIdx < epSet.numOfEps; ++epIdx) {
    mDebug("mnode index:%d, ep:%s:%u", epIdx, epSet.eps[epIdx].fqdn, epSet.eps[epIdx].port);
  }

  // First call with a NULL buffer yields the length, second call fills the response buffer.
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
  pMsg->info.rsp = rpcMallocCont(contLen);
  if (pMsg->info.rsp != NULL) {
    if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
      mError("failed to serialize ep set");
    }
    pMsg->info.hasEpSet = 1;
    pMsg->info.rspLen = contLen;
  }

  TAOS_RETURN(code);
}
957

958
// Dispatch an RPC message to the handler registered for its message type.
//
// The plain handler table (msgFp) is consulted first and the extended table
// (msgFpExt) only when no plain handler exists. With no handler at all the
// function returns TSDB_CODE_MSG_NOT_PROCESSED. Otherwise the message must pass
// mndCheckMnodeState(); its non-success code is returned as-is. After the
// handler ran, mndReleaseRpc() is called and the handler's code is returned
// (a handler result of -1 is replaced by terrno).
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode *pMnode = pMsg->info.node;
  // NOTE(review): `trace` is not referenced explicitly; presumably the mG* macros use it by name.
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

  MndMsgFp    fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)];
  MndMsgFpExt fpExt = (fp == NULL) ? pMnode->msgFpExt[TMSG_INDEX(pMsg->msgType)] : NULL;
  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  code = (fp != NULL) ? (*fp)(pMsg) : (*fpExt)(pMsg, pQueueInfo);
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
    TAOS_RETURN(code);
  }

  if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
    TAOS_RETURN(code);
  }

  // TODO remove this wrong set code
  if (code == -1) {
    code = terrno;
  }
  mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
          TMSG_INFO(pMsg->msgType));

  TAOS_RETURN(code);
}
998

999
// Register `fp` as the handler for `msgType` in pMnode->msgFp.
// The slot is TMSG_INDEX(msgType); an index that is not below TDMT_MAX is
// ignored without any report.
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFp[slot] = fp;
}
321,594✔
1005

1006
// Register `fp` as the extended handler for `msgType` in pMnode->msgFpExt.
// The slot is TMSG_INDEX(msgType); an index that is not below TDMT_MAX is
// ignored without any report.
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFpExt[slot] = fp;
}
14,136✔
1012

1013
// Note: uid 0 is reserved
//
// Generate a non-zero, non-negative 64-bit uid for `name` (`len` bytes).
//
// Bit layout of the raw value before the absolute value is taken:
//   bits 24..63 : low 40 bits of taosGetTimestampUs()
//   bits  8..23 : low 16 bits of MurmurHash3_32(name, len)
//   bits  0..7  : low 8 bits of taosRand()
//
// The value is assembled in unsigned arithmetic: shifting the 40-bit timestamp
// field left by 24 as a signed integer overflows whenever its top bit is set,
// which is undefined behaviour. The raw value is then reinterpreted as int64_t
// and its absolute value returned. Two raw values are rejected and regenerated
// on the next loop iteration: 0 (reserved) and the INT64_MIN bit pattern, for
// which llabs() is undefined.
int64_t mndGenerateUid(const char *name, int32_t len) {
  const uint64_t hashBits = ((uint64_t)MurmurHash3_32(name, len) & 0xFFFFu) << 8;
  const uint64_t signOnly = (uint64_t)1 << 63;  // bit pattern of INT64_MIN

  for (;;) {
    const uint64_t us = (uint64_t)taosGetTimestampUs();
    const uint64_t timeBits = (us & 0x000000FFFFFFFFFFull) << 24;
    const uint64_t randBits = (uint64_t)taosRand() & 0xFFu;
    const uint64_t raw = timeBits + hashBits + randBits;

    if (raw == 0 || raw == signOnly) {
      continue;
    }
    return llabs((int64_t)raw);
  }
}
1025

1026
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
13✔
1027
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1028
  int32_t code = mndAcquireRpc(pMnode);
13✔
1029
  if (code < 0) {
13!
1030
    TAOS_RETURN(code);
×
1031
  } else if (code == 1) {
13!
1032
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1033
  }
1034

1035
  SSdb   *pSdb = pMnode->pSdb;
13✔
1036
  int64_t ms = taosGetTimestampMs();
13✔
1037

1038
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
13✔
1039
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
13✔
1040
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
13✔
1041
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
13✔
1042
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
13!
1043
      pStbInfo->stbs == NULL) {
13!
1044
    mndReleaseRpc(pMnode);
×
1045
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1046
    if (terrno != 0) code = terrno;
×
1047
    TAOS_RETURN(code);
×
1048
  }
1049

1050
  // cluster info
1051
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
13✔
1052
  pClusterInfo->monitor_interval = tsMonitorInterval;
13✔
1053
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
13✔
1054
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
13✔
1055
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
13✔
1056
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
13✔
1057
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
13✔
1058

1059
  void *pIter = NULL;
13✔
1060
  while (1) {
13✔
1061
    SDnodeObj *pObj = NULL;
26✔
1062
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
26✔
1063
    if (pIter == NULL) break;
26✔
1064

1065
    SMonDnodeDesc desc = {0};
13✔
1066
    desc.dnode_id = pObj->id;
13✔
1067
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
13✔
1068
    if (mndIsDnodeOnline(pObj, ms)) {
13✔
1069
      tstrncpy(desc.status, "ready", sizeof(desc.status));
12✔
1070
    } else {
1071
      tstrncpy(desc.status, "offline", sizeof(desc.status));
1✔
1072
    }
1073
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
26!
1074
      mError("failed put dnode into array, but continue at this monitor report")
×
1075
    }
1076
    sdbRelease(pSdb, pObj);
13✔
1077
  }
1078

1079
  pIter = NULL;
13✔
1080
  while (1) {
13✔
1081
    SMnodeObj *pObj = NULL;
26✔
1082
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
26✔
1083
    if (pIter == NULL) break;
26✔
1084

1085
    SMonMnodeDesc desc = {0};
13✔
1086
    desc.mnode_id = pObj->id;
13✔
1087
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
13✔
1088

1089
    if (pObj->id == pMnode->selfDnodeId) {
13!
1090
      pClusterInfo->first_ep_dnode_id = pObj->id;
13✔
1091
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
13✔
1092
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1093
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
13✔
1094
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1095
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
13✔
1096
      desc.syncState = TAOS_SYNC_STATE_LEADER;
13✔
1097
    } else {
1098
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1099
      desc.syncState = pObj->syncState;
×
1100
    }
1101
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
26!
1102
      mError("failed to put mnode into array, but continue at this monitor report");
×
1103
    }
1104
    sdbRelease(pSdb, pObj);
13✔
1105
  }
1106

1107
  // vgroup info
1108
  pIter = NULL;
13✔
1109
  while (1) {
32✔
1110
    SVgObj *pVgroup = NULL;
45✔
1111
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
45✔
1112
    if (pIter == NULL) break;
45✔
1113

1114
    pClusterInfo->vgroups_total++;
32✔
1115
    pClusterInfo->tbs_total += pVgroup->numOfTables;
32✔
1116

1117
    SMonVgroupDesc desc = {0};
32✔
1118
    desc.vgroup_id = pVgroup->vgId;
32✔
1119

1120
    SName name = {0};
32✔
1121
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
32✔
1122
    if (code < 0) {
32!
1123
      mError("failed to get db name since %s", tstrerror(code));
×
1124
      sdbRelease(pSdb, pVgroup);
×
1125
      TAOS_RETURN(code);
×
1126
    }
1127
    (void)tNameGetDbName(&name, desc.database_name);
32✔
1128

1129
    desc.tables_num = pVgroup->numOfTables;
32✔
1130
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
32✔
1131
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
32✔
1132
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
64✔
1133
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
32✔
1134
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
32✔
1135
      pVnDesc->dnode_id = pVgid->dnodeId;
32✔
1136
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
32✔
1137
      pVnDesc->syncState = pVgid->syncState;
32✔
1138
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
32!
1139
        tstrncpy(desc.status, "ready", sizeof(desc.status));
32✔
1140
        pClusterInfo->vgroups_alive++;
32✔
1141
      }
1142
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
32!
1143
        pClusterInfo->vnodes_alive++;
32✔
1144
      }
1145
      pClusterInfo->vnodes_total++;
32✔
1146
    }
1147

1148
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
64!
1149
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1150
    }
1151
    sdbRelease(pSdb, pVgroup);
32✔
1152
  }
1153

1154
  // stb info
1155
  pIter = NULL;
13✔
1156
  while (1) {
12✔
1157
    SStbObj *pStb = NULL;
25✔
1158
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
25✔
1159
    if (pIter == NULL) break;
25✔
1160

1161
    SMonStbDesc desc = {0};
12✔
1162

1163
    SName name1 = {0};
12✔
1164
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
12✔
1165
    if (code < 0) {
12!
1166
      mError("failed to get db name since %s", tstrerror(code));
×
1167
      sdbRelease(pSdb, pStb);
×
1168
      TAOS_RETURN(code);
×
1169
    }
1170
    (void)tNameGetDbName(&name1, desc.database_name);
12✔
1171

1172
    SName name2 = {0};
12✔
1173
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
12✔
1174
    if (code < 0) {
12!
1175
      mError("failed to get table name since %s", tstrerror(code));
×
1176
      sdbRelease(pSdb, pStb);
×
1177
      TAOS_RETURN(code);
×
1178
    }
1179
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
12✔
1180

1181
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
24!
1182
      mError("failed to put stb into array, but continue at this monitor report");
×
1183
    }
1184
    sdbRelease(pSdb, pStb);
12✔
1185
  }
1186

1187
  // grant info
1188
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
13✔
1189
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
13✔
1190
  if (pMnode->grant.expireTimeMS == 0) {
13!
1191
    pGrantInfo->expire_time = 0;
×
1192
    pGrantInfo->timeseries_total = 0;
×
1193
  }
1194

1195
  mndReleaseRpc(pMnode);
13✔
1196
  TAOS_RETURN(code);
13✔
1197
}
1198

1199
// Copy the current sync state of this mnode into pLoad: state, restored flag,
// term and role time, all as reported by syncGetState(). Always returns 0.
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  const SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = snapshot.state;
  pLoad->syncRestore = snapshot.restored;
  pLoad->syncTerm = snapshot.term;
  pLoad->roleTimeMs = snapshot.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);
  return 0;
}
1210

1211
// Returns the roleTimeMs field of the state reported by syncGetState().
int64_t mndGetRoleTimeMs(SMnode *pMnode) { return syncGetState(pMnode->syncMgmt.sync).roleTimeMs; }
1215

1216
// Set the mnode's `restored` flag under the write lock and log the new value.
// When the flag is cleared, the call additionally blocks, polling every 3 ms
// via taosMsleep(), until pMnode->rpcRef has dropped to zero or below.
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) {
    return;
  }

  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
1,766✔
1233

1234
// Returns the current value of the mnode's `restored` flag (read without taking the lock).
bool mndGetRestored(SMnode *pMnode) {
  return pMnode->restored;
}
×
1235

1236
// Mark the mnode as stopped: sets pMnode->stopped to true while holding the
// write lock, then logs the change.
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->stopped = true;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
1,766✔
1242

1243
// Returns the current value of the mnode's `stopped` flag (read without taking the lock).
bool mndGetStop(SMnode *pMnode) {
  return pMnode->stopped;
}
867,179✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc