• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3558

17 Dec 2024 06:05AM UTC coverage: 59.778% (+1.6%) from 58.204%
#3558

push

travis-ci

web-flow
Merge pull request #29179 from taosdata/merge/mainto3.0

merge: form main to 3.0 branch

132787 of 287595 branches covered (46.17%)

Branch coverage included in aggregate %.

104 of 191 new or added lines in 5 files covered. (54.45%)

6085 existing lines in 168 files now uncovered.

209348 of 284746 relevant lines covered (73.52%)

8164844.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.58
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndConfig.h"
24
#include "mndConsumer.h"
25
#include "mndDb.h"
26
#include "mndDnode.h"
27
#include "mndFunc.h"
28
#include "mndGrant.h"
29
#include "mndIndex.h"
30
#include "mndInfoSchema.h"
31
#include "mndMnode.h"
32
#include "mndPerfSchema.h"
33
#include "mndPrivilege.h"
34
#include "mndProfile.h"
35
#include "mndQnode.h"
36
#include "mndQuery.h"
37
#include "mndShow.h"
38
#include "mndSma.h"
39
#include "mndSnode.h"
40
#include "mndStb.h"
41
#include "mndStream.h"
42
#include "mndSubscribe.h"
43
#include "mndSync.h"
44
#include "mndTelem.h"
45
#include "mndTopic.h"
46
#include "mndTrans.h"
47
#include "mndUser.h"
48
#include "mndVgroup.h"
49
#include "mndView.h"
50

51
// Take one reference on pMnode->rpcRef while holding the mnode read lock.
// Returns TSDB_CODE_APP_IS_STOPPING when pMnode->stopped is set, 1 when
// mndIsLeader() reports false, and 0 after the reference count was bumped.
// A 0 return is expected to be paired with mndReleaseRpc().
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t code = 0;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    code = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
  } else {
    code = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(code);
}
69

70
// Drop one reference from pMnode->rpcRef while holding the mnode read lock.
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
1,657,564✔
80

81
// Build the payload shared by all timer messages: a serialized, zero-initialized
// SMTimerReq placed in a buffer obtained from rpcMallocCont().
// On success returns the buffer and stores its length in *pContLen.
// Returns NULL (leaving *pContLen untouched) when the size query yields <= 0
// or the allocation fails.
// NOTE(review): when the second serialization call fails the error is only
// logged and the buffer is still returned - confirm this is intended.
static void *mndBuildTimerMsg(int32_t *pContLen) {
  SMTimerReq req = {0};
  terrno = 0;

  // the first call passes no buffer; its result is used as the allocation size
  const int32_t len = tSerializeSMTimerMsg(NULL, 0, &req);
  if (len <= 0) {
    return NULL;
  }

  void *pCont = rpcMallocCont(len);
  if (pCont == NULL) {
    return NULL;
  }

  if (tSerializeSMTimerMsg(pCont, len, &req) < 0) {
    mError("failed to serialize timer msg since %s", terrstr());
  }

  *pContLen = len;
  return pCont;
}
96

97
// Queue a TDMT_MND_TRANS_TIMER message on the mnode write queue.
// Nothing is queued when the timer payload cannot be built.
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);
  if (pCont == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pCont, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
22,558✔
109

110
// Queue a TDMT_MND_COMPACT_TIMER message on the mnode write queue.
// Nothing is queued when the timer payload cannot be built.
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);
  if (pCont == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pCont, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,940✔
122

123
// Queue a TDMT_MND_TTL_TIMER message on the mnode write queue.
// Nothing is queued when the timer payload cannot be built, which matches
// the NULL handling of the other pull-up helpers in this file.
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return;  // do not enqueue a message without a payload
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
4,233✔
133

UNCOV
134
// Queue a TDMT_MND_TRIM_DB_TIMER message on the mnode write queue.
// Nothing is queued when the timer payload cannot be built, which matches
// the NULL handling of the other pull-up helpers in this file.
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");  // previously logged "pullup s3migrate" by mistake
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return;  // do not enqueue a message without a payload
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
144

145
// Queue a TDMT_MND_S3MIGRATE_DB_TIMER message on the mnode write queue.
// Nothing is queued when the timer payload cannot be built, which matches
// the NULL handling of the other pull-up helpers in this file.
static void mndPullupS3MigrateDb(SMnode *pMnode) {
  mTrace("pullup s3migrate");  // previously logged "pullup trim" by mistake
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return;  // do not enqueue a message without a payload
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_S3MIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
155

156
// Queue a TDMT_MND_ARB_HEARTBEAT_TIMER message (info.noResp = 1) on the ARB queue
// and return the result of tmsgPutToQueue().
// NOTE(review): the payload is not NULL-checked here, unlike most pull-up
// helpers in this file - confirm an empty message is acceptable.
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");

  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);

  SRpcMsg hbMsg = {0};
  hbMsg.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER;
  hbMsg.pCont = pCont;
  hbMsg.contLen = msgLen;
  hbMsg.info.noResp = 1;

  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &hbMsg);
}
163

164
// Queue a TDMT_MND_ARB_CHECK_SYNC_TIMER message (info.noResp = 1) on the ARB queue
// and return the result of tmsgPutToQueue().
// NOTE(review): the payload is not NULL-checked here, unlike most pull-up
// helpers in this file - confirm an empty message is acceptable.
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");

  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);

  SRpcMsg syncMsg = {0};
  syncMsg.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER;
  syncMsg.pCont = pCont;
  syncMsg.contLen = msgLen;
  syncMsg.info.noResp = 1;

  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &syncMsg);
}
171

172
// Queue a TDMT_MND_TMQ_TIMER message on the mnode write queue.
// Nothing is queued when the timer payload cannot be built.
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);
  if (pCont == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pCont, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
21,764✔
182

183
// Queue a TDMT_MND_STREAM_BEGIN_CHECKPOINT message on the mnode write queue.
// The payload is a buffer of sizeof(SMStreamDoCheckpointMsg) bytes from
// rpcMallocCont(); nothing is queued when that allocation fails.
static void mndStreamCheckpointTimer(SMnode *pMnode) {
  SMStreamDoCheckpointMsg *pCont = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
  if (pCont == NULL) return;

  int32_t msgLen = sizeof(SMStreamDoCheckpointMsg);
  SRpcMsg chkptMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pCont, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &chkptMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,096✔
194

195
// Queue a TDMT_MND_NODECHECK_TIMER message on the mnode read queue.
// Nothing is queued when the timer payload cannot be built.
static void mndStreamCheckNode(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);
  if (pCont == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pCont, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,799✔
206

207
// Queue a TDMT_MND_STREAM_CONSEN_TIMER message on the mnode write queue.
// Nothing is queued when the timer payload cannot be built.
static void mndStreamConsensusChkpt(SMnode *pMnode) {
  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);
  if (pCont == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_STREAM_CONSEN_TIMER, .pCont = pCont, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
8,587✔
218

219
// Queue a TDMT_MND_TELEM_TIMER message on the mnode read queue.
// Nothing is queued when the timer payload cannot be built.
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);
  if (pCont == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pCont, .contLen = msgLen};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &timerMsg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
2✔
231

232
// Queue a TDMT_MND_GRANT_HB_TIMER message on the mnode write queue, with
// info.notFreeAhandle set to 1 and info.ahandle set to 0.
// Nothing is queued when the timer payload cannot be built.
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t msgLen = 0;
  void   *pCont = mndBuildTimerMsg(&msgLen);
  if (pCont == NULL) return;

  SRpcMsg timerMsg = {.msgType = TDMT_MND_GRANT_HB_TIMER,
                      .pCont = pCont,
                      .contLen = msgLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &timerMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
4,054✔
248

249
// Queue a TDMT_MND_UPTIME_TIMER message on the mnode write queue, with
// info.notFreeAhandle set to 1 and info.ahandle set to 0.
// Nothing is queued when the timer payload cannot be built.
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increase uptime");  // trace text previously misspelled as "increate"
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq != NULL) {
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
                      .pCont = pReq,
                      .contLen = contLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = 0};
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
    }
  }
}
39✔
265

266
// Walk every vgroup in the sdb and, for the replica located on `dnodeId`,
// force its sync state to TAOS_SYNC_STATE_OFFLINE (clearing syncRestore,
// syncCanRead and startTimeMs) unless it is already offline.
// When a replica state was changed, the owning db's stateTs is set to `curMs`
// if it differs.
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;

  void *pIter = NULL;
  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    bool stateChanged = false;
    for (int32_t vg = 0; vg < pVgroup->replica; ++vg) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[vg];
      if (pGid->dnodeId != dnodeId) continue;

      if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
        // the log now names the state that is actually assigned below
        mInfo(
            "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline "
            "restored:0 canRead:0",
            pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
        pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
        pGid->syncRestore = 0;
        pGid->syncCanRead = 0;
        pGid->startTimeMs = 0;
        stateChanged = true;
      }
      break;  // only the first replica matching dnodeId is examined
    }

    if (stateChanged) {
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, oldTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
452✔
307

308
// Scan all dnodes and, for each one mndIsDnodeOnline() reports as not online
// at the current timestamp, mark its vgroup replicas offline.
// Does nothing when mndAcquireRpc() returns non-zero.
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb         *pSdb = pMnode->pSdb;
  const int64_t nowMs = taosGetTimestampMs();

  void *pIter = NULL;
  for (;;) {
    SDnodeObj *pDnode = NULL;
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode);
    if (pIter == NULL) break;

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
332

333
// Report whether this mnode should be treated as "not leader".
// Returns true when:
//   - terrno is non-zero after syncGetState() (terrno is left as is),
//   - the sync state is not TAOS_SYNC_STATE_LEADER (terrno = TSDB_CODE_SYN_NOT_LEADER),
//   - the sync state or the mnode is not restored (terrno = TSDB_CODE_SYN_RESTORING).
// Returns false otherwise. The state is read under the mnode read lock and
// terrno is assigned only after the lock has been released.
static bool mnodeIsNotLeader(SMnode *pMnode) {
  bool    notLeader = true;
  bool    hasReason = false;
  int32_t reason = 0;

  terrno = 0;
  (void)taosThreadRwlockRdlock(&pMnode->lock);

  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno == 0) {
    if (state.state != TAOS_SYNC_STATE_LEADER) {
      reason = TSDB_CODE_SYN_NOT_LEADER;
      hasReason = true;
    } else if (!state.restored || !pMnode->restored) {
      reason = TSDB_CODE_SYN_RESTORING;
      hasReason = true;
    } else {
      notLeader = false;
    }
  }

  (void)taosThreadRwlockUnlock(&pMnode->lock);

  if (hasReason) {
    terrno = reason;
  }
  return notLeader;
}
355

356
static int32_t minCronTime() {
46,865✔
357
  int32_t min = INT32_MAX;
46,865✔
358
  min = TMIN(min, tsTtlPushIntervalSec);
46,865✔
359
  min = TMIN(min, tsTrimVDbIntervalSec);
46,865✔
360
  min = TMIN(min, tsS3MigrateIntervalSec);
46,865✔
361
  min = TMIN(min, tsTransPullupInterval);
46,865✔
362
  min = TMIN(min, tsCompactPullupInterval);
46,865✔
363
  min = TMIN(min, tsMqRebalanceInterval);
46,865✔
364
  min = TMIN(min, tsStreamCheckpointInterval);
46,865✔
365
  min = TMIN(min, tsStreamNodeCheckInterval);
46,865✔
366
  min = TMIN(min, tsArbHeartBeatIntervalSec);
46,865✔
367
  min = TMIN(min, tsArbCheckSyncIntervalSec);
46,865✔
368

369
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
46,865✔
370
  min = TMIN(min, telemInt);
46,865✔
371
  min = TMIN(min, tsGrantHBInterval);
46,865✔
372
  min = TMIN(min, tsUptimeInterval);
46,865✔
373

374
  return min <= 1 ? 2 : min;
46,865✔
375
}
376
// Fire every periodic pull-up whose interval divides `sec`.
// Each helper enqueues a timer message; the helpers are invoked in a fixed
// order. Failures of the two arbitrator pull-ups are logged here.
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
  if (sec % tsTtlPushIntervalSec == 0) mndPullupTtl(pMnode);

  if (sec % tsTrimVDbIntervalSec == 0) mndPullupTrimDb(pMnode);

  if (tsS3MigrateEnabled && sec % tsS3MigrateIntervalSec == 0) mndPullupS3MigrateDb(pMnode);

  if (sec % tsTransPullupInterval == 0) mndPullupTrans(pMnode);

  if (sec % tsCompactPullupInterval == 0) mndPullupCompacts(pMnode);

  if (sec % tsMqRebalanceInterval == 0) mndCalMqRebalance(pMnode);

  // send the checkpoint info every 30 sec
  if (sec % 30 == 0) mndStreamCheckpointTimer(pMnode);

  if (sec % tsStreamNodeCheckInterval == 0) mndStreamCheckNode(pMnode);

  if (sec % 5 == 0) mndStreamConsensusChkpt(pMnode);

  if (sec % tsTelemInterval == (TMIN(86400, (tsTelemInterval - 1)))) mndPullupTelem(pMnode);

  if (sec % tsGrantHBInterval == 0) mndPullupGrant(pMnode);

  if (sec % tsUptimeInterval == 0) mndIncreaseUpTime(pMnode);

  if (sec % (tsArbHeartBeatIntervalSec) == 0) {
    int32_t hbCode = mndPullupArbHeartbeat(pMnode);
    if (hbCode != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(hbCode));
    }
  }

  if (sec % (tsArbCheckSyncIntervalSec) == 0) {
    int32_t syncCode = mndPullupArbCheckSync(pMnode);
    if (syncCode != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(syncCode));
    }
  }
}
45,555✔
438
// Run the periodic checks that are due at second `sec`:
//   - the dnode offline check every (tsStatusInterval * 5) seconds,
//   - the sync timeout check every (MNODE_TIMEOUT_SEC / 2) seconds.
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
  const bool offlineCheckDue = (sec % (tsStatusInterval * 5) == 0);
  if (offlineCheckDue) {
    mndCheckDnodeOffline(pMnode);
  }

  const bool syncTimeoutDue = (sec % (MNODE_TIMEOUT_SEC / 2) == 0);
  if (syncTimeoutDue) {
    mndSyncCheckTimeout(pMnode);
  }
}
46,865✔
446

447
static void *mndThreadFp(void *param) {
1,444✔
448
  SMnode *pMnode = param;
1,444✔
449
  int64_t lastTime = 0;
1,444✔
450
  setThreadName("mnode-timer");
1,444✔
451

452
  while (1) {
475,614✔
453
    lastTime++;
477,058✔
454
    taosMsleep(100);
477,058✔
455
    if (mndGetStop(pMnode)) break;
477,058✔
456
    if (lastTime % 10 != 0) continue;
475,614✔
457

458
    int64_t sec = lastTime / 10;
46,865✔
459
    mndDoTimerCheckTask(pMnode, sec);
46,865✔
460

461
    int64_t minCron = minCronTime();
46,865✔
462
    if (sec % minCron == 0 && mnodeIsNotLeader(pMnode)) {
46,865✔
463
      // not leader, do nothing
464
      mTrace("timer not process since mnode is not leader, reason: %s", tstrerror(terrno));
1,310!
465
      terrno = 0;
1,310✔
466
      continue;
1,310✔
467
    }
468
    mndDoTimerPullupTask(pMnode, sec);
45,555✔
469
  }
470

471
  return NULL;
1,444✔
472
}
473

474
// Start the joinable "mnode-timer" thread running mndThreadFp().
// Returns 0 on success or the non-zero code from taosThreadCreate().
// The thread attribute object is destroyed on both the success and the
// failure path (it was previously left initialized when creation failed).
static int32_t mndInitTimer(SMnode *pMnode) {
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);

  int32_t code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadFp, pMnode);
  // the attribute object is only needed for the create call itself
  (void)taosThreadAttrDestroy(&thAttr);

  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
488

489
// Join the timer thread and clear its handle; a no-op when
// taosCheckPthreadValid() reports the handle as not valid.
static void mndCleanupTimer(SMnode *pMnode) {
  if (!taosCheckPthreadValid(pMnode->thread)) {
    return;
  }

  (void)taosThreadJoin(pMnode->thread, NULL);
  taosThreadClear(&pMnode->thread);
}
1,515✔
495

496
// Store a copy of `path` in pMnode->path and create that directory.
// Returns 0 on success, otherwise the value of terrno after the failing call.
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);
  if (pMnode->path == NULL) {
    int32_t dupCode = terrno;
    TAOS_RETURN(dupCode);
  }

  if (taosMkDir(pMnode->path) != 0) {
    int32_t mkdirCode = terrno;
    TAOS_RETURN(mkdirCode);
  }

  TAOS_RETURN(0);
}
511

512
// Open the mnode WAL at "<pMnode->path><TD_DIRSEP>wal" and store it in pMnode->pWal.
// In TD_ENTERPRISE builds, when SM4 encryption is selected and the scope
// includes the mnode WAL, the encryption algorithm and key are copied into the
// WAL config; an empty key yields TSDB_CODE_DNODE_INVALID_ENCRYPTKEY.
// Returns 0 on success; when walOpen() fails returns terrno if it is non-zero,
// else TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitWal(SMnode *pMnode) {
  char walPath[PATH_MAX + 20] = {0};
  (void)snprintf(walPath, sizeof(walPath), "%s%swal", pMnode->path, TD_DIRSEP);

  SWalCfg cfg = {0};
  cfg.vgId = 1;
  cfg.fsyncPeriod = 0;
  cfg.rollPeriod = -1;
  cfg.segSize = -1;
  cfg.committed = -1;
  cfg.retentionPeriod = 0;
  cfg.retentionSize = 0;
  cfg.level = TAOS_WAL_FSYNC;
  cfg.encryptAlgorithm = 0;

#if defined(TD_ENTERPRISE)
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
    cfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
    if (tsEncryptKey[0] == '\0') {
      TAOS_RETURN(TSDB_CODE_DNODE_INVALID_ENCRYPTKEY);
    }
    tstrncpy(cfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walPath, &cfg);
  if (pMnode->pWal == NULL) {
    int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    mError("failed to open wal since %s. wal:%s", tstrerror(code), walPath);
    TAOS_RETURN(code);
  }

  TAOS_RETURN(0);
}
549

550
// Close the mnode WAL if it is open and reset pMnode->pWal to NULL.
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) {
    return;
  }

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
1,515✔
556

557
// Create the sdb for this mnode from its path, WAL and back-pointer, storing
// it in pMnode->pSdb.
// Returns 0 on success; when sdbInit() fails returns terrno if it is non-zero,
// else TSDB_CODE_MND_RETURN_VALUE_NULL.
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt sdbOpt = {0};
  sdbOpt.path = pMnode->path;
  sdbOpt.pMnode = pMnode;
  sdbOpt.pWal = pMnode->pWal;

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(code);
}
573

574
// Load the sdb file unless the mnode is being deployed, then publish the
// sdb commit index as the mnode's applied index.
// Returns the result of sdbReadFile(), or 0 when no read was needed.
// NOTE(review): the "opened" message and the applied-index store happen even
// when sdbReadFile() returned an error - confirm this is intended.
static int32_t mndOpenSdb(SMnode *pMnode) {
  int32_t readCode = 0;
  if (!pMnode->deploy) {
    readCode = sdbReadFile(pMnode->pSdb);
  }

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);
  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);

  return readCode;
}
585

586
// Clean up the sdb if one exists and reset pMnode->pSdb to NULL.
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) {
    return;
  }

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
1,515✔
592

593
// Append one init/cleanup step to pMnode->pSteps.
// Returns 0 on success, or terrno when taosArrayPush() returns NULL.
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep newStep = {0};
  newStep.name = name;
  newStep.initFp = initFp;
  newStep.cleanupFp = cleanupFp;

  void *pPushed = taosArrayPush(pMnode->pSteps, &newStep);
  if (pPushed == NULL) {
    TAOS_RETURN(terrno);
  }

  TAOS_RETURN(0);
}
604

605
// Register all mnode init/cleanup steps, in order, into pMnode->pSteps.
// mndExecSteps() runs the init functions in this order and mndCleanupSteps()
// runs the cleanup functions in reverse order.
// Returns 0 on success or the first non-zero code from mndAllocStep().
static int32_t mndInitSteps(SMnode *pMnode) {
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } stepTable[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      // second "mnode-sdb" entry: opens the sdb after the steps above have been initialized
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  const int32_t numSteps = (int32_t)(sizeof(stepTable) / sizeof(stepTable[0]));
  for (int32_t i = 0; i < numSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, stepTable[i].name, stepTable[i].initFp, stepTable[i].cleanupFp));
  }
  return 0;
}
644

645
// Run the cleanup function of steps [pos .. 0] in descending order, then
// destroy the step array. `pos == -1` means "start from the last step".
// A no-op when the step array has already been released.
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  if (pos == -1) {
    pos = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  int32_t idx = pos;
  while (idx >= 0) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    if (pStep->cleanupFp != NULL) {
      (*pStep->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
664

665
// Run the init function of every registered step in registration order.
// On the first failure, the steps up to and including the failing one are
// cleaned up and the failure code is returned. On success the cluster id is
// cached in pMnode->clusterId and 0 is returned.
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t idx = 0; idx < numSteps; ++idx) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    if (pStep->initFp == NULL) continue;

    int32_t code = (*pStep->initFp)(pMnode);
    if (code != 0) {
      mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
      mndCleanupSteps(pMnode, idx);
      TAOS_RETURN(code);
    }

    mInfo("%s is initialized", pStep->name);
    tmsgReportStartup(pStep->name, "initialized");
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
685

686
// Copy the message callbacks, the dnode id and the sync-related settings
// (indexes, replica counts, replica and role arrays) from pOption into pMnode.
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  SSyncMgmt *pSync = &pMnode->syncMgmt;

  pMnode->msgCb = pOption->msgCb;
  pMnode->selfDnodeId = pOption->dnodeId;

  pSync->selfIndex = pOption->selfIndex;
  pSync->numOfReplicas = pOption->numOfReplicas;
  pSync->numOfTotalReplicas = pOption->numOfTotalReplicas;
  pSync->lastIndex = pOption->lastIndex;
  (void)memcpy(pSync->replicas, pOption->replicas, sizeof(pOption->replicas));
  (void)memcpy(pSync->nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
}
1,516✔
696

697
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
1,516✔
698
  terrno = 0;
1,516✔
699
  mInfo("start to open mnode in %s", path);
1,516!
700

701
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
1,516!
702
  if (pMnode == NULL) {
1,516!
703
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
704
    mError("failed to open mnode since %s", terrstr());
×
705
    return NULL;
×
706
  }
707
  (void)memset(pMnode, 0, sizeof(SMnode));
1,516✔
708

709
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
1,516✔
710
  if (code != 0) {
1,516!
711
    taosMemoryFree(pMnode);
×
712
    mError("failed to open mnode lock since %s", tstrerror(code));
×
713
    return NULL;
×
714
  }
715

716
  char timestr[24] = "1970-01-01 00:00:00.00";
1,516✔
717
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
1,516✔
718
  if (code < 0) {
1,516!
719
    mError("failed to parse time since %s", tstrerror(code));
×
720
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
721
    taosMemoryFree(pMnode);
×
722
    return NULL;
×
723
  }
724
  mndSetOptions(pMnode, pOption);
1,516✔
725

726
  pMnode->deploy = pOption->deploy;
1,516✔
727
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
1,516✔
728
  if (pMnode->pSteps == NULL) {
1,516!
729
    taosMemoryFree(pMnode);
×
730
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
731
    mError("failed to open mnode since %s", terrstr());
×
732
    return NULL;
×
733
  }
734

735
  code = mndCreateDir(pMnode, path);
1,516✔
736
  if (code != 0) {
1,516!
737
    code = terrno;
×
738
    mError("failed to open mnode since %s", tstrerror(code));
×
739
    mndClose(pMnode);
×
740
    terrno = code;
×
741
    return NULL;
×
742
  }
743

744
  code = mndInitSteps(pMnode);
1,516✔
745
  if (code != 0) {
1,516!
746
    code = terrno;
×
747
    mError("failed to open mnode since %s", tstrerror(code));
×
748
    mndClose(pMnode);
×
749
    terrno = code;
×
750
    return NULL;
×
751
  }
752

753
  code = mndExecSteps(pMnode);
1,516✔
754
  if (code != 0) {
1,516!
755
    code = terrno;
×
756
    mError("failed to open mnode since %s", tstrerror(code));
×
757
    mndClose(pMnode);
×
758
    terrno = code;
×
759
    return NULL;
×
760
  }
761

762
  mInfo("mnode open successfully");
1,516!
763
  return pMnode;
1,516✔
764
}
765

766
// Prepare the mnode for shutdown: request a leader transfer, run the sync
// pre-stop hook and write the sdb file. Failures of the transfer and of the
// sdb write are logged and otherwise ignored. A NULL pMnode is a no-op.
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  int32_t transferCode = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (transferCode < 0) {
    mError("failed to transfer leader since %s", tstrerror(transferCode));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  int32_t writeCode = sdbWriteFile(pMnode->pSdb, 0);
  if (writeCode < 0) {
    mError("failed to write sdb since %s", tstrerror(writeCode));
  }
}
1,515✔
781

782
/*
 * Tear down an mnode instance: run the cleanup steps (mndCleanupSteps with
 * position -1), then free the stored path and the SMnode object itself.
 * A NULL pMnode is ignored. The pointer must not be used after this call.
 */
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
1,515✔
791

792
/*
 * Start the mnode.
 *
 * Starts sync, then either deploys the sdb (when pMnode->deploy is set) or
 * prepares it. On success the mnode is marked restored, grants are reset and
 * the timer is initialised.
 *
 * Returns -1 when the sdb deploy/prepare step fails, otherwise the result of
 * mndInitTimer().
 */
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    if (sdbDeploy(pMnode->pSdb) != 0) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
  } else if (sdbPrepare(pMnode->pSdb) != 0) {
    mError("failed to prepare sdb while start mnode");
    return -1;
  }

  // Both the deploy and the prepare path mark the mnode as restored.
  mndSetRestored(pMnode, true);

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
812

813
/* Forward to syncIsCatchUp() for this mnode's sync handle and return its result. */
int32_t mndIsCatchUp(SMnode *pMnode) {
  return syncIsCatchUp(pMnode->syncMgmt.sync);
}
817

818
/* Return the sync role reported by syncGetRole() for this mnode's sync handle. */
ESyncRole mndGetRole(SMnode *pMnode) {
  return syncGetRole(pMnode->syncMgmt.sync);
}
822

823
/* Return the sync term reported by syncGetTerm() for this mnode's sync handle. */
int64_t mndGetTerm(SMnode *pMnode) {
  return syncGetTerm(pMnode->syncMgmt.sync);
}
827

828
/*
 * Fetch the arbitration token of this mnode's sync instance into outToken.
 * Returns whatever syncGetArbToken() returns.
 */
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  int64_t syncId = pMnode->syncMgmt.sync;
  return syncGetArbToken(syncId, outToken);
}
12,268✔
829

830
/*
 * Stop the mnode. The three calls run in this fixed order:
 *   1. mndSetStop      - set the stopped flag,
 *   2. mndSyncStop     - stop sync,
 *   3. mndCleanupTimer - clean up the timer.
 */
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);

  mndSyncStop(pMnode);

  mndCleanupTimer(pMnode);
}
1,515✔
835

836
/*
 * Hand a sync message to the sync module of the mnode referenced by
 * pMsg->info.node.
 *
 * Returns the code from syncProcessMsg(); a non-zero code is logged.
 */
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;

  // The mG* logging macros reference a local named `trace`.
  const STraceId *trace = &pMsg->info.traceId;
  mGTrace("vgId:1, sync msg:%p will be processed, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s, reason: %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
851

852
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
1,663,786✔
853
  int32_t code = 0;
1,663,786✔
854
  if (!IsReq(pMsg)) TAOS_RETURN(code);
1,663,786✔
855
  if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
1,565,221✔
856
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
1,340,245!
857
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK ||
1,206,035✔
858
      pMsg->msgType == TDMT_SCH_TASK_NOTIFY) {
755,889!
859
    TAOS_RETURN(code);
809,330✔
860
  }
861

862
  SMnode *pMnode = pMsg->info.node;
755,891✔
863
  (void)taosThreadRwlockRdlock(&pMnode->lock);
755,891✔
864
  if (pMnode->stopped) {
756,009✔
865
    (void)taosThreadRwlockUnlock(&pMnode->lock);
5✔
866
    code = TSDB_CODE_APP_IS_STOPPING;
5✔
867
    TAOS_RETURN(code);
5✔
868
  }
869

870
  terrno = 0;
756,004✔
871
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
755,904✔
872
  if (terrno != 0) {
756,033!
873
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
874
    code = terrno;
×
875
    TAOS_RETURN(code);
×
876
  }
877

878
  if (state.state != TAOS_SYNC_STATE_LEADER) {
756,029✔
879
    (void)taosThreadRwlockUnlock(&pMnode->lock);
14,570✔
880
    code = TSDB_CODE_SYN_NOT_LEADER;
14,570✔
881
    goto _OVER;
14,570✔
882
  }
883

884
  if (!state.restored || !pMnode->restored) {
741,459✔
885
    (void)taosThreadRwlockUnlock(&pMnode->lock);
338✔
886
    code = TSDB_CODE_SYN_RESTORING;
342✔
887
    goto _OVER;
342✔
888
  }
889

890
#if 1
891
  (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
741,121✔
892
#else
893
  int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
894
  mTrace("mnode rpc is acquired, ref:%d", ref);
895
#endif
896

897
  (void)taosThreadRwlockUnlock(&pMnode->lock);
741,116✔
898
  TAOS_RETURN(code);
741,122✔
899

900
_OVER:
14,912✔
901
  if (pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
14,912!
902
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
14,912!
903
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
14,875!
904
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
14,875!
905
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
14,875!
906
      pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
13,526!
907
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER) {
13,258!
908
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
1,654!
909
           pMnode->stopped, state.restored, syncStr(state.state));
910
    TAOS_RETURN(code);
1,654✔
911
  }
912

913
  const STraceId *trace = &pMsg->info.traceId;
13,258✔
914
  SEpSet          epSet = {0};
13,258✔
915
  mndGetMnodeEpSet(pMnode, &epSet);
13,258✔
916

917
  mGDebug(
13,258!
918
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
919
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
920
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
921
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));
922

923
  if (epSet.numOfEps <= 0) return -1;
13,258!
924

925
  for (int32_t i = 0; i < epSet.numOfEps; ++i) {
52,576✔
926
    mDebug("mnode index:%d, ep:%s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port);
39,318!
927
  }
928

929
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
13,258✔
930
  pMsg->info.rsp = rpcMallocCont(contLen);
13,258✔
931
  if (pMsg->info.rsp != NULL) {
13,258!
932
    if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
13,258!
933
      mError("failed to serialize ep set");
×
934
    }
935
    pMsg->info.hasEpSet = 1;
13,258✔
936
    pMsg->info.rspLen = contLen;
13,258✔
937
  }
938

939
  TAOS_RETURN(code);
13,258✔
940
}
941

942
/*
 * Dispatch an rpc message to the handler registered for its type.
 *
 * The plain handler table (msgFp) is consulted first; the extended table
 * (msgFpExt, whose handlers also receive pQueueInfo) is only consulted when
 * no plain handler exists. With no handler at all the call fails with
 * TSDB_CODE_MSG_NOT_PROCESSED. Otherwise mndCheckMnodeState() must pass
 * before the handler runs, and mndReleaseRpc() is called once the handler
 * returns.
 *
 * NOTE(review): mndCheckMnodeState() returns 0 without incrementing rpcRef
 * for non-request and TDMT_SCH_* messages, yet mndReleaseRpc() still runs
 * for them here - confirm that this is intended.
 *
 * Returns the handler's code, with -1 replaced by terrno.
 */
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;  // referenced by the mG* logging macros
  int32_t         code = TSDB_CODE_SUCCESS;

  MndMsgFp    fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)];
  MndMsgFpExt fpExt = (fp == NULL) ? pMnode->msgFpExt[TMSG_INDEX(pMsg->msgType)] : NULL;
  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  code = (fp != NULL) ? (*fp)(pMsg) : (*fpExt)(pMsg, pQueueInfo);
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
  } else if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
  } else {
    // TODO: remove this fallback once handlers stop returning a bare -1.
    if (code == -1) {
      code = terrno;
    }
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
            TMSG_INFO(pMsg->msgType));
  }

  TAOS_RETURN(code);
}
982

983
/*
 * Register fp as the plain handler for msgType.
 * The request is silently ignored when the message index is not below TDMT_MAX.
 */
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFp[slot] = fp;
}
260,752✔
989

990
/*
 * Register fp as the extended handler (the variant that also receives the
 * queue info) for msgType.
 * The request is silently ignored when the message index is not below TDMT_MAX.
 */
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFpExt[slot] = fp;
}
12,128✔
996

997
/*
 * Generate a positive 64-bit uid from a name.
 *
 * Bit layout before the sign is folded away:
 *   bits 24..63  low 40 bits of the current timestamp in microseconds
 *   bits  8..23  low 16 bits of MurmurHash3_32(name, len)
 *   bits  0..7   low 8 bits of taosRand()
 *
 * The pattern is built in unsigned arithmetic: shifting the 40-bit timestamp
 * left by 24 in a signed type overflows whenever bit 39 is set, which is
 * undefined behaviour. The returned value is the magnitude of the pattern
 * read as a two's-complement int64_t.
 *
 * Note: uid 0 is reserved, so a zero pattern is never returned. The single
 * pattern whose magnitude does not fit in int64_t (only the top bit set) is
 * skipped as well; in both cases a new value is generated.
 */
int64_t mndGenerateUid(const char *name, int32_t len) {
  int32_t hashval = MurmurHash3_32(name, len);
  for (;;) {
    int64_t  us = taosGetTimestampUs();
    uint64_t tsBits = ((uint64_t)us & 0x000000FFFFFFFFFFULL) << 24;
    uint64_t hashBits = ((uint64_t)hashval & 0xFFFFULL) << 8;
    uint64_t randBits = (uint64_t)taosRand() & 0xFFULL;
    uint64_t raw = tsBits + hashBits + randBits;

    if (raw == 0 || raw == (1ULL << 63)) {
      continue;  // reserved uid 0, or magnitude not representable in int64_t
    }

    // Two's-complement absolute value, computed without signed overflow.
    if ((raw >> 63) != 0) {
      return (int64_t)(0ULL - raw);
    }
    return (int64_t)raw;
  }
}
1009

1010
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
12✔
1011
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1012
  int32_t code = mndAcquireRpc(pMnode);
12✔
1013
  if (code < 0) {
12!
1014
    TAOS_RETURN(code);
×
1015
  } else if (code == 1) {
12!
1016
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1017
  }
1018

1019
  SSdb   *pSdb = pMnode->pSdb;
12✔
1020
  int64_t ms = taosGetTimestampMs();
12✔
1021

1022
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
12✔
1023
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
12✔
1024
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
12✔
1025
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
12✔
1026
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
12!
1027
      pStbInfo->stbs == NULL) {
12!
1028
    mndReleaseRpc(pMnode);
×
1029
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1030
    if (terrno != 0) code = terrno;
×
1031
    TAOS_RETURN(code);
×
1032
  }
1033

1034
  // cluster info
1035
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
12✔
1036
  pClusterInfo->monitor_interval = tsMonitorInterval;
12✔
1037
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
12✔
1038
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
12✔
1039
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
12✔
1040
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
12✔
1041
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
12✔
1042

1043
  void *pIter = NULL;
12✔
1044
  while (1) {
12✔
1045
    SDnodeObj *pObj = NULL;
24✔
1046
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
24✔
1047
    if (pIter == NULL) break;
24✔
1048

1049
    SMonDnodeDesc desc = {0};
12✔
1050
    desc.dnode_id = pObj->id;
12✔
1051
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
12✔
1052
    if (mndIsDnodeOnline(pObj, ms)) {
12✔
1053
      tstrncpy(desc.status, "ready", sizeof(desc.status));
11✔
1054
    } else {
1055
      tstrncpy(desc.status, "offline", sizeof(desc.status));
1✔
1056
    }
1057
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
24!
1058
      mError("failed put dnode into array, but continue at this monitor report")
×
1059
    }
1060
    sdbRelease(pSdb, pObj);
12✔
1061
  }
1062

1063
  pIter = NULL;
12✔
1064
  while (1) {
12✔
1065
    SMnodeObj *pObj = NULL;
24✔
1066
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
24✔
1067
    if (pIter == NULL) break;
24✔
1068

1069
    SMonMnodeDesc desc = {0};
12✔
1070
    desc.mnode_id = pObj->id;
12✔
1071
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
12✔
1072

1073
    if (pObj->id == pMnode->selfDnodeId) {
12!
1074
      pClusterInfo->first_ep_dnode_id = pObj->id;
12✔
1075
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
12✔
1076
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1077
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
12✔
1078
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1079
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
12✔
1080
      desc.syncState = TAOS_SYNC_STATE_LEADER;
12✔
1081
    } else {
1082
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1083
      desc.syncState = pObj->syncState;
×
1084
    }
1085
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
24!
1086
      mError("failed to put mnode into array, but continue at this monitor report");
×
1087
    }
1088
    sdbRelease(pSdb, pObj);
12✔
1089
  }
1090

1091
  // vgroup info
1092
  pIter = NULL;
12✔
1093
  while (1) {
26✔
1094
    SVgObj *pVgroup = NULL;
38✔
1095
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
38✔
1096
    if (pIter == NULL) break;
38✔
1097

1098
    pClusterInfo->vgroups_total++;
26✔
1099
    pClusterInfo->tbs_total += pVgroup->numOfTables;
26✔
1100

1101
    SMonVgroupDesc desc = {0};
26✔
1102
    desc.vgroup_id = pVgroup->vgId;
26✔
1103

1104
    SName name = {0};
26✔
1105
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
26✔
1106
    if (code < 0) {
26!
1107
      mError("failed to get db name since %s", tstrerror(code));
×
1108
      sdbRelease(pSdb, pVgroup);
×
1109
      TAOS_RETURN(code);
×
1110
    }
1111
    (void)tNameGetDbName(&name, desc.database_name);
26✔
1112

1113
    desc.tables_num = pVgroup->numOfTables;
26✔
1114
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
26✔
1115
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
26✔
1116
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
52✔
1117
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
26✔
1118
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
26✔
1119
      pVnDesc->dnode_id = pVgid->dnodeId;
26✔
1120
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
26✔
1121
      pVnDesc->syncState = pVgid->syncState;
26✔
1122
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
26!
1123
        tstrncpy(desc.status, "ready", sizeof(desc.status));
26✔
1124
        pClusterInfo->vgroups_alive++;
26✔
1125
      }
1126
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
26!
1127
        pClusterInfo->vnodes_alive++;
26✔
1128
      }
1129
      pClusterInfo->vnodes_total++;
26✔
1130
    }
1131

1132
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
52!
1133
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1134
    }
1135
    sdbRelease(pSdb, pVgroup);
26✔
1136
  }
1137

1138
  // stb info
1139
  pIter = NULL;
12✔
1140
  while (1) {
11✔
1141
    SStbObj *pStb = NULL;
23✔
1142
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
23✔
1143
    if (pIter == NULL) break;
23✔
1144

1145
    SMonStbDesc desc = {0};
11✔
1146

1147
    SName name1 = {0};
11✔
1148
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
11✔
1149
    if (code < 0) {
11!
1150
      mError("failed to get db name since %s", tstrerror(code));
×
1151
      sdbRelease(pSdb, pStb);
×
1152
      TAOS_RETURN(code);
×
1153
    }
1154
    (void)tNameGetDbName(&name1, desc.database_name);
11✔
1155

1156
    SName name2 = {0};
11✔
1157
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
11✔
1158
    if (code < 0) {
11!
1159
      mError("failed to get table name since %s", tstrerror(code));
×
1160
      sdbRelease(pSdb, pStb);
×
1161
      TAOS_RETURN(code);
×
1162
    }
1163
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
11✔
1164

1165
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
22!
1166
      mError("failed to put stb into array, but continue at this monitor report");
×
1167
    }
1168
    sdbRelease(pSdb, pStb);
11✔
1169
  }
1170

1171
  // grant info
1172
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
12✔
1173
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
12✔
1174
  if (pMnode->grant.expireTimeMS == 0) {
12!
1175
    pGrantInfo->expire_time = 0;
×
1176
    pGrantInfo->timeseries_total = 0;
×
1177
  }
1178

1179
  mndReleaseRpc(pMnode);
12✔
1180
  TAOS_RETURN(code);
12✔
1181
}
1182

1183
/*
 * Copy the current sync state of this mnode (state, restored flag, term and
 * role time) into pLoad and trace it. Always returns 0.
 */
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  pLoad->roleTimeMs = snapshot.roleTimeMs;
  pLoad->syncTerm = snapshot.term;
  pLoad->syncRestore = snapshot.restored;
  pLoad->syncState = snapshot.state;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);
  return 0;
}
1193

1194
/* Return the roleTimeMs field of this mnode's current sync state. */
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  return syncGetState(pMnode->syncMgmt.sync).roleTimeMs;
}
1198

1199
/*
 * Set the mnode's restored flag under the write lock and log the new value.
 *
 * When the flag is cleared, the call additionally blocks, polling every 3 ms,
 * until pMnode->rpcRef has dropped to zero or below.
 */
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) {
    return;
  }

  // Wait for outstanding rpc references to be released.
  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
1,709✔
1216

1217
/* Return the mnode's restored flag. The field is read without taking pMnode->lock. */
bool mndGetRestored(SMnode *pMnode) {
  bool isRestored = pMnode->restored;
  return isRestored;
}
×
1218

1219
/* Set the mnode's stopped flag under the write lock, then log the change. */
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->stopped = true;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
1,515✔
1225

1226
/* Return the mnode's stopped flag. The field is read without taking pMnode->lock. */
bool mndGetStop(SMnode *pMnode) {
  bool isStopped = pMnode->stopped;
  return isStopped;
}
477,058✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc