• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3530

16 Nov 2024 07:44AM UTC coverage: 60.219% (-0.7%) from 60.888%
#3530

push

travis-ci

web-flow
Update 03-ad.md

118417 of 252124 branches covered (46.97%)

Branch coverage included in aggregate %.

198982 of 274951 relevant lines covered (72.37%)

6072359.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.58
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndArbGroup.h"
19
#include "mndAnode.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndConsumer.h"
24
#include "mndDb.h"
25
#include "mndDnode.h"
26
#include "mndFunc.h"
27
#include "mndGrant.h"
28
#include "mndIndex.h"
29
#include "mndInfoSchema.h"
30
#include "mndMnode.h"
31
#include "mndPerfSchema.h"
32
#include "mndPrivilege.h"
33
#include "mndProfile.h"
34
#include "mndQnode.h"
35
#include "mndQuery.h"
36
#include "mndShow.h"
37
#include "mndSma.h"
38
#include "mndSnode.h"
39
#include "mndStb.h"
40
#include "mndStream.h"
41
#include "mndSubscribe.h"
42
#include "mndSync.h"
43
#include "mndTelem.h"
44
#include "mndTopic.h"
45
#include "mndTrans.h"
46
#include "mndUser.h"
47
#include "mndVgroup.h"
48
#include "mndView.h"
49

50
/**
 * Take one reference on pMnode->rpcRef.
 *
 * The read side of pMnode->lock is held while the state is inspected. The
 * counter is only incremented when the mnode is not stopped and
 * mndIsLeader() reports true.
 *
 * @param pMnode mnode instance
 * @return 0 when the reference was taken, TSDB_CODE_APP_IS_STOPPING when
 *         pMnode->stopped is set, -1 when mndIsLeader() returns false.
 */
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t result;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    result = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
#if 1
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
#else
    int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
    mTrace("mnode rpc is acquired, ref:%d", ref);
#endif
    result = 0;
  } else {
    result = -1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(result);
}
68

69
/**
 * Drop one reference from pMnode->rpcRef.
 *
 * The decrement is atomic and performed while holding the read side of
 * pMnode->lock.
 *
 * @param pMnode mnode instance
 */
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);

#if 1
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
#else
  int32_t ref = atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  mTrace("mnode rpc is released, ref:%d", ref);
#endif

  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
1,503,320✔
79

80
static void *mndBuildTimerMsg(int32_t *pContLen) {
77,818✔
81
  terrno = 0;
77,818✔
82
  SMTimerReq timerReq = {0};
77,818✔
83

84
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
77,818✔
85
  if (contLen <= 0) return NULL;
77,818!
86
  void *pReq = rpcMallocCont(contLen);
77,818✔
87
  if (pReq == NULL) return NULL;
77,818!
88

89
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
77,818!
90
    mError("failed to serialize timer msg since %s", terrstr());
×
91
  }
92
  *pContLen = contLen;
77,818✔
93
  return pReq;
77,818✔
94
}
95

96
/**
 * Post a TDMT_MND_TRANS_TIMER message to the mnode write queue.
 *
 * Does nothing if the timer message cannot be built; a failed enqueue is
 * only logged.
 */
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pBody, .contLen = bodyLen};
  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
21,874✔
108

109
/**
 * Post a TDMT_MND_COMPACT_TIMER message to the mnode write queue.
 *
 * Does nothing if the timer message cannot be built; a failed enqueue is
 * only logged.
 */
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pBody, .contLen = bodyLen};
  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,691✔
121

122
/**
 * Post a TDMT_MND_TTL_TIMER message to the mnode write queue.
 *
 * Nothing is enqueued when the timer message cannot be built, matching the
 * other pullup helpers in this file. A failed enqueue is only logged.
 */
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;  // never enqueue a message without a body

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,997✔
132

133
/**
 * Post a TDMT_MND_TRIM_DB_TIMER message to the mnode write queue.
 *
 * Nothing is enqueued when the timer message cannot be built, matching the
 * other pullup helpers in this file. A failed enqueue is only logged.
 */
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");  // trace text now names the task actually posted
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;  // never enqueue a message without a body

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
143

144
/**
 * Post a TDMT_MND_S3MIGRATE_DB_TIMER message to the mnode write queue.
 *
 * Nothing is enqueued when the timer message cannot be built, matching the
 * other pullup helpers in this file. A failed enqueue is only logged.
 */
static void mndPullupS3MigrateDb(SMnode *pMnode) {
  mTrace("pullup s3migrate");  // trace text now names the task actually posted
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) return;  // never enqueue a message without a body

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_S3MIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
154

155
/**
 * Post a TDMT_MND_ARB_HEARTBEAT_TIMER message (flagged noResp) to the ARB
 * queue.
 *
 * @return the result of tmsgPutToQueue(); if the timer message cannot be
 *         built, terrno when it is set, otherwise TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of enqueueing a message without a body.
    return terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
162

163
/**
 * Post a TDMT_MND_ARB_CHECK_SYNC_TIMER message (flagged noResp) to the ARB
 * queue.
 *
 * @return the result of tmsgPutToQueue(); if the timer message cannot be
 *         built, terrno when it is set, otherwise TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of enqueueing a message without a body.
    return terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
170

171
/**
 * Post a TDMT_MND_TMQ_TIMER message to the mnode write queue.
 *
 * Does nothing if the timer message cannot be built; a failed enqueue is
 * only logged.
 */
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pBody, .contLen = bodyLen};
  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
21,171✔
181

182
/**
 * Post a TDMT_MND_STREAM_BEGIN_CHECKPOINT message to the mnode write queue.
 *
 * The body is an SMStreamDoCheckpointMsg-sized buffer from rpcMallocCont();
 * this function does not write into it. Does nothing if the allocation
 * fails; a failed enqueue is only logged.
 */
static void mndStreamCheckpointTimer(SMnode *pMnode) {
  SMStreamDoCheckpointMsg *pBody = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
  if (pBody == NULL) return;

  int32_t bodyLen = sizeof(SMStreamDoCheckpointMsg);
  SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pBody, .contLen = bodyLen};
  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
962✔
193

194
/**
 * Post a TDMT_MND_NODECHECK_TIMER message to the mnode read queue.
 *
 * Does nothing if the timer message cannot be built; a failed enqueue is
 * only logged.
 */
static void mndStreamCheckNode(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pBody, .contLen = bodyLen};
  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,614✔
205

206
/**
 * Post a TDMT_MND_STREAM_CONSEN_TIMER message to the mnode write queue.
 *
 * Does nothing if the timer message cannot be built; a failed enqueue is
 * only logged.
 */
static void mndStreamConsensusChkpt(SMnode *pMnode) {
  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CONSEN_TIMER, .pCont = pBody, .contLen = bodyLen};
  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
8,276✔
217

218
/**
 * Post a TDMT_MND_TELEM_TIMER message to the mnode read queue.
 *
 * Does nothing if the timer message cannot be built; a failed enqueue is
 * only logged.
 */
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pBody, .contLen = bodyLen};
  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
215✔
230

231
/**
 * Post a TDMT_MND_GRANT_HB_TIMER message to the mnode write queue.
 *
 * The message carries info.notFreeAhandle = 1 and a zero info.ahandle.
 * Does nothing if the timer message cannot be built; a failed enqueue is
 * only logged.
 */
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t bodyLen = 0;
  void   *pBody = mndBuildTimerMsg(&bodyLen);
  if (pBody == NULL) return;

  SRpcMsg rpcMsg = {0};
  rpcMsg.msgType = TDMT_MND_GRANT_HB_TIMER;
  rpcMsg.pCont = pBody;
  rpcMsg.contLen = bodyLen;
  rpcMsg.info.notFreeAhandle = 1;
  rpcMsg.info.ahandle = 0;

  int32_t rc = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  if (rc < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
4,996✔
247

248
/**
 * Post a TDMT_MND_UPTIME_TIMER message to the mnode write queue.
 *
 * The message carries info.notFreeAhandle = 1 and the marker value 0x9527
 * as info.ahandle. Does nothing if the timer message cannot be built; a
 * failed enqueue is only logged.
 */
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increase uptime");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq != NULL) {
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
                      .pCont = pReq,
                      .contLen = contLen,
                      .info.notFreeAhandle = 1,
                      .info.ahandle = (void *)0x9527};
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
    }
  }
}
17✔
264

265
/**
 * Mark every vnode replica hosted on `dnodeId` as offline.
 *
 * Walks all SDB_VGROUP objects. For the replica whose dnodeId matches and
 * whose syncState is not already TAOS_SYNC_STATE_OFFLINE, the state is set
 * to offline and syncRestore, syncCanRead and startTimeMs are cleared.
 * When a vgroup changed, the stateTs of its database is set to `curMs`
 * (if the database can be acquired and the value differs).
 *
 * @param pMnode  mnode instance
 * @param dnodeId id of the dnode considered offline
 * @param curMs   timestamp written into SDbObj.stateTs
 */
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;

  void *pIter = NULL;
  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    bool stateChanged = false;
    for (int32_t vg = 0; vg < pVgroup->replica; ++vg) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[vg];
      if (pGid->dnodeId != dnodeId) continue;

      if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
        mInfo(
            "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline restored:0 "
            "canRead:0",
            pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
        pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
        pGid->syncRestore = 0;
        pGid->syncCanRead = 0;
        pGid->startTimeMs = 0;
        stateChanged = true;
      }
      // Stop at the first replica on this dnode.
      break;
    }

    if (stateChanged) {
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, oldTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
155✔
306

307
/**
 * Scan all SDB_DNODE objects and, for each one that mndIsDnodeOnline()
 * reports as not online at the current time, mark its vgroup replicas
 * offline via mndSetVgroupOffline().
 *
 * The scan runs only when mndAcquireRpc() succeeds; the reference is
 * released before returning.
 */
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) return;

  SSdb         *pSdb = pMnode->pSdb;
  const int64_t nowMs = taosGetTimestampMs();

  for (void *pIter = NULL;;) {
    SDnodeObj *pDnode = NULL;
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode);
    if (pIter == NULL) break;

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
331

332
/**
 * Report whether the timer thread should treat this mnode as "not leader".
 *
 * The sync state is read under the read side of pMnode->lock. terrno is
 * cleared on entry and describes the reason on a true return:
 *   - whatever syncGetState() left in terrno, if it reported an error;
 *   - TSDB_CODE_SYN_NOT_LEADER if the sync state is not leader;
 *   - TSDB_CODE_SYN_RESTORING if either the sync state or the mnode is not
 *     restored yet.
 *
 * @return true if not a restored leader, false otherwise.
 */
static bool mnodeIsNotLeader(SMnode *pMnode) {
  bool    notLeader = true;
  int32_t reason = 0;

  terrno = 0;
  (void)taosThreadRwlockRdlock(&pMnode->lock);

  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    // syncGetState() already recorded the reason; keep it untouched.
  } else if (state.state != TAOS_SYNC_STATE_LEADER) {
    reason = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!(state.restored && pMnode->restored)) {
    reason = TSDB_CODE_SYN_RESTORING;
  } else {
    notLeader = false;
  }

  (void)taosThreadRwlockUnlock(&pMnode->lock);

  // The reason is published only after the lock has been dropped.
  if (reason != 0) {
    terrno = reason;
  }
  return notLeader;
}
354

355
static int32_t minCronTime() {
46,482✔
356
  int32_t min = INT32_MAX;
46,482✔
357
  min = TMIN(min, tsTtlPushIntervalSec);
46,482✔
358
  min = TMIN(min, tsTrimVDbIntervalSec);
46,482✔
359
  min = TMIN(min, tsS3MigrateIntervalSec);
46,482✔
360
  min = TMIN(min, tsTransPullupInterval);
46,482✔
361
  min = TMIN(min, tsCompactPullupInterval);
46,482✔
362
  min = TMIN(min, tsMqRebalanceInterval);
46,482✔
363
  min = TMIN(min, tsStreamCheckpointInterval);
46,482✔
364
  min = TMIN(min, tsStreamNodeCheckInterval);
46,482✔
365
  min = TMIN(min, tsArbHeartBeatIntervalSec);
46,482✔
366
  min = TMIN(min, tsArbCheckSyncIntervalSec);
46,482✔
367

368
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
46,482✔
369
  min = TMIN(min, telemInt);
46,482✔
370
  min = TMIN(min, tsGrantHBInterval);
46,482✔
371
  min = TMIN(min, tsUptimeInterval);
46,482✔
372

373
  return min <= 1 ? 2 : min;
46,482✔
374
}
375
/**
 * Fire every periodic task whose interval divides `sec`.
 *
 * Each task only posts a timer message to one of the mnode queues. The
 * order of the checks below is the order in which messages are posted.
 *
 * @param pMnode mnode instance
 * @param sec    second counter supplied by the caller
 */
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
  if (sec % tsTtlPushIntervalSec == 0) mndPullupTtl(pMnode);

  if (sec % tsTrimVDbIntervalSec == 0) mndPullupTrimDb(pMnode);

  if (tsS3MigrateEnabled && sec % tsS3MigrateIntervalSec == 0) mndPullupS3MigrateDb(pMnode);

  if (sec % tsTransPullupInterval == 0) mndPullupTrans(pMnode);

  if (sec % tsCompactPullupInterval == 0) mndPullupCompacts(pMnode);

  if (sec % tsMqRebalanceInterval == 0) mndCalMqRebalance(pMnode);

  // send the checkpoint info every 30 sec
  if (sec % 30 == 0) mndStreamCheckpointTimer(pMnode);

  if (sec % tsStreamNodeCheckInterval == 0) mndStreamCheckNode(pMnode);

  if (sec % 5 == 0) mndStreamConsensusChkpt(pMnode);

  // Telemetry fires at a fixed offset inside its interval, not at 0.
  if (sec % tsTelemInterval == (TMIN(60, (tsTelemInterval - 1)))) mndPullupTelem(pMnode);

  if (sec % tsGrantHBInterval == 0) mndPullupGrant(pMnode);

  if (sec % tsUptimeInterval == 0) mndIncreaseUpTime(pMnode);

  if (sec % (tsArbHeartBeatIntervalSec) == 0) {
    int32_t hbCode = mndPullupArbHeartbeat(pMnode);
    if (hbCode != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(hbCode));
    }
  }

  if (sec % (tsArbCheckSyncIntervalSec) == 0) {
    int32_t syncCode = mndPullupArbCheckSync(pMnode);
    if (syncCode != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(syncCode));
    }
  }
}
44,894✔
437
/**
 * Run the periodic checks: the dnode offline check every
 * (tsStatusInterval * 5) seconds and the sync timeout check every
 * (MNODE_TIMEOUT_SEC / 2) seconds.
 *
 * @param pMnode mnode instance
 * @param sec    second counter supplied by the caller
 */
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
  const bool checkOffline = (sec % (tsStatusInterval * 5) == 0);
  if (checkOffline) {
    mndCheckDnodeOffline(pMnode);
  }

  const bool checkSyncTimeout = (sec % (MNODE_TIMEOUT_SEC / 2) == 0);
  if (checkSyncTimeout) {
    mndSyncCheckTimeout(pMnode);
  }
}
46,482✔
445

446
static void *mndThreadFp(void *param) {
1,956✔
447
  SMnode *pMnode = param;
1,956✔
448
  int64_t lastTime = 0;
1,956✔
449
  setThreadName("mnode-timer");
1,956✔
450

451
  while (1) {
474,164✔
452
    lastTime++;
476,120✔
453
    taosMsleep(100);
476,120✔
454
    if (mndGetStop(pMnode)) break;
476,120✔
455
    if (lastTime % 10 != 0) continue;
474,164✔
456

457
    int64_t sec = lastTime / 10;
46,482✔
458
    mndDoTimerCheckTask(pMnode, sec);
46,482✔
459

460
    int64_t minCron = minCronTime();
46,482✔
461
    if (sec % minCron == 0 && mnodeIsNotLeader(pMnode)) {
46,482✔
462
      // not leader, do nothing
463
      mTrace("timer not process since mnode is not leader, reason: %s", tstrerror(terrno));
1,588!
464
      terrno = 0;
1,588✔
465
      continue;
1,588✔
466
    }
467
    mndDoTimerPullupTask(pMnode, sec);
44,894✔
468
  }
469

470
  return NULL;
1,956✔
471
}
472

473
/**
 * Start the joinable "mnode-timer" thread running mndThreadFp().
 *
 * The thread attribute object is destroyed on both the success and the
 * failure path.
 *
 * @return 0 on success, otherwise the code returned by taosThreadCreate().
 */
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);

  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadFp, pMnode);
  // The attribute object is no longer needed once creation has been attempted.
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
487

488
/**
 * Join the timer thread and clear its handle, if the handle is valid.
 */
static void mndCleanupTimer(SMnode *pMnode) {
  if (!taosCheckPthreadValid(pMnode->thread)) return;

  (void)taosThreadJoin(pMnode->thread, NULL);
  taosThreadClear(&pMnode->thread);
}
1,956✔
494

495
/**
 * Store a copy of `path` in pMnode->path and create that directory.
 *
 * @return 0 on success; terrno if the copy or taosMkDir() fails.
 */
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  pMnode->path = taosStrdup(path);
  if (pMnode->path == NULL) {
    TAOS_RETURN(terrno);
  }

  if (taosMkDir(pMnode->path) != 0) {
    TAOS_RETURN(terrno);
  }

  TAOS_RETURN(0);
}
510

511
/**
 * Open the mnode WAL under "<pMnode->path><TD_DIRSEP>wal" and store the
 * handle in pMnode->pWal.
 *
 * With TD_ENTERPRISE, when the SM4 algorithm is configured and the encrypt
 * scope includes the mnode WAL, the encrypt algorithm and key are copied
 * into the WAL configuration; an empty key is rejected.
 *
 * @return 0 on success; TSDB_CODE_DNODE_INVALID_ENCRYPTKEY for an empty
 *         key; if walOpen() fails, terrno when set, otherwise
 *         TSDB_CODE_MND_RETURN_VALUE_NULL.
 */
static int32_t mndInitWal(SMnode *pMnode) {
  char walDir[PATH_MAX + 20] = {0};
  (void)snprintf(walDir, sizeof(walDir), "%s%swal", pMnode->path, TD_DIRSEP);

  SWalCfg cfg = {0};
  cfg.vgId = 1;
  cfg.fsyncPeriod = 0;
  cfg.rollPeriod = -1;
  cfg.segSize = -1;
  cfg.committed = -1;
  cfg.retentionPeriod = 0;
  cfg.retentionSize = 0;
  cfg.level = TAOS_WAL_FSYNC;
  cfg.encryptAlgorithm = 0;

#if defined(TD_ENTERPRISE)
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
    cfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
    if (tsEncryptKey[0] == '\0') {
      TAOS_RETURN(TSDB_CODE_DNODE_INVALID_ENCRYPTKEY);
    }
    (void)strncpy(cfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN);
  }
#endif

  pMnode->pWal = walOpen(walDir, &cfg);
  if (pMnode->pWal == NULL) {
    int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    mError("failed to open wal since %s. wal:%s", tstrerror(code), walDir);
    TAOS_RETURN(code);
  }

  TAOS_RETURN(0);
}
548

549
/**
 * Close the mnode WAL, if open, and reset pMnode->pWal to NULL.
 */
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) return;

  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
1,956✔
555

556
/**
 * Create the SDB instance for this mnode from its path and WAL, and store
 * it in pMnode->pSdb.
 *
 * @return 0 on success; if sdbInit() returns NULL, terrno when set,
 *         otherwise TSDB_CODE_MND_RETURN_VALUE_NULL.
 */
static int32_t mndInitSdb(SMnode *pMnode) {
  SSdbOpt opt = {.path = pMnode->path, .pMnode = pMnode, .pWal = pMnode->pWal};

  pMnode->pSdb = sdbInit(&opt);
  if (pMnode->pSdb != NULL) {
    TAOS_RETURN(0);
  }

  int32_t code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  TAOS_RETURN(code);
}
572

573
/**
 * Load the SDB from file unless this mnode is being deployed, then publish
 * the SDB commit index as pMnode->applied.
 *
 * The applied index is logged and stored even when sdbReadFile() fails.
 *
 * @return 0 when deploying; otherwise the result of sdbReadFile().
 */
static int32_t mndOpenSdb(SMnode *pMnode) {
  const int32_t code = pMnode->deploy ? 0 : sdbReadFile(pMnode->pSdb);

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);
  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);

  return code;
}
584

585
/**
 * Release the SDB instance, if any, and reset pMnode->pSdb to NULL.
 */
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) return;

  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
1,956✔
591

592
/**
 * Append one init/cleanup step to pMnode->pSteps.
 *
 * @param name      step name used in log and startup reports
 * @param initFp    init callback for the step
 * @param cleanupFp cleanup callback for the step, may be NULL
 * @return 0 on success, terrno if the array push fails.
 */
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep step = {.name = name, .initFp = initFp, .cleanupFp = cleanupFp};

  if (taosArrayPush(pMnode->pSteps, &step) != NULL) {
    TAOS_RETURN(0);
  }

  TAOS_RETURN(terrno);
}
603

604
/**
 * Register all mnode init/cleanup steps, in execution order.
 *
 * Steps are executed front to back by mndExecSteps() and cleaned up back to
 * front by mndCleanupSteps(), so the order of this table matters. The
 * second "mnode-sdb" entry loads the SDB (mndOpenSdb) and has no cleanup
 * callback.
 *
 * @return 0 on success, or the first error returned by mndAllocStep().
 */
static int32_t mndInitSteps(SMnode *pMnode) {
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } kSteps[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  const int32_t numOfSteps = (int32_t)(sizeof(kSteps) / sizeof(kSteps[0]));
  for (int32_t i = 0; i < numOfSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, kSteps[i].name, kSteps[i].initFp, kSteps[i].cleanupFp));
  }

  return 0;
}
643

644
/**
 * Run the cleanup callbacks of steps [0, pos] in reverse order, then
 * destroy the step array.
 *
 * @param pMnode mnode instance
 * @param pos    index of the last step to clean up, or -1 for all steps
 */
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) return;

  int32_t idx = pos;
  if (idx == -1) {
    idx = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  while (idx >= 0) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pStep->name);
    if (pStep->cleanupFp != NULL) {
      (*pStep->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
663

664
/**
 * Run the init callback of every registered step, in order.
 *
 * On the first failure, the steps up to and including the failing one are
 * cleaned up through mndCleanupSteps() and the error is returned. After all
 * steps succeed, pMnode->clusterId is loaded.
 *
 * @return 0 on success, otherwise the code returned by the failing step.
 */
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numOfSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t pos = 0; pos < numOfSteps; pos++) {
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, pos);
    if (pStep->initFp == NULL) continue;

    int32_t code = (*pStep->initFp)(pMnode);
    if (code == 0) {
      mInfo("%s is initialized", pStep->name);
      tmsgReportStartup(pStep->name, "initialized");
      continue;
    }

    mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
    mndCleanupSteps(pMnode, pos);
    TAOS_RETURN(code);
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
684

685
/**
 * Copy the message callbacks, the dnode id and the sync-related settings
 * from `pOption` into the mnode.
 */
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  pMnode->msgCb = pOption->msgCb;
  pMnode->selfDnodeId = pOption->dnodeId;

  SSyncMgmt *pMgmt = &pMnode->syncMgmt;
  pMgmt->selfIndex = pOption->selfIndex;
  pMgmt->numOfReplicas = pOption->numOfReplicas;
  pMgmt->numOfTotalReplicas = pOption->numOfTotalReplicas;
  pMgmt->lastIndex = pOption->lastIndex;
  (void)memcpy(pMgmt->replicas, pOption->replicas, sizeof(pOption->replicas));
  (void)memcpy(pMgmt->nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
}
1,957✔
695

696
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
1,957✔
697
  terrno = 0;
1,957✔
698
  mInfo("start to open mnode in %s", path);
1,957!
699

700
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
1,957✔
701
  if (pMnode == NULL) {
1,957!
702
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
703
    mError("failed to open mnode since %s", terrstr());
×
704
    return NULL;
×
705
  }
706
  (void)memset(pMnode, 0, sizeof(SMnode));
1,957✔
707

708
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
1,957✔
709
  if (code != 0) {
1,957!
710
    taosMemoryFree(pMnode);
×
711
    mError("failed to open mnode lock since %s", tstrerror(code));
×
712
    return NULL;
×
713
  }
714

715
  char timestr[24] = "1970-01-01 00:00:00.00";
1,957✔
716
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0);
1,957✔
717
  if (code < 0) {
1,957!
718
    mError("failed to parse time since %s", tstrerror(code));
×
719
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
720
    taosMemoryFree(pMnode);
×
721
    return NULL;
×
722
  }
723
  mndSetOptions(pMnode, pOption);
1,957✔
724

725
  pMnode->deploy = pOption->deploy;
1,957✔
726
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
1,957✔
727
  if (pMnode->pSteps == NULL) {
1,957!
728
    taosMemoryFree(pMnode);
×
729
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
730
    mError("failed to open mnode since %s", terrstr());
×
731
    return NULL;
×
732
  }
733

734
  code = mndCreateDir(pMnode, path);
1,957✔
735
  if (code != 0) {
1,957!
736
    code = terrno;
×
737
    mError("failed to open mnode since %s", tstrerror(code));
×
738
    mndClose(pMnode);
×
739
    terrno = code;
×
740
    return NULL;
×
741
  }
742

743
  code = mndInitSteps(pMnode);
1,957✔
744
  if (code != 0) {
1,957!
745
    code = terrno;
×
746
    mError("failed to open mnode since %s", tstrerror(code));
×
747
    mndClose(pMnode);
×
748
    terrno = code;
×
749
    return NULL;
×
750
  }
751

752
  code = mndExecSteps(pMnode);
1,957✔
753
  if (code != 0) {
1,957!
754
    code = terrno;
×
755
    mError("failed to open mnode since %s", tstrerror(code));
×
756
    mndClose(pMnode);
×
757
    terrno = code;
×
758
    return NULL;
×
759
  }
760

761
  mInfo("mnode open successfully");
1,957!
762
  return pMnode;
1,957✔
763
}
764

765
/**
 * Prepare an mnode for shutdown: request a leader transfer, pre-stop sync
 * and write the SDB to file.
 *
 * Failures of the leader transfer and of the SDB write are only logged.
 * A NULL `pMnode` is ignored.
 */
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) return;

  int32_t rc = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (rc < 0) {
    mError("failed to transfer leader since %s", tstrerror(rc));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  rc = sdbWriteFile(pMnode->pSdb, 0);
  if (rc < 0) {
    mError("failed to write sdb since %s", tstrerror(rc));
  }
}
1,956✔
780

781
// Closes an mnode: runs mndCleanupSteps(pMnode, -1), then frees the path
// string and the SMnode object itself. pMnode must not be used afterwards.
// A NULL pMnode is a no-op.
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
1,956✔
790

791
// Starts a previously opened mnode.
//
// Order of operations: start sync, deploy the sdb when pMnode->deploy is set
// (marking the mnode restored on success), reset grants, then start the
// timer.
//
// Returns -1 if the sdb deployment fails, otherwise whatever mndInitTimer
// returns.
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    if (0 != sdbDeploy(pMnode->pSdb)) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    mndSetRestored(pMnode, true);
  }

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
805

806
// Returns syncIsCatchUp() for this mnode's sync handle.
int32_t mndIsCatchUp(SMnode *pMnode) {
  return syncIsCatchUp(pMnode->syncMgmt.sync);
}
810

811
// Returns syncGetRole() for this mnode's sync handle.
ESyncRole mndGetRole(SMnode *pMnode) {
  return syncGetRole(pMnode->syncMgmt.sync);
}
815

816
// Returns syncGetTerm() for this mnode's sync handle.
int64_t mndGetTerm(SMnode *pMnode) {
  return syncGetTerm(pMnode->syncMgmt.sync);
}
820

821
// Forwards to syncGetArbToken() with this mnode's sync handle; outToken is
// passed through unchanged and the callee's result is returned as-is.
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  int32_t ret = syncGetArbToken(pMnode->syncMgmt.sync, outToken);
  return ret;
}
11,666✔
822

823
// Stops the mnode in three steps, in this order:
//   1. mndSetStop      - raise the `stopped` flag
//   2. mndSyncStop     - stop the sync module
//   3. mndCleanupTimer - tear down the timer
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);

  mndSyncStop(pMnode);

  mndCleanupTimer(pMnode);
}
1,956✔
828

829
// Hands a sync message to syncProcessMsg() using the mnode found in
// pMsg->info.node. A non-zero result is logged and returned unchanged.
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode *pMnode = pMsg->info.node;

  // NOTE(review): `trace` is never referenced by name below; presumably the
  // mG* logging macros pick it up, so the identifier must stay as is.
  const STraceId *trace = &pMsg->info.traceId;
  mGTrace("vgId:1, sync msg:%p will be processed, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s, reason: %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
844

845
// Decides whether this mnode may process pMsg right now.
//
// Returns 0 when the message may proceed:
//   - non-request messages and the TDMT_SCH_* scheduler messages listed
//     below pass without any state check (and without touching rpcRef);
//   - any other request passes only if the mnode is not stopped, the sync
//     state is leader, and both sync and mnode are restored. In that case
//     pMnode->rpcRef is incremented before returning.
//
// Returns a non-zero code otherwise:
//   - TSDB_CODE_APP_IS_STOPPING when the mnode is stopped;
//   - terrno if it is non-zero after syncGetState();
//   - TSDB_CODE_SYN_NOT_LEADER / TSDB_CODE_SYN_RESTORING; for non-timer
//     messages the current mnode ep set is serialized into pMsg->info.rsp
//     (when the buffer can be allocated) so it travels with the response;
//   - -1 if the ep set is empty.
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;

  if (!IsReq(pMsg)) {
    TAOS_RETURN(code);
  }

  const bool isSchedulerMsg =
      pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH ||
      pMsg->msgType == TDMT_SCH_DROP_TASK || pMsg->msgType == TDMT_SCH_TASK_NOTIFY;
  if (isSchedulerMsg) {
    TAOS_RETURN(code);
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);

  if (pMnode->stopped) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_APP_IS_STOPPING;
    TAOS_RETURN(code);
  }

  // terrno is cleared first so that a non-zero value afterwards can be
  // attributed to syncGetState().
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  // Leadership is checked before the restore flags; both are evaluated while
  // the read lock is still held.
  bool rejected = true;
  if (state.state != TAOS_SYNC_STATE_LEADER) {
    code = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!state.restored || !pMnode->restored) {
    code = TSDB_CODE_SYN_RESTORING;
  } else {
    rejected = false;
  }

  if (rejected) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    goto _OVER;
  }

  // Accepted: take an rpc reference while still under the read lock.
  (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);

  (void)taosThreadRwlockUnlock(&pMnode->lock);
  TAOS_RETURN(code);

_OVER:
  // Timer-driven messages are dropped with a trace line only; no ep set is
  // attached for them.
  if (pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
      pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER) {
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
           pMnode->stopped, state.restored, syncStr(state.state));
    TAOS_RETURN(code);
  }

  // NOTE(review): `trace` is not referenced by name; presumably the mG*
  // logging macros pick it up, so the identifier must stay as is.
  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) {
    return -1;
  }

  for (int32_t epIdx = 0; epIdx < epSet.numOfEps; ++epIdx) {
    mDebug("mnode index:%d, ep:%s:%u", epIdx, epSet.eps[epIdx].fqdn, epSet.eps[epIdx].port);
  }

  // First call sizes the buffer, second call fills it.
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
  pMsg->info.rsp = rpcMallocCont(contLen);
  if (pMsg->info.rsp == NULL) {
    TAOS_RETURN(code);
  }

  if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
    mError("failed to serialize ep set");
  }
  pMsg->info.hasEpSet = 1;
  pMsg->info.rspLen = contLen;

  TAOS_RETURN(code);
}
934

935
// Dispatches one rpc message to the handler registered for its type.
//
// Handler lookup tries pMnode->msgFp first and pMnode->msgFpExt second; the
// latter additionally receives pQueueInfo. The message is then gated by
// mndCheckMnodeState(); if that passes, the handler runs and mndReleaseRpc()
// is called afterwards.
//
// Returns:
//   - TSDB_CODE_MSG_NOT_PROCESSED when the message type maps outside the
//     handler tables or no handler is registered;
//   - the code from mndCheckMnodeState() when the mnode cannot serve the
//     message (via TAOS_CHECK_RETURN);
//   - otherwise the handler's result, with a result of -1 replaced by terrno.
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode *pMnode = pMsg->info.node;
  // NOTE(review): `trace` is not referenced by name; presumably the mG*
  // logging macros pick it up, so the identifier must stay as is.
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

  // mndSetMsgHandle()/mndSetMsgHandleExt() only ever populate slots below
  // TDMT_MAX, so an index outside that range cannot have a handler and must
  // not be used to read the tables.
  int32_t msgIdx = TMSG_INDEX(pMsg->msgType);
  if (msgIdx < 0 || msgIdx >= TDMT_MAX) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  MndMsgFp    fp = pMnode->msgFp[msgIdx];
  MndMsgFpExt fpExt = NULL;
  if (fp == NULL) {
    fpExt = pMnode->msgFpExt[msgIdx];
    if (fpExt == NULL) {
      mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
      code = TSDB_CODE_MSG_NOT_PROCESSED;
      TAOS_RETURN(code);
    }
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp != NULL) {
    code = (*fp)(pMsg);
  } else {
    code = (*fpExt)(pMsg, pQueueInfo);
  }
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
  } else if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
  } else {
    // TODO remove this wrong set code
    if (code == -1) {
      code = terrno;
    }
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
            TMSG_INFO(pMsg->msgType));
  }

  TAOS_RETURN(code);
}
975

976
// Registers fp as the handler for msgType in pMnode->msgFp. A message type
// whose index is not below TDMT_MAX is silently ignored.
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFp[slot] = fp;
}
334,647✔
982

983
// Registers fp as the extended handler for msgType in pMnode->msgFpExt. A
// message type whose index is not below TDMT_MAX is silently ignored.
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  const tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFpExt[slot] = fp;
}
15,656✔
989

990
// Note: uid 0 is reserved
991
// Generates a non-zero, non-negative 64-bit uid from `name`.
//
// Layout of the raw value before the sign is normalized:
//   bits 63..24  low 40 bits of the current microsecond timestamp
//   bits 23..8   low 16 bits of MurmurHash3_32(name, len)
//   bits  7..0   low 8 bits of taosRand()
//
// The value is composed in unsigned arithmetic: shifting a 40-bit quantity
// left by 24 can reach the sign bit, which is undefined for a signed type.
// A raw value of 0 (reserved) or INT64_MIN (its absolute value is not
// representable, so llabs() would be undefined) is discarded and a new one
// is drawn.
int64_t mndGenerateUid(const char *name, int32_t len) {
  const uint64_t hashBits = ((uint64_t)MurmurHash3_32(name, len) & 0xFFFFu) << 8;

  do {
    const uint64_t us = (uint64_t)taosGetTimestampUs();
    const uint64_t timeBits = (us & 0x000000FFFFFFFFFFull) << 24;
    const uint64_t randBits = (uint64_t)taosRand() & 0xFFu;

    const int64_t uuid = (int64_t)(timeBits + hashBits + randBits);
    if (uuid != 0 && uuid != INT64_MIN) {
      return llabs(uuid);
    }
  } while (true);
}
1002

1003
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
12✔
1004
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1005
  int32_t code = 0;
12✔
1006
  TAOS_CHECK_RETURN(mndAcquireRpc(pMnode));
12!
1007

1008
  SSdb   *pSdb = pMnode->pSdb;
12✔
1009
  int64_t ms = taosGetTimestampMs();
12✔
1010

1011
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
12✔
1012
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
12✔
1013
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
12✔
1014
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
12✔
1015
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
12!
1016
      pStbInfo->stbs == NULL) {
12!
1017
    mndReleaseRpc(pMnode);
×
1018
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1019
    if (terrno != 0) code = terrno;
×
1020
    TAOS_RETURN(code);
×
1021
  }
1022

1023
  // cluster info
1024
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
12✔
1025
  pClusterInfo->monitor_interval = tsMonitorInterval;
12✔
1026
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
12✔
1027
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
12✔
1028
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
12✔
1029
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
12✔
1030
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
12✔
1031

1032
  void *pIter = NULL;
12✔
1033
  while (1) {
12✔
1034
    SDnodeObj *pObj = NULL;
24✔
1035
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
24✔
1036
    if (pIter == NULL) break;
24✔
1037

1038
    SMonDnodeDesc desc = {0};
12✔
1039
    desc.dnode_id = pObj->id;
12✔
1040
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
12✔
1041
    if (mndIsDnodeOnline(pObj, ms)) {
12✔
1042
      tstrncpy(desc.status, "ready", sizeof(desc.status));
11✔
1043
    } else {
1044
      tstrncpy(desc.status, "offline", sizeof(desc.status));
1✔
1045
    }
1046
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
24!
1047
      mError("failed put dnode into array, but continue at this monitor report")
×
1048
    }
1049
    sdbRelease(pSdb, pObj);
12✔
1050
  }
1051

1052
  pIter = NULL;
12✔
1053
  while (1) {
12✔
1054
    SMnodeObj *pObj = NULL;
24✔
1055
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
24✔
1056
    if (pIter == NULL) break;
24✔
1057

1058
    SMonMnodeDesc desc = {0};
12✔
1059
    desc.mnode_id = pObj->id;
12✔
1060
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
12✔
1061

1062
    if (pObj->id == pMnode->selfDnodeId) {
12!
1063
      pClusterInfo->first_ep_dnode_id = pObj->id;
12✔
1064
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
12✔
1065
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1066
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
12✔
1067
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1068
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
12✔
1069
      desc.syncState = TAOS_SYNC_STATE_LEADER;
12✔
1070
    } else {
1071
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1072
      desc.syncState = pObj->syncState;
×
1073
    }
1074
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
24!
1075
      mError("failed to put mnode into array, but continue at this monitor report");
×
1076
    }
1077
    sdbRelease(pSdb, pObj);
12✔
1078
  }
1079

1080
  // vgroup info
1081
  pIter = NULL;
12✔
1082
  while (1) {
26✔
1083
    SVgObj *pVgroup = NULL;
38✔
1084
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
38✔
1085
    if (pIter == NULL) break;
38✔
1086

1087
    pClusterInfo->vgroups_total++;
26✔
1088
    pClusterInfo->tbs_total += pVgroup->numOfTables;
26✔
1089

1090
    SMonVgroupDesc desc = {0};
26✔
1091
    desc.vgroup_id = pVgroup->vgId;
26✔
1092

1093
    SName name = {0};
26✔
1094
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
26✔
1095
    if (code < 0) {
26!
1096
      mError("failed to get db name since %s", tstrerror(code));
×
1097
      sdbRelease(pSdb, pVgroup);
×
1098
      TAOS_RETURN(code);
×
1099
    }
1100
    (void)tNameGetDbName(&name, desc.database_name);
26✔
1101

1102
    desc.tables_num = pVgroup->numOfTables;
26✔
1103
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
26✔
1104
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
26✔
1105
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
52✔
1106
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
26✔
1107
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
26✔
1108
      pVnDesc->dnode_id = pVgid->dnodeId;
26✔
1109
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
26✔
1110
      pVnDesc->syncState = pVgid->syncState;
26✔
1111
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
26!
1112
        tstrncpy(desc.status, "ready", sizeof(desc.status));
26✔
1113
        pClusterInfo->vgroups_alive++;
26✔
1114
      }
1115
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
26!
1116
        pClusterInfo->vnodes_alive++;
26✔
1117
      }
1118
      pClusterInfo->vnodes_total++;
26✔
1119
    }
1120

1121
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
52!
1122
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1123
    }
1124
    sdbRelease(pSdb, pVgroup);
26✔
1125
  }
1126

1127
  // stb info
1128
  pIter = NULL;
12✔
1129
  while (1) {
11✔
1130
    SStbObj *pStb = NULL;
23✔
1131
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
23✔
1132
    if (pIter == NULL) break;
23✔
1133

1134
    SMonStbDesc desc = {0};
11✔
1135

1136
    SName name1 = {0};
11✔
1137
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
11✔
1138
    if (code < 0) {
11!
1139
      mError("failed to get db name since %s", tstrerror(code));
×
1140
      sdbRelease(pSdb, pStb);
×
1141
      TAOS_RETURN(code);
×
1142
    }
1143
    (void)tNameGetDbName(&name1, desc.database_name);
11✔
1144

1145
    SName name2 = {0};
11✔
1146
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
11✔
1147
    if (code < 0) {
11!
1148
      mError("failed to get table name since %s", tstrerror(code));
×
1149
      sdbRelease(pSdb, pStb);
×
1150
      TAOS_RETURN(code);
×
1151
    }
1152
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
11✔
1153

1154
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
22!
1155
      mError("failed to put stb into array, but continue at this monitor report");
×
1156
    }
1157
    sdbRelease(pSdb, pStb);
11✔
1158
  }
1159

1160
  // grant info
1161
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
12✔
1162
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
12✔
1163
  if (pMnode->grant.expireTimeMS == 0) {
12!
1164
    pGrantInfo->expire_time = 0;
×
1165
    pGrantInfo->timeseries_total = 0;
×
1166
  }
1167

1168
  mndReleaseRpc(pMnode);
12✔
1169
  TAOS_RETURN(code);
12✔
1170
}
1171

1172
// Copies the current sync state (state, restored flag, term, role time) of
// this mnode into pLoad and traces the result. Always returns 0.
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  const SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = snapshot.state;
  pLoad->syncRestore = snapshot.restored;
  pLoad->syncTerm = snapshot.term;
  pLoad->roleTimeMs = snapshot.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);

  return 0;
}
1182

1183
// Returns the roleTimeMs field of the current sync state of this mnode.
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  const SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);
  return snapshot.roleTimeMs;
}
1187

1188
// Sets pMnode->restored under the write lock and logs the new value.
//
// When clearing the flag (restored == false) this call additionally blocks
// until pMnode->rpcRef has dropped to zero or below, sleeping via
// taosMsleep(3) between checks.
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) {
    return;
  }

  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
1,948✔
1205

1206
// Returns the current value of pMnode->restored (read without the lock).
bool mndGetRestored(SMnode *pMnode) {
  return pMnode->restored;
}
×
1207

1208
// Raises pMnode->stopped under the write lock, then logs the transition.
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->stopped = true;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
1,956✔
1214

1215
// Returns the current value of pMnode->stopped (read without the lock).
bool mndGetStop(SMnode *pMnode) {
  return pMnode->stopped;
}
476,120✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc