• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #5022

14 Apr 2026 05:32AM UTC coverage: 72.278% (-0.01%) from 72.291%
#5022

push

travis-ci

web-flow
merge: from main to3.0 branch #35128

74 of 97 new or added lines in 12 files covered. (76.29%)

555 existing lines in 127 files now uncovered.

257556 of 356343 relevant lines covered (72.28%)

133355051.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.03
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndBnode.h"
21
#include "mndCluster.h"
22
#include "mndCompact.h"
23
#include "mndCompactDetail.h"
24
#include "mndConfig.h"
25
#include "mndConsumer.h"
26
#include "mndDb.h"
27
#include "mndDnode.h"
28
#include "mndEncryptAlgr.h"
29
#include "mndFunc.h"
30
#include "mndGrant.h"
31
#include "mndIndex.h"
32
#include "mndInfoSchema.h"
33
#include "mndInstance.h"
34
#include "mndMnode.h"
35
#include "mndMount.h"
36
#include "mndPerfSchema.h"
37
#include "mndPrivilege.h"
38
#include "mndProfile.h"
39
#include "mndQnode.h"
40
#include "mndQuery.h"
41
#include "mndRetention.h"
42
#include "mndRetentionDetail.h"
43
#include "mndRole.h"
44
#include "mndRsma.h"
45
#include "mndScan.h"
46
#include "mndScanDetail.h"
47
#include "mndShow.h"
48
#include "mndSma.h"
49
#include "mndSnode.h"
50
#include "mndSsMigrate.h"
51
#include "mndStb.h"
52
#include "mndStream.h"
53
#include "mndSubscribe.h"
54
#include "mndSync.h"
55
#include "mndTelem.h"
56
#include "mndTopic.h"
57
#include "mndTrans.h"
58
#include "mndUser.h"
59
#include "mndToken.h"
60
#include "mndVgroup.h"
61
#include "mndView.h"
62
#include "mndXnode.h"
63
#include "tencrypt.h"
64

65
/**
 * Take one RPC reference on the mnode if it can currently serve requests.
 *
 * Both the state check and the increment of pMnode->rpcRef are done while
 * pMnode->lock is held for reading.
 *
 * @param pMnode mnode instance
 * @return 0 when the reference was taken; TSDB_CODE_APP_IS_STOPPING when
 *         pMnode->stopped is set; 1 when mndIsLeader() reports false.
 *         No reference is taken for the non-zero results.
 */
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
  int32_t result = 0;

  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    result = TSDB_CODE_APP_IS_STOPPING;
  } else if (mndIsLeader(pMnode)) {
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
  } else {
    result = 1;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  TAOS_RETURN(result);
}
83

84
/**
 * Drop one RPC reference: decrements pMnode->rpcRef while holding
 * pMnode->lock for reading.
 *
 * @param pMnode mnode instance
 */
static inline void mndReleaseRpc(SMnode *pMnode) {
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
  (void)taosThreadRwlockUnlock(&pMnode->lock);
}
310,621,606✔
94

95
static void *mndBuildTimerMsg(int32_t *pContLen) {
77,763,858✔
96
  terrno = 0;
77,763,858✔
97
  SMTimerReq timerReq = {0};
77,763,858✔
98

99
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
77,763,858✔
100
  if (contLen <= 0) return NULL;
77,763,858✔
101
  void *pReq = rpcMallocCont(contLen);
77,763,858✔
102
  if (pReq == NULL) return NULL;
77,763,858✔
103

104
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
77,763,858✔
105
    mError("failed to serialize timer msg since %s", terrstr());
×
106
  }
107
  *pContLen = contLen;
77,763,858✔
108
  return pReq;
77,763,858✔
109
}
110

111
/**
 * Post a TDMT_MND_TRANS_TIMER message to the mnode write queue.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupTrans(SMnode *pMnode) {
  mTrace("pullup trans msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
15,678,870✔
123

124
/**
 * Post a TDMT_MND_COMPACT_TIMER message to the mnode write queue.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupCompacts(SMnode *pMnode) {
  mTrace("pullup compact timer msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,130,618✔
136

137
/**
 * Post a TDMT_MND_SCAN_TIMER message to the mnode write queue.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupScans(SMnode *pMnode) {
  mTrace("pullup scan timer msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_SCAN_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,130,701✔
149

150
/**
 * Post a TDMT_MND_INSTANCE_TIMER message to the mnode write queue.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupInstances(SMnode *pMnode) {
  mTrace("pullup instance timer msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_INSTANCE_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
6,259,119✔
161

162
/**
 * Post a TDMT_MND_TTL_TIMER message to the mnode write queue.
 *
 * When mndBuildTimerMsg() returns NULL nothing is enqueued, matching the
 * pullup helpers in this file that already check for NULL (for example
 * mndPullupTrans). An enqueue failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No message body could be built; do not enqueue a message with a NULL payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,298,576✔
172

173
/**
 * Post a TDMT_MND_TRIM_DB_TIMER message to the mnode write queue.
 *
 * When mndBuildTimerMsg() returns NULL nothing is enqueued, matching the
 * pullup helpers in this file that already check for NULL (for example
 * mndPullupTrans). An enqueue failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupTrimDb(SMnode *pMnode) {
  mTrace("pullup trim");

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No message body could be built; do not enqueue a message with a NULL payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
10,971✔
183

184
/**
 * Post a TDMT_MND_QUERY_TRIM_TIMER message to the mnode write queue.
 *
 * When mndBuildTimerMsg() returns NULL nothing is enqueued, matching the
 * pullup helpers in this file that already check for NULL (for example
 * mndPullupTrans). An enqueue failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupQueryTrimDb(SMnode *pMnode) {
  mTrace("pullup trim query");

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No message body could be built; do not enqueue a message with a NULL payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_QUERY_TRIM_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
3,340,997✔
193

194
/**
 * Post a TDMT_MND_SSMIGRATE_DB_TIMER message to the mnode write queue.
 *
 * Returns without doing anything when grantCheck(TSDB_GRANT_SHARED_STORAGE)
 * does not succeed. When mndBuildTimerMsg() returns NULL nothing is
 * enqueued, matching the pullup helpers in this file that already check for
 * NULL (for example mndPullupTrans). An enqueue failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupSsMigrateDb(SMnode *pMnode) {
  if (grantCheck(TSDB_GRANT_SHARED_STORAGE) != TSDB_CODE_SUCCESS) {
    return;
  }

  mTrace("pullup ssmigrate db");

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No message body could be built; do not enqueue a message with a NULL payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_SSMIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
207

208
/**
 * Post a TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER message to the mnode
 * write queue.
 *
 * When mndBuildTimerMsg() returns NULL nothing is enqueued, matching the
 * pullup helpers in this file that already check for NULL (for example
 * mndPullupTrans). An enqueue failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupUpdateSsMigrateProgress(SMnode *pMnode) {
  mTrace("pullup update ssmigrate progress");

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No message body could be built; do not enqueue a message with a NULL payload.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
171,496✔
217

218
/**
 * Post a TDMT_MND_ARB_HEARTBEAT_TIMER message (flagged info.noResp = 1) to
 * the arbitrator queue.
 *
 * @param pMnode mnode instance
 * @return result of tmsgPutToQueue(); when the timer message cannot be built,
 *         terrno if it is non-zero, otherwise TSDB_CODE_MND_RETURN_VALUE_NULL
 */
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of enqueuing a message with a NULL payload.
    return (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
225

226
/**
 * Post a TDMT_MND_ARB_CHECK_SYNC_TIMER message (flagged info.noResp = 1) to
 * the arbitrator queue.
 *
 * @param pMnode mnode instance
 * @return result of tmsgPutToQueue(); when the timer message cannot be built,
 *         terrno if it is non-zero, otherwise TSDB_CODE_MND_RETURN_VALUE_NULL
 */
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // Report the failure instead of enqueuing a message with a NULL payload.
    return (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
233

234
/**
 * Post a TDMT_MND_TMQ_TIMER message to the mnode write queue.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndCalMqRebalance(SMnode *pMnode) {
  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
15,678,613✔
244

245
/**
 * Post a TDMT_MND_TELEM_TIMER message to the mnode read queue.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupTelem(SMnode *pMnode) {
  mTrace("pullup telem msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pCont, .contLen = len};
  if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &msg) < 0) {
    mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
288✔
257

258
/**
 * Post a TDMT_MND_GRANT_HB_TIMER message to the mnode write queue, with
 * info.notFreeAhandle = 1 and info.ahandle = 0.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupGrant(SMnode *pMnode) {
  mTrace("pullup grant msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_GRANT_HB_TIMER,
                 .pCont = pCont,
                 .contLen = len,
                 .info.notFreeAhandle = 1,
                 .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1,768,521✔
274

275
/**
 * Post a TDMT_MND_AUTH_HB_TIMER message to the mnode write queue, with
 * info.notFreeAhandle = 1 and info.ahandle = 0.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndPullupAuth(SMnode *pMnode) {
  mTrace("pullup auth msg");

  int32_t len = 0;
  void   *pCont = mndBuildTimerMsg(&len);
  if (pCont == NULL) {
    return;
  }

  SRpcMsg msg = {.msgType = TDMT_MND_AUTH_HB_TIMER,
                 .pCont = pCont,
                 .contLen = len,
                 .info.notFreeAhandle = 1,
                 .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &msg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
287

288
/**
 * Post a TDMT_MND_UPTIME_TIMER message to the mnode write queue, with
 * info.notFreeAhandle = 1 and info.ahandle = 0.
 * Nothing is posted when the timer message cannot be built; an enqueue
 * failure is logged.
 *
 * @param pMnode mnode instance
 */
static void mndIncreaseUpTime(SMnode *pMnode) {
  mTrace("increase uptime");  // trace text previously misspelled as "increate"

  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
                    .pCont = pReq,
                    .contLen = contLen,
                    .info.notFreeAhandle = 1,
                    .info.ahandle = 0};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
113,291✔
304

305
/**
 * Mark the replica hosted on `dnodeId` as offline in every vgroup.
 *
 * Walks all SDB_VGROUP objects. For the first replica slot whose dnodeId
 * matches, if its syncState is not already TAOS_SYNC_STATE_OFFLINE the
 * slot's sync fields are reset (state offline, restore/canRead/startTimeMs/
 * learnerProgress cleared, snapSeq = -1). When a slot was changed, the
 * owning database's stateTs is set to `curMs` if it differs.
 *
 * @param pMnode  mnode instance
 * @param dnodeId id of the dnode considered offline
 * @param curMs   timestamp stored into SDbObj.stateTs
 */
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;
  void *pIter = NULL;

  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    bool stateChanged = false;
    for (int32_t vg = 0; vg < pVgroup->replica; ++vg) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[vg];
      if (pGid->dnodeId != dnodeId) continue;

      if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
        mInfo(
            "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline "
            "restored:0 "
            "canRead:0",
            pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
        pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
        pGid->syncRestore = 0;
        pGid->syncCanRead = 0;
        pGid->startTimeMs = 0;
        pGid->learnerProgress = 0;
        pGid->snapSeq = -1;
        stateChanged = true;
      }
      break;  // only the first matching replica slot is examined
    }

    if (stateChanged) {
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        // The first value logged is the previous stateTs (label was "old newTs").
        mInfo("db:%s, stateTs changed by offline check, oldTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
130,789✔
349

350
/**
 * Scan all SDB_DNODE objects and, for each one that mndIsDnodeOnline()
 * reports as not online at the current time, mark its vgroup replicas
 * offline via mndSetVgroupOffline().
 *
 * The scan is skipped when mndAcquireRpc() returns non-zero; otherwise the
 * RPC reference is held for the duration of the scan.
 *
 * @param pMnode mnode instance
 */
static void mndCheckDnodeOffline(SMnode *pMnode) {
  mTrace("check dnode offline");
  if (mndAcquireRpc(pMnode) != 0) {
    return;
  }

  SSdb         *pSdb = pMnode->pSdb;
  const int64_t nowMs = taosGetTimestampMs();

  void *pCursor = NULL;
  for (;;) {
    SDnodeObj *pDnode = NULL;
    pCursor = sdbFetch(pSdb, SDB_DNODE, pCursor, (void **)&pDnode);
    if (pCursor == NULL) {
      break;
    }

    if (!mndIsDnodeOnline(pDnode, nowMs)) {
      mInfo("dnode:%d, in offline state", pDnode->id);
      mndSetVgroupOffline(pMnode, pDnode->id, nowMs);
    }

    sdbRelease(pSdb, pDnode);
  }

  mndReleaseRpc(pMnode);
}
374

375
/**
 * Report whether this mnode should NOT run leader-only timer work.
 *
 * terrno is cleared, then the sync state is read while pMnode->lock is held
 * for reading.
 *
 * @param pMnode mnode instance
 * @return true when syncGetState() left terrno non-zero (terrno untouched),
 *         when the sync state is not TAOS_SYNC_STATE_LEADER (terrno set to
 *         TSDB_CODE_SYN_NOT_LEADER), or when either state.restored or
 *         pMnode->restored is false (terrno set to TSDB_CODE_SYN_RESTORING);
 *         false otherwise
 */
static bool mnodeIsNotLeader(SMnode *pMnode) {
  bool    notLeader = true;
  int32_t reason = 0;  // error code to publish after the lock is dropped; 0 = none

  terrno = 0;
  (void)taosThreadRwlockRdlock(&pMnode->lock);

  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    // Keep whatever error syncGetState() reported.
  } else if (state.state != TAOS_SYNC_STATE_LEADER) {
    reason = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!(state.restored && pMnode->restored)) {
    reason = TSDB_CODE_SYN_RESTORING;
  } else {
    notLeader = false;
  }

  (void)taosThreadRwlockUnlock(&pMnode->lock);

  if (reason != 0) {
    terrno = reason;
  }
  return notLeader;
}
397

398
static int32_t minCronTime() {
×
399
  int32_t min = INT32_MAX;
×
400
  min = TMIN(min, tsTtlPushIntervalSec);
×
401
  min = TMIN(min, tsTrimVDbIntervalSec);
×
402
  min = TMIN(min, tsSsAutoMigrateIntervalSec);
×
403
  min = TMIN(min, tsTransPullupInterval);
×
404
  min = TMIN(min, tsCompactPullupInterval);
×
405
  min = TMIN(min, tsMqRebalanceInterval);
×
406

407
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
×
408
  min = TMIN(min, telemInt);
×
409
  min = TMIN(min, tsGrantHBInterval);
×
410
  min = TMIN(min, tsUptimeInterval);
×
411

412
  return min <= 1 ? 2 : min;
×
413
}
414
/**
 * True when `interval` is positive and `sec` is an exact multiple of it.
 * A non-positive interval never fires, which also avoids a remainder by zero.
 */
static inline bool mndTimerIsDue(int64_t sec, int64_t interval) {
  return interval > 0 && (sec % interval) == 0;
}

/**
 * Fire every periodic pullup task whose interval divides `sec`.
 *
 * Each task is triggered when `sec` is a multiple of its configured
 * interval; intervals that are zero or negative disable the task instead of
 * causing a division by zero. Which tasks are compiled in depends on
 * TD_ASTRA, USE_SHARED_STORAGE, TD_ENTERPRISE and USE_TOPIC.
 *
 * @param pMnode mnode instance
 * @param sec    current time in seconds, as supplied by the timer thread
 */
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
#ifndef TD_ASTRA
  if (mndTimerIsDue(sec, tsGrantHBInterval)) {  // put in the 1st place as to take effect ASAP
    mndPullupGrant(pMnode);
  }
  if (mndTimerIsDue(sec, tsTtlPushIntervalSec)) {
    mndPullupTtl(pMnode);
  }

  if (mndTimerIsDue(sec, tsTrimVDbIntervalSec)) {
    mndPullupTrimDb(pMnode);
  }

  if (mndTimerIsDue(sec, tsQueryTrimIntervalSec)) {
    mndPullupQueryTrimDb(pMnode);
  }
#endif
#ifdef USE_SHARED_STORAGE
  if (tsSsEnabled) {
    if (mndTimerIsDue(sec, tsQuerySsMigrateIntervalSec)) {
      mndPullupUpdateSsMigrateProgress(pMnode);
    }
    if (tsSsEnabled == 2) {
      // By default, both tsTrimVDbIntervalSec and tsSsAutoMigrateIntervalSec are 3600 seconds,
      // so, delay half interval to do ss migrate to avoid conflict.
      //
      // NOTE: this solution is not perfect, there could still be conflict if user changes the
      // default value, but it is good enough as user is unlikely to change the default value.
      // The best solution is adding a new offset config to all cron tasks, but that would add
      // extra complexity.
      if (tsSsAutoMigrateIntervalSec > 0 &&
          (sec % tsSsAutoMigrateIntervalSec) == (tsSsAutoMigrateIntervalSec / 2)) {
        mndPullupSsMigrateDb(pMnode);
      }
    }
  }
#endif
#ifdef TD_ENTERPRISE
  if (tsAuthReq) {
    if (mndTimerIsDue(sec, tsAuthReqHBInterval)) {
      mndPullupAuth(pMnode);
    }
  }
#endif
  if (mndTimerIsDue(sec, tsTransPullupInterval)) {
    mndPullupTrans(pMnode);
  }

  if (mndTimerIsDue(sec, tsCompactPullupInterval)) {
    mndPullupCompacts(pMnode);
  }

  if (mndTimerIsDue(sec, tsScanPullupInterval)) {
    mndPullupScans(pMnode);
  }
  if (mndTimerIsDue(sec, tsInstancePullupInterval)) {  // check instance expired
    mndPullupInstances(pMnode);
  }
#ifdef USE_TOPIC
  if (mndTimerIsDue(sec, tsMqRebalanceInterval)) {
    mndCalMqRebalance(pMnode);
  }
#endif
  if (mndTimerIsDue(sec, tsTelemInterval)) {
    mndPullupTelem(pMnode);
  }
  if (mndTimerIsDue(sec, tsUptimeInterval)) {
    mndIncreaseUpTime(pMnode);
  }
}
31,357,294✔
484

485
/**
 * Fire the arbitrator heartbeat and check-sync pullups when `ms` is a
 * multiple of their respective intervals. Failures are logged. Compiled to
 * an empty body when TD_ASTRA is defined.
 *
 * @param pMnode mnode instance
 * @param ms     tick counter in milliseconds supplied by the timer thread
 */
void mndDoArbTimerPullupTask(SMnode *pMnode, int64_t ms) {
  int32_t code = 0;
#ifndef TD_ASTRA
  if (ms % (tsArbHeartBeatIntervalMs) == 0) {
    code = mndPullupArbHeartbeat(pMnode);
    if (code != 0) {
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
    }
  }

  if (ms % (tsArbCheckSyncIntervalMs) == 0) {
    code = mndPullupArbCheckSync(pMnode);
    if (code != 0) {
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
    }
  }
#endif
}
307,084,622✔
501

502
/**
 * Run the dnode offline check whenever `ms` is a multiple of
 * tsStatusTimeoutMs.
 *
 * @param pMnode mnode instance
 * @param ms     tick counter in milliseconds supplied by the timer thread
 */
void mndDoTimerCheckStatus(SMnode *pMnode, int64_t ms) {
  if (ms % (tsStatusTimeoutMs) != 0) {
    return;
  }
  mndCheckDnodeOffline(pMnode);
}
307,084,622✔
507

508
/**
 * Periodic checks driven by the seconds timer:
 *  - mndSyncCheckTimeout() every MNODE_TIMEOUT_SEC / 2 seconds;
 *  - msmHealthCheck() every MND_STREAM_HEALTH_CHECK_PERIOD_SEC seconds,
 *    unless tsDisableStream is set.
 *
 * @param pMnode mnode instance
 * @param sec    current time in seconds
 */
void mndDoTimerCheckSync(SMnode *pMnode, int64_t sec) {
  const bool syncCheckDue = (sec % (MNODE_TIMEOUT_SEC / 2) == 0);
  if (syncCheckDue) {
    mndSyncCheckTimeout(pMnode);
  }

  if (tsDisableStream) {
    return;
  }
  if (sec % MND_STREAM_HEALTH_CHECK_PERIOD_SEC == 0) {
    msmHealthCheck(pMnode);
  }
}
31,357,294✔
516

517
static void *mndThreadSecFp(void *param) {
482,097✔
518
  SMnode *pMnode = param;
482,097✔
519
  int64_t lastSec = 0;
482,097✔
520
  setThreadName("mnode-timer");
482,097✔
521

522
  while (1) {
319,142,216✔
523
    if (mndGetStop(pMnode)) break;
319,624,313✔
524

525
    int64_t nowSec = taosGetTimestampMs() / 1000;
319,142,216✔
526
    if (nowSec == lastSec) {
319,142,216✔
527
      taosMsleep(100);
286,573,907✔
528
      continue;
286,573,907✔
529
    }
530
    lastSec = nowSec;
32,568,309✔
531

532
    if (mnodeIsNotLeader(pMnode)) {
32,568,309✔
533
      taosMsleep(100);
1,211,015✔
534
      mTrace("timer not process since mnode is not leader");
1,211,015✔
535
      continue;
1,211,015✔
536
    }
537

538
    mndDoTimerCheckSync(pMnode, nowSec);
31,357,294✔
539

540
    mndDoTimerPullupTask(pMnode, nowSec);
31,357,294✔
541

542
    taosMsleep(100);
31,357,294✔
543
  }
544

545
  return NULL;
482,097✔
546
}
547

548
static void *mndThreadMsFp(void *param) {
482,097✔
549
  SMnode *pMnode = param;
482,097✔
550
  int64_t lastTime = 0;
482,097✔
551
  setThreadName("mnode-arb-timer");
482,097✔
552

553
  while (1) {
554
    lastTime += 100;
319,111,737✔
555
    taosMsleep(100);
319,111,737✔
556

557
    if (mndGetStop(pMnode)) break;
319,111,737✔
558
    if (lastTime % 10 != 0) continue;
318,629,640✔
559

560
    if (mnodeIsNotLeader(pMnode)) {
318,629,640✔
561
      mTrace("timer not process since mnode is not leader");
11,545,018✔
562
      continue;
11,545,018✔
563
    }
564

565
    mndDoTimerCheckStatus(pMnode, lastTime);
307,084,622✔
566

567
    mndDoArbTimerPullupTask(pMnode, lastTime);
307,084,622✔
568
  }
569

570
  return NULL;
482,097✔
571
}
572

573
/**
 * Start the two joinable timer threads: mndThreadSecFp (stored in
 * pMnode->thread) and mndThreadMsFp (stored in pMnode->arbThread).
 *
 * Each thread attribute object is destroyed right after the create call,
 * on both the success and the failure path (the failure paths previously
 * returned without destroying it).
 *
 * @param pMnode mnode instance
 * @return 0 on success, otherwise the code returned by taosThreadCreate()
 */
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadSecFp, pMnode);
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");

  TdThreadAttr arbAttr;
  (void)taosThreadAttrInit(&arbAttr);
  (void)taosThreadAttrSetDetachState(&arbAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&arbAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->arbThread, &arbAttr, mndThreadMsFp, pMnode);
  (void)taosThreadAttrDestroy(&arbAttr);
  if (code != 0) {
    mError("failed to create arb timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  // NOTE(review): reports "mnode-timer" a second time; possibly meant to name the arb timer - confirm.
  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
604

605
/**
 * Join and clear the two timer threads (pMnode->thread, then
 * pMnode->arbThread). A handle that taosCheckPthreadValid() rejects is
 * skipped.
 *
 * @param pMnode mnode instance
 */
static void mndCleanupTimer(SMnode *pMnode) {
  // Seconds timer thread.
  if (taosCheckPthreadValid(pMnode->thread)) {
    (void)taosThreadJoin(pMnode->thread, NULL);
    taosThreadClear(&pMnode->thread);
  }

  // Millisecond / arbitrator timer thread.
  if (taosCheckPthreadValid(pMnode->arbThread)) {
    (void)taosThreadJoin(pMnode->arbThread, NULL);
    taosThreadClear(&pMnode->arbThread);
  }
}
482,097✔
615

616
/**
 * Store a duplicate of `path` in pMnode->path and create that directory.
 *
 * @param pMnode mnode instance
 * @param path   directory path
 * @return 0 on success; terrno when taosStrdup() returns NULL or taosMkDir()
 *         returns non-zero
 */
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
  int32_t code = 0;

  pMnode->path = taosStrdup(path);
  // taosMkDir() is only attempted when the duplicate was obtained.
  if (pMnode->path == NULL || taosMkDir(pMnode->path) != 0) {
    code = terrno;
  }

  TAOS_RETURN(code);
}
631

632
/**
 * Open the mnode WAL at "<pMnode->path><TD_DIRSEP>wal" and store the handle
 * in pMnode->pWal.
 *
 * In TD_ENTERPRISE / TD_ASTRA_TODO builds this first waits for the config
 * key to be loaded and, if tsMetaKey is non-empty, copies it into the WAL
 * config's encryptKey.
 *
 * @param pMnode mnode instance
 * @return 0 on success; terrno when taosWaitCfgKeyLoaded() fails; when
 *         walOpen() returns NULL, terrno if non-zero, otherwise
 *         TSDB_CODE_MND_RETURN_VALUE_NULL
 */
static int32_t mndInitWal(SMnode *pMnode) {
  int32_t code = 0;

  SWalCfg walCfg = {.vgId = 1,
                    .fsyncPeriod = 0,
                    .rollPeriod = -1,
                    .segSize = -1,
                    .committed = -1,
                    .retentionPeriod = 0,
                    .retentionSize = 0,
                    .level = TAOS_WAL_FSYNC,
                    .encryptAlgr = 0,
                    .encryptData = {0}};

  char walPath[PATH_MAX + 20] = {0};
  (void)snprintf(walPath, sizeof(walPath), "%s%swal", pMnode->path, TD_DIRSEP);

#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
  if (taosWaitCfgKeyLoaded() != 0) {
    code = terrno;
    TAOS_RETURN(code);
  }
  if (tsMetaKey[0] != '\0') {
    tstrncpy(walCfg.encryptData.encryptKey, tsMetaKey, ENCRYPT_KEY_LEN + 1);
  }
#endif

  pMnode->pWal = walOpen(walPath, &walCfg);
  if (pMnode->pWal == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    mError("failed to open wal since %s. wal:%s", tstrerror(code), walPath);
    TAOS_RETURN(code);
  }

  TAOS_RETURN(code);
}
667

668
/**
 * Close the mnode WAL if one is open and reset pMnode->pWal to NULL.
 *
 * @param pMnode mnode instance
 */
static void mndCloseWal(SMnode *pMnode) {
  if (pMnode->pWal == NULL) {
    return;
  }
  walClose(pMnode->pWal);
  pMnode->pWal = NULL;
}
482,186✔
674

675
// Forward declarations for mmFile.c functions
676
extern int32_t mmReadFile(const char *path, SMnodeOpt *pOption);
677
extern int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption);
678

679
// Callback function to persist encrypted flag to mnode.json
680
static int32_t mndPersistEncryptedFlag(void *param) {
7,161✔
681
  SMnode *pMnode = (SMnode *)param;
7,161✔
682
  if (pMnode == NULL) {
7,161✔
683
    return TSDB_CODE_INVALID_PARA;
×
684
  }
685
  
686
  mInfo("persisting encrypted flag to mnode.json");
7,161✔
687
  
688
  SMnodeOpt option = {0};
7,161✔
689
  int32_t code = mmReadFile(pMnode->path, &option);
7,161✔
690
  if (code != 0) {
7,161✔
691
    mError("failed to read mnode.json for persisting encrypted flag since %s", tstrerror(code));
×
692
    return code;
×
693
  }
694
  
695
  option.encrypted = true;
7,161✔
696
  code = mmWriteFile(pMnode->path, &option);
7,161✔
697
  if (code != 0) {
7,161✔
698
    mError("failed to write mnode.json for persisting encrypted flag since %s", tstrerror(code));
×
699
    return code;
×
700
  }
701
  
702
  // Also update mnode's encrypted flag
703
  pMnode->encrypted = true;
7,161✔
704
  
705
  mInfo("successfully persisted encrypted flag to mnode.json");
7,161✔
706
  return 0;
7,161✔
707
}
708

709
/**
 * Create the SDB for this mnode via sdbInit() and store it in pMnode->pSdb.
 * The options passed are the mnode path, the mnode itself and its WAL.
 *
 * @param pMnode mnode instance
 * @return 0 on success; when sdbInit() returns NULL, terrno if non-zero,
 *         otherwise TSDB_CODE_MND_RETURN_VALUE_NULL
 */
static int32_t mndInitSdb(SMnode *pMnode) {
  int32_t code = 0;

  SSdbOpt sdbOpt = {0};
  sdbOpt.path = pMnode->path;
  sdbOpt.pMnode = pMnode;
  sdbOpt.pWal = pMnode->pWal;

  pMnode->pSdb = sdbInit(&sdbOpt);
  if (pMnode->pSdb == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
  }

  TAOS_RETURN(code);
}
725

726
/**
 * Finish SDB setup: copy the encrypted flag into the SDB, install
 * mndPersistEncryptedFlag as its persist callback, and, unless
 * pMnode->deploy is set, load the SDB with sdbReadFile().
 *
 * pMnode->applied is set from the SDB commitIndex regardless of the read
 * result.
 *
 * @param pMnode mnode instance
 * @return 0 when deploying, otherwise the result of sdbReadFile()
 */
static int32_t mndOpenSdb(SMnode *pMnode) {
  SSdb *pSdb = pMnode->pSdb;

  pSdb->encrypted = pMnode->encrypted;

  // Callback (and its argument) used to persist the encrypted flag.
  pSdb->persistEncryptedFlagFp = mndPersistEncryptedFlag;
  pSdb->pMnodeForCallback = pMnode;

  int32_t code = 0;
  if (!pMnode->deploy) {
    code = sdbReadFile(pSdb);
  }

  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);

  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);
  return code;
}
744

745
/**
 * Release the SDB if present and reset pMnode->pSdb to NULL.
 *
 * @param pMnode mnode instance
 */
static void mndCleanupSdb(SMnode *pMnode) {
  if (pMnode->pSdb == NULL) {
    return;
  }
  sdbCleanup(pMnode->pSdb);
  pMnode->pSdb = NULL;
}
482,186✔
751

752
/**
 * Append one start-up step to pMnode->pSteps.
 *
 * @param pMnode    mnode instance
 * @param name      step name stored in the step record (pointer is stored, not copied)
 * @param initFp    init function for the step
 * @param cleanupFp cleanup function for the step; callers in this file pass NULL for some steps
 * @return 0 on success, terrno when taosArrayPush() returns NULL
 */
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
  SMnodeStep entry = {0};
  entry.cleanupFp = cleanupFp;
  entry.initFp = initFp;
  entry.name = name;

  int32_t code = 0;
  if (taosArrayPush(pMnode->pSteps, &entry) == NULL) {
    code = terrno;
  }

  TAOS_RETURN(code);
}
763

764
/**
 * Register every mnode start-up step, in the order listed in the table
 * below, by calling mndAllocStep() for each entry.
 *
 * The table order is the registration order and must not be changed
 * casually. "mnode-sdb" appears twice on purpose: once for mndInitSdb and,
 * after the object-type modules, once for mndOpenSdb. The mount steps are
 * only present when USE_MOUNT is defined.
 *
 * @param pMnode mnode instance
 * @return 0 on success; otherwise the first non-zero code from
 *         mndAllocStep(), propagated through TAOS_CHECK_RETURN
 */
static int32_t mndInitSteps(SMnode *pMnode) {
  static const struct {
    char        *name;
    MndInitFp    initFp;
    MndCleanupFp cleanupFp;
  } kSteps[] = {
      {"mnode-wal", mndInitWal, mndCloseWal},
      {"mnode-sdb", mndInitSdb, mndCleanupSdb},
      {"mnode-trans", mndInitTrans, mndCleanupTrans},
      {"mnode-cluster", mndInitCluster, mndCleanupCluster},
      {"mnode-encrypt-algorithms", mndInitEncryptAlgr, mndCleanupEncryptAlgr},
      {"mnode-mnode", mndInitMnode, mndCleanupMnode},
      {"mnode-qnode", mndInitQnode, mndCleanupQnode},
      {"mnode-snode", mndInitSnode, mndCleanupSnode},
      {"mnode-anode", mndInitAnode, mndCleanupAnode},
      {"mnode-bnode", mndInitBnode, mndCleanupBnode},
      {"mnode-xnode", mndInitXnode, mndCleanupXnode},
      {"mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup},
      {"mnode-config", mndInitConfig, NULL},
      {"mnode-dnode", mndInitDnode, mndCleanupDnode},
      {"mnode-role", mndInitRole, mndCleanupRole},
      {"mnode-user", mndInitUser, mndCleanupUser},
      {"mnode-token", mndInitToken, mndCleanupToken},
      {"mnode-grant", mndInitGrant, mndCleanupGrant},
      {"mnode-privilege", mndInitPrivilege, mndCleanupPrivilege},
      {"mnode-acct", mndInitAcct, mndCleanupAcct},
      {"mnode-stream", mndInitStream, mndCleanupStream},
      {"mnode-instance", mndInitInstance, mndCleanupInstance},
      {"mnode-topic", mndInitTopic, mndCleanupTopic},
      {"mnode-consumer", mndInitConsumer, mndCleanupConsumer},
      {"mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe},
      {"mnode-vgroup", mndInitVgroup, mndCleanupVgroup},
      {"mnode-stb", mndInitStb, mndCleanupStb},
      {"mnode-sma", mndInitSma, mndCleanupSma},
      {"mnode-idx", mndInitIdx, mndCleanupIdx},
      {"mnode-infos", mndInitInfos, mndCleanupInfos},
      {"mnode-perfs", mndInitPerfs, mndCleanupPerfs},
      {"mnode-db", mndInitDb, mndCleanupDb},
#ifdef USE_MOUNT
      {"mnode-mount", mndInitMount, mndCleanupMount},
      {"mnode-mount-log", mndInitMountLog, mndCleanupMountLog},
#endif
      {"mnode-rsma", mndInitRsma, mndCleanupRsma},
      {"mnode-func", mndInitFunc, mndCleanupFunc},
      {"mnode-view", mndInitView, mndCleanupView},
      {"mnode-compact", mndInitCompact, mndCleanupCompact},
      {"mnode-scan", mndInitScan, mndCleanupScan},
      {"mnode-retention", mndInitRetention, mndCleanupRetention},
      {"mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail},
      {"mnode-scan-detail", mndInitScanDetail, mndCleanupScanDetail},
      {"mnode-retention-detail", mndInitRetentionDetail, mndCleanupRetentionDetail},
      {"mnode-ssmigrate", mndInitSsMigrate, mndCleanupSsMigrate},
      {"mnode-sdb", mndOpenSdb, NULL},
      {"mnode-profile", mndInitProfile, mndCleanupProfile},
      {"mnode-show", mndInitShow, mndCleanupShow},
      {"mnode-query", mndInitQuery, mndCleanupQuery},
      {"mnode-sync", mndInitSync, mndCleanupSync},
      {"mnode-telem", mndInitTelem, mndCleanupTelem},
  };

  const size_t numSteps = sizeof(kSteps) / sizeof(kSteps[0]);
  for (size_t i = 0; i < numSteps; ++i) {
    TAOS_CHECK_RETURN(mndAllocStep(pMnode, kSteps[i].name, kSteps[i].initFp, kSteps[i].cleanupFp));
  }
  return 0;
}
819

820
/*
 * Invoke the cleanup callbacks of the registered steps in reverse order,
 * from step `pos` down to step 0, then release the step array.
 *
 * pos == -1 means "start from the last registered step".
 * The call is a no-op when the step array has already been released.
 */
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
  if (pMnode->pSteps == NULL) {
    return;
  }

  int32_t idx = pos;
  if (idx == -1) {
    idx = taosArrayGetSize(pMnode->pSteps) - 1;
  }

  while (idx >= 0) {
    SMnodeStep *pCur = taosArrayGet(pMnode->pSteps, idx);
    mInfo("%s will cleanup", pCur->name);
    // Steps registered without a cleanup callback are simply skipped.
    if (pCur->cleanupFp != NULL) {
      (*pCur->cleanupFp)(pMnode);
    }
    idx--;
  }

  taosArrayClear(pMnode->pSteps);
  taosArrayDestroy(pMnode->pSteps);
  pMnode->pSteps = NULL;
}
839

840
/*
 * Run the init callback of every registered step in registration order.
 *
 * On the first failing step the error is logged, mndCleanupSteps() is called
 * with the index of the failing step, and the step's error code is returned.
 * When all steps succeed, the cluster id is cached in pMnode->clusterId and
 * 0 is returned.
 */
static int32_t mndExecSteps(SMnode *pMnode) {
  const int32_t numOfSteps = taosArrayGetSize(pMnode->pSteps);

  for (int32_t idx = 0; idx < numOfSteps; ++idx) {
    SMnodeStep *pCur = taosArrayGet(pMnode->pSteps, idx);
    if (pCur->initFp == NULL) {
      continue;
    }

    int32_t code = (*pCur->initFp)(pMnode);
    if (code == 0) {
      mInfo("%s is initialized", pCur->name);
      tmsgReportStartup(pCur->name, "initialized");
      continue;
    }

    // Log before cleaning up: the cleanup releases the step array pCur points into.
    mError("%s exec failed since %s, start to cleanup", pCur->name, tstrerror(code));
    mndCleanupSteps(pMnode, idx);
    TAOS_RETURN(code);
  }

  pMnode->clusterId = mndGetClusterId(pMnode);
  TAOS_RETURN(0);
}
860

861
/*
 * Copy the caller-supplied open options into the mnode object:
 * message callbacks, own dnode id, encryption flag and the sync (replica)
 * configuration.
 */
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
  SSyncMgmt *pMgmt = &pMnode->syncMgmt;

  // node-level settings
  pMnode->msgCb = pOption->msgCb;
  pMnode->selfDnodeId = pOption->dnodeId;
  pMnode->encrypted = pOption->encrypted;

  // replica / sync settings
  pMgmt->selfIndex = pOption->selfIndex;
  pMgmt->numOfReplicas = pOption->numOfReplicas;
  pMgmt->numOfTotalReplicas = pOption->numOfTotalReplicas;
  pMgmt->lastIndex = pOption->lastIndex;
  (void)memcpy(pMgmt->replicas, pOption->replicas, sizeof(pOption->replicas));
  (void)memcpy(pMgmt->nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
}
482,255✔
872

873
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
482,255✔
874
  terrno = 0;
482,255✔
875
  mInfo("start to open mnode in %s", path);
482,255✔
876

877
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
482,255✔
878
  if (pMnode == NULL) {
482,255✔
879
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
880
    mError("failed to open mnode in step 1, since %s", terrstr());
×
881
    return NULL;
×
882
  }
883
  (void)memset(pMnode, 0, sizeof(SMnode));
482,255✔
884

885
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
482,255✔
886
  if (code != 0) {
482,255✔
887
    taosMemoryFree(pMnode);
×
888
    mError("failed to open mnode in step 2, add lock, since %s", tstrerror(code));
×
889
    terrno = code;
×
890
    return NULL;
×
891
  }
892

893
  char timestr[24] = "1970-01-01 00:00:00.00";
482,255✔
894
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
482,255✔
895
  if (code < 0) {
482,255✔
896
    mError("failed to open mnode in step 3, parse time, since %s", tstrerror(code));
×
897
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
898
    taosMemoryFree(pMnode);
×
899
    terrno = code;
×
900
    return NULL;
×
901
  }
902

903
  mInfo("vgId:1, mnode set options to syncMgmt, dnodeId:%d, numOfTotalReplicas:%d", pOption->selfIndex,
482,255✔
904
        pOption->numOfTotalReplicas);
905
  mndSetOptions(pMnode, pOption);
482,255✔
906

907
  pMnode->deploy = pOption->deploy;
482,255✔
908
  pMnode->version = pOption->version;
482,255✔
909
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
482,255✔
910
  if (pMnode->pSteps == NULL) {
482,255✔
911
    taosMemoryFree(pMnode);
×
912
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
913
    mError("failed to open mnode in step 4, since %s", terrstr());
×
914
    return NULL;
×
915
  }
916

917
  code = mndCreateDir(pMnode, path);
482,255✔
918
  if (code != 0) {
482,255✔
919
    mError("failed to open mnode in step 5, since %s", tstrerror(code));
×
920
    mndClose(pMnode);
×
921
    terrno = code;
×
922
    return NULL;
×
923
  }
924

925
  code = mndInitSteps(pMnode);
482,255✔
926
  if (code != 0) {
482,255✔
927
    mError("failed to open mnode in step 6, since %s", tstrerror(code));
×
928
    mndClose(pMnode);
×
929
    terrno = code;
×
930
    return NULL;
×
931
  }
932

933
  code = mndExecSteps(pMnode);
482,255✔
934
  if (code != 0) {
482,255✔
935
    mError("failed to open mnode in step 7, since %s", tstrerror(code));
×
936
    mndClose(pMnode);
×
937
    terrno = code;
×
938
    return NULL;
×
939
  }
940

941
  mInfo("mnode open successfully");
482,255✔
942
  return pMnode;
482,255✔
943
}
944

945
/*
 * Prepare an mnode for shutdown: request a leader transfer, pre-stop the
 * sync module and write the sdb file. Failures are logged and otherwise
 * ignored. A NULL mnode is accepted and ignored.
 */
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  // TODO check return value
  int32_t code = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (code < 0) {
    mError("failed to transfer leader since %s", tstrerror(code));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  code = sdbWriteFile(pMnode->pSdb, 0);
  if (code < 0) {
    mError("failed to write sdb since %s", tstrerror(code));
  }
}
482,097✔
960

961
/*
 * Tear down an mnode object: run the cleanup callbacks of all remaining
 * steps, destroy the object's rwlock, and free the path string and the
 * object itself. A NULL mnode is accepted and ignored.
 */
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);

  // The lock is initialized in mndOpen() before any path that can reach
  // mndClose(); destroy it here so it is not leaked when the object is freed.
  (void)taosThreadRwlockDestroy(&pMnode->lock);

  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
482,186✔
970

971
/*
 * Start a previously opened mnode.
 *
 * Starts sync, deploys the sdb when the mnode was opened with the deploy
 * flag (and then marks the mnode restored), upgrades the sdb when this node
 * is the leader, resets the grant state and finally starts the timers.
 *
 * Returns -1 when deploying or upgrading the sdb fails, otherwise the result
 * of mndInitTimer().
 */
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    const int32_t deployCode = sdbDeploy(pMnode->pSdb);
    if (deployCode != 0) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    mndSetRestored(pMnode, true);
  }

  // Only the leader upgrades the sdb; short-circuit keeps followers from calling sdbUpgrade().
  if (mndIsLeader(pMnode) && sdbUpgrade(pMnode->pSdb, pMnode->version) != 0) {
    mError("failed to upgrade sdb while start mnode");
    return -1;
  }

  pMnode->version = TSDB_MNODE_BUILTIN_DATA_VERSION;
  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  return mndInitTimer(pMnode);
}
991

992
/* Returns true when the mnode's stored version is greater than `version`. */
bool mndNeedUpgrade(SMnode *pMnode, int32_t version) {
  return pMnode->version > version;
}
482,097✔
993

994
/* Returns the version currently stored in the mnode object. */
int32_t mndGetVersion(SMnode *pMnode) {
  return pMnode->version;
}
379,193✔
995

996
/* Returns the `encrypted` flag that was copied from the open options. */
int32_t mndGetEncryptedFlag(SMnode *pMnode) {
  return pMnode->encrypted;
}
379,193✔
997

998
/* Forwards to syncIsCatchUp() for this mnode's sync handle and returns its result. */
int32_t mndIsCatchUp(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncIsCatchUp(syncRid);
}
1002

1003
/* Returns the sync role reported by syncGetRole() for this mnode's sync handle. */
ESyncRole mndGetRole(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncGetRole(syncRid);
}
1007

1008
/* Returns the sync term reported by syncGetTerm() for this mnode's sync handle. */
int64_t mndGetTerm(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncGetTerm(syncRid);
}
1012

1013
/*
 * Asks the sync module for the arbitration token of this mnode's sync handle.
 * The token is written to `outToken`; the return value is whatever
 * syncGetArbToken() returns.
 */
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  return syncGetArbToken(pMnode->syncMgmt.sync, outToken);
}
25,306,582✔
1014

1015
/*
 * Stop a running mnode. The order is fixed: first flag the mnode as stopped,
 * then stop sync, then tear down the timers.
 */
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);       // 1. set the stopped flag
  mndSyncStop(pMnode);      // 2. stop the sync module
  mndCleanupTimer(pMnode);  // 3. release the timers
}
482,097✔
1020

1021
/*
 * Hand a sync message to the sync module of the mnode referenced by the
 * message. A non-zero result is logged and returned unchanged.
 */
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;  // referenced by the mG* logging calls below

  mGTrace("vgId:1, process sync msg:%p, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  const int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s since %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
1036

1037
/*
 * Decide whether a message may be processed by this mnode right now.
 *
 * Non-request messages and scheduler (TDMT_SCH_*) messages always pass.
 * Otherwise the mnode must not be stopped, must be the sync leader and must
 * be restored; when it is, one rpc reference is taken (rpcRef is incremented)
 * and 0 is returned.
 *
 * When the mnode is not leader or not restored:
 *   - internal timer messages just get the error code back;
 *   - any other request additionally gets the mnode endpoint set serialized
 *     into pMsg->info.rsp so that the sender can be redirected.
 *
 * Returns 0 on success, TSDB_CODE_APP_IS_STOPPING, TSDB_CODE_SYN_NOT_LEADER,
 * TSDB_CODE_SYN_RESTORING, the terrno left by syncGetState(), or -1 when no
 * mnode endpoint is available for the redirect.
 */
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;
  if (!IsReq(pMsg)) TAOS_RETURN(code);

  // Scheduler messages bypass the state check entirely.
  switch (pMsg->msgType) {
    case TDMT_SCH_QUERY:
    case TDMT_SCH_MERGE_QUERY:
    case TDMT_SCH_QUERY_CONTINUE:
    case TDMT_SCH_QUERY_HEARTBEAT:
    case TDMT_SCH_FETCH:
    case TDMT_SCH_MERGE_FETCH:
    case TDMT_SCH_DROP_TASK:
    case TDMT_SCH_TASK_NOTIFY:
      TAOS_RETURN(code);
    default:
      break;
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_APP_IS_STOPPING;
    TAOS_RETURN(code);
  }

  // syncGetState() reports failure through terrno, so clear it first.
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  if (state.state != TAOS_SYNC_STATE_LEADER) {
    code = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!state.restored || !pMnode->restored) {
    code = TSDB_CODE_SYN_RESTORING;
  }

  if (code == 0) {
    // Healthy leader: take the rpc reference while still holding the read lock.
#if 1
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
#else
    int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
    mTrace("mnode rpc is acquired, ref:%d", ref);
#endif

    (void)taosThreadRwlockUnlock(&pMnode->lock);
    TAOS_RETURN(code);
  }

  (void)taosThreadRwlockUnlock(&pMnode->lock);

  // Internal timer messages are not redirected, only rejected.
  switch (pMsg->msgType) {
    case TDMT_MND_TMQ_TIMER:
    case TDMT_MND_TELEM_TIMER:
    case TDMT_MND_TRANS_TIMER:
    case TDMT_MND_TTL_TIMER:
    case TDMT_MND_TRIM_DB_TIMER:
    case TDMT_MND_UPTIME_TIMER:
    case TDMT_MND_COMPACT_TIMER:
    case TDMT_MND_NODECHECK_TIMER:
    case TDMT_MND_GRANT_HB_TIMER:
    case TDMT_MND_STREAM_REQ_CHKPT:
    case TDMT_MND_SSMIGRATE_DB_TIMER:
    case TDMT_MND_ARB_HEARTBEAT_TIMER:
    case TDMT_MND_ARB_CHECK_SYNC_TIMER:
    case TDMT_MND_CHECK_STREAM_TIMER:
    case TDMT_MND_UPDATE_SSMIGRATE_PROGRESS_TIMER:
    case TDMT_MND_SCAN_TIMER:
    case TDMT_MND_QUERY_TRIM_TIMER:
    case TDMT_MND_AUTH_HB_TIMER:
      mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
             pMnode->stopped, state.restored, syncStr(state.state));
      TAOS_RETURN(code);
    default:
      break;
  }

  // Everything else is answered with the current mnode endpoint set.
  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) return -1;

  for (int32_t epIdx = 0; epIdx < epSet.numOfEps; ++epIdx) {
    mDebug("mnode index:%d, ep:%s:%u", epIdx, epSet.eps[epIdx].fqdn, epSet.eps[epIdx].port);
  }

  // First call with a NULL buffer yields the encoded length, second call fills the buffer.
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
  pMsg->info.rsp = rpcMallocCont(contLen);
  if (pMsg->info.rsp != NULL) {
    if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
      mError("failed to serialize ep set");
    }
    pMsg->info.hasEpSet = 1;
    pMsg->info.rspLen = contLen;
  }

  TAOS_RETURN(code);
}
1128

1129
/*
 * Entry point for rpc messages addressed to the mnode.
 *
 * 1. (enterprise builds only) for token connections other than heartbeats,
 *    validate the cached token and replace the connection user with the
 *    token's user;
 * 2. look up the handler registered for the message type (plain handler
 *    first, extended handler as fallback);
 * 3. check the mnode state via mndCheckMnodeState();
 * 4. run the handler, release the rpc reference and log the outcome.
 *
 * Returns the handler's result code. A handler result of -1 is replaced by
 * terrno. Returns TSDB_CODE_MSG_NOT_PROCESSED when no handler is registered.
 */
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode         *pMnode = pMsg->info.node;
  const STraceId *trace = &pMsg->info.traceId;  // referenced by the mG* logging calls below
  int32_t         code = TSDB_CODE_SUCCESS;

#ifdef TD_ENTERPRISE
  if (pMsg->msgType != TDMT_MND_HEARTBEAT && pMsg->info.conn.isToken) {
    SCachedTokenInfo tokenInfo = {0};
    if (mndGetCachedTokenInfo(pMsg->info.conn.identifier, &tokenInfo) == NULL) {
      mGError("msg:%p, failed to get token info, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
      code = TSDB_CODE_MND_TOKEN_NOT_EXIST;
      TAOS_RETURN(code);
    }

    if (tokenInfo.enabled == 0) {
      mGError("msg:%p, token is disabled, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
      code = TSDB_CODE_MND_TOKEN_DISABLED;
      TAOS_RETURN(code);
    }

    // An expireTime that is not positive disables the expiry check.
    if (tokenInfo.expireTime > 0 && taosGetTimestampSec() > (tokenInfo.expireTime + TSDB_TOKEN_EXPIRY_LEEWAY)) {
      mGError("msg:%p, token is expired, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
      code = TSDB_CODE_MND_TOKEN_EXPIRED;
      TAOS_RETURN(code);
    }

    tstrncpy(pMsg->info.conn.user, tokenInfo.user, sizeof(pMsg->info.conn.user));
  }
#endif

  // Handler lookup: the extended table is consulted only when the plain one has no entry.
  MndMsgFpExt fpExt = NULL;
  MndMsgFp    fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)];
  if (fp == NULL) {
    fpExt = pMnode->msgFpExt[TMSG_INDEX(pMsg->msgType)];
  }
  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp != NULL) {
    code = (*fp)(pMsg);
  } else {
    code = (*fpExt)(pMsg, pQueueInfo);
  }
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
  } else if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
  } else {
    // TODO remove this wrong set code
    if (code == -1) {
      code = terrno;
    }
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
            TMSG_INFO(pMsg->msgType));
  }

  TAOS_RETURN(code);
}
1191

1192
/*
 * Register `fp` as the handler for `msgType`.
 * Message types whose index is not below TDMT_MAX are silently ignored.
 */
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (!(slot < TDMT_MAX)) {
    return;
  }
  pMnode->msgFp[slot] = fp;
}
115,741,200✔
1198

1199
/*
 * Register `fp` as the extended handler (the variant that also receives the
 * queue info) for `msgType`.
 * Message types whose index is not below TDMT_MAX are silently ignored.
 */
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (!(slot < TDMT_MAX)) {
    return;
  }
  pMnode->msgFpExt[slot] = fp;
}
3,858,040✔
1205

1206
// Note: uid 0 is reserved
1207
/*
 * Build a non-zero 64-bit id from three parts:
 *   - the low 40 bits of the current microsecond timestamp, shifted left by 24;
 *   - the low 16 bits of the MurmurHash3 of `name`, shifted left by 8;
 *   - the low 8 bits of a random number.
 * The absolute value of the sum is returned; a sum of 0 is retried.
 */
int64_t mndGenerateUid(const char *name, int32_t len) {
  int32_t hashval = MurmurHash3_32(name, len);

  for (;;) {
    int64_t us = taosGetTimestampUs();
    int64_t timePart = (us & 0x000000FFFFFFFFFF) << 24;
    int64_t uuid = timePart + ((hashval & ((1ul << 16) - 1ul)) << 8) + (taosRand() & ((1ul << 8) - 1ul));
    if (uuid == 0) {
      continue;  // 0 is reserved, draw again
    }
    return llabs(uuid);
  }
}
1218

1219
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
73✔
1220
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
1221
  int32_t code = mndAcquireRpc(pMnode);
73✔
1222
  if (code < 0) {
73✔
1223
    TAOS_RETURN(code);
×
1224
  } else if (code == 1) {
73✔
1225
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1226
  }
1227

1228
  SSdb   *pSdb = pMnode->pSdb;
73✔
1229
  int64_t ms = taosGetTimestampMs();
73✔
1230

1231
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
73✔
1232
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
73✔
1233
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
73✔
1234
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
73✔
1235
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
73✔
1236
      pStbInfo->stbs == NULL) {
73✔
1237
    mndReleaseRpc(pMnode);
×
1238
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1239
    if (terrno != 0) code = terrno;
×
1240
    TAOS_RETURN(code);
×
1241
  }
1242

1243
  // cluster info
1244
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
73✔
1245
  pClusterInfo->monitor_interval = tsMonitorInterval;
73✔
1246
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
73✔
1247
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
73✔
1248
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
73✔
1249
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
73✔
1250
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
73✔
1251

1252
  void *pIter = NULL;
73✔
1253
  while (1) {
73✔
1254
    SDnodeObj *pObj = NULL;
146✔
1255
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
146✔
1256
    if (pIter == NULL) break;
146✔
1257

1258
    SMonDnodeDesc desc = {0};
73✔
1259
    desc.dnode_id = pObj->id;
73✔
1260
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
73✔
1261
    if (mndIsDnodeOnline(pObj, ms)) {
73✔
1262
      tstrncpy(desc.status, "ready", sizeof(desc.status));
73✔
1263
    } else {
1264
      tstrncpy(desc.status, "offline", sizeof(desc.status));
×
1265
    }
1266
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
146✔
1267
      mError("failed put dnode into array, but continue at this monitor report")
×
1268
    }
1269
    sdbRelease(pSdb, pObj);
73✔
1270
  }
1271

1272
  pIter = NULL;
73✔
1273
  while (1) {
73✔
1274
    SMnodeObj *pObj = NULL;
146✔
1275
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
146✔
1276
    if (pIter == NULL) break;
146✔
1277

1278
    SMonMnodeDesc desc = {0};
73✔
1279
    desc.mnode_id = pObj->id;
73✔
1280
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
73✔
1281

1282
    if (pObj->id == pMnode->selfDnodeId) {
73✔
1283
      pClusterInfo->first_ep_dnode_id = pObj->id;
73✔
1284
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
73✔
1285
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
1286
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
73✔
1287
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
1288
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
73✔
1289
      desc.syncState = TAOS_SYNC_STATE_LEADER;
73✔
1290
    } else {
1291
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1292
      desc.syncState = pObj->syncState;
×
1293
    }
1294
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
146✔
1295
      mError("failed to put mnode into array, but continue at this monitor report");
×
1296
    }
1297
    sdbRelease(pSdb, pObj);
73✔
1298
  }
1299

1300
  // vgroup info
1301
  pIter = NULL;
73✔
1302
  while (1) {
146✔
1303
    SVgObj *pVgroup = NULL;
219✔
1304
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
219✔
1305
    if (pIter == NULL) break;
219✔
1306

1307
    if (pVgroup->mountVgId) {
146✔
1308
      sdbRelease(pSdb, pVgroup);
×
1309
      continue;
×
1310
    }
1311

1312
    pClusterInfo->vgroups_total++;
146✔
1313
    pClusterInfo->tbs_total += pVgroup->numOfTables;
146✔
1314

1315
    SMonVgroupDesc desc = {0};
146✔
1316
    desc.vgroup_id = pVgroup->vgId;
146✔
1317

1318
    SName name = {0};
146✔
1319
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
146✔
1320
    if (code < 0) {
146✔
1321
      mError("failed to get db name since %s", tstrerror(code));
×
1322
      sdbCancelFetch(pSdb, pIter);
×
1323
      sdbRelease(pSdb, pVgroup);
×
1324
      TAOS_RETURN(code);
×
1325
    }
1326
    (void)tNameGetDbName(&name, desc.database_name);
146✔
1327

1328
    desc.tables_num = pVgroup->numOfTables;
146✔
1329
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
146✔
1330
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
146✔
1331
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
292✔
1332
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
146✔
1333
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
146✔
1334
      pVnDesc->dnode_id = pVgid->dnodeId;
146✔
1335
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
146✔
1336
      pVnDesc->syncState = pVgid->syncState;
146✔
1337
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
146✔
1338
        tstrncpy(desc.status, "ready", sizeof(desc.status));
146✔
1339
        pClusterInfo->vgroups_alive++;
146✔
1340
      }
1341
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
146✔
1342
        pClusterInfo->vnodes_alive++;
146✔
1343
      }
1344
      pClusterInfo->vnodes_total++;
146✔
1345
    }
1346

1347
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
292✔
1348
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1349
    }
1350
    sdbRelease(pSdb, pVgroup);
146✔
1351
  }
1352

1353
  // stb info
1354
  pIter = NULL;
73✔
UNCOV
1355
  while (1) {
×
1356
    SStbObj *pStb = NULL;
73✔
1357
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
73✔
1358
    if (pIter == NULL) break;
73✔
1359

UNCOV
1360
    SMonStbDesc desc = {0};
×
1361

UNCOV
1362
    SName name1 = {0};
×
UNCOV
1363
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
×
UNCOV
1364
    if (code < 0) {
×
1365
      mError("failed to get db name since %s", tstrerror(code));
×
1366
      sdbRelease(pSdb, pStb);
×
1367
      TAOS_RETURN(code);
×
1368
    }
UNCOV
1369
    (void)tNameGetDbName(&name1, desc.database_name);
×
1370

UNCOV
1371
    SName name2 = {0};
×
UNCOV
1372
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
×
UNCOV
1373
    if (code < 0) {
×
1374
      mError("failed to get table name since %s", tstrerror(code));
×
1375
      sdbRelease(pSdb, pStb);
×
1376
      TAOS_RETURN(code);
×
1377
    }
UNCOV
1378
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
×
1379

UNCOV
1380
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
×
1381
      mError("failed to put stb into array, but continue at this monitor report");
×
1382
    }
UNCOV
1383
    sdbRelease(pSdb, pStb);
×
1384
  }
1385

1386
  // grant info
1387
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
73✔
1388
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
73✔
1389
  if (pMnode->grant.expireTimeMS == 0) {
73✔
1390
    pGrantInfo->expire_time = 0;
×
1391
    pGrantInfo->timeseries_total = 0;
×
1392
  }
1393

1394
  mndReleaseRpc(pMnode);
73✔
1395
  TAOS_RETURN(code);
73✔
1396
}
1397

1398
/*
 * Re-arm the sync timers of this mnode with the configured election and
 * heartbeat intervals. Returns the result of syncResetTimer().
 */
int32_t mndResetTimer(SMnode *pMnode) {
  const int64_t syncRid = pMnode->syncMgmt.sync;
  return syncResetTimer(syncRid, tsMnodeElectIntervalMs, tsMnodeHeartbeatIntervalMs);
}
1401

1402
/*
 * Fill `pLoad` with the current sync state, restore flag, term and role
 * timestamp of this mnode. Always returns 0.
 */
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  const SSyncState syncState = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = syncState.state;
  pLoad->syncRestore = syncState.restored;
  pLoad->syncTerm = syncState.term;
  pLoad->roleTimeMs = syncState.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);
  return 0;
}
1413

1414
/* Returns the roleTimeMs field of the sync state reported for this mnode. */
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  const SSyncState syncState = syncGetState(pMnode->syncMgmt.sync);
  const int64_t    roleTimeMs = syncState.roleTimeMs;
  return roleTimeMs;
}
1418

1419
/*
 * Set the mnode's `restored` flag under the write lock and log the new value.
 *
 * When the flag is cleared, the call additionally blocks, polling every 3 ms,
 * until rpcRef is no longer positive.
 */
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) {
    return;
  }

  // Wait until rpcRef drops to zero or below.
  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
482,097✔
1436

1437
/* Returns the mnode's `restored` flag (read without taking the lock). */
bool mndGetRestored(SMnode *pMnode) {
  return pMnode->restored;
}
×
1438

1439
/* Marks the mnode as stopped; the flag is written while holding the write lock. */
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  {
    pMnode->stopped = true;
  }
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
482,097✔
1445

1446
/* Returns the mnode's `stopped` flag (read without taking the lock). */
bool mndGetStop(SMnode *pMnode) {
  return pMnode->stopped;
}
638,724,094✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc