• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3653

14 Mar 2025 08:10AM UTC coverage: 22.565% (-41.0%) from 63.596%
#3653

push

travis-ci

web-flow
feat(keep): support keep on super table level. (#30097)

* Feat: support use keep while create super table.

* Test(keep): add test for create super table with keep option.

* Feat(keep): Add tmsg for create keep.

* Feat(keep): support alter table option keep.

* Fix(keep): Add basic test for alter table option.

* Fix(keep): memory leak.

* Feat(keep): add keep to metaEntry&metaCache and fix earliestTs with stb keep.

* Test(keep): add some cases for select with stb keep.

* Fix: fix ci core while alter stb.

* Feat(keep): delete expired data in super table level.

* Feat: remove get stb keep while query.

* Fix : build error.

* Revert "Fix : build error."

This reverts commit 0ed66e4e8.

* Revert "Feat(keep): delete expired data in super table level."

This reverts commit 36330f6b4.

* Fix : build errors.

* Feat : support restart taosd.

* Fix : alter table comment problems.

* Test : add tests for super table keep.

* Fix: change sdb stb reserve size.

* Test: add more tests.

* Feat: Disable normal tables and sub tables from setting the keep parameter

* Fix: add more checks to avoid unknown address.

* Docs: Add docs for stable keep.

* Fix: some review changes.

* Fix: review errors.

49248 of 302527 branches covered (16.28%)

Branch coverage included in aggregate %.

53 of 99 new or added lines in 12 files covered. (53.54%)

155872 existing lines in 443 files now uncovered.

87359 of 302857 relevant lines covered (28.84%)

570004.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

44.57
/source/dnode/mnode/impl/src/mndMain.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "mndAcct.h"
18
#include "mndAnode.h"
19
#include "mndArbGroup.h"
20
#include "mndCluster.h"
21
#include "mndCompact.h"
22
#include "mndCompactDetail.h"
23
#include "mndConfig.h"
24
#include "mndConsumer.h"
25
#include "mndDb.h"
26
#include "mndDnode.h"
27
#include "mndFunc.h"
28
#include "mndGrant.h"
29
#include "mndIndex.h"
30
#include "mndInfoSchema.h"
31
#include "mndMnode.h"
32
#include "mndPerfSchema.h"
33
#include "mndPrivilege.h"
34
#include "mndProfile.h"
35
#include "mndQnode.h"
36
#include "mndQuery.h"
37
#include "mndShow.h"
38
#include "mndSma.h"
39
#include "mndSnode.h"
40
#include "mndStb.h"
41
#include "mndStream.h"
42
#include "mndSubscribe.h"
43
#include "mndSync.h"
44
#include "mndTelem.h"
45
#include "mndTopic.h"
46
#include "mndTrans.h"
47
#include "mndUser.h"
48
#include "mndVgroup.h"
49
#include "mndView.h"
50

51
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
2✔
52
  int32_t code = 0;
2✔
53
  (void)taosThreadRwlockRdlock(&pMnode->lock);
2✔
54
  if (pMnode->stopped) {
2!
55
    code = TSDB_CODE_APP_IS_STOPPING;
×
56
  } else if (!mndIsLeader(pMnode)) {
2!
UNCOV
57
    code = 1;
×
58
  } else {
59
#if 1
60
    (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
2✔
61
#else
62
    int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
63
    mTrace("mnode rpc is acquired, ref:%d", ref);
64
#endif
65
  }
66
  (void)taosThreadRwlockUnlock(&pMnode->lock);
2✔
67
  TAOS_RETURN(code);
2✔
68
}
69

70
static inline void mndReleaseRpc(SMnode *pMnode) {
297✔
71
  (void)taosThreadRwlockRdlock(&pMnode->lock);
297✔
72
#if 1
73
  (void)atomic_sub_fetch_32(&pMnode->rpcRef, 1);
297✔
74
#else
75
  int32_t ref = atomic_sub_fetch_32(&pMnode->rpcRef, 1);
76
  mTrace("mnode rpc is released, ref:%d", ref);
77
#endif
78
  (void)taosThreadRwlockUnlock(&pMnode->lock);
297✔
79
}
297✔
80

81
static void *mndBuildTimerMsg(int32_t *pContLen) {
35✔
82
  terrno = 0;
35✔
83
  SMTimerReq timerReq = {0};
35✔
84

85
  int32_t contLen = tSerializeSMTimerMsg(NULL, 0, &timerReq);
35✔
86
  if (contLen <= 0) return NULL;
35!
87
  void *pReq = rpcMallocCont(contLen);
35✔
88
  if (pReq == NULL) return NULL;
35!
89

90
  if (tSerializeSMTimerMsg(pReq, contLen, &timerReq) < 0) {
35!
91
    mError("failed to serialize timer msg since %s", terrstr());
×
92
  }
93
  *pContLen = contLen;
35✔
94
  return pReq;
35✔
95
}
96

97
static void mndPullupTrans(SMnode *pMnode) {
6✔
98
  mTrace("pullup trans msg");
6!
99
  int32_t contLen = 0;
6✔
100
  void   *pReq = mndBuildTimerMsg(&contLen);
6✔
101
  if (pReq != NULL) {
6!
102
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pReq, .contLen = contLen};
6✔
103
    // TODO check return value
104
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
6!
105
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
106
    }
107
  }
108
}
6✔
109

110
static void mndPullupCompacts(SMnode *pMnode) {
1✔
111
  mTrace("pullup compact timer msg");
1!
112
  int32_t contLen = 0;
1✔
113
  void   *pReq = mndBuildTimerMsg(&contLen);
1✔
114
  if (pReq != NULL) {
1!
115
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_COMPACT_TIMER, .pCont = pReq, .contLen = contLen};
1✔
116
    // TODO check return value
117
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
1!
118
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
119
    }
120
  }
121
}
1✔
122

123
// Queue a TDMT_MND_TTL_TIMER message on the mnode write queue.
//
// The payload is built by mndBuildTimerMsg(), which returns NULL when it
// cannot size or allocate the message. In that case nothing is queued, the
// same way mndPullupTrans() and mndPullupCompacts() behave. A failed enqueue
// is logged and otherwise ignored.
static void mndPullupTtl(SMnode *pMnode) {
  mTrace("pullup ttl");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No payload could be built; never push a NULL/zero-length message.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
1✔
133

UNCOV
134
// Queue a TDMT_MND_TRIM_DB_TIMER message on the mnode write queue.
//
// The payload is built by mndBuildTimerMsg(), which returns NULL when it
// cannot size or allocate the message; nothing is queued in that case.
// A failed enqueue is logged and otherwise ignored.
static void mndPullupTrimDb(SMnode *pMnode) {
  // This helper sends the trim-db timer, so trace it as such.
  mTrace("pullup trim");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No payload could be built; never push a NULL/zero-length message.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRIM_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
144

145
// Queue a TDMT_MND_S3MIGRATE_DB_TIMER message on the mnode write queue.
//
// The payload is built by mndBuildTimerMsg(), which returns NULL when it
// cannot size or allocate the message; nothing is queued in that case.
// A failed enqueue is logged and otherwise ignored.
static void mndPullupS3MigrateDb(SMnode *pMnode) {
  // This helper sends the s3-migrate timer, so trace it as such.
  mTrace("pullup s3migrate");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // No payload could be built; never push a NULL/zero-length message.
    return;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_S3MIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen};
  if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
    mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
  }
}
×
155

156
// Queue a TDMT_MND_ARB_HEARTBEAT_TIMER message on the arbitrator queue.
//
// Returns the result of tmsgPutToQueue(). If mndBuildTimerMsg() could not
// produce a payload, nothing is queued and a non-zero code is returned so the
// caller's existing error log fires: terrno when it is set, otherwise
// TSDB_CODE_OUT_OF_MEMORY.
static int32_t mndPullupArbHeartbeat(SMnode *pMnode) {
  mTrace("pullup arb hb");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // mndBuildTimerMsg() resets terrno on entry, so a non-zero value here
    // was set by the failing step; fall back to OOM when nothing was recorded.
    return terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_HEARTBEAT_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
163

164
// Queue a TDMT_MND_ARB_CHECK_SYNC_TIMER message on the arbitrator queue.
//
// Returns the result of tmsgPutToQueue(). If mndBuildTimerMsg() could not
// produce a payload, nothing is queued and a non-zero code is returned so the
// caller's existing error log fires: terrno when it is set, otherwise
// TSDB_CODE_OUT_OF_MEMORY.
static int32_t mndPullupArbCheckSync(SMnode *pMnode) {
  mTrace("pullup arb sync");
  int32_t contLen = 0;
  void   *pReq = mndBuildTimerMsg(&contLen);
  if (pReq == NULL) {
    // mndBuildTimerMsg() resets terrno on entry, so a non-zero value here
    // was set by the failing step; fall back to OOM when nothing was recorded.
    return terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY;
  }

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_ARB_CHECK_SYNC_TIMER, .pCont = pReq, .contLen = contLen, .info.noResp = 1};
  return tmsgPutToQueue(&pMnode->msgCb, ARB_QUEUE, &rpcMsg);
}
171

172
static void mndCalMqRebalance(SMnode *pMnode) {
6✔
173
  int32_t contLen = 0;
6✔
174
  void   *pReq = mndBuildTimerMsg(&contLen);
6✔
175
  if (pReq != NULL) {
6!
176
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen};
6✔
177
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
6!
178
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
179
    }
180
  }
181
}
6✔
182

UNCOV
183
static void mndStreamCheckpointTimer(SMnode *pMnode) {
×
UNCOV
184
  SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
×
UNCOV
185
  if (pMsg != NULL) {
×
UNCOV
186
    int32_t size = sizeof(SMStreamDoCheckpointMsg);
×
UNCOV
187
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size};
×
188
    // TODO check return value
UNCOV
189
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
×
190
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
191
    }
192
  }
UNCOV
193
}
×
194

UNCOV
195
static void mndStreamCheckNode(SMnode *pMnode) {
×
UNCOV
196
  int32_t contLen = 0;
×
UNCOV
197
  void   *pReq = mndBuildTimerMsg(&contLen);
×
UNCOV
198
  if (pReq != NULL) {
×
UNCOV
199
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pReq, .contLen = contLen};
×
200
    // TODO check return value
UNCOV
201
    if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg) < 0) {
×
202
      mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
×
203
    }
204
  }
UNCOV
205
}
×
206

UNCOV
207
static void mndStreamCheckStatus(SMnode *pMnode) {
×
UNCOV
208
  int32_t contLen = 0;
×
UNCOV
209
  void   *pReq = mndBuildTimerMsg(&contLen);
×
UNCOV
210
  if (pReq != NULL) {
×
UNCOV
211
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_CHECK_STREAM_TIMER, .pCont = pReq, .contLen = contLen};
×
212
    // TODO check return value
UNCOV
213
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
×
214
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
215
    }
216
  }
UNCOV
217
}
×
218

219
static void mndStreamConsensusChkpt(SMnode *pMnode) {
2✔
220
  int32_t contLen = 0;
2✔
221
  void   *pReq = mndBuildTimerMsg(&contLen);
2✔
222
  if (pReq != NULL) {
2!
223
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CONSEN_TIMER, .pCont = pReq, .contLen = contLen};
2✔
224
    // TODO check return value
225
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
2!
UNCOV
226
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
227
    }
228
  }
229
}
2✔
230

UNCOV
231
static void mndPullupTelem(SMnode *pMnode) {
×
UNCOV
232
  mTrace("pullup telem msg");
×
UNCOV
233
  int32_t contLen = 0;
×
UNCOV
234
  void   *pReq = mndBuildTimerMsg(&contLen);
×
UNCOV
235
  if (pReq != NULL) {
×
UNCOV
236
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pReq, .contLen = contLen};
×
237
    // TODO check return value
UNCOV
238
    if (tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg) < 0) {
×
UNCOV
239
      mError("failed to put into read-queue since %s, line:%d", terrstr(), __LINE__);
×
240
    }
241
  }
UNCOV
242
}
×
243

244
static void mndPullupGrant(SMnode *pMnode) {
9✔
245
  mTrace("pullup grant msg");
9!
246
  int32_t contLen = 0;
9✔
247
  void   *pReq = mndBuildTimerMsg(&contLen);
9✔
248
  if (pReq != NULL) {
9!
249
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_GRANT_HB_TIMER,
9✔
250
                      .pCont = pReq,
251
                      .contLen = contLen,
252
                      .info.notFreeAhandle = 1,
253
                      .info.ahandle = 0};
254
    // TODO check return value
255
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
9!
UNCOV
256
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
257
    }
258
  }
259
}
9✔
260

261
static void mndIncreaseUpTime(SMnode *pMnode) {
×
UNCOV
262
  mTrace("increate uptime");
×
UNCOV
263
  int32_t contLen = 0;
×
UNCOV
264
  void   *pReq = mndBuildTimerMsg(&contLen);
×
UNCOV
265
  if (pReq != NULL) {
×
UNCOV
266
    SRpcMsg rpcMsg = {.msgType = TDMT_MND_UPTIME_TIMER,
×
267
                      .pCont = pReq,
268
                      .contLen = contLen,
269
                      .info.notFreeAhandle = 1,
270
                      .info.ahandle = 0};
271
    // TODO check return value
UNCOV
272
    if (tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg) < 0) {
×
UNCOV
273
      mError("failed to put into write-queue since %s, line:%d", terrstr(), __LINE__);
×
274
    }
275
  }
UNCOV
276
}
×
277

UNCOV
278
// Mark the replica hosted on `dnodeId` as offline in every vgroup.
//
// Walks all SDB_VGROUP objects. For the first replica of a vgroup whose
// dnodeId matches, and whose sync state is not already
// TAOS_SYNC_STATE_OFFLINE, the state is set to offline and the
// restore/canRead/startTimeMs fields are cleared. When a vgroup was changed,
// the owning database's stateTs is set to `curMs` (if it differs).
//
// pMnode  - mnode whose sdb is scanned
// dnodeId - id of the dnode considered offline
// curMs   - timestamp written to the database's stateTs
static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs) {
  SSdb *pSdb = pMnode->pSdb;

  void *pIter = NULL;
  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    bool stateChanged = false;
    for (int32_t vg = 0; vg < pVgroup->replica; ++vg) {
      SVnodeGid *pGid = &pVgroup->vnodeGid[vg];
      if (pGid->dnodeId != dnodeId) continue;

      if (pGid->syncState != TAOS_SYNC_STATE_OFFLINE) {
        // The message names the state that is actually assigned below.
        mInfo(
            "vgId:%d, state changed by offline check, old state:%s restored:%d canRead:%d new state:offline "
            "restored:0 canRead:0",
            pVgroup->vgId, syncStr(pGid->syncState), pGid->syncRestore, pGid->syncCanRead);
        pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
        pGid->syncRestore = 0;
        pGid->syncCanRead = 0;
        pGid->startTimeMs = 0;
        stateChanged = true;
      }
      // Only the first replica matching the dnode is examined.
      break;
    }

    if (stateChanged) {
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb != NULL && pDb->stateTs != curMs) {
        mInfo("db:%s, stateTs changed by offline check, oldTs:%" PRId64 " newTs:%" PRId64, pDb->name, pDb->stateTs,
              curMs);
        pDb->stateTs = curMs;
      }
      // Released unconditionally, exactly as before (pDb may be NULL here).
      mndReleaseDb(pMnode, pDb);
    }

    sdbRelease(pSdb, pVgroup);
  }
}
×
319

320
static void mndCheckDnodeOffline(SMnode *pMnode) {
2✔
321
  mTrace("check dnode offline");
2!
322
  if (mndAcquireRpc(pMnode) != 0) return;
2!
323

324
  SSdb   *pSdb = pMnode->pSdb;
2✔
325
  int64_t curMs = taosGetTimestampMs();
2✔
326

327
  void *pIter = NULL;
2✔
328
  while (1) {
2✔
329
    SDnodeObj *pDnode = NULL;
4✔
330
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode);
4✔
331
    if (pIter == NULL) break;
4✔
332

333
    bool online = mndIsDnodeOnline(pDnode, curMs);
2✔
334
    if (!online) {
2!
UNCOV
335
      mInfo("dnode:%d, in offline state", pDnode->id);
×
UNCOV
336
      mndSetVgroupOffline(pMnode, pDnode->id, curMs);
×
337
    }
338

339
    sdbRelease(pSdb, pDnode);
2✔
340
  }
341

342
  mndReleaseRpc(pMnode);
2✔
343
}
344

345
static bool mnodeIsNotLeader(SMnode *pMnode) {
6✔
346
  terrno = 0;
6✔
347
  (void)taosThreadRwlockRdlock(&pMnode->lock);
6✔
348
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
6✔
349
  if (terrno != 0) {
6!
UNCOV
350
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
UNCOV
351
    return true;
×
352
  }
353

354
  if (state.state != TAOS_SYNC_STATE_LEADER) {
6!
UNCOV
355
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
UNCOV
356
    terrno = TSDB_CODE_SYN_NOT_LEADER;
×
UNCOV
357
    return true;
×
358
  }
359
  if (!state.restored || !pMnode->restored) {
6!
UNCOV
360
    (void)taosThreadRwlockUnlock(&pMnode->lock);
×
UNCOV
361
    terrno = TSDB_CODE_SYN_RESTORING;
×
UNCOV
362
    return true;
×
363
  }
364
  (void)taosThreadRwlockUnlock(&pMnode->lock);
6✔
365
  return false;
6✔
366
}
367

368
static int32_t minCronTime() {
20✔
369
  int32_t min = INT32_MAX;
20✔
370
  min = TMIN(min, tsTtlPushIntervalSec);
20✔
371
  min = TMIN(min, tsTrimVDbIntervalSec);
20✔
372
  min = TMIN(min, tsS3MigrateIntervalSec);
20✔
373
  min = TMIN(min, tsTransPullupInterval);
20✔
374
  min = TMIN(min, tsCompactPullupInterval);
20✔
375
  min = TMIN(min, tsMqRebalanceInterval);
20✔
376
  min = TMIN(min, tsStreamCheckpointInterval);
20✔
377
  min = TMIN(min, tsStreamNodeCheckInterval);
20✔
378
  min = TMIN(min, tsArbHeartBeatIntervalSec);
20✔
379
  min = TMIN(min, tsArbCheckSyncIntervalSec);
20✔
380

381
  int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
20✔
382
  min = TMIN(min, telemInt);
20✔
383
  min = TMIN(min, tsGrantHBInterval);
20✔
384
  min = TMIN(min, tsUptimeInterval);
20✔
385

386
  return min <= 1 ? 2 : min;
20✔
387
}
388
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
20✔
389
  int32_t code = 0;
20✔
390
#ifndef TD_ASTRA  
391
  if (sec % tsTtlPushIntervalSec == 0) {
20✔
392
    mndPullupTtl(pMnode);
1✔
393
  }
394

395
  if (sec % tsTrimVDbIntervalSec == 0) {
20!
UNCOV
396
    mndPullupTrimDb(pMnode);
×
397
  }
398
#endif
399
#ifdef USE_S3
400
  if (tsS3MigrateEnabled && sec % tsS3MigrateIntervalSec == 0) {
20!
UNCOV
401
    mndPullupS3MigrateDb(pMnode);
×
402
  }
403
#endif
404
  if (sec % tsTransPullupInterval == 0) {
20✔
405
    mndPullupTrans(pMnode);
6✔
406
  }
407

408
  if (sec % tsCompactPullupInterval == 0) {
20✔
409
    mndPullupCompacts(pMnode);
1✔
410
  }
411
#ifdef USE_TOPIC
412
  if (sec % tsMqRebalanceInterval == 0) {
20✔
413
    mndCalMqRebalance(pMnode);
6✔
414
  }
415
#endif
416
#ifdef USE_STREAM
417
  if (sec % 30 == 0) {  // send the checkpoint info every 30 sec
20!
UNCOV
418
    mndStreamCheckpointTimer(pMnode);
×
419
  }
420

421
  if (sec % tsStreamNodeCheckInterval == 0) {
20!
UNCOV
422
    mndStreamCheckNode(pMnode);
×
423
  }
424

425
  if (sec % (tsStreamFailedTimeout/1000) == 0) {
20!
UNCOV
426
    mndStreamCheckStatus(pMnode);
×
427
  }
428

429
  if (sec % 5 == 0) {
20✔
430
    mndStreamConsensusChkpt(pMnode);
2✔
431
  }
432
#endif
433
#ifdef USE_REPORT
434
  if (sec % tsTelemInterval == (TMIN(86400, (tsTelemInterval - 1)))) {
20!
UNCOV
435
    mndPullupTelem(pMnode);
×
436
  }
437
#endif
438
#ifndef TD_ASTRA
439
  if (sec % tsGrantHBInterval == 0) {
20✔
440
    mndPullupGrant(pMnode);
9✔
441
  }
442
#endif
443
  if (sec % tsUptimeInterval == 0) {
20!
UNCOV
444
    mndIncreaseUpTime(pMnode);
×
445
  }
446
#ifndef TD_ASTRA
447
  if (sec % (tsArbHeartBeatIntervalSec) == 0) {
20✔
448
    if ((code = mndPullupArbHeartbeat(pMnode)) != 0) {
6!
UNCOV
449
      mError("failed to pullup arb heartbeat, since:%s", tstrerror(code));
×
450
    }
451
  }
452

453
  if (sec % (tsArbCheckSyncIntervalSec) == 0) {
20✔
454
    if ((code = mndPullupArbCheckSync(pMnode)) != 0) {
4!
UNCOV
455
      mError("failed to pullup arb check sync, since:%s", tstrerror(code));
×
456
    }
457
  }
458
#endif
459
}
20✔
460
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
20✔
461
  if (sec % (tsStatusInterval * 5) == 0) {
20✔
462
    mndCheckDnodeOffline(pMnode);
2✔
463
  }
464
  if (sec % (MNODE_TIMEOUT_SEC / 2) == 0) {
20!
UNCOV
465
    mndSyncCheckTimeout(pMnode);
×
466
  }
467
}
20✔
468

469
static void *mndThreadFp(void *param) {
8✔
470
  SMnode *pMnode = param;
8✔
471
  int64_t lastTime = 0;
8✔
472
  setThreadName("mnode-timer");
8✔
473

474
  while (1) {
241✔
475
    lastTime++;
249✔
476
    taosMsleep(100);
249✔
477
    if (mndGetStop(pMnode)) break;
249✔
478
    if (lastTime % 10 != 0) continue;
241✔
479

480
    int64_t sec = lastTime / 10;
20✔
481
    mndDoTimerCheckTask(pMnode, sec);
20✔
482

483
    int64_t minCron = minCronTime();
20✔
484
    if (sec % minCron == 0 && mnodeIsNotLeader(pMnode)) {
20!
485
      // not leader, do nothing
UNCOV
486
      mTrace("timer not process since mnode is not leader, reason: %s", tstrerror(terrno));
×
UNCOV
487
      terrno = 0;
×
UNCOV
488
      continue;
×
489
    }
490
    mndDoTimerPullupTask(pMnode, sec);
20✔
491
  }
492

493
  return NULL;
8✔
494
}
495

496
// Start the joinable "mnode-timer" thread running mndThreadFp().
//
// Returns 0 on success, or the non-zero code from taosThreadCreate() on
// failure (also logged). The thread attribute object is destroyed on both
// paths, so a failed create no longer leaves it initialised.
static int32_t mndInitTimer(SMnode *pMnode) {
  int32_t      code = 0;
  TdThreadAttr thAttr;
  (void)taosThreadAttrInit(&thAttr);
  (void)taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE);
#ifdef TD_COMPACT_OS
  (void)taosThreadAttrSetStackSize(&thAttr, STACK_SIZE_SMALL);
#endif
  code = taosThreadCreate(&pMnode->thread, &thAttr, mndThreadFp, pMnode);
  // The attributes are only needed for the create call itself.
  (void)taosThreadAttrDestroy(&thAttr);
  if (code != 0) {
    mError("failed to create timer thread since %s", tstrerror(code));
    TAOS_RETURN(code);
  }

  tmsgReportStartup("mnode-timer", "initialized");
  TAOS_RETURN(code);
}
513

514
static void mndCleanupTimer(SMnode *pMnode) {
8✔
515
  if (taosCheckPthreadValid(pMnode->thread)) {
8!
516
    (void)taosThreadJoin(pMnode->thread, NULL);
8✔
517
    taosThreadClear(&pMnode->thread);
8✔
518
  }
519
}
8✔
520

521
static int32_t mndCreateDir(SMnode *pMnode, const char *path) {
8✔
522
  int32_t code = 0;
8✔
523
  pMnode->path = taosStrdup(path);
8!
524
  if (pMnode->path == NULL) {
8!
UNCOV
525
    code = terrno;
×
UNCOV
526
    TAOS_RETURN(code);
×
527
  }
528

529
  if (taosMkDir(pMnode->path) != 0) {
8!
530
    code = terrno;
×
531
    TAOS_RETURN(code);
×
532
  }
533

534
  TAOS_RETURN(code);
8✔
535
}
536

537
static int32_t mndInitWal(SMnode *pMnode) {
8✔
538
  int32_t code = 0;
8✔
539
  char    path[PATH_MAX + 20] = {0};
8✔
540
  (void)snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP);
8✔
541
  SWalCfg cfg = {.vgId = 1,
8✔
542
                 .fsyncPeriod = 0,
543
                 .rollPeriod = -1,
544
                 .segSize = -1,
545
                 .committed = -1,
546
                 .retentionPeriod = 0,
547
                 .retentionSize = 0,
548
                 .level = TAOS_WAL_FSYNC,
549
                 .encryptAlgorithm = 0,
550
                 .encryptKey = {0}};
551

552
#if defined(TD_ENTERPRISE) || defined(TD_ASTRA_TODO)
553
  if (tsiEncryptAlgorithm == DND_CA_SM4 && (tsiEncryptScope & DND_CS_MNODE_WAL) == DND_CS_MNODE_WAL) {
8!
UNCOV
554
    cfg.encryptAlgorithm = (tsiEncryptScope & DND_CS_MNODE_WAL) ? tsiEncryptAlgorithm : 0;
×
UNCOV
555
    if (tsEncryptKey[0] == '\0') {
×
UNCOV
556
      code = TSDB_CODE_DNODE_INVALID_ENCRYPTKEY;
×
UNCOV
557
      TAOS_RETURN(code);
×
558
    } else {
UNCOV
559
      tstrncpy(cfg.encryptKey, tsEncryptKey, ENCRYPT_KEY_LEN + 1);
×
560
    }
561
  }
562
#endif
563

564
  pMnode->pWal = walOpen(path, &cfg);
8✔
565
  if (pMnode->pWal == NULL) {
8!
566
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
567
    if (terrno != 0) code = terrno;
×
568
    mError("failed to open wal since %s. wal:%s", tstrerror(code), path);
×
UNCOV
569
    TAOS_RETURN(code);
×
570
  }
571

572
  TAOS_RETURN(code);
8✔
573
}
574

575
static void mndCloseWal(SMnode *pMnode) {
8✔
576
  if (pMnode->pWal != NULL) {
8!
577
    walClose(pMnode->pWal);
8✔
578
    pMnode->pWal = NULL;
8✔
579
  }
580
}
8✔
581

582
static int32_t mndInitSdb(SMnode *pMnode) {
8✔
583
  int32_t code = 0;
8✔
584
  SSdbOpt opt = {0};
8✔
585
  opt.path = pMnode->path;
8✔
586
  opt.pMnode = pMnode;
8✔
587
  opt.pWal = pMnode->pWal;
8✔
588

589
  pMnode->pSdb = sdbInit(&opt);
8✔
590
  if (pMnode->pSdb == NULL) {
8!
UNCOV
591
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
UNCOV
592
    if (terrno != 0) code = terrno;
×
UNCOV
593
    TAOS_RETURN(code);
×
594
  }
595

596
  TAOS_RETURN(code);
8✔
597
}
598

599
static int32_t mndOpenSdb(SMnode *pMnode) {
8✔
600
  int32_t code = 0;
8✔
601
  if (!pMnode->deploy) {
8!
UNCOV
602
    code = sdbReadFile(pMnode->pSdb);
×
603
  }
604

605
  mInfo("vgId:1, mnode sdb is opened, with applied index:%" PRId64, pMnode->pSdb->commitIndex);
8!
606

607
  atomic_store_64(&pMnode->applied, pMnode->pSdb->commitIndex);
8✔
608
  return code;
8✔
609
}
610

611
static void mndCleanupSdb(SMnode *pMnode) {
8✔
612
  if (pMnode->pSdb) {
8!
613
    sdbCleanup(pMnode->pSdb);
8✔
614
    pMnode->pSdb = NULL;
8✔
615
  }
616
}
8✔
617

618
static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
288✔
619
  SMnodeStep step = {0};
288✔
620
  step.name = name;
288✔
621
  step.initFp = initFp;
288✔
622
  step.cleanupFp = cleanupFp;
288✔
623
  if (taosArrayPush(pMnode->pSteps, &step) == NULL) {
576!
UNCOV
624
    TAOS_RETURN(terrno);
×
625
  }
626

627
  TAOS_RETURN(0);
288✔
628
}
629

630
static int32_t mndInitSteps(SMnode *pMnode) {
8✔
631
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-wal", mndInitWal, mndCloseWal));
8!
632
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sdb", mndInitSdb, mndCleanupSdb));
8!
633
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-trans", mndInitTrans, mndCleanupTrans));
8!
634
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-cluster", mndInitCluster, mndCleanupCluster));
8!
635
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-mnode", mndInitMnode, mndCleanupMnode));
8!
636
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-qnode", mndInitQnode, mndCleanupQnode));
8!
637
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-snode", mndInitSnode, mndCleanupSnode));
8!
638
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-anode", mndInitAnode, mndCleanupAnode));
8!
639
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-arbgroup", mndInitArbGroup, mndCleanupArbGroup));
8!
640
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-config", mndInitConfig, NULL));
8!
641
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-dnode", mndInitDnode, mndCleanupDnode));
8!
642
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-user", mndInitUser, mndCleanupUser));
8!
643
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-grant", mndInitGrant, mndCleanupGrant));
8!
644
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-privilege", mndInitPrivilege, mndCleanupPrivilege));
8!
645
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-acct", mndInitAcct, mndCleanupAcct));
8!
646
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-stream", mndInitStream, mndCleanupStream));
8!
647
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-topic", mndInitTopic, mndCleanupTopic));
8!
648
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-consumer", mndInitConsumer, mndCleanupConsumer));
8!
649
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-subscribe", mndInitSubscribe, mndCleanupSubscribe));
8!
650
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-vgroup", mndInitVgroup, mndCleanupVgroup));
8!
651
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-stb", mndInitStb, mndCleanupStb));
8!
652
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sma", mndInitSma, mndCleanupSma));
8!
653
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-idx", mndInitIdx, mndCleanupIdx));
8!
654
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-infos", mndInitInfos, mndCleanupInfos));
8!
655
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-perfs", mndInitPerfs, mndCleanupPerfs));
8!
656
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-db", mndInitDb, mndCleanupDb));
8!
657
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-func", mndInitFunc, mndCleanupFunc));
8!
658
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-view", mndInitView, mndCleanupView));
8!
659
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-compact", mndInitCompact, mndCleanupCompact));
8!
660
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-compact-detail", mndInitCompactDetail, mndCleanupCompactDetail));
8!
661
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sdb", mndOpenSdb, NULL));
8!
662
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-profile", mndInitProfile, mndCleanupProfile));
8!
663
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-show", mndInitShow, mndCleanupShow));
8!
664
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-query", mndInitQuery, mndCleanupQuery));
8!
665
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-sync", mndInitSync, mndCleanupSync));
8!
666
  TAOS_CHECK_RETURN(mndAllocStep(pMnode, "mnode-telem", mndInitTelem, mndCleanupTelem));
8!
667
  return 0;
8✔
668
}
669

670
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
8✔
671
  if (pMnode->pSteps == NULL) return;
8!
672

673
  if (pos == -1) {
8!
674
    pos = taosArrayGetSize(pMnode->pSteps) - 1;
8✔
675
  }
676

677
  for (int32_t s = pos; s >= 0; s--) {
296✔
678
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, s);
288✔
679
    mInfo("%s will cleanup", pStep->name);
288!
680
    if (pStep->cleanupFp != NULL) {
288✔
681
      (*pStep->cleanupFp)(pMnode);
272✔
682
    }
683
  }
684

685
  taosArrayClear(pMnode->pSteps);
8✔
686
  taosArrayDestroy(pMnode->pSteps);
8✔
687
  pMnode->pSteps = NULL;
8✔
688
}
689

690
static int32_t mndExecSteps(SMnode *pMnode) {
8✔
691
  int32_t code = 0;
8✔
692
  int32_t size = taosArrayGetSize(pMnode->pSteps);
8✔
693
  for (int32_t pos = 0; pos < size; pos++) {
296✔
694
    SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, pos);
288✔
695
    if (pStep->initFp == NULL) continue;
288!
696

697
    if ((code = (*pStep->initFp)(pMnode)) != 0) {
288!
UNCOV
698
      mError("%s exec failed since %s, start to cleanup", pStep->name, tstrerror(code));
×
UNCOV
699
      mndCleanupSteps(pMnode, pos);
×
UNCOV
700
      TAOS_RETURN(code);
×
701
    } else {
702
      mInfo("%s is initialized", pStep->name);
288!
703
      tmsgReportStartup(pStep->name, "initialized");
288✔
704
    }
705
  }
706

707
  pMnode->clusterId = mndGetClusterId(pMnode);
8✔
708
  TAOS_RETURN(0);
8✔
709
}
710

711
static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
8✔
712
  pMnode->msgCb = pOption->msgCb;
8✔
713
  pMnode->selfDnodeId = pOption->dnodeId;
8✔
714
  pMnode->syncMgmt.selfIndex = pOption->selfIndex;
8✔
715
  pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
8✔
716
  pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
8✔
717
  pMnode->syncMgmt.lastIndex = pOption->lastIndex;
8✔
718
  (void)memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
8✔
719
  (void)memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
8✔
720
}
8✔
721

722
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
8✔
723
  terrno = 0;
8✔
724
  mInfo("start to open mnode in %s", path);
8!
725

726
  SMnode *pMnode = taosMemoryCalloc(1, sizeof(SMnode));
8!
727
  if (pMnode == NULL) {
8!
UNCOV
728
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
729
    mError("failed to open mnode since %s", terrstr());
×
730
    return NULL;
×
731
  }
732
  (void)memset(pMnode, 0, sizeof(SMnode));
8✔
733

734
  int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL);
8✔
735
  if (code != 0) {
8!
UNCOV
736
    taosMemoryFree(pMnode);
×
737
    mError("failed to open mnode lock since %s", tstrerror(code));
×
738
    return NULL;
×
739
  }
740

741
  char timestr[24] = "1970-01-01 00:00:00.00";
8✔
742
  code = taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, NULL);
8✔
743
  if (code < 0) {
8!
UNCOV
744
    mError("failed to parse time since %s", tstrerror(code));
×
UNCOV
745
    (void)taosThreadRwlockDestroy(&pMnode->lock);
×
746
    taosMemoryFree(pMnode);
×
747
    return NULL;
×
748
  }
749
  mndSetOptions(pMnode, pOption);
8✔
750

751
  pMnode->deploy = pOption->deploy;
8✔
752
  pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
8✔
753
  if (pMnode->pSteps == NULL) {
8!
UNCOV
754
    taosMemoryFree(pMnode);
×
755
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
756
    mError("failed to open mnode since %s", terrstr());
×
757
    return NULL;
×
758
  }
759

760
  code = mndCreateDir(pMnode, path);
8✔
761
  if (code != 0) {
8!
UNCOV
762
    code = terrno;
×
UNCOV
763
    mError("failed to open mnode since %s", tstrerror(code));
×
UNCOV
764
    mndClose(pMnode);
×
UNCOV
765
    terrno = code;
×
UNCOV
766
    return NULL;
×
767
  }
768

769
  code = mndInitSteps(pMnode);
8✔
770
  if (code != 0) {
8!
UNCOV
771
    code = terrno;
×
UNCOV
772
    mError("failed to open mnode since %s", tstrerror(code));
×
UNCOV
773
    mndClose(pMnode);
×
UNCOV
774
    terrno = code;
×
UNCOV
775
    return NULL;
×
776
  }
777

778
  code = mndExecSteps(pMnode);
8✔
779
  if (code != 0) {
8!
UNCOV
780
    code = terrno;
×
UNCOV
781
    mError("failed to open mnode since %s", tstrerror(code));
×
UNCOV
782
    mndClose(pMnode);
×
UNCOV
783
    terrno = code;
×
UNCOV
784
    return NULL;
×
785
  }
786

787
  mInfo("mnode open successfully");
8!
788
  return pMnode;
8✔
789
}
790

791
/*
 * Pre-close hook for the mnode. Runs, in order: syncLeaderTransfer,
 * syncPreStop and sdbWriteFile. A negative result from the transfer or the
 * sdb write is logged only; the remaining steps still run.
 * Does nothing when pMnode is NULL.
 */
void mndPreClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  // TODO check return value
  int32_t ret = syncLeaderTransfer(pMnode->syncMgmt.sync);
  if (ret < 0) {
    mError("failed to transfer leader since %s", tstrerror(ret));
  }

  syncPreStop(pMnode->syncMgmt.sync);

  ret = sdbWriteFile(pMnode->pSdb, 0);
  if (ret < 0) {
    mError("failed to write sdb since %s", tstrerror(ret));
  }
}
8✔
806

807
/*
 * Tear down an mnode: run mndCleanupSteps(pMnode, -1), then free the path
 * string and the SMnode object itself. Does nothing when pMnode is NULL.
 * The pointer must not be used by the caller afterwards.
 */
void mndClose(SMnode *pMnode) {
  if (pMnode == NULL) {
    return;
  }

  mInfo("start to close mnode");
  mndCleanupSteps(pMnode, -1);
  taosMemoryFreeClear(pMnode->path);
  taosMemoryFreeClear(pMnode);
  mInfo("mnode is closed");
}
8✔
816

817
/*
 * Start the mnode: start sync, optionally deploy the sdb (only when the
 * mnode was opened with the deploy flag), reset grants and initialise the
 * timer.
 *
 * Returns -1 when the sdb deployment fails, otherwise whatever
 * mndInitTimer() returns.
 */
int32_t mndStart(SMnode *pMnode) {
  mndSyncStart(pMnode);

  if (pMnode->deploy) {
    if (sdbDeploy(pMnode->pSdb) != 0) {
      mError("failed to deploy sdb while start mnode");
      return -1;
    }
    // a freshly deployed sdb is marked restored immediately
    mndSetRestored(pMnode, true);
  }

  grantReset(pMnode, TSDB_GRANT_ALL, 0);

  int32_t timerCode = mndInitTimer(pMnode);
  return timerCode;
}
830

UNCOV
831
/* Forward to syncIsCatchUp() using this mnode's sync handle. */
int32_t mndIsCatchUp(SMnode *pMnode) {
  return syncIsCatchUp((int64_t)pMnode->syncMgmt.sync);
}
835

UNCOV
836
/* Forward to syncGetRole() using this mnode's sync handle. */
ESyncRole mndGetRole(SMnode *pMnode) {
  return syncGetRole((int64_t)pMnode->syncMgmt.sync);
}
840

841
/* Forward to syncGetTerm() using this mnode's sync handle. */
int64_t mndGetTerm(SMnode *pMnode) {
  return syncGetTerm((int64_t)pMnode->syncMgmt.sync);
}
845

846
/*
 * Forward to syncGetArbToken() using this mnode's sync handle.
 * outToken is passed through unchanged; its required size is defined by
 * syncGetArbToken and is not visible here.
 */
int32_t mndGetArbToken(SMnode *pMnode, char *outToken) {
  int32_t code = syncGetArbToken(pMnode->syncMgmt.sync, outToken);
  return code;
}
10✔
847

848
/*
 * Stop the mnode. The order matters: the stopped flag is raised first,
 * then sync is stopped, and finally the timer is cleaned up.
 */
void mndStop(SMnode *pMnode) {
  mndSetStop(pMnode);       // raise the stopped flag
  mndSyncStop(pMnode);      // stop sync
  mndCleanupTimer(pMnode);  // clean up the timer
}
8✔
853

854
/*
 * Hand a sync message to syncProcessMsg() and return its result code.
 * A non-zero code is logged before being returned.
 */
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
  // `trace` is presumably picked up by name inside the mG* logging macros; keep the identifier.
  const STraceId *trace = &pMsg->info.traceId;
  SMnode         *pMnode = pMsg->info.node;

  mGTrace("vgId:1, sync msg:%p will be processed, type:%s", pMsg, TMSG_INFO(pMsg->msgType));

  int32_t code = syncProcessMsg(pMnode->syncMgmt.sync, pMsg);
  if (code == 0) {
    return code;
  }

  mGError("vgId:1, failed to process sync msg:%p type:%s, reason: %s, code:0x%x", pMsg, TMSG_INFO(pMsg->msgType),
          tstrerror(code), code);
  return code;
}
869

870
/*
 * Decide whether this mnode may process pMsg right now.
 *
 * - Non-request messages and the TDMT_SCH_* query/fetch/task messages listed
 *   below are always accepted; no rpc reference is taken for them.
 * - Otherwise, under the mnode read lock, the message is accepted only when
 *   the mnode is not stopped, sync reports the leader state, and both sync
 *   and the mnode are restored. On acceptance pMnode->rpcRef is incremented.
 *
 * Returns 0 when accepted. Otherwise returns TSDB_CODE_APP_IS_STOPPING, the
 * terrno left by syncGetState(), TSDB_CODE_SYN_NOT_LEADER or
 * TSDB_CODE_SYN_RESTORING; or -1 when a redirect is needed but the mnode
 * ep set is empty. For the not-leader / restoring cases on non-timer
 * messages, the serialized mnode ep set is attached to pMsg->info.rsp so the
 * sender can be redirected.
 */
static int32_t mndCheckMnodeState(SRpcMsg *pMsg) {
  int32_t code = 0;
  if (!IsReq(pMsg)) TAOS_RETURN(code);
  if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY ||
      pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT ||
      pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK ||
      pMsg->msgType == TDMT_SCH_TASK_NOTIFY) {
    TAOS_RETURN(code);
  }

  SMnode *pMnode = pMsg->info.node;
  (void)taosThreadRwlockRdlock(&pMnode->lock);
  if (pMnode->stopped) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_APP_IS_STOPPING;
    TAOS_RETURN(code);
  }

  // syncGetState reports failure through terrno, so clear it first
  terrno = 0;
  SSyncState state = syncGetState(pMnode->syncMgmt.sync);
  if (terrno != 0) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = terrno;
    TAOS_RETURN(code);
  }

  if (state.state != TAOS_SYNC_STATE_LEADER) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_SYN_NOT_LEADER;
    goto _OVER;
  }

  if (!state.restored || !pMnode->restored) {
    (void)taosThreadRwlockUnlock(&pMnode->lock);
    code = TSDB_CODE_SYN_RESTORING;
    goto _OVER;
  }

  // accepted: take the rpc reference while still holding the read lock
#if 1
  (void)atomic_add_fetch_32(&pMnode->rpcRef, 1);
#else
  int32_t ref = atomic_add_fetch_32(&pMnode->rpcRef, 1);
  mTrace("mnode rpc is acquired, ref:%d", ref);
#endif

  (void)taosThreadRwlockUnlock(&pMnode->lock);
  TAOS_RETURN(code);

_OVER:
  // the lock has already been released on every path that jumps here

  // internal timer messages are dropped without building a redirect response
  if (pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
      pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
      pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
      pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
      pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT ||
      pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER ||
      pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER || pMsg->msgType == TDMT_MND_CHECK_STREAM_TIMER) {
    mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
           pMnode->stopped, state.restored, syncStr(state.state));
    TAOS_RETURN(code);
  }

  // `trace` is presumably picked up by name inside the mG* logging macros; keep the identifier.
  const STraceId *trace = &pMsg->info.traceId;
  SEpSet          epSet = {0};
  mndGetMnodeEpSet(pMnode, &epSet);

  mGDebug(
      "msg:%p, type:%s failed to process since %s, mnode restored:%d stopped:%d, sync restored:%d "
      "role:%s, redirect numOfEps:%d inUse:%d, type:%s",
      pMsg, TMSG_INFO(pMsg->msgType), tstrerror(code), pMnode->restored, pMnode->stopped, state.restored,
      syncStr(state.state), epSet.numOfEps, epSet.inUse, TMSG_INFO(pMsg->msgType));

  if (epSet.numOfEps <= 0) return -1;

  for (int32_t i = 0; i < epSet.numOfEps; ++i) {
    mDebug("mnode index:%d, ep:%s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port);
  }

  // first pass with a NULL buffer only computes the encoded length
  int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet);
  if (contLen < 0) {
    // never hand a negative size to the allocator; reply without an ep set
    mError("failed to get serialized size of ep set");
    TAOS_RETURN(code);
  }

  pMsg->info.rsp = rpcMallocCont(contLen);
  if (pMsg->info.rsp != NULL) {
    if (tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet) < 0) {
      mError("failed to serialize ep set");
    }
    pMsg->info.hasEpSet = 1;
    pMsg->info.rspLen = contLen;
  }

  TAOS_RETURN(code);
}
959

960
/*
 * Dispatch an rpc message to the handler registered for its type.
 *
 * The plain handler table (msgFp) is consulted first and the extended table
 * (msgFpExt, which also receives pQueueInfo) second. If no handler exists,
 * or the message type maps outside the handler tables,
 * TSDB_CODE_MSG_NOT_PROCESSED is returned. Before the handler runs,
 * mndCheckMnodeState() must accept the message; its error code is returned
 * as-is otherwise.
 *
 * Returns the handler's result code. A handler result of -1 is replaced by
 * terrno.
 */
int32_t mndProcessRpcMsg(SRpcMsg *pMsg, SQueueInfo *pQueueInfo) {
  SMnode         *pMnode = pMsg->info.node;
  // `trace` is presumably picked up by name inside the mG* logging macros; keep the identifier.
  const STraceId *trace = &pMsg->info.traceId;
  int32_t         code = TSDB_CODE_SUCCESS;

  // Same bounds rule as mndSetMsgHandle/mndSetMsgHandleExt: never index the
  // handler tables with a slot that is not below TDMT_MAX.
  tmsg_t      msgIndex = TMSG_INDEX(pMsg->msgType);
  MndMsgFp    fp = NULL;
  MndMsgFpExt fpExt = NULL;
  if (msgIndex < TDMT_MAX) {
    fp = pMnode->msgFp[msgIndex];
    if (fp == NULL) {
      fpExt = pMnode->msgFpExt[msgIndex];
    }
  }

  if (fp == NULL && fpExt == NULL) {
    mGError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
    code = TSDB_CODE_MSG_NOT_PROCESSED;
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndCheckMnodeState(pMsg));

  mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType));
  if (fp) {
    code = (*fp)(pMsg);
  } else {
    code = (*fpExt)(pMsg, pQueueInfo);
  }
  // NOTE(review): mndCheckMnodeState returns success without taking an rpc
  // reference for non-request and TDMT_SCH_* messages, yet the release below
  // runs for every message - confirm the counter is meant to tolerate this.
  mndReleaseRpc(pMnode);

  if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
    mGTrace("msg:%p, won't response immediately since in progress", pMsg);
  } else if (code == 0) {
    mGTrace("msg:%p, successfully processed", pMsg);
  } else {
    // TODO remove this wrong set code
    if (code == -1) {
      code = terrno;
    }
    mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
            TMSG_INFO(pMsg->msgType));
  }

  TAOS_RETURN(code);
}
1000

1001
/*
 * Register fp as the handler for msgType in the plain handler table.
 * A message type whose index is not below TDMT_MAX is silently ignored.
 */
void mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFp[slot] = fp;
}
1,432✔
1007

1008
/*
 * Register fp as the handler for msgType in the extended handler table.
 * A message type whose index is not below TDMT_MAX is silently ignored.
 */
void mndSetMsgHandleExt(SMnode *pMnode, tmsg_t msgType, MndMsgFpExt fp) {
  tmsg_t slot = TMSG_INDEX(msgType);
  if (slot >= TDMT_MAX) {
    return;
  }
  pMnode->msgFpExt[slot] = fp;
}
64✔
1014

1015
/*
 * Generate a positive 64-bit uid for `name` (length `len`).
 *
 * Bit layout before the absolute value is taken:
 *   bits 24..63  low 40 bits of the current microsecond timestamp
 *   bits  8..23  low 16 bits of MurmurHash3_32(name, len)
 *   bits  0..7   low 8 bits of taosRand()
 *
 * Note: uid 0 is reserved, so a zero candidate is discarded and regenerated.
 * The value is composed in unsigned arithmetic because the timestamp bits
 * may reach the sign bit (a signed left shift there is undefined), and a
 * candidate equal to INT64_MIN is also regenerated because llabs() cannot
 * represent its magnitude.
 */
int64_t mndGenerateUid(const char *name, int32_t len) {
  const uint64_t hashBits = (uint64_t)((uint32_t)MurmurHash3_32(name, len) & 0xFFFFu) << 8;

  do {
    const uint64_t timeBits = ((uint64_t)taosGetTimestampUs() & 0x000000FFFFFFFFFFull) << 24;
    const uint64_t randBits = (uint64_t)taosRand() & 0xFFu;
    const int64_t  uuid = (int64_t)(timeBits + hashBits + randBits);
    if (uuid != 0 && uuid != INT64_MIN) {
      return llabs(uuid);
    }
  } while (true);
}
1027

UNCOV
1028
int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo,
×
1029
                          SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) {
UNCOV
1030
  int32_t code = mndAcquireRpc(pMnode);
×
UNCOV
1031
  if (code < 0) {
×
UNCOV
1032
    TAOS_RETURN(code);
×
UNCOV
1033
  } else if (code == 1) {
×
UNCOV
1034
    TAOS_RETURN(TSDB_CODE_SUCCESS);
×
1035
  }
1036

UNCOV
1037
  SSdb   *pSdb = pMnode->pSdb;
×
UNCOV
1038
  int64_t ms = taosGetTimestampMs();
×
1039

UNCOV
1040
  pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc));
×
UNCOV
1041
  pClusterInfo->mnodes = taosArrayInit(sdbGetSize(pSdb, SDB_MNODE), sizeof(SMonMnodeDesc));
×
UNCOV
1042
  pVgroupInfo->vgroups = taosArrayInit(sdbGetSize(pSdb, SDB_VGROUP), sizeof(SMonVgroupDesc));
×
UNCOV
1043
  pStbInfo->stbs = taosArrayInit(sdbGetSize(pSdb, SDB_STB), sizeof(SMonStbDesc));
×
UNCOV
1044
  if (pClusterInfo->dnodes == NULL || pClusterInfo->mnodes == NULL || pVgroupInfo->vgroups == NULL ||
×
UNCOV
1045
      pStbInfo->stbs == NULL) {
×
UNCOV
1046
    mndReleaseRpc(pMnode);
×
UNCOV
1047
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
UNCOV
1048
    if (terrno != 0) code = terrno;
×
UNCOV
1049
    TAOS_RETURN(code);
×
1050
  }
1051

1052
  // cluster info
UNCOV
1053
  tstrncpy(pClusterInfo->version, td_version, sizeof(pClusterInfo->version));
×
UNCOV
1054
  pClusterInfo->monitor_interval = tsMonitorInterval;
×
UNCOV
1055
  pClusterInfo->connections_total = mndGetNumOfConnections(pMnode);
×
UNCOV
1056
  pClusterInfo->dbs_total = sdbGetSize(pSdb, SDB_DB);
×
UNCOV
1057
  pClusterInfo->stbs_total = sdbGetSize(pSdb, SDB_STB);
×
UNCOV
1058
  pClusterInfo->topics_toal = sdbGetSize(pSdb, SDB_TOPIC);
×
UNCOV
1059
  pClusterInfo->streams_total = sdbGetSize(pSdb, SDB_STREAM);
×
1060

UNCOV
1061
  void *pIter = NULL;
×
UNCOV
1062
  while (1) {
×
UNCOV
1063
    SDnodeObj *pObj = NULL;
×
UNCOV
1064
    pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pObj);
×
UNCOV
1065
    if (pIter == NULL) break;
×
1066

UNCOV
1067
    SMonDnodeDesc desc = {0};
×
UNCOV
1068
    desc.dnode_id = pObj->id;
×
UNCOV
1069
    tstrncpy(desc.dnode_ep, pObj->ep, sizeof(desc.dnode_ep));
×
UNCOV
1070
    if (mndIsDnodeOnline(pObj, ms)) {
×
UNCOV
1071
      tstrncpy(desc.status, "ready", sizeof(desc.status));
×
1072
    } else {
UNCOV
1073
      tstrncpy(desc.status, "offline", sizeof(desc.status));
×
1074
    }
1075
    if (taosArrayPush(pClusterInfo->dnodes, &desc) == NULL) {
×
1076
      mError("failed put dnode into array, but continue at this monitor report")
×
1077
    }
UNCOV
1078
    sdbRelease(pSdb, pObj);
×
1079
  }
1080

UNCOV
1081
  pIter = NULL;
×
UNCOV
1082
  while (1) {
×
UNCOV
1083
    SMnodeObj *pObj = NULL;
×
UNCOV
1084
    pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pObj);
×
UNCOV
1085
    if (pIter == NULL) break;
×
1086

UNCOV
1087
    SMonMnodeDesc desc = {0};
×
UNCOV
1088
    desc.mnode_id = pObj->id;
×
UNCOV
1089
    tstrncpy(desc.mnode_ep, pObj->pDnode->ep, sizeof(desc.mnode_ep));
×
1090

UNCOV
1091
    if (pObj->id == pMnode->selfDnodeId) {
×
UNCOV
1092
      pClusterInfo->first_ep_dnode_id = pObj->id;
×
UNCOV
1093
      tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep));
×
1094
      // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f;
UNCOV
1095
      pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode);
×
1096
      // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f);
UNCOV
1097
      tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role));
×
UNCOV
1098
      desc.syncState = TAOS_SYNC_STATE_LEADER;
×
1099
    } else {
1100
      tstrncpy(desc.role, syncStr(pObj->syncState), sizeof(desc.role));
×
1101
      desc.syncState = pObj->syncState;
×
1102
    }
UNCOV
1103
    if (taosArrayPush(pClusterInfo->mnodes, &desc) == NULL) {
×
UNCOV
1104
      mError("failed to put mnode into array, but continue at this monitor report");
×
1105
    }
UNCOV
1106
    sdbRelease(pSdb, pObj);
×
1107
  }
1108

1109
  // vgroup info
UNCOV
1110
  pIter = NULL;
×
UNCOV
1111
  while (1) {
×
UNCOV
1112
    SVgObj *pVgroup = NULL;
×
UNCOV
1113
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
×
UNCOV
1114
    if (pIter == NULL) break;
×
1115

UNCOV
1116
    pClusterInfo->vgroups_total++;
×
UNCOV
1117
    pClusterInfo->tbs_total += pVgroup->numOfTables;
×
1118

UNCOV
1119
    SMonVgroupDesc desc = {0};
×
UNCOV
1120
    desc.vgroup_id = pVgroup->vgId;
×
1121

UNCOV
1122
    SName name = {0};
×
UNCOV
1123
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
×
UNCOV
1124
    if (code < 0) {
×
UNCOV
1125
      mError("failed to get db name since %s", tstrerror(code));
×
1126
      sdbRelease(pSdb, pVgroup);
×
UNCOV
1127
      TAOS_RETURN(code);
×
1128
    }
UNCOV
1129
    (void)tNameGetDbName(&name, desc.database_name);
×
1130

UNCOV
1131
    desc.tables_num = pVgroup->numOfTables;
×
UNCOV
1132
    pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries;
×
UNCOV
1133
    tstrncpy(desc.status, "unsynced", sizeof(desc.status));
×
UNCOV
1134
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
×
UNCOV
1135
      SVnodeGid     *pVgid = &pVgroup->vnodeGid[i];
×
UNCOV
1136
      SMonVnodeDesc *pVnDesc = &desc.vnodes[i];
×
UNCOV
1137
      pVnDesc->dnode_id = pVgid->dnodeId;
×
UNCOV
1138
      tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->syncState), sizeof(pVnDesc->vnode_role));
×
UNCOV
1139
      pVnDesc->syncState = pVgid->syncState;
×
UNCOV
1140
      if (pVgid->syncState == TAOS_SYNC_STATE_LEADER || pVgid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
×
UNCOV
1141
        tstrncpy(desc.status, "ready", sizeof(desc.status));
×
UNCOV
1142
        pClusterInfo->vgroups_alive++;
×
1143
      }
1144
      if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
×
1145
        pClusterInfo->vnodes_alive++;
×
1146
      }
UNCOV
1147
      pClusterInfo->vnodes_total++;
×
1148
    }
1149

UNCOV
1150
    if (taosArrayPush(pVgroupInfo->vgroups, &desc) == NULL) {
×
UNCOV
1151
      mError("failed to put vgroup into array, but continue at this monitor report")
×
1152
    }
1153
    sdbRelease(pSdb, pVgroup);
×
1154
  }
1155

1156
  // stb info
UNCOV
1157
  pIter = NULL;
×
UNCOV
1158
  while (1) {
×
1159
    SStbObj *pStb = NULL;
×
UNCOV
1160
    pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
×
UNCOV
1161
    if (pIter == NULL) break;
×
1162

UNCOV
1163
    SMonStbDesc desc = {0};
×
1164

UNCOV
1165
    SName name1 = {0};
×
UNCOV
1166
    code = tNameFromString(&name1, pStb->db, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
×
UNCOV
1167
    if (code < 0) {
×
1168
      mError("failed to get db name since %s", tstrerror(code));
×
1169
      sdbRelease(pSdb, pStb);
×
UNCOV
1170
      TAOS_RETURN(code);
×
1171
    }
UNCOV
1172
    (void)tNameGetDbName(&name1, desc.database_name);
×
1173

UNCOV
1174
    SName name2 = {0};
×
UNCOV
1175
    code = tNameFromString(&name2, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
×
UNCOV
1176
    if (code < 0) {
×
UNCOV
1177
      mError("failed to get table name since %s", tstrerror(code));
×
UNCOV
1178
      sdbRelease(pSdb, pStb);
×
UNCOV
1179
      TAOS_RETURN(code);
×
1180
    }
UNCOV
1181
    tstrncpy(desc.stb_name, tNameGetTableName(&name2), TSDB_TABLE_NAME_LEN);
×
1182

UNCOV
1183
    if (taosArrayPush(pStbInfo->stbs, &desc) == NULL) {
×
UNCOV
1184
      mError("failed to put stb into array, but continue at this monitor report");
×
1185
    }
UNCOV
1186
    sdbRelease(pSdb, pStb);
×
1187
  }
1188

1189
  // grant info
UNCOV
1190
  pGrantInfo->expire_time = (pMnode->grant.expireTimeMS - ms) / 1000;
×
UNCOV
1191
  pGrantInfo->timeseries_total = pMnode->grant.timeseriesAllowed;
×
UNCOV
1192
  if (pMnode->grant.expireTimeMS == 0) {
×
UNCOV
1193
    pGrantInfo->expire_time = 0;
×
UNCOV
1194
    pGrantInfo->timeseries_total = 0;
×
1195
  }
1196

UNCOV
1197
  mndReleaseRpc(pMnode);
×
UNCOV
1198
  TAOS_RETURN(code);
×
1199
}
1200

1201
/*
 * Copy the current sync state (state, restored flag, term and roleTimeMs)
 * as reported by syncGetState() into pLoad. Always returns 0.
 */
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
  mTrace("mnode get load");

  SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);

  pLoad->syncState = snapshot.state;
  pLoad->syncRestore = snapshot.restored;
  pLoad->syncTerm = snapshot.term;
  pLoad->roleTimeMs = snapshot.roleTimeMs;

  mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64,
         syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs);
  return 0;
}
1212

1213
/* Return the roleTimeMs field of the state reported by syncGetState(). */
int64_t mndGetRoleTimeMs(SMnode *pMnode) {
  SSyncState snapshot = syncGetState(pMnode->syncMgmt.sync);
  int64_t    roleTimeMs = snapshot.roleTimeMs;
  return roleTimeMs;
}
1217

1218
/*
 * Set the mnode's restored flag under the write lock.
 *
 * When the flag is being cleared, this call additionally blocks, polling
 * every 3 ms, until pMnode->rpcRef has dropped to zero or below.
 */
void mndSetRestored(SMnode *pMnode, bool restored) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->restored = restored;
  (void)taosThreadRwlockUnlock(&pMnode->lock);
  mInfo("mnode set restored:%d", restored);

  if (restored) {
    return;
  }

  // NOTE(review): rpcRef is read here without the lock or an atomic load,
  // while it is updated atomically elsewhere - confirm this is intended.
  while (pMnode->rpcRef > 0) {
    taosMsleep(3);
  }
}
8✔
1235

UNCOV
1236
/* Return the mnode's restored flag (read without taking the lock). */
bool mndGetRestored(SMnode *pMnode) {
  bool isRestored = pMnode->restored;
  return isRestored;
}
×
1237

1238
/* Mark the mnode as stopped; the flag is written under the write lock. */
void mndSetStop(SMnode *pMnode) {
  (void)taosThreadRwlockWrlock(&pMnode->lock);
  pMnode->stopped = true;
  (void)taosThreadRwlockUnlock(&pMnode->lock);

  mInfo("mnode set stopped");
}
8✔
1244

1245
/* Return the mnode's stopped flag (read without taking the lock). */
bool mndGetStop(SMnode *pMnode) {
  bool isStopped = pMnode->stopped;
  return isStopped;
}
249✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc