• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3658

14 Mar 2025 08:10AM UTC coverage: 63.25% (+0.4%) from 62.877%
#3658

push

travis-ci

web-flow
feat(keep): support keep on super table level. (#30097)

* Feat: support use keep while create super table.

* Test(keep): add test for create super table with keep option.

* Feat(keep): Add tmsg for create keep.

* Feat(keep): support alter table option keep.

* Fix(keep): Add basic test for alter table option.

* Fix(keep): memory leak.

* Feat(keep): add keep to metaEntry&metaCache and fix earliestTs with stb keep.

* Test(keep): add some cases for select with stb keep.

* Fix: fix ci core while alter stb.

* Feat(keep): delete expired data in super table level.

* Feat: remove get stb keep while query.

* Fix : build error.

* Revert "Fix : build error."

This reverts commit 0ed66e4e8.

* Revert "Feat(keep): delete expired data in super table level."

This reverts commit 36330f6b4.

* Fix : build errors.

* Feat : support restart taosd.

* Fix : alter table comment problems.

* Test : add tests for super table keep.

* Fix: change sdb stb reserve size.

* Test: add more tests.

* Feat: Disable normal tables and sub tables from setting the keep parameter

* Fix: add more checks to avoid unknown address.

* Docs: Add docs for stable keep.

* Fix: some review changes.

* Fix: review errors.

148878 of 302527 branches covered (49.21%)

Branch coverage included in aggregate %.

88 of 99 new or added lines in 12 files covered. (88.89%)

3290 existing lines in 68 files now uncovered.

234027 of 302857 relevant lines covered (77.27%)

17847433.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.14
/source/dnode/mnode/impl/src/mndStreamUtil.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "mndDb.h"
17
#include "mndStb.h"
18
#include "mndStream.h"
19
#include "mndTrans.h"
20
#include "mndVgroup.h"
21
#include "taoserror.h"
22
#include "tmisce.h"
23

24
struct SStreamTaskIter {
25
  SStreamObj  *pStream;
26
  int32_t      level;
27
  int32_t      ordinalIndex;
28
  int32_t      totalLevel;
29
  SStreamTask *pTask;
30
};
31

32
int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId);
33

34
// Allocate a task iterator for pStream and hand it back through pIter.
// The new iterator is positioned before the first level (level == -1) and has no current task.
// Returns 0 on success; when the allocation fails *pIter is NULL and terrno is returned.
int32_t createStreamTaskIter(SStreamObj *pStream, SStreamTaskIter **pIter) {
  SStreamTaskIter *pNew = taosMemoryCalloc(1, sizeof(SStreamTaskIter));
  *pIter = pNew;
  if (pNew == NULL) {
    return terrno;
  }

  pNew->pStream = pStream;
  pNew->totalLevel = taosArrayGetSize(pStream->tasks);
  pNew->level = -1;
  pNew->ordinalIndex = 0;
  pNew->pTask = NULL;

  return 0;
}
48

49
// Advance the iterator to the next task, skipping levels that are exhausted or empty.
// Returns true and sets pIter->pTask when a task is available; otherwise returns false
// with pIter->pTask reset to NULL.
bool streamTaskIterNextTask(SStreamTaskIter *pIter) {
  if (pIter->level < pIter->totalLevel) {
    // first step: move from the "before begin" position onto level 0
    if (pIter->level == -1) {
      pIter->level = 0;
    }

    // each pass either yields a task from the current level or moves to the start of the next one
    for (; pIter->level < pIter->totalLevel; pIter->level += 1, pIter->ordinalIndex = 0) {
      SArray *pLevelTasks = taosArrayGetP(pIter->pStream->tasks, pIter->level);
      if (pIter->ordinalIndex < taosArrayGetSize(pLevelTasks)) {
        pIter->pTask = taosArrayGetP(pLevelTasks, pIter->ordinalIndex);
        pIter->ordinalIndex += 1;
        return true;
      }
    }
  }

  pIter->pTask = NULL;
  return false;
}
76

77
// Copy the iterator's current task into *pTask.
// Returns TSDB_CODE_SUCCESS when a task is available, TSDB_CODE_INVALID_PARA when pTask is NULL
// or the iterator currently has no task (in which case *pTask is set to NULL).
int32_t streamTaskIterGetCurrent(SStreamTaskIter *pIter, SStreamTask **pTask) {
  if (pTask == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  *pTask = pIter->pTask;
  return (*pTask == NULL) ? TSDB_CODE_INVALID_PARA : TSDB_CODE_SUCCESS;
}
87

88
// Free an iterator; only the iterator object itself is released.
void destroyStreamTaskIter(SStreamTaskIter *pIter) {
  taosMemoryFree(pIter);
}
108,847!
89

90
// Check every replica of a vgroup.
// Returns false as soon as one replica has not finished sync restore or reports an
// offline/error/learner/candidate sync state; returns true when all replicas pass.
static bool checkStatusForEachReplica(SVgObj *pVgroup) {
  for (int32_t r = 0; r < pVgroup->replica; ++r) {
    if (!pVgroup->vnodeGid[r].syncRestore) {
      mInfo("vgId:%d not restored, not ready for checkpoint or other operations", pVgroup->vgId);
      return false;
    }

    ESyncState state = pVgroup->vnodeGid[r].syncState;
    bool       notServing = (state == TAOS_SYNC_STATE_OFFLINE) || (state == TAOS_SYNC_STATE_ERROR) ||
                      (state == TAOS_SYNC_STATE_LEARNER) || (state == TAOS_SYNC_STATE_CANDIDATE);
    if (notServing) {
      mInfo("vgId:%d state:%d , not ready for checkpoint or other operations, not check other vgroups", pVgroup->vgId,
            state);
      return false;
    }
  }

  return true;
}
108

109
// Append one SNodeEntry (nodeId == SNODE_HANDLE) to pVgroupList for every snode found in the sdb.
// Returns 0 on success, or the error of the first step that failed (building the epset or
// pushing into the list). A failure to format the epset for logging is reported but not fatal.
static int32_t mndAddSnodeInfo(SMnode *pMnode, SArray *pVgroupList) {
  SSnodeObj *pObj = NULL;
  void      *pIter = NULL;
  int32_t    code = 0;

  while (1) {
    pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void **)&pObj);
    if (pIter == NULL) {
      break;
    }

    SNodeEntry entry = {.nodeId = SNODE_HANDLE};
    code = addEpIntoEpSet(&entry.epset, pObj->pDnode->fqdn, pObj->pDnode->port);
    if (code) {
      // log while the reference on pObj is still held; pObj must not be read after sdbRelease()
      mError("failed to extract epset for fqdn:%s during task vgroup snapshot", pObj->pDnode->fqdn);
      sdbRelease(pMnode->pSdb, pObj);
      sdbCancelFetch(pMnode->pSdb, pIter);
      return code;
    }

    // the string is only used for the debug log below, so keep its status out of `code`
    char    buf[256] = {0};
    int32_t ret = epsetToStr(&entry.epset, buf, tListLen(buf));
    if (ret != 0) {  // print error and continue
      mError("failed to convert epset to str, code:%s", tstrerror(ret));
    }

    void *p = taosArrayPush(pVgroupList, &entry);
    if (p == NULL) {
      code = terrno;
      sdbRelease(pMnode->pSdb, pObj);
      sdbCancelFetch(pMnode->pSdb, pIter);
      mError("failed to put entry in vgroup list, nodeId:%d code:%s", entry.nodeId, tstrerror(code));
      return code;
    } else {
      mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf);
    }

    sdbRelease(pMnode->pSdb, pObj);
  }

  return code;
}
151

152
// Walk all mnode objects and verify each one is a leader or follower and has sdb status READY.
// Returns TSDB_CODE_SUCCESS when every mnode passes, TSDB_CODE_FAILED on the first one that does not.
static int32_t mndCheckMnodeStatus(SMnode* pMnode) {
  SSdb      *pSdb = pMnode->pSdb;
  SMnodeObj *pMnodeObj = NULL;
  void      *pCursor = NULL;
  ESdbStatus status;

  while ((pCursor = sdbFetchAll(pSdb, SDB_MNODE, pCursor, (void **)&pMnodeObj, &status, true)) != NULL) {
    bool healthy = true;

    if (pMnodeObj->syncState != TAOS_SYNC_STATE_LEADER && pMnodeObj->syncState != TAOS_SYNC_STATE_FOLLOWER) {
      mDebug("mnode sync state:%d not leader/follower", pMnodeObj->syncState);
      healthy = false;
    } else if (status != SDB_STATUS_READY) {
      mWarn("mnode status:%d not ready", status);
      healthy = false;
    }

    sdbRelease(pSdb, pMnodeObj);
    if (!healthy) {
      // leaving the scan early, so the fetch cursor has to be cancelled explicitly
      sdbCancelFetch(pSdb, pCursor);
      return TSDB_CODE_FAILED;
    }
  }

  return TSDB_CODE_SUCCESS;
}
183

184
// Append one SNodeEntry per vgroup to pVgroupList and evaluate whether all vgroups are ready.
// *allReady is cleared when vgroups of the same database disagree on the replica number or when
// checkStatusForEachReplica() rejects a vgroup; it is never set back to true here.
// Returns 0 on success, or the error of the hash/array operation that failed.
static int32_t mndCheckAndAddVgroupsInfo(SMnode *pMnode, SArray *pVgroupList, bool* allReady) {
  SSdb     *pSdb = pMnode->pSdb;
  void     *pIter = NULL;
  SVgObj   *pVgroup = NULL;
  int32_t   code = 0;
  SHashObj *pHash = NULL;  // dbUid -> replica number of the first vgroup seen for that db

  pHash = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
  if (pHash == NULL) {
    code = terrno;  // read terrno before logging
    mError("failed to prepare hashmap during take vgroup snapshot, code:%s", tstrerror(code));
    return code;
  }

  while (1) {
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) {
      break;
    }

    SNodeEntry entry = {.nodeId = pVgroup->vgId, .hbTimestamp = pVgroup->updateTime};
    entry.epset = mndGetVgroupEpset(pMnode, pVgroup);

    int8_t *pReplica = taosHashGet(pHash, &pVgroup->dbUid, sizeof(pVgroup->dbUid));
    if (pReplica == NULL) {  // not exist, add it into hash map
      code = taosHashPut(pHash, &pVgroup->dbUid, sizeof(pVgroup->dbUid), &pVgroup->replica, sizeof(pVgroup->replica));
      if (code) {
        mError("failed to put info into hashmap during task vgroup snapshot, code:%s", tstrerror(code));
        sdbRelease(pSdb, pVgroup);
        sdbCancelFetch(pSdb, pIter);
        goto _end;  // take snapshot failed, and not all ready
      }
    } else {
      if (*pReplica != pVgroup->replica) {
        mInfo("vgId:%d replica:%d inconsistent with other vgroups replica:%d, not ready for stream operations",
              pVgroup->vgId, pVgroup->replica, *pReplica);
        *allReady = false;  // task snap success, but not all ready
      }
    }

    // if not all ready till now, no need to check the remaining vgroups,
    // but still we need to put the info of the existed vgroups into the snapshot list
    if (*allReady) {
      *allReady = checkStatusForEachReplica(pVgroup);
    }

    // the string is only used for the debug log below; keep its status out of `code` so a
    // formatting failure on the last vgroup is not returned as the result of the snapshot
    char    buf[256] = {0};
    int32_t ret = epsetToStr(&entry.epset, buf, tListLen(buf));
    if (ret != 0) {  // print error and continue
      mError("failed to convert epset to str, code:%s", tstrerror(ret));
    }

    void *p = taosArrayPush(pVgroupList, &entry);
    if (p == NULL) {
      code = terrno;  // read terrno before logging
      mError("failed to put entry in vgroup list, nodeId:%d code:out of memory", entry.nodeId);
      sdbRelease(pSdb, pVgroup);
      sdbCancelFetch(pSdb, pIter);
      goto _end;
    } else {
      mDebug("take node snapshot, nodeId:%d %s", entry.nodeId, buf);
    }

    sdbRelease(pSdb, pVgroup);
  }

_end:
  taosHashCleanup(pHash);
  return code;
}
253

254
// Build a snapshot list of all vgroups and snodes and report whether every node is ready.
// On success *pList receives the new list; note that a failed mnode status check still hands
// the list out, with *allReady == false and the non-zero check result as return value.
// When list creation, the vgroup scan or the snode scan fails, *pList stays NULL,
// *allReady is false and the error code is returned.
int32_t mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady, SArray **pList) {
  int32_t code = 0;

  *pList = NULL;
  *allReady = true;

  SArray *pEntries = taosArrayInit(4, sizeof(SNodeEntry));
  if (pEntries == NULL) {
    mError("failed to prepare arraylist during take vgroup snapshot, code:%s", tstrerror(terrno));
    code = terrno;
  } else {
    // 1. check for all vnodes status
    code = mndCheckAndAddVgroupsInfo(pMnode, pEntries, allReady);

    // 2. add snode info
    if (code == 0) {
      code = mndAddSnodeInfo(pMnode, pEntries);
    }

    if (code == 0) {
      // 3. check for mnode status
      code = mndCheckMnodeStatus(pMnode);
      if (code != TSDB_CODE_SUCCESS) {
        *allReady = false;
      }

      *pList = pEntries;
      return code;
    }
  }

  // failure: nothing is handed out to the caller
  *allReady = false;
  taosArrayDestroy(pEntries);
  return code;
}
294

295
// Look up the stream whose uid equals streamId by scanning the SDB_STREAM table.
// On a match *pStream is set (the object fetched from sdb is not released here) and
// TSDB_CODE_SUCCESS is returned; otherwise *pStream is NULL and
// TSDB_CODE_STREAM_TASK_NOT_EXIST is returned.
int32_t mndGetStreamObj(SMnode *pMnode, int64_t streamId, SStreamObj **pStream) {
  SSdb       *pSdb = pMnode->pSdb;
  SStreamObj *pCandidate = NULL;
  void       *pCursor = NULL;

  *pStream = NULL;

  for (;;) {
    pCursor = sdbFetch(pSdb, SDB_STREAM, pCursor, (void **)&pCandidate);
    if (pCursor == NULL) {
      return TSDB_CODE_STREAM_TASK_NOT_EXIST;
    }

    if (pCandidate->uid != streamId) {
      sdbRelease(pSdb, pCandidate);
      continue;
    }

    // found: stop the scan and pass the object to the caller
    sdbCancelFetch(pSdb, pCursor);
    *pStream = pCandidate;
    return TSDB_CODE_SUCCESS;
  }
}
312

313
// Kill the transaction identified by transId; pDbName is only used in log messages.
// Failures (transaction not found, kill failed) are logged and not reported to the caller.
void mndKillTransImpl(SMnode *pMnode, int32_t transId, const char *pDbName) {
  STrans *pTrans = mndAcquireTrans(pMnode, transId);
  if (pTrans == NULL) {
    mError("failed to acquire trans in Db:%s, transId:%d", pDbName, transId);
    return;
  }

  mInfo("kill active transId:%d in Db:%s", transId, pDbName);
  int32_t code = mndKillTrans(pMnode, pTrans);

  // remember the id before giving the reference back; pTrans must not be read after release
  int32_t id = pTrans->id;
  mndReleaseTrans(pMnode, pTrans);

  if (code) {
    mError("failed to kill transId:%d, code:%s", id, tstrerror(code));
  }
}
×
326

327
// Resolve the endpoint set of the node a task runs on.
// nodeId == SNODE_HANDLE: the epset is built from the first snode fetched from sdb.
// Otherwise nodeId is treated as a vgroup id and the vgroup's epset is copied into pEpSet.
// *hasEpset tells whether pEpSet was filled. A missing vgroup is not an error: the function
// returns TSDB_CODE_SUCCESS with *hasEpset == false. A missing snode returns TSDB_CODE_INVALID_PARA.
int32_t extractNodeEpset(SMnode *pMnode, SEpSet *pEpSet, bool *hasEpset, int32_t taskId, int32_t nodeId) {
  *hasEpset = false;
  pEpSet->numOfEps = 0;

  if (nodeId != SNODE_HANDLE) {
    SVgObj *pVgObj = mndAcquireVgroup(pMnode, nodeId);
    if (pVgObj == NULL) {
      mDebug("orphaned task:0x%x need to be dropped, nodeId:%d, no redo action", taskId, nodeId);
      return TSDB_CODE_SUCCESS;
    }

    SEpSet vgEpset = mndGetVgroupEpset(pMnode, pVgObj);
    mndReleaseVgroup(pMnode, pVgObj);

    epsetAssign(pEpSet, &vgEpset);
    *hasEpset = true;
    return TSDB_CODE_SUCCESS;
  }

  SSnodeObj *pSnode = NULL;
  void      *pCursor = sdbFetch(pMnode->pSdb, SDB_SNODE, NULL, (void **)&pSnode);
  if (pCursor == NULL) {
    mError("failed to acquire snode epset");
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t code = addEpIntoEpSet(pEpSet, pSnode->pDnode->fqdn, pSnode->pDnode->port);
  sdbRelease(pMnode->pSdb, pSnode);
  sdbCancelFetch(pMnode->pSdb, pCursor);

  *hasEpset = (code == 0);
  if (code) {
    mError("failed to set epset");
  }

  return code;
}
366

367
// Find the task of pStream whose taskId equals pId->taskId (only the taskId field is compared).
// Returns 0 and sets *pTask on a match, TSDB_CODE_FAILED when no task matches, or the error
// from createStreamTaskIter(); *pTask is NULL in the latter two cases.
int32_t mndGetStreamTask(STaskId *pId, SStreamObj *pStream, SStreamTask **pTask) {
  SStreamTaskIter *pWalker = NULL;

  *pTask = NULL;

  int32_t code = createStreamTaskIter(pStream, &pWalker);
  if (code) {
    mError("failed to create stream task iter:%s", pStream->name);
    return code;
  }

  int32_t result = TSDB_CODE_FAILED;
  while (streamTaskIterNextTask(pWalker)) {
    SStreamTask *pCur = NULL;
    if (streamTaskIterGetCurrent(pWalker, &pCur) != 0) {
      continue;
    }

    if (pCur->id.taskId == pId->taskId) {
      *pTask = pCur;
      result = 0;
      break;
    }
  }

  destroyStreamTaskIter(pWalker);
  return result;
}
394

395
int32_t mndGetNumOfStreamTasks(const SStreamObj *pStream) {
76,909✔
396
  int32_t num = 0;
76,909✔
397
  for (int32_t i = 0; i < taosArrayGetSize(pStream->tasks); ++i) {
229,001✔
398
    SArray *pLevel = taosArrayGetP(pStream->tasks, i);
152,057✔
399
    num += taosArrayGetSize(pLevel);
152,111✔
400
  }
401

402
  return num;
76,874✔
403
}
404

405
// Count the streams whose source database is dbName and store the result in *pNumOfStreams.
// Returns 0 on success, or TSDB_CODE_MND_DB_NOT_SELECTED (via TAOS_RETURN) when the database
// cannot be acquired; *pNumOfStreams is left untouched in that case.
int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) {
  SSdb   *pSdb = pMnode->pSdb;
  SDbObj *pDb = mndAcquireDb(pMnode, dbName);
  if (pDb == NULL) {
    TAOS_RETURN(TSDB_CODE_MND_DB_NOT_SELECTED);
  }

  int32_t     matched = 0;
  SStreamObj *pStream = NULL;
  void       *pCursor = NULL;

  while ((pCursor = sdbFetch(pSdb, SDB_STREAM, pCursor, (void **)&pStream)) != NULL) {
    if (pStream->sourceDbUid == pDb->uid) {
      matched += 1;
    }

    sdbRelease(pSdb, pStream);
    pStream = NULL;
  }

  *pNumOfStreams = matched;
  mndReleaseDb(pMnode, pDb);
  return 0;
}
430

431
static void freeTaskList(void *param) {
1,569✔
432
  SArray **pList = (SArray **)param;
1,569✔
433
  taosArrayDestroy(*pList);
1,569✔
434
}
1,569✔
435

436
int32_t mndInitExecInfo() {
1,807✔
437
  int32_t code = taosThreadMutexInit(&execInfo.lock, NULL);
1,807✔
438
  if (code) {
1,807!
439
    return code;
×
440
  }
441

442
  _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR);
1,807✔
443

444
  execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId));
1,807✔
445
  execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK);
1,807✔
446
  execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK);
1,807✔
447
  execInfo.pTransferStateStreams = taosHashInit(32, fn, true, HASH_NO_LOCK);
1,807✔
448
  execInfo.pChkptStreams = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
1,807✔
449
  execInfo.pStreamConsensus = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
1,807✔
450
  execInfo.pNodeList = taosArrayInit(4, sizeof(SNodeEntry));
1,807✔
451
  execInfo.pKilledChkptTrans = taosArrayInit(4, sizeof(SStreamTaskResetMsg));
1,807✔
452

453
  if (execInfo.pTaskList == NULL || execInfo.pTaskMap == NULL || execInfo.transMgmt.pDBTrans == NULL ||
1,807!
454
      execInfo.pTransferStateStreams == NULL || execInfo.pChkptStreams == NULL || execInfo.pStreamConsensus == NULL ||
1,807!
455
      execInfo.pNodeList == NULL || execInfo.pKilledChkptTrans == NULL) {
1,807!
456
    mError("failed to initialize the stream runtime env, code:%s", tstrerror(terrno));
×
457
    return terrno;
×
458
  }
459

460
  execInfo.role = NODE_ROLE_UNINIT;
1,807✔
461
  execInfo.switchFromFollower = false;
1,807✔
462

463
  taosHashSetFreeFp(execInfo.pTransferStateStreams, freeTaskList);
1,807✔
464
  taosHashSetFreeFp(execInfo.pChkptStreams, freeTaskList);
1,807✔
465
  taosHashSetFreeFp(execInfo.pStreamConsensus, freeTaskList);
1,807✔
466
  return 0;
1,807✔
467
}
468

469
void removeExpiredNodeInfo(const SArray *pNodeSnapshot) {
1,394✔
470
  SArray *pValidList = taosArrayInit(4, sizeof(SNodeEntry));
1,394✔
471
  if (pValidList == NULL) {  // not continue
1,394!
472
    return;
×
473
  }
474

475
  int32_t size = taosArrayGetSize(pNodeSnapshot);
1,394✔
476
  int32_t oldSize = taosArrayGetSize(execInfo.pNodeList);
1,394✔
477

478
  for (int32_t i = 0; i < oldSize; ++i) {
9,465✔
479
    SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i);
8,071✔
480
    if (p == NULL) {
8,071!
481
      continue;
×
482
    }
483

484
    for (int32_t j = 0; j < size; ++j) {
101,822✔
485
      SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j);
100,768✔
486
      if (pEntry == NULL) {
100,768!
487
        continue;
×
488
      }
489

490
      if (pEntry->nodeId == p->nodeId) {
100,768✔
491
        p->hbTimestamp = pEntry->hbTimestamp;
7,017✔
492

493
        void *px = taosArrayPush(pValidList, p);
7,017✔
494
        if (px == NULL) {
7,017!
495
          mError("failed to put node into list, nodeId:%d", p->nodeId);
×
496
        } else {
497
          mDebug("vgId:%d ts:%" PRId64 " HbMsgId:%d is valid", p->nodeId, p->hbTimestamp, p->lastHbMsgId);
7,017✔
498
        }
499
        break;
7,017✔
500
      }
501
    }
502
  }
503

504
  taosArrayDestroy(execInfo.pNodeList);
1,394✔
505
  execInfo.pNodeList = pValidList;
1,394✔
506

507
  mDebug("remain %d valid node entries after clean expired nodes info, prev size:%d",
1,394✔
508
         (int32_t)taosArrayGetSize(pValidList), oldSize);
509
}
510

511
// Remove one task from both pExecNode->pTaskMap and pExecNode->pTaskList.
// A task that is not in the map is treated as already removed (TSDB_CODE_SUCCESS).
// Returns the taosHashRemove() error when the map removal fails, TSDB_CODE_SUCCESS otherwise.
int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) {
  if (taosHashGet(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId)) == NULL) {
    return TSDB_CODE_SUCCESS;
  }

  int32_t code = taosHashRemove(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId));
  if (code) {
    return code;
  }

  // drop the first list entry that carries the same (streamId, taskId) pair
  int32_t k = 0;
  while (k < taosArrayGetSize(pExecNode->pTaskList)) {
    STaskId *pId = taosArrayGet(pExecNode->pTaskList, k);
    bool     matched = (pId != NULL) && (pId->taskId == pRemovedId->taskId) && (pId->streamId == pRemovedId->streamId);
    if (matched) {
      taosArrayRemove(pExecNode->pTaskList, k);

      int32_t num = taosArrayGetSize(pExecNode->pTaskList);
      mInfo("s-task:0x%x removed from buffer, remain:%d in buffer list", (int32_t)pRemovedId->taskId, num);
      break;
    }

    k += 1;
  }

  return TSDB_CODE_SUCCESS;
}
539

540
// Remove every task listed in pTaskIds from the buffers of pExecInfo; failures are only logged.
void removeTasksInBuf(SArray *pTaskIds, SStreamExecInfo *pExecInfo) {
  int32_t idx = 0;

  while (idx < taosArrayGetSize(pTaskIds)) {
    STaskId *pId = taosArrayGet(pTaskIds, idx);
    idx += 1;

    if (pId == NULL) {
      continue;
    }

    if (doRemoveTasks(pExecInfo, pId) != 0) {
      mError("failed to remove task in buffer list, 0x%" PRIx64, pId->taskId);
    }
  }
}
1,394✔
553

554
// Remove all tasks of pStream from pExecNode's task map/list and, when both containers are still
// consistent afterwards, clear the stream's entries in the global consensus-checkpoint and
// checkpoint-report tables. Runs with pExecNode->lock held; errors are logged, not returned.
void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
  SStreamTaskIter *pWalker = NULL;

  streamMutexLock(&pExecNode->lock);

  // 1. remove task entries
  int32_t code = createStreamTaskIter(pStream, &pWalker);
  if (code) {
    streamMutexUnlock(&pExecNode->lock);
    mError("failed to create stream task iter:%s", pStream->name);
    return;
  }

  while (streamTaskIterNextTask(pWalker)) {
    SStreamTask *pTask = NULL;
    code = streamTaskIterGetCurrent(pWalker, &pTask);
    if (code) {
      continue;
    }

    STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
    code = doRemoveTasks(pExecNode, &id);
    if (code) {
      mError("failed to remove task in buffer list, 0x%" PRIx64, id.taskId);
    }
  }

  bool consistent = (taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList));
  if (consistent) {
    // 2. remove stream entry in consensus hash table and checkpoint-report hash table
    code = mndClearConsensusCheckpointId(execInfo.pStreamConsensus, pStream->uid);
    if (code) {
      mError("failed to clear consensus checkpointId, code:%s", tstrerror(code));
    }

    code = mndClearChkptReportInfo(execInfo.pChkptStreams, pStream->uid);
    if (code) {
      mError("failed to clear the checkpoint report info, code:%s", tstrerror(code));
    }
  }

  streamMutexUnlock(&pExecNode->lock);
  destroyStreamTaskIter(pWalker);

  if (!consistent) {
    mError("task map size, task list size, not equal");
  }
}
601

602
// Tell whether pList (an array of SNodeEntry) contains an entry with the given nodeId.
static bool taskNodeExists(SArray *pList, int32_t nodeId) {
  size_t  total = taosArrayGetSize(pList);
  int32_t idx = 0;

  while (idx < total) {
    SNodeEntry *pNode = taosArrayGet(pList, idx);
    idx += 1;

    if (pNode != NULL && pNode->nodeId == nodeId) {
      return true;
    }
  }

  return false;
}
618

619
// Drop from the global execInfo buffers every task whose node no longer appears in
// pNodeSnapshot (tasks on the snode are never dropped here), then prune execInfo.pNodeList
// against the same snapshot. Returns terrno if the scratch list cannot be allocated, else 0.
int32_t removeExpiredNodeEntryAndTaskInBuf(SArray *pNodeSnapshot) {
  SArray *pOrphans = taosArrayInit(4, sizeof(STaskId));
  if (pOrphans == NULL) {
    return terrno;
  }

  // collect first, remove afterwards: removal modifies execInfo.pTaskList
  int32_t numOfTask = taosArrayGetSize(execInfo.pTaskList);
  for (int32_t i = 0; i < numOfTask; ++i) {
    STaskId *pId = taosArrayGet(execInfo.pTaskList, i);
    if (pId == NULL) {
      continue;
    }

    STaskStatusEntry *pStatus = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId));
    if (pStatus == NULL || pStatus->nodeId == SNODE_HANDLE) {
      continue;
    }

    if (taskNodeExists(pNodeSnapshot, pStatus->nodeId)) {
      continue;
    }

    if (taosArrayPush(pOrphans, pId) == NULL) {
      mError("failed to put task entry into remove list, taskId:0x%" PRIx64, pId->taskId);
    }
  }

  removeTasksInBuf(pOrphans, &execInfo);

  mDebug("remove invalid stream tasks:%d, remain:%d", (int32_t)taosArrayGetSize(pOrphans),
         (int32_t)taosArrayGetSize(execInfo.pTaskList));

  removeExpiredNodeInfo(pNodeSnapshot);

  taosArrayDestroy(pOrphans);
  return 0;
}
660

661
// Decide whether the checkpoint-report list of a stream is complete and consistent.
// Returns TSDB_CODE_SUCCESS when all numOfTasks tasks reported the same checkpointId and the
// report matches the active transaction (or, without an active transaction, the active
// checkpointId recorded for the first task). Returns -1 when the caller should check again later.
static int32_t allTasksSendChkptReport(SChkptReportInfo* pReportInfo, int32_t numOfTasks, const char* pName) {
  int64_t checkpointId = -1;
  int32_t transId = -1;
  int32_t taskId = -1;

  int32_t existed = (int32_t)taosArrayGetSize(pReportInfo->pTaskList);
  if (existed != numOfTasks) {
    mDebug("stream:0x%" PRIx64 " %s %d/%d tasks send checkpoint-report, %d not send", pReportInfo->streamId, pName,
           existed, numOfTasks, numOfTasks - existed);
    return -1;
  }

  // acquire current active checkpointId, and do cross-check checkpointId info in exec.pTaskList
  for(int32_t i = 0; i < numOfTasks; ++i) {
    STaskChkptInfo *pInfo = taosArrayGet(pReportInfo->pTaskList, i);
    if (pInfo == NULL) {
      continue;
    }

    if (checkpointId == -1) {
      checkpointId = pInfo->checkpointId;
      transId = pInfo->transId;
      taskId = pInfo->taskId;
    } else if (checkpointId != pInfo->checkpointId) {
      mError("stream:0x%" PRIx64
             " checkpointId in checkpoint-report list are not identical, type 1 taskId:0x%x checkpointId:%" PRId64
             ", type 2 taskId:0x%x checkpointId:%" PRId64,
             pReportInfo->streamId, taskId, checkpointId, pInfo->taskId, pInfo->checkpointId);
      return -1;
    }
  }

  // check for the correct checkpointId for current task info in STaskChkptInfo
  STaskChkptInfo  *p = taosArrayGet(pReportInfo->pTaskList, 0);
  if (p == NULL) {
    // only reachable with an empty list (numOfTasks == 0): there is nothing to cross-check
    mDebug("stream:0x%" PRIx64 " %s no task in checkpoint-report list, recheck next time", pReportInfo->streamId,
           pName);
    return -1;
  }

  STaskId id = {.streamId = p->streamId, .taskId = p->taskId};
  STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id));

  // cross-check failed, there must be something unknown wrong
  SStreamTransInfo *pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, &id.streamId, sizeof(id.streamId));
  if (pTransInfo == NULL) {
    mWarn("stream:0x%" PRIx64 " no active trans exists for checkpoint transId:%d, it may have been cleared already",
           id.streamId, transId);

    if (pe == NULL) {
      // the task is no longer in the status map, so its active checkpointId cannot be verified
      mWarn("stream:0x%" PRIx64 " s-task:0x%x not found in task status map, recheck next time", id.streamId,
            (int32_t)id.taskId);
      return -1;
    }

    if (pe->checkpointInfo.activeId != 0 && pe->checkpointInfo.activeId != checkpointId) {
      mWarn("stream:0x%" PRIx64 " active checkpointId is not equalled to the required, current:%" PRId64
            ", req:%" PRId64 " recheck next time",
            id.streamId, pe->checkpointInfo.activeId, checkpointId);
      return -1;
    }
  } else {
    if (pTransInfo->transId != transId) {
      mError("stream:0x%" PRIx64
             " checkpoint-report list info are expired, active transId:%d trans in list:%d, recheck next time",
             id.streamId, pTransInfo->transId, transId);
      return -1;
    }
  }

  mDebug("stream:0x%" PRIx64 " %s all %d tasks send checkpoint-report, start to update checkpoint-info", id.streamId,
         pName, numOfTasks);

  return TSDB_CODE_SUCCESS;
}
726

727
// Scan execInfo.pChkptStreams under execInfo.lock. For the first stream whose tasks have all
// sent a consistent checkpoint-report and that has no conflicting transaction, try to launch the
// checkpoint-info update transaction and stop scanning. Streams that can no longer be found are
// removed from the table. Returns terrno if the scratch list cannot be allocated, otherwise
// TSDB_CODE_SUCCESS (per-stream failures are only logged).
int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) {
  SMnode *pMnode = pReq->info.node;
  void   *pIter = NULL;
  int32_t code = 0;
  SArray *pDropped = taosArrayInit(4, sizeof(int64_t));
  if (pDropped == NULL) {
    return terrno;
  }

  mDebug("start to scan checkpoint report info");

  streamMutexLock(&execInfo.lock);

  while ((pIter = taosHashIterate(execInfo.pChkptStreams, pIter)) != NULL) {
    SChkptReportInfo *px = (SChkptReportInfo *)pIter;
    if (taosArrayGetSize(px->pTaskList) == 0) {
      continue;
    }

    STaskChkptInfo *pInfo = taosArrayGet(px->pTaskList, 0);
    if (pInfo == NULL) {
      continue;
    }

    // pInfo points into px->pTaskList, which is cleared below; keep private copies of what
    // is still needed afterwards
    int64_t streamId = pInfo->streamId;
    int64_t checkpointId = pInfo->checkpointId;

    SStreamObj *pStream = NULL;
    code = mndGetStreamObj(pMnode, streamId, &pStream);
    if (pStream == NULL || code != 0) {
      mDebug("failed to acquire stream:0x%" PRIx64 " remove it from checkpoint-report list", streamId);
      void *p = taosArrayPush(pDropped, &streamId);
      if (p == NULL) {
        mError("failed to put stream into drop list:0x%" PRIx64, streamId);
      }
      continue;
    }

    bool    attempted = false;  // true once an update trans was tried for this stream
    int32_t total = mndGetNumOfStreamTasks(pStream);
    int32_t ret = allTasksSendChkptReport(px, total, pStream->name);
    if (ret == 0) {
      code = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_CHKPT_UPDATE_NAME, false);
      if (code == 0) {
        code = mndCreateStreamChkptInfoUpdateTrans(pMnode, pStream, px->pTaskList);
        if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) {  // remove this entry
          taosArrayClear(px->pTaskList);
          mInfo("stream:0x%" PRIx64 " clear checkpoint-report list and update the report checkpointId from:%" PRId64
                " to %" PRId64,
                streamId, px->reportChkpt, checkpointId);
          px->reportChkpt = checkpointId;
        } else {
          mDebug("stream:0x%" PRIx64 " not launch chkpt-info update trans, due to checkpoint not finished yet",
                 streamId);
        }

        attempted = true;
      } else {
        mDebug("stream:0x%" PRIx64 " active checkpoint trans not finished yet, wait", streamId);
      }
    }

    sdbRelease(pMnode->pSdb, pStream);

    if (attempted) {
      // only one update trans per scan; the iteration is abandoned early, so release the iterator
      taosHashCancelIterate(execInfo.pChkptStreams, pIter);
      break;
    }
  }

  int32_t size = taosArrayGetSize(pDropped);
  if (size > 0) {
    for (int32_t i = 0; i < size; ++i) {
      int64_t *pStreamId = (int64_t *)taosArrayGet(pDropped, i);
      if (pStreamId == NULL) {
        continue;
      }

      code = taosHashRemove(execInfo.pChkptStreams, pStreamId, sizeof(*pStreamId));
      if (code) {
        mError("failed to remove stream in buf:0x%" PRIx64, *pStreamId);
      }
    }

    int32_t numOfStreams = taosHashGetSize(execInfo.pChkptStreams);
    mDebug("drop %d stream(s) in checkpoint-report list, remain:%d", size, numOfStreams);
  }

  streamMutexUnlock(&execInfo.lock);

  taosArrayDestroy(pDropped);

  mDebug("end to scan checkpoint report info");
  return TSDB_CODE_SUCCESS;
}
814

815
// Create and prepare a transaction that sets the consensus checkpointId for one task of pStream.
// Returns TSDB_CODE_ACTION_IN_PROGRESS once the transaction has been prepared, otherwise the
// error of the step that failed. After the transaction object exists, every exit path releases
// pStream through sdbRelease() and drops the local transaction handle.
// NOTE(review): when doCreateTrans() fails pStream is NOT released here, unlike every other
// exit path — confirm which side owns the stream reference in that case.
int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, int32_t taskId, int64_t checkpointId,
                                          int64_t ts) {
  char         msg[128] = {0};
  STrans      *pTrans = NULL;
  SStreamTask *pTask = NULL;

  snprintf(msg, tListLen(msg), "set consen-chkpt-id for task:0x%x", taskId);

  int32_t code = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHKPT_CONSEN_NAME, msg, &pTrans);
  if (pTrans == NULL || code != 0) {
    // prefer the explicit error; fall back to terrno when only the missing trans signals failure
    return (code != 0) ? code : terrno;
  }

  STaskId id = {.streamId = pStream->uid, .taskId = taskId};
  code = mndGetStreamTask(&id, pStream, &pTask);
  if (code) {
    mError("failed to get task:0x%x in stream:%s, failed to create consensus-checkpointId", taskId, pStream->name);
    goto _end;
  }

  code = mndStreamRegisterTrans(pTrans, MND_STREAM_CHKPT_CONSEN_NAME, pStream->uid);
  if (code) {
    goto _end;
  }

  code = mndStreamSetChkptIdAction(pMnode, pTrans, pTask, checkpointId, ts);
  if (code != 0) {
    goto _end;
  }

  code = mndPersistTransLog(pStream, pTrans, SDB_STATUS_READY);
  if (code) {
    goto _end;
  }

  code = mndTransPrepare(pMnode, pTrans);
  if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_ACTION_IN_PROGRESS) {
    mError("trans:%d, failed to prepare set consensus-chkptId trans since %s", pTrans->id, terrstr());
    goto _end;
  }

  code = TSDB_CODE_ACTION_IN_PROGRESS;

_end:
  // single exit: the trans handle is dropped on every path, including the two early failures
  // that previously returned without dropping it
  sdbRelease(pMnode->pSdb, pStream);
  mndTransDrop(pTrans);
  return code;
}
869

870
// Return the consensus-checkpoint record of streamId stored in pHash, creating an empty record
// (with numOfTasks) when none exists yet. *pInfo points at the record held by the hash table.
// Returns 0 on success, terrno when the task list cannot be allocated, or the taosHashPut()
// error; *pInfo is NULL on failure.
int32_t mndGetConsensusInfo(SHashObj *pHash, int64_t streamId, int32_t numOfTasks, SCheckpointConsensusInfo **pInfo) {
  *pInfo = NULL;

  void *px = taosHashGet(pHash, &streamId, sizeof(streamId));
  if (px != NULL) {
    *pInfo = px;
    return 0;
  }

  SCheckpointConsensusInfo p = {
      .pTaskList = taosArrayInit(4, sizeof(SCheckpointConsensusEntry)),
      .numOfTasks = numOfTasks,
      .streamId = streamId,
  };

  if (p.pTaskList == NULL) {
    return terrno;
  }

  int32_t code = taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p));
  if (code != 0) {
    // the table did not take the record, so the freshly created list is still ours to free
    taosArrayDestroy(p.pTaskList);
    return code;
  }

  // hand out the copy stored inside the table, not the local template
  *pInfo = (SCheckpointConsensusInfo *)taosHashGet(pHash, &streamId, sizeof(streamId));
  return code;
}
899

900
// no matter existed or not, add the request into info list anyway, since we need to send rsp mannually
901
// discard the msg may lead to the lost of connections.
902
void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpointInfo *pRestoreInfo) {
201✔
903
  SCheckpointConsensusEntry info = {.ts = taosGetTimestampMs()};
201✔
904
  memcpy(&info.req, pRestoreInfo, sizeof(info.req));
201✔
905

906
  int32_t num = (int32_t) taosArrayGetSize(pInfo->pTaskList);
201✔
907
  for (int32_t i = 0; i < num; ++i) {
739✔
908
    SCheckpointConsensusEntry *p = taosArrayGet(pInfo->pTaskList, i);
548✔
909
    if (p == NULL) {
548!
910
      continue;
×
911
    }
912

913
    if (p->req.taskId == info.req.taskId) {
548✔
914
      mDebug("s-task:0x%x already in consensus-checkpointId list for stream:0x%" PRIx64 ", update ts %" PRId64
10✔
915
             "->%" PRId64 " checkpointId:%" PRId64 " -> %" PRId64 " total existed:%d",
916
             pRestoreInfo->taskId, pRestoreInfo->streamId, p->req.startTs, info.req.startTs, p->req.checkpointId,
917
             info.req.checkpointId, num);
918
      p->req.startTs = info.req.startTs;
10✔
919
      p->req.checkpointId = info.req.checkpointId;
10✔
920
      p->req.transId = info.req.transId;
10✔
921
      return;
10✔
922
    }
923
  }
924

925
  void *p = taosArrayPush(pInfo->pTaskList, &info);
191✔
926
  if (p == NULL) {
191!
927
    mError("s-task:0x%x failed to put task into consensus-checkpointId list, code: out of memory", info.req.taskId);
×
928
  } else {
929
    num = taosArrayGetSize(pInfo->pTaskList);
191✔
930
    mDebug("s-task:0x%x checkpointId:%" PRId64 " added into consensus-checkpointId list, stream:0x%" PRIx64
191✔
931
           " waiting tasks:%d",
932
           pRestoreInfo->taskId, pRestoreInfo->checkpointId, pRestoreInfo->streamId, num);
933
  }
934
}
935

936
// Release the task list owned by a consensus record and reset the pointer so the record can be destroyed or
// cleared again without touching freed memory. A NULL record is ignored.
void mndClearConsensusRspEntry(SCheckpointConsensusInfo *pInfo) {
  if (pInfo == NULL) {
    return;
  }

  taosArrayDestroy(pInfo->pTaskList);
  pInfo->pTaskList = NULL;
}
39✔
940

941
// Drop the consensus-checkpointId record of a stream from pHash.
// Returns 0 when the hash is empty or the record was removed, otherwise the taosHashRemove() error code.
int32_t mndClearConsensusCheckpointId(SHashObj *pHash, int64_t streamId) {
  if (taosHashGetSize(pHash) == 0) {
    return 0;
  }

  int32_t code = taosHashRemove(pHash, &streamId, sizeof(streamId));

  // sample the size after the removal attempt so that "remain" really is the number of streams left
  int32_t remain = taosHashGetSize(pHash);
  if (code == 0) {
    mDebug("drop stream:0x%" PRIx64 " in consensus-checkpointId list, remain:%d", streamId, remain);
  } else {
    mError("failed to remove stream:0x%" PRIx64 " in consensus-checkpointId list, remain:%d", streamId, remain);
  }

  return code;
}
957

958
// Drop the checkpoint-report record of a stream from pHash.
// Returns 0 when the hash is empty or the record was removed, otherwise the taosHashRemove() error code.
int32_t mndClearChkptReportInfo(SHashObj *pHash, int64_t streamId) {
  if (taosHashGetSize(pHash) == 0) {
    return 0;
  }

  int32_t code = taosHashRemove(pHash, &streamId, sizeof(streamId));

  // sample the size after the removal attempt so that "remain" really is the number of streams left
  int32_t remain = taosHashGetSize(pHash);
  if (code == 0) {
    mDebug("drop stream:0x%" PRIx64 " in chkpt-report list, remain:%d", streamId, remain);
  } else {
    mError("failed to remove stream:0x%" PRIx64 " in chkpt-report list, remain:%d", streamId, remain);
  }

  return code;
}
974

975
// Empty the reported-task list of a stream's checkpoint-report record while keeping the record itself in pHash.
// Returns 0 on success, or TSDB_CODE_MND_STREAM_NOT_EXIST when the stream has no record in pHash.
int32_t mndResetChkptReportInfo(SHashObj *pHash, int64_t streamId) {
  SChkptReportInfo *pReport = taosHashGet(pHash, &streamId, sizeof(streamId));
  if (pReport == NULL) {
    return TSDB_CODE_MND_STREAM_NOT_EXIST;
  }

  taosArrayClear(pReport->pTaskList);
  mDebug("stream:0x%" PRIx64 " checkpoint-report list cleared, prev report checkpointId:%" PRId64, streamId,
         pReport->reportChkpt);
  return 0;
}
986

987
static void mndShowStreamStatus(char *dst, int8_t status) {
35,318✔
988
  if (status == STREAM_STATUS__NORMAL) {
35,318✔
989
    tstrncpy(dst, "ready", MND_STREAM_TRIGGER_NAME_SIZE);
35,297✔
990
  } else if (status == STREAM_STATUS__STOP) {
21!
991
    tstrncpy(dst, "stop", MND_STREAM_TRIGGER_NAME_SIZE);
×
992
  } else if (status == STREAM_STATUS__FAILED) {
21✔
993
    tstrncpy(dst, "failed", MND_STREAM_TRIGGER_NAME_SIZE);
1✔
994
  } else if (status == STREAM_STATUS__RECOVER) {
20!
995
    tstrncpy(dst, "recover", MND_STREAM_TRIGGER_NAME_SIZE);
×
996
  } else if (status == STREAM_STATUS__PAUSE) {
20!
997
    tstrncpy(dst, "paused", MND_STREAM_TRIGGER_NAME_SIZE);
24✔
UNCOV
998
  } else if (status == STREAM_STATUS__INIT) {
×
999
    tstrncpy(dst, "init", MND_STREAM_TRIGGER_NAME_SIZE);
2✔
1000
  }
1001
}
35,318✔
1002

1003
// Write the display name of the stream's trigger mode into dst (at most MND_STREAM_TRIGGER_NAME_SIZE bytes).
// dst is left untouched when the trigger is not one of the known values.
static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) {
  const char *name = NULL;

  switch (pStream->conf.trigger) {
    case STREAM_TRIGGER_AT_ONCE:
      name = "at once";
      break;
    case STREAM_TRIGGER_WINDOW_CLOSE:
      name = "window close";
      break;
    case STREAM_TRIGGER_MAX_DELAY:
      name = "max delay";
      break;
    case STREAM_TRIGGER_FORCE_WINDOW_CLOSE:
      name = "force window close";
      break;
    default:
      break;
  }

  if (name != NULL) {
    tstrncpy(dst, name, MND_STREAM_TRIGGER_NAME_SIZE);
  }
}
35,276✔
1015

1016
// Render id as a "0x"-prefixed hex string in varstr layout: length header first, payload right after it.
// pBuf is zeroed over bufLen bytes before it is filled.
// NOTE(review): bufLen is only used for the memset; the caller must supply a buffer large enough for the header,
// the "0x" prefix and the digits produced by tintToHex() -- the visible callers pass 128 bytes.
static void int64ToHexStr(int64_t id, char *pBuf, int32_t bufLen) {
  memset(pBuf, 0, bufLen);

  // address the payload through the varstr accessor instead of hard-coding the header size
  char *pVal = varDataVal(pBuf);
  pVal[0] = '0';
  pVal[1] = 'x';

  int32_t len = tintToHex(id, &pVal[2]);
  varDataSetLen(pBuf, len + 2);  // +2 accounts for the "0x" prefix
}
260,045✔
1024

1025
// Report whether every task of the stream that has an entry in execInfo.pTaskMap is in TASK_STATUS__PAUSE.
// Tasks without an entry in the map are skipped, so a stream whose tasks are all unknown is reported as paused.
// The stream read latch is held while the tasks are walked.
//
// pRes  [out] written only when the function succeeds
// Returns TSDB_CODE_SUCCESS, or the error raised while creating or advancing the task iterator.
static int32_t isAllTaskPaused(SStreamObj *pStream, bool *pRes) {
  int32_t          code = TSDB_CODE_SUCCESS;
  int32_t          lino = 0;
  SStreamTaskIter *pIter = NULL;
  bool             isPaused = true;

  taosRLockLatch(&pStream->lock);
  code = createStreamTaskIter(pStream, &pIter);
  TSDB_CHECK_CODE(code, lino, _end);

  while (streamTaskIterNextTask(pIter)) {
    SStreamTask *pTask = NULL;
    code = streamTaskIterGetCurrent(pIter, &pTask);
    TSDB_CHECK_CODE(code, lino, _end);

    STaskId           id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
    STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id));
    if (pe == NULL) {
      continue;
    }

    if (pe->status != TASK_STATUS__PAUSE) {
      // one running task settles the answer; no need to keep holding the latch for the remaining tasks
      isPaused = false;
      break;
    }
  }
  (*pRes) = isPaused;

_end:
  destroyStreamTaskIter(pIter);
  taosRUnLockLatch(&pStream->lock);
  if (code != TSDB_CODE_SUCCESS) {
    mError("error happens when get stream status, lino:%d, code:%s", lino, tstrerror(code));
  }
  return code;
}
1059

1060
// Fill row `numOfRows` of pBlock with the attributes of one stream.
// Columns are consumed from pBlock->pDataBlock in order: stream name, create time, stream id, fill-history stream
// id, sql, status, source db, target db, target super table, watermark, trigger, sink quota, checkpoint interval,
// checkpoint backup type, history scan idle, message.
//
// Returns 0 on success; on the first failure the error is logged together with the failing line and returned.
int32_t setStreamAttrInResBlock(SStreamObj *pStream, SSDataBlock *pBlock, int32_t numOfRows) {
  int32_t code = 0;
  int32_t cols = 0;
  int32_t lino = 0;

  // stream name
  char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
  STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName));
  SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // create time
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);
  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->createTime, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // stream id
  char buf[128] = {0};
  int64ToHexStr(pStream->uid, buf, tListLen(buf));
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);
  code = colDataSetVal(pColInfo, numOfRows, buf, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // related fill-history stream id, NULL when the stream has none
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);
  if (pStream->hTaskUid != 0) {
    int64ToHexStr(pStream->hTaskUid, buf, tListLen(buf));
    code = colDataSetVal(pColInfo, numOfRows, buf, false);
  } else {
    code = colDataSetVal(pColInfo, numOfRows, buf, true);
  }
  TSDB_CHECK_CODE(code, lino, _end);

  // sql
  char sql[TSDB_SHOW_SQL_LEN + VARSTR_HEADER_SIZE] = {0};
  STR_WITH_MAXSIZE_TO_VARSTR(sql, pStream->sql, sizeof(sql));
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);
  code = colDataSetVal(pColInfo, numOfRows, (const char *)sql, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // status: shown as paused when all known tasks are paused, otherwise the stream's own status
  char status[20 + VARSTR_HEADER_SIZE] = {0};
  char status2[MND_STREAM_TRIGGER_NAME_SIZE] = {0};
  bool isPaused = false;
  code = isAllTaskPaused(pStream, &isPaused);
  TSDB_CHECK_CODE(code, lino, _end);

  int8_t streamStatus = atomic_load_8(&pStream->status);
  if (isPaused && pStream->tasks != NULL) {
    streamStatus = STREAM_STATUS__PAUSE;
  }
  mndShowStreamStatus(status2, streamStatus);
  STR_WITH_MAXSIZE_TO_VARSTR(status, status2, sizeof(status));
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)status, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // source db
  char sourceDB[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
  STR_WITH_MAXSIZE_TO_VARSTR(sourceDB, mndGetDbStr(pStream->sourceDb), sizeof(sourceDB));
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)sourceDB, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // target db
  char targetDB[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
  STR_WITH_MAXSIZE_TO_VARSTR(targetDB, mndGetDbStr(pStream->targetDb), sizeof(targetDB));
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)targetDB, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // target super table, NULL when the stream has none
  if (pStream->targetSTbName[0] == 0) {
    pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

    code = colDataSetVal(pColInfo, numOfRows, NULL, true);
  } else {
    char targetSTB[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
    STR_WITH_MAXSIZE_TO_VARSTR(targetSTB, mndGetStbStr(pStream->targetSTbName), sizeof(targetSTB));
    pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

    code = colDataSetVal(pColInfo, numOfRows, (const char *)targetSTB, false);
  }
  TSDB_CHECK_CODE(code, lino, _end);

  // watermark
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->conf.watermark, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // trigger
  char trigger[20 + VARSTR_HEADER_SIZE] = {0};
  char trigger2[MND_STREAM_TRIGGER_NAME_SIZE] = {0};
  mndShowStreamTrigger(trigger2, pStream);
  STR_WITH_MAXSIZE_TO_VARSTR(trigger, trigger2, sizeof(trigger));
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)trigger, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // sink_quota: always reported as "0"
  char sinkQuota[20 + VARSTR_HEADER_SIZE] = {0};
  sinkQuota[0] = '0';
  char dstStr[20] = {0};
  STR_TO_VARSTR(dstStr, sinkQuota);
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // checkpoint interval
  char tmp[20 + VARSTR_HEADER_SIZE] = {0};
  (void)tsnprintf(varDataVal(tmp), sizeof(tmp) - VARSTR_HEADER_SIZE, "%d sec", tsStreamCheckpointInterval);
  varDataSetLen(tmp, strlen(varDataVal(tmp)));

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)tmp, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // checkpoint backup type: always reported as "none"
  char backup[20 + VARSTR_HEADER_SIZE] = {0};
  STR_TO_VARSTR(backup, "none");
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)backup, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // history scan idle: always reported as "100a"
  char scanHistoryIdle[20 + VARSTR_HEADER_SIZE] = {0};
  tstrncpy(scanHistoryIdle, "100a", sizeof(scanHistoryIdle));

  memset(dstStr, 0, tListLen(dstStr));
  STR_TO_VARSTR(dstStr, scanHistoryIdle);
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // message: the stream's reserve text when it has failed, a single blank otherwise
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);
  char msg[TSDB_RESERVE_VALUE_LEN + VARSTR_HEADER_SIZE] = {0};
  if (streamStatus == STREAM_STATUS__FAILED) {
    // bounded copy, like every other text column here, so an over-long reserve text cannot overrun msg
    STR_WITH_MAXSIZE_TO_VARSTR(msg, pStream->reserve, sizeof(msg));
  } else {
    STR_TO_VARSTR(msg, " ");
  }
  code = colDataSetVal(pColInfo, numOfRows, (const char *)msg, false);
  TSDB_CHECK_CODE(code, lino, _end);  // records the line so the log below does not report lino:0

_end:
  if (code) {
    mError("error happens when build stream attr result block, lino:%d, code:%s", lino, tstrerror(code));
  }
  return code;
}
1230

1231
// Fill row `numOfRows` of pBlock with the attributes of one stream task.
// Runtime figures come from the task's entry in execInfo.pTaskMap; columns are consumed from pBlock->pDataBlock in
// the order they are written below.
//
// precision  passed to taosFormatUtcTime() when a source task of a force-window-close stream reports its progress
//
// Returns 0 on success, TSDB_CODE_STREAM_TASK_NOT_EXIST when the task has no entry in execInfo.pTaskMap, or the
// first error hit while filling a column (logged together with the failing line).
int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows,
                              int32_t precision) {
  SColumnInfoData *pColInfo = NULL;
  int32_t          cols = 0;
  int32_t          code = 0;
  int32_t          lino = 0;

  STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};

  STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id));
  if (pe == NULL) {
    mError("task:0x%" PRIx64 " not exists in any vnodes, streamName:%s, streamId:0x%" PRIx64 " createTs:%" PRId64
           " no valid status/stage info",
           id.taskId, pStream->name, pStream->uid, pStream->createTime);
    return TSDB_CODE_STREAM_TASK_NOT_EXIST;
  }

  // stream name
  char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
  STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName));

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // task id
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  char idstr[128] = {0};
  int64ToHexStr(pTask->id.taskId, idstr, tListLen(idstr));
  code = colDataSetVal(pColInfo, numOfRows, idstr, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // node type: "vnode" for a positive node id, "snode" otherwise
  char nodeType[20 + VARSTR_HEADER_SIZE] = {0};
  varDataSetLen(nodeType, 5);
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  if (pTask->info.nodeId > 0) {
    memcpy(varDataVal(nodeType), "vnode", 5);
  } else {
    memcpy(varDataVal(nodeType), "snode", 5);
  }
  code = colDataSetVal(pColInfo, numOfRows, nodeType, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // node id, negative ids are shown as 0
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  int64_t nodeId = TMAX(pTask->info.nodeId, 0);
  code = colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // level
  char level[20 + VARSTR_HEADER_SIZE] = {0};
  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    STR_WITH_SIZE_TO_VARSTR(level, "source", 6);
  } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
    STR_WITH_SIZE_TO_VARSTR(level, "agg", 3);
  } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
    STR_WITH_SIZE_TO_VARSTR(level, "sink", 4);
  }

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)level, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // status
  char status[20 + VARSTR_HEADER_SIZE] = {0};

  const char *pStatus = streamTaskGetStatusStr(pe->status);
  STR_TO_VARSTR(status, pStatus);

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)status, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // stage
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // input queue; buf is the plain-text scratch buffer, vbuf its varstr form, both reused by the columns below
  char        vbuf[TSDB_STREAM_NOTIFY_STAT_LEN + 2] = {0};
  char        buf[TSDB_STREAM_NOTIFY_STAT_LEN] = {0};
  const char *queueInfoStr = "%4.2f MiB (%6.2f%%)";  // "%%" prints a literal percent sign; a bare "%)" is undefined
  snprintf(buf, tListLen(buf), queueInfoStr, pe->inputQUsed, pe->inputRate);
  STR_TO_VARSTR(vbuf, buf);

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // input total, switched from MiB to GiB once it reaches 1024
  const char *formatTotalMb = "%7.2f MiB";
  const char *formatTotalGb = "%7.2f GiB";
  if (pe->procsTotal < 1024) {
    snprintf(buf, tListLen(buf), formatTotalMb, pe->procsTotal);
  } else {
    snprintf(buf, tListLen(buf), formatTotalGb, pe->procsTotal / 1024);
  }

  memset(vbuf, 0, tListLen(vbuf));
  STR_TO_VARSTR(vbuf, buf);

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // process throughput, switched from KiB/s to MiB/s once it reaches 1024
  const char *formatKb = "%7.2f KiB/s";
  const char *formatMb = "%7.2f MiB/s";
  if (pe->procsThroughput < 1024) {
    snprintf(buf, tListLen(buf), formatKb, pe->procsThroughput);
  } else {
    snprintf(buf, tListLen(buf), formatMb, pe->procsThroughput / 1024);
  }

  memset(vbuf, 0, tListLen(vbuf));
  STR_TO_VARSTR(vbuf, buf);

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // output total, NULL for sink tasks
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
    colDataSetNULL(pColInfo, numOfRows);
  } else {
    (void)tsnprintf(buf, sizeof(buf), formatTotalMb, pe->outputTotal);
    memset(vbuf, 0, tListLen(vbuf));
    STR_TO_VARSTR(vbuf, buf);

    code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
    TSDB_CHECK_CODE(code, lino, _end);
  }

  // output throughput, NULL for sink tasks
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
    colDataSetNULL(pColInfo, numOfRows);
  } else {
    if (pe->outputThroughput < 1024) {
      snprintf(buf, tListLen(buf), formatKb, pe->outputThroughput);
    } else {
      snprintf(buf, tListLen(buf), formatMb, pe->outputThroughput / 1024);
    }

    memset(vbuf, 0, tListLen(vbuf));
    STR_TO_VARSTR(vbuf, buf);

    code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
    TSDB_CHECK_CODE(code, lino, _end);
  }

  // info: sink data size for sink tasks, progress/offset for source tasks, empty for the rest
  if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
    const char *sinkStr = "%.2f MiB";
    snprintf(buf, tListLen(buf), sinkStr, pe->sinkDataSize);
  } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {  // offset info
    if (pTask->info.trigger == STREAM_TRIGGER_FORCE_WINDOW_CLOSE) {
      int32_t ret = taosFormatUtcTime(buf, tListLen(buf), pe->processedVer, precision);
      if (ret != 0) {
        mError("failed to format processed timewindow, skey:%" PRId64, pe->processedVer);
        memset(buf, 0, tListLen(buf));
      }
    } else {
      const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]";
      snprintf(buf, tListLen(buf), offsetStr, pe->processedVer, pe->verRange.minVer, pe->verRange.maxVer);
    }
  } else {
    memset(buf, 0, tListLen(buf));
  }

  STR_TO_VARSTR(vbuf, buf);

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // start_time
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startTime, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // start id
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startCheckpointId, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // start ver
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startCheckpointVer, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // checkpoint time, NULL while no checkpoint time has been recorded
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  if (pe->checkpointInfo.latestTime != 0) {
    code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestTime, false);
  } else {
    code = colDataSetVal(pColInfo, numOfRows, NULL, true);
  }
  TSDB_CHECK_CODE(code, lino, _end);

  // checkpoint_id
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestId, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // checkpoint version
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestVer, false);
  TSDB_CHECK_CODE(code, lino, _end);

  // checkpoint size: always NULL
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  colDataSetNULL(pColInfo, numOfRows);

  // checkpoint backup status: always NULL
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, NULL, true);
  TSDB_CHECK_CODE(code, lino, _end);

  // ds_err_info: always NULL
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, NULL, true);
  TSDB_CHECK_CODE(code, lino, _end);

  // history_task_id, NULL when the entry carries none
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  if (pe->hTaskId != 0) {
    int64ToHexStr(pe->hTaskId, idstr, tListLen(idstr));
    code = colDataSetVal(pColInfo, numOfRows, idstr, false);
  } else {
    code = colDataSetVal(pColInfo, numOfRows, NULL, true);
  }
  TSDB_CHECK_CODE(code, lino, _end);

  // history_task_status: always NULL
  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  code = colDataSetVal(pColInfo, numOfRows, NULL, true);
  TSDB_CHECK_CODE(code, lino, _end);

  // notify_event_stat: one fragment per counter that is non-zero, appended back to back into buf
  int32_t offset = 0;
  if (pe->notifyEventStat.notifyEventAddTimes > 0) {
    offset += tsnprintf(buf + offset, sizeof(buf) - offset, "Add %" PRId64 "x, %" PRId64 " elems in %lfs; ",
                        pe->notifyEventStat.notifyEventAddTimes, pe->notifyEventStat.notifyEventAddElems,
                        pe->notifyEventStat.notifyEventAddCostSec);
  }
  if (pe->notifyEventStat.notifyEventPushTimes > 0) {
    offset += tsnprintf(buf + offset, sizeof(buf) - offset, "Push %" PRId64 "x, %" PRId64 " elems in %lfs; ",
                        pe->notifyEventStat.notifyEventPushTimes, pe->notifyEventStat.notifyEventPushElems,
                        pe->notifyEventStat.notifyEventPushCostSec);
  }
  if (pe->notifyEventStat.notifyEventPackTimes > 0) {
    offset += tsnprintf(buf + offset, sizeof(buf) - offset, "Pack %" PRId64 "x, %" PRId64 " elems in %lfs; ",
                        pe->notifyEventStat.notifyEventPackTimes, pe->notifyEventStat.notifyEventPackElems,
                        pe->notifyEventStat.notifyEventPackCostSec);
  }
  if (pe->notifyEventStat.notifyEventSendTimes > 0) {
    offset += tsnprintf(buf + offset, sizeof(buf) - offset, "Send %" PRId64 "x, %" PRId64 " elems in %lfs; ",
                        pe->notifyEventStat.notifyEventSendTimes, pe->notifyEventStat.notifyEventSendElems,
                        pe->notifyEventStat.notifyEventSendCostSec);
  }
  if (pe->notifyEventStat.notifyEventHoldElems > 0) {
    offset += tsnprintf(buf + offset, sizeof(buf) - offset, "[Hold %" PRId64 " elems] ",
                        pe->notifyEventStat.notifyEventHoldElems);
  }
  TSDB_CHECK_CONDITION(offset < sizeof(buf), code, lino, _end, TSDB_CODE_INTERNAL_ERROR);
  buf[offset] = '\0';

  STR_TO_VARSTR(vbuf, buf);

  pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
  TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno);

  if (offset == 0) {
    // nothing to report for this task
    colDataSetNULL(pColInfo, numOfRows);
  } else {
    code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
    TSDB_CHECK_CODE(code, lino, _end);
  }

_end:
  if (code) {
    mError("error happens during build task attr result blocks, lino:%d, code:%s", lino, tstrerror(code));
  }
  return code;
}
1567

1568
static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) {
12,487✔
1569
  const SEp *pEp = GET_ACTIVE_EP(pPrevEpset);
12,487✔
1570
  const SEp *p = GET_ACTIVE_EP(pCurrent);
12,487✔
1571

1572
  if (pEp->port == p->port && strncmp(pEp->fqdn, p->fqdn, TSDB_FQDN_LEN) == 0) {
12,487!
1573
    return false;
12,487✔
1574
  }
1575
  return true;
×
1576
}
1577

1578
// Release the containers owned by a vgroup-change record; the record itself is not freed. NULL is ignored.
// The members are reset after release so the record can safely be destroyed a second time: mndFindChangedNodeInfo()
// already calls this on its own initialization-failure path before reporting the error to its caller.
void mndDestroyVgroupChangeInfo(SVgroupChangeInfo *pInfo) {
  if (pInfo == NULL) {
    return;
  }

  taosArrayDestroy(pInfo->pUpdateNodeList);
  pInfo->pUpdateNodeList = NULL;

  taosHashCleanup(pInfo->pDBMap);
  pInfo->pDBMap = NULL;
}
2,663✔
1584

1585
// 1. increase the replica does not affect the stream process.
1586
// 2. decreasing the replica may affect the stream task execution in the way that there is one or more running stream
1587
// tasks on the will be removed replica.
1588
// 3. vgroup redistribution is an combination operation of first increase replica and then decrease replica. So we
1589
// will handle it as mentioned in 1 & 2 items.
1590
int32_t mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList,
2,663✔
1591
                               SVgroupChangeInfo *pInfo) {
1592
  int32_t code = 0;
2,663✔
1593
  int32_t lino = 0;
2,663✔
1594

1595
  if (pInfo == NULL) {
2,663!
1596
    return TSDB_CODE_INVALID_PARA;
×
1597
  }
1598

1599
  pInfo->pUpdateNodeList = taosArrayInit(4, sizeof(SNodeUpdateInfo));
2,663✔
1600
  pInfo->pDBMap = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK);
2,663✔
1601

1602
  if (pInfo->pUpdateNodeList == NULL || pInfo->pDBMap == NULL) {
2,663!
1603
    mndDestroyVgroupChangeInfo(pInfo);
×
1604
    TSDB_CHECK_NULL(NULL, code, lino, _err, terrno);
×
1605
  }
1606

1607
  int32_t numOfNodes = taosArrayGetSize(pPrevNodeList);
2,663✔
1608
  for (int32_t i = 0; i < numOfNodes; ++i) {
15,925✔
1609
    SNodeEntry *pPrevEntry = taosArrayGet(pPrevNodeList, i);
13,262✔
1610
    if (pPrevEntry == NULL) {
13,262!
1611
      continue;
×
1612
    }
1613

1614
    int32_t num = taosArrayGetSize(pNodeList);
13,262✔
1615
    for (int32_t j = 0; j < num; ++j) {
166,421✔
1616
      SNodeEntry *pCurrent = taosArrayGet(pNodeList, j);
165,656✔
1617
      if (pCurrent == NULL) {
165,656!
1618
        continue;
×
1619
      }
1620

1621
      if (pCurrent->nodeId == pPrevEntry->nodeId) {
165,656✔
1622
        if (pPrevEntry->stageUpdated || isNodeEpsetChanged(&pPrevEntry->epset, &pCurrent->epset)) {
12,497!
1623
          const SEp *pPrevEp = GET_ACTIVE_EP(&pPrevEntry->epset);
10✔
1624

1625
          char buf[256] = {0};
10✔
1626
          code = epsetToStr(&pCurrent->epset, buf, tListLen(buf));  // ignore this error
10✔
1627
          if (code) {
10!
1628
            mError("failed to convert epset string, code:%s", tstrerror(code));
×
1629
            TSDB_CHECK_CODE(code, lino, _err);
×
1630
          }
1631

1632
          mDebug("nodeId:%d restart/epset changed detected, old:%s:%d -> new:%s, stageUpdate:%d", pCurrent->nodeId,
10✔
1633
                 pPrevEp->fqdn, pPrevEp->port, buf, pPrevEntry->stageUpdated);
1634

1635
          SNodeUpdateInfo updateInfo = {.nodeId = pPrevEntry->nodeId};
10✔
1636
          epsetAssign(&updateInfo.prevEp, &pPrevEntry->epset);
10✔
1637
          epsetAssign(&updateInfo.newEp, &pCurrent->epset);
10✔
1638

1639
          void *p = taosArrayPush(pInfo->pUpdateNodeList, &updateInfo);
10✔
1640
          TSDB_CHECK_NULL(p, code, lino, _err, terrno);
10!
1641
        }
1642

1643
        // todo handle the snode info
1644
        if (pCurrent->nodeId != SNODE_HANDLE) {
12,497✔
1645
          SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId);
10,989✔
1646
          code = taosHashPut(pInfo->pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0);
10,989✔
1647
          mndReleaseVgroup(pMnode, pVgroup);
10,989✔
1648
          TSDB_CHECK_CODE(code, lino, _err);
10,989!
1649
        }
1650

1651
        break;
12,497✔
1652
      }
1653
    }
1654
  }
1655

1656
  return code;
2,663✔
1657

1658
_err:
×
1659
  mError("failed to find node change info, code:%s at %s line:%d", tstrerror(code), __func__, lino);
×
1660
  mndDestroyVgroupChangeInfo(pInfo);
×
1661
  return code;
×
1662
}
1663

1664
// Check whether the nodes used by the stream tasks have been updated. mndStreamNodeIsUpdated calls this function
// with execInfo.lock held.
//
// ppNodeSnapshot is handed to mndTakeVgroupSnapshot; this function never destroys *ppNodeSnapshot, that is left to
// the caller.
//
// The return value is a boolean carried in an int32_t:
//  - false: no stream node exists (execInfo.ts is refreshed in this case), no changed node was found, or the
//           search for changed nodes failed.
//  - true:  a node entry is flagged stageUpdated, not all vnodes are ready, or at least one changed node was found.
static int32_t doCheckForUpdated(SMnode *pMnode, SArray **ppNodeSnapshot) {
  bool              allReady = false;
  bool              nodeUpdated = false;
  SVgroupChangeInfo changeInfo = {0};

  int32_t numOfNodes = extractStreamNodeList(pMnode);

  if (numOfNodes == 0) {
    mDebug("stream task node change checking done, no vgroups exist, do nothing");
    execInfo.ts = taosGetTimestampSec();
    return false;
  }

  for (int32_t i = 0; i < numOfNodes; ++i) {
    SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i);
    if (pNodeEntry == NULL) {
      continue;
    }

    if (pNodeEntry->stageUpdated) {
      mDebug("stream task not ready due to node update detected, checkpoint not issued");
      return true;
    }
  }

  // a failure is only logged here; the outcome is decided by allReady below
  int32_t code = mndTakeVgroupSnapshot(pMnode, &allReady, ppNodeSnapshot);
  if (code) {
    mError("failed to get the vgroup snapshot, ignore it and continue");
  }

  if (!allReady) {
    mWarn("not all vnodes ready, quit from vnodes status check");
    return true;
  }

  code = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, *ppNodeSnapshot, &changeInfo);
  if (code) {
    // mndFindChangedNodeInfo has already released the containers of changeInfo on its failure path (pInfo is not
    // NULL here); destroying changeInfo again would free them a second time
    return false;
  }

  nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0);
  if (nodeUpdated) {
    mDebug("stream tasks not ready due to node update");
  }

  mndDestroyVgroupChangeInfo(&changeInfo);
  return nodeUpdated;
}
1712

1713
// check if the node update happens or not
1714
bool mndStreamNodeIsUpdated(SMnode *pMnode) {
2,005✔
1715
  SArray *pNodeSnapshot = NULL;
2,005✔
1716

1717
  streamMutexLock(&execInfo.lock);
2,005✔
1718
  bool updated = doCheckForUpdated(pMnode, &pNodeSnapshot);
2,005✔
1719
  streamMutexUnlock(&execInfo.lock);
2,005✔
1720

1721
  taosArrayDestroy(pNodeSnapshot);
2,005✔
1722
  return updated;
2,005✔
1723
}
1724

1725
// Verify the snode precondition for the source db.
// A db with a single replica always passes. Otherwise at least one SDB_SNODE object must be present in the sdb:
// returns TSDB_CODE_SUCCESS if one is found, TSDB_CODE_SNODE_NOT_DEPLOYED (after logging an error) if none exists.
int32_t mndCheckForSnode(SMnode *pMnode, SDbObj *pSrcDb) {
  SSdb      *pSdb = pMnode->pSdb;
  SSnodeObj *pSnode = NULL;
  void      *pIter = NULL;

  if (pSrcDb->cfg.replications == 1) {
    return TSDB_CODE_SUCCESS;
  }

  // one fetch is enough: only the existence of a snode matters, not which one it is
  pIter = sdbFetch(pSdb, SDB_SNODE, pIter, (void **)&pSnode);
  if (pIter == NULL) {
    mError("snode not existed when trying to create stream in db with multiple replica");
    return TSDB_CODE_SNODE_NOT_DEPLOYED;
  }

  // give back the fetched object and stop the iteration early
  sdbRelease(pSdb, pSnode);
  sdbCancelFetch(pSdb, pIter);
  return TSDB_CODE_SUCCESS;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc