• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3529

14 Nov 2024 01:56PM UTC coverage: 60.888% (-0.02%) from 60.905%
#3529

push

travis-ci

web-flow
Merge pull request #28764 from taosdata/docs/TS-4937

doc(arch/last): new section for last/last_row cache

119990 of 252020 branches covered (47.61%)

Branch coverage included in aggregate %.

200800 of 274829 relevant lines covered (73.06%)

15624555.39 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.21
/source/libs/stream/src/streamTask.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "executor.h"
17
#include "osDir.h"
18
#include "osMemory.h"
19
#include "streamInt.h"
20
#include "streamsm.h"
21
#include "tmisce.h"
22
#include "tstream.h"
23
#include "ttimer.h"
24
#include "wal.h"
25

26
static void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo);
27
static int32_t streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated);
28
static int32_t streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdate);
29
static void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo);
30

31
// Append pTask to pArray. The number of elements already in the array (i.e. the slot the task is about to
// occupy) is stored as the task's selfChildId before the push.
// Returns TSDB_CODE_SUCCESS, or terrno when taosArrayPush() reports failure.
static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) {
  pTask->info.selfChildId = (int32_t)taosArrayGetSize(pArray);

  if (taosArrayPush(pArray, &pTask) == NULL) {
    return terrno;
  }

  return TSDB_CODE_SUCCESS;
}
37

38
// Apply the new endpoint set of node `nodeId` to one task.
//
// If the task itself runs on `nodeId`, its own epset is replaced when it differs from pEpSet. Afterwards the
// dispatch (downstream) and/or upstream endpoint tables are refreshed according to the task level:
//   source -> downstream only, agg -> upstream and downstream, otherwise (sink) -> upstream only.
//
// pUpdated is set to true only after an epset has actually been replaced; it is never reset to false here.
// Returns 0 on success, otherwise the first error code encountered.
static int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet, bool* pUpdated) {
  int32_t code = 0;
  char    buf[512] = {0};

  if (pTask->info.nodeId == nodeId) {  // execution task should be moved away
    bool isEqual = isEpsetEqual(&pTask->info.epSet, pEpSet);
    code = epsetToStr(pEpSet, buf, tListLen(buf));
    if (code) {  // the string form is only used for logging, but a failure still aborts the update
      stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
      return code;
    }

    if (!isEqual) {
      char tmp[512] = {0};
      code = epsetToStr(&pTask->info.epSet, tmp, tListLen(tmp));
      if (code) {
        stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
        return code;
      }

      epsetAssign(&pTask->info.epSet, pEpSet);
      // raise the flag only once the epset has really been replaced, so that an early error return above
      // cannot report an update that did not happen
      (*pUpdated) = true;
      stDebug("s-task:0x%x (vgId:%d) self node epset is updated %s, old:%s", pTask->id.taskId, nodeId, buf, tmp);
    } else {
      stDebug("s-task:0x%x (vgId:%d) not updated task epset, since epset identical, %s", pTask->id.taskId, nodeId, buf);
    }
  }

  // check for the dispatch info and the upstream task info
  int32_t level = pTask->info.taskLevel;
  if (level == TASK_LEVEL__SOURCE) {
    code = streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet, pUpdated);
  } else if (level == TASK_LEVEL__AGG) {
    // both directions are always attempted; the first failure is the one reported to the caller instead of
    // being overwritten by the result of the second call
    code = streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet, pUpdated);
    int32_t downstreamCode = streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet, pUpdated);
    if (code == 0) {
      code = downstreamCode;
    }
  } else {  // TASK_LEVEL__SINK
    code = streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet, pUpdated);
  }

  return code;
}
79

80
static void freeItem(void* p) {
×
81
  SStreamContinueExecInfo* pInfo = p;
×
82
  rpcFreeCont(pInfo->msg.pCont);
×
83
}
×
84

85
static void freeUpstreamItem(void* p) {
82,105✔
86
  SStreamUpstreamEpInfo** pInfo = p;
82,105✔
87
  taosMemoryFree(*pInfo);
82,105✔
88
}
82,108✔
89

90
static SStreamUpstreamEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) {
19,297✔
91
  SStreamUpstreamEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamUpstreamEpInfo));
19,297✔
92
  if (pEpInfo == NULL) {
19,297!
93
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
94
    return NULL;
×
95
  }
96

97
  pEpInfo->childId = pTask->info.selfChildId;
19,297✔
98
  pEpInfo->epSet = pTask->info.epSet;
19,297✔
99
  pEpInfo->nodeId = pTask->info.nodeId;
19,297✔
100
  pEpInfo->taskId = pTask->id.taskId;
19,297✔
101
  pEpInfo->stage = -1;
19,297✔
102

103
  return pEpInfo;
19,297✔
104
}
105

106
int32_t tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, bool fillHistory, int32_t trigger,
13,429✔
107
                       int64_t triggerParam, SArray* pTaskList, bool hasFillhistory, int8_t subtableWithoutMd5,
108
                       SStreamTask** p) {
109
  *p = NULL;
13,429✔
110

111
  SStreamTask* pTask = (SStreamTask*)taosMemoryCalloc(1, sizeof(SStreamTask));
13,429✔
112
  if (pTask == NULL) {
13,429!
113
    stError("s-task:0x%" PRIx64 " failed malloc new stream task, size:%d, code:%s", streamId,
×
114
            (int32_t)sizeof(SStreamTask), tstrerror(terrno));
115
    return terrno;
×
116
  }
117

118
  pTask->ver = SSTREAM_TASK_VER;
13,429✔
119
  pTask->id.taskId = tGenIdPI32();
13,429✔
120
  pTask->id.streamId = streamId;
13,429✔
121

122
  pTask->info.taskLevel = taskLevel;
13,429✔
123
  pTask->info.fillHistory = fillHistory;
13,429✔
124
  pTask->info.trigger = trigger;
13,429✔
125
  pTask->info.delaySchedParam = triggerParam;
13,429✔
126
  pTask->subtableWithoutMd5 = subtableWithoutMd5;
13,429✔
127

128
  int32_t code = streamCreateStateMachine(pTask);
13,429✔
129
  if (pTask->status.pSM == NULL || code != TSDB_CODE_SUCCESS) {
13,429!
130
    taosMemoryFreeClear(pTask);
×
131
    return code;
×
132
  }
133

134
  char buf[128] = {0};
13,429✔
135
  sprintf(buf, "0x%" PRIx64 "-0x%x", pTask->id.streamId, pTask->id.taskId);
13,429✔
136

137
  pTask->id.idStr = taosStrdup(buf);
13,429✔
138
  if (pTask->id.idStr == NULL) {
13,429!
139
    stError("s-task:0x%x failed to build task id, code: out of memory", pTask->id.taskId);
×
140
    return terrno;
×
141
  }
142

143
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
13,429✔
144
  pTask->status.taskStatus = fillHistory ? TASK_STATUS__SCAN_HISTORY : TASK_STATUS__READY;
13,429✔
145
  pTask->inputq.status = TASK_INPUT_STATUS__NORMAL;
13,429✔
146
  pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL;
13,429✔
147

148
  pTask->taskCheckInfo.pList = taosArrayInit(4, sizeof(SDownstreamStatusInfo));
13,429✔
149
  code = taosThreadMutexInit(&pTask->taskCheckInfo.checkInfoLock, NULL);
13,429✔
150
  if (code) {
13,429!
151
    return code;
×
152
  }
153

154
  if (fillHistory && !hasFillhistory) {
13,429!
155
    stError("s-task:0x%x create task failed, due to inconsistent fill-history flag", pTask->id.taskId);
×
156
    return TSDB_CODE_INVALID_PARA;
×
157
  }
158

159
  epsetAssign(&(pTask->info.mnodeEpset), pEpset);
13,429✔
160

161
  code = addToTaskset(pTaskList, pTask);
13,429✔
162
  *p = pTask;
13,429✔
163

164
  return code;
13,429✔
165
}
166

167
// Decode only the checkpoint-related fields of an encoded stream task: msgVer is read first, a fixed
// sequence of intermediate fields is decoded into scratch variables and discarded, and finally
// checkpointId and checkpointVer are read into pChkpInfo.
// Note that, unlike tDecodeStreamTaskId(), the decoded version is not compared with
// SSTREAM_TASK_INCOMPATIBLE_VER here.
// Returns 0 on success and -1 as soon as any decode step fails.
int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) {
  // scratch targets for the fields that are decoded only to advance the decoder
  int64_t ignored64;
  int32_t ignored32;
  int16_t ignored16;
  int8_t  ignored8;
  SEpSet  ignoredEpSet;

  if (tStartDecode(pDecoder) < 0 || tDecodeI64(pDecoder, &pChkpInfo->msgVer) < 0) {
    return -1;
  }

  if (tDecodeI64(pDecoder, &ignored64) < 0 || tDecodeI32(pDecoder, &ignored32) < 0 ||
      tDecodeI32(pDecoder, &ignored32) < 0 || tDecodeI8(pDecoder, &ignored8) < 0 ||
      tDecodeI8(pDecoder, &ignored8) < 0 || tDecodeI16(pDecoder, &ignored16) < 0) {
    return -1;
  }

  if (tDecodeI8(pDecoder, &ignored8) < 0 || tDecodeI8(pDecoder, &ignored8) < 0) {
    return -1;
  }

  if (tDecodeI32(pDecoder, &ignored32) < 0 || tDecodeI32(pDecoder, &ignored32) < 0 ||
      tDecodeSEpSet(pDecoder, &ignoredEpSet) < 0 || tDecodeSEpSet(pDecoder, &ignoredEpSet) < 0) {
    return -1;
  }

  if (tDecodeI64(pDecoder, &pChkpInfo->checkpointId) < 0 || tDecodeI64(pDecoder, &pChkpInfo->checkpointVer) < 0) {
    return -1;
  }

  tEndDecode(pDecoder);
  return 0;
}
199

200
// Decode the identity (streamId, taskId) from the head of an encoded stream task.
// Returns -1 when any decode step fails or when the encoded version is not newer than
// SSTREAM_TASK_INCOMPATIBLE_VER; returns 0 on success.
int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId) {
  int64_t encodedVer = 0;
  int32_t decodedTaskId = 0;

  if (tStartDecode(pDecoder) < 0 || tDecodeI64(pDecoder, &encodedVer) < 0) {
    return -1;
  }

  if (encodedVer <= SSTREAM_TASK_INCOMPATIBLE_VER) {
    return -1;
  }

  if (tDecodeI64(pDecoder, &pTaskId->streamId) < 0 || tDecodeI32(pDecoder, &decodedTaskId) < 0) {
    return -1;
  }

  // the task id is encoded as a 32-bit value and assigned to the STaskId member afterwards
  pTaskId->taskId = decodedTaskId;

  tEndDecode(pDecoder);
  return 0;
}
215

216
void tFreeStreamTask(void* pParam) {
58,767✔
217
  char*        p = NULL;
58,767✔
218
  SStreamTask* pTask = pParam;
58,767✔
219
  int32_t      taskId = pTask->id.taskId;
58,767✔
220

221
  STaskExecStatisInfo* pStatis = &pTask->execInfo;
58,767✔
222

223
  ETaskStatus status1 = TASK_STATUS__UNINIT;
58,767✔
224
  streamMutexLock(&pTask->lock);
58,767✔
225
  if (pTask->status.pSM != NULL) {
58,797✔
226
    SStreamTaskState status = streamTaskGetStatus(pTask);
27,343✔
227
    p = status.name;
27,328✔
228
    status1 = status.state;
27,328✔
229
  }
230
  streamMutexUnlock(&pTask->lock);
58,782✔
231

232
  stDebug("start to free s-task:0x%x %p, state:%s, refId:%" PRId64, taskId, pTask, p, pTask->id.refId);
58,785✔
233

234
  SCheckpointInfo* pCkInfo = &pTask->chkInfo;
58,785✔
235
  stDebug("s-task:0x%x task exec summary: create:%" PRId64 ", init:%" PRId64 ", start:%" PRId64
58,785✔
236
          ", updateCount:%d latestUpdate:%" PRId64 ", latestCheckPoint:%" PRId64 ", ver:%" PRId64
237
          " nextProcessVer:%" PRId64 ", checkpointCount:%d",
238
          taskId, pStatis->created, pStatis->checkTs, pStatis->readyTs, pStatis->updateCount, pStatis->latestUpdateTs,
239
          pCkInfo->checkpointId, pCkInfo->checkpointVer, pCkInfo->nextProcessVer, pStatis->checkpoint);
240

241
  if (pTask->schedInfo.pDelayTimer != NULL) {
58,785✔
242
    streamTmrStop(pTask->schedInfo.pDelayTimer);
1,177✔
243
    pTask->schedInfo.pDelayTimer = NULL;
1,177✔
244
  }
245

246
  if (pTask->hTaskInfo.pTimer != NULL) {
58,785✔
247
    streamTmrStop(pTask->hTaskInfo.pTimer);
1,830✔
248
    pTask->hTaskInfo.pTimer = NULL;
1,830✔
249
  }
250

251
  if (pTask->msgInfo.pRetryTmr != NULL) {
58,785✔
252
    streamTmrStop(pTask->msgInfo.pRetryTmr);
5,589✔
253
    pTask->msgInfo.pRetryTmr = NULL;
5,590✔
254
  }
255

256
  if (pTask->inputq.queue) {
58,786✔
257
    streamQueueClose(pTask->inputq.queue, pTask->id.taskId);
13,877✔
258
    pTask->inputq.queue = NULL;
13,877✔
259
  }
260

261
  if (pTask->outputq.queue) {
58,786✔
262
    streamQueueClose(pTask->outputq.queue, pTask->id.taskId);
13,873✔
263
    pTask->outputq.queue = NULL;
13,882✔
264
  }
265

266
  if (pTask->exec.qmsg) {
58,795✔
267
    taosMemoryFree(pTask->exec.qmsg);
30,898✔
268
  }
269

270
  if (pTask->exec.pExecutor) {
58,795✔
271
    qDestroyTask(pTask->exec.pExecutor);
6,996✔
272
    pTask->exec.pExecutor = NULL;
6,996✔
273
  }
274

275
  if (pTask->exec.pWalReader != NULL) {
58,795✔
276
    walCloseReader(pTask->exec.pWalReader);
6,944✔
277
    pTask->exec.pWalReader = NULL;
6,944✔
278
  }
279

280
  streamClearChkptReadyMsg(pTask->chkInfo.pActiveInfo);
58,795✔
281

282
  if (pTask->msgInfo.pData != NULL) {
58,790✔
283
    clearBufferedDispatchMsg(pTask);
29✔
284
  }
285

286
  if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) {
58,791✔
287
    tDeleteSchemaWrapper(pTask->outputInfo.tbSink.pSchemaWrapper);
28,664!
288
    taosMemoryFree(pTask->outputInfo.tbSink.pTSchema);
28,670✔
289
    tSimpleHashCleanup(pTask->outputInfo.tbSink.pTbInfo);
28,667✔
290
    tDeleteSchemaWrapper(pTask->outputInfo.tbSink.pTagSchema);
28,668✔
291
  } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
30,127✔
292
    taosArrayDestroy(pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos);
25,727✔
293
  }
294

295
  streamTaskCleanupCheckInfo(&pTask->taskCheckInfo);
58,796✔
296
  streamFreeTaskState(pTask, pTask->status.removeBackendFiles ? 1 : 0);
58,794✔
297

298
  if (pTask->pNameMap) {
58,789✔
299
    tSimpleHashCleanup(pTask->pNameMap);
2,238✔
300
  }
301

302
  streamDestroyStateMachine(pTask->status.pSM);
58,789✔
303
  pTask->status.pSM = NULL;
58,798✔
304

305
  streamTaskDestroyUpstreamInfo(&pTask->upstreamInfo);
58,798✔
306

307
  taosMemoryFree(pTask->outputInfo.pTokenBucket);
58,792✔
308
  streamMutexDestroy(&pTask->lock);
58,797✔
309

310
  taosArrayDestroy(pTask->msgInfo.pSendInfo);
58,792✔
311
  pTask->msgInfo.pSendInfo = NULL;
58,798✔
312
  streamMutexDestroy(&pTask->msgInfo.lock);
58,798✔
313

314
  taosArrayDestroy(pTask->outputInfo.pNodeEpsetUpdateList);
58,798✔
315
  pTask->outputInfo.pNodeEpsetUpdateList = NULL;
58,798✔
316

317
  if (pTask->id.idStr != NULL) {
58,798✔
318
    taosMemoryFree((void*)pTask->id.idStr);
27,309✔
319
  }
320

321
  streamTaskDestroyActiveChkptInfo(pTask->chkInfo.pActiveInfo);
58,800✔
322
  pTask->chkInfo.pActiveInfo = NULL;
58,792✔
323

324
  taosMemoryFree(pTask);
58,792✔
325
  stDebug("s-task:0x%x free task completed", taskId);
58,797✔
326
}
58,797✔
327

328
// Release the state/backend resources of a task.
//  - With an open state: close it, optionally flag the backend files for clearing (remove != 0), drop the
//    backend reference and clear both pointers.
//  - Without an open state: when remove != 0 and a backend path is known, delete that directory.
// In both cases the backendPath string is freed and reset afterwards.
void streamFreeTaskState(SStreamTask* pTask, int8_t remove) {
  stDebug("s-task:0x%x start to free task state/backend", pTask->id.taskId);

  if (pTask->pState == NULL) {
    stDebug("s-task:0x%x task state is NULL, may del backend:%s", pTask->id.taskId,
            pTask->backendPath ? pTask->backendPath : "NULL");

    if (remove && pTask->backendPath != NULL) {
      stDebug("s-task:0x%x task state is NULL, do del backend:%s", pTask->id.taskId, pTask->backendPath);
      taosRemoveDir(pTask->backendPath);
    }
  } else {
    stDebug("s-task:0x%x start to free task state", pTask->id.taskId);
    streamStateClose(pTask->pState, remove);

    if (remove) {
      taskDbSetClearFileFlag(pTask->pBackend);
    }

    taskDbRemoveRef(pTask->pBackend);
    pTask->pBackend = NULL;
    pTask->pState = NULL;
  }

  if (pTask->backendPath != NULL) {
    taosMemoryFree(pTask->backendPath);
    pTask->backendPath = NULL;
  }
}
58,792✔
354

355
// Initialise checkpointVer / processedVer / nextProcessVer of a task.
//  - Plain stream task (not a fill-history task and without a related fill-history task): everything is
//    derived from `ver`, and the data range is collapsed to [ver, ver].
//  - Fill-history task: derived from the upper bound of the task's data range.
//  - Stream task with a related fill-history task: derived from the lower bound of the data range (the
//    range itself is set at the mnode), with a compatibility fix-up when that lower bound is 0.
static void setInitialVersionInfo(SStreamTask* pTask, int64_t ver) {
  SCheckpointInfo* pCkpt = &pTask->chkInfo;
  SDataRange*      pDataRange = &pTask->dataRange;

  // only set the version info from `ver` for stream tasks without fill-history task
  if ((pTask->info.fillHistory == 0) && (!HAS_RELATED_FILLHISTORY_TASK(pTask))) {
    pCkpt->checkpointVer = ver - 1;  // only update when generating checkpoint
    pCkpt->processedVer = ver - 1;   // already processed version
    pCkpt->nextProcessVer = ver;     // next processed version

    pDataRange->range.maxVer = ver;
    pDataRange->range.minVer = ver;
    return;
  }

  if (pTask->info.fillHistory == 1) {
    pCkpt->checkpointVer = pDataRange->range.maxVer;
    pCkpt->processedVer = pDataRange->range.maxVer;
    pCkpt->nextProcessVer = pDataRange->range.maxVer + 1;
    return;
  }

  pCkpt->checkpointVer = pDataRange->range.minVer - 1;
  pCkpt->processedVer = pDataRange->range.minVer - 1;
  pCkpt->nextProcessVer = pDataRange->range.minVer;

  // for compatible purpose, remove it later
  if (pDataRange->range.minVer == 0) {
    pCkpt->checkpointVer = 0;
    pCkpt->processedVer = 0;
    pCkpt->nextProcessVer = 1;
    stDebug("s-task:%s update the processedVer to 0 from -1 due to compatible purpose", pTask->id.idStr);
  }
}
14,102✔
390

391
// Build pTask->backendPath as "<pMeta->path><TD_DIRSEP>0x<streamId>-0x<taskId>".
// For a fill-history task the ids of the related stream task (streamTaskId) are used instead of the task's
// own ids.
// Returns 0 on success, TSDB_CODE_OUT_OF_BUFFER when the id string cannot be formatted into its buffer, or
// terrno when the path allocation fails. The path string is heap allocated and owned by the task.
int32_t streamTaskSetBackendPath(SStreamTask* pTask) {
  int64_t streamId = 0;
  int32_t taskId = 0;

  if (pTask->info.fillHistory) {
    streamId = pTask->streamTaskId.streamId;
    taskId = pTask->streamTaskId.taskId;
  } else {
    streamId = pTask->id.streamId;
    taskId = pTask->id.taskId;
  }

  // bounded formatting: the size check below is only meaningful if the write itself cannot overflow
  char    id[128] = {0};
  int32_t nBytes = snprintf(id, sizeof(id), "0x%" PRIx64 "-0x%x", streamId, taskId);
  if (nBytes < 0 || (size_t)nBytes >= sizeof(id)) {
    return TSDB_CODE_OUT_OF_BUFFER;
  }

  // size the buffer from the actual separator length instead of assuming a single character
  size_t cap = strlen(pTask->pMeta->path) + strlen(TD_DIRSEP) + (size_t)nBytes + 1;
  pTask->backendPath = (char*)taosMemoryMalloc(cap);
  if (pTask->backendPath == NULL) {
    return terrno;
  }

  (void)snprintf(pTask->backendPath, cap, "%s%s%s", pTask->pMeta->path, TD_DIRSEP, id);
  stDebug("s-task:%s set backend path:%s", pTask->id.idStr, pTask->backendPath);

  return 0;
}
420

421
// Initialise the runtime part of a stream task: id string, input/output queues, state machine, initial
// version info, send-info list, mutexes (pTask->lock is created as a recursive mutex), upstream input
// flags, token bucket, epset-update list, check-info list, active checkpoint info and the backend path.
//
// Returns 0 on success or the error code of the first step that failed. Resources created before a failing
// step are left attached to pTask and are not released here.
int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver) {
  int32_t code = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId, &pTask->id.idStr);
  if (code) {
    stError("0x%x failed create stream task id str, code:%s", pTask->id.taskId, tstrerror(code));
    return code;
  }

  pTask->id.refId = 0;
  pTask->inputq.status = TASK_INPUT_STATUS__NORMAL;
  pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL;

  int32_t code1 = streamQueueOpen(512 << 10, &pTask->inputq.queue);
  int32_t code2 = streamQueueOpen(512 << 10, &pTask->outputq.queue);
  if (code1 || code2) {
    stError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr);
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;

  code = streamCreateStateMachine(pTask);
  if (pTask->status.pSM == NULL || code != TSDB_CODE_SUCCESS) {
    stError("s-task:%s failed create state-machine for stream task, initialization failed, code:%s", pTask->id.idStr,
            tstrerror(code));
    return code;
  }

  pTask->execInfo.created = taosGetTimestampMs();
  setInitialVersionInfo(pTask, ver);

  pTask->pMeta = pMeta;
  pTask->pMsgCb = pMsgCb;
  pTask->msgInfo.pSendInfo = taosArrayInit(4, sizeof(SDispatchEntry));
  if (pTask->msgInfo.pSendInfo == NULL) {
    stError("s-task:%s failed to create sendInfo struct for stream task, code:Out of memory", pTask->id.idStr);
    return terrno;
  }

  code = taosThreadMutexInit(&pTask->msgInfo.lock, NULL);
  if (code) {
    stError("s-task:0x%x failed to init msgInfo mutex, code:%s", pTask->id.taskId, tstrerror(code));
    return code;
  }

  TdThreadMutexAttr attr = {0};
  code = taosThreadMutexAttrInit(&attr);
  if (code != 0) {
    stError("s-task:%s initElapsed mutex attr failed, code:%s", pTask->id.idStr, tstrerror(code));
    return code;
  }

  code = taosThreadMutexAttrSetType(&attr, PTHREAD_MUTEX_RECURSIVE);
  if (code != 0) {
    stError("s-task:%s set mutex attr recursive, code:%s", pTask->id.idStr, tstrerror(code));
    // the attribute object was initialised successfully above, so it has to be destroyed on this path too
    (void)taosThreadMutexAttrDestroy(&attr);
    return code;
  }

  // the attribute object is no longer needed once the mutex init has been attempted; destroy it whether
  // or not the init succeeded, and report the mutex error first
  code = taosThreadMutexInit(&pTask->lock, &attr);
  int32_t attrCode = taosThreadMutexAttrDestroy(&attr);
  if (code) {
    return code;
  }

  if (attrCode) {
    return attrCode;
  }

  streamTaskOpenAllUpstreamInput(pTask);

  STaskOutputInfo* pOutputInfo = &pTask->outputInfo;
  pOutputInfo->pTokenBucket = taosMemoryCalloc(1, sizeof(STokenBucket));
  if (pOutputInfo->pTokenBucket == NULL) {
    stError("s-task:%s failed to prepare the tokenBucket, code:%s", pTask->id.idStr, tstrerror(terrno));
    return terrno;
  }

  // 2MiB per second for sink task
  // 50 times sink operator per second
  code = streamTaskInitTokenBucket(pOutputInfo->pTokenBucket, 35, 35, tsSinkDataRate, pTask->id.idStr);
  if (code) {
    return code;
  }

  pOutputInfo->pNodeEpsetUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset));
  if (pOutputInfo->pNodeEpsetUpdateList == NULL) {
    stError("s-task:%s failed to prepare downstreamUpdateList, code:%s", pTask->id.idStr, tstrerror(terrno));
    return terrno;
  }

  pTask->taskCheckInfo.pList = taosArrayInit(4, sizeof(SDownstreamStatusInfo));
  if (pTask->taskCheckInfo.pList == NULL) {
    stError("s-task:%s failed to prepare taskCheckInfo list, code:%s", pTask->id.idStr, tstrerror(terrno));
    return terrno;
  }

  // an already existing active checkpoint info is kept as is
  if (pTask->chkInfo.pActiveInfo == NULL) {
    code = streamTaskCreateActiveChkptInfo(&pTask->chkInfo.pActiveInfo);
    if (code) {
      stError("s-task:%s failed to create active checkpoint info, code:%s", pTask->id.idStr, tstrerror(code));
      return code;
    }
  }

  return streamTaskSetBackendPath(pTask);
}
526

527
int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) {
127,344✔
528
  if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
127,344✔
529
    return 0;
6,500✔
530
  }
531

532
  int32_t type = pTask->outputInfo.type;
120,844✔
533
  if (type == TASK_OUTPUT__TABLE) {
120,844✔
534
    return 0;
184✔
535
  } else if (type == TASK_OUTPUT__FIXED_DISPATCH) {
120,660✔
536
    return 1;
12,704✔
537
  } else {
538
    SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos;
107,956✔
539
    return taosArrayGetSize(vgInfo);
107,956✔
540
  }
541
}
542

543
int32_t streamTaskGetNumOfUpstream(const SStreamTask* pTask) { return taosArrayGetSize(pTask->upstreamInfo.pList); }
20,280✔
544

545
// Register pUpstreamTask as an upstream of pTask: an endpoint descriptor is created from the upstream task
// and appended to pTask->upstreamInfo.pList, which is created on first use.
// Returns TSDB_CODE_SUCCESS, or terrno when an allocation fails; on failure the descriptor is released and
// the upstream list is left without the new entry.
int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask) {
  SStreamUpstreamEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask);
  if (pEpInfo == NULL) {
    return terrno;
  }

  if (pTask->upstreamInfo.pList == NULL) {
    pTask->upstreamInfo.pList = taosArrayInit(4, POINTER_BYTES);
    if (pTask->upstreamInfo.pList == NULL) {
      int32_t code = terrno;
      taosMemoryFree(pEpInfo);
      return code;
    }
  }

  void* p = taosArrayPush(pTask->upstreamInfo.pList, &pEpInfo);
  if (p == NULL) {
    // the list did not take the pointer, so it is still owned here
    int32_t code = terrno;
    taosMemoryFree(pEpInfo);
    return code;
  }

  return TSDB_CODE_SUCCESS;
}
558

559
// Replace the epset of the first upstream entry whose nodeId equals `nodeId` with pEpSet, if it differs.
// *pUpdated is set to true only after an epset has actually been replaced; it is never reset to false.
// Returns 0 on success (including "no matching entry" and "already identical"), or the error code from
// epsetToStr(); the string form is only used for logging, but a conversion failure still aborts the update.
int32_t streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated) {
  int32_t code = 0;
  char    buf[512] = {0};
  code = epsetToStr(pEpSet, buf, tListLen(buf));
  if (code != 0) {
    stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
    return code;
  }

  int32_t numOfUpstream = taosArrayGetSize(pTask->upstreamInfo.pList);
  for (int32_t i = 0; i < numOfUpstream; ++i) {
    SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i);
    if (pInfo->nodeId != nodeId) {
      continue;
    }

    if (isEpsetEqual(&pInfo->epSet, pEpSet)) {
      stDebug("s-task:0x%x not update upstreamInfo, since identical, task:0x%x(nodeId:%d) epset:%s", pTask->id.taskId,
              pInfo->taskId, nodeId, buf);
    } else {
      char tmp[512] = {0};
      code = epsetToStr(&pInfo->epSet, tmp, tListLen(tmp));
      if (code != 0) {
        stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
        return code;
      }

      epsetAssign(&pInfo->epSet, pEpSet);
      // flag the change only after it has been applied, so the error return above cannot report an update
      // that did not happen
      *pUpdated = true;
      stDebug("s-task:0x%x update the upstreamInfo taskId:0x%x(nodeId:%d) newEpset:%s old:%s", pTask->id.taskId,
              pInfo->taskId, nodeId, buf, tmp);
    }

    // only the first entry on the given node is handled
    break;
  }

  return code;
}
597

598
// Destroy the upstream list together with every entry it points to (via freeUpstreamItem) and reset the
// closed-input counter. Nothing is done when there is no list.
void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo) {
  if (pUpstreamInfo->pList == NULL) {
    return;
  }

  taosArrayDestroyEx(pUpstreamInfo->pList, freeUpstreamItem);
  pUpstreamInfo->pList = NULL;
  pUpstreamInfo->numOfClosed = 0;
}
58,788✔
605

606
// Configure pTask to dispatch to exactly one downstream task: the output type becomes
// TASK_OUTPUT__FIXED_DISPATCH, the message type TDMT_STREAM_TASK_DISPATCH, and the fixed dispatcher receives
// the downstream task's id, node id and epset.
void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask) {
  pTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH;
  pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;

  STaskDispatcherFixed* pFixed = &pTask->outputInfo.fixedDispatcher;
  pFixed->epSet = pDownstreamTask->info.epSet;
  pFixed->nodeId = pDownstreamTask->info.nodeId;
  pFixed->taskId = pDownstreamTask->id.taskId;
}
947✔
615

616
// Replace the epset of the downstream target located on node `nodeId` with pEpSet, if it differs.
//  - shuffle dispatch: the first vgroup entry whose vgId equals nodeId is handled;
//  - fixed dispatch:   the single dispatcher is handled when its nodeId matches;
//  - any other output type: nothing to do.
// *pUpdated is set to true only after an epset has actually been replaced; it is never reset to false.
// Returns 0 on success, or the error code from epsetToStr(); the string form is only used for logging, but
// a conversion failure still aborts the update.
int32_t streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated) {
  char    buf[512] = {0};
  int32_t code = epsetToStr(pEpSet, buf, tListLen(buf));
  if (code != 0) {
    stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
    return code;
  }

  int32_t id = pTask->id.taskId;
  int8_t  type = pTask->outputInfo.type;

  if (type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
    SArray* pVgs = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos;
    int32_t numOfVgs = taosArrayGetSize(pVgs);  // the list is not resized inside the loop

    for (int32_t i = 0; i < numOfVgs; i++) {
      SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i);
      if (pVgInfo == NULL || pVgInfo->vgId != nodeId) {
        continue;
      }

      if (isEpsetEqual(&pVgInfo->epSet, pEpSet)) {
        stDebug("s-task:0x%x not update dispatch info, since identical, task:0x%x(nodeId:%d) epset:%s", id,
                pVgInfo->taskId, nodeId, buf);
      } else {
        char tmp[512] = {0};
        code = epsetToStr(&pVgInfo->epSet, tmp, tListLen(tmp));
        if (code != 0) {
          stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
          return code;
        }

        epsetAssign(&pVgInfo->epSet, pEpSet);
        // flag the change only after it has been applied
        *pUpdated = true;
        stDebug("s-task:0x%x update dispatch info, task:0x%x(nodeId:%d) newEpset:%s old:%s", id, pVgInfo->taskId,
                nodeId, buf, tmp);
      }

      // only the first vgroup on the given node is handled
      break;
    }
  } else if (type == TASK_OUTPUT__FIXED_DISPATCH) {
    STaskDispatcherFixed* pDispatcher = &pTask->outputInfo.fixedDispatcher;
    if (pDispatcher->nodeId == nodeId) {
      if (isEpsetEqual(&pDispatcher->epSet, pEpSet)) {
        stDebug("s-task:0x%x not update dispatch info, since identical, task:0x%x(nodeId:%d) epset:%s", id,
                pDispatcher->taskId, nodeId, buf);
      } else {
        char tmp[512] = {0};
        code = epsetToStr(&pDispatcher->epSet, tmp, tListLen(tmp));
        if (code != 0) {
          stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
          return code;
        }

        epsetAssign(&pDispatcher->epSet, pEpSet);
        // flag the change only after it has been applied
        *pUpdated = true;
        stDebug("s-task:0x%x update dispatch info, task:0x%x(nodeId:%d) newEpset:%s old:%s", id, pDispatcher->taskId,
                nodeId, buf, tmp);
      }
    }
  }

  return code;
}
684

685
// Stop a task: send TASK_EVENT_STOP to its state machine, kill the executor of non-sink tasks that have
// one, then poll every 100ms until the task reports idle.
// Returns the error from the STOP event immediately if it fails. Otherwise returns the result of the last
// operation performed, so a failed qKillTask() is logged, waited through and then reported to the caller.
int32_t streamTaskStop(SStreamTask* pTask) {
  const char* idStr = pTask->id.idStr;
  int32_t     vgId = pTask->pMeta->vgId;
  int64_t     startTs = taosGetTimestampMs();

  int32_t code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_STOP);
  if (code) {
    stError("failed to handle STOP event, s-task:%s, code:%s", idStr, tstrerror(code));
    return code;
  }

  bool hasExecutor = (pTask->info.taskLevel != TASK_LEVEL__SINK) && (pTask->exec.pExecutor != NULL);
  if (hasExecutor) {
    code = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
    if (code != TSDB_CODE_SUCCESS) {
      stError("s-task:%s failed to kill task related query handle, code:%s", idStr, tstrerror(code));
    }
  }

  for (;;) {
    if (streamTaskIsIdle(pTask)) {
      break;
    }

    stDebug("s-task:%s level:%d wait for task to be idle and then close, check again in 100ms", idStr,
            pTask->info.taskLevel);
    taosMsleep(100);
  }

  int64_t elapsedMs = taosGetTimestampMs() - startTs;
  stDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", vgId, idStr, elapsedMs);
  return code;
}
713

714
// Apply a list of node endpoint updates (SNodeUpdateInfo entries) to the task. The task's update statistics
// (latestUpdateTs, updateCount) are refreshed first, regardless of whether anything ends up changing.
// Failures for individual nodes are logged and do not stop the processing of the remaining nodes.
// Returns true when at least one epset held by the task was replaced.
bool streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) {
  STaskExecStatisInfo* pStatis = &pTask->execInfo;
  bool                 updated = false;

  int32_t numOfNodes = taosArrayGetSize(pNodeList);
  int64_t prevTs = pStatis->latestUpdateTs;

  pStatis->latestUpdateTs = taosGetTimestampMs();
  pStatis->updateCount += 1;
  stDebug("s-task:0x%x update task nodeEp epset, updatedNodes:%d, updateCount:%d, prevTs:%" PRId64, pTask->id.taskId,
          numOfNodes, pStatis->updateCount, prevTs);

  for (int32_t i = 0; i < numOfNodes; ++i) {
    SNodeUpdateInfo* pNodeInfo = taosArrayGet(pNodeList, i);
    if (pNodeInfo != NULL) {
      int32_t code = doUpdateTaskEpset(pTask, pNodeInfo->nodeId, &pNodeInfo->newEp, &updated);
      if (code) {
        stError("s-task:0x%x failed to update the task nodeEp epset, code:%s", pTask->id.taskId, tstrerror(code));
      }
    }
  }

  return updated;
}
740

741
// Reset the recorded stage of every upstream task to -1.
// Source-level tasks return immediately without touching the upstream list.
void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) {
  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    return;
  }

  int32_t size = taosArrayGetSize(pTask->upstreamInfo.pList);
  for (int32_t i = 0; i < size; ++i) {
    SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i);
    // guard against an entry that could not be fetched, as streamTaskGetUpstreamTaskEpInfo() does
    if (pInfo == NULL) {
      continue;
    }

    pInfo->stage = -1;
  }

  stDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr);
}
754

755
// Mark every upstream entry as allowed to deliver data and reset the closed-input counter.
// Does nothing when the upstream list is empty.
void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) {
  int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList);
  if (num == 0) {
    return;
  }

  for (int32_t i = 0; i < num; ++i) {
    SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i);
    // guard against an entry that could not be fetched, as streamTaskGetUpstreamTaskEpInfo() does
    if (pInfo == NULL) {
      continue;
    }

    pInfo->dataAllowed = true;
  }

  pTask->upstreamInfo.numOfClosed = 0;
  stDebug("s-task:%s opening up inputQ for %d upstream tasks", pTask->id.idStr, num);
}
769

770
// Stop accepting data from the upstream task identified by taskId.
// If the upstream entry exists and is currently open, it is flagged as closed and the closed-input
// counter is incremented, unless the counter already equals the number of upstream tasks, in which
// case an error is logged and the counter is left unchanged.
void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) {
  SStreamUpstreamEpInfo* pInfo = NULL;
  streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pInfo);

  if ((pInfo == NULL) || (!pInfo->dataAllowed)) {
    return;
  }

  pInfo->dataAllowed = false;
  if (pTask->upstreamInfo.numOfClosed < streamTaskGetNumOfUpstream(pTask)) {
    // the updated counter value is not needed here
    (void)atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1);
  } else {
    stError("s-task:%s not inc closed input, since they have been all closed already", pTask->id.idStr);
  }
}
8,593✔
783

784
// Re-open the input from the upstream task identified by taskId.
// Only acts when the upstream entry exists and is currently closed: the closed-input counter is
// decremented atomically and the entry is flagged as open again.
void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId) {
  SStreamUpstreamEpInfo* pUpstream = NULL;
  streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pUpstream);

  if (pUpstream == NULL || pUpstream->dataAllowed) {
    return;
  }

  int32_t remain = atomic_sub_fetch_32(&pTask->upstreamInfo.numOfClosed, 1);
  stDebug("s-task:%s open inputQ for upstream:0x%x, remain closed:%d", pTask->id.idStr, taskId, remain);
  pUpstream->dataAllowed = true;
}
×
794

795
// True when the closed-input counter equals the number of entries in the upstream list.
bool streamTaskIsAllUpstreamClosed(SStreamTask* pTask) {
  SUpstreamInfo* pUpInfo = &pTask->upstreamInfo;
  return taosArrayGetSize(pUpInfo->pList) == pUpInfo->numOfClosed;
}
798

799
// Move the schedule status from INACTIVE to WAITING while holding the task lock.
// Returns true only when the transition was performed; any other current status is left as is.
bool streamTaskSetSchedStatusWait(SStreamTask* pTask) {
  streamMutexLock(&pTask->lock);

  const bool wasInactive = (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE);
  if (wasInactive) {
    pTask->status.schedStatus = TASK_SCHED_STATUS__WAITING;
  }

  streamMutexUnlock(&pTask->lock);
  return wasInactive;
}
811

812
// Move the schedule status from WAITING to ACTIVE while holding the task lock.
// Returns the status observed before the (possible) transition.
int8_t streamTaskSetSchedStatusActive(SStreamTask* pTask) {
  int8_t prev;

  streamMutexLock(&pTask->lock);
  prev = pTask->status.schedStatus;
  if (prev == TASK_SCHED_STATUS__WAITING) {
    pTask->status.schedStatus = TASK_SCHED_STATUS__ACTIVE;
  }
  streamMutexUnlock(&pTask->lock);

  return prev;
}
822

823
// Unconditionally set the schedule status to INACTIVE while holding the task lock.
// Returns the status that was in place before the change.
int8_t streamTaskSetSchedStatusInactive(SStreamTask* pTask) {
  int8_t prev;

  streamMutexLock(&pTask->lock);
  prev = pTask->status.schedStatus;
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
  streamMutexUnlock(&pTask->lock);

  return prev;
}
831

832
// For a fill-history task, detach it from its related stream task.
// The related stream task (pTask->streamTaskId) is acquired from the meta, its fill-history link is
// cleared under the stream task's lock and, when resetRelHalt is non-zero, its persistent status is
// set to TASK_STATUS__READY. The stream task is then saved and released.
// Returns 0 for non fill-history tasks, the acquire error if the stream task cannot be acquired,
// otherwise the result of streamMetaSaveTask().
int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t resetRelHalt) {
  if (pTask->info.fillHistory == 0) {
    return 0;
  }

  SStreamMeta* pMeta = pTask->pMeta;
  SStreamTask* pStreamTask = NULL;

  int32_t code = streamMetaAcquireTaskUnsafe(pMeta, &pTask->streamTaskId, &pStreamTask);
  if (code != 0) {
    return code;
  }

  stDebug("s-task:%s clear the related stream task:0x%x attr to fill-history task", pTask->id.idStr,
          (int32_t)pTask->streamTaskId.taskId);

  streamMutexLock(&(pStreamTask->lock));
  CLEAR_RELATED_FILLHISTORY_TASK(pStreamTask);

  if (resetRelHalt) {
    stDebug("s-task:0x%" PRIx64 " set the persistent status attr to be ready, prev:%s, status in sm:%s",
            pTask->streamTaskId.taskId, streamTaskGetStatusStr(pStreamTask->status.taskStatus),
            streamTaskGetStatus(pStreamTask).name);
    pStreamTask->status.taskStatus = TASK_STATUS__READY;
  }

  // persist the change before the lock is dropped
  code = streamMetaSaveTask(pMeta, pStreamTask);
  streamMutexUnlock(&(pStreamTask->lock));

  streamMetaReleaseTask(pMeta, pStreamTask);
  return code;
}
864

865
// Build a TDMT_STREAM_TASK_DROP request for the given task and put it on the write queue.
// Returns terrno when the request buffer cannot be allocated, otherwise the result of
// tmsgPutToQueue().
int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, int64_t resetRelHalt) {
  SVDropStreamTaskReq* pDropReq = rpcMallocCont(sizeof(SVDropStreamTaskReq));
  if (pDropReq == NULL) {
    return terrno;
  }

  pDropReq->head.vgId = vgId;
  pDropReq->streamId = pTaskId->streamId;
  pDropReq->taskId = pTaskId->taskId;
  pDropReq->resetRelHalt = resetRelHalt;

  SRpcMsg rpcMsg = {0};
  rpcMsg.msgType = TDMT_STREAM_TASK_DROP;
  rpcMsg.pCont = pDropReq;
  rpcMsg.contLen = sizeof(SVDropStreamTaskReq);

  int32_t ret = tmsgPutToQueue(pMsgCb, WRITE_QUEUE, &rpcMsg);
  if (ret == TSDB_CODE_SUCCESS) {
    stDebug("vgId:%d build and send drop task:0x%x msg", vgId, pTaskId->taskId);
  } else {
    stError("vgId:%d failed to send drop task:0x%x msg, code:%s", vgId, pTaskId->taskId, tstrerror(ret));
  }

  return ret;
}
886

887
// Encode a checkpoint report for this task and send it to the mnode.
// The report carries the task/stream ids, the vnode id, the dropRelHTask flag, the active
// checkpoint's transId/activeId and the processed version / start timestamp from pCheckpointInfo.
// Returns the encoder error, the allocation error, or the result of tmsgSendReq(). Failures are
// reported with their actual (non-zero) error code instead of a bare -1, matching
// streamTaskSendCheckpointReq().
int32_t streamSendChkptReportMsg(SStreamTask* pTask, SCheckpointInfo* pCheckpointInfo, int8_t dropRelHTask) {
  int32_t                code = 0;
  int32_t                tlen = 0;
  int32_t                vgId = pTask->pMeta->vgId;
  const char*            id = pTask->id.idStr;
  SActiveCheckpointInfo* pActive = pCheckpointInfo->pActiveInfo;

  SCheckpointReport req = {.streamId = pTask->id.streamId,
                           .taskId = pTask->id.taskId,
                           .nodeId = vgId,
                           .dropHTask = dropRelHTask,
                           .transId = pActive->transId,
                           .checkpointId = pActive->activeId,
                           .checkpointVer = pCheckpointInfo->processedVer,
                           .checkpointTs = pCheckpointInfo->startTs};

  // first pass: compute the encoded size
  tEncodeSize(tEncodeStreamTaskChkptReport, &req, tlen, code);
  if (code < 0) {
    stError("s-task:%s vgId:%d encode stream task checkpoint-report failed, code:%s", id, vgId, tstrerror(code));
    return code;
  }

  void* buf = rpcMallocCont(tlen);
  if (buf == NULL) {
    // never report success when nothing could be sent, even if terrno was left unset
    code = (terrno != 0) ? terrno : TSDB_CODE_OUT_OF_MEMORY;
    stError("s-task:%s vgId:%d failed to alloc stream task checkpoint-report msg, code:%s", id, vgId,
            tstrerror(code));
    return code;
  }

  // second pass: encode into the rpc buffer
  SEncoder encoder;
  tEncoderInit(&encoder, buf, tlen);
  if ((code = tEncodeStreamTaskChkptReport(&encoder, &req)) < 0) {
    rpcFreeCont(buf);
    tEncoderClear(&encoder);
    stError("s-task:%s vgId:%d encode stream task checkpoint-report msg failed, code:%s", id, vgId, tstrerror(code));
    return code;
  }
  tEncoderClear(&encoder);

  SRpcMsg msg = {0};
  initRpcMsg(&msg, TDMT_MND_STREAM_CHKPT_REPORT, buf, tlen);
  stDebug("s-task:%s vgId:%d build and send task checkpoint-report to mnode", id, vgId);

  return tmsgSendReq(&pTask->info.mnodeEpset, &msg);
}
932

933
STaskId streamTaskGetTaskId(const SStreamTask* pTask) {
66,501✔
934
  STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
66,501✔
935
  return id;
66,501✔
936
}
937

938
// Initialise the launch bookkeeping of a history task: no retries yet, the default wait interval,
// and the tick count derived from that interval.
void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo) {
  pInfo->retryTimes = 0;
  pInfo->waitInterval = LAUNCH_HTASK_INTERVAL;
  pInfo->tickCount = ceil(LAUNCH_HTASK_INTERVAL / WAIT_FOR_MINIMAL_INTERVAL);
}
1,877✔
943

944
// Prepare the next launch retry: count the retry, scale the wait interval by
// RETRY_LAUNCH_INTERVAL_INC_RATE and recompute the tick count from the new interval.
void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo) {
  pInfo->retryTimes += 1;

  pInfo->waitInterval *= RETRY_LAUNCH_INTERVAL_INC_RATE;
  pInfo->tickCount = ceil(pInfo->waitInterval / WAIT_FOR_MINIMAL_INTERVAL);
}
1,872✔
949

950
// Seed a status entry for the task: ids and node id are copied from the task, stage is set to -1
// and the status to TASK_STATUS__STOP. Other fields of pEntry are not touched.
void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask) {
  // identity
  pEntry->id.taskId = pTask->id.taskId;
  pEntry->id.streamId = pTask->id.streamId;
  pEntry->nodeId = pTask->info.nodeId;

  // initial state
  pEntry->status = TASK_STATUS__STOP;
  pEntry->stage = -1;
}
8,623✔
957

958
// Copy the runtime fields of one status entry into another.
// Only the fields listed below are copied; the id and nodeId of pDst are left unchanged.
void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) {
  // state
  pDst->status = pSrc->status;
  pDst->stage = pSrc->stage;
  pDst->startTime = pSrc->startTime;
  pDst->hTaskId = pSrc->hTaskId;

  // input / processing / output statistics
  pDst->inputQUsed = pSrc->inputQUsed;
  pDst->inputRate = pSrc->inputRate;
  pDst->procsTotal = pSrc->procsTotal;
  pDst->procsThroughput = pSrc->procsThroughput;
  pDst->outputTotal = pSrc->outputTotal;
  pDst->outputThroughput = pSrc->outputThroughput;
  pDst->sinkQuota = pSrc->sinkQuota;
  pDst->sinkDataSize = pSrc->sinkDataSize;

  // version and checkpoint progress
  pDst->processedVer = pSrc->processedVer;
  pDst->verRange = pSrc->verRange;
  pDst->checkpointInfo = pSrc->checkpointInfo;
  pDst->startCheckpointId = pSrc->startCheckpointId;
  pDst->startCheckpointVer = pSrc->startCheckpointVer;
}
50,482✔
978

979
// Build a status snapshot of the task.
// Identity, state-machine status, vnode id and stage come from the task and its meta; queue usage,
// totals and throughputs are converted with SIZE_IN_MiB / SIZE_IN_KiB; the latest checkpoint
// id/version/time come from pTask->chkInfo. Every field not assigned here stays zero.
STaskStatusEntry streamTaskGetStatusEntry(SStreamTask* pTask) {
  SStreamMeta*         pMeta = pTask->pMeta;
  STaskExecStatisInfo* pExecInfo = &pTask->execInfo;

  STaskStatusEntry entry = {0};

  entry.id = streamTaskGetTaskId(pTask);
  entry.status = streamTaskGetStatus(pTask).state;
  entry.nodeId = pMeta->vgId;
  entry.stage = pMeta->stage;

  entry.inputQUsed = SIZE_IN_MiB(streamQueueGetItemSize(pTask->inputq.queue));
  entry.startTime = pExecInfo->readyTs;

  entry.checkpointInfo.latestId = pTask->chkInfo.checkpointId;
  entry.checkpointInfo.latestVer = pTask->chkInfo.checkpointVer;
  entry.checkpointInfo.latestTime = pTask->chkInfo.checkpointTime;
  entry.checkpointInfo.latestSize = 0;
  entry.checkpointInfo.remoteBackup = 0;
  entry.checkpointInfo.consensusChkptId = 0;
  entry.checkpointInfo.consensusTs = 0;

  entry.hTaskId = pTask->hTaskInfo.id.taskId;
  entry.procsTotal = SIZE_IN_MiB(pExecInfo->inputDataSize);
  entry.outputTotal = SIZE_IN_MiB(pExecInfo->outputDataSize);
  entry.procsThroughput = SIZE_IN_KiB(pExecInfo->procsThroughput);
  entry.outputThroughput = SIZE_IN_KiB(pExecInfo->outputThroughput);
  entry.startCheckpointId = pExecInfo->startCheckpointId;
  entry.startCheckpointVer = pExecInfo->startCheckpointVer;

  return entry;
}
1008

1009
// Callback passed to streamTaskHandleEventAsync() for TASK_EVENT_PAUSE.
// Increments the meta-level paused-task counter and, for a fill-history task, kills the executor
// task so that the tsdb file scan stops. `param` is unused.
// Returns 0, or the result of qKillTask() for fill-history tasks.
static int32_t taskPauseCallback(SStreamTask* pTask, void* param) {
  SStreamMeta* pMeta = pTask->pMeta;

  int32_t pausedCnt = atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1);
  stInfo("vgId:%d s-task:%s pause stream task. paused task num:%d", pMeta->vgId, pTask->id.idStr, pausedCnt);

  // in case of fill-history task, stop the tsdb file scan operation.
  int32_t ret = 0;
  if (pTask->info.fillHistory == 1) {
    ret = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
  }

  stDebug("vgId:%d s-task:%s set pause flag and pause task", pMeta->vgId, pTask->id.idStr);
  return ret;
}
1025

1026
// Submit an asynchronous PAUSE event to the task's state machine, with taskPauseCallback as the
// callback. A submission failure is only logged.
void streamTaskPause(SStreamTask* pTask) {
  int32_t ret = streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_PAUSE, taskPauseCallback, NULL);
  if (ret == 0) {
    return;
  }

  stError("s-task:%s failed handle pause event async, code:%s", pTask->id.idStr, tstrerror(ret));
}
1,362✔
1032

1033
// Resume the task by restoring its previous status.
// When streamTaskRestoreStatus() succeeds the meta-level paused-task counter is decremented and the
// transition is logged; otherwise only a "no need to resume" message is logged.
void streamTaskResume(SStreamTask* pTask) {
  SStreamTaskState before = streamTaskGetStatus(pTask);
  SStreamMeta*     pMeta = pTask->pMeta;

  if (streamTaskRestoreStatus(pTask) != TSDB_CODE_SUCCESS) {
    stInfo("s-task:%s status:%s no need to resume, paused task(s):%d", pTask->id.idStr, before.name,
           pMeta->numOfPausedTasks);
    return;
  }

  char*   pNow = streamTaskGetStatus(pTask).name;
  int32_t pausedCnt = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1);
  stInfo("s-task:%s status:%s resume from %s, paused task(s):%d", pTask->id.idStr, pNow, before.name, pausedCnt);
}
1,344✔
1047

1048
// True when the task's level is TASK_LEVEL__SINK.
bool streamTaskIsSinkTask(const SStreamTask* pTask) {
  const bool isSink = (TASK_LEVEL__SINK == pTask->info.taskLevel);
  return isSink;
}
98,441✔
1049

1050
// this task must success
1051
// Encode a checkpoint request (stream id, task id, vnode id) and send it to the mnode.
// Returns TSDB_CODE_INVALID_MSG when the size pass fails, terrno when the buffer cannot be
// allocated, the encoder error when encoding fails, otherwise the result of tmsgSendReq().
int32_t streamTaskSendCheckpointReq(SStreamTask* pTask) {
  const char* id = pTask->id.idStr;
  int32_t     vgId = pTask->pMeta->vgId;
  int32_t     msgLen = 0;
  int32_t     code = 0;

  SStreamTaskCheckpointReq req = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .nodeId = vgId};

  // size pass
  tEncodeSize(tEncodeStreamTaskCheckpointReq, &req, msgLen, code);
  if (code < 0) {
    stError("s-task:%s vgId:%d encode stream task req checkpoint failed, code:%s", id, vgId, tstrerror(code));
    return TSDB_CODE_INVALID_MSG;
  }

  void* pCont = rpcMallocCont(msgLen);
  if (pCont == NULL) {
    stError("s-task:%s vgId:%d encode stream task req checkpoint msg failed, code:Out of memory", id, vgId);
    return terrno;
  }

  // encode pass
  SEncoder encoder;
  tEncoderInit(&encoder, pCont, msgLen);
  code = tEncodeStreamTaskCheckpointReq(&encoder, &req);
  if (code < 0) {
    rpcFreeCont(pCont);
    tEncoderClear(&encoder);
    stError("s-task:%s vgId:%d encode stream task req checkpoint msg failed, code:%s", id, vgId, tstrerror(code));
    return code;
  }

  tEncoderClear(&encoder);

  SRpcMsg rpcMsg = {0};
  initRpcMsg(&rpcMsg, TDMT_MND_STREAM_REQ_CHKPT, pCont, msgLen);
  stDebug("s-task:%s vgId:%d build and send task checkpoint req", id, vgId);

  return tmsgSendReq(&pTask->info.mnodeEpset, &rpcMsg);
}
1087

1088
// Look up the upstream entry whose taskId matches and store it in *pEpInfo.
// *pEpInfo is set to NULL first and stays NULL when no entry matches (an error is logged) or when
// an entry cannot be fetched from the list (the search stops silently).
void streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId, SStreamUpstreamEpInfo** pEpInfo) {
  *pEpInfo = NULL;

  SArray* pUpList = pTask->upstreamInfo.pList;
  int32_t total = taosArrayGetSize(pUpList);

  for (int32_t idx = 0; idx < total; ++idx) {
    SStreamUpstreamEpInfo* pCur = taosArrayGetP(pUpList, idx);
    if (pCur == NULL) {
      return;
    }

    if (pCur->taskId != taskId) {
      continue;
    }

    *pEpInfo = pCur;
    return;
  }

  stError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId);
}
1106

1107
// Return the endpoint set of the downstream task identified by taskId, or NULL if it is not found.
// For a fixed dispatcher the single downstream task is checked; for a shuffle dispatcher the
// vgroup list is searched.
// The dispatch kind is read from pTask->outputInfo.type: the TASK_OUTPUT__* constants describe the
// output type, not the task level, so comparing them against info.taskLevel selected the wrong
// branch.
// NOTE(review): relies on SStreamTask::outputInfo having a `type` member - confirm against tstream.h.
SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId) {
  if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) {
    if (pTask->outputInfo.fixedDispatcher.taskId == taskId) {
      return &pTask->outputInfo.fixedDispatcher.epSet;
    }
  } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
    SArray* pList = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos;
    int32_t numOfVgroups = taosArrayGetSize(pList);  // loop-invariant, evaluated once

    for (int32_t i = 0; i < numOfVgroups; ++i) {
      SVgroupInfo* pVgInfo = taosArrayGet(pList, i);
      if (pVgInfo == NULL) {
        continue;
      }

      if (pVgInfo->taskId == taskId) {
        return &pVgInfo->epSet;
      }
    }
  }

  return NULL;
}
1128

1129
int32_t createStreamTaskIdStr(int64_t streamId, int32_t taskId, const char** pId) {
14,102✔
1130
  char buf[128] = {0};
14,102✔
1131
  sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId);
14,102✔
1132
  *pId = taosStrdup(buf);
14,102✔
1133

1134
  if (*pId == NULL) {
14,099!
1135
    return terrno;
×
1136
  } else {
1137
    return TSDB_CODE_SUCCESS;
14,103✔
1138
  }
1139
}
1140

1141
// Convert a retrieve request into a data block and put it into the task's input queue.
// Returns the allocation error (also stored in terrno), the conversion error (the queue item is
// freed in that case), or the result of streamTaskPutDataIntoInputQ().
static int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq) {
  const char*       idStr = pTask->id.idStr;
  SStreamDataBlock* pBlock;

  int32_t ret = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SStreamDataBlock), (void**)&pBlock);
  if (ret != 0) {
    stError("s-task:%s failed to allocated retrieve-block", idStr);
    terrno = ret;
    return ret;
  }

  pBlock->type = STREAM_INPUT__DATA_RETRIEVE;
  pBlock->srcVgId = 0;

  ret = streamRetrieveReqToData(pReq, pBlock, idStr);
  if (ret != TSDB_CODE_SUCCESS) {
    stError("s-task:%s failed to convert retrieve-data to block, code:%s", idStr, tstrerror(ret));
    taosFreeQitem(pBlock);
    return ret;
  }

  ret = streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pBlock);
  if (ret != TSDB_CODE_SUCCESS) {
    stError("s-task:%s failed to put retrieve-block into inputQ, inputQ is full, discard the retrieve msg",
            idStr);
  }

  return ret;
}
1169

1170
// Enqueue the retrieve request and, if that succeeds, try to schedule the task for execution.
// Returns the enqueue error, otherwise the result of streamTrySchedExec().
int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq) {
  int32_t ret = streamTaskEnqueueRetrieve(pTask, pReq);
  if (ret == 0) {
    ret = streamTrySchedExec(pTask);
  }
  return ret;
}
1177

1178
// Set the removeBackendFiles flag in the task status.
void streamTaskSetRemoveBackendFiles(SStreamTask* pTask) {
  pTask->status.removeBackendFiles = true;
}
6,984✔
1179

1180
void streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId) {
×
1181
  if (pTransId != NULL) {
×
1182
    *pTransId = pTask->chkInfo.pActiveInfo->transId;
×
1183
  }
1184

1185
  if (pCheckpointId != NULL) {
×
1186
    *pCheckpointId = pTask->chkInfo.pActiveInfo->activeId;
×
1187
  }
1188
}
×
1189

1190
// Store activeCheckpointId as the active checkpoint id of the task. Always returns
// TSDB_CODE_SUCCESS.
int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId) {
  SActiveCheckpointInfo* pActive = pTask->chkInfo.pActiveInfo;
  pActive->activeId = activeCheckpointId;

  return TSDB_CODE_SUCCESS;
}
1194

1195
// Record a failed checkpoint: transId is stored, and checkpointId is written to both the active id
// and the failed id of the task's active checkpoint info.
void streamTaskSetFailedChkptInfo(SStreamTask* pTask, int32_t transId, int64_t checkpointId) {
  SActiveCheckpointInfo* pActive = pTask->chkInfo.pActiveInfo;

  pActive->transId = transId;
  pActive->activeId = checkpointId;
  pActive->failedId = checkpointId;

  stDebug("s-task:%s set failed checkpointId:%"PRId64, pTask->id.idStr, checkpointId);
}
×
1201

1202
// Allocate and initialise an SActiveCheckpointInfo: zeroed memory, an initialised lock and three
// empty arrays (dispatch-trigger list, ready-msg list, checkpoint-ready-recv list).
// On success the new object is stored in *pRes and TSDB_CODE_SUCCESS is returned.
// On failure nothing is leaked, *pRes is left untouched and a non-zero error code is returned.
int32_t streamTaskCreateActiveChkptInfo(SActiveCheckpointInfo** pRes) {
  SActiveCheckpointInfo* pInfo = taosMemoryCalloc(1, sizeof(SActiveCheckpointInfo));
  if (pInfo == NULL) {
    return terrno;
  }

  int32_t code = taosThreadMutexInit(&pInfo->lock, NULL);
  if (code != TSDB_CODE_SUCCESS) {
    // the lock was never initialised, so only the memory has to be released
    taosMemoryFree(pInfo);
    return code;
  }

  pInfo->pDispatchTriggerList = taosArrayInit(4, sizeof(STaskTriggerSendInfo));
  pInfo->pReadyMsgList = taosArrayInit(4, sizeof(STaskCheckpointReadyInfo));
  pInfo->pCheckpointReadyRecvList = taosArrayInit(4, sizeof(STaskDownstreamReadyInfo));

  if (pInfo->pDispatchTriggerList == NULL || pInfo->pReadyMsgList == NULL ||
      pInfo->pCheckpointReadyRecvList == NULL) {
    // capture the error before cleanup can overwrite terrno; never report success here
    code = (terrno != 0) ? terrno : TSDB_CODE_OUT_OF_MEMORY;

    // releases the lock, whichever arrays were created, and pInfo itself
    // (assumes taosArrayDestroy() tolerates NULL - TODO confirm)
    streamTaskDestroyActiveChkptInfo(pInfo);
    return code;
  }

  *pRes = pInfo;
  return TSDB_CODE_SUCCESS;
}
1220

1221
// Release an SActiveCheckpointInfo: destroy its lock, destroy the three arrays, stop the
// checkpoint-trigger and checkpoint-ready timers if they are set, then free the object.
// A NULL argument is ignored.
void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) {
  if (pInfo == NULL) {
    return;
  }

  streamMutexDestroy(&pInfo->lock);

  taosArrayDestroy(pInfo->pDispatchTriggerList);
  pInfo->pDispatchTriggerList = NULL;

  taosArrayDestroy(pInfo->pReadyMsgList);
  pInfo->pReadyMsgList = NULL;

  taosArrayDestroy(pInfo->pCheckpointReadyRecvList);
  pInfo->pCheckpointReadyRecvList = NULL;

  // trigger-msg timer first, then ready-msg timer
  SStreamTmrInfo* timers[2] = {&pInfo->chkptTriggerMsgTmr, &pInfo->chkptReadyMsgTmr};
  for (int32_t k = 0; k < 2; ++k) {
    if (timers[k]->tmrHandle != NULL) {
      streamTmrStop(timers[k]->tmrHandle);
      timers[k]->tmrHandle = NULL;
    }
  }

  taosMemoryFree(pInfo);
}
1248

1249
//NOTE: clear the checkpoint id, and keep the failed id
1250
void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo) {
5,151✔
1251
  pInfo->activeId = 0;
5,151✔
1252
  pInfo->transId = 0;
5,151✔
1253
  pInfo->allUpstreamTriggerRecv = 0;
5,151✔
1254
  pInfo->dispatchTrigger = false;
5,151✔
1255
//  pInfo->failedId = 0;
1256

1257
  taosArrayClear(pInfo->pDispatchTriggerList);
5,151✔
1258
  taosArrayClear(pInfo->pCheckpointReadyRecvList);
5,147✔
1259
}
5,149✔
1260

1261
const char* streamTaskGetExecType(int32_t type) {
129,096✔
1262
  switch (type) {
129,096!
1263
    case STREAM_EXEC_T_EXTRACT_WAL_DATA:
52,064✔
1264
      return "scan-wal-file";
52,064✔
1265
    case STREAM_EXEC_T_START_ALL_TASKS:
9,649✔
1266
      return "start-all-tasks";
9,649✔
1267
    case STREAM_EXEC_T_START_ONE_TASK:
5,139✔
1268
      return "start-one-task";
5,139✔
1269
    case STREAM_EXEC_T_RESTART_ALL_TASKS:
21✔
1270
      return "restart-all-tasks";
21✔
1271
    case STREAM_EXEC_T_STOP_ALL_TASKS:
4,904✔
1272
      return "stop-all-tasks";
4,904✔
1273
    case STREAM_EXEC_T_RESUME_TASK:
12,547✔
1274
      return "resume-task-from-idle";
12,547✔
1275
    case STREAM_EXEC_T_ADD_FAILED_TASK:
2✔
1276
      return "record-start-failed-task";
2✔
1277
    case 0:
44,821✔
1278
      return "exec-all-tasks";
44,821✔
1279
    default:
×
1280
      return "invalid-exec-type";
×
1281
  }
1282
}
1283

1284
// Allocate an int64_t holding the task's refId, store its address in *pRefId and register it with
// metaRefMgtAdd() under the task's vgId.
// Returns terrno when the allocation fails, otherwise the result of metaRefMgtAdd(). A registration
// failure is logged and *pRefId still points at the allocated value.
int32_t streamTaskAllocRefId(SStreamTask* pTask, int64_t** pRefId) {
  *pRefId = taosMemoryMalloc(sizeof(int64_t));
  if (*pRefId == NULL) {
    stError("s-task:%s failed to alloc new ref id, code:%s", pTask->id.idStr, tstrerror(terrno));
    return terrno;
  }

  **pRefId = pTask->id.refId;

  int32_t ret = metaRefMgtAdd(pTask->pMeta->vgId, *pRefId);
  if (ret != 0) {
    stError("s-task:%s failed to add refId:%" PRId64 " into refId-mgmt, code:%s", pTask->id.idStr, pTask->id.refId,
            tstrerror(ret));
  }

  return ret;
}
1299

1300
// Hand a ref id previously produced by streamTaskAllocRefId() to metaRefMgtRemove().
// A NULL pointer is ignored.
void streamTaskFreeRefId(int64_t* pRefId) {
  if (pRefId != NULL) {
    metaRefMgtRemove(pRefId);
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc