• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3530

16 Nov 2024 07:44AM UTC coverage: 60.219% (-0.7%) from 60.888%
#3530

push

travis-ci

web-flow
Update 03-ad.md

118417 of 252124 branches covered (46.97%)

Branch coverage included in aggregate %.

198982 of 274951 relevant lines covered (72.37%)

6072359.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.76
/source/libs/stream/src/streamTask.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "executor.h"
17
#include "osDir.h"
18
#include "osMemory.h"
19
#include "streamInt.h"
20
#include "streamsm.h"
21
#include "tmisce.h"
22
#include "tstream.h"
23
#include "ttimer.h"
24
#include "wal.h"
25

26
static void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo);
27
static int32_t streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated);
28
static int32_t streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdate);
29
static void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo);
30

31
static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) {
8,104✔
32
  int32_t childId = taosArrayGetSize(pArray);
8,104✔
33
  pTask->info.selfChildId = childId;
8,104✔
34
  void* p = taosArrayPush(pArray, &pTask);
8,104✔
35
  return (p == NULL) ? terrno : TSDB_CODE_SUCCESS;
8,104!
36
}
37

38
static int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet, bool* pUpdated) {
10✔
39
  int32_t code = 0;
10✔
40
  char    buf[512] = {0};
10✔
41

42
  if (pTask->info.nodeId == nodeId) {  // execution task should be moved away
10✔
43
    bool isEqual = isEpsetEqual(&pTask->info.epSet, pEpSet);
2✔
44
    code = epsetToStr(pEpSet, buf, tListLen(buf));
2✔
45
    if (code) { // print error and continue
2!
46
      stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
×
47
      return code;
×
48
    }
49

50
    if (!isEqual) {
2!
51
      (*pUpdated) = true;
×
52
      char tmp[512] = {0};
×
53
      code = epsetToStr(&pTask->info.epSet, tmp, tListLen(tmp));  // only for log file, ignore errors
×
54
      if (code) { // print error and continue
×
55
        stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
×
56
        return code;
×
57
      }
58

59
      epsetAssign(&pTask->info.epSet, pEpSet);
×
60
      stDebug("s-task:0x%x (vgId:%d) self node epset is updated %s, old:%s", pTask->id.taskId, nodeId, buf, tmp);
×
61
    } else {
62
      stDebug("s-task:0x%x (vgId:%d) not updated task epset, since epset identical, %s", pTask->id.taskId, nodeId, buf);
2!
63
    }
64
  }
65

66
  // check for the dispatch info and the upstream task info
67
  int32_t level = pTask->info.taskLevel;
10✔
68
  if (level == TASK_LEVEL__SOURCE) {
10✔
69
    code = streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet, pUpdated);
4✔
70
  } else if (level == TASK_LEVEL__AGG) {
6✔
71
    code = streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet, pUpdated);
2✔
72
    code = streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet, pUpdated);
2✔
73
  } else {  // TASK_LEVEL__SINK
74
    code = streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet, pUpdated);
4✔
75
  }
76

77
  return code;
10✔
78
}
79

80
static void freeItem(void* p) {
×
81
  SStreamContinueExecInfo* pInfo = p;
×
82
  rpcFreeCont(pInfo->msg.pCont);
×
83
}
×
84

85
static void freeUpstreamItem(void* p) {
46,123✔
86
  SStreamUpstreamEpInfo** pInfo = p;
46,123✔
87
  taosMemoryFree(*pInfo);
46,123✔
88
}
46,123✔
89

90
static SStreamUpstreamEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) {
11,658✔
91
  SStreamUpstreamEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamUpstreamEpInfo));
11,658✔
92
  if (pEpInfo == NULL) {
11,658!
93
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
94
    return NULL;
×
95
  }
96

97
  pEpInfo->childId = pTask->info.selfChildId;
11,658✔
98
  pEpInfo->epSet = pTask->info.epSet;
11,658✔
99
  pEpInfo->nodeId = pTask->info.nodeId;
11,658✔
100
  pEpInfo->taskId = pTask->id.taskId;
11,658✔
101
  pEpInfo->stage = -1;
11,658✔
102

103
  return pEpInfo;
11,658✔
104
}
105

106
int32_t tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, bool fillHistory, int32_t trigger,
8,104✔
107
                       int64_t triggerParam, SArray* pTaskList, bool hasFillhistory, int8_t subtableWithoutMd5,
108
                       SStreamTask** p) {
109
  *p = NULL;
8,104✔
110

111
  SStreamTask* pTask = (SStreamTask*)taosMemoryCalloc(1, sizeof(SStreamTask));
8,104✔
112
  if (pTask == NULL) {
8,104!
113
    stError("s-task:0x%" PRIx64 " failed malloc new stream task, size:%d, code:%s", streamId,
×
114
            (int32_t)sizeof(SStreamTask), tstrerror(terrno));
115
    return terrno;
×
116
  }
117

118
  pTask->ver = SSTREAM_TASK_VER;
8,104✔
119
  pTask->id.taskId = tGenIdPI32();
8,104✔
120
  pTask->id.streamId = streamId;
8,104✔
121

122
  pTask->info.taskLevel = taskLevel;
8,104✔
123
  pTask->info.fillHistory = fillHistory;
8,104✔
124
  pTask->info.trigger = trigger;
8,104✔
125
  pTask->info.delaySchedParam = triggerParam;
8,104✔
126
  pTask->subtableWithoutMd5 = subtableWithoutMd5;
8,104✔
127

128
  int32_t code = streamCreateStateMachine(pTask);
8,104✔
129
  if (pTask->status.pSM == NULL || code != TSDB_CODE_SUCCESS) {
8,104!
130
    taosMemoryFreeClear(pTask);
×
131
    return code;
×
132
  }
133

134
  char buf[128] = {0};
8,104✔
135
  sprintf(buf, "0x%" PRIx64 "-0x%x", pTask->id.streamId, pTask->id.taskId);
8,104✔
136

137
  pTask->id.idStr = taosStrdup(buf);
8,104✔
138
  if (pTask->id.idStr == NULL) {
8,104!
139
    stError("s-task:0x%x failed to build task id, code: out of memory", pTask->id.taskId);
×
140
    return terrno;
×
141
  }
142

143
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
8,104✔
144
  pTask->status.taskStatus = fillHistory ? TASK_STATUS__SCAN_HISTORY : TASK_STATUS__READY;
8,104✔
145
  pTask->inputq.status = TASK_INPUT_STATUS__NORMAL;
8,104✔
146
  pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL;
8,104✔
147

148
  pTask->taskCheckInfo.pList = taosArrayInit(4, sizeof(SDownstreamStatusInfo));
8,104✔
149
  code = taosThreadMutexInit(&pTask->taskCheckInfo.checkInfoLock, NULL);
8,104✔
150
  if (code) {
8,104!
151
    return code;
×
152
  }
153

154
  if (fillHistory && !hasFillhistory) {
8,104!
155
    stError("s-task:0x%x create task failed, due to inconsistent fill-history flag", pTask->id.taskId);
×
156
    return TSDB_CODE_INVALID_PARA;
×
157
  }
158

159
  epsetAssign(&(pTask->info.mnodeEpset), pEpset);
8,104✔
160

161
  code = addToTaskset(pTaskList, pTask);
8,104✔
162
  *p = pTask;
8,104✔
163

164
  return code;
8,104✔
165
}
166

167
int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) {
139✔
168
  int64_t skip64;
169
  int8_t  skip8;
170
  int32_t skip32;
171
  int16_t skip16;
172
  SEpSet  epSet;
173

174
  if (tStartDecode(pDecoder) < 0) return -1;
139!
175
  if (tDecodeI64(pDecoder, &pChkpInfo->msgVer) < 0) return -1;
278!
176
  // if (ver <= SSTREAM_TASK_INCOMPATIBLE_VER) return -1;
177

178
  if (tDecodeI64(pDecoder, &skip64) < 0) return -1;
139!
179
  if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
139!
180
  if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
139!
181
  if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
139!
182
  if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
139!
183
  if (tDecodeI16(pDecoder, &skip16) < 0) return -1;
139!
184

185
  if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
139!
186
  if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
139!
187

188
  if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
139!
189
  if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
139!
190
  if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1;
139!
191
  if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1;
139!
192

193
  if (tDecodeI64(pDecoder, &pChkpInfo->checkpointId) < 0) return -1;
278!
194
  if (tDecodeI64(pDecoder, &pChkpInfo->checkpointVer) < 0) return -1;
278!
195

196
  tEndDecode(pDecoder);
139✔
197
  return 0;
139✔
198
}
199

200
int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId) {
4✔
201
  int64_t ver;
202
  if (tStartDecode(pDecoder) < 0) return -1;
4!
203
  if (tDecodeI64(pDecoder, &ver) < 0) return -1;
4!
204
  if (ver <= SSTREAM_TASK_INCOMPATIBLE_VER) return -1;
4!
205

206
  if (tDecodeI64(pDecoder, &pTaskId->streamId) < 0) return -1;
8!
207

208
  int32_t taskId = 0;
4✔
209
  if (tDecodeI32(pDecoder, &taskId) < 0) return -1;
4!
210

211
  pTaskId->taskId = taskId;
4✔
212
  tEndDecode(pDecoder);
4✔
213
  return 0;
4✔
214
}
215

216
void tFreeStreamTask(void* pParam) {
32,294✔
217
  char*        p = NULL;
32,294✔
218
  SStreamTask* pTask = pParam;
32,294✔
219
  int32_t      taskId = pTask->id.taskId;
32,294✔
220

221
  STaskExecStatisInfo* pStatis = &pTask->execInfo;
32,294✔
222

223
  ETaskStatus status1 = TASK_STATUS__UNINIT;
32,294✔
224
  streamMutexLock(&pTask->lock);
32,294✔
225
  if (pTask->status.pSM != NULL) {
32,294✔
226
    SStreamTaskState status = streamTaskGetStatus(pTask);
16,223✔
227
    p = status.name;
16,223✔
228
    status1 = status.state;
16,223✔
229
  }
230
  streamMutexUnlock(&pTask->lock);
32,294✔
231

232
  stDebug("start to free s-task:0x%x %p, state:%s, refId:%" PRId64, taskId, pTask, p, pTask->id.refId);
32,294✔
233

234
  SCheckpointInfo* pCkInfo = &pTask->chkInfo;
32,294✔
235
  stDebug("s-task:0x%x task exec summary: create:%" PRId64 ", init:%" PRId64 ", start:%" PRId64
32,294✔
236
          ", updateCount:%d latestUpdate:%" PRId64 ", latestCheckPoint:%" PRId64 ", ver:%" PRId64
237
          " nextProcessVer:%" PRId64 ", checkpointCount:%d",
238
          taskId, pStatis->created, pStatis->checkTs, pStatis->readyTs, pStatis->updateCount, pStatis->latestUpdateTs,
239
          pCkInfo->checkpointId, pCkInfo->checkpointVer, pCkInfo->nextProcessVer, pStatis->checkpoint);
240

241
  if (pTask->schedInfo.pDelayTimer != NULL) {
32,294✔
242
    streamTmrStop(pTask->schedInfo.pDelayTimer);
1,116✔
243
    pTask->schedInfo.pDelayTimer = NULL;
1,116✔
244
  }
245

246
  if (pTask->hTaskInfo.pTimer != NULL) {
32,294✔
247
    streamTmrStop(pTask->hTaskInfo.pTimer);
1,168✔
248
    pTask->hTaskInfo.pTimer = NULL;
1,168✔
249
  }
250

251
  if (pTask->msgInfo.pRetryTmr != NULL) {
32,294✔
252
    streamTmrStop(pTask->msgInfo.pRetryTmr);
3,436✔
253
    pTask->msgInfo.pRetryTmr = NULL;
3,436✔
254
  }
255

256
  if (pTask->inputq.queue) {
32,294✔
257
    streamQueueClose(pTask->inputq.queue, pTask->id.taskId);
8,085✔
258
    pTask->inputq.queue = NULL;
8,085✔
259
  }
260

261
  if (pTask->outputq.queue) {
32,294✔
262
    streamQueueClose(pTask->outputq.queue, pTask->id.taskId);
8,085✔
263
    pTask->outputq.queue = NULL;
8,084✔
264
  }
265

266
  if (pTask->exec.qmsg) {
32,293✔
267
    taosMemoryFree(pTask->exec.qmsg);
17,139✔
268
  }
269

270
  if (pTask->exec.pExecutor) {
32,293✔
271
    qDestroyTask(pTask->exec.pExecutor);
4,252✔
272
    pTask->exec.pExecutor = NULL;
4,252✔
273
  }
274

275
  if (pTask->exec.pWalReader != NULL) {
32,293✔
276
    walCloseReader(pTask->exec.pWalReader);
4,082✔
277
    pTask->exec.pWalReader = NULL;
4,082✔
278
  }
279

280
  streamClearChkptReadyMsg(pTask->chkInfo.pActiveInfo);
32,293✔
281

282
  if (pTask->msgInfo.pData != NULL) {
32,294✔
283
    clearBufferedDispatchMsg(pTask);
35✔
284
  }
285

286
  if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) {
32,294✔
287
    tDeleteSchemaWrapper(pTask->outputInfo.tbSink.pSchemaWrapper);
15,601!
288
    taosMemoryFree(pTask->outputInfo.tbSink.pTSchema);
15,601✔
289
    tSimpleHashCleanup(pTask->outputInfo.tbSink.pTbInfo);
15,601✔
290
    tDeleteSchemaWrapper(pTask->outputInfo.tbSink.pTagSchema);
15,601✔
291
  } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
16,693✔
292
    taosArrayDestroy(pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos);
13,888✔
293
  }
294

295
  streamTaskCleanupCheckInfo(&pTask->taskCheckInfo);
32,294✔
296
  streamFreeTaskState(pTask, pTask->status.removeBackendFiles ? 1 : 0);
32,294✔
297

298
  if (pTask->pNameMap) {
32,294✔
299
    tSimpleHashCleanup(pTask->pNameMap);
1,483✔
300
  }
301

302
  streamDestroyStateMachine(pTask->status.pSM);
32,294✔
303
  pTask->status.pSM = NULL;
32,294✔
304

305
  streamTaskDestroyUpstreamInfo(&pTask->upstreamInfo);
32,294✔
306

307
  taosMemoryFree(pTask->outputInfo.pTokenBucket);
32,294✔
308
  streamMutexDestroy(&pTask->lock);
32,294✔
309

310
  taosArrayDestroy(pTask->msgInfo.pSendInfo);
32,294✔
311
  pTask->msgInfo.pSendInfo = NULL;
32,294✔
312
  streamMutexDestroy(&pTask->msgInfo.lock);
32,294✔
313

314
  taosArrayDestroy(pTask->outputInfo.pNodeEpsetUpdateList);
32,294✔
315
  pTask->outputInfo.pNodeEpsetUpdateList = NULL;
32,294✔
316

317
  if (pTask->id.idStr != NULL) {
32,294✔
318
    taosMemoryFree((void*)pTask->id.idStr);
16,189✔
319
  }
320

321
  streamTaskDestroyActiveChkptInfo(pTask->chkInfo.pActiveInfo);
32,294✔
322
  pTask->chkInfo.pActiveInfo = NULL;
32,294✔
323

324
  taosMemoryFree(pTask);
32,294✔
325
  stDebug("s-task:0x%x free task completed", taskId);
32,294✔
326
}
32,294✔
327

328
void streamFreeTaskState(SStreamTask* pTask, int8_t remove) {
32,293✔
329
  stDebug("s-task:0x%x start to free task state/backend", pTask->id.taskId);
32,293✔
330
  if (pTask->pState != NULL) {
32,294✔
331
    stDebug("s-task:0x%x start to free task state", pTask->id.taskId);
4,252✔
332
    streamStateClose(pTask->pState, remove);
4,252✔
333

334
    if (remove) taskDbSetClearFileFlag(pTask->pBackend);
4,252✔
335
    taskDbRemoveRef(pTask->pBackend);
4,252✔
336
    pTask->pBackend = NULL;
4,252✔
337
    pTask->pState = NULL;
4,252✔
338
  } else {
339
    stDebug("s-task:0x%x task state is NULL, may del backend:%s", pTask->id.taskId,
28,042✔
340
            pTask->backendPath ? pTask->backendPath : "NULL");
341
    if (remove) {
28,042✔
342
      if (pTask->backendPath != NULL) {
1,650!
343
        stDebug("s-task:0x%x task state is NULL, do del backend:%s", pTask->id.taskId, pTask->backendPath);
1,650!
344
        taosRemoveDir(pTask->backendPath);
1,650✔
345
      }
346
    }
347
  }
348

349
  if (pTask->backendPath != NULL) {
32,294✔
350
    taosMemoryFree(pTask->backendPath);
8,085✔
351
    pTask->backendPath = NULL;
8,085✔
352
  }
353
}
32,294✔
354

355
static void setInitialVersionInfo(SStreamTask* pTask, int64_t ver) {
8,210✔
356
  SCheckpointInfo* pChkInfo = &pTask->chkInfo;
8,210✔
357
  SDataRange*      pRange = &pTask->dataRange;
8,210✔
358

359
  // only set the version info for stream tasks without fill-history task
360
  if ((pTask->info.fillHistory == 0) && (!HAS_RELATED_FILLHISTORY_TASK(pTask))) {
8,210✔
361
    pChkInfo->checkpointVer = ver - 1;  // only update when generating checkpoint
2,237✔
362
    pChkInfo->processedVer = ver - 1;   // already processed version
2,237✔
363
    pChkInfo->nextProcessVer = ver;     // next processed version
2,237✔
364

365
    pRange->range.maxVer = ver;
2,237✔
366
    pRange->range.minVer = ver;
2,237✔
367
  } else {
368
    // the initial value of processedVer/nextProcessVer/checkpointVer for stream task with related fill-history task
369
    // is set at the mnode.
370
    if (pTask->info.fillHistory == 1) {
5,973✔
371
      pChkInfo->checkpointVer = pRange->range.maxVer;
2,987✔
372
      pChkInfo->processedVer = pRange->range.maxVer;
2,987✔
373
      pChkInfo->nextProcessVer = pRange->range.maxVer + 1;
2,987✔
374
    } else {
375
      pChkInfo->checkpointVer = pRange->range.minVer - 1;
2,986✔
376
      pChkInfo->processedVer = pRange->range.minVer - 1;
2,986✔
377
      pChkInfo->nextProcessVer = pRange->range.minVer;
2,986✔
378

379
      {  // for compatible purpose, remove it later
380
        if (pRange->range.minVer == 0) {
2,986✔
381
          pChkInfo->checkpointVer = 0;
1,503✔
382
          pChkInfo->processedVer = 0;
1,503✔
383
          pChkInfo->nextProcessVer = 1;
1,503✔
384
          stDebug("s-task:%s update the processedVer to 0 from -1 due to compatible purpose", pTask->id.idStr);
1,503✔
385
        }
386
      }
387
    }
388
  }
389
}
8,210✔
390

391
int32_t streamTaskSetBackendPath(SStreamTask* pTask) {
8,210✔
392
  int64_t streamId = 0;
8,210✔
393
  int32_t taskId = 0;
8,210✔
394

395
  if (pTask->info.fillHistory) {
8,210✔
396
    streamId = pTask->streamTaskId.streamId;
2,987✔
397
    taskId = pTask->streamTaskId.taskId;
2,987✔
398
  } else {
399
    streamId = pTask->id.streamId;
5,223✔
400
    taskId = pTask->id.taskId;
5,223✔
401
  }
402

403
  char    id[128] = {0};
8,210✔
404
  int32_t nBytes = sprintf(id, "0x%" PRIx64 "-0x%x", streamId, taskId);
8,210✔
405
  if (nBytes < 0 || nBytes >= sizeof(id)) {
8,210!
406
    return TSDB_CODE_OUT_OF_BUFFER;
×
407
  }
408

409
  int32_t len = strlen(pTask->pMeta->path);
8,210✔
410
  pTask->backendPath = (char*)taosMemoryMalloc(len + nBytes + 2);
8,210✔
411
  if (pTask->backendPath == NULL) {
8,210!
412
    return terrno;
×
413
  }
414

415
  (void)sprintf(pTask->backendPath, "%s%s%s", pTask->pMeta->path, TD_DIRSEP, id);
8,210✔
416
  stDebug("s-task:%s set backend path:%s", pTask->id.idStr, pTask->backendPath);
8,210✔
417

418
  return 0;
8,210✔
419
}
420

421
int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver) {
8,210✔
422
  int32_t code = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId, &pTask->id.idStr);
8,210✔
423
  if (code) {
8,210!
424
    stError("0x%x failed create stream task id str, code:%s", pTask->id.taskId, tstrerror(code));
×
425
    return code;
×
426
  }
427

428
  pTask->id.refId = 0;
8,210✔
429
  pTask->inputq.status = TASK_INPUT_STATUS__NORMAL;
8,210✔
430
  pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL;
8,210✔
431

432
  int32_t code1 = streamQueueOpen(512 << 10, &pTask->inputq.queue);
8,210✔
433
  int32_t code2 = streamQueueOpen(512 << 10, &pTask->outputq.queue);
8,210✔
434
  if (code1 || code2) {
8,210!
435
    stError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr);
×
436
    return TSDB_CODE_OUT_OF_MEMORY;
×
437
  }
438

439
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
8,210✔
440

441
  code = streamCreateStateMachine(pTask);
8,210✔
442
  if (pTask->status.pSM == NULL || code != TSDB_CODE_SUCCESS) {
8,210!
443
    stError("s-task:%s failed create state-machine for stream task, initialization failed, code:%s", pTask->id.idStr,
×
444
            tstrerror(code));
445
    return code;
×
446
  }
447

448
  pTask->execInfo.created = taosGetTimestampMs();
8,210✔
449
  setInitialVersionInfo(pTask, ver);
8,210✔
450

451
  pTask->pMeta = pMeta;
8,210✔
452
  pTask->pMsgCb = pMsgCb;
8,210✔
453
  pTask->msgInfo.pSendInfo = taosArrayInit(4, sizeof(SDispatchEntry));
8,210✔
454
  if (pTask->msgInfo.pSendInfo == NULL) {
8,210!
455
    stError("s-task:%s failed to create sendInfo struct for stream task, code:Out of memory", pTask->id.idStr);
×
456
    return terrno;
×
457
  }
458

459
  code = taosThreadMutexInit(&pTask->msgInfo.lock, NULL);
8,210✔
460
  if (code) {
8,210!
461
    stError("s-task:0x%x failed to init msgInfo mutex, code:%s", pTask->id.taskId, tstrerror(code));
×
462
    return code;
×
463
  }
464

465
  TdThreadMutexAttr attr = {0};
8,210✔
466
  code = taosThreadMutexAttrInit(&attr);
8,210✔
467
  if (code != 0) {
8,210!
468
    stError("s-task:%s initElapsed mutex attr failed, code:%s", pTask->id.idStr, tstrerror(code));
×
469
    return code;
×
470
  }
471

472
  code = taosThreadMutexAttrSetType(&attr, PTHREAD_MUTEX_RECURSIVE);
8,210✔
473
  if (code != 0) {
8,210!
474
    stError("s-task:%s set mutex attr recursive, code:%s", pTask->id.idStr, tstrerror(code));
×
475
    return code;
×
476
  }
477

478
  code = taosThreadMutexInit(&pTask->lock, &attr);
8,210✔
479
  if (code) {
8,210!
480
    return code;
×
481
  }
482

483
  code = taosThreadMutexAttrDestroy(&attr);
8,210✔
484
  if (code) {
8,210!
485
    return code;
×
486
  }
487

488
  streamTaskOpenAllUpstreamInput(pTask);
8,210✔
489

490
  STaskOutputInfo* pOutputInfo = &pTask->outputInfo;
8,210✔
491
  pOutputInfo->pTokenBucket = taosMemoryCalloc(1, sizeof(STokenBucket));
8,210✔
492
  if (pOutputInfo->pTokenBucket == NULL) {
8,210!
493
    stError("s-task:%s failed to prepare the tokenBucket, code:%s", pTask->id.idStr, tstrerror(terrno));
×
494
    return terrno;
×
495
  }
496

497
  // 2MiB per second for sink task
498
  // 50 times sink operator per second
499
  code = streamTaskInitTokenBucket(pOutputInfo->pTokenBucket, 35, 35, tsSinkDataRate, pTask->id.idStr);
8,210✔
500
  if (code) {
8,210!
501
    return code;
×
502
  }
503

504
  pOutputInfo->pNodeEpsetUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset));
8,210✔
505
  if (pOutputInfo->pNodeEpsetUpdateList == NULL) {
8,210!
506
    stError("s-task:%s failed to prepare downstreamUpdateList, code:%s", pTask->id.idStr, tstrerror(terrno));
×
507
    return terrno;
×
508
  }
509

510
  pTask->taskCheckInfo.pList = taosArrayInit(4, sizeof(SDownstreamStatusInfo));
8,210✔
511
  if (pTask->taskCheckInfo.pList == NULL) {
8,210!
512
    stError("s-task:%s failed to prepare taskCheckInfo list, code:%s", pTask->id.idStr, tstrerror(terrno));
×
513
    return terrno;
×
514
  }
515

516
  if (pTask->chkInfo.pActiveInfo == NULL) {
8,210!
517
    code = streamTaskCreateActiveChkptInfo(&pTask->chkInfo.pActiveInfo);
8,210✔
518
    if (code) {
8,210!
519
      stError("s-task:%s failed to create active checkpoint info, code:%s", pTask->id.idStr, tstrerror(code));
×
520
      return code;
×
521
    }
522
  }
523

524
  return streamTaskSetBackendPath(pTask);
8,210✔
525
}
526

527
int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) {
59,821✔
528
  if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
59,821✔
529
    return 0;
3,785✔
530
  }
531

532
  int32_t type = pTask->outputInfo.type;
56,036✔
533
  if (type == TASK_OUTPUT__TABLE) {
56,036✔
534
    return 0;
183✔
535
  } else if (type == TASK_OUTPUT__FIXED_DISPATCH) {
55,853✔
536
    return 1;
8,409✔
537
  } else {
538
    SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos;
47,444✔
539
    return taosArrayGetSize(vgInfo);
47,444✔
540
  }
541
}
542

543
int32_t streamTaskGetNumOfUpstream(const SStreamTask* pTask) { return taosArrayGetSize(pTask->upstreamInfo.pList); }
10,688✔
544

545
int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask) {
11,658✔
546
  SStreamUpstreamEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask);
11,658✔
547
  if (pEpInfo == NULL) {
11,658!
548
    return terrno;
×
549
  }
550

551
  if (pTask->upstreamInfo.pList == NULL) {
11,658✔
552
    pTask->upstreamInfo.pList = taosArrayInit(4, POINTER_BYTES);
4,003✔
553
  }
554

555
  void* p = taosArrayPush(pTask->upstreamInfo.pList, &pEpInfo);
11,658✔
556
  return (p == NULL) ? terrno : TSDB_CODE_SUCCESS;
11,658!
557
}
558

559
int32_t streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated) {
6✔
560
  int32_t code = 0;
6✔
561
  char    buf[512] = {0};
6✔
562
  code = epsetToStr(pEpSet, buf, tListLen(buf));  // ignore error since it is only for log file.
6✔
563
  if (code != 0) {  // print error and continue
6!
564
    stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
×
565
    return code;
×
566
  }
567

568
  int32_t numOfUpstream = taosArrayGetSize(pTask->upstreamInfo.pList);
6✔
569
  for (int32_t i = 0; i < numOfUpstream; ++i) {
10✔
570
    SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i);
8✔
571
    if (pInfo->nodeId == nodeId) {
8✔
572
      bool equal = isEpsetEqual(&pInfo->epSet, pEpSet);
4✔
573
      if (!equal) {
4!
574
        *pUpdated = true;
×
575

576
        char tmp[512] = {0};
×
577
        code = epsetToStr(&pInfo->epSet, tmp, tListLen(tmp));
×
578
        if (code != 0) {  // print error and continue
×
579
          stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
×
580
          return code;
×
581
        }
582

583
        epsetAssign(&pInfo->epSet, pEpSet);
×
584
        stDebug("s-task:0x%x update the upstreamInfo taskId:0x%x(nodeId:%d) newEpset:%s old:%s", pTask->id.taskId,
×
585
                pInfo->taskId, nodeId, buf, tmp);
586
      } else {
587
        stDebug("s-task:0x%x not update upstreamInfo, since identical, task:0x%x(nodeId:%d) epset:%s", pTask->id.taskId,
4!
588
                pInfo->taskId, nodeId, buf);
589
      }
590

591
      break;
4✔
592
    }
593
  }
594

595
  return code;
6✔
596
}
597

598
void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo) {
32,294✔
599
  if (pUpstreamInfo->pList != NULL) {
32,294✔
600
    taosArrayDestroyEx(pUpstreamInfo->pList, freeUpstreamItem);
28,159✔
601
    pUpstreamInfo->numOfClosed = 0;
28,159✔
602
    pUpstreamInfo->pList = NULL;
28,159✔
603
  }
604
}
32,294✔
605

606
void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask) {
722✔
607
  STaskDispatcherFixed* pDispatcher = &pTask->outputInfo.fixedDispatcher;
722✔
608
  pDispatcher->taskId = pDownstreamTask->id.taskId;
722✔
609
  pDispatcher->nodeId = pDownstreamTask->info.nodeId;
722✔
610
  pDispatcher->epSet = pDownstreamTask->info.epSet;
722✔
611

612
  pTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH;
722✔
613
  pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
722✔
614
}
722✔
615

616
int32_t streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated) {
6✔
617
  char    buf[512] = {0};
6✔
618
  int32_t code = epsetToStr(pEpSet, buf, tListLen(buf));  // ignore the error since only for log files.
6✔
619
  if (code != 0) {                                        // print error and continue
6!
620
    stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
×
621
    return code;
×
622
  }
623

624
  int32_t id = pTask->id.taskId;
6✔
625
  int8_t  type = pTask->outputInfo.type;
6✔
626

627
  if (type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
6✔
628
    SArray* pVgs = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos;
2✔
629

630
    for (int32_t i = 0; i < taosArrayGetSize(pVgs); i++) {
6✔
631
      SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i);
4✔
632
      if (pVgInfo == NULL) {
4!
633
        continue;
×
634
      }
635

636
      if (pVgInfo->vgId == nodeId) {
4!
637
        bool isEqual = isEpsetEqual(&pVgInfo->epSet, pEpSet);
×
638
        if (!isEqual) {
×
639
          *pUpdated = true;
×
640

641
          char tmp[512] = {0};
×
642
          code = epsetToStr(&pVgInfo->epSet, tmp, tListLen(tmp));
×
643
          if (code != 0) {  // print error and continue
×
644
            stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
×
645
            return code;
×
646
          }
647

648
          epsetAssign(&pVgInfo->epSet, pEpSet);
×
649
          stDebug("s-task:0x%x update dispatch info, task:0x%x(nodeId:%d) newEpset:%s old:%s", id, pVgInfo->taskId,
×
650
                  nodeId, buf, tmp);
651
        } else {
652
          stDebug("s-task:0x%x not update dispatch info, since identical, task:0x%x(nodeId:%d) epset:%s", id,
×
653
                  pVgInfo->taskId, nodeId, buf);
654
        }
655
        break;
×
656
      }
657
    }
658
  } else if (type == TASK_OUTPUT__FIXED_DISPATCH) {
4!
659
    STaskDispatcherFixed* pDispatcher = &pTask->outputInfo.fixedDispatcher;
4✔
660
    if (pDispatcher->nodeId == nodeId) {
4!
661
      bool equal = isEpsetEqual(&pDispatcher->epSet, pEpSet);
4✔
662
      if (!equal) {
4!
663
        *pUpdated = true;
×
664

665
        char tmp[512] = {0};
×
666
        code = epsetToStr(&pDispatcher->epSet, tmp, tListLen(tmp));
×
667
        if (code != 0) {  // print error and continue
×
668
          stError("%s failed to convert epset to str, code:%s", pTask->id.idStr, tstrerror(code));
×
669
          return code;
×
670
        }
671

672
        epsetAssign(&pDispatcher->epSet, pEpSet);
×
673
        stDebug("s-task:0x%x update dispatch info, task:0x%x(nodeId:%d) newEpset:%s old:%s", id, pDispatcher->taskId,
×
674
                nodeId, buf, tmp);
675
      } else {
676
        stDebug("s-task:0x%x not update dispatch info, since identical, task:0x%x(nodeId:%d) epset:%s", id,
4!
677
                pDispatcher->taskId, nodeId, buf);
678
      }
679
    }
680
  }
681

682
  return code;
6✔
683
}
684

685
int32_t streamTaskStop(SStreamTask* pTask) {
1,799✔
686
  int32_t     vgId = pTask->pMeta->vgId;
1,799✔
687
  int64_t     st = taosGetTimestampMs();
1,799✔
688
  const char* id = pTask->id.idStr;
1,799✔
689

690
  int32_t code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_STOP);
1,799✔
691
  if (code) {
1,799!
692
    stError("failed to handle STOP event, s-task:%s, code:%s", id, tstrerror(code));
×
693
    return code;
×
694
  }
695

696
  if (pTask->info.taskLevel != TASK_LEVEL__SINK && pTask->exec.pExecutor != NULL) {
1,799✔
697
    code = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
1,024✔
698
    if (code != TSDB_CODE_SUCCESS) {
1,024!
699
      stError("s-task:%s failed to kill task related query handle, code:%s", id, tstrerror(code));
×
700
    }
701
  }
702

703
  while (!streamTaskIsIdle(pTask)) {
1,799!
704
    stDebug("s-task:%s level:%d wait for task to be idle and then close, check again in 100ms", id,
×
705
            pTask->info.taskLevel);
706
    taosMsleep(100);
×
707
  }
708

709
  int64_t el = taosGetTimestampMs() - st;
1,799✔
710
  stDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", vgId, id, el);
1,799✔
711
  return code;
1,799✔
712
}
713

714
bool streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) {
10✔
715
  STaskExecStatisInfo* p = &pTask->execInfo;
10✔
716

717
  int32_t numOfNodes = taosArrayGetSize(pNodeList);
10✔
718
  int64_t prevTs = p->latestUpdateTs;
10✔
719

720
  p->latestUpdateTs = taosGetTimestampMs();
10✔
721
  p->updateCount += 1;
10✔
722
  stDebug("s-task:0x%x update task nodeEp epset, updatedNodes:%d, updateCount:%d, prevTs:%" PRId64, pTask->id.taskId,
10!
723
          numOfNodes, p->updateCount, prevTs);
724

725
  bool updated = false;
10✔
726
  for (int32_t i = 0; i < numOfNodes; ++i) {
20✔
727
    SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i);
10✔
728
    if (pInfo == NULL) {
10!
729
      continue;
×
730
    }
731

732
    int32_t code = doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp, &updated);
10✔
733
    if (code) {
10!
734
      stError("s-task:0x%x failed to update the task nodeEp epset, code:%s", pTask->id.taskId, tstrerror(code));
×
735
    }
736
  }
737

738
  return updated;
10✔
739
}
740

741
void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) {
8,209✔
742
  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
8,209✔
743
    return;
4,143✔
744
  }
745

746
  int32_t size = taosArrayGetSize(pTask->upstreamInfo.pList);
4,066✔
747
  for (int32_t i = 0; i < size; ++i) {
15,778✔
748
    SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i);
11,712✔
749
    pInfo->stage = -1;
11,712✔
750
  }
751

752
  stDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr);
4,066✔
753
}
754

755
void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) {
13,593✔
756
  int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList);
13,593✔
757
  if (num == 0) {
13,593✔
758
    return;
6,842✔
759
  }
760

761
  for (int32_t i = 0; i < num; ++i) {
27,259✔
762
    SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i);
20,508✔
763
    pInfo->dataAllowed = true;
20,508✔
764
  }
765

766
  pTask->upstreamInfo.numOfClosed = 0;
6,751✔
767
  stDebug("s-task:%s opening up inputQ for %d upstream tasks", pTask->id.idStr, num);
6,751✔
768
}
769

770
void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) {
4,631✔
771
  SStreamUpstreamEpInfo* pInfo = NULL;
4,631✔
772
  streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pInfo);
4,631✔
773

774
  if ((pInfo != NULL) && pInfo->dataAllowed) {
4,631!
775
    pInfo->dataAllowed = false;
4,631✔
776
    if (pTask->upstreamInfo.numOfClosed < streamTaskGetNumOfUpstream(pTask)) {
4,631!
777
      int32_t t = atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1);
4,631✔
778
    } else {
779
      stError("s-task:%s not inc closed input, since they have been all closed already", pTask->id.idStr);
×
780
    }
781
  }
782
}
4,631✔
783

784
void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId) {
×
785
  SStreamUpstreamEpInfo* pInfo = NULL;
×
786
  streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pInfo);
×
787

788
  if (pInfo != NULL && (!pInfo->dataAllowed)) {
×
789
    int32_t t = atomic_sub_fetch_32(&pTask->upstreamInfo.numOfClosed, 1);
×
790
    stDebug("s-task:%s open inputQ for upstream:0x%x, remain closed:%d", pTask->id.idStr, taskId, t);
×
791
    pInfo->dataAllowed = true;
×
792
  }
793
}
×
794

795
bool streamTaskIsAllUpstreamClosed(SStreamTask* pTask) {
×
796
  return pTask->upstreamInfo.numOfClosed == taosArrayGetSize(pTask->upstreamInfo.pList);
×
797
}
798

799
bool streamTaskSetSchedStatusWait(SStreamTask* pTask) {
47,083✔
800
  bool ret = false;
47,083✔
801

802
  streamMutexLock(&pTask->lock);
47,083✔
803
  if (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE) {
47,082✔
804
    pTask->status.schedStatus = TASK_SCHED_STATUS__WAITING;
37,436✔
805
    ret = true;
37,436✔
806
  }
807

808
  streamMutexUnlock(&pTask->lock);
47,082✔
809
  return ret;
47,083✔
810
}
811

812
int8_t streamTaskSetSchedStatusActive(SStreamTask* pTask) {
35,967✔
813
  streamMutexLock(&pTask->lock);
35,967✔
814
  int8_t status = pTask->status.schedStatus;
35,968✔
815
  if (status == TASK_SCHED_STATUS__WAITING) {
35,968!
816
    pTask->status.schedStatus = TASK_SCHED_STATUS__ACTIVE;
35,968✔
817
  }
818
  streamMutexUnlock(&pTask->lock);
35,968✔
819

820
  return status;
35,968✔
821
}
822

823
int8_t streamTaskSetSchedStatusInactive(SStreamTask* pTask) {
1,428✔
824
  streamMutexLock(&pTask->lock);
1,428✔
825
  int8_t status = pTask->status.schedStatus;
1,428✔
826
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
1,428✔
827
  streamMutexUnlock(&pTask->lock);
1,428✔
828

829
  return status;
1,428✔
830
}
831

832
int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t resetRelHalt) {
3,526✔
833
  int32_t      code = 0;
3,526✔
834
  SStreamMeta* pMeta = pTask->pMeta;
3,526✔
835
  SStreamTask* pStreamTask = NULL;
3,526✔
836

837
  if (pTask->info.fillHistory == 0) {
3,526✔
838
    return code;
3,522✔
839
  }
840

841
  code = streamMetaAcquireTaskUnsafe(pMeta, &pTask->streamTaskId, &pStreamTask);
4✔
842
  if (code == 0) {
4!
843
    stDebug("s-task:%s clear the related stream task:0x%x attr to fill-history task", pTask->id.idStr,
×
844
            (int32_t)pTask->streamTaskId.taskId);
845

846
    streamMutexLock(&(pStreamTask->lock));
×
847
    CLEAR_RELATED_FILLHISTORY_TASK(pStreamTask);
×
848

849
    if (resetRelHalt) {
×
850
      stDebug("s-task:0x%" PRIx64 " set the persistent status attr to be ready, prev:%s, status in sm:%s",
×
851
              pTask->streamTaskId.taskId, streamTaskGetStatusStr(pStreamTask->status.taskStatus),
852
              streamTaskGetStatus(pStreamTask).name);
853
      pStreamTask->status.taskStatus = TASK_STATUS__READY;
×
854
    }
855

856
    code = streamMetaSaveTask(pMeta, pStreamTask);
×
857
    streamMutexUnlock(&(pStreamTask->lock));
×
858

859
    streamMetaReleaseTask(pMeta, pStreamTask);
×
860
  }
861

862
  return code;
4✔
863
}
864

865
int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, int64_t resetRelHalt) {
4✔
866
  SVDropStreamTaskReq* pReq = rpcMallocCont(sizeof(SVDropStreamTaskReq));
4✔
867
  if (pReq == NULL) {
4!
868
    return terrno;
×
869
  }
870

871
  pReq->head.vgId = vgId;
4✔
872
  pReq->taskId = pTaskId->taskId;
4✔
873
  pReq->streamId = pTaskId->streamId;
4✔
874
  pReq->resetRelHalt = resetRelHalt;
4✔
875

876
  SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_DROP, .pCont = pReq, .contLen = sizeof(SVDropStreamTaskReq)};
4✔
877
  int32_t code = tmsgPutToQueue(pMsgCb, WRITE_QUEUE, &msg);
4✔
878
  if (code != TSDB_CODE_SUCCESS) {
4!
879
    stError("vgId:%d failed to send drop task:0x%x msg, code:%s", vgId, pTaskId->taskId, tstrerror(code));
×
880
  } else {
881
    stDebug("vgId:%d build and send drop task:0x%x msg", vgId, pTaskId->taskId);
4!
882
  }
883

884
  return code;
4✔
885
}
886

887
int32_t streamSendChkptReportMsg(SStreamTask* pTask, SCheckpointInfo* pCheckpointInfo, int8_t dropRelHTask) {
2,848✔
888
  int32_t                code = 0;
2,848✔
889
  int32_t                tlen = 0;
2,848✔
890
  int32_t                vgId = pTask->pMeta->vgId;
2,848✔
891
  const char*            id = pTask->id.idStr;
2,848✔
892
  SActiveCheckpointInfo* pActive = pCheckpointInfo->pActiveInfo;
2,848✔
893

894
  SCheckpointReport req = {.streamId = pTask->id.streamId,
2,848✔
895
                           .taskId = pTask->id.taskId,
2,848✔
896
                           .nodeId = vgId,
897
                           .dropHTask = dropRelHTask,
898
                           .transId = pActive->transId,
2,848✔
899
                           .checkpointId = pActive->activeId,
2,848✔
900
                           .checkpointVer = pCheckpointInfo->processedVer,
2,848✔
901
                           .checkpointTs = pCheckpointInfo->startTs};
2,848✔
902

903
  tEncodeSize(tEncodeStreamTaskChkptReport, &req, tlen, code);
2,848!
904
  if (code < 0) {
2,848!
905
    stError("s-task:%s vgId:%d encode stream task checkpoint-report failed, code:%s", id, vgId, tstrerror(code));
×
906
    return -1;
×
907
  }
908

909
  void* buf = rpcMallocCont(tlen);
2,848✔
910
  if (buf == NULL) {
2,848!
911
    stError("s-task:%s vgId:%d encode stream task checkpoint-report msg failed, code:%s", id, vgId,
×
912
            tstrerror(TSDB_CODE_OUT_OF_MEMORY));
913
    return -1;
×
914
  }
915

916
  SEncoder encoder;
917
  tEncoderInit(&encoder, buf, tlen);
2,848✔
918
  if ((code = tEncodeStreamTaskChkptReport(&encoder, &req)) < 0) {
2,848!
919
    rpcFreeCont(buf);
×
920
    tEncoderClear(&encoder);
×
921
    stError("s-task:%s vgId:%d encode stream task checkpoint-report msg failed, code:%s", id, vgId, tstrerror(code));
×
922
    return -1;
×
923
  }
924
  tEncoderClear(&encoder);
2,848✔
925

926
  SRpcMsg msg = {0};
2,848✔
927
  initRpcMsg(&msg, TDMT_MND_STREAM_CHKPT_REPORT, buf, tlen);
2,848✔
928
  stDebug("s-task:%s vgId:%d build and send task checkpoint-report to mnode", id, vgId);
2,848✔
929

930
  return tmsgSendReq(&pTask->info.mnodeEpset, &msg);
2,848✔
931
}
932

933
STaskId streamTaskGetTaskId(const SStreamTask* pTask) {
24,105✔
934
  STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
24,105✔
935
  return id;
24,105✔
936
}
937

938
void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo) {
1,181✔
939
  pInfo->waitInterval = LAUNCH_HTASK_INTERVAL;
1,181✔
940
  pInfo->tickCount = ceil(LAUNCH_HTASK_INTERVAL / WAIT_FOR_MINIMAL_INTERVAL);
1,181✔
941
  pInfo->retryTimes = 0;
1,181✔
942
}
1,181✔
943

944
void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo) {
1,177✔
945
  pInfo->waitInterval *= RETRY_LAUNCH_INTERVAL_INC_RATE;
1,177✔
946
  pInfo->tickCount = ceil(pInfo->waitInterval / WAIT_FOR_MINIMAL_INTERVAL);
1,177✔
947
  pInfo->retryTimes += 1;
1,177✔
948
}
1,177✔
949

950
void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask) {
5,071✔
951
  pEntry->id.streamId = pTask->id.streamId;
5,071✔
952
  pEntry->id.taskId = pTask->id.taskId;
5,071✔
953
  pEntry->stage = -1;
5,071✔
954
  pEntry->nodeId = pTask->info.nodeId;
5,071✔
955
  pEntry->status = TASK_STATUS__STOP;
5,071✔
956
}
5,071✔
957

958
void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) {
14,328✔
959
  pDst->stage = pSrc->stage;
14,328✔
960
  pDst->inputQUsed = pSrc->inputQUsed;
14,328✔
961
  pDst->inputRate = pSrc->inputRate;
14,328✔
962
  pDst->procsTotal = pSrc->procsTotal;
14,328✔
963
  pDst->procsThroughput = pSrc->procsThroughput;
14,328✔
964
  pDst->outputTotal = pSrc->outputTotal;
14,328✔
965
  pDst->outputThroughput = pSrc->outputThroughput;
14,328✔
966
  pDst->processedVer = pSrc->processedVer;
14,328✔
967
  pDst->verRange = pSrc->verRange;
14,328✔
968
  pDst->sinkQuota = pSrc->sinkQuota;
14,328✔
969
  pDst->sinkDataSize = pSrc->sinkDataSize;
14,328✔
970
  pDst->checkpointInfo = pSrc->checkpointInfo;
14,328✔
971
  pDst->startCheckpointId = pSrc->startCheckpointId;
14,328✔
972
  pDst->startCheckpointVer = pSrc->startCheckpointVer;
14,328✔
973
  pDst->status = pSrc->status;
14,328✔
974

975
  pDst->startTime = pSrc->startTime;
14,328✔
976
  pDst->hTaskId = pSrc->hTaskId;
14,328✔
977
}
14,328✔
978

979
STaskStatusEntry streamTaskGetStatusEntry(SStreamTask* pTask) {
14,786✔
980
  SStreamMeta*         pMeta = pTask->pMeta;
14,786✔
981
  STaskExecStatisInfo* pExecInfo = &pTask->execInfo;
14,786✔
982

983
  STaskStatusEntry entry = {
44,358✔
984
      .id = streamTaskGetTaskId(pTask),
14,786✔
985
      .status = streamTaskGetStatus(pTask).state,
14,786✔
986
      .nodeId = pMeta->vgId,
14,786✔
987
      .stage = pMeta->stage,
14,786✔
988

989
      .inputQUsed = SIZE_IN_MiB(streamQueueGetItemSize(pTask->inputq.queue)),
14,786✔
990
      .startTime = pExecInfo->readyTs,
14,786✔
991
      .checkpointInfo.latestId = pTask->chkInfo.checkpointId,
14,786✔
992
      .checkpointInfo.latestVer = pTask->chkInfo.checkpointVer,
14,786✔
993
      .checkpointInfo.latestTime = pTask->chkInfo.checkpointTime,
14,786✔
994
      .checkpointInfo.latestSize = 0,
995
      .checkpointInfo.remoteBackup = 0,
996
      .checkpointInfo.consensusChkptId = 0,
997
      .checkpointInfo.consensusTs = 0,
998
      .hTaskId = pTask->hTaskInfo.id.taskId,
14,786✔
999
      .procsTotal = SIZE_IN_MiB(pExecInfo->inputDataSize),
14,786✔
1000
      .outputTotal = SIZE_IN_MiB(pExecInfo->outputDataSize),
14,786✔
1001
      .procsThroughput = SIZE_IN_KiB(pExecInfo->procsThroughput),
14,786✔
1002
      .outputThroughput = SIZE_IN_KiB(pExecInfo->outputThroughput),
14,786✔
1003
      .startCheckpointId = pExecInfo->startCheckpointId,
14,786✔
1004
      .startCheckpointVer = pExecInfo->startCheckpointVer,
14,786✔
1005
  };
1006
  return entry;
14,786✔
1007
}
1008

1009
static int32_t taskPauseCallback(SStreamTask* pTask, void* param) {
709✔
1010
  SStreamMeta* pMeta = pTask->pMeta;
709✔
1011
  int32_t      code = 0;
709✔
1012

1013
  int32_t num = atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1);
709✔
1014
  stInfo("vgId:%d s-task:%s pause stream task. paused task num:%d", pMeta->vgId, pTask->id.idStr, num);
709!
1015

1016
  // in case of fill-history task, stop the tsdb file scan operation.
1017
  if (pTask->info.fillHistory == 1) {
709!
1018
    void* pExecutor = pTask->exec.pExecutor;
×
1019
    code = qKillTask(pExecutor, TSDB_CODE_SUCCESS);
×
1020
  }
1021

1022
  stDebug("vgId:%d s-task:%s set pause flag and pause task", pMeta->vgId, pTask->id.idStr);
709!
1023
  return code;
709✔
1024
}
1025

1026
void streamTaskPause(SStreamTask* pTask) {
708✔
1027
  int32_t code = streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_PAUSE, taskPauseCallback, NULL);
708✔
1028
  if (code) {
709!
1029
    stError("s-task:%s failed handle pause event async, code:%s", pTask->id.idStr, tstrerror(code));
×
1030
  }
1031
}
709✔
1032

1033
void streamTaskResume(SStreamTask* pTask) {
702✔
1034
  SStreamTaskState prevState = streamTaskGetStatus(pTask);
702✔
1035

1036
  SStreamMeta* pMeta = pTask->pMeta;
702✔
1037
  int32_t      code = streamTaskRestoreStatus(pTask);
702✔
1038
  if (code == TSDB_CODE_SUCCESS) {
702!
1039
    char*   pNew = streamTaskGetStatus(pTask).name;
702✔
1040
    int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1);
702✔
1041
    stInfo("s-task:%s status:%s resume from %s, paused task(s):%d", pTask->id.idStr, pNew, prevState.name, num);
702!
1042
  } else {
1043
    stInfo("s-task:%s status:%s no need to resume, paused task(s):%d", pTask->id.idStr, prevState.name,
×
1044
           pMeta->numOfPausedTasks);
1045
  }
1046
}
702✔
1047

1048
bool streamTaskIsSinkTask(const SStreamTask* pTask) { return pTask->info.taskLevel == TASK_LEVEL__SINK; }
42,717✔
1049

1050
// this task must success
1051
int32_t streamTaskSendCheckpointReq(SStreamTask* pTask) {
2,893✔
1052
  int32_t     code;
1053
  int32_t     tlen = 0;
2,893✔
1054
  int32_t     vgId = pTask->pMeta->vgId;
2,893✔
1055
  const char* id = pTask->id.idStr;
2,893✔
1056

1057
  SStreamTaskCheckpointReq req = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .nodeId = vgId};
2,893✔
1058
  tEncodeSize(tEncodeStreamTaskCheckpointReq, &req, tlen, code);
2,893!
1059
  if (code < 0) {
2,893!
1060
    stError("s-task:%s vgId:%d encode stream task req checkpoint failed, code:%s", id, vgId, tstrerror(code));
×
1061
    return TSDB_CODE_INVALID_MSG;
×
1062
  }
1063

1064
  void* buf = rpcMallocCont(tlen);
2,893✔
1065
  if (buf == NULL) {
2,893!
1066
    stError("s-task:%s vgId:%d encode stream task req checkpoint msg failed, code:Out of memory", id, vgId);
×
1067
    return terrno;
×
1068
  }
1069

1070
  SEncoder encoder;
1071
  tEncoderInit(&encoder, buf, tlen);
2,893✔
1072
  if ((code = tEncodeStreamTaskCheckpointReq(&encoder, &req)) < 0) {
2,893!
1073
    rpcFreeCont(buf);
×
1074
    tEncoderClear(&encoder);
×
1075
    stError("s-task:%s vgId:%d encode stream task req checkpoint msg failed, code:%s", id, vgId, tstrerror(code));
×
1076
    return code;
×
1077
  }
1078

1079
  tEncoderClear(&encoder);
2,893✔
1080

1081
  SRpcMsg msg = {0};
2,893✔
1082
  initRpcMsg(&msg, TDMT_MND_STREAM_REQ_CHKPT, buf, tlen);
2,893✔
1083
  stDebug("s-task:%s vgId:%d build and send task checkpoint req", id, vgId);
2,893✔
1084

1085
  return tmsgSendReq(&pTask->info.mnodeEpset, &msg);
2,893✔
1086
}
1087

1088
void streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId, SStreamUpstreamEpInfo** pEpInfo) {
41,336✔
1089
  *pEpInfo = NULL;
41,336✔
1090

1091
  int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList);
41,336✔
1092
  for (int32_t i = 0; i < num; ++i) {
87,391!
1093
    SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i);
87,391✔
1094
    if (pInfo == NULL) {
87,391!
1095
      return;
×
1096
    }
1097

1098
    if (pInfo->taskId == taskId) {
87,391✔
1099
      *pEpInfo = pInfo;
41,336✔
1100
      return;
41,336✔
1101
    }
1102
  }
1103

1104
  stError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId);
×
1105
}
1106

1107
SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId) {
×
1108
  if (pTask->info.taskLevel == TASK_OUTPUT__FIXED_DISPATCH) {
×
1109
    if (pTask->outputInfo.fixedDispatcher.taskId == taskId) {
×
1110
      return &pTask->outputInfo.fixedDispatcher.epSet;
×
1111
    }
1112
  } else if (pTask->info.taskLevel == TASK_OUTPUT__SHUFFLE_DISPATCH) {
×
1113
    SArray* pList = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos;
×
1114
    for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) {
×
1115
      SVgroupInfo* pVgInfo = taosArrayGet(pList, i);
×
1116
      if (pVgInfo == NULL) {
×
1117
        continue;
×
1118
      }
1119

1120
      if (pVgInfo->taskId == taskId) {
×
1121
        return &pVgInfo->epSet;
×
1122
      }
1123
    }
1124
  }
1125

1126
  return NULL;
×
1127
}
1128

1129
int32_t createStreamTaskIdStr(int64_t streamId, int32_t taskId, const char** pId) {
8,210✔
1130
  char buf[128] = {0};
8,210✔
1131
  sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId);
8,210✔
1132
  *pId = taosStrdup(buf);
8,210✔
1133

1134
  if (*pId == NULL) {
8,210!
1135
    return terrno;
×
1136
  } else {
1137
    return TSDB_CODE_SUCCESS;
8,210✔
1138
  }
1139
}
1140

1141
static int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq) {
444✔
1142
  int32_t           code;
1143
  SStreamDataBlock* pData;
1144

1145
  code = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SStreamDataBlock), (void**)&pData);
444✔
1146
  if (code) {
444!
1147
    stError("s-task:%s failed to allocated retrieve-block", pTask->id.idStr);
×
1148
    return terrno = code;
×
1149
  }
1150

1151
  pData->type = STREAM_INPUT__DATA_RETRIEVE;
444✔
1152
  pData->srcVgId = 0;
444✔
1153

1154
  code = streamRetrieveReqToData(pReq, pData, pTask->id.idStr);
444✔
1155
  if (code != TSDB_CODE_SUCCESS) {
444!
1156
    stError("s-task:%s failed to convert retrieve-data to block, code:%s", pTask->id.idStr, tstrerror(code));
×
1157
    taosFreeQitem(pData);
×
1158
    return code;
×
1159
  }
1160

1161
  code = streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pData);
444✔
1162
  if (code != TSDB_CODE_SUCCESS) {
444!
1163
    stError("s-task:%s failed to put retrieve-block into inputQ, inputQ is full, discard the retrieve msg",
×
1164
            pTask->id.idStr);
1165
  }
1166

1167
  return code;
444✔
1168
}
1169

1170
int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq) {
444✔
1171
  int32_t code = streamTaskEnqueueRetrieve(pTask, pReq);
444✔
1172
  if (code != 0) {
444!
1173
    return code;
×
1174
  }
1175
  return streamTrySchedExec(pTask);
444✔
1176
}
1177

1178
void streamTaskSetRemoveBackendFiles(SStreamTask* pTask) { pTask->status.removeBackendFiles = true; }
3,526✔
1179

1180
void streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId) {
×
1181
  if (pTransId != NULL) {
×
1182
    *pTransId = pTask->chkInfo.pActiveInfo->transId;
×
1183
  }
1184

1185
  if (pCheckpointId != NULL) {
×
1186
    *pCheckpointId = pTask->chkInfo.pActiveInfo->activeId;
×
1187
  }
1188
}
×
1189

1190
int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId) {
28✔
1191
  pTask->chkInfo.pActiveInfo->activeId = activeCheckpointId;
28✔
1192
  return TSDB_CODE_SUCCESS;
28✔
1193
}
1194

1195
void streamTaskSetFailedChkptInfo(SStreamTask* pTask, int32_t transId, int64_t checkpointId) {
×
1196
  pTask->chkInfo.pActiveInfo->transId = transId;
×
1197
  pTask->chkInfo.pActiveInfo->activeId = checkpointId;
×
1198
  pTask->chkInfo.pActiveInfo->failedId = checkpointId;
×
1199
  stDebug("s-task:%s set failed checkpointId:%"PRId64, pTask->id.idStr, checkpointId);
×
1200
}
×
1201

1202
int32_t streamTaskCreateActiveChkptInfo(SActiveCheckpointInfo** pRes) {
8,244✔
1203
  SActiveCheckpointInfo* pInfo = taosMemoryCalloc(1, sizeof(SActiveCheckpointInfo));
8,244✔
1204
  if (pInfo == NULL) {
8,244!
1205
    return terrno;
×
1206
  }
1207

1208
  int32_t code = taosThreadMutexInit(&pInfo->lock, NULL);
8,244✔
1209
  if (code != TSDB_CODE_SUCCESS) {
8,244!
1210
    return code;
×
1211
  }
1212

1213
  pInfo->pDispatchTriggerList = taosArrayInit(4, sizeof(STaskTriggerSendInfo));
8,244✔
1214
  pInfo->pReadyMsgList = taosArrayInit(4, sizeof(STaskCheckpointReadyInfo));
8,244✔
1215
  pInfo->pCheckpointReadyRecvList = taosArrayInit(4, sizeof(STaskDownstreamReadyInfo));
8,243✔
1216

1217
  *pRes = pInfo;
8,244✔
1218
  return code;
8,244✔
1219
}
1220

1221
void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) {
32,294✔
1222
  if (pInfo == NULL) {
32,294✔
1223
    return;
24,175✔
1224
  }
1225

1226
  streamMutexDestroy(&pInfo->lock);
8,119✔
1227
  taosArrayDestroy(pInfo->pDispatchTriggerList);
8,119✔
1228
  pInfo->pDispatchTriggerList = NULL;
8,119✔
1229
  taosArrayDestroy(pInfo->pReadyMsgList);
8,119✔
1230
  pInfo->pReadyMsgList = NULL;
8,119✔
1231
  taosArrayDestroy(pInfo->pCheckpointReadyRecvList);
8,119✔
1232
  pInfo->pCheckpointReadyRecvList = NULL;
8,119✔
1233

1234
  SStreamTmrInfo* pTriggerTmr = &pInfo->chkptTriggerMsgTmr;
8,119✔
1235
  if (pTriggerTmr->tmrHandle != NULL) {
8,119✔
1236
    streamTmrStop(pTriggerTmr->tmrHandle);
1,411✔
1237
    pTriggerTmr->tmrHandle = NULL;
1,411✔
1238
  }
1239

1240
  SStreamTmrInfo* pReadyTmr = &pInfo->chkptReadyMsgTmr;
8,119✔
1241
  if (pReadyTmr->tmrHandle != NULL) {
8,119✔
1242
    streamTmrStop(pReadyTmr->tmrHandle);
1,399✔
1243
    pReadyTmr->tmrHandle = NULL;
1,399✔
1244
  }
1245

1246
  taosMemoryFree(pInfo);
8,119✔
1247
}
1248

1249
//NOTE: clear the checkpoint id, and keep the failed id
1250
void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo) {
2,479✔
1251
  pInfo->activeId = 0;
2,479✔
1252
  pInfo->transId = 0;
2,479✔
1253
  pInfo->allUpstreamTriggerRecv = 0;
2,479✔
1254
  pInfo->dispatchTrigger = false;
2,479✔
1255
//  pInfo->failedId = 0;
1256

1257
  taosArrayClear(pInfo->pDispatchTriggerList);
2,479✔
1258
  taosArrayClear(pInfo->pCheckpointReadyRecvList);
2,479✔
1259
}
2,479✔
1260

1261
const char* streamTaskGetExecType(int32_t type) {
104,981✔
1262
  switch (type) {
104,981!
1263
    case STREAM_EXEC_T_EXTRACT_WAL_DATA:
44,426✔
1264
      return "scan-wal-file";
44,426✔
1265
    case STREAM_EXEC_T_START_ALL_TASKS:
9,483✔
1266
      return "start-all-tasks";
9,483✔
1267
    case STREAM_EXEC_T_START_ONE_TASK:
5,131✔
1268
      return "start-one-task";
5,131✔
1269
    case STREAM_EXEC_T_RESTART_ALL_TASKS:
3✔
1270
      return "restart-all-tasks";
3✔
1271
    case STREAM_EXEC_T_STOP_ALL_TASKS:
4,779✔
1272
      return "stop-all-tasks";
4,779✔
1273
    case STREAM_EXEC_T_RESUME_TASK:
6,502✔
1274
      return "resume-task-from-idle";
6,502✔
1275
    case STREAM_EXEC_T_ADD_FAILED_TASK:
×
1276
      return "record-start-failed-task";
×
1277
    case 0:
34,729✔
1278
      return "exec-all-tasks";
34,729✔
1279
    default:
×
1280
      return "invalid-exec-type";
×
1281
  }
1282
}
1283

1284
int32_t streamTaskAllocRefId(SStreamTask* pTask, int64_t** pRefId) {
23,093✔
1285
  *pRefId = taosMemoryMalloc(sizeof(int64_t));
23,093✔
1286
  if (*pRefId != NULL) {
23,093!
1287
    **pRefId = pTask->id.refId;
23,093✔
1288
    int32_t code = metaRefMgtAdd(pTask->pMeta->vgId, *pRefId);
23,093✔
1289
    if (code != 0) {
23,093!
1290
      stError("s-task:%s failed to add refId:%" PRId64 " into refId-mgmt, code:%s", pTask->id.idStr, pTask->id.refId,
×
1291
              tstrerror(code));
1292
    }
1293
    return code;
23,093✔
1294
  } else {
1295
    stError("s-task:%s failed to alloc new ref id, code:%s", pTask->id.idStr, tstrerror(terrno));
×
1296
    return terrno;
×
1297
  }
1298
}
1299

1300
void streamTaskFreeRefId(int64_t* pRefId) {
20,700✔
1301
  if (pRefId == NULL) {
20,700✔
1302
    return;
1,361✔
1303
  }
1304

1305
  metaRefMgtRemove(pRefId);
19,339✔
1306
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc