• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3593

24 Jan 2025 08:57AM UTC coverage: 63.239% (-0.3%) from 63.546%
#3593

push

travis-ci

web-flow
Merge pull request #29638 from taosdata/docs/TS-5846-3.0

enh: TDengine modify taosBenchmark new query rule cases and add doc

140619 of 285630 branches covered (49.23%)

Branch coverage included in aggregate %.

218877 of 282844 relevant lines covered (77.38%)

19647377.39 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.3
/source/dnode/vnode/src/tq/tq.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "tq.h"
17
#include "osDef.h"
18
#include "taoserror.h"
19
#include "tqCommon.h"
20
#include "tstream.h"
21
#include "vnd.h"
22

23
// 0: not init
24
// 1: already inited
25
// 2: wait to be inited or cleanup
26
static int32_t tqInitialize(STQ* pTq);
27

28
// Returns true when the handle status is TMQ_HANDLE_STATUS_EXEC.
// A NULL handle is also reported as executing (true).
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return true;
  }
  return pHandle->status == TMQ_HANDLE_STATUS_EXEC;
}
65,116!
29
// Marks the handle as executing; a NULL handle is ignored.
static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return;
  }
  pHandle->status = TMQ_HANDLE_STATUS_EXEC;
}
62,534✔
30
// Marks the handle as idle; a NULL handle is ignored.
static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return;
  }
  pHandle->status = TMQ_HANDLE_STATUS_IDLE;
}
62,541✔
31

32
void tqDestroyTqHandle(void* data) {
1,659✔
33
  if (data == NULL) return;
1,659!
34
  STqHandle* pData = (STqHandle*)data;
1,659✔
35
  qDestroyTask(pData->execHandle.task);
1,659✔
36

37
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
1,659✔
38
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
1,315!
39
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
344✔
40
    tqReaderClose(pData->execHandle.pTqReader);
258✔
41
    walCloseReader(pData->pWalReader);
258✔
42
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
258✔
43
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
86!
44
    walCloseReader(pData->pWalReader);
86✔
45
    tqReaderClose(pData->execHandle.pTqReader);
86✔
46
    taosMemoryFreeClear(pData->execHandle.execTb.qmsg);
86!
47
    nodesDestroyNode(pData->execHandle.execTb.node);
86✔
48
  }
49
  if (pData->msg != NULL) {
1,659!
50
    rpcFreeCont(pData->msg->pCont);
×
51
    taosMemoryFree(pData->msg);
×
52
    pData->msg = NULL;
×
53
  }
54
  if (pData->block != NULL) {
1,659!
55
    blockDataDestroy(pData->block);
×
56
  }
57
  if (pData->pRef) {
1,659✔
58
    walCloseRef(pData->pRef->pWal, pData->pRef->refId);
1,614✔
59
  }
60
}
61

62
static bool tqOffsetEqual(const STqOffset* pLeft, const STqOffset* pRight) {
5,964✔
63
  if (pLeft == NULL || pRight == NULL) {
5,964!
64
    return false;
×
65
  }
66
  return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG &&
11,591✔
67
         pLeft->val.version == pRight->val.version;
5,627✔
68
}
69

70
// Allocates the STQ object for a vnode, attaches it to pVnode->pTq, creates its
// hash tables and then hands over to tqInitialize().
// Returns TSDB_CODE_INVALID_PARA on NULL arguments, terrno when an allocation fails,
// otherwise the result of tqInitialize(). On failure the partially built object stays
// attached to pVnode->pTq (it is not released here).
int32_t tqOpen(const char* path, SVnode* pVnode) {
  if (!path || !pVnode) {
    return TSDB_CODE_INVALID_PARA;
  }

  STQ* pTq = taosMemoryCalloc(1, sizeof(STQ));
  if (!pTq) {
    return terrno;
  }

  // link vnode and tq in both directions
  pVnode->pTq = pTq;
  pTq->pVnode = pVnode;

  if ((pTq->path = taosStrdup(path)) == NULL) {
    return terrno;
  }

  // subscription handles; entries are released through tqDestroyTqHandle
  if ((pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK)) == NULL) {
    return terrno;
  }
  taosHashSetFreeFp(pTq->pHandle, tqDestroyTqHandle);

  taosInitRWLatch(&pTq->lock);

  // handles waiting for a push
  if ((pTq->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK)) == NULL) {
    return terrno;
  }

  // check info
  pTq->pCheckInfo = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
  if (!pTq->pCheckInfo) {
    return terrno;
  }
  taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);

  // committed offsets
  pTq->pOffset = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_ENTRY_LOCK);
  if (!pTq->pOffset) {
    return terrno;
  }
  taosHashSetFreeFp(pTq->pOffset, (FDelete)tDeleteSTqOffset);

  return tqInitialize(pTq);
}
114

115
// Opens the stream meta for this tq, loads all stream tasks and opens the tq meta.
// Returns TSDB_CODE_INVALID_PARA for a NULL pTq, the streamMetaOpen() error code on
// failure, otherwise the result of tqMetaOpen().
int32_t tqInitialize(STQ* pTq) {
  if (!pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t ret = streamMetaOpen(pTq->path, pTq, tqBuildStreamTask, tqExpandStreamTask, TD_VID(pTq->pVnode), -1,
                               tqStartTaskCompleteCallback, &pTq->pStreamMeta);
  if (ret != TSDB_CODE_SUCCESS) {
    return ret;
  }

  streamMetaLoadAllTasks(pTq->pStreamMeta);
  return tqMetaOpen(pTq);
}
129

130
// Tears down a tq object: closes the stream meta, answers every poll request still
// parked in the push manager with an empty response, destroys the hash tables,
// closes the tq meta and finally frees pTq itself. A NULL pTq is a no-op
// (apart from the initial debug log).
void tqClose(STQ* pTq) {
  qDebug("start to close tq");
  if (!pTq) {
    return;
  }

  // vgId is only used for logging / the empty responses below
  int32_t vgId = 0;
  if (pTq->pVnode != NULL) {
    vgId = TD_VID(pTq->pVnode);
  } else if (pTq->pStreamMeta != NULL) {
    vgId = pTq->pStreamMeta->vgId;
  }

  // close the stream meta firstly
  streamMetaClose(pTq->pStreamMeta);

  for (void* pIter = taosHashIterate(pTq->pPushMgr, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pPushMgr, pIter)) {
    STqHandle* pWaiting = *(STqHandle**)pIter;
    if (pWaiting->msg == NULL) {
      continue;
    }
    // reply with an empty data response, then release the parked request
    tqPushEmptyDataRsp(pWaiting, vgId);
    rpcFreeCont(pWaiting->msg->pCont);
    taosMemoryFree(pWaiting->msg);
    pWaiting->msg = NULL;
  }

  taosHashCleanup(pTq->pHandle);
  taosHashCleanup(pTq->pPushMgr);
  taosHashCleanup(pTq->pCheckInfo);
  taosHashCleanup(pTq->pOffset);
  taosMemoryFree(pTq->path);
  tqMetaClose(pTq);
  qDebug("vgId:%d end to close tq", vgId);

#if 0
  streamMetaFreeTQDuringScanWalError(pTq);
#endif

  taosMemoryFree(pTq);
}
172

173
// Forwards a close notification to the stream meta; does nothing for a NULL pTq.
void tqNotifyClose(STQ* pTq) {
  if (pTq != NULL) {
    streamMetaNotifyClose(pTq->pStreamMeta);
  }
}
179

180
// Answers the poll request parked in pHandle->msg with a data response that contains
// zero blocks. pHandle->msg is dereferenced without a NULL check, so callers must make
// sure a request is parked. Failures are logged only; nothing is returned.
void tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) {
  if (!pHandle) {
    return;
  }

  // rebuild the original poll request from the parked message
  SMqPollReq pollReq = {0};
  int32_t    ret = tDeserializeSMqPollReq(pHandle->msg->pCont, pHandle->msg->contLen, &pollReq);
  if (ret < 0) {
    tqError("tDeserializeSMqPollReq %d failed, code:%d", pHandle->msg->contLen, ret);
    return;
  }

  SMqDataRsp emptyRsp = {0};
  ret = tqInitDataRsp(&emptyRsp, pollReq.reqOffset);
  if (ret != 0) {
    tqError("tqInitDataRsp failed, code:%d", ret);
    return;
  }
  emptyRsp.blockNum = 0;

  char offsetStr[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(offsetStr, TSDB_OFFSET_LEN, &emptyRsp.reqOffset);
  tqInfo("tqPushEmptyDataRsp to consumer:0x%" PRIx64 " vgId:%d, offset:%s,QID:0x%" PRIx64, pollReq.consumerId, vgId,
         offsetStr, pollReq.reqId);

  ret = tqSendDataRsp(pHandle, pHandle->msg, &pollReq, &emptyRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
  if (ret != 0) {
    tqError("tqSendDataRsp failed, code:%d", ret);
  }
  tDeleteMqDataRsp(&emptyRsp);
}
210

211
// Logs the request/response offsets and sends pRsp back on the connection described by
// pMsg->info, together with the valid WAL version range read from the handle's reader.
// Returns TSDB_CODE_INVALID_PARA when any pointer argument is NULL, otherwise the
// result of tqDoSendDataRsp().
int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type,
                      int32_t vgId) {
  if (!pHandle || !pMsg || !pReq || !pRsp) {
    return TSDB_CODE_INVALID_PARA;
  }

  int64_t startVer = 0;
  int64_t endVer = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &startVer, &endVer);

  // printable forms of the offsets, used for the debug log only
  char reqOffsetStr[TSDB_OFFSET_LEN] = {0};
  char rspOffsetStr[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(reqOffsetStr, TSDB_OFFSET_LEN, &(pRsp->reqOffset));
  (void)tFormatOffset(rspOffsetStr, TSDB_OFFSET_LEN, &(pRsp->rspOffset));

  tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) start to send rsp, block num:%d, req:%s, rsp:%s,QID:0x%" PRIx64,
          vgId, pReq->consumerId, pReq->epoch, pRsp->blockNum, reqOffsetStr, rspOffsetStr, pReq->reqId);

  return tqDoSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type, startVer, endVer);
}
229

230
// Handles an offset-commit message: decodes the SMqVgOffset from msg, and unless the
// committed log offset equals the one already stored, puts it into pTq->pOffset and
// persists it through tqMetaSaveInfo().
//
// Returns 0 on success (and also when the offset is unchanged), TSDB_CODE_INVALID_PARA
// for a NULL pTq, TSDB_CODE_INVALID_MSG when the message cannot be decoded or carries an
// unknown offset type, and -1 (terrno = TSDB_CODE_OUT_OF_MEMORY) when storing fails.
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);

  int32_t  code = 0;
  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // Clear the decoder on BOTH outcomes; previously the failure path jumped to `end`
  // without clearing it.
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pOffset = &vgOffset.offset;

  if (pOffset->val.type == TMQ_OFFSET__SNAPSHOT_DATA || pOffset->val.type == TMQ_OFFSET__SNAPSHOT_META) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
            pOffset->subKey, vgId, pOffset->val.uid, pOffset->val.ts);
  } else if (pOffset->val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, pOffset->subKey, vgId,
            pOffset->val.version);
  } else {
    tqError("invalid commit offset type:%d", pOffset->val.type);
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pSavedOffset = NULL;
  code = tqMetaGetOffset(pTq, pOffset->subKey, &pSavedOffset);
  if (code == 0 && tqOffsetEqual(pOffset, pSavedOffset)) {
    tqInfo("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64,
           vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version);
    goto end;  // no need to update the offset value
  }

  // save the new offset value; the struct is copied by value into the hash, so the
  // decoded offset is intentionally NOT destroyed on the paths below
  if (taosHashPut(pTq->pOffset, pOffset->subKey, strlen(pOffset->subKey), pOffset, sizeof(STqOffset))) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // persist the raw message; the trailing sizeof(consumerId) bytes are left out
  if (tqMetaSaveInfo(pTq, pTq->pOffsetStore, pOffset->subKey, strlen(pOffset->subKey), msg,
                     msgLen >= sizeof(vgOffset.consumerId) ? msgLen - sizeof(vgOffset.consumerId) : 0) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
end:
  // the decoded offset was not handed over to the hash: release it here
  tOffsetDestroy(&vgOffset.offset.val);
  return code;
}
286

287
// Handles a consumer seek request: when the subscription handle exists and belongs to
// the requesting consumer, the handle is removed from the push manager. The outcome is
// reported to the client through rsp.code; the function itself returns 0 for every
// request it answers and TSDB_CODE_INVALID_PARA only for NULL arguments.
int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) {
  if (!pTq || !pMsg) {
    return TSDB_CODE_INVALID_PARA;
  }

  SMqSeekReq seekReq = {0};
  SRpcMsg    rsp = {.info = pMsg->info};
  int32_t    vgId = TD_VID(pTq->pVnode);
  int        code = 0;

  if (tDeserializeSMqSeekReq(pMsg->pCont, pMsg->contLen, &seekReq) < 0) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto end;
  }

  tqDebug("tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s", seekReq.consumerId, vgId, seekReq.subKey);
  taosWLockLatch(&pTq->lock);

  STqHandle* pHandle = taosHashGet(pTq->pHandle, seekReq.subKey, strlen(seekReq.subKey));
  if (pHandle == NULL) {
    // an unknown subkey is not treated as an error
    tqWarn("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", seekReq.consumerId, vgId, seekReq.subKey);
    code = 0;
  } else if (pHandle->consumerId != seekReq.consumerId) {
    // 2. check consumer-vg assignment status
    tqError("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            seekReq.consumerId, vgId, seekReq.subKey, pHandle->consumerId);
    code = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
  } else {
    // if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to
    // TMQ_VG_STATUS__IDLE, otherwise poll data failed after seek.
    tqUnregisterPushHandle(pTq, pHandle);
  }
  taosWUnLockLatch(&pTq->lock);

end:
  rsp.code = code;
  tmsgSendRsp(&rsp);
  return 0;
}
331

332
// Scans every entry of pTq->pCheckInfo; returns -1 as soon as an entry for table tbUid
// lists colId in its colIdList, 0 when no entry does, and TSDB_CODE_INVALID_PARA for a
// NULL pTq.
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
  if (!pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  for (void* pIter = taosHashIterate(pTq->pCheckInfo, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pCheckInfo, pIter)) {
    STqCheckInfo* pInfo = (STqCheckInfo*)pIter;
    if (pInfo->ntbUid != tbUid) {
      continue;
    }

    int32_t numOfCols = taosArrayGetSize(pInfo->colIdList);
    for (int32_t idx = 0; idx < numOfCols; ++idx) {
      int16_t* pColId = taosArrayGet(pInfo->colIdList, idx);
      if (pColId != NULL && (*pColId) == colId) {
        // leaving the iteration early: it has to be cancelled explicitly
        taosHashCancelIterate(pTq->pCheckInfo, pIter);
        return -1;
      }
    }
  }

  return 0;
}
364

365
// Re-queues every poll request parked in the push manager onto the query queue and then
// empties the push manager. Runs under the tq write latch. pMsg is not used.
// Returns TSDB_CODE_INVALID_PARA for a NULL pTq, otherwise 0.
int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg) {
  if (!pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t vgId = TD_VID(pTq->pVnode);

  taosWLockLatch(&pTq->lock);
  if (taosHashGetSize(pTq->pPushMgr) > 0) {
    for (void* pIter = taosHashIterate(pTq->pPushMgr, NULL); pIter != NULL;
         pIter = taosHashIterate(pTq->pPushMgr, pIter)) {
      STqHandle* pWaiting = *(STqHandle**)pIter;
      tqDebug("vgId:%d start set submit for pHandle:%p, consumer:0x%" PRIx64, vgId, pWaiting, pWaiting->consumerId);

      if (pWaiting->msg == NULL) {
        tqError("pHandle->msg should not be null");
        taosHashCancelIterate(pTq->pPushMgr, pIter);
        break;
      }

      // the queued message takes over pCont; only the SRpcMsg shell is freed here
      SRpcMsg consumeMsg = {.msgType = TDMT_VND_TMQ_CONSUME,
                            .pCont = pWaiting->msg->pCont,
                            .contLen = pWaiting->msg->contLen,
                            .info = pWaiting->msg->info};
      if (tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &consumeMsg) != 0){
        tqError("vgId:%d tmsgPutToQueue failed, consumer:0x%" PRIx64, vgId, pWaiting->consumerId);
      }
      taosMemoryFree(pWaiting->msg);
      pWaiting->msg = NULL;
    }

    taosHashClear(pTq->pPushMgr);
  }
  taosWUnLockLatch(&pTq->lock);

  return 0;
}
402

403
// Handles a consumer poll request.
//   1. looks up the subscription handle for req.subKey,
//   2. verifies the handle is still assigned to the requesting consumer,
//   3. waits (10ms steps) until the handle is idle and marks it executing,
//   4. updates the handle epoch and extracts the data via tqExtractDataForMq(),
//   5. marks the handle idle again.
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, the (negative) deserialization
// result when the request cannot be decoded, -1 with terrno set when the handle is
// missing or owned by another consumer, otherwise the result of tqExtractDataForMq().
// The deserialized request is released on every exit path after deserialization.
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqPollReq req = {0};
  int        code = tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req);
  if (code < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    goto END;
  }

  int64_t      consumerId = req.consumerId;
  int32_t      reqEpoch = req.epoch;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
  STqHandle*   pHandle = NULL;

  while (1) {
    taosWLockLatch(&pTq->lock);
    // 1. find handle
    code = tqMetaGetHandle(pTq, req.subKey, &pHandle);
    if (code != TDB_CODE_SUCCESS) {
      tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_MSG;
      taosWUnLockLatch(&pTq->lock);
      // go through END so that req is destroyed; a direct return here leaked it
      code = -1;
      goto END;
    }

    // 2. check rebalance status
    if (pHandle->consumerId != consumerId) {
      tqError("ERROR tmq poll: consumer:0x%" PRIx64
              " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId);
      terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
      taosWUnLockLatch(&pTq->lock);
      code = -1;
      goto END;
    }

    bool exec = tqIsHandleExec(pHandle);
    if (!exec) {
      // claim the handle while still holding the latch
      tqSetHandleExec(pHandle);
      //      qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS);
      tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId,
              req.subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      break;
    }
    taosWUnLockLatch(&pTq->lock);

    tqDebug("tmq poll: consumer:0x%" PRIx64
            " vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p",
            consumerId, vgId, req.subKey, pHandle);
    taosMsleep(10);
  }

  // 3. update the epoch value
  if (pHandle->epoch < reqEpoch) {
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch,
            reqEpoch);
    pHandle->epoch = reqEpoch;
  }

  char buf[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(buf, TSDB_OFFSET_LEN, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s,QID:0x%" PRIx64,
          consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId);

  code = tqExtractDataForMq(pTq, pHandle, &req, pMsg);
  tqSetHandleIdle(pHandle);

  tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle idle, pHandle:%p", consumerId, vgId,
          req.subKey, pHandle);

END:
  tDestroySMqPollReq(&req);
  return code;
}
482

483
// Answers a "committed offset" query: decodes the SMqVgOffset that follows the SMsgHead
// in pMsg, looks up the stored offset for its subKey and sends it back encoded as an
// SMqVgOffset.
// Returns 0 after the response was sent, TSDB_CODE_INVALID_PARA for NULL arguments,
// TSDB_CODE_OUT_OF_MEMORY (also stored in terrno) when decoding fails,
// TSDB_CODE_TMQ_NO_COMMITTED when no offset is stored, and an error from
// TAOS_GET_TERRNO()/terrno when encoding or buffer allocation fails.
int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  // the payload starts right after the message head
  void*   data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t len = pMsg->contLen - sizeof(SMsgHead);

  SMqVgOffset vgOffset = {0};

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)data, len);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // clear the decoder on both outcomes; the failure path used to skip this
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    tOffsetDestroy(&vgOffset.offset.val);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return terrno;
  }

  STqOffset* pSavedOffset = NULL;
  int32_t    code = tqMetaGetOffset(pTq, vgOffset.offset.subKey, &pSavedOffset);
  // Only the subKey of the decoded offset was needed; release its value before it is
  // dropped or overwritten (same cleanup tqProcessOffsetCommitReq does at its `end`).
  tOffsetDestroy(&vgOffset.offset.val);
  if (code != 0) {
    return TSDB_CODE_TMQ_NO_COMMITTED;
  }
  // shallow copy of the stored offset: it must not be destroyed through vgOffset
  vgOffset.offset = *pSavedOffset;

  tEncodeSize(tEncodeMqVgOffset, &vgOffset, len, code);
  if (code < 0) {
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  void* buf = rpcMallocCont(len);
  if (buf == NULL) {
    return terrno;
  }
  SEncoder encoder = {0};
  tEncoderInit(&encoder, buf, len);
  code = tEncodeMqVgOffset(&encoder, &vgOffset);
  tEncoderClear(&encoder);
  if (code < 0) {
    rpcFreeCont(buf);
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  SRpcMsg rsp = {.info = pMsg->info, .pCont = buf, .contLen = len, .code = 0};

  tmsgSendRsp(&rsp);
  return 0;
}
531

532
// Answers a WAL-info request for a subscription: reports a log offset together with the
// valid WAL version range of the handle's reader.
// The reported version is taken from, in this order: the request itself (log offset),
// the stored committed offset, or the earliest/latest valid WAL version for the
// RESET_EARLIEST / RESET_LATEST request types.
// Returns TSDB_CODE_INVALID_PARA / TSDB_CODE_INVALID_MSG / TSDB_CODE_TMQ_CONSUMER_MISMATCH
// on the respective validation failures, otherwise the result of tqDoSendDataRsp().
int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (!pTq || !pMsg) {
    return TSDB_CODE_INVALID_PARA;
  }

  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    return TSDB_CODE_INVALID_MSG;
  }

  int32_t      code = 0;
  int32_t      vgId = TD_VID(pTq->pVnode);
  int64_t      consumerId = req.consumerId;
  STqOffsetVal reqOffset = req.reqOffset;

  // 1. find handle
  taosRLockLatch(&pTq->lock);
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (!pHandle) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s not found", consumerId, vgId, req.subKey);
    taosRUnLockLatch(&pTq->lock);
    return TSDB_CODE_INVALID_MSG;
  }

  // 2. check rebalance status
  if (pHandle->consumerId != consumerId) {
    tqDebug("ERROR consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, vgId, req.subKey, pHandle->consumerId);
    taosRUnLockLatch(&pTq->lock);
    return TSDB_CODE_TMQ_CONSUMER_MISMATCH;
  }

  // valid WAL range is read while the handle is still protected by the latch
  int64_t sver = 0;
  int64_t ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);
  taosRUnLockLatch(&pTq->lock);

  SMqDataRsp dataRsp = {0};
  code = tqInitDataRsp(&dataRsp, req.reqOffset);
  if (code != 0) {
    return code;
  }

  if (req.useSnapshot == true) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s snapshot not support wal info", consumerId, vgId, req.subKey);
    code = TSDB_CODE_INVALID_PARA;
    goto END;
  }

  dataRsp.rspOffset.type = TMQ_OFFSET__LOG;

  if (reqOffset.type == TMQ_OFFSET__LOG) {
    // the consumer told us where it is
    dataRsp.rspOffset.version = reqOffset.version;
  } else if (reqOffset.type >= 0) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey,
            reqOffset.type);
    code = TSDB_CODE_INVALID_PARA;
    goto END;
  } else {
    // negative types: fall back to the stored offset, then to the reset policy
    STqOffset* pStored = NULL;
    code = tqMetaGetOffset(pTq, req.subKey, &pStored);
    if (code != 0) {
      if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEST) {
        dataRsp.rspOffset.version = sver;  // not consume yet, set the earliest position
      } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
        dataRsp.rspOffset.version = ever;
      }
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%" PRId64, consumerId, vgId, req.subKey,
             dataRsp.rspOffset.version);
    } else if (pStored->val.type != TMQ_OFFSET__LOG) {
      tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s, no valid wal info", consumerId, vgId, req.subKey);
      code = TSDB_CODE_INVALID_PARA;
      goto END;
    } else {
      dataRsp.rspOffset.version = pStored->val.version;
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from store:%" PRId64, consumerId, vgId,
             req.subKey, dataRsp.rspOffset.version);
    }
  }

  code = tqDoSendDataRsp(&pMsg->info, &dataRsp, req.epoch, req.consumerId, TMQ_MSG_TYPE__WALINFO_RSP, sver, ever);

END:
  tDeleteMqDataRsp(&dataRsp);
  return code;
}
619

620
// Removes a subscription: waits until its handle (if any) is idle, unregisters it from
// the push manager and drops it from pTq->pHandle, then deletes the offset from the
// in-memory hash and the entries in pOffsetStore / pExecStore. Individual removal
// failures are logged only. Returns TSDB_CODE_INVALID_PARA for NULL arguments, else 0.
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (!pTq || !msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  SMqVDeleteReq* pDelReq = (SMqVDeleteReq*)msg;
  int32_t        vgId = TD_VID(pTq->pVnode);
  int32_t        code = 0;

  tqInfo("vgId:%d, tq process delete sub req %s", vgId, pDelReq->subKey);

  STqHandle* pHandle = taosHashGet(pTq->pHandle, pDelReq->subKey, strlen(pDelReq->subKey));
  if (pHandle != NULL) {
    taosWLockLatch(&pTq->lock);
    // the latch is dropped while sleeping so the running poll can finish
    while (tqIsHandleExec(pHandle)) {
      tqInfo("vgId:%d, topic:%s, subscription is executing, delete wait for 10ms and retry, pHandle:%p", vgId,
             pHandle->subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      taosMsleep(10);
      taosWLockLatch(&pTq->lock);
    }

    tqUnregisterPushHandle(pTq, pHandle);
    code = taosHashRemove(pTq->pHandle, pDelReq->subKey, strlen(pDelReq->subKey));
    if (code != 0) {
      tqError("cannot process tq delete req %s, since no such handle", pDelReq->subKey);
    }
    taosWUnLockLatch(&pTq->lock);
  }

  taosWLockLatch(&pTq->lock);
  if (taosHashRemove(pTq->pOffset, pDelReq->subKey, strlen(pDelReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in hash", pDelReq->subKey);
  }
  if (tqMetaDeleteInfo(pTq, pTq->pOffsetStore, pDelReq->subKey, strlen(pDelReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pDelReq->subKey);
  }

  if (tqMetaDeleteInfo(pTq, pTq->pExecStore, pDelReq->subKey, strlen(pDelReq->subKey)) < 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pDelReq->subKey);
  }
  taosWUnLockLatch(&pTq->lock);

  return 0;
}
668

669
// Decodes an STqCheckInfo from msg, stores it in pTq->pCheckInfo keyed by its topic and
// persists the raw message in pCheckStore. A negative msgLen is treated as 0.
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, the decode / hash-put error code on
// failure, otherwise the result of tqMetaSaveInfo().
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (!pTq || !msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t len = (msgLen >= 0) ? msgLen : 0;

  STqCheckInfo checkInfo = {0};
  int32_t      ret = tqMetaDecodeCheckInfo(&checkInfo, msg, len);
  if (ret != 0) {
    return ret;
  }

  ret = taosHashPut(pTq->pCheckInfo, checkInfo.topic, strlen(checkInfo.topic), &checkInfo, sizeof(STqCheckInfo));
  if (ret != 0) {
    // the struct was not stored in the hash, so it is released here
    tDeleteSTqCheckInfo(&checkInfo);
    return ret;
  }

  return tqMetaSaveInfo(pTq, pTq->pCheckStore, checkInfo.topic, strlen(checkInfo.topic), msg, len);
}
687

688
// Removes the check info whose key is the NUL-terminated string in msg, first from
// pTq->pCheckInfo and then from pCheckStore.
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, TSDB_CODE_TSC_INTERNAL_ERROR when
// the hash removal reports a negative result, otherwise the result of tqMetaDeleteInfo().
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (!pTq || !msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t removed = taosHashRemove(pTq->pCheckInfo, msg, strlen(msg));
  if (removed < 0) {
    return TSDB_CODE_TSC_INTERNAL_ERROR;
  }

  return tqMetaDeleteInfo(pTq, pTq->pCheckStore, msg, strlen(msg));
}
697

698
// Handles a rebalance/subscribe request (SMqRebVgReq).
//  - No handle for req.subKey yet: a new one is created and saved (rejected with
//    TSDB_CODE_INVALID_PARA when newConsumerId is -1).
//  - Handle exists: waits until it is idle; if the consumer changes, the handle is
//    switched to the new consumer, its epoch reset to 0, it is removed from the push
//    manager and saved again.
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, otherwise the decode / create /
// save result (0 when nothing had to be changed).
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (!pTq || !msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int         ret = 0;
  SMqRebVgReq req = {0};
  SDecoder    dc = {0};
  STqHandle*  pHandle = NULL;

  tDecoderInit(&dc, (uint8_t*)msg, msgLen);
  ret = tDecodeSMqRebVgReq(&dc, &req);
  if (ret < 0) {
    goto end;
  }

  tqInfo("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey,
         req.oldConsumerId, req.newConsumerId);

  // look the handle up under the read latch
  taosRLockLatch(&pTq->lock);
  int32_t lookupCode = tqMetaGetHandle(pTq, req.subKey, &pHandle);
  if (lookupCode != 0){
    tqInfo("vgId:%d, tq process sub req:%s, no such handle, create new one", pTq->pVnode->config.vgId, req.subKey);
  }
  taosRUnLockLatch(&pTq->lock);

  if (pHandle != NULL) {
    taosWLockLatch(&pTq->lock);
    // the latch is dropped while sleeping so the running poll can finish
    while (tqIsHandleExec(pHandle)) {
      tqInfo("vgId:%d, topic:%s, subscription is executing, sub wait for 10ms and retry, pHandle:%p",
             pTq->pVnode->config.vgId, pHandle->subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      taosMsleep(10);
      taosWLockLatch(&pTq->lock);
    }

    if (pHandle->consumerId == req.newConsumerId) {  // do nothing
      tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId);
    } else {
      tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
             req.newConsumerId);

      atomic_store_64(&pHandle->consumerId, req.newConsumerId);
      atomic_store_32(&pHandle->epoch, 0);
      tqUnregisterPushHandle(pTq, pHandle);
      ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
    }
    taosWUnLockLatch(&pTq->lock);
  } else {
    if (req.oldConsumerId != -1) {
      // logged only; the new handle is still created
      tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
              req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
    }
    if (req.newConsumerId == -1) {
      tqError("vgId:%d, tq invalid rebalance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
      ret = TSDB_CODE_INVALID_PARA;
      goto end;
    }

    STqHandle newHandle = {0};
    ret = tqMetaCreateHandle(pTq, &req, &newHandle);
    if (ret < 0) {
      tqDestroyTqHandle(&newHandle);
      goto end;
    }

    taosWLockLatch(&pTq->lock);
    ret = tqMetaSaveHandle(pTq, req.subKey, &newHandle);
    taosWUnLockLatch(&pTq->lock);
  }

end:
  tDecoderClear(&dc);
  return ret;
}
772

773
// Free callback installed on the sink table-info hash: `ptr` addresses a slot
// that stores a heap pointer, and the allocation that slot refers to is freed.
static void freePtr(void* ptr) {
  void** ppSlot = (void**)ptr;
  taosMemoryFree(*ppSlot);
}
48,680!
774

775
// Initialise a stream task on this vnode so that it is ready to run.
//
// Steps, in order:
//   1. run the generic streamTaskInit();
//   2. wire the output sink (SMA sink or destination-table sink, the latter with
//      its row schema and per-table info hash);
//   3. open a WAL reader for source-level tasks;
//   4. reset upstream stage info, restore the version info and log the result.
//
// pTqObj          the owning STQ, passed as an opaque pointer
// pTask           the task to build
// nextProcessVer  forwarded to streamTaskInit() and reported in the log
//
// Returns 0 on success, the streamTaskInit() error, terrno when an allocation or
// the WAL reader fails, or TSDB_CODE_STREAM_INTERNAL_ERROR when a non
// fill-history task has checkpointVer > nextProcessVer.
int32_t tqBuildStreamTask(void* pTqObj, SStreamTask* pTask, int64_t nextProcessVer) {
  STQ*    pTq = (STQ*)pTqObj;
  int32_t vgId = TD_VID(pTq->pVnode);

  tqDebug("s-task:0x%x start to build task", pTask->id.taskId);

  int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, nextProcessVer);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pTask->pBackend = NULL;

  // sink
  STaskOutputInfo* pSink = &pTask->outputInfo;
  if (pSink->type == TASK_OUTPUT__SMA) {
    pSink->smaSink.vnode = pTq->pVnode;
    pSink->smaSink.smaSink = smaHandleRes;
  } else if (pSink->type == TASK_OUTPUT__TABLE) {
    pSink->tbSink.vnode = pTq->pVnode;
    pSink->tbSink.tbSinkFunc = tqSinkDataIntoDstTable;

    // use the schema version recorded in meta when available, otherwise version 1
    SMetaInfo metaInfo = {0};
    int32_t   schemaVer = 1;
    code = metaGetInfo(pTq->pVnode->pMeta, pSink->tbSink.stbUid, &metaInfo, NULL);
    if (code == TSDB_CODE_SUCCESS) {
      schemaVer = metaInfo.skmVer;
    }

    SSchemaWrapper* pWrapper = pSink->tbSink.pSchemaWrapper;
    pSink->tbSink.pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, schemaVer);
    if (pSink->tbSink.pTSchema == NULL) {
      return terrno;
    }

    pSink->tbSink.pTbInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
    if (pSink->tbSink.pTbInfo == NULL) {
      tqError("vgId:%d failed init sink tableInfo, code:%s", vgId, tstrerror(terrno));
      return terrno;
    }

    tSimpleHashSetFreeFp(pSink->tbSink.pTbInfo, freePtr);
  }

  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    // delete msg also extract from wal files
    SWalFilterCond cond = {.deleteMsg = 1, .scanDropCtb = pTask->subtableWithoutMd5 ? true : false};

    pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond, pTask->id.taskId);
    if (pTask->exec.pWalReader == NULL) {
      tqError("vgId:%d failed init wal reader, code:%s", vgId, tstrerror(terrno));
      return terrno;
    }
  }

  streamTaskResetUpstreamStageInfo(pTask);

  SCheckpointInfo* pChkInfo = &pTask->chkInfo;
  tqSetRestoreVersionInfo(pTask);

  char*       pCurStatus = streamTaskGetStatus(pTask).name;
  const char* pNextStatus = streamTaskGetStatusStr(pTask->status.taskStatus);

  if (pTask->info.fillHistory) {
    tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
           " nextProcessVer:%" PRId64
           " child id:%d, level:%d, cur-status:%s, next-status:%s fill-history:%d, related stream task:0x%x "
           "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
           vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
           pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
           (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam, nextProcessVer);
    return 0;
  }

  tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
         " nextProcessVer:%" PRId64
         " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x "
         "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
         vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
         pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
         (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam, nextProcessVer);

  // a regular stream task must never have a checkpoint ahead of its next version to process
  if (pChkInfo->checkpointVer > pChkInfo->nextProcessVer) {
    tqError("vgId:%d build stream task, s-task:%s, checkpointVer:%" PRId64 " > nextProcessVer:%" PRId64, vgId,
            pTask->id.idStr, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
  }

  return 0;
}
864

865
// Hand a task check request over to the stream-task handler of this vnode's stream meta.
int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessCheckReq(pMeta, pMsg);
}
21,747✔
866

867
// Hand a task check response over to the stream-task handler, together with
// whether this vnode currently holds the leader role.
int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessCheckRsp(pMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
}
870

871
// Forward a task deploy request to the stream-task handler, supplying the vnode's
// message callbacks, its leader role and its `restored` flag.
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDeployReq(pMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen,
                                      vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored);
}
875

876
// Start step 2 of a fill-history task once the related stream task has been halted.
//
// pTask        the fill-history task
// pStreamTask  the related stream task; its haltVer bounds the step-2 version range
// pTq          owning STQ, used to kick the asynchronous WAL scan
//
// When streamHistoryTaskSetVerRangeStep2() reports that nothing is left to scan,
// a trans-state item is queued and the task is executed directly. Otherwise the
// scanner is configured for the step-2 range, the WAL reader is positioned at the
// range start and an asynchronous WAL scan is requested.
static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) {
  const char*    id = pTask->id.idStr;
  int64_t        haltVer = pStreamTask->hTaskInfo.haltVer;
  SVersionRange* pRange = &pTask->step2Range;
  int32_t        vgId = pTask->pMeta->vgId;

  // if it's an source task, extract the last version in wal.
  bool done = streamHistoryTaskSetVerRangeStep2(pTask, haltVer);
  pTask->execInfo.step2Start = taosGetTimestampMs();

  if (done) {
    qDebug("s-task:%s scan wal(step 2) verRange:%" PRId64 "-%" PRId64 " ended, elapsed time:%.2fs", id,
           pRange->minVer, pRange->maxVer, 0.0);

    int32_t code = streamTaskPutTranstateIntoInputQ(pTask);  // todo: msg lost.
    if (code) {
      qError("s-task:%s failed put trans-state into inputQ, code:%s", id, tstrerror(code));
    }

    (void)streamExecTask(pTask);  // exec directly
    return;
  }

  STimeWindow* pWin = &pTask->dataRange.window;
  tqDebug("s-task:%s level:%d verRange:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64
          ", do secondary scan-history from WAL after halt the related stream task:%s",
          id, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pWin->skey, pWin->ekey,
          pStreamTask->id.idStr);

  if (pTask->status.schedStatus != TASK_SCHED_STATUS__WAITING) {
    tqError("s-task:%s level:%d unexpected sched-status:%d", id, pTask->info.taskLevel, pTask->status.schedStatus);
  }

  int32_t code = streamSetParamForStreamScannerStep2(pTask, pRange, pWin);
  if (code) {
    tqError("s-task:%s level:%d failed to set step2 param", id, pTask->info.taskLevel);
  }

  // the WAL scan resumes from the first version of the step-2 range
  int64_t startVer = pRange->minVer;
  pTask->chkInfo.nextProcessVer = startVer;
  walReaderSetSkipToVersion(pTask->exec.pWalReader, startVer);

  tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", id, startVer,
          pRange->maxVer, TASK_SCHED_STATUS__INACTIVE);

  (void)streamTaskSetSchedStatusInactive(pTask);

  // now the fill-history task starts to scan data from wal files.
  code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE);
  if (code != TSDB_CODE_SUCCESS) {
    return;
  }

  code = tqScanWalAsync(pTq, false);
  if (code) {
    tqError("vgId:%d failed to start scan wal file, code:%s", vgId, tstrerror(code));
  }
}
2,373✔
928

929
// Callback passed to streamTaskHandleEventAsync() for the TASK_EVENT_HALT event
// of a stream task: look up the task named by pStreamTask->hTaskInfo.id and start
// its step 2.
//
// pStreamTask  the stream task the event was raised on
// param        the STQ instance
//
// Always returns TSDB_CODE_SUCCESS; a task that can no longer be acquired is
// only logged as a warning.
int32_t handleStep2Async(SStreamTask* pStreamTask, void* param) {
  STQ*         pTq = param;
  SStreamMeta* pMeta = pStreamTask->pMeta;
  STaskId      fillHistId = pStreamTask->hTaskInfo.id;
  SStreamTask* pFillTask = NULL;

  // only the out-pointer is inspected; the returned code is intentionally unused
  (void)streamMetaAcquireTask(pMeta, fillHistId.streamId, fillHistId.taskId, &pFillTask);

  if (pFillTask != NULL) {
    doStartFillhistoryStep2(pFillTask, pStreamTask, pTq);
    streamMetaReleaseTask(pMeta, pFillTask);
  } else {
    tqWarn("s-task:0x%x failed to acquired it to exec step 2, scan wal quit", (int32_t)fillHistId.taskId);
  }

  return TSDB_CODE_SUCCESS;
}
946

947
// this function should be executed by only one thread, so we set an sentinel to protect this function
948
int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
2,597✔
949
  SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont;
2,597✔
950
  SStreamMeta*           pMeta = pTq->pStreamMeta;
2,597✔
951
  int32_t                code = TSDB_CODE_SUCCESS;
2,597✔
952
  SStreamTask*           pTask = NULL;
2,597✔
953
  SStreamTask*           pStreamTask = NULL;
2,597✔
954

955
  code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask);
2,597✔
956
  if (pTask == NULL) {
2,597!
957
    tqError("vgId:%d failed to acquire stream task:0x%x during scan history data, task may have been destroyed",
×
958
            pMeta->vgId, pReq->taskId);
959
    return code;
×
960
  }
961

962
  // do recovery step1
963
  const char* id = pTask->id.idStr;
2,597✔
964
  char*       pStatus = streamTaskGetStatus(pTask).name;
2,597✔
965

966
  // avoid multi-thread exec
967
  while (1) {
×
968
    int32_t sentinel = atomic_val_compare_exchange_32(&pTask->status.inScanHistorySentinel, 0, 1);
2,597✔
969
    if (sentinel != 0) {
2,597!
970
      tqDebug("s-task:%s already in scan-history func, wait for 100ms, and try again", id);
×
971
      taosMsleep(100);
×
972
    } else {
973
      break;
2,597✔
974
    }
975
  }
976

977
  // let's decide which step should be executed now
978
  if (pTask->execInfo.step1Start == 0) {
2,597✔
979
    int64_t ts = taosGetTimestampMs();
2,383✔
980
    pTask->execInfo.step1Start = ts;
2,383✔
981
    tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts);
2,383✔
982
  } else {
983
    if (pTask->execInfo.step2Start == 0) {
214✔
984
      tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs",
186!
985
              id, pTask->execInfo.step1Start, pTask->execInfo.step1El);
986
    } else {
987
      tqDebug("s-task:%s already in step2, no need to scan-history data, step2 startTs:%" PRId64, id,
28!
988
              pTask->execInfo.step2Start);
989

990
      atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
28✔
991
      streamMetaReleaseTask(pMeta, pTask);
27✔
992
      return 0;
27✔
993
    }
994
  }
995

996
  // we have to continue retrying to successfully execute the scan history task.
997
  if (!streamTaskSetSchedStatusWait(pTask)) {
2,569!
998
    tqError(
×
999
        "s-task:%s failed to start scan-history in first stream time window since already started, unexpected "
1000
        "sched-status:%d",
1001
        id, pTask->status.schedStatus);
1002
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
1003
    streamMetaReleaseTask(pMeta, pTask);
×
1004
    return 0;
×
1005
  }
1006

1007
  int64_t              st = taosGetTimestampMs();
2,570✔
1008
  SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask, st);
2,570✔
1009

1010
  double el = (taosGetTimestampMs() - st) / 1000.0;
2,570✔
1011
  pTask->execInfo.step1El += el;
2,570✔
1012

1013
  if (retInfo.ret == TASK_SCANHISTORY_QUIT || retInfo.ret == TASK_SCANHISTORY_REXEC) {
2,570✔
1014
    int8_t status = streamTaskSetSchedStatusInactive(pTask);
193✔
1015
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
193✔
1016

1017
    if (retInfo.ret == TASK_SCANHISTORY_REXEC) {
193✔
1018
      streamExecScanHistoryInFuture(pTask, retInfo.idleTime);
186✔
1019
    } else {
1020
      SStreamTaskState p = streamTaskGetStatus(pTask);
7✔
1021
      ETaskStatus      s = p.state;
7✔
1022

1023
      if (s == TASK_STATUS__PAUSE) {
7!
1024
        tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs total:%.2fs, sched-status:%d", id, el,
×
1025
                pTask->execInfo.step1El, status);
1026
      } else if (s == TASK_STATUS__STOP || s == TASK_STATUS__DROPPING) {
7!
1027
        tqDebug("s-task:%s status:%p not continue scan-history data, total elapsed time:%.2fs quit", id, p.name,
7!
1028
                pTask->execInfo.step1El);
1029
      }
1030
    }
1031

1032
    streamMetaReleaseTask(pMeta, pTask);
193✔
1033
    return 0;
193✔
1034
  }
1035

1036
  // the following procedure should be executed, no matter status is stop/pause or not
1037
  tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, pTask->execInfo.step1El);
2,377✔
1038

1039
  if (pTask->info.fillHistory != 1) {
2,377!
1040
    tqError("s-task:%s fill-history is disabled, unexpected", id);
×
1041
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1042
  }
1043

1044
  // 1. get the related stream task
1045
  code = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId, &pStreamTask);
2,377✔
1046
  if (pStreamTask == NULL) {
2,377✔
1047
    tqError("failed to find s-task:0x%" PRIx64 ", it may have been destroyed, drop related fill-history task:%s",
4!
1048
            pTask->streamTaskId.taskId, pTask->id.idStr);
1049

1050
    tqDebug("s-task:%s fill-history task set status to be dropping", id);
4!
1051
    code = streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, 0);
4✔
1052

1053
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
4✔
1054
    streamMetaReleaseTask(pMeta, pTask);
4✔
1055
    return code;
4✔
1056
  }
1057

1058
  if (pStreamTask->info.taskLevel != TASK_LEVEL__SOURCE) {
2,373!
1059
    tqError("s-task:%s fill-history task related stream task level:%d, unexpected", id, pStreamTask->info.taskLevel);
×
1060
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1061
  }
1062

1063
  code = streamTaskHandleEventAsync(pStreamTask->status.pSM, TASK_EVENT_HALT, handleStep2Async, pTq);
2,373✔
1064
  streamMetaReleaseTask(pMeta, pStreamTask);
2,373✔
1065

1066
  atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
2,373✔
1067
  streamMetaReleaseTask(pMeta, pTask);
2,373✔
1068
  return code;
2,373✔
1069
}
1070

1071
// Handle a stream "run task" request.
//
// The request body (after the SMsgHead) is decoded first. A decode failure is
// logged and reported as success. A STREAM_EXEC_T_EXTRACT_WAL_DATA request is
// answered by scanning the WAL right away; every other request is forwarded to
// tqStreamTaskProcessRunReq(), and for regular task ids (reqType >= 0) or a
// resume request an asynchronous WAL scan is kicked afterwards.
//
// Returns the code of the last executed step.
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamTaskRunReq req = {0};
  SDecoder          decoder;
  char*             pBody = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t           bodyLen = pMsg->contLen - sizeof(SMsgHead);

  tDecoderInit(&decoder, (uint8_t*)pBody, bodyLen);
  int32_t code = tDecodeStreamTaskRunReq(&decoder, &req);
  if (code < 0) {
    tqError("vgId:%d failed to decode task run req, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
    tDecoderClear(&decoder);
    return TSDB_CODE_SUCCESS;
  }
  tDecoderClear(&decoder);

  // extracted submit data from wal files for all tasks
  if (req.reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) {
    return tqScanWal(pTq);
  }

  code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
  if (code) {
    tqError("vgId:%d failed to create task run req, code:%s", TD_VID(pTq->pVnode), tstrerror(code));
    return code;
  }

  // let's continue scan data in the wal files
  bool needScanWal = (req.reqType >= 0) || (req.reqType == STREAM_EXEC_T_RESUME_TASK);
  if (!needScanWal) {
    return code;
  }

  code = tqScanWalAsync(pTq, false);  // it's ok to failed
  if (code) {
    tqError("vgId:%d failed to start scan wal file, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
  }

  return code;
}
1108

1109
// Forward a task dispatch request to the stream-task handler of this vnode.
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDispatchReq(pMeta, pMsg);
}
1112

1113
// Forward a task dispatch response to the stream-task handler of this vnode.
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDispatchRsp(pMeta, pMsg);
}
1116

1117
// Forward a drop-task request (raw message and its length) to the stream-task handler.
int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDropReq(pMeta, msg, msgLen);
}
1120

1121
// Forward an update-checkpoint request to the stream-task handler together with
// the vnode's `restored` flag. msgLen is accepted but not used here.
int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessUpdateCheckpointReq(pMeta, pTq->pVnode->restored, msg);
}
1124

1125
// Forward a consensus-checkpoint-id request to the stream-task handler of this vnode.
int32_t tqProcessTaskConsenChkptIdReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessConsenChkptIdReq(pMeta, pMsg);
}
1128

1129
// Forward a pause-task request to the stream-task handler.
// sversion and msgLen are accepted but not used here.
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessTaskPauseReq(pMeta, msg);
}
1132

1133
// Forward a resume-task request to the stream-task handler. The handler receives
// the STQ itself and a constant `true` as its last argument; msgLen is not used here.
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessTaskResumeReq(pTq, sversion, msg, true);
  return code;
}
1136

1137
// Forward a task retrieve request to the stream-task handler of this vnode.
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessRetrieveReq(pMeta, pMsg);
}
1140

1141
// A task retrieve response needs no processing: both arguments are ignored and 0 is returned.
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}
432✔
1142

1143
// Answer a stream-progress request: decode it, query the execution info of the
// stream on this vnode and send back a serialized SStreamProgressRsp.
//
// pTq   the owning STQ
// pMsg  rpc message; the request body follows the SMsgHead in pCont
//
// Returns 0 on success, -1 (with terrno = TSDB_CODE_OUT_OF_MEMORY) when the
// response buffer cannot be allocated, or the code of the failing
// decode / query / serialize step. No response is sent on failure.
int32_t tqStreamProgressRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*              msgStr = pMsg->pCont;
  char*              msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t            msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t            code = 0;
  SStreamProgressReq req = {0};
  // The response is assembled in a local struct and serialized INTO the send
  // buffer. Serializing a struct onto its own storage lets the encoder overwrite
  // fields it has not read yet.
  SStreamProgressRsp rsp = {0};

  char* pRspBuf = taosMemoryCalloc(1, sizeof(SMsgHead) + sizeof(SStreamProgressRsp));
  if (pRspBuf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  // only computed once the allocation is known to be valid
  void* pRspBody = POINTER_SHIFT(pRspBuf, sizeof(SMsgHead));

  code = tDeserializeStreamProgressReq(msgBody, msgLen, &req);
  if (code == TSDB_CODE_SUCCESS) {
    code = tqGetStreamExecInfo(pTq->pVnode, req.streamId, &rsp.progressDelay, &rsp.fillHisFinished);
  }
  if (code == TSDB_CODE_SUCCESS) {
    rsp.fetchIdx = req.fetchIdx;
    rsp.subFetchIdx = req.subFetchIdx;
    rsp.vgId = req.vgId;
    rsp.streamId = req.streamId;

    // The body starts after the head, so only sizeof(SStreamProgressRsp) bytes
    // remain in the allocation.
    // NOTE(review): the result is treated as "non-zero means failure", as before;
    // confirm whether tSerializeStreamProgressRsp returns 0 or the encoded length
    // on success.
    code = tSerializeStreamProgressRsp(pRspBody, sizeof(SStreamProgressRsp), &rsp);
    if (code) {
      goto _OVER;
    }

    SRpcMsg rspMsg = {.info = pMsg->info, .code = 0};
    rspMsg.pCont = pRspBuf;
    pRspBuf = NULL;  // the buffer is handed to tmsgSendRsp() and is no longer freed below
    rspMsg.contLen = sizeof(SMsgHead) + sizeof(SStreamProgressRsp);
    tmsgSendRsp(&rspMsg);
  }

_OVER:
  if (pRspBuf) {
    taosMemoryFree(pRspBuf);
  }
  return code;
}
1184

1185
// always return success to mnode
1186
//todo: handle failure of build and send msg to mnode
1187
static void doSendChkptSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, int32_t code,
75✔
1188
                                 int32_t taskId) {
1189
  SRpcMsg rsp = {0};
75✔
1190
  int32_t ret = streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &rsp, code);
75✔
1191
  if (ret) {  // suppress the error in build checkpoint source rsp
75!
1192
    tqError("s-task:0x%x failed to build checkpoint-source rsp, code:%s", taskId, tstrerror(ret));
×
1193
  }
1194
  tmsgSendRsp(&rsp);  // error occurs
75✔
1195
}
75✔
1196

1197
// no matter what kinds of error happened, make sure the mnode will receive the success execution code.
1198
int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) {
3,200✔
1199
  int32_t                    vgId = TD_VID(pTq->pVnode);
3,200✔
1200
  SStreamMeta*               pMeta = pTq->pStreamMeta;
3,200✔
1201
  char*                      msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
3,200✔
1202
  int32_t                    len = pMsg->contLen - sizeof(SMsgHead);
3,200✔
1203
  int32_t                    code = 0;
3,200✔
1204
  SStreamCheckpointSourceReq req = {0};
3,200✔
1205
  SDecoder                   decoder = {0};
3,200✔
1206
  SStreamTask*               pTask = NULL;
3,200✔
1207
  int64_t                    checkpointId = 0;
3,200✔
1208

1209
  // disable auto rsp to mnode
1210
  pRsp->info.handle = NULL;
3,200✔
1211

1212
  tDecoderInit(&decoder, (uint8_t*)msg, len);
3,200✔
1213
  if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) {
3,199!
1214
    code = TSDB_CODE_MSG_DECODE_ERROR;
×
1215
    tDecoderClear(&decoder);
×
1216
    tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code));
×
1217
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1218
    return TSDB_CODE_SUCCESS;  // always return success to mnode,
×
1219
  }
1220

1221
  tDecoderClear(&decoder);
3,199✔
1222

1223
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
3,200✔
1224
    tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId);
13!
1225
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
13✔
1226
    return TSDB_CODE_SUCCESS;  // always return success to mnode
13✔
1227
  }
1228

1229
  if (!pTq->pVnode->restored) {
3,188✔
1230
    tqDebug("vgId:%d checkpoint-source msg received during restoring, checkpointId:%" PRId64
62✔
1231
            ", transId:%d s-task:0x%x ignore it",
1232
            vgId, req.checkpointId, req.transId, req.taskId);
1233
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
62✔
1234
    return TSDB_CODE_SUCCESS;  // always return success to mnode
62✔
1235
  }
1236

1237
  code = streamMetaAcquireTask(pMeta, req.streamId, req.taskId, &pTask);
3,126✔
1238
  if (pTask == NULL || code != 0) {
3,129!
1239
    tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. checkpointId:%" PRId64
2!
1240
            " transId:%d it may have been destroyed",
1241
            vgId, req.taskId, req.checkpointId, req.transId);
1242
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
2✔
1243
    return TSDB_CODE_SUCCESS;
×
1244
  }
1245

1246
  if (pTask->status.downstreamReady != 1) {
3,127!
1247
    // record the latest failed checkpoint id
1248
    streamTaskSetFailedChkptInfo(pTask, req.transId, req.checkpointId);
×
1249
    tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpointId:%" PRId64
×
1250
            ", transId:%d set it failed",
1251
            pTask->id.idStr, req.checkpointId, req.transId);
1252

1253
    streamMetaReleaseTask(pMeta, pTask);
×
1254
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1255
    return TSDB_CODE_SUCCESS;  // todo retry handle error
×
1256
  }
1257

1258
  // todo save the checkpoint failed info
1259
  streamMutexLock(&pTask->lock);
3,127✔
1260
  ETaskStatus status = streamTaskGetStatus(pTask).state;
3,130✔
1261

1262
  if (req.mndTrigger == 1) {
3,125✔
1263
    if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) {
931!
1264
      tqError("s-task:%s not ready for checkpoint, since it is halt, ignore checkpointId:%" PRId64 ", set it failure",
×
1265
              pTask->id.idStr, req.checkpointId);
1266

1267
      streamMutexUnlock(&pTask->lock);
×
1268
      streamMetaReleaseTask(pMeta, pTask);
×
1269
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1270
      return TSDB_CODE_SUCCESS;
×
1271
    }
1272
  } else {
1273
    if (status != TASK_STATUS__HALT) {
2,194!
1274
      tqError("s-task:%s should in halt status, let's halt it directly", pTask->id.idStr);
×
1275
      //      streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_HALT);
1276
    }
1277
  }
1278

1279
  // check if the checkpoint msg already sent or not.
1280
  if (status == TASK_STATUS__CK) {
3,119!
1281
    streamTaskGetActiveCheckpointInfo(pTask, NULL, &checkpointId);
×
1282

1283
    tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1284
           " transId:%d already handled, ignore msg and continue process checkpoint",
1285
           pTask->id.idStr, checkpointId, req.transId);
1286

1287
    streamMutexUnlock(&pTask->lock);
×
1288
    streamMetaReleaseTask(pMeta, pTask);
×
1289
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SYN_PROPOSE_NOT_READY, req.taskId);
×
1290
    return TSDB_CODE_SUCCESS;
×
1291
  } else {  // checkpoint already finished, and not in checkpoint status
1292
    if (req.checkpointId <= pTask->chkInfo.checkpointId) {
3,119!
1293
      tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1294
             " transId:%d already handled, return success",
1295
             pTask->id.idStr, req.checkpointId, req.transId);
1296

1297
      streamMutexUnlock(&pTask->lock);
×
1298
      streamMetaReleaseTask(pMeta, pTask);
×
1299
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1300
      return TSDB_CODE_SUCCESS;
×
1301
    }
1302
  }
1303

1304
  code = streamProcessCheckpointSourceReq(pTask, &req);
3,119✔
1305
  streamMutexUnlock(&pTask->lock);
3,127✔
1306

1307
  if (code) {
3,127!
1308
    qError("s-task:%s (vgId:%d) failed to process checkpoint-source req, code:%s", pTask->id.idStr, vgId,
×
1309
           tstrerror(code));
1310
    streamMetaReleaseTask(pMeta, pTask);
×
1311
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1312
    return TSDB_CODE_SUCCESS;
×
1313
  }
1314

1315
  if (req.mndTrigger) {
3,127✔
1316
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64 ", transId:%d, ",
931!
1317
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId);
1318
  } else {
1319
    const char* pPrevStatus = streamTaskGetStatusStr(streamTaskGetPrevStatus(pTask));
2,196✔
1320
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64
2,198!
1321
           ", transId:%d after transfer-state, prev status:%s",
1322
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId, pPrevStatus);
1323
  }
1324

1325
  code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask);
3,130✔
1326
  if (code != TSDB_CODE_SUCCESS) {
3,130!
1327
    streamTaskSetCheckpointFailed(pTask);  // set the checkpoint failed
×
1328
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1329
  }
1330

1331
  streamMetaReleaseTask(pMeta, pTask);
3,130✔
1332
  return TSDB_CODE_SUCCESS;
3,130✔
1333
}
1334

1335
// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task
1336
int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) {
7,682✔
1337
  int32_t vgId = TD_VID(pTq->pVnode);
7,682✔
1338

1339
  SStreamCheckpointReadyMsg* pReq = (SStreamCheckpointReadyMsg*)pMsg->pCont;
7,682✔
1340
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
7,682!
1341
    tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId,
×
1342
            (int32_t)pReq->downstreamTaskId);
1343
    return TSDB_CODE_STREAM_NOT_LEADER;
×
1344
  }
1345

1346
  return tqStreamTaskProcessCheckpointReadyMsg(pTq->pStreamMeta, pMsg);
7,687✔
1347
}
1348

1349
// Forward a task update request to the stream-task handler, supplying the vnode's
// message callbacks and its `restored` flag.
int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessUpdateReq(pMeta, &pTq->pVnode->msgCb, pMsg, pTq->pVnode->restored);
}
1352

1353
// Forward the content of a task reset request to the stream-task handler.
int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessTaskResetReq(pMeta, pMsg->pCont);
}
1356

1357
// Handle a "retrieve checkpoint-trigger" request.
//
// A leader vnode forwards the message to the stream-task handler. On any other
// role the request is decoded only to name the sending downstream task in the
// log, and is then rejected.
//
// Returns the handler's code on a leader, TSDB_CODE_INVALID_MSG when the request
// cannot be decoded, or TSDB_CODE_STREAM_NOT_LEADER otherwise.
int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t vgId = TD_VID(pTq->pVnode);

  if (vnodeIsRoleLeader(pTq->pVnode)) {
    return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg);
  }

  SRetrieveChkptTriggerReq req = {0};
  SDecoder                 decoder = {0};
  char*                    msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t                  len = pMsg->contLen - sizeof(SMsgHead);

  tDecoderInit(&decoder, (uint8_t*)msg, len);
  if (tDecodeRetrieveChkptTriggerReq(&decoder, &req) < 0) {
    tDecoderClear(&decoder);
    tqError("vgId:%d invalid retrieve checkpoint-trigger req received", vgId);
    return TSDB_CODE_INVALID_MSG;
  }
  tDecoderClear(&decoder);

  // the task id follows a "0x" prefix, so it is printed in hexadecimal
  tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from s-task:0x%" PRIx64, vgId,
          req.downstreamTaskId);
  return TSDB_CODE_STREAM_NOT_LEADER;
}
1382

1383
// Forward a retrieve checkpoint-trigger response to the stream-task handler of this vnode.
int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessRetrieveTriggerRsp(pMeta, pMsg);
}
1386

1387
// this function is needed, do not try to remove it.
1388
int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg) { return tqStreamProcessStreamHbRsp(pTq->pStreamMeta, pMsg); }
23,235✔
1389

1390
// Forward the response of a checkpoint request to the stream handler of this vnode.
int32_t tqProcessStreamReqCheckpointRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessReqCheckpointRsp(pMeta, pMsg);
}
1393

1394
// Forward a checkpoint-ready response to the stream handler of this vnode.
int32_t tqProcessTaskCheckpointReadyRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessCheckpointReadyRsp(pMeta, pMsg);
}
1397

1398
// Forward a checkpoint-report response to the stream handler of this vnode.
int32_t tqProcessTaskChkptReportRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessChkptReportRsp(pMeta, pMsg);
}
1401

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc