• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3613

14 Feb 2025 09:14AM UTC coverage: 63.499% (-0.01%) from 63.513%
#3613

push

travis-ci

web-flow
Merge pull request #29781 from taosdata/doc/internal

docs: minor changes

141396 of 286269 branches covered (49.39%)

Branch coverage included in aggregate %.

220278 of 283307 relevant lines covered (77.75%)

19138445.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.82
/source/dnode/vnode/src/tq/tq.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "tq.h"
17
#include "osDef.h"
18
#include "taoserror.h"
19
#include "tqCommon.h"
20
#include "tstream.h"
21
#include "vnd.h"
22

23
// 0: not init
24
// 1: already inited
25
// 2: wait to be inited or cleanup
26
static int32_t tqInitialize(STQ* pTq);
27

28
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return pHandle != NULL ? TMQ_HANDLE_STATUS_EXEC == pHandle->status : true; }
64,938!
29
static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) { if (pHandle != NULL) pHandle->status = TMQ_HANDLE_STATUS_EXEC; }
62,380✔
30
static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) { if (pHandle != NULL) pHandle->status = TMQ_HANDLE_STATUS_IDLE; }
62,399✔
31

32
void tqDestroyTqHandle(void* data) {
1,658✔
33
  if (data == NULL) return;
1,658!
34
  STqHandle* pData = (STqHandle*)data;
1,658✔
35
  qDestroyTask(pData->execHandle.task);
1,658✔
36

37
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
1,657✔
38
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
1,311!
39
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
346✔
40
    tqReaderClose(pData->execHandle.pTqReader);
261✔
41
    walCloseReader(pData->pWalReader);
261✔
42
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
261✔
43
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
85!
44
    walCloseReader(pData->pWalReader);
85✔
45
    tqReaderClose(pData->execHandle.pTqReader);
85✔
46
    taosMemoryFreeClear(pData->execHandle.execTb.qmsg);
85!
47
    nodesDestroyNode(pData->execHandle.execTb.node);
85✔
48
  }
49
  if (pData->msg != NULL) {
1,658!
50
    rpcFreeCont(pData->msg->pCont);
×
51
    taosMemoryFree(pData->msg);
×
52
    pData->msg = NULL;
×
53
  }
54
  if (pData->block != NULL) {
1,658!
55
    blockDataDestroy(pData->block);
×
56
  }
57
  if (pData->pRef) {
1,658✔
58
    walCloseRef(pData->pRef->pWal, pData->pRef->refId);
1,612✔
59
  }
60
}
61

62
static bool tqOffsetEqual(const STqOffset* pLeft, const STqOffset* pRight) {
8,754✔
63
  if (pLeft == NULL || pRight == NULL) {
8,754!
64
    return false;
×
65
  }
66
  return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG &&
17,151✔
67
         pLeft->val.version == pRight->val.version;
8,397✔
68
}
69

70
// Creates the STQ object of a vnode: allocates it, links it with pVnode in both directions, copies the
// path, creates the handle / push-manager / check-info / offset hashes and finally runs tqInitialize.
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, terrno when an allocation fails, otherwise the
// result of tqInitialize.
// NOTE(review): on failure the partially built STQ stays attached to pVnode->pTq; presumably the caller
// releases it through tqClose — confirm.
int32_t tqOpen(const char* path, SVnode* pVnode) {
  if (!path || !pVnode) {
    return TSDB_CODE_INVALID_PARA;
  }

  STQ* pNew = taosMemoryCalloc(1, sizeof(STQ));
  if (!pNew) {
    return terrno;
  }

  // link the two objects before anything else can fail
  pVnode->pTq = pNew;
  pNew->pVnode = pVnode;

  pNew->path = taosStrdup(path);
  if (!pNew->path) {
    return terrno;
  }

  // subscription handles, released through tqDestroyTqHandle
  pNew->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
  if (!pNew->pHandle) {
    return terrno;
  }
  taosHashSetFreeFp(pNew->pHandle, tqDestroyTqHandle);

  taosInitRWLatch(&pNew->lock);

  // handles waiting for new data to be pushed
  pNew->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK);
  if (!pNew->pPushMgr) {
    return terrno;
  }

  pNew->pCheckInfo = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
  if (!pNew->pCheckInfo) {
    return terrno;
  }
  taosHashSetFreeFp(pNew->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);

  pNew->pOffset = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_ENTRY_LOCK);
  if (!pNew->pOffset) {
    return terrno;
  }
  taosHashSetFreeFp(pNew->pOffset, (FDelete)tDeleteSTqOffset);

  return tqInitialize(pNew);
}
114

115
// Opens the stream meta for this tq, loads all stream tasks and then opens the tq meta store.
// Returns TSDB_CODE_INVALID_PARA for a NULL pTq, the streamMetaOpen error if that step fails,
// otherwise the result of tqMetaOpen.
int32_t tqInitialize(STQ* pTq) {
  if (pTq == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t vnodeId = TD_VID(pTq->pVnode);
  int32_t ret = streamMetaOpen(pTq->path, pTq, tqBuildStreamTask, tqExpandStreamTask, vnodeId, -1,
                               tqStartTaskCompleteCallback, &pTq->pStreamMeta);
  if (ret != TSDB_CODE_SUCCESS) {
    return ret;
  }

  streamMetaLoadAllTasks(pTq->pStreamMeta);
  return tqMetaOpen(pTq);
}
129

130
// Shuts down a tq object: closes the stream meta, answers every poll request still parked in the push
// manager with an empty response, destroys the hashes, the path string and the meta store, and finally
// frees the STQ itself. A NULL pTq is ignored (after the initial debug log).
void tqClose(STQ* pTq) {
  qDebug("start to close tq");
  if (pTq == NULL) {
    return;
  }

  // the vgId is only needed for logging / the empty responses
  int32_t vgId = 0;
  if (pTq->pVnode != NULL) {
    vgId = TD_VID(pTq->pVnode);
  } else if (pTq->pStreamMeta != NULL) {
    vgId = pTq->pStreamMeta->vgId;
  }

  // close the stream meta firstly
  streamMetaClose(pTq->pStreamMeta);

  // reply to and release every pending poll request
  for (void* pIter = taosHashIterate(pTq->pPushMgr, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pPushMgr, pIter)) {
    STqHandle* pPending = *(STqHandle**)pIter;
    if (pPending->msg == NULL) {
      continue;
    }
    tqPushEmptyDataRsp(pPending, vgId);
    rpcFreeCont(pPending->msg->pCont);
    taosMemoryFree(pPending->msg);
    pPending->msg = NULL;
  }

  taosHashCleanup(pTq->pHandle);
  taosHashCleanup(pTq->pPushMgr);
  taosHashCleanup(pTq->pCheckInfo);
  taosHashCleanup(pTq->pOffset);
  taosMemoryFree(pTq->path);
  tqMetaClose(pTq);
  qDebug("vgId:%d end to close tq", vgId);

#if 0
  streamMetaFreeTQDuringScanWalError(pTq);
#endif

  taosMemoryFree(pTq);
}
172

173
// Forwards the close notification to the stream meta, if both the tq and its stream meta exist.
void tqNotifyClose(STQ* pTq) {
  if (pTq != NULL && pTq->pStreamMeta != NULL) {
    streamMetaNotifyClose(pTq->pStreamMeta);
  }
}
182

183
// Answers the poll request parked on pHandle->msg with an empty data response (blockNum == 0) whose
// offsets are initialised from the request offset.
//
// pHandle: handle carrying the pending poll request; nothing is done when it, or its msg, is NULL.
// vgId:    vnode group id, used for logging and forwarded to tqSendDataRsp.
//
// Failures (deserialization, response init, send) are logged and otherwise swallowed. pHandle->msg is
// neither freed nor cleared here; that stays with the caller.
// NOTE(review): the deserialized request is not passed to tDestroySMqPollReq in this function, unlike
// tqProcessPollReq — confirm whether that is required here.
void tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) {
  // guard the msg too: it is dereferenced right below and may legitimately be NULL on a handle
  if (pHandle == NULL || pHandle->msg == NULL) {
    return;
  }
  int32_t    code = 0;
  SMqPollReq req = {0};
  code = tDeserializeSMqPollReq(pHandle->msg->pCont, pHandle->msg->contLen, &req);
  if (code < 0) {
    tqError("tDeserializeSMqPollReq %d failed, code:%d", pHandle->msg->contLen, code);
    return;
  }

  SMqDataRsp dataRsp = {0};
  code = tqInitDataRsp(&dataRsp, req.reqOffset);
  if (code != 0) {
    tqError("tqInitDataRsp failed, code:%d", code);
    return;
  }
  dataRsp.blockNum = 0;
  char buf[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(buf, TSDB_OFFSET_LEN, &dataRsp.reqOffset);
  tqInfo("tqPushEmptyDataRsp to consumer:0x%" PRIx64 " vgId:%d, offset:%s,QID:0x%" PRIx64, req.consumerId, vgId, buf,
         req.reqId);

  code = tqSendDataRsp(pHandle, pHandle->msg, &req, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
  if (code != 0) {
    tqError("tqSendDataRsp failed, code:%d", code);
  }
  tDeleteMqDataRsp(&dataRsp);
}
213

214
// Logs the request/response offsets of a poll response and hands it to tqDoSendDataRsp together with
// the valid version range reported by the handle's wal reader.
// Returns TSDB_CODE_INVALID_PARA when any pointer argument is NULL, otherwise the tqDoSendDataRsp result.
int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type,
                      int32_t vgId) {
  if (pHandle == NULL || pMsg == NULL || pReq == NULL || pRsp == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int64_t startVer = 0;
  int64_t endVer = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &startVer, &endVer);

  // printable forms of both offsets, for the debug log only
  char reqOffsetStr[TSDB_OFFSET_LEN] = {0};
  char rspOffsetStr[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(reqOffsetStr, TSDB_OFFSET_LEN, &(pRsp->reqOffset));
  (void)tFormatOffset(rspOffsetStr, TSDB_OFFSET_LEN, &(pRsp->rspOffset));

  tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) start to send rsp, block num:%d, req:%s, rsp:%s,QID:0x%" PRIx64,
          vgId, pReq->consumerId, pReq->epoch, pRsp->blockNum, reqOffsetStr, rspOffsetStr, pReq->reqId);

  return tqDoSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type, startVer, endVer);
}
232

233
// Handles an offset-commit message: decodes the SMqVgOffset, and unless the committed log offset equals
// the one already stored, puts it into the in-memory offset hash and persists it in the offset store.
//
// pTq:      tq object; TSDB_CODE_INVALID_PARA is returned when NULL.
// sversion: not used by this function.
// msg/msgLen: encoded SMqVgOffset.
//
// Returns 0 on success (including the "nothing to update" case when the lookup succeeded),
// TSDB_CODE_INVALID_MSG for an undecodable message or an unknown offset type, and -1 with terrno set
// when the hash insert or the store write fails.
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);

  int32_t  code = 0;
  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // clear the decoder on the failure path as well; previously only the success path did so
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pOffset = &vgOffset.offset;

  if (pOffset->val.type == TMQ_OFFSET__SNAPSHOT_DATA || pOffset->val.type == TMQ_OFFSET__SNAPSHOT_META) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
            pOffset->subKey, vgId, pOffset->val.uid, pOffset->val.ts);
  } else if (pOffset->val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, pOffset->subKey, vgId,
            pOffset->val.version);
  } else {
    tqError("invalid commit offset type:%d", pOffset->val.type);
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pSavedOffset = NULL;
  code = tqMetaGetOffset(pTq, pOffset->subKey, &pSavedOffset);
  if (code == 0 && tqOffsetEqual(pOffset, pSavedOffset)) {
    tqInfo("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64,
           vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version);
    goto end;  // no need to update the offset value
  }

  // save the new offset value
  if (taosHashPut(pTq->pOffset, pOffset->subKey, strlen(pOffset->subKey), pOffset, sizeof(STqOffset))) {
    // the offset was not stored, so release the decoded value here, as the `end` path does
    // NOTE(review): assumes a failed taosHashPut keeps no reference to the entry — confirm
    tOffsetDestroy(&vgOffset.offset.val);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // The persisted length excludes sizeof(consumerId). Compare as signed values so that a negative
  // msgLen yields 0 instead of being promoted to a huge unsigned number.
  int32_t consumerIdLen = (int32_t)sizeof(vgOffset.consumerId);
  int32_t saveLen = (msgLen >= consumerIdLen) ? (msgLen - consumerIdLen) : 0;
  if (tqMetaSaveInfo(pTq, pTq->pOffsetStore, pOffset->subKey, strlen(pOffset->subKey), msg, saveLen) < 0) {
    // the entry is already in the offset hash at this point, so the value is not destroyed here
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
end:
  tOffsetDestroy(&vgOffset.offset.val);
  return code;
}
289

290
// Handles a consumer seek request: looks up the handle for the sub key and, when it belongs to the
// requesting consumer, unregisters it from the push manager. The outcome is reported to the client via
// the rpc response code; the function itself returns 0 once a response was sent, or
// TSDB_CODE_INVALID_PARA for NULL arguments (no response is sent in that case).
int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  SMqSeekReq seekReq = {0};
  int32_t    vgId = TD_VID(pTq->pVnode);
  SRpcMsg    rsp = {.info = pMsg->info};
  int        rspCode = 0;

  if (tDeserializeSMqSeekReq(pMsg->pCont, pMsg->contLen, &seekReq) < 0) {
    rspCode = TSDB_CODE_OUT_OF_MEMORY;
  } else {
    tqDebug("tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s", seekReq.consumerId, vgId, seekReq.subKey);
    taosWLockLatch(&pTq->lock);

    STqHandle* pHandle = taosHashGet(pTq->pHandle, seekReq.subKey, strlen(seekReq.subKey));
    if (pHandle == NULL) {
      // an unknown sub key is reported as success
      tqWarn("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", seekReq.consumerId, vgId, seekReq.subKey);
    } else if (pHandle->consumerId != seekReq.consumerId) {
      // 2. check consumer-vg assignment status
      tqError("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              seekReq.consumerId, vgId, seekReq.subKey, pHandle->consumerId);
      rspCode = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    } else {
      // if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to
      // TMQ_VG_STATUS__IDLE, otherwise poll data failed after seek.
      tqUnregisterPushHandle(pTq, pHandle);
    }

    taosWUnLockLatch(&pTq->lock);
  }

  rsp.code = rspCode;
  tmsgSendRsp(&rsp);
  return 0;
}
334

335
// Scans the check-info hash for entries that refer to table tbUid and tests whether colId appears in
// one of their column-id lists.
// Returns -1 when the column is listed, 0 when it is not, and TSDB_CODE_INVALID_PARA for a NULL pTq.
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
  if (pTq == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  for (void* pIter = taosHashIterate(pTq->pCheckInfo, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pCheckInfo, pIter)) {
    STqCheckInfo* pInfo = (STqCheckInfo*)pIter;
    if (pInfo->ntbUid != tbUid) {
      continue;
    }

    int32_t numOfCols = taosArrayGetSize(pInfo->colIdList);
    for (int32_t idx = 0; idx < numOfCols; idx++) {
      int16_t* pListedColId = taosArrayGet(pInfo->colIdList, idx);
      if (pListedColId != NULL && (*pListedColId) == colId) {
        // leaving the iteration early: the iterator has to be cancelled explicitly
        taosHashCancelIterate(pTq->pCheckInfo, pIter);
        return -1;
      }
    }
  }

  return 0;
}
367

368
// Re-queues, under the write lock, the poll request parked on every handle in the push manager onto
// the query queue as a TDMT_VND_TMQ_CONSUME message, then clears the push manager. pMsg is not used.
// Returns TSDB_CODE_INVALID_PARA for a NULL pTq, otherwise 0.
int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t vgId = TD_VID(pTq->pVnode);
  taosWLockLatch(&pTq->lock);

  if (taosHashGetSize(pTq->pPushMgr) > 0) {
    for (void* pIter = taosHashIterate(pTq->pPushMgr, NULL); pIter != NULL;
         pIter = taosHashIterate(pTq->pPushMgr, pIter)) {
      STqHandle* pWaiting = *(STqHandle**)pIter;
      tqDebug("vgId:%d start set submit for pHandle:%p, consumer:0x%" PRIx64, vgId, pWaiting, pWaiting->consumerId);

      if (pWaiting->msg == NULL) {
        // stop at the first handle without a pending request
        tqError("pHandle->msg should not be null");
        taosHashCancelIterate(pTq->pPushMgr, pIter);
        break;
      }

      // the content buffer is handed over to the queued message; only the SRpcMsg shell is freed here
      SRpcMsg queued = {.msgType = TDMT_VND_TMQ_CONSUME,
                        .pCont = pWaiting->msg->pCont,
                        .contLen = pWaiting->msg->contLen,
                        .info = pWaiting->msg->info};
      if (tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &queued) != 0) {
        tqError("vgId:%d tmsgPutToQueue failed, consumer:0x%" PRIx64, vgId, pWaiting->consumerId);
      }
      taosMemoryFree(pWaiting->msg);
      pWaiting->msg = NULL;
    }

    taosHashClear(pTq->pPushMgr);
  }

  taosWUnLockLatch(&pTq->lock);
  return 0;
}
405

406
// Handles a consumer poll request.
//
// Steps: deserialize the request, find the handle for the sub key, verify that it is assigned to the
// requesting consumer, wait (10ms retries) until the handle is not executing and mark it executing,
// raise the handle epoch if the request carries a newer one, extract the data via tqExtractDataForMq
// and finally mark the handle idle again.
//
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, the (negative) deserialization result with terrno
// set to TSDB_CODE_INVALID_MSG, -1 with terrno set when the handle is missing or owned by another
// consumer, otherwise the result of tqExtractDataForMq.
// Every exit after deserialization goes through END so that the request is released with
// tDestroySMqPollReq.
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqPollReq req = {0};
  int        code = tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req);
  if (code < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    goto END;
  }

  int64_t      consumerId = req.consumerId;
  int32_t      reqEpoch = req.epoch;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
  STqHandle*   pHandle = NULL;

  while (1) {
    taosWLockLatch(&pTq->lock);
    // 1. find handle
    code = tqMetaGetHandle(pTq, req.subKey, &pHandle);
    if (code != TDB_CODE_SUCCESS) {
      tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_MSG;
      taosWUnLockLatch(&pTq->lock);
      // leave through END (same -1 result as before) so that req is destroyed like on the other paths
      code = -1;
      goto END;
    }

    // 2. check rebalance status
    if (pHandle->consumerId != consumerId) {
      tqError("ERROR tmq poll: consumer:0x%" PRIx64
              " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId);
      terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
      taosWUnLockLatch(&pTq->lock);
      code = -1;
      goto END;
    }

    bool exec = tqIsHandleExec(pHandle);
    if (!exec) {
      // claim the handle while still holding the lock
      tqSetHandleExec(pHandle);
      //      qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS);
      tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId,
              req.subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      break;
    }
    taosWUnLockLatch(&pTq->lock);

    tqDebug("tmq poll: consumer:0x%" PRIx64
            " vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p",
            consumerId, vgId, req.subKey, pHandle);
    taosMsleep(10);
  }

  // 3. update the epoch value
  if (pHandle->epoch < reqEpoch) {
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch,
            reqEpoch);
    pHandle->epoch = reqEpoch;
  }

  char buf[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(buf, TSDB_OFFSET_LEN, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s,QID:0x%" PRIx64,
          consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId);

  code = tqExtractDataForMq(pTq, pHandle, &req, pMsg);
  tqSetHandleIdle(pHandle);

  tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle idle, pHandle:%p", consumerId, vgId,
          req.subKey, pHandle);

END:
  tDestroySMqPollReq(&req);
  return code;
}
485

486
// Answers a "committed offset" query: decodes the SMqVgOffset that follows the SMsgHead in the request,
// looks up the stored offset for its sub key and sends it back encoded as a SMqVgOffset.
//
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, TSDB_CODE_OUT_OF_MEMORY (also stored in terrno)
// when decoding fails, TSDB_CODE_TMQ_NO_COMMITTED when no offset is stored, an encode/allocation error,
// or 0 after the response has been sent.
int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  // the payload starts right after the message head
  void*   data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t len = pMsg->contLen - sizeof(SMsgHead);

  SMqVgOffset vgOffset = {0};

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)data, len);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // clear the decoder on the failure path as well; previously only the success path did so
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return terrno;
  }

  STqOffset* pSavedOffset = NULL;
  int32_t    code = tqMetaGetOffset(pTq, vgOffset.offset.subKey, &pSavedOffset);
  if (code != 0) {
    return TSDB_CODE_TMQ_NO_COMMITTED;
  }
  // respond with the stored offset (struct copy of the saved entry)
  vgOffset.offset = *pSavedOffset;

  // first pass computes the encoded size into len, second pass writes into the rpc buffer
  tEncodeSize(tEncodeMqVgOffset, &vgOffset, len, code);
  if (code < 0) {
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  void* buf = rpcMallocCont(len);
  if (buf == NULL) {
    return terrno;
  }
  SEncoder encoder = {0};
  tEncoderInit(&encoder, buf, len);
  code = tEncodeMqVgOffset(&encoder, &vgOffset);
  tEncoderClear(&encoder);
  if (code < 0) {
    rpcFreeCont(buf);
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  SRpcMsg rsp = {.info = pMsg->info, .pCont = buf, .contLen = len, .code = 0};

  tmsgSendRsp(&rsp);
  return 0;
}
534

535
// Answers a wal-info (assignment) request with a log-type response offset and the valid wal version
// range of the handle's reader.
//
// The response version is chosen as follows:
//   - request offset is a log offset       -> the requested version
//   - request offset type is negative      -> the stored offset if one exists (it must be a log offset),
//                                             otherwise the earliest / latest wal version depending on
//                                             the reset type
//   - anything else                        -> TSDB_CODE_INVALID_PARA
//
// Returns TSDB_CODE_INVALID_PARA for NULL arguments or unsupported requests, TSDB_CODE_INVALID_MSG for
// an undecodable request or unknown sub key, TSDB_CODE_TMQ_CONSUMER_MISMATCH when the handle belongs to
// another consumer, otherwise the result of tqInitDataRsp / tqDoSendDataRsp.
// NOTE(review): the deserialized request is not passed to tDestroySMqPollReq here, unlike
// tqProcessPollReq — confirm whether that is required.
int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t    code = 0;
  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    return TSDB_CODE_INVALID_MSG;
  }

  int64_t      consumerId = req.consumerId;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
  int64_t      sver = 0;
  int64_t      ever = 0;

  // 1. find handle
  taosRLockLatch(&pTq->lock);
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (pHandle == NULL) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s not found", consumerId, vgId, req.subKey);
    taosRUnLockLatch(&pTq->lock);
    return TSDB_CODE_INVALID_MSG;
  }

  // 2. check rebalance status
  if (pHandle->consumerId != consumerId) {
    tqDebug("ERROR consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, vgId, req.subKey, pHandle->consumerId);
    taosRUnLockLatch(&pTq->lock);
    return TSDB_CODE_TMQ_CONSUMER_MISMATCH;
  }

  // read the version range while the handle is still protected by the read lock
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);
  taosRUnLockLatch(&pTq->lock);

  SMqDataRsp dataRsp = {0};
  code = tqInitDataRsp(&dataRsp, req.reqOffset);
  if (code != 0) {
    return code;
  }

  if (req.useSnapshot == true) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s snapshot not support wal info", consumerId, vgId, req.subKey);
    code = TSDB_CODE_INVALID_PARA;
    goto END;
  }

  dataRsp.rspOffset.type = TMQ_OFFSET__LOG;

  if (reqOffset.type == TMQ_OFFSET__LOG) {
    dataRsp.rspOffset.version = reqOffset.version;
  } else if (reqOffset.type >= 0) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey,
            reqOffset.type);
    code = TSDB_CODE_INVALID_PARA;
    goto END;
  } else {
    STqOffset* pStored = NULL;
    code = tqMetaGetOffset(pTq, req.subKey, &pStored);
    if (code != 0) {
      // nothing committed yet: fall back to the reset policy carried by the request
      if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEST) {
        dataRsp.rspOffset.version = sver;  // not consume yet, set the earliest position
      } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
        dataRsp.rspOffset.version = ever;
      }
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%" PRId64, consumerId, vgId, req.subKey,
             dataRsp.rspOffset.version);
    } else {
      if (pStored->val.type != TMQ_OFFSET__LOG) {
        tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s, no valid wal info", consumerId, vgId, req.subKey);
        code = TSDB_CODE_INVALID_PARA;
        goto END;
      }

      dataRsp.rspOffset.version = pStored->val.version;
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from store:%" PRId64, consumerId, vgId,
             req.subKey, dataRsp.rspOffset.version);
    }
  }

  code = tqDoSendDataRsp(&pMsg->info, &dataRsp, req.epoch, req.consumerId, TMQ_MSG_TYPE__WALINFO_RSP, sver, ever);

END:
  tDeleteMqDataRsp(&dataRsp);
  return code;
}
622

623
// Handles a delete-subscription request (msg is interpreted as a SMqVDeleteReq).
//
// If a handle exists for the sub key, waits (10ms retries) until it is not executing, unregisters it
// from the push manager and removes it from the handle hash. Afterwards the offset is removed from the
// in-memory hash and from the offset store, and the handle is removed from the exec store. Failures of
// the individual removals are only logged.
//
// sversion and msgLen are not used. Returns TSDB_CODE_INVALID_PARA for NULL arguments, otherwise 0.
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL || msg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;
  int32_t        vgId = TD_VID(pTq->pVnode);

  tqInfo("vgId:%d, tq process delete sub req %s", vgId, pReq->subKey);
  int32_t code = 0;

  STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
  if (pHandle) {
    taosWLockLatch(&pTq->lock);
    // the lock is dropped while sleeping so that the running poll can finish and mark the handle idle
    while (tqIsHandleExec(pHandle)) {
      tqInfo("vgId:%d, topic:%s, subscription is executing, delete wait for 10ms and retry, pHandle:%p", vgId,
             pHandle->subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      taosMsleep(10);
      taosWLockLatch(&pTq->lock);
    }
    tqUnregisterPushHandle(pTq, pHandle);
    code = taosHashRemove(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
    if (code != 0) {
      tqError("cannot process tq delete req %s, since no such handle", pReq->subKey);
    }
    taosWUnLockLatch(&pTq->lock);
  }

  taosWLockLatch(&pTq->lock);
  if (taosHashRemove(pTq->pOffset, pReq->subKey, strlen(pReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in hash", pReq->subKey);
  }
  if (tqMetaDeleteInfo(pTq, pTq->pOffsetStore, pReq->subKey, strlen(pReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pReq->subKey);
  }

  // this removal targets the exec store, so the message names the handle rather than the offset
  if (tqMetaDeleteInfo(pTq, pTq->pExecStore, pReq->subKey, strlen(pReq->subKey)) < 0) {
    tqError("cannot process tq delete req %s, since no such handle in tdb", pReq->subKey);
  }
  taosWUnLockLatch(&pTq->lock);

  return 0;
}
671

672
// Decodes a STqCheckInfo from msg, stores it in the check-info hash keyed by topic and persists the raw
// message in the check store. A negative msgLen is treated as 0. sversion is not used.
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, the decode or hash error, otherwise the
// tqMetaSaveInfo result.
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL || msg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t      len = (msgLen >= 0) ? msgLen : 0;
  STqCheckInfo checkInfo = {0};

  int32_t code = tqMetaDecodeCheckInfo(&checkInfo, msg, len);
  if (code != 0) {
    return code;
  }

  code = taosHashPut(pTq->pCheckInfo, checkInfo.topic, strlen(checkInfo.topic), &checkInfo, sizeof(STqCheckInfo));
  if (code != 0) {
    // the entry was not stored, so the decoded info is released here
    tDeleteSTqCheckInfo(&checkInfo);
    return code;
  }

  return tqMetaSaveInfo(pTq, pTq->pCheckStore, checkInfo.topic, strlen(checkInfo.topic), msg, len);
}
690

691
// Removes the check info whose key is the NUL-terminated string in msg, first from the in-memory hash
// and then from the check store. sversion and msgLen are not used.
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, TSDB_CODE_TSC_INTERNAL_ERROR when the hash removal
// reports a negative result, otherwise the tqMetaDeleteInfo result.
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL || msg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  size_t keyLen = strlen(msg);
  if (taosHashRemove(pTq->pCheckInfo, msg, keyLen) < 0) {
    return TSDB_CODE_TSC_INTERNAL_ERROR;
  }

  return tqMetaDeleteInfo(pTq, pTq->pCheckStore, msg, keyLen);
}
700

701
// Handles a rebalance/subscribe request (SMqRebVgReq).
//
// When no handle exists for the sub key a new one is created and saved; a request whose new consumer id
// is -1 is rejected with TSDB_CODE_INVALID_PARA in that case. When a handle exists, the function waits
// (10ms retries) until it is not executing and then, if the consumer changed, switches the handle to
// the new consumer, resets its epoch, unregisters it from the push manager and saves it.
//
// sversion is not used. Returns TSDB_CODE_INVALID_PARA for NULL arguments, a negative decode/create
// result, or the result of the last tqMetaSaveHandle call (0 when nothing had to be saved).
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL || msg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int         ret = 0;
  SMqRebVgReq req = {0};
  SDecoder    dc = {0};

  tDecoderInit(&dc, (uint8_t*)msg, msgLen);
  ret = tDecodeSMqRebVgReq(&dc, &req);
  if (ret < 0) {
    goto end;
  }

  tqInfo("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey,
         req.oldConsumerId, req.newConsumerId);

  // look the handle up under the read lock; the lookup result is only used for logging
  STqHandle* pHandle = NULL;
  taosRLockLatch(&pTq->lock);
  int32_t lookupCode = tqMetaGetHandle(pTq, req.subKey, &pHandle);
  if (lookupCode != 0) {
    tqInfo("vgId:%d, tq process sub req:%s, no such handle, create new one", pTq->pVnode->config.vgId, req.subKey);
  }
  taosRUnLockLatch(&pTq->lock);

  if (pHandle != NULL) {
    // existing subscription: wait until no poll is running on it, releasing the lock while sleeping
    taosWLockLatch(&pTq->lock);
    while (tqIsHandleExec(pHandle)) {
      tqInfo("vgId:%d, topic:%s, subscription is executing, sub wait for 10ms and retry, pHandle:%p",
             pTq->pVnode->config.vgId, pHandle->subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      taosMsleep(10);
      taosWLockLatch(&pTq->lock);
    }

    if (pHandle->consumerId != req.newConsumerId) {
      tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
             req.newConsumerId);

      atomic_store_64(&pHandle->consumerId, req.newConsumerId);
      atomic_store_32(&pHandle->epoch, 0);
      tqUnregisterPushHandle(pTq, pHandle);
      ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
    } else {  // do nothing
      tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId);
    }
    taosWUnLockLatch(&pTq->lock);
  } else {
    // new subscription
    if (req.oldConsumerId != -1) {
      tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
              req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
    }
    if (req.newConsumerId == -1) {
      tqError("vgId:%d, tq invalid rebalance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
      ret = TSDB_CODE_INVALID_PARA;
      goto end;
    }

    STqHandle newHandle = {0};
    ret = tqMetaCreateHandle(pTq, &req, &newHandle);
    if (ret < 0) {
      tqDestroyTqHandle(&newHandle);
      goto end;
    }

    taosWLockLatch(&pTq->lock);
    ret = tqMetaSaveHandle(pTq, req.subKey, &newHandle);
    taosWUnLockLatch(&pTq->lock);
  }

end:
  tDecoderClear(&dc);
  return ret;
}
775

776
// Free callback registered on the sink table-info hash: `ptr` addresses a slot that stores a heap pointer, and the
// pointer stored in that slot is released.
static void freePtr(void* ptr) {
  void** ppStored = (void**)ptr;
  taosMemoryFree(*ppStored);
}
39,833!
777

778
/*
 * Build a stream task on this vnode.
 *
 * Steps, in order: run streamTaskInit(), wire the sink callbacks that match the task's output type, open a WAL
 * reader for source-level tasks, reset the upstream stage info, restore the version info and log a summary.
 *
 * pTqObj:         the owning STQ, passed as an opaque pointer
 * pTask:          the task to build
 * nextProcessVer: forwarded to streamTaskInit() and printed as "inputVer" in the summary log
 *
 * Returns 0 on success. Otherwise returns the streamTaskInit() error, terrno when the sink schema, the sink
 * table-info hash or the WAL reader cannot be created, or TSDB_CODE_STREAM_INTERNAL_ERROR when a task that is not
 * a fill-history task has checkpointVer > nextProcessVer.
 */
int32_t tqBuildStreamTask(void* pTqObj, SStreamTask* pTask, int64_t nextProcessVer) {
  STQ*    pTq = (STQ*)pTqObj;
  int32_t vgId = TD_VID(pTq->pVnode);

  tqDebug("s-task:0x%x start to build task", pTask->id.taskId);

  int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, nextProcessVer);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pTask->pBackend = NULL;

  // sink: only the SMA and the table output types need extra wiring here
  STaskOutputInfo* pOut = &pTask->outputInfo;
  if (pOut->type == TASK_OUTPUT__SMA) {
    pOut->smaSink.vnode = pTq->pVnode;
    pOut->smaSink.smaSink = smaHandleRes;
  } else if (pOut->type == TASK_OUTPUT__TABLE) {
    pOut->tbSink.vnode = pTq->pVnode;
    pOut->tbSink.tbSinkFunc = tqSinkDataIntoDstTable;

    // use the schema version recorded in the meta when it is available, otherwise fall back to version 1
    int32_t   schemaVer = 1;
    SMetaInfo metaInfo = {0};
    code = metaGetInfo(pTq->pVnode->pMeta, pOut->tbSink.stbUid, &metaInfo, NULL);
    if (code == TSDB_CODE_SUCCESS) {
      schemaVer = metaInfo.skmVer;
    }

    SSchemaWrapper* pWrapper = pOut->tbSink.pSchemaWrapper;
    pOut->tbSink.pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, schemaVer);
    if (pOut->tbSink.pTSchema == NULL) {
      return terrno;
    }

    pOut->tbSink.pTbInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
    if (pOut->tbSink.pTbInfo == NULL) {
      tqError("vgId:%d failed init sink tableInfo, code:%s", vgId, tstrerror(terrno));
      return terrno;
    }

    // the entries of the hash are released through freePtr
    tSimpleHashSetFreeFp(pOut->tbSink.pTbInfo, freePtr);
  }

  // source tasks read their input from the WAL
  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    // delete msg also extract from wal files
    SWalFilterCond cond = {.deleteMsg = 1, .scanDropCtb = (pTask->subtableWithoutMd5 ? true : false)};

    pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond, pTask->id.taskId);
    if (pTask->exec.pWalReader == NULL) {
      tqError("vgId:%d failed init wal reader, code:%s", vgId, tstrerror(terrno));
      return terrno;
    }
  }

  streamTaskResetUpstreamStageInfo(pTask);

  SCheckpointInfo* pChkInfo = &pTask->chkInfo;
  tqSetRestoreVersionInfo(pTask);

  char*       pCurStatus = streamTaskGetStatus(pTask).name;
  const char* pNextStatus = streamTaskGetStatusStr(pTask->status.taskStatus);

  if (pTask->info.fillHistory) {
    tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
           " nextProcessVer:%" PRId64
           " child id:%d, level:%d, cur-status:%s, next-status:%s fill-history:%d, related stream task:0x%x "
           "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
           vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
           pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
           (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam, nextProcessVer);
    return 0;
  }

  tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
         " nextProcessVer:%" PRId64
         " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x "
         "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
         vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
         pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
         (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam, nextProcessVer);

  // a checkpoint version beyond the next version to process is rejected for non fill-history tasks
  if (pChkInfo->checkpointVer > pChkInfo->nextProcessVer) {
    tqError("vgId:%d build stream task, s-task:%s, checkpointVer:%" PRId64 " > nextProcessVer:%" PRId64, vgId,
            pTask->id.idStr, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
  }

  return 0;
}
867

868
// Forward a task check request to tqStreamTaskProcessCheckReq() together with this tq's stream meta.
int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessCheckReq(pMeta, pMsg);
}
22,344✔
869

870
// Forward a task check response to tqStreamTaskProcessCheckRsp(), passing along whether this vnode is the leader.
int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;

  int32_t code = tqStreamTaskProcessCheckRsp(pMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
  return code;
}
873

874
// Forward a task deploy request to tqStreamTaskProcessDeployReq(), adding the vnode's message callbacks, its
// leader role and its `restored` flag to the caller-supplied version and message buffer.
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;

  int32_t code = tqStreamTaskProcessDeployReq(pMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen,
                                              vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored);
  return code;
}
878

879
// Start step 2 of a fill-history task after the related stream task has been halted.
//
// pTask:       the fill-history task
// pStreamTask: the related stream task; its haltVer is used to set the step-2 version range
// pTq:         used to trigger the asynchronous WAL scan
//
// When streamHistoryTaskSetVerRangeStep2() reports that step 2 is already done, the trans-state is put into the
// input queue and the task is executed directly. Otherwise the scanner is prepared for the step-2 range, the WAL
// reader is positioned at the range start and, once the SCANHIST_DONE event is handled, a WAL scan is scheduled.
// Failures are only logged; nothing is returned.
static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) {
  const char*    idStr = pTask->id.idStr;
  int64_t        haltVer = pStreamTask->hTaskInfo.haltVer;
  SVersionRange* pRange = &pTask->step2Range;
  int32_t        vgId = pTask->pMeta->vgId;

  // if it's an source task, extract the last version in wal.
  bool step2Done = streamHistoryTaskSetVerRangeStep2(pTask, haltVer);
  pTask->execInfo.step2Start = taosGetTimestampMs();

  if (step2Done) {
    qDebug("s-task:%s scan wal(step 2) verRange:%" PRId64 "-%" PRId64 " ended, elapsed time:%.2fs", idStr,
           pRange->minVer, pRange->maxVer, 0.0);

    int32_t putCode = streamTaskPutTranstateIntoInputQ(pTask);  // todo: msg lost.
    if (putCode) {
      qError("s-task:%s failed put trans-state into inputQ, code:%s", idStr, tstrerror(putCode));
    }

    (void)streamExecTask(pTask);  // exec directly
    return;
  }

  STimeWindow* pWin = &pTask->dataRange.window;
  tqDebug("s-task:%s level:%d verRange:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64
          ", do secondary scan-history from WAL after halt the related stream task:%s",
          idStr, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pWin->skey, pWin->ekey,
          pStreamTask->id.idStr);

  if (pTask->status.schedStatus != TASK_SCHED_STATUS__WAITING) {
    tqError("s-task:%s level:%d unexpected sched-status:%d", idStr, pTask->info.taskLevel,
            pTask->status.schedStatus);
  }

  int32_t code = streamSetParamForStreamScannerStep2(pTask, pRange, pWin);
  if (code) {
    tqError("s-task:%s level:%d failed to set step2 param", idStr, pTask->info.taskLevel);
  }

  // the WAL scan starts from the lower bound of the step-2 range
  int64_t startVer = pRange->minVer;
  pTask->chkInfo.nextProcessVer = startVer;

  walReaderSetSkipToVersion(pTask->exec.pWalReader, startVer);
  tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", idStr,
          startVer, pRange->maxVer, TASK_SCHED_STATUS__INACTIVE);

  // the previous sched-status returned here is not needed
  (void)streamTaskSetSchedStatusInactive(pTask);

  // now the fill-history task starts to scan data from wal files.
  code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE);
  if (code != TSDB_CODE_SUCCESS) {
    return;
  }

  code = tqScanWalAsync(pTq, false);
  if (code) {
    tqError("vgId:%d failed to start scan wal file, code:%s", vgId, tstrerror(code));
  }
}
2,363✔
931

932
// Callback passed to streamTaskHandleEventAsync() together with the HALT event of a stream task: acquire the
// related fill-history task (pStreamTask->hTaskInfo.id) and start its step 2.
//
// pStreamTask: the stream task the event was raised for
// param:       the STQ handed over when the callback was registered
//
// Always returns TSDB_CODE_SUCCESS; a fill-history task that cannot be acquired is only logged.
int32_t handleStep2Async(SStreamTask* pStreamTask, void* param) {
  STQ*         pTq = param;
  SStreamMeta* pMeta = pStreamTask->pMeta;
  STaskId      fillId = pStreamTask->hTaskInfo.id;
  SStreamTask* pFillTask = NULL;

  // success is judged by the returned task pointer, the return code itself is not inspected
  (void)streamMetaAcquireTask(pStreamTask->pMeta, fillId.streamId, fillId.taskId, &pFillTask);

  if (pFillTask != NULL) {
    doStartFillhistoryStep2(pFillTask, pStreamTask, pTq);
    streamMetaReleaseTask(pMeta, pFillTask);
  } else {
    tqWarn("s-task:0x%x failed to acquired it to exec step 2, scan wal quit", (int32_t)fillId.taskId);
  }

  return TSDB_CODE_SUCCESS;
}
949

950
// this function should be executed by only one thread, so we set an sentinel to protect this function
951
int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
2,602✔
952
  SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont;
2,602✔
953
  SStreamMeta*           pMeta = pTq->pStreamMeta;
2,602✔
954
  int32_t                code = TSDB_CODE_SUCCESS;
2,602✔
955
  SStreamTask*           pTask = NULL;
2,602✔
956
  SStreamTask*           pStreamTask = NULL;
2,602✔
957

958
  code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask);
2,602✔
959
  if (pTask == NULL) {
2,602!
960
    tqError("vgId:%d failed to acquire stream task:0x%x during scan history data, task may have been destroyed",
×
961
            pMeta->vgId, pReq->taskId);
962
    return code;
×
963
  }
964

965
  // do recovery step1
966
  const char* id = pTask->id.idStr;
2,602✔
967
  char*       pStatus = streamTaskGetStatus(pTask).name;
2,602✔
968

969
  // avoid multi-thread exec
970
  while (1) {
×
971
    int32_t sentinel = atomic_val_compare_exchange_32(&pTask->status.inScanHistorySentinel, 0, 1);
2,601✔
972
    if (sentinel != 0) {
2,602!
973
      tqDebug("s-task:%s already in scan-history func, wait for 100ms, and try again", id);
×
974
      taosMsleep(100);
×
975
    } else {
976
      break;
2,602✔
977
    }
978
  }
979

980
  // let's decide which step should be executed now
981
  if (pTask->execInfo.step1Start == 0) {
2,602✔
982
    int64_t ts = taosGetTimestampMs();
2,372✔
983
    pTask->execInfo.step1Start = ts;
2,372✔
984
    tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts);
2,372✔
985
  } else {
986
    if (pTask->execInfo.step2Start == 0) {
230✔
987
      tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs",
181!
988
              id, pTask->execInfo.step1Start, pTask->execInfo.step1El);
989
    } else {
990
      tqDebug("s-task:%s already in step2, no need to scan-history data, step2 startTs:%" PRId64, id,
49!
991
              pTask->execInfo.step2Start);
992

993
      atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
49✔
994
      streamMetaReleaseTask(pMeta, pTask);
49✔
995
      return 0;
49✔
996
    }
997
  }
998

999
  // we have to continue retrying to successfully execute the scan history task.
1000
  if (!streamTaskSetSchedStatusWait(pTask)) {
2,553!
1001
    tqError(
×
1002
        "s-task:%s failed to start scan-history in first stream time window since already started, unexpected "
1003
        "sched-status:%d",
1004
        id, pTask->status.schedStatus);
1005
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
1006
    streamMetaReleaseTask(pMeta, pTask);
×
1007
    return 0;
×
1008
  }
1009

1010
  int64_t              st = taosGetTimestampMs();
2,552✔
1011
  SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask, st);
2,552✔
1012

1013
  double el = (taosGetTimestampMs() - st) / 1000.0;
2,553✔
1014
  pTask->execInfo.step1El += el;
2,553✔
1015

1016
  if (retInfo.ret == TASK_SCANHISTORY_QUIT || retInfo.ret == TASK_SCANHISTORY_REXEC) {
2,553✔
1017
    int8_t status = streamTaskSetSchedStatusInactive(pTask);
186✔
1018
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
186✔
1019

1020
    if (retInfo.ret == TASK_SCANHISTORY_REXEC) {
186✔
1021
      streamExecScanHistoryInFuture(pTask, retInfo.idleTime);
181✔
1022
    } else {
1023
      SStreamTaskState p = streamTaskGetStatus(pTask);
5✔
1024
      ETaskStatus      s = p.state;
5✔
1025

1026
      if (s == TASK_STATUS__PAUSE) {
5!
1027
        tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs total:%.2fs, sched-status:%d", id, el,
×
1028
                pTask->execInfo.step1El, status);
1029
      } else if (s == TASK_STATUS__STOP || s == TASK_STATUS__DROPPING) {
5!
1030
        tqDebug("s-task:%s status:%p not continue scan-history data, total elapsed time:%.2fs quit", id, p.name,
5!
1031
                pTask->execInfo.step1El);
1032
      }
1033
    }
1034

1035
    streamMetaReleaseTask(pMeta, pTask);
186✔
1036
    return 0;
186✔
1037
  }
1038

1039
  // the following procedure should be executed, no matter status is stop/pause or not
1040
  tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, pTask->execInfo.step1El);
2,367✔
1041

1042
  if (pTask->info.fillHistory != 1) {
2,367!
1043
    tqError("s-task:%s fill-history is disabled, unexpected", id);
×
1044
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1045
  }
1046

1047
  // 1. get the related stream task
1048
  code = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId, &pStreamTask);
2,367✔
1049
  if (pStreamTask == NULL) {
2,367✔
1050
    tqError("failed to find s-task:0x%" PRIx64 ", it may have been destroyed, drop related fill-history task:%s",
4!
1051
            pTask->streamTaskId.taskId, pTask->id.idStr);
1052

1053
    tqDebug("s-task:%s fill-history task set status to be dropping", id);
4!
1054
    code = streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, 0);
4✔
1055

1056
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
4✔
1057
    streamMetaReleaseTask(pMeta, pTask);
4✔
1058
    return code;
4✔
1059
  }
1060

1061
  if (pStreamTask->info.taskLevel != TASK_LEVEL__SOURCE) {
2,363!
1062
    tqError("s-task:%s fill-history task related stream task level:%d, unexpected", id, pStreamTask->info.taskLevel);
×
1063
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1064
  }
1065

1066
  code = streamTaskHandleEventAsync(pStreamTask->status.pSM, TASK_EVENT_HALT, handleStep2Async, pTq);
2,363✔
1067
  streamMetaReleaseTask(pMeta, pStreamTask);
2,363✔
1068

1069
  atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
2,363✔
1070
  streamMetaReleaseTask(pMeta, pTask);
2,363✔
1071
  return code;
2,363✔
1072
}
1073

1074
// Handle a stream task run request.
//
// The request is decoded from the message content that follows the SMsgHead. A request of type
// STREAM_EXEC_T_EXTRACT_WAL_DATA is served by tqScanWal(); every other type is passed to
// tqStreamTaskProcessRunReq() and, for non-negative types and STREAM_EXEC_T_RESUME_TASK, followed by an
// asynchronous WAL scan.
//
// Returns TSDB_CODE_SUCCESS when the request cannot be decoded (the failure is only logged), the tqScanWal() result
// for the extract-wal type, the tqStreamTaskProcessRunReq() error when it fails, otherwise the tqScanWalAsync()
// result when a scan was requested, or 0.
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t           code = 0;
  char*             pBody = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t           bodyLen = pMsg->contLen - sizeof(SMsgHead);
  SDecoder          decoder;
  SStreamTaskRunReq req = {0};

  tDecoderInit(&decoder, (uint8_t*)pBody, bodyLen);
  code = tDecodeStreamTaskRunReq(&decoder, &req);
  if (code < 0) {
    tqError("vgId:%d failed to decode task run req, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
    tDecoderClear(&decoder);
    return TSDB_CODE_SUCCESS;
  }

  tDecoderClear(&decoder);

  // extracted submit data from wal files for all tasks
  if (req.reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) {
    return tqScanWal(pTq);
  }

  code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
  if (code) {
    tqError("vgId:%d failed to create task run req, code:%s", TD_VID(pTq->pVnode), tstrerror(code));
    return code;
  }

  // let's continue scan data in the wal files
  bool scanWal = (req.reqType >= 0 || req.reqType == STREAM_EXEC_T_RESUME_TASK);
  if (!scanWal) {
    return code;
  }

  code = tqScanWalAsync(pTq, false);  // it's ok to failed
  if (code) {
    tqError("vgId:%d failed to start scan wal file, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
  }

  return code;
}
1111

1112
// Forward a task dispatch request to tqStreamTaskProcessDispatchReq() together with this tq's stream meta.
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDispatchReq(pMeta, pMsg);
}
1115

1116
// Forward a task dispatch response to tqStreamTaskProcessDispatchRsp() together with this tq's stream meta.
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDispatchRsp(pMeta, pMsg);
}
1119

1120
// Forward a task drop request (raw message buffer and its length) to tqStreamTaskProcessDropReq().
int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDropReq(pMeta, msg, msgLen);
}
1123

1124
// Forward an update-checkpoint request to tqStreamTaskProcessUpdateCheckpointReq(), together with the vnode's
// `restored` flag. msgLen is not used.
int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessUpdateCheckpointReq(pMeta, pTq->pVnode->restored, msg);
}
1127

1128
// Forward a consensus-checkpoint-id request to tqStreamTaskProcessConsenChkptIdReq().
int32_t tqProcessTaskConsenChkptIdReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessConsenChkptIdReq(pMeta, pMsg);
}
1131

1132
// Forward a task pause request to tqStreamTaskProcessTaskPauseReq(). sversion and msgLen are not used.
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessTaskPauseReq(pMeta, msg);
}
1135

1136
// Forward a task resume request to tqStreamTaskProcessTaskResumeReq(), always passing `true` as its last
// argument. msgLen is not used.
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessTaskResumeReq(pTq, sversion, msg, true);
  return code;
}
1139

1140
// Forward a task retrieve request to tqStreamTaskProcessRetrieveReq() together with this tq's stream meta.
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessRetrieveReq(pMeta, pMsg);
}
1143

1144
// Retrieve responses are not processed here: both arguments are ignored and 0 is returned.
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}
450✔
1145

1146
// Answer a stream progress request: deserialize the request that follows the SMsgHead, collect the progress delay
// and the fill-history state of the stream via tqGetStreamExecInfo(), and send the response back with
// tmsgSendRsp().
//
// The response buffer holds an (all-zero) SMsgHead followed by the SStreamProgressRsp. Once the buffer has been
// attached to the outgoing message this function no longer frees it; on every other path it is released here.
//
// Returns -1 (with terrno set to TSDB_CODE_OUT_OF_MEMORY) when the response buffer cannot be allocated, otherwise
// the code of the first failing step, or the tSerializeStreamProgressRsp() result. No response is sent on failure.
int32_t tqStreamProgressRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*               msgStr = pMsg->pCont;
  char*               msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t             msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t             code = 0;
  SStreamProgressReq  req = {0};
  SStreamProgressRsp* pRsp = NULL;
  char*               pRspBuf = taosMemoryCalloc(1, sizeof(SMsgHead) + sizeof(SStreamProgressRsp));

  if (!pRspBuf) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    code = -1;
    goto _OVER;
  }

  // derive the payload pointer only after the allocation is known to be valid
  pRsp = POINTER_SHIFT(pRspBuf, sizeof(SMsgHead));

  code = tDeserializeStreamProgressReq(msgBody, msgLen, &req);
  if (code == TSDB_CODE_SUCCESS) {
    code = tqGetStreamExecInfo(pTq->pVnode, req.streamId, &pRsp->progressDelay, &pRsp->fillHisFinished);
  }

  if (code == TSDB_CODE_SUCCESS) {
    pRsp->fetchIdx = req.fetchIdx;
    pRsp->subFetchIdx = req.subFetchIdx;
    pRsp->vgId = req.vgId;
    pRsp->streamId = req.streamId;

    // NOTE(review): the response is serialized in place (source and destination are both pRsp), and the length
    // passed in also counts the SMsgHead that precedes pRsp in the buffer - confirm both are intended.
    code = tSerializeStreamProgressRsp(pRsp, sizeof(SStreamProgressRsp) + sizeof(SMsgHead), pRsp);
    if (code) {
      goto _OVER;
    }

    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    rsp.pCont = pRspBuf;
    pRspBuf = NULL;  // now referenced by rsp, must not be freed below
    rsp.contLen = sizeof(SMsgHead) + sizeof(SStreamProgressRsp);
    tmsgSendRsp(&rsp);
  }

_OVER:
  if (pRspBuf) {
    taosMemoryFree(pRspBuf);
  }
  return code;
}
1187

1188
// always return success to mnode
1189
//todo: handle failure of build and send msg to mnode
1190
static void doSendChkptSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, int32_t code,
76✔
1191
                                 int32_t taskId) {
1192
  SRpcMsg rsp = {0};
76✔
1193
  int32_t ret = streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &rsp, code);
76✔
1194
  if (ret) {  // suppress the error in build checkpoint source rsp
76!
1195
    tqError("s-task:0x%x failed to build checkpoint-source rsp, code:%s", taskId, tstrerror(ret));
×
1196
  }
1197
  tmsgSendRsp(&rsp);  // error occurs
76✔
1198
}
76✔
1199

1200
// no matter what kinds of error happened, make sure the mnode will receive the success execution code.
1201
int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) {
3,139✔
1202
  int32_t                    vgId = TD_VID(pTq->pVnode);
3,139✔
1203
  SStreamMeta*               pMeta = pTq->pStreamMeta;
3,139✔
1204
  char*                      msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
3,139✔
1205
  int32_t                    len = pMsg->contLen - sizeof(SMsgHead);
3,139✔
1206
  int32_t                    code = 0;
3,139✔
1207
  SStreamCheckpointSourceReq req = {0};
3,139✔
1208
  SDecoder                   decoder = {0};
3,139✔
1209
  SStreamTask*               pTask = NULL;
3,139✔
1210
  int64_t                    checkpointId = 0;
3,139✔
1211

1212
  // disable auto rsp to mnode
1213
  pRsp->info.handle = NULL;
3,139✔
1214

1215
  tDecoderInit(&decoder, (uint8_t*)msg, len);
3,139✔
1216
  if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) {
3,141!
1217
    code = TSDB_CODE_MSG_DECODE_ERROR;
×
1218
    tDecoderClear(&decoder);
×
1219
    tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code));
×
1220
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1221
    return TSDB_CODE_SUCCESS;  // always return success to mnode,
×
1222
  }
1223

1224
  tDecoderClear(&decoder);
3,136✔
1225

1226
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
3,139✔
1227
    tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId);
13!
1228
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
13✔
1229
    return TSDB_CODE_SUCCESS;  // always return success to mnode
13✔
1230
  }
1231

1232
  if (!pTq->pVnode->restored) {
3,129✔
1233
    tqDebug("vgId:%d checkpoint-source msg received during restoring, checkpointId:%" PRId64
63✔
1234
            ", transId:%d s-task:0x%x ignore it",
1235
            vgId, req.checkpointId, req.transId, req.taskId);
1236
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
63✔
1237
    return TSDB_CODE_SUCCESS;  // always return success to mnode
63✔
1238
  }
1239

1240
  code = streamMetaAcquireTask(pMeta, req.streamId, req.taskId, &pTask);
3,066✔
1241
  if (pTask == NULL || code != 0) {
3,063!
1242
    tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. checkpointId:%" PRId64
×
1243
            " transId:%d it may have been destroyed",
1244
            vgId, req.taskId, req.checkpointId, req.transId);
1245
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1246
    return TSDB_CODE_SUCCESS;
×
1247
  }
1248

1249
  if (pTask->status.downstreamReady != 1) {
3,066!
1250
    // record the latest failed checkpoint id
1251
    streamTaskSetFailedChkptInfo(pTask, req.transId, req.checkpointId);
×
1252
    tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpointId:%" PRId64
×
1253
            ", transId:%d set it failed",
1254
            pTask->id.idStr, req.checkpointId, req.transId);
1255

1256
    streamMetaReleaseTask(pMeta, pTask);
×
1257
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1258
    return TSDB_CODE_SUCCESS;  // todo retry handle error
×
1259
  }
1260

1261
  // todo save the checkpoint failed info
1262
  streamMutexLock(&pTask->lock);
3,066✔
1263
  ETaskStatus status = streamTaskGetStatus(pTask).state;
3,063✔
1264

1265
  if (req.mndTrigger == 1) {
3,062✔
1266
    if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) {
939!
1267
      tqError("s-task:%s not ready for checkpoint, since it is halt, ignore checkpointId:%" PRId64 ", set it failure",
×
1268
              pTask->id.idStr, req.checkpointId);
1269

1270
      streamMutexUnlock(&pTask->lock);
×
1271
      streamMetaReleaseTask(pMeta, pTask);
×
1272
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1273
      return TSDB_CODE_SUCCESS;
×
1274
    }
1275
  } else {
1276
    if (status != TASK_STATUS__HALT) {
2,123!
1277
      tqError("s-task:%s should in halt status, let's halt it directly", pTask->id.idStr);
×
1278
      //      streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_HALT);
1279
    }
1280
  }
1281

1282
  // check if the checkpoint msg already sent or not.
1283
  if (status == TASK_STATUS__CK) {
3,060!
1284
    streamTaskGetActiveCheckpointInfo(pTask, NULL, &checkpointId);
×
1285

1286
    tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1287
           " transId:%d already handled, ignore msg and continue process checkpoint",
1288
           pTask->id.idStr, checkpointId, req.transId);
1289

1290
    streamMutexUnlock(&pTask->lock);
×
1291
    streamMetaReleaseTask(pMeta, pTask);
×
1292
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SYN_PROPOSE_NOT_READY, req.taskId);
×
1293
    return TSDB_CODE_SUCCESS;
×
1294
  } else {  // checkpoint already finished, and not in checkpoint status
1295
    if (req.checkpointId <= pTask->chkInfo.checkpointId) {
3,060!
1296
      tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1297
             " transId:%d already handled, return success",
1298
             pTask->id.idStr, req.checkpointId, req.transId);
1299

1300
      streamMutexUnlock(&pTask->lock);
×
1301
      streamMetaReleaseTask(pMeta, pTask);
×
1302
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1303
      return TSDB_CODE_SUCCESS;
×
1304
    }
1305
  }
1306

1307
  code = streamProcessCheckpointSourceReq(pTask, &req);
3,060✔
1308
  streamMutexUnlock(&pTask->lock);
3,066✔
1309

1310
  if (code) {
3,067!
1311
    qError("s-task:%s (vgId:%d) failed to process checkpoint-source req, code:%s", pTask->id.idStr, vgId,
×
1312
           tstrerror(code));
1313
    streamMetaReleaseTask(pMeta, pTask);
×
1314
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1315
    return TSDB_CODE_SUCCESS;
×
1316
  }
1317

1318
  if (req.mndTrigger) {
3,067✔
1319
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64 ", transId:%d, ",
941!
1320
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId);
1321
  } else {
1322
    const char* pPrevStatus = streamTaskGetStatusStr(streamTaskGetPrevStatus(pTask));
2,126✔
1323
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64
2,126!
1324
           ", transId:%d after transfer-state, prev status:%s",
1325
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId, pPrevStatus);
1326
  }
1327

1328
  code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask);
3,068✔
1329
  if (code != TSDB_CODE_SUCCESS) {
3,067!
1330
    streamTaskSetCheckpointFailed(pTask);  // set the checkpoint failed
×
1331
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1332
  }
1333

1334
  streamMetaReleaseTask(pMeta, pTask);
3,067✔
1335
  return TSDB_CODE_SUCCESS;
3,067✔
1336
}
1337

1338
// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task
1339
int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) {
7,552✔
1340
  int32_t vgId = TD_VID(pTq->pVnode);
7,552✔
1341

1342
  SStreamCheckpointReadyMsg* pReq = (SStreamCheckpointReadyMsg*)pMsg->pCont;
7,552✔
1343
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
7,552!
1344
    tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId,
×
1345
            (int32_t)pReq->downstreamTaskId);
1346
    return TSDB_CODE_STREAM_NOT_LEADER;
×
1347
  }
1348

1349
  return tqStreamTaskProcessCheckpointReadyMsg(pTq->pStreamMeta, pMsg);
7,547✔
1350
}
1351

1352
// Forward a task update request to tqStreamTaskProcessUpdateReq(), adding the vnode's message callbacks and its
// `restored` flag.
int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;

  int32_t code = tqStreamTaskProcessUpdateReq(pMeta, &pTq->pVnode->msgCb, pMsg, pTq->pVnode->restored);
  return code;
}
1355

1356
// Forward a task reset request to tqStreamTaskProcessTaskResetReq(); only the message content is passed on.
int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessTaskResetReq(pMeta, pMsg->pCont);
}
1359

1360
// Handle a retrieve checkpoint-trigger request.
//
// On the leader the message is passed to tqStreamTaskProcessRetrieveTriggerReq(). On any other role the request is
// decoded only to report the sending task in the log, and the message is ignored.
//
// Returns the tqStreamTaskProcessRetrieveTriggerReq() result on the leader; otherwise TSDB_CODE_INVALID_MSG when
// the request cannot be decoded, or TSDB_CODE_STREAM_NOT_LEADER.
int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t vgId = TD_VID(pTq->pVnode);

  if (!vnodeIsRoleLeader(pTq->pVnode)) {
    SRetrieveChkptTriggerReq req = {0};

    // the encoded request follows the message head
    char*    msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
    int32_t  len = pMsg->contLen - sizeof(SMsgHead);
    SDecoder decoder = {0};

    tDecoderInit(&decoder, (uint8_t*)msg, len);
    if (tDecodeRetrieveChkptTriggerReq(&decoder, &req) < 0) {
      tDecoderClear(&decoder);
      tqError("vgId:%d invalid retrieve checkpoint-trigger req received", vgId);
      return TSDB_CODE_INVALID_MSG;
    }
    tDecoderClear(&decoder);

    // the id carries a "0x" prefix in the message, so it is printed in hexadecimal
    tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from s-task:0x%" PRIx64, vgId,
            req.downstreamTaskId);
    return TSDB_CODE_STREAM_NOT_LEADER;
  }

  return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg);
}
1385

1386
// Forward a retrieve checkpoint-trigger response to tqStreamTaskProcessRetrieveTriggerRsp().
int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessRetrieveTriggerRsp(pMeta, pMsg);
}
1389

1390
// this function is needed, do not try to remove it.
1391
int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg) { return tqStreamProcessStreamHbRsp(pTq->pStreamMeta, pMsg); }
23,456✔
1392

1393
// Forward the response of a checkpoint request to tqStreamProcessReqCheckpointRsp().
int32_t tqProcessStreamReqCheckpointRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessReqCheckpointRsp(pMeta, pMsg);
}
1396

1397
// Forward a checkpoint-ready response to tqStreamProcessCheckpointReadyRsp().
int32_t tqProcessTaskCheckpointReadyRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessCheckpointReadyRsp(pMeta, pMsg);
}
1400

1401
// Forward a checkpoint-report response to tqStreamProcessChkptReportRsp().
int32_t tqProcessTaskChkptReportRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessChkptReportRsp(pMeta, pMsg);
}
1404

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc