• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3572

02 Jan 2025 08:57AM UTC coverage: 63.077% (-0.2%) from 63.276%
#3572

push

travis-ci

web-flow
Merge pull request #29450 from taosdata/fix/TS-5651-skip-sync-heartbeat

fix:[TS-5651]skip-sync-heartbeat

139525 of 284348 branches covered (49.07%)

Branch coverage included in aggregate %.

217427 of 281548 relevant lines covered (77.23%)

18571459.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.94
/source/dnode/vnode/src/tq/tq.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "tq.h"
17
#include "osDef.h"
18
#include "taoserror.h"
19
#include "tqCommon.h"
20
#include "tstream.h"
21
#include "vnd.h"
22

23
// 0: not init
24
// 1: already inited
25
// 2: wait to be inited or cleanup
26
static int32_t tqInitialize(STQ* pTq);
27

28
// Tells whether the handle is currently marked as executing.
// A NULL handle is reported as executing (returns true).
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return true;
  }
  return pHandle->status == TMQ_HANDLE_STATUS_EXEC;
}
29
// Marks the handle as executing; does nothing for a NULL handle.
static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return;
  }
  pHandle->status = TMQ_HANDLE_STATUS_EXEC;
}
30
// Marks the handle as idle; does nothing for a NULL handle.
static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return;
  }
  pHandle->status = TMQ_HANDLE_STATUS_IDLE;
}
31

32
void tqDestroyTqHandle(void* data) {
1,663✔
33
  if (data == NULL) return;
1,663!
34
  STqHandle* pData = (STqHandle*)data;
1,663✔
35
  qDestroyTask(pData->execHandle.task);
1,663✔
36

37
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
1,662✔
38
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
1,298!
39
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
364✔
40
    tqReaderClose(pData->execHandle.pTqReader);
277✔
41
    walCloseReader(pData->pWalReader);
277✔
42
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
277✔
43
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
87!
44
    walCloseReader(pData->pWalReader);
87✔
45
    tqReaderClose(pData->execHandle.pTqReader);
87✔
46
    taosMemoryFreeClear(pData->execHandle.execTb.qmsg);
87!
47
    nodesDestroyNode(pData->execHandle.execTb.node);
87✔
48
  }
49
  if (pData->msg != NULL) {
1,662!
50
    rpcFreeCont(pData->msg->pCont);
×
51
    taosMemoryFree(pData->msg);
×
52
    pData->msg = NULL;
×
53
  }
54
  if (pData->block != NULL) {
1,662!
55
    blockDataDestroy(pData->block);
×
56
  }
57
  if (pData->pRef) {
1,662✔
58
    walCloseRef(pData->pRef->pWal, pData->pRef->refId);
1,616✔
59
  }
60
}
61

62
static bool tqOffsetEqual(const STqOffset* pLeft, const STqOffset* pRight) {
4,075✔
63
  if (pLeft == NULL || pRight == NULL) {
4,075!
64
    return false;
×
65
  }
66
  return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG &&
7,791✔
67
         pLeft->val.version == pRight->val.version;
3,716✔
68
}
69

70
/*
 * Allocates the STQ object for a vnode, creates its hash tables and then
 * runs tqInitialize().
 *
 * @param path    directory path; a copy is stored in the STQ.
 * @param pVnode  owning vnode; pVnode->pTq is set to the new object.
 * @return TSDB_CODE_INVALID_PARA for NULL arguments, terrno when an
 *         allocation fails, otherwise the result of tqInitialize().
 *
 * NOTE(review): the early-return paths do not free the partially built STQ.
 * It stays reachable through pVnode->pTq, so presumably the caller releases
 * it with tqClose() - confirm against the caller.
 */
int32_t tqOpen(const char* path, SVnode* pVnode) {
  if (!path || !pVnode) {
    return TSDB_CODE_INVALID_PARA;
  }

  STQ* pNewTq = taosMemoryCalloc(1, sizeof(STQ));
  if (!pNewTq) {
    return terrno;
  }
  pVnode->pTq = pNewTq;

  if ((pNewTq->path = taosStrdup(path)) == NULL) {
    return terrno;
  }
  pNewTq->pVnode = pVnode;

  // subscription handles; entries are released through tqDestroyTqHandle
  if ((pNewTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK)) == NULL) {
    return terrno;
  }
  taosHashSetFreeFp(pNewTq->pHandle, tqDestroyTqHandle);

  taosInitRWLatch(&pNewTq->lock);

  // handles waiting for pushed data; created without internal locking
  if ((pNewTq->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK)) == NULL) {
    return terrno;
  }

  if ((pNewTq->pCheckInfo =
           taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK)) == NULL) {
    return terrno;
  }
  taosHashSetFreeFp(pNewTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);

  if ((pNewTq->pOffset =
           taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_ENTRY_LOCK)) == NULL) {
    return terrno;
  }
  taosHashSetFreeFp(pNewTq->pOffset, (FDelete)tDeleteSTqOffset);

  return tqInitialize(pNewTq);
}
112

113
/*
 * Opens the stream meta for this tq, loads all stream tasks and then opens
 * the tq meta store.
 *
 * @return TSDB_CODE_INVALID_PARA for a NULL pTq, the streamMetaOpen() error
 *         if that step fails, otherwise the result of tqMetaOpen().
 */
int32_t tqInitialize(STQ* pTq) {
  if (!pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t code = streamMetaOpen(pTq->path, pTq, tqBuildStreamTask, tqExpandStreamTask, TD_VID(pTq->pVnode), -1,
                                tqStartTaskCompleteCallback, &pTq->pStreamMeta);
  if (code == TSDB_CODE_SUCCESS) {
    streamMetaLoadAllTasks(pTq->pStreamMeta);
    code = tqMetaOpen(pTq);
  }
  return code;
}
127

128
/*
 * Tears down an STQ: answers every poll still parked in the push manager
 * with an empty data response, then releases the hash tables, the path
 * string, the tq meta, the stream meta and finally the STQ itself.
 *
 * @param pTq  object to close; NULL is a no-op (after the initial debug log).
 */
void tqClose(STQ* pTq) {
  qDebug("start to close tq");
  if (!pTq) {
    return;
  }

  for (void* pIter = taosHashIterate(pTq->pPushMgr, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pPushMgr, pIter)) {
    STqHandle* pWaiting = *(STqHandle**)pIter;
    if (pWaiting->msg == NULL) {
      continue;
    }

    // reply with an empty response before dropping the parked request
    tqPushEmptyDataRsp(pWaiting, TD_VID(pTq->pVnode));
    rpcFreeCont(pWaiting->msg->pCont);
    taosMemoryFree(pWaiting->msg);
    pWaiting->msg = NULL;
  }

  taosHashCleanup(pTq->pHandle);
  taosHashCleanup(pTq->pPushMgr);
  taosHashCleanup(pTq->pCheckInfo);
  taosHashCleanup(pTq->pOffset);
  taosMemoryFree(pTq->path);
  tqMetaClose(pTq);

  int32_t loggedVgId = -1;
  if (pTq->pStreamMeta != NULL) {
    loggedVgId = pTq->pStreamMeta->vgId;
  }
  qDebug("vgId:%d end to close tq", loggedVgId);

  streamMetaClose(pTq->pStreamMeta);
  taosMemoryFree(pTq);
}
158

159
// Forwards a close notification to the stream meta; no-op for a NULL pTq.
void tqNotifyClose(STQ* pTq) {
  if (pTq != NULL) {
    streamMetaNotifyClose(pTq->pStreamMeta);
  }
}
165

166
/*
 * Answers the poll request parked on pHandle->msg with an empty data
 * response (blockNum == 0) that echoes the request offset.
 *
 * The parked message itself is not freed here; callers such as tqClose()
 * release pHandle->msg afterwards.
 *
 * @param pHandle  handle whose parked poll message is answered; a NULL handle
 *                 or a handle without a parked message is a no-op.
 * @param vgId     vgroup id, used for logging and passed to tqSendDataRsp().
 */
void tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) {
  // pHandle->msg is dereferenced below, so a handle without a parked message must be rejected too.
  if (pHandle == NULL || pHandle->msg == NULL) {
    return;
  }

  SMqPollReq req = {0};
  SMqDataRsp dataRsp = {0};
  char       buf[TSDB_OFFSET_LEN] = {0};

  int32_t code = tDeserializeSMqPollReq(pHandle->msg->pCont, pHandle->msg->contLen, &req);
  if (code < 0) {
    tqError("tDeserializeSMqPollReq %d failed, code:%d", pHandle->msg->contLen, code);
    goto END;
  }

  code = tqInitDataRsp(&dataRsp, req.reqOffset);
  if (code != 0) {
    tqError("tqInitDataRsp failed, code:%d", code);
    goto END;
  }
  dataRsp.blockNum = 0;
  (void)tFormatOffset(buf, TSDB_OFFSET_LEN, &dataRsp.reqOffset);
  tqInfo("tqPushEmptyDataRsp to consumer:0x%" PRIx64 " vgId:%d, offset:%s,QID:0x%" PRIx64, req.consumerId, vgId, buf,
         req.reqId);

  code = tqSendDataRsp(pHandle, pHandle->msg, &req, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
  if (code != 0) {
    tqError("tqSendDataRsp failed, code:%d", code);
  }
  tDeleteMqDataRsp(&dataRsp);

END:
  // Release whatever the deserializer attached to the request, mirroring the
  // END path of tqProcessPollReq().
  // NOTE(review): assumes tqInitDataRsp() deep-copies the offset into dataRsp
  // (so the two destroy calls do not share buffers) - confirm in tqUtil.c.
  tDestroySMqPollReq(&req);
}
196

197
/*
 * Logs and sends a data response for a poll request.
 *
 * Queries the valid WAL version range from the handle's reader and hands it,
 * together with the response, to tqDoSendDataRsp().
 *
 * @return TSDB_CODE_INVALID_PARA if any pointer argument is NULL, otherwise
 *         the result of tqDoSendDataRsp().
 */
int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type,
                      int32_t vgId) {
  if (!pHandle || !pMsg || !pReq || !pRsp) {
    return TSDB_CODE_INVALID_PARA;
  }

  int64_t firstVer = 0;
  int64_t lastVer = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &firstVer, &lastVer);

  // textual form of both offsets, only used for the debug log below
  char reqOffsetStr[TSDB_OFFSET_LEN] = {0};
  char rspOffsetStr[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(reqOffsetStr, TSDB_OFFSET_LEN, &(pRsp->reqOffset));
  (void)tFormatOffset(rspOffsetStr, TSDB_OFFSET_LEN, &(pRsp->rspOffset));

  tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) start to send rsp, block num:%d, req:%s, rsp:%s,QID:0x%" PRIx64,
          vgId, pReq->consumerId, pReq->epoch, pRsp->blockNum, reqOffsetStr, rspOffsetStr, pReq->reqId);

  return tqDoSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type, firstVer, lastVer);
}
215

216
/*
 * Handles an offset-commit message: decodes the committed offset, skips the
 * update when it equals the stored log offset, otherwise stores it in the
 * in-memory offset hash and persists it through tqMetaSaveInfo().
 *
 * @param pTq       tq object; NULL yields TSDB_CODE_INVALID_PARA.
 * @param sversion  not used by this function.
 * @param msg       encoded SMqVgOffset.
 * @param msgLen    length of msg in bytes.
 * @return 0 on success (including "nothing to update"), TSDB_CODE_INVALID_MSG
 *         for an undecodable message or unknown offset type, or -1 with
 *         terrno = TSDB_CODE_OUT_OF_MEMORY when storing fails.
 */
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);

  int32_t  code = 0;
  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // Clear the decoder on both outcomes; previously the failure path skipped it.
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pOffset = &vgOffset.offset;

  if (pOffset->val.type == TMQ_OFFSET__SNAPSHOT_DATA || pOffset->val.type == TMQ_OFFSET__SNAPSHOT_META) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
            pOffset->subKey, vgId, pOffset->val.uid, pOffset->val.ts);
  } else if (pOffset->val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, pOffset->subKey, vgId,
            pOffset->val.version);
  } else {
    tqError("invalid commit offset type:%d", pOffset->val.type);
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pSavedOffset = NULL;
  code = tqMetaGetOffset(pTq, pOffset->subKey, &pSavedOffset);
  if (code == 0 && tqOffsetEqual(pOffset, pSavedOffset)) {
    tqInfo("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64,
           vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version);
    goto end;  // no need to update the offset value
  }

  // save the new offset value
  if (taosHashPut(pTq->pOffset, pOffset->subKey, strlen(pOffset->subKey), pOffset, sizeof(STqOffset))) {
    // Nothing was stored, so the decoded offset is still owned here and must
    // be released; previously this path returned without freeing it.
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    code = -1;
    goto end;
  }

  // From here on the hash entry holds a byte copy of *pOffset (and shares any
  // buffer it references), so the local copy must NOT be destroyed.
  if (tqMetaSaveInfo(pTq, pTq->pOffsetStore, pOffset->subKey, strlen(pOffset->subKey), msg,
                     msgLen >= sizeof(vgOffset.consumerId) ? msgLen - sizeof(vgOffset.consumerId) : 0) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;

end:
  tOffsetDestroy(&vgOffset.offset.val);
  return code;
}
272

273
/*
 * Handles a consumer seek request.
 *
 * Looks up the handle for the subscription key under the write lock, checks
 * that it belongs to the requesting consumer and unregisters it from the push
 * manager. The outcome is reported to the client through the RPC response
 * code; the function itself returns 0 once the response has been sent.
 *
 * @return TSDB_CODE_INVALID_PARA for NULL arguments, otherwise 0.
 */
int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) {
  if (!pTq || !pMsg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int        code = 0;
  int32_t    vgId = TD_VID(pTq->pVnode);
  SMqSeekReq req = {0};
  SRpcMsg    rsp = {.info = pMsg->info};

  if (tDeserializeSMqSeekReq(pMsg->pCont, pMsg->contLen, &req) >= 0) {
    tqDebug("tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s", req.consumerId, vgId, req.subKey);
    taosWLockLatch(&pTq->lock);

    STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
    if (pHandle == NULL) {
      // an unknown subscription is only warned about; the response code stays 0
      tqWarn("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", req.consumerId, vgId, req.subKey);
    } else if (pHandle->consumerId != req.consumerId) {
      // 2. check consumer-vg assignment status
      tqError("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              req.consumerId, vgId, req.subKey, pHandle->consumerId);
      code = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    } else {
      // if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to
      // TMQ_VG_STATUS__IDLE, otherwise poll data failed after seek.
      tqUnregisterPushHandle(pTq, pHandle);
    }

    taosWUnLockLatch(&pTq->lock);
  } else {
    code = TSDB_CODE_OUT_OF_MEMORY;
  }

  rsp.code = code;
  tmsgSendRsp(&rsp);
  return 0;
}
317

318
/*
 * Checks whether column colId of table tbUid may be modified.
 *
 * Walks every entry of pTq->pCheckInfo; if an entry for tbUid lists colId in
 * its colIdList the column is reported as not modifiable.
 *
 * @return TSDB_CODE_INVALID_PARA for a NULL pTq, -1 if the column is listed
 *         by some check-info entry, 0 otherwise.
 */
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
  if (!pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  for (void* pIter = taosHashIterate(pTq->pCheckInfo, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pCheckInfo, pIter)) {
    STqCheckInfo* pInfo = (STqCheckInfo*)pIter;
    if (pInfo->ntbUid != tbUid) {
      continue;
    }

    int32_t nCols = taosArrayGetSize(pInfo->colIdList);
    for (int32_t idx = 0; idx < nCols; ++idx) {
      int16_t* pListedColId = taosArrayGet(pInfo->colIdList, idx);
      if (pListedColId != NULL && (*pListedColId) == colId) {
        // leaving the iteration early, so it has to be cancelled explicitly
        taosHashCancelIterate(pTq->pCheckInfo, pIter);
        return -1;
      }
    }
  }

  return 0;
}
350

351
/*
 * Re-submits every poll request parked in the push manager to the query
 * queue as a TDMT_VND_TMQ_CONSUME message, then empties the push manager.
 * Runs under the tq write lock.
 *
 * @param pTq   tq object; NULL yields TSDB_CODE_INVALID_PARA.
 * @param pMsg  not used by this function.
 * @return 0 (failures to enqueue are only logged).
 */
int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg) {
  if (!pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t vgId = TD_VID(pTq->pVnode);
  taosWLockLatch(&pTq->lock);

  if (taosHashGetSize(pTq->pPushMgr) <= 0) {
    taosWUnLockLatch(&pTq->lock);
    return 0;
  }

  for (void* pIter = taosHashIterate(pTq->pPushMgr, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pPushMgr, pIter)) {
    STqHandle* pWaiting = *(STqHandle**)pIter;
    tqDebug("vgId:%d start set submit for pHandle:%p, consumer:0x%" PRIx64, vgId, pWaiting, pWaiting->consumerId);

    if (pWaiting->msg == NULL) {
      tqError("pHandle->msg should not be null");
      taosHashCancelIterate(pTq->pPushMgr, pIter);
      break;
    }

    // Build a consume message that reuses the parked payload, then drop only
    // the parked wrapper struct.
    SRpcMsg pollMsg = {.msgType = TDMT_VND_TMQ_CONSUME,
                       .pCont = pWaiting->msg->pCont,
                       .contLen = pWaiting->msg->contLen,
                       .info = pWaiting->msg->info};
    if (tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &pollMsg) != 0) {
      tqError("vgId:%d tmsgPutToQueue failed, consumer:0x%" PRIx64, vgId, pWaiting->consumerId);
    }
    taosMemoryFree(pWaiting->msg);
    pWaiting->msg = NULL;
  }

  taosHashClear(pTq->pPushMgr);
  taosWUnLockLatch(&pTq->lock);
  return 0;
}
388

389
/*
 * Handles one consumer poll request.
 *
 * Steps: deserialize the request, find the handle for the subscription key,
 * verify that the handle belongs to the requesting consumer, wait (10 ms
 * retries) until the handle is not executing and mark it as executing, update
 * the handle epoch, extract data via tqExtractDataForMq(), and finally mark
 * the handle idle again.
 *
 * Every exit after deserialization goes through END so that the request is
 * always released with tDestroySMqPollReq().
 *
 * @return TSDB_CODE_INVALID_PARA for NULL arguments; the (negative)
 *         deserializer result with terrno = TSDB_CODE_INVALID_MSG when the
 *         message cannot be decoded; -1 with terrno set when the handle is
 *         missing (TSDB_CODE_INVALID_MSG) or owned by another consumer
 *         (TSDB_CODE_TMQ_CONSUMER_MISMATCH); otherwise the result of
 *         tqExtractDataForMq().
 */
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqPollReq req = {0};
  int        code = tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req);
  if (code < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    goto END;
  }

  int64_t      consumerId = req.consumerId;
  int32_t      reqEpoch = req.epoch;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
  STqHandle*   pHandle = NULL;

  while (1) {
    taosWLockLatch(&pTq->lock);
    // 1. find handle
    code = tqMetaGetHandle(pTq, req.subKey, &pHandle);
    if (code != TDB_CODE_SUCCESS) {
      tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_MSG;
      taosWUnLockLatch(&pTq->lock);
      // Leave through END (same -1 result as before) so the deserialized
      // request is released; a direct return here skipped tDestroySMqPollReq().
      code = -1;
      goto END;
    }

    // 2. check rebalance status
    if (pHandle->consumerId != consumerId) {
      tqError("ERROR tmq poll: consumer:0x%" PRIx64
              " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId);
      terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
      taosWUnLockLatch(&pTq->lock);
      code = -1;
      goto END;
    }

    bool exec = tqIsHandleExec(pHandle);
    if (!exec) {
      // claim the handle while still holding the lock
      tqSetHandleExec(pHandle);
      //      qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS);
      tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId,
              req.subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      break;
    }
    taosWUnLockLatch(&pTq->lock);

    tqDebug("tmq poll: consumer:0x%" PRIx64
            " vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p",
            consumerId, vgId, req.subKey, pHandle);
    taosMsleep(10);
  }

  // 3. update the epoch value
  if (pHandle->epoch < reqEpoch) {
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch,
            reqEpoch);
    pHandle->epoch = reqEpoch;
  }

  char buf[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(buf, TSDB_OFFSET_LEN, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s,QID:0x%" PRIx64,
          consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId);

  code = tqExtractDataForMq(pTq, pHandle, &req, pMsg);
  tqSetHandleIdle(pHandle);

  tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle idle, pHandle:%p", consumerId, vgId,
          req.subKey, pHandle);

END:
  tDestroySMqPollReq(&req);
  return code;
}
468

469
/*
 * Answers a "committed offset" query: decodes the subscription key from the
 * request (the payload follows an SMsgHead), looks up the stored offset and
 * sends it back encoded as an SMqVgOffset.
 *
 * @return TSDB_CODE_INVALID_PARA for NULL arguments or an encoding failure,
 *         TSDB_CODE_OUT_OF_MEMORY when the request cannot be decoded,
 *         TSDB_CODE_TMQ_NO_COMMITTED when no offset is stored for the key,
 *         terrno when the response buffer cannot be allocated, 0 once the
 *         response has been sent.
 */
int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  void*   data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t len = pMsg->contLen - sizeof(SMsgHead);

  SMqVgOffset vgOffset = {0};

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)data, len);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // Clear the decoder on both outcomes; previously the failure path skipped it.
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    tOffsetDestroy(&vgOffset.offset.val);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return terrno;
  }

  STqOffset* pSavedOffset = NULL;
  int32_t    code = tqMetaGetOffset(pTq, vgOffset.offset.subKey, &pSavedOffset);

  // Only the subKey of the decoded offset was needed. Release its value now,
  // before it is abandoned (no committed offset) or overwritten below.
  tOffsetDestroy(&vgOffset.offset.val);

  if (code != 0) {
    return TSDB_CODE_TMQ_NO_COMMITTED;
  }

  // Byte copy of the stored offset: any buffer it references stays owned by
  // the stored entry, so vgOffset must not be destroyed after this point.
  vgOffset.offset = *pSavedOffset;

  tEncodeSize(tEncodeMqVgOffset, &vgOffset, len, code);
  if (code < 0) {
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  void* buf = rpcMallocCont(len);
  if (buf == NULL) {
    return terrno;
  }
  SEncoder encoder = {0};
  tEncoderInit(&encoder, buf, len);
  code = tEncodeMqVgOffset(&encoder, &vgOffset);
  tEncoderClear(&encoder);
  if (code < 0) {
    rpcFreeCont(buf);
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  SRpcMsg rsp = {.info = pMsg->info, .pCont = buf, .contLen = len, .code = 0};

  tmsgSendRsp(&rsp);
  return 0;
}
517

518
/*
 * Answers a WAL-info (assignment) query for a subscription.
 *
 * Validates the handle and its owning consumer under the read lock, reads the
 * valid WAL version range, then picks the response version:
 *   - a log-type request offset is echoed back;
 *   - for a negative (reset) offset type the stored offset is used when one
 *     exists, otherwise the earliest/latest WAL version depending on the
 *     reset type.
 * The result is sent with tqDoSendDataRsp() as TMQ_MSG_TYPE__WALINFO_RSP.
 *
 * All exits after deserialization release the request with
 * tDestroySMqPollReq(), as tqProcessPollReq() does.
 *
 * @return TSDB_CODE_INVALID_PARA, TSDB_CODE_INVALID_MSG,
 *         TSDB_CODE_TMQ_CONSUMER_MISMATCH, a tqInitDataRsp() error, or the
 *         result of tqDoSendDataRsp().
 */
int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  int32_t    code = 0;
  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    code = TSDB_CODE_INVALID_MSG;
    goto FREE_REQ;
  }

  int64_t      consumerId = req.consumerId;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);

  // 1. find handle
  taosRLockLatch(&pTq->lock);
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (pHandle == NULL) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s not found", consumerId, vgId, req.subKey);
    taosRUnLockLatch(&pTq->lock);
    code = TSDB_CODE_INVALID_MSG;
    goto FREE_REQ;
  }

  // 2. check rebalance status
  if (pHandle->consumerId != consumerId) {
    tqDebug("ERROR consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, vgId, req.subKey, pHandle->consumerId);
    taosRUnLockLatch(&pTq->lock);
    code = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    goto FREE_REQ;
  }

  int64_t sver = 0, ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);
  taosRUnLockLatch(&pTq->lock);

  SMqDataRsp dataRsp = {0};
  code = tqInitDataRsp(&dataRsp, req.reqOffset);
  if (code != 0) {
    // As in the original, dataRsp is not passed to tDeleteMqDataRsp() when its
    // initialization failed; only the request is released.
    goto FREE_REQ;
  }

  if (req.useSnapshot == true) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s snapshot not support wal info", consumerId, vgId, req.subKey);
    code = TSDB_CODE_INVALID_PARA;
    goto END;
  }

  dataRsp.rspOffset.type = TMQ_OFFSET__LOG;

  if (reqOffset.type == TMQ_OFFSET__LOG) {
    dataRsp.rspOffset.version = reqOffset.version;
  } else if (reqOffset.type < 0) {
    STqOffset* pOffset = NULL;
    code = tqMetaGetOffset(pTq, req.subKey, &pOffset);
    if (code == 0) {
      if (pOffset->val.type != TMQ_OFFSET__LOG) {
        tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s, no valid wal info", consumerId, vgId, req.subKey);
        code = TSDB_CODE_INVALID_PARA;
        goto END;
      }

      dataRsp.rspOffset.version = pOffset->val.version;
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from store:%" PRId64, consumerId, vgId,
             req.subKey, dataRsp.rspOffset.version);
    } else {
      if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEST) {
        dataRsp.rspOffset.version = sver;  // not consume yet, set the earliest position
      } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
        dataRsp.rspOffset.version = ever;
      }
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%" PRId64, consumerId, vgId, req.subKey,
             dataRsp.rspOffset.version);
    }
  } else {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey,
            reqOffset.type);
    code = TSDB_CODE_INVALID_PARA;
    goto END;
  }

  code = tqDoSendDataRsp(&pMsg->info, &dataRsp, req.epoch, req.consumerId, TMQ_MSG_TYPE__WALINFO_RSP, sver, ever);

END:
  tDeleteMqDataRsp(&dataRsp);

FREE_REQ:
  // NOTE(review): assumes tqInitDataRsp() deep-copies req.reqOffset into
  // dataRsp, so releasing both does not free a shared buffer - confirm.
  tDestroySMqPollReq(&req);
  return code;
}
605

606
/*
 * Handles a delete-subscription request.
 *
 * If a handle exists for the subscription key, waits (10 ms retries) until it
 * is not executing, unregisters it from the push manager and removes it from
 * the handle hash. Afterwards removes the key from the offset hash, the
 * offset store and the exec store. Missing entries are only logged.
 *
 * @param pTq       tq object.
 * @param sversion  not used by this function.
 * @param msg       request buffer, interpreted as SMqVDeleteReq.
 * @param msgLen    not used by this function.
 * @return TSDB_CODE_INVALID_PARA for NULL arguments, otherwise 0.
 */
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL || msg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;
  int32_t        vgId = TD_VID(pTq->pVnode);

  tqInfo("vgId:%d, tq process delete sub req %s", vgId, pReq->subKey);
  int32_t code = 0;

  STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
  if (pHandle) {
    while (1) {
      taosWLockLatch(&pTq->lock);
      bool exec = tqIsHandleExec(pHandle);

      if (exec) {
        // a poll is running on this handle: release the lock and retry later
        tqInfo("vgId:%d, topic:%s, subscription is executing, delete wait for 10ms and retry, pHandle:%p", vgId,
               pHandle->subKey, pHandle);
        taosWUnLockLatch(&pTq->lock);
        taosMsleep(10);
        continue;
      }
      tqUnregisterPushHandle(pTq, pHandle);
      code = taosHashRemove(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
      if (code != 0) {
        tqError("cannot process tq delete req %s, since no such handle", pReq->subKey);
      }
      taosWUnLockLatch(&pTq->lock);
      break;
    }
  }

  taosWLockLatch(&pTq->lock);
  if (taosHashRemove(pTq->pOffset, pReq->subKey, strlen(pReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in hash", pReq->subKey);
  }
  if (tqMetaDeleteInfo(pTq, pTq->pOffsetStore, pReq->subKey, strlen(pReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pReq->subKey);
  }

  if (tqMetaDeleteInfo(pTq, pTq->pExecStore, pReq->subKey, strlen(pReq->subKey)) < 0) {
    // This deletion targets the exec store, not the offset store; the message
    // used to be a copy of the offset one above.
    tqError("cannot process tq delete req %s, since no such handle in tdb", pReq->subKey);
  }
  taosWUnLockLatch(&pTq->lock);

  return 0;
}
654

655
/*
 * Handles an add-check-info request: decodes the STqCheckInfo from msg,
 * stores it in pTq->pCheckInfo keyed by topic and persists the raw message in
 * the check store.
 *
 * @param sversion  not used by this function.
 * @param msgLen    message length; negative values are treated as 0.
 * @return TSDB_CODE_INVALID_PARA for NULL arguments, the decode or hash-put
 *         error code, otherwise the result of tqMetaSaveInfo().
 */
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (!pTq || !msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t payloadLen = 0;
  if (msgLen >= 0) {
    payloadLen = msgLen;
  }

  STqCheckInfo checkInfo = {0};
  int32_t      code = tqMetaDecodeCheckInfo(&checkInfo, msg, payloadLen);
  if (code != 0) {
    return code;
  }

  code = taosHashPut(pTq->pCheckInfo, checkInfo.topic, strlen(checkInfo.topic), &checkInfo, sizeof(STqCheckInfo));
  if (code != 0) {
    // the hash did not take the entry, so the decoded info is released here
    tDeleteSTqCheckInfo(&checkInfo);
    return code;
  }

  return tqMetaSaveInfo(pTq, pTq->pCheckStore, checkInfo.topic, strlen(checkInfo.topic), msg, payloadLen);
}
673

674
/*
 * Handles a delete-check-info request. msg is used as a NUL-terminated key:
 * it is removed from pTq->pCheckInfo and then from the check store.
 *
 * @param sversion  not used by this function.
 * @param msgLen    not used by this function.
 * @return TSDB_CODE_INVALID_PARA for NULL arguments,
 *         TSDB_CODE_TSC_INTERNAL_ERROR when the hash removal reports a
 *         negative result, otherwise the result of tqMetaDeleteInfo().
 */
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (!pTq || !msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t removeRet = taosHashRemove(pTq->pCheckInfo, msg, strlen(msg));
  if (removeRet < 0) {
    return TSDB_CODE_TSC_INTERNAL_ERROR;
  }

  return tqMetaDeleteInfo(pTq, pTq->pCheckStore, msg, strlen(msg));
}
683

684
/*
 * Handles a subscribe / rebalance request (SMqRebVgReq).
 *
 * - No handle exists for the subscription key: a new handle is created with
 *   tqMetaCreateHandle() and stored with tqMetaSaveHandle(). A request whose
 *   new consumer id is -1 is rejected with TSDB_CODE_INVALID_PARA.
 * - A handle exists: waits (10 ms retries) until it is not executing, then,
 *   unless the consumer is unchanged, switches it to the new consumer, resets
 *   its epoch, unregisters it from the push manager and saves it again.
 *
 * @param sversion  not used by this function.
 * @return TSDB_CODE_INVALID_PARA for NULL arguments or an invalid request,
 *         a negative decode/create result, or the tqMetaSaveHandle() result;
 *         0 when nothing had to change.
 */
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (!pTq || !msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int         ret = 0;
  SMqRebVgReq req = {0};
  SDecoder    dc = {0};

  tDecoderInit(&dc, (uint8_t*)msg, msgLen);
  ret = tDecodeSMqRebVgReq(&dc, &req);
  if (ret < 0) {
    goto end;
  }

  tqInfo("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey,
         req.oldConsumerId, req.newConsumerId);

  // look up an existing handle under the read lock
  STqHandle* pHandle = NULL;
  taosRLockLatch(&pTq->lock);
  int32_t code = tqMetaGetHandle(pTq, req.subKey, &pHandle);
  if (code != 0) {
    tqInfo("vgId:%d, tq process sub req:%s, no such handle, create new one", pTq->pVnode->config.vgId, req.subKey);
  }
  taosRUnLockLatch(&pTq->lock);

  if (pHandle != NULL) {
    // Existing handle: take the write lock and hold it only once the handle
    // is idle; while it is executing, drop the lock, sleep and try again.
    taosWLockLatch(&pTq->lock);
    while (tqIsHandleExec(pHandle)) {
      tqInfo("vgId:%d, topic:%s, subscription is executing, sub wait for 10ms and retry, pHandle:%p",
             pTq->pVnode->config.vgId, pHandle->subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      taosMsleep(10);
      taosWLockLatch(&pTq->lock);
    }

    if (pHandle->consumerId != req.newConsumerId) {
      tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
             req.newConsumerId);

      atomic_store_64(&pHandle->consumerId, req.newConsumerId);
      atomic_store_32(&pHandle->epoch, 0);
      tqUnregisterPushHandle(pTq, pHandle);
      ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
    } else {  // do nothing
      tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId);
    }

    taosWUnLockLatch(&pTq->lock);
    goto end;
  }

  // No handle yet: build a new one for the new consumer.
  if (req.oldConsumerId != -1) {
    tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
            req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
  }
  if (req.newConsumerId == -1) {
    tqError("vgId:%d, tq invalid rebalance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
    ret = TSDB_CODE_INVALID_PARA;
    goto end;
  }

  STqHandle newHandle = {0};
  ret = tqMetaCreateHandle(pTq, &req, &newHandle);
  if (ret < 0) {
    tqDestroyTqHandle(&newHandle);
    goto end;
  }

  taosWLockLatch(&pTq->lock);
  ret = tqMetaSaveHandle(pTq, req.subKey, &newHandle);
  taosWUnLockLatch(&pTq->lock);

end:
  tDecoderClear(&dc);
  return ret;
}
758

759
// Free callback for containers whose slots hold a heap pointer: `ptr` addresses the slot,
// and the pointer stored in that slot is what gets released.
static void freePtr(void* ptr) {
  void** ppStored = (void**)ptr;
  taosMemoryFree(*ppStored);
}
44,103!
760

761
/*
 * Prepare a stream task for execution on this vnode.
 *
 * Steps, in order: streamTaskInit(), wire the output sink (SMA or destination table), open a WAL
 * reader for source-level tasks, reset the upstream stage info, restore the version info, and log
 * the resulting task state.
 *
 * pTqObj:         the owning STQ, passed as an opaque pointer
 * pTask:          task being built
 * nextProcessVer: forwarded to streamTaskInit() and reported in the log as inputVer
 *
 * Returns 0 on success. On failure returns the streamTaskInit() code, terrno (schema build, sink
 * hash init, WAL reader open), or TSDB_CODE_STREAM_INTERNAL_ERROR when a non fill-history task has
 * checkpointVer > nextProcessVer.
 */
int32_t tqBuildStreamTask(void* pTqObj, SStreamTask* pTask, int64_t nextProcessVer) {
  STQ*    pTq = (STQ*)pTqObj;
  int32_t vgId = TD_VID(pTq->pVnode);

  tqDebug("s-task:0x%x start to build task", pTask->id.taskId);

  int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, nextProcessVer);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pTask->pBackend = NULL;

  // sink
  STaskOutputInfo* pOutput = &pTask->outputInfo;
  if (pOutput->type == TASK_OUTPUT__SMA) {
    pOutput->smaSink.vnode = pTq->pVnode;
    pOutput->smaSink.smaSink = smaHandleRes;
  } else if (pOutput->type == TASK_OUTPUT__TABLE) {
    pOutput->tbSink.vnode = pTq->pVnode;
    pOutput->tbSink.tbSinkFunc = tqSinkDataIntoDstTable;

    // take the schema version from meta when the lookup succeeds, otherwise keep version 1
    SMetaInfo metaInfo = {0};
    int32_t   schemaVer = 1;
    if (metaGetInfo(pTq->pVnode->pMeta, pOutput->tbSink.stbUid, &metaInfo, NULL) == TSDB_CODE_SUCCESS) {
      schemaVer = metaInfo.skmVer;
    }

    SSchemaWrapper* pWrapper = pOutput->tbSink.pSchemaWrapper;
    pOutput->tbSink.pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, schemaVer);
    if (pOutput->tbSink.pTSchema == NULL) {
      return terrno;
    }

    pOutput->tbSink.pTbInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
    if (pOutput->tbSink.pTbInfo == NULL) {
      tqError("vgId:%d failed init sink tableInfo, code:%s", vgId, tstrerror(terrno));
      return terrno;
    }

    tSimpleHashSetFreeFp(pOutput->tbSink.pTbInfo, freePtr);
  }

  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    // delete msg also extract from wal files
    SWalFilterCond walCond = {.deleteMsg = 1, .scanDropCtb = (pTask->subtableWithoutMd5 ? true : false)};

    pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &walCond, pTask->id.taskId);
    if (pTask->exec.pWalReader == NULL) {
      tqError("vgId:%d failed init wal reader, code:%s", vgId, tstrerror(terrno));
      return terrno;
    }
  }

  streamTaskResetUpstreamStageInfo(pTask);

  SCheckpointInfo* pChkInfo = &pTask->chkInfo;
  tqSetRestoreVersionInfo(pTask);

  char*       pCurStatus = streamTaskGetStatus(pTask).name;
  const char* pNextStatus = streamTaskGetStatusStr(pTask->status.taskStatus);

  if (pTask->info.fillHistory) {
    tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
           " nextProcessVer:%" PRId64
           " child id:%d, level:%d, cur-status:%s, next-status:%s fill-history:%d, related stream task:0x%x "
           "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
           vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
           pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
           (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam, nextProcessVer);
    return 0;
  }

  tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
         " nextProcessVer:%" PRId64
         " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x "
         "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
         vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
         pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
         (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam, nextProcessVer);

  // a checkpoint version ahead of the next version to process is treated as an internal error
  if (pChkInfo->checkpointVer > pChkInfo->nextProcessVer) {
    tqError("vgId:%d build stream task, s-task:%s, checkpointVer:%" PRId64 " > nextProcessVer:%" PRId64, vgId,
            pTask->id.idStr, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
  }

  return 0;
}
850

851
// Handle a task-check request by forwarding it to tqStreamTaskProcessCheckReq() with this tq's stream meta.
int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessCheckReq(pMeta, pMsg);
}
21,711✔
852

853
// Handle a task-check response; the current leader flag of the vnode is passed along to the stream layer.
int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessCheckRsp(pMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
}
856

857
// Handle a task-deploy request: forwards the raw message together with the vnode's message callbacks,
// its current leader flag and its restored flag.
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDeployReq(pMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen,
                                      vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored);
}
861

862
/*
 * Start step 2 of a fill-history task, using the halt version of the related stream task.
 *
 * pTask:       the fill-history task
 * pStreamTask: the related stream task; its hTaskInfo.haltVer seeds the step-2 version range
 * pTq:         used to trigger an asynchronous WAL scan
 *
 * When streamHistoryTaskSetVerRangeStep2() reports that nothing is left to scan, a trans-state item
 * is queued and the task is executed directly. Otherwise the WAL reader is positioned at the start
 * of the step-2 range, the task is marked inactive, TASK_EVENT_SCANHIST_DONE is raised and a WAL
 * scan is scheduled. Failures are only logged.
 */
static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) {
  const char*    idStr = pTask->id.idStr;
  SVersionRange* pRange = &pTask->step2Range;
  int32_t        vgId = pTask->pMeta->vgId;
  int64_t        haltVer = pStreamTask->hTaskInfo.haltVer;

  // if it's an source task, extract the last version in wal.
  bool finished = streamHistoryTaskSetVerRangeStep2(pTask, haltVer);
  pTask->execInfo.step2Start = taosGetTimestampMs();

  if (finished) {
    qDebug("s-task:%s scan wal(step 2) verRange:%" PRId64 "-%" PRId64 " ended, elapsed time:%.2fs", idStr,
           pRange->minVer, pRange->maxVer, 0.0);

    int32_t ret = streamTaskPutTranstateIntoInputQ(pTask);  // todo: msg lost.
    if (ret) {
      qError("s-task:%s failed put trans-state into inputQ, code:%s", idStr, tstrerror(ret));
    }

    (void)streamExecTask(pTask);  // exec directly
    return;
  }

  STimeWindow* pWin = &pTask->dataRange.window;
  tqDebug("s-task:%s level:%d verRange:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64
          ", do secondary scan-history from WAL after halt the related stream task:%s",
          idStr, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pWin->skey, pWin->ekey,
          pStreamTask->id.idStr);

  if (pTask->status.schedStatus != TASK_SCHED_STATUS__WAITING) {
    tqError("s-task:%s level:%d unexpected sched-status:%d", idStr, pTask->info.taskLevel,
            pTask->status.schedStatus);
  }

  int32_t code = streamSetParamForStreamScannerStep2(pTask, pRange, pWin);
  if (code) {
    tqError("s-task:%s level:%d failed to set step2 param", idStr, pTask->info.taskLevel);
  }

  // the WAL scan resumes from the lower bound of the step-2 range
  int64_t startVer = pRange->minVer;
  pTask->chkInfo.nextProcessVer = startVer;
  walReaderSetSkipToVersion(pTask->exec.pWalReader, startVer);

  tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", idStr,
          startVer, pRange->maxVer, TASK_SCHED_STATUS__INACTIVE);

  (void)streamTaskSetSchedStatusInactive(pTask);

  // now the fill-history task starts to scan data from wal files.
  code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE);
  if (code != TSDB_CODE_SUCCESS) {
    return;
  }

  code = tqScanWalAsync(pTq, false);
  if (code) {
    tqError("vgId:%d failed to start scan wal file, code:%s", vgId, tstrerror(code));
  }
}
2,369✔
914

915
/*
 * Callback passed to streamTaskHandleEventAsync() for TASK_EVENT_HALT.
 *
 * pStreamTask: the stream task the event was raised on
 * param:       the STQ instance
 *
 * Acquires the fill-history task referenced by pStreamTask->hTaskInfo.id and starts its step 2.
 * Always returns TSDB_CODE_SUCCESS; if the fill-history task cannot be acquired a warning is logged.
 */
int32_t handleStep2Async(SStreamTask* pStreamTask, void* param) {
  STQ*         pTq = param;
  SStreamMeta* pMeta = pStreamTask->pMeta;
  STaskId      fillTaskId = pStreamTask->hTaskInfo.id;
  SStreamTask* pFillTask = NULL;

  // only the out-parameter is inspected; the returned code is not used here
  (void)streamMetaAcquireTask(pMeta, fillTaskId.streamId, fillTaskId.taskId, &pFillTask);
  if (pFillTask != NULL) {
    doStartFillhistoryStep2(pFillTask, pStreamTask, pTq);
    streamMetaReleaseTask(pMeta, pFillTask);
  } else {
    tqWarn("s-task:0x%x failed to acquired it to exec step 2, scan wal quit", (int32_t)fillTaskId.taskId);
  }

  return TSDB_CODE_SUCCESS;
}
932

933
// this function should be executed by only one thread, so we set an sentinel to protect this function
934
int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
2,564✔
935
  SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont;
2,564✔
936
  SStreamMeta*           pMeta = pTq->pStreamMeta;
2,564✔
937
  int32_t                code = TSDB_CODE_SUCCESS;
2,564✔
938
  SStreamTask*           pTask = NULL;
2,564✔
939
  SStreamTask*           pStreamTask = NULL;
2,564✔
940

941
  code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask);
2,564✔
942
  if (pTask == NULL) {
2,565!
943
    tqError("vgId:%d failed to acquire stream task:0x%x during scan history data, task may have been destroyed",
×
944
            pMeta->vgId, pReq->taskId);
945
    return code;
×
946
  }
947

948
  // do recovery step1
949
  const char* id = pTask->id.idStr;
2,565✔
950
  char*       pStatus = streamTaskGetStatus(pTask).name;
2,565✔
951

952
  // avoid multi-thread exec
953
  while (1) {
×
954
    int32_t sentinel = atomic_val_compare_exchange_32(&pTask->status.inScanHistorySentinel, 0, 1);
2,565✔
955
    if (sentinel != 0) {
2,565!
956
      tqDebug("s-task:%s already in scan-history func, wait for 100ms, and try again", id);
×
957
      taosMsleep(100);
×
958
    } else {
959
      break;
2,565✔
960
    }
961
  }
962

963
  // let's decide which step should be executed now
964
  if (pTask->execInfo.step1Start == 0) {
2,565✔
965
    int64_t ts = taosGetTimestampMs();
2,378✔
966
    pTask->execInfo.step1Start = ts;
2,378✔
967
    tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts);
2,378✔
968
  } else {
969
    if (pTask->execInfo.step2Start == 0) {
187✔
970
      tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs",
186!
971
              id, pTask->execInfo.step1Start, pTask->execInfo.step1El);
972
    } else {
973
      tqDebug("s-task:%s already in step2, no need to scan-history data, step2 startTs:%" PRId64, id,
1!
974
              pTask->execInfo.step2Start);
975

976
      atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
1✔
977
      streamMetaReleaseTask(pMeta, pTask);
×
978
      return 0;
×
979
    }
980
  }
981

982
  // we have to continue retrying to successfully execute the scan history task.
983
  if (!streamTaskSetSchedStatusWait(pTask)) {
2,564!
984
    tqError(
×
985
        "s-task:%s failed to start scan-history in first stream time window since already started, unexpected "
986
        "sched-status:%d",
987
        id, pTask->status.schedStatus);
988
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
989
    streamMetaReleaseTask(pMeta, pTask);
×
990
    return 0;
×
991
  }
992

993
  int64_t              st = taosGetTimestampMs();
2,565✔
994
  SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask, st);
2,565✔
995

996
  double el = (taosGetTimestampMs() - st) / 1000.0;
2,565✔
997
  pTask->execInfo.step1El += el;
2,565✔
998

999
  if (retInfo.ret == TASK_SCANHISTORY_QUIT || retInfo.ret == TASK_SCANHISTORY_REXEC) {
2,565✔
1000
    int8_t status = streamTaskSetSchedStatusInactive(pTask);
192✔
1001
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
192✔
1002

1003
    if (retInfo.ret == TASK_SCANHISTORY_REXEC) {
192✔
1004
      streamExecScanHistoryInFuture(pTask, retInfo.idleTime);
186✔
1005
    } else {
1006
      SStreamTaskState p = streamTaskGetStatus(pTask);
6✔
1007
      ETaskStatus      s = p.state;
6✔
1008

1009
      if (s == TASK_STATUS__PAUSE) {
6!
1010
        tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs total:%.2fs, sched-status:%d", id, el,
×
1011
                pTask->execInfo.step1El, status);
1012
      } else if (s == TASK_STATUS__STOP || s == TASK_STATUS__DROPPING) {
6!
1013
        tqDebug("s-task:%s status:%p not continue scan-history data, total elapsed time:%.2fs quit", id, p.name,
6!
1014
                pTask->execInfo.step1El);
1015
      }
1016
    }
1017

1018
    streamMetaReleaseTask(pMeta, pTask);
192✔
1019
    return 0;
192✔
1020
  }
1021

1022
  // the following procedure should be executed, no matter status is stop/pause or not
1023
  tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, pTask->execInfo.step1El);
2,373✔
1024

1025
  if (pTask->info.fillHistory != 1) {
2,373!
1026
    tqError("s-task:%s fill-history is disabled, unexpected", id);
×
1027
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1028
  }
1029

1030
  // 1. get the related stream task
1031
  code = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId, &pStreamTask);
2,373✔
1032
  if (pStreamTask == NULL) {
2,373✔
1033
    tqError("failed to find s-task:0x%" PRIx64 ", it may have been destroyed, drop related fill-history task:%s",
4!
1034
            pTask->streamTaskId.taskId, pTask->id.idStr);
1035

1036
    tqDebug("s-task:%s fill-history task set status to be dropping", id);
4!
1037
    code = streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, 0);
4✔
1038

1039
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
4✔
1040
    streamMetaReleaseTask(pMeta, pTask);
4✔
1041
    return code;
4✔
1042
  }
1043

1044
  if (pStreamTask->info.taskLevel != TASK_LEVEL__SOURCE) {
2,369!
1045
    tqError("s-task:%s fill-history task related stream task level:%d, unexpected", id, pStreamTask->info.taskLevel);
×
1046
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1047
  }
1048

1049
  code = streamTaskHandleEventAsync(pStreamTask->status.pSM, TASK_EVENT_HALT, handleStep2Async, pTq);
2,369✔
1050
  streamMetaReleaseTask(pMeta, pStreamTask);
2,369✔
1051

1052
  atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
2,369✔
1053
  streamMetaReleaseTask(pMeta, pTask);
2,369✔
1054
  return code;
2,369✔
1055
}
1056

1057
/*
 * Handle a stream task run request.
 *
 * The body after the SMsgHead is decoded into an SStreamTaskRunReq. A decode failure is logged and
 * reported as TSDB_CODE_SUCCESS. A STREAM_EXEC_T_EXTRACT_WAL_DATA request runs tqScanWal() directly.
 * Any other request goes to tqStreamTaskProcessRunReq(); afterwards, for reqType >= 0 or
 * STREAM_EXEC_T_RESUME_TASK, an asynchronous WAL scan is started and its code is returned.
 */
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamTaskRunReq req = {0};
  SDecoder          decoder;
  char*             pBody = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t           bodyLen = pMsg->contLen - sizeof(SMsgHead);

  tDecoderInit(&decoder, (uint8_t*)pBody, bodyLen);
  int32_t code = tDecodeStreamTaskRunReq(&decoder, &req);
  if (code < 0) {
    tqError("vgId:%d failed to decode task run req, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
    tDecoderClear(&decoder);
    return TSDB_CODE_SUCCESS;
  }
  tDecoderClear(&decoder);

  // extracted submit data from wal files for all tasks
  if (req.reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) {
    return tqScanWal(pTq);
  }

  code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
  if (code) {
    tqError("vgId:%d failed to create task run req, code:%s", TD_VID(pTq->pVnode), tstrerror(code));
    return code;
  }

  // let's continue scan data in the wal files
  bool needScanWal = (req.reqType >= 0) || (req.reqType == STREAM_EXEC_T_RESUME_TASK);
  if (!needScanWal) {
    return code;
  }

  code = tqScanWalAsync(pTq, false);  // it's ok to failed
  if (code) {
    tqError("vgId:%d failed to start scan wal file, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
  }

  return code;
}
1094

1095
// Handle a dispatch request by forwarding it to tqStreamTaskProcessDispatchReq() with this tq's stream meta.
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDispatchReq(pMeta, pMsg);
}
1098

1099
// Handle a dispatch response by forwarding it to tqStreamTaskProcessDispatchRsp() with this tq's stream meta.
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDispatchRsp(pMeta, pMsg);
}
1102

1103
// Handle a drop-task request: the raw message and its length are handed to tqStreamTaskProcessDropReq().
int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessDropReq(pMeta, msg, msgLen);
}
1106

1107
// Handle an update-checkpoint request; the vnode's restored flag is forwarded, msgLen is not used.
int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessUpdateCheckpointReq(pMeta, pTq->pVnode->restored, msg);
}
1110

1111
// Handle a consensus-checkpoint-id request by forwarding it to tqStreamTaskProcessConsenChkptIdReq().
int32_t tqProcessTaskConsenChkptIdReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessConsenChkptIdReq(pMeta, pMsg);
}
1114

1115
// Handle a pause-task request; only the message body is forwarded, sversion and msgLen are not used.
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessTaskPauseReq(pMeta, msg);
}
1118

1119
// Handle a resume-task request: forwards the tq itself, the version and the message body with the
// last argument fixed to true; msgLen is not used.
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessTaskResumeReq(pTq, sversion, msg, true);
  return code;
}
1122

1123
// Handle a retrieve request by forwarding it to tqStreamTaskProcessRetrieveReq() with this tq's stream meta.
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessRetrieveReq(pMeta, pMsg);
}
1126

1127
// Retrieve responses need no processing here: both arguments are ignored and 0 is returned.
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  return 0;
}
448✔
1128

1129
/*
 * Answer a stream-progress request.
 *
 * The request body (after the SMsgHead) is deserialized, the execution info for req.streamId is
 * fetched with tqGetStreamExecInfo(), and an SStreamProgressRsp is serialized behind an SMsgHead in
 * a freshly allocated buffer that is sent with tmsgSendRsp().
 *
 * Ownership: the response buffer is handed to the rpc message once it is sent; on every other path
 * it is freed here.
 *
 * Returns -1 (terrno = TSDB_CODE_OUT_OF_MEMORY) when the buffer cannot be allocated, otherwise the
 * code of the first failing step, or the final value of `code` after sending.
 */
int32_t tqStreamProgressRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*              msgStr = pMsg->pCont;
  char*              msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t            msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t            rspLen = sizeof(SMsgHead) + sizeof(SStreamProgressRsp);
  int32_t            code = 0;
  SStreamProgressReq req = {0};
  SStreamProgressRsp rsp = {0};

  char* pRspBuf = taosMemoryCalloc(1, rspLen);
  if (pRspBuf == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  code = tDeserializeStreamProgressReq(msgBody, msgLen, &req);
  if (code == TSDB_CODE_SUCCESS) {
    code = tqGetStreamExecInfo(pTq->pVnode, req.streamId, &rsp.progressDelay, &rsp.fillHisFinished);
  }

  if (code == TSDB_CODE_SUCCESS) {
    rsp.fetchIdx = req.fetchIdx;
    rsp.subFetchIdx = req.subFetchIdx;
    rsp.vgId = req.vgId;
    rsp.streamId = req.streamId;

    // The payload area starts after the SMsgHead, so only sizeof(SStreamProgressRsp) bytes remain.
    // The response is built in a local struct so the serializer's input and output do not alias.
    // NOTE(review): a non-zero result is treated as failure here, as before - confirm that
    // tSerializeStreamProgressRsp() returns 0 (not the encoded length) on success.
    void* pPayload = POINTER_SHIFT(pRspBuf, sizeof(SMsgHead));
    code = tSerializeStreamProgressRsp(pPayload, sizeof(SStreamProgressRsp), &rsp);
    if (code) {
      goto _OVER;
    }

    SRpcMsg rpcRsp = {.info = pMsg->info, .code = 0};
    rpcRsp.pCont = pRspBuf;
    pRspBuf = NULL;  // the buffer now belongs to the rpc message
    rpcRsp.contLen = rspLen;
    tmsgSendRsp(&rpcRsp);
  }

_OVER:
  if (pRspBuf) {
    taosMemoryFree(pRspBuf);
  }
  return code;
}
1170

1171
// always return success to mnode
1172
//todo: handle failure of build and send msg to mnode
1173
static void doSendChkptSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, int32_t code,
89✔
1174
                                 int32_t taskId) {
1175
  SRpcMsg rsp = {0};
89✔
1176
  int32_t ret = streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &rsp, code);
89✔
1177
  if (ret) {  // suppress the error in build checkpoint source rsp
89!
1178
    tqError("s-task:0x%x failed to build checkpoint-source rsp, code:%s", taskId, tstrerror(ret));
×
1179
  }
1180
  tmsgSendRsp(&rsp);  // error occurs
89✔
1181
}
89✔
1182

1183
// no matter what kinds of error happened, make sure the mnode will receive the success execution code.
1184
int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) {
3,122✔
1185
  int32_t                    vgId = TD_VID(pTq->pVnode);
3,122✔
1186
  SStreamMeta*               pMeta = pTq->pStreamMeta;
3,122✔
1187
  char*                      msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
3,122✔
1188
  int32_t                    len = pMsg->contLen - sizeof(SMsgHead);
3,122✔
1189
  int32_t                    code = 0;
3,122✔
1190
  SStreamCheckpointSourceReq req = {0};
3,122✔
1191
  SDecoder                   decoder = {0};
3,122✔
1192
  SStreamTask*               pTask = NULL;
3,122✔
1193
  int64_t                    checkpointId = 0;
3,122✔
1194

1195
  // disable auto rsp to mnode
1196
  pRsp->info.handle = NULL;
3,122✔
1197

1198
  tDecoderInit(&decoder, (uint8_t*)msg, len);
3,122✔
1199
  if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) {
3,125!
1200
    code = TSDB_CODE_MSG_DECODE_ERROR;
×
1201
    tDecoderClear(&decoder);
×
1202
    tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code));
×
1203
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1204
    return TSDB_CODE_SUCCESS;  // always return success to mnode,
×
1205
  }
1206

1207
  tDecoderClear(&decoder);
3,120✔
1208

1209
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
3,124✔
1210
    tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId);
9!
1211
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
9✔
1212
    return TSDB_CODE_SUCCESS;  // always return success to mnode
9✔
1213
  }
1214

1215
  if (!pTq->pVnode->restored) {
3,117✔
1216
    tqDebug("vgId:%d checkpoint-source msg received during restoring, checkpointId:%" PRId64
80✔
1217
            ", transId:%d s-task:0x%x ignore it",
1218
            vgId, req.checkpointId, req.transId, req.taskId);
1219
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
80✔
1220
    return TSDB_CODE_SUCCESS;  // always return success to mnode
80✔
1221
  }
1222

1223
  code = streamMetaAcquireTask(pMeta, req.streamId, req.taskId, &pTask);
3,037✔
1224
  if (pTask == NULL || code != 0) {
3,038!
1225
    tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. checkpointId:%" PRId64
1!
1226
            " transId:%d it may have been destroyed",
1227
            vgId, req.taskId, req.checkpointId, req.transId);
1228
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
1✔
1229
    return TSDB_CODE_SUCCESS;
×
1230
  }
1231

1232
  if (pTask->status.downstreamReady != 1) {
3,037!
1233
    // record the latest failed checkpoint id
1234
    streamTaskSetFailedChkptInfo(pTask, req.transId, req.checkpointId);
×
1235
    tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpointId:%" PRId64
×
1236
            ", transId:%d set it failed",
1237
            pTask->id.idStr, req.checkpointId, req.transId);
1238

1239
    streamMetaReleaseTask(pMeta, pTask);
×
1240
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1241
    return TSDB_CODE_SUCCESS;  // todo retry handle error
×
1242
  }
1243

1244
  // todo save the checkpoint failed info
1245
  streamMutexLock(&pTask->lock);
3,037✔
1246
  ETaskStatus status = streamTaskGetStatus(pTask).state;
3,040✔
1247

1248
  if (req.mndTrigger == 1) {
3,033✔
1249
    if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) {
874!
1250
      tqError("s-task:%s not ready for checkpoint, since it is halt, ignore checkpointId:%" PRId64 ", set it failure",
×
1251
              pTask->id.idStr, req.checkpointId);
1252

1253
      streamMutexUnlock(&pTask->lock);
×
1254
      streamMetaReleaseTask(pMeta, pTask);
×
1255
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1256
      return TSDB_CODE_SUCCESS;
×
1257
    }
1258
  } else {
1259
    if (status != TASK_STATUS__HALT) {
2,159!
1260
      tqError("s-task:%s should in halt status, let's halt it directly", pTask->id.idStr);
×
1261
      //      streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_HALT);
1262
    }
1263
  }
1264

1265
  // check if the checkpoint msg already sent or not.
1266
  if (status == TASK_STATUS__CK) {
3,031!
1267
    streamTaskGetActiveCheckpointInfo(pTask, NULL, &checkpointId);
×
1268

1269
    tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1270
           " transId:%d already handled, ignore msg and continue process checkpoint",
1271
           pTask->id.idStr, checkpointId, req.transId);
1272

1273
    streamMutexUnlock(&pTask->lock);
×
1274
    streamMetaReleaseTask(pMeta, pTask);
×
1275
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SYN_PROPOSE_NOT_READY, req.taskId);
×
1276
    return TSDB_CODE_SUCCESS;
×
1277
  } else {  // checkpoint already finished, and not in checkpoint status
1278
    if (req.checkpointId <= pTask->chkInfo.checkpointId) {
3,031!
1279
      tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1280
             " transId:%d already handled, return success",
1281
             pTask->id.idStr, req.checkpointId, req.transId);
1282

1283
      streamMutexUnlock(&pTask->lock);
×
1284
      streamMetaReleaseTask(pMeta, pTask);
×
1285
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1286
      return TSDB_CODE_SUCCESS;
×
1287
    }
1288
  }
1289

1290
  code = streamProcessCheckpointSourceReq(pTask, &req);
3,031✔
1291
  streamMutexUnlock(&pTask->lock);
3,041✔
1292

1293
  if (code) {
3,042!
1294
    qError("s-task:%s (vgId:%d) failed to process checkpoint-source req, code:%s", pTask->id.idStr, vgId,
×
1295
           tstrerror(code));
1296
    streamMetaReleaseTask(pMeta, pTask);
×
1297
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1298
    return TSDB_CODE_SUCCESS;
×
1299
  }
1300

1301
  if (req.mndTrigger) {
3,042✔
1302
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64 ", transId:%d, ",
876!
1303
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId);
1304
  } else {
1305
    const char* pPrevStatus = streamTaskGetStatusStr(streamTaskGetPrevStatus(pTask));
2,166✔
1306
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64
2,164!
1307
           ", transId:%d after transfer-state, prev status:%s",
1308
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId, pPrevStatus);
1309
  }
1310

1311
  code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask);
3,042✔
1312
  if (code != TSDB_CODE_SUCCESS) {
3,043!
1313
    streamTaskSetCheckpointFailed(pTask);  // set the checkpoint failed
×
1314
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1315
  }
1316

1317
  streamMetaReleaseTask(pMeta, pTask);
3,043✔
1318
  return TSDB_CODE_SUCCESS;
3,042✔
1319
}
1320

1321
// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task
1322
int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) {
7,570✔
1323
  int32_t vgId = TD_VID(pTq->pVnode);
7,570✔
1324

1325
  SStreamCheckpointReadyMsg* pReq = (SStreamCheckpointReadyMsg*)pMsg->pCont;
7,570✔
1326
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
7,570!
1327
    tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId,
×
1328
            (int32_t)pReq->downstreamTaskId);
1329
    return TSDB_CODE_STREAM_NOT_LEADER;
×
1330
  }
1331

1332
  return tqStreamTaskProcessCheckpointReadyMsg(pTq->pStreamMeta, pMsg);
7,567✔
1333
}
1334

1335
// Handle a task-update request: forwards the message with the vnode's message callbacks and restored flag.
int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessUpdateReq(pMeta, &pTq->pVnode->msgCb, pMsg, pTq->pVnode->restored);
}
1338

1339
// Handle a task-reset request: only the message content pointer is forwarded to the stream layer.
int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessTaskResetReq(pMeta, pMsg->pCont);
}
1342

1343
/*
 * Handle a retrieve checkpoint-trigger request.
 *
 * On the leader the message is forwarded to tqStreamTaskProcessRetrieveTriggerReq(). On a non-leader
 * the request is decoded only to report the sending task in the log: TSDB_CODE_INVALID_MSG is
 * returned when decoding fails, TSDB_CODE_STREAM_NOT_LEADER otherwise.
 */
int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t vgId = TD_VID(pTq->pVnode);

  if (vnodeIsRoleLeader(pTq->pVnode)) {
    return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg);
  }

  SRetrieveChkptTriggerReq req = {0};
  SDecoder                 decoder = {0};
  char*                    msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t                  len = pMsg->contLen - sizeof(SMsgHead);

  tDecoderInit(&decoder, (uint8_t*)msg, len);
  int32_t decodeRes = tDecodeRetrieveChkptTriggerReq(&decoder, &req);
  tDecoderClear(&decoder);

  if (decodeRes < 0) {
    tqError("vgId:%d invalid retrieve checkpoint-trigger req received", vgId);
    return TSDB_CODE_INVALID_MSG;
  }

  // the task id follows a "0x" prefix, so it is printed in hex
  tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from s-task:0x%" PRIx64, vgId,
          req.downstreamTaskId);
  return TSDB_CODE_STREAM_NOT_LEADER;
}
1368

1369
// Handle a retrieve checkpoint-trigger response by forwarding it to the stream layer.
int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamTaskProcessRetrieveTriggerRsp(pMeta, pMsg);
}
1372

1373
// this function is needed, do not try to remove it.
1374
int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg) { return tqStreamProcessStreamHbRsp(pTq->pStreamMeta, pMsg); }
22,678✔
1375

1376
// Handle the response to a checkpoint request by forwarding it to tqStreamProcessReqCheckpointRsp().
int32_t tqProcessStreamReqCheckpointRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessReqCheckpointRsp(pMeta, pMsg);
}
1379

1380
// Handle a checkpoint-ready response by forwarding it to tqStreamProcessCheckpointReadyRsp().
int32_t tqProcessTaskCheckpointReadyRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessCheckpointReadyRsp(pMeta, pMsg);
}
1383

1384
// Handle a checkpoint-report response by forwarding it to tqStreamProcessChkptReportRsp().
int32_t tqProcessTaskChkptReportRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamMeta* pMeta = pTq->pStreamMeta;
  return tqStreamProcessChkptReportRsp(pMeta, pMsg);
}
1387

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc