• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3562

20 Dec 2024 09:57AM UTC coverage: 26.655% (-32.2%) from 58.812%
#3562

push

travis-ci

web-flow
Merge pull request #29229 from taosdata/enh/TS-5749-3.0

enh: seperate tsdb async tasks to different thread pools

21498 of 109421 branches covered (19.65%)

Branch coverage included in aggregate %.

66 of 96 new or added lines in 7 files covered. (68.75%)

39441 existing lines in 157 files now uncovered.

35007 of 102566 relevant lines covered (34.13%)

53922.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.16
/source/dnode/vnode/src/tq/tq.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "tq.h"
17
#include "osDef.h"
18
#include "taoserror.h"
19
#include "tqCommon.h"
20
#include "tstream.h"
21
#include "vnd.h"
22

23
// 0: not init
24
// 1: already inited
25
// 2: wait to be inited or cleanup
26
static int32_t tqInitialize(STQ* pTq);
27

UNCOV
28
// Reports whether the handle's status is TMQ_HANDLE_STATUS_EXEC.
// A NULL handle is reported as executing (returns true).
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return true;
  }
  return pHandle->status == TMQ_HANDLE_STATUS_EXEC;
}
×
UNCOV
29
// Sets the handle's status to TMQ_HANDLE_STATUS_EXEC; a NULL handle is ignored.
static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return;
  }
  pHandle->status = TMQ_HANDLE_STATUS_EXEC;
}
×
UNCOV
30
// Sets the handle's status to TMQ_HANDLE_STATUS_IDLE; a NULL handle is ignored.
static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) {
  if (pHandle == NULL) {
    return;
  }
  pHandle->status = TMQ_HANDLE_STATUS_IDLE;
}
×
31

UNCOV
32
void tqDestroyTqHandle(void* data) {
×
UNCOV
33
  if (data == NULL) return;
×
UNCOV
34
  STqHandle* pData = (STqHandle*)data;
×
UNCOV
35
  qDestroyTask(pData->execHandle.task);
×
36

UNCOV
37
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
×
UNCOV
38
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
×
UNCOV
39
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
×
UNCOV
40
    tqReaderClose(pData->execHandle.pTqReader);
×
UNCOV
41
    walCloseReader(pData->pWalReader);
×
UNCOV
42
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
×
UNCOV
43
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
×
UNCOV
44
    walCloseReader(pData->pWalReader);
×
UNCOV
45
    tqReaderClose(pData->execHandle.pTqReader);
×
UNCOV
46
    taosMemoryFreeClear(pData->execHandle.execTb.qmsg);
×
UNCOV
47
    nodesDestroyNode(pData->execHandle.execTb.node);
×
48
  }
UNCOV
49
  if (pData->msg != NULL) {
×
50
    rpcFreeCont(pData->msg->pCont);
×
51
    taosMemoryFree(pData->msg);
×
52
    pData->msg = NULL;
×
53
  }
UNCOV
54
  if (pData->block != NULL) {
×
55
    blockDataDestroy(pData->block);
×
56
  }
UNCOV
57
  if (pData->pRef) {
×
UNCOV
58
    walCloseRef(pData->pRef->pWal, pData->pRef->refId);
×
59
  }
60
}
61

UNCOV
62
static bool tqOffsetEqual(const STqOffset* pLeft, const STqOffset* pRight) {
×
UNCOV
63
  if (pLeft == NULL || pRight == NULL) {
×
64
    return false;
×
65
  }
UNCOV
66
  return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG &&
×
UNCOV
67
         pLeft->val.version == pRight->val.version;
×
68
}
69

70
// Allocates the STQ object for a vnode, creates its hash tables, and finishes with tqInitialize().
//
// path   - copied with taosStrdup into pTq->path
// pVnode - receives the new object in pVnode->pTq
//
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, terrno when an allocation fails,
// otherwise the result of tqInitialize().
// NOTE: the object is stored in pVnode->pTq right after allocation and is not released
// here when a later step fails.
int32_t tqOpen(const char* path, SVnode* pVnode) {
  if (NULL == path || NULL == pVnode) {
    return TSDB_CODE_INVALID_PARA;
  }

  STQ* pNewTq = taosMemoryCalloc(1, sizeof(STQ));
  if (NULL == pNewTq) {
    return terrno;
  }
  pVnode->pTq = pNewTq;

  pNewTq->path = taosStrdup(path);
  if (NULL == pNewTq->path) {
    return terrno;
  }
  pNewTq->pVnode = pVnode;

  // subscription handles, destroyed through tqDestroyTqHandle
  pNewTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
  if (NULL == pNewTq->pHandle) {
    return terrno;
  }
  taosHashSetFreeFp(pNewTq->pHandle, tqDestroyTqHandle);

  taosInitRWLatch(&pNewTq->lock);

  // push manager: created without hash-level locking (HASH_NO_LOCK)
  pNewTq->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK);
  if (NULL == pNewTq->pPushMgr) {
    return terrno;
  }

  // check info entries
  pNewTq->pCheckInfo = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
  if (NULL == pNewTq->pCheckInfo) {
    return terrno;
  }
  taosHashSetFreeFp(pNewTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);

  // committed offsets
  pNewTq->pOffset = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_ENTRY_LOCK);
  if (NULL == pNewTq->pOffset) {
    return terrno;
  }
  taosHashSetFreeFp(pNewTq->pOffset, (FDelete)tDeleteSTqOffset);

  return tqInitialize(pNewTq);
}
112

113
// Opens the stream meta for this tq, loads all stream tasks, then opens the tq meta store.
// Returns TSDB_CODE_INVALID_PARA for a NULL pTq, the streamMetaOpen() error if that step
// fails, otherwise the result of tqMetaOpen().
int32_t tqInitialize(STQ* pTq) {
  if (NULL == pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t vgId = TD_VID(pTq->pVnode);
  int32_t code = streamMetaOpen(pTq->path, pTq, tqBuildStreamTask, tqExpandStreamTask, vgId, -1,
                                tqStartTaskCompleteCallback, &pTq->pStreamMeta);
  if (TSDB_CODE_SUCCESS == code) {
    streamMetaLoadAllTasks(pTq->pStreamMeta);
    code = tqMetaOpen(pTq);
  }

  return code;
}
127

128
// Shuts down a tq object: answers every poll request still parked in the push manager with
// an empty response, destroys the hash tables, closes the tq meta store and the stream meta,
// and finally frees the object. A NULL pTq is a no-op.
//
// tqOpen() stores the object on the vnode before all members are set up and may return early,
// so pStreamMeta can still be NULL here; it is therefore checked before being dereferenced.
void tqClose(STQ* pTq) {
  qDebug("start to close tq");
  if (pTq == NULL) {
    return;
  }

  void* pIter = taosHashIterate(pTq->pPushMgr, NULL);
  while (pIter) {
    STqHandle* pHandle = *(STqHandle**)pIter;
    int32_t    vgId = TD_VID(pTq->pVnode);

    if (pHandle->msg != NULL) {
      // release the waiting consumer, then drop the parked request
      tqPushEmptyDataRsp(pHandle, vgId);
      rpcFreeCont(pHandle->msg->pCont);
      taosMemoryFree(pHandle->msg);
      pHandle->msg = NULL;
    }
    pIter = taosHashIterate(pTq->pPushMgr, pIter);
  }

  taosHashCleanup(pTq->pHandle);
  taosHashCleanup(pTq->pPushMgr);
  taosHashCleanup(pTq->pCheckInfo);
  taosHashCleanup(pTq->pOffset);
  taosMemoryFree(pTq->path);
  tqMetaClose(pTq);

  // vgId is only used for the closing log line; fall back to the vnode id (or 0) when the
  // stream meta was never opened
  int32_t vgId = 0;
  if (pTq->pStreamMeta != NULL) {
    vgId = pTq->pStreamMeta->vgId;
    streamMetaClose(pTq->pStreamMeta);
  } else if (pTq->pVnode != NULL) {
    vgId = TD_VID(pTq->pVnode);
  }

  qDebug("vgId:%d end to close tq", vgId);
  taosMemoryFree(pTq);
}
161

162
// Forwards the close notification to the stream meta; does nothing for a NULL pTq.
void tqNotifyClose(STQ* pTq) {
  if (pTq != NULL) {
    streamMetaNotifyClose(pTq->pStreamMeta);
  }
}
168

UNCOV
169
// Answers the poll request parked on pHandle->msg with a data response that has no blocks
// (blockNum = 0), echoing the request offset.
//
// pHandle - handle whose parked message is answered; nothing is done when the handle or its
//           message is NULL
// vgId    - vnode group id, used for logging and forwarded to tqSendDataRsp()
//
// Failures are logged and otherwise swallowed; the parked message is not freed here.
void tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) {
  // the message is dereferenced below, so it must be checked together with the handle
  if (pHandle == NULL || pHandle->msg == NULL) {
    return;
  }
  int32_t    code = 0;
  SMqPollReq req = {0};
  code = tDeserializeSMqPollReq(pHandle->msg->pCont, pHandle->msg->contLen, &req);
  if (code < 0) {
    tqError("tDeserializeSMqPollReq %d failed, code:%d", pHandle->msg->contLen, code);
    return;
  }

  SMqDataRsp dataRsp = {0};
  code = tqInitDataRsp(&dataRsp, req.reqOffset);
  if (code != 0) {
    tqError("tqInitDataRsp failed, code:%d", code);
    return;
  }
  dataRsp.blockNum = 0;
  char buf[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(buf, TSDB_OFFSET_LEN, &dataRsp.reqOffset);
  tqInfo("tqPushEmptyDataRsp to consumer:0x%" PRIx64 " vgId:%d, offset:%s,QID:0x%" PRIx64, req.consumerId, vgId, buf,
         req.reqId);

  code = tqSendDataRsp(pHandle, pHandle->msg, &req, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
  if (code != 0) {
    tqError("tqSendDataRsp failed, code:%d", code);
  }
  tDeleteMqDataRsp(&dataRsp);
}
199

UNCOV
200
// Logs a poll response and hands it to tqDoSendDataRsp() together with the valid WAL version
// range read from the handle's reader.
//
// Returns TSDB_CODE_INVALID_PARA when pHandle, pMsg, pReq or pRsp is NULL, otherwise the
// result of tqDoSendDataRsp().
int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type,
                      int32_t vgId) {
  if (NULL == pHandle || NULL == pMsg || NULL == pReq || NULL == pRsp) {
    return TSDB_CODE_INVALID_PARA;
  }

  int64_t startVer = 0;
  int64_t endVer = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &startVer, &endVer);

  // printable forms of the request/response offsets, used only for the debug line
  char reqOffsetStr[TSDB_OFFSET_LEN] = {0};
  char rspOffsetStr[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(reqOffsetStr, TSDB_OFFSET_LEN, &(pRsp->reqOffset));
  (void)tFormatOffset(rspOffsetStr, TSDB_OFFSET_LEN, &(pRsp->rspOffset));

  tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s,QID:0x%" PRIx64,
          vgId, pReq->consumerId, pReq->epoch, pRsp->blockNum, reqOffsetStr, rspOffsetStr, pReq->reqId);

  return tqDoSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type, startVer, endVer);
}
218

UNCOV
219
// Handles an offset-commit message: decodes the SMqVgOffset carried in msg, and unless the
// same log offset is already stored, puts it into the in-memory offset hash and persists it
// in the offset store.
//
// pTq      - tq instance; TSDB_CODE_INVALID_PARA is returned when NULL
// sversion - not used by this function
// msg      - encoded SMqVgOffset
// msgLen   - length of msg in bytes
//
// Returns 0 on success (including the "nothing to update" case), TSDB_CODE_INVALID_MSG when
// the message cannot be decoded or carries an unknown offset type, and -1 with terrno set
// when storing the offset fails.
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (pTq == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);

  int32_t  code = 0;
  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // the decoder is no longer needed whether or not decoding worked; clear it on both paths
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pOffset = &vgOffset.offset;

  if (pOffset->val.type == TMQ_OFFSET__SNAPSHOT_DATA || pOffset->val.type == TMQ_OFFSET__SNAPSHOT_META) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
            pOffset->subKey, vgId, pOffset->val.uid, pOffset->val.ts);
  } else if (pOffset->val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, pOffset->subKey, vgId,
            pOffset->val.version);
  } else {
    tqError("invalid commit offset type:%d", pOffset->val.type);
    code = TSDB_CODE_INVALID_MSG;
    goto end;
  }

  STqOffset* pSavedOffset = NULL;
  code = tqMetaGetOffset(pTq, pOffset->subKey, &pSavedOffset);
  if (code == 0 && tqOffsetEqual(pOffset, pSavedOffset)) {
    tqInfo("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64,
           vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version);
    goto end;  // no need to update the offset value
  }

  // save the new offset value
  if (taosHashPut(pTq->pOffset, pOffset->subKey, strlen(pOffset->subKey), pOffset, sizeof(STqOffset))) {
    // the hash did not take the value, so the decoded offset is still ours to release
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    code = -1;
    goto end;
  }

  // From here on the offset hash holds a copy of *pOffset (and whatever it points to), so the
  // local copy must not be destroyed any more.
  // The persisted value leaves out the last sizeof(consumerId) bytes of the message
  // (presumably the encoded consumer id - confirm against tEncodeMqVgOffset). The comparison
  // is done in signed arithmetic so that a negative msgLen yields 0 instead of wrapping.
  int32_t tailLen = (int32_t)sizeof(vgOffset.consumerId);
  int32_t storeLen = (msgLen >= tailLen) ? (msgLen - tailLen) : 0;
  if (tqMetaSaveInfo(pTq, pTq->pOffsetStore, pOffset->subKey, strlen(pOffset->subKey), msg, storeLen) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
end:
  tOffsetDestroy(&vgOffset.offset.val);
  return code;
}
275

UNCOV
276
// Handles a seek request: looks up the subscription handle under the write lock, verifies
// that it belongs to the requesting consumer, and unregisters it from the push manager.
// The outcome is reported to the client through rsp.code; the function itself returns 0
// whenever a response was sent, and TSDB_CODE_INVALID_PARA for NULL arguments.
int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) {
  if (NULL == pTq || NULL == pMsg) {
    return TSDB_CODE_INVALID_PARA;
  }

  SMqSeekReq req = {0};
  int32_t    vgId = TD_VID(pTq->pVnode);
  SRpcMsg    rsp = {.info = pMsg->info};
  int        code = 0;

  if (tDeserializeSMqSeekReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    code = TSDB_CODE_OUT_OF_MEMORY;
  } else {
    tqDebug("tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s", req.consumerId, vgId, req.subKey);
    taosWLockLatch(&pTq->lock);

    STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
    if (NULL == pHandle) {
      // an unknown subkey is only warned about; the response code stays 0
      tqWarn("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", req.consumerId, vgId, req.subKey);
      code = 0;
    } else if (pHandle->consumerId != req.consumerId) {
      // check consumer-vg assignment status
      tqError("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              req.consumerId, vgId, req.subKey, pHandle->consumerId);
      code = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    } else {
      // if consumer register to push manager, push empty to consumer to change vg status from
      // TMQ_VG_STATUS__WAIT to TMQ_VG_STATUS__IDLE, otherwise poll data failed after seek.
      tqUnregisterPushHandle(pTq, pHandle);
    }

    taosWUnLockLatch(&pTq->lock);
  }

  rsp.code = code;
  tmsgSendRsp(&rsp);
  return 0;
}
320

UNCOV
321
// Scans every check-info entry and tells whether column colId of table tbUid appears in the
// column id list of an entry registered for that table.
//
// Returns -1 when such an entry is found, 0 when none is, and TSDB_CODE_INVALID_PARA for a
// NULL pTq.
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
  if (NULL == pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  for (void* pIter = taosHashIterate(pTq->pCheckInfo, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pCheckInfo, pIter)) {
    STqCheckInfo* pCheck = (STqCheckInfo*)pIter;
    if (pCheck->ntbUid != tbUid) {
      continue;
    }

    int32_t numOfCols = taosArrayGetSize(pCheck->colIdList);
    for (int32_t idx = 0; idx < numOfCols; ++idx) {
      int16_t* pListedColId = taosArrayGet(pCheck->colIdList, idx);
      if (pListedColId != NULL && (*pListedColId) == colId) {
        // leaving the iteration early: the iterator has to be cancelled explicitly
        taosHashCancelIterate(pTq->pCheckInfo, pIter);
        return -1;
      }
    }
  }

  return 0;
}
353

UNCOV
354
// Re-submits every poll request parked in the push manager to the query queue as a
// TDMT_VND_TMQ_CONSUME message and then empties the push manager. Runs under the tq write
// lock. pMsg is not used.
//
// Returns TSDB_CODE_INVALID_PARA for a NULL pTq, otherwise 0 (queueing failures are only
// logged).
int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg) {
  if (NULL == pTq) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t vgId = TD_VID(pTq->pVnode);

  taosWLockLatch(&pTq->lock);
  if (taosHashGetSize(pTq->pPushMgr) > 0) {
    for (void* pIter = taosHashIterate(pTq->pPushMgr, NULL); pIter != NULL;
         pIter = taosHashIterate(pTq->pPushMgr, pIter)) {
      STqHandle* pHandle = *(STqHandle**)pIter;
      tqDebug("vgId:%d start set submit for pHandle:%p, consumer:0x%" PRIx64, vgId, pHandle, pHandle->consumerId);

      if (NULL == pHandle->msg) {
        // a registered handle without a parked request stops the scan
        tqError("pHandle->msg should not be null");
        taosHashCancelIterate(pTq->pPushMgr, pIter);
        break;
      }

      // the new message reuses the payload of the parked request; only the wrapper is freed below
      SRpcMsg pollMsg = {.msgType = TDMT_VND_TMQ_CONSUME,
                         .pCont = pHandle->msg->pCont,
                         .contLen = pHandle->msg->contLen,
                         .info = pHandle->msg->info};
      if (tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &pollMsg) != 0){
        tqError("vgId:%d tmsgPutToQueue failed, consumer:0x%" PRIx64, vgId, pHandle->consumerId);
      }
      taosMemoryFree(pHandle->msg);
      pHandle->msg = NULL;
    }

    taosHashClear(pTq->pPushMgr);
  }
  taosWUnLockLatch(&pTq->lock);

  return 0;
}
391

UNCOV
392
// Handles a consumer poll request.
//
// Steps: deserialize the request, find the subscription handle and check that it belongs to
// the polling consumer, wait (10 ms retries) until the handle is not executing and mark it as
// executing, raise the handle's epoch if the request carries a newer one, extract data via
// tqExtractDataForMq(), and finally mark the handle idle again.
//
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, the negative deserialization result when
// the request cannot be parsed, -1 (with terrno set) when the handle is missing or owned by
// another consumer, otherwise the result of tqExtractDataForMq().
// Every exit taken after deserialization goes through END so that the request is destroyed.
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  SMqPollReq req = {0};
  int        code = tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req);
  if (code < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    goto END;
  }

  int64_t      consumerId = req.consumerId;
  int32_t      reqEpoch = req.epoch;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
  STqHandle*   pHandle = NULL;

  while (1) {
    taosWLockLatch(&pTq->lock);
    // 1. find handle
    code = tqMetaGetHandle(pTq, req.subKey, &pHandle);
    if (code != TDB_CODE_SUCCESS) {
      tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_MSG;
      taosWUnLockLatch(&pTq->lock);
      // leave through END (not a bare return) so the deserialized request is released
      code = -1;
      goto END;
    }

    // 2. check rebalance status
    if (pHandle->consumerId != consumerId) {
      tqError("ERROR tmq poll: consumer:0x%" PRIx64
              " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId);
      terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
      taosWUnLockLatch(&pTq->lock);
      code = -1;
      goto END;
    }

    // claim the handle if nobody is executing on it; otherwise back off and retry
    bool exec = tqIsHandleExec(pHandle);
    if (!exec) {
      tqSetHandleExec(pHandle);
      //      qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS);
      tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId,
              req.subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      break;
    }
    taosWUnLockLatch(&pTq->lock);

    tqDebug("tmq poll: consumer:0x%" PRIx64
            " vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p",
            consumerId, vgId, req.subKey, pHandle);
    taosMsleep(10);
  }

  // 3. update the epoch value
  if (pHandle->epoch < reqEpoch) {
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch,
            reqEpoch);
    pHandle->epoch = reqEpoch;
  }

  char buf[TSDB_OFFSET_LEN] = {0};
  (void)tFormatOffset(buf, TSDB_OFFSET_LEN, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s,QID:0x%" PRIx64,
          consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId);

  code = tqExtractDataForMq(pTq, pHandle, &req, pMsg);
  tqSetHandleIdle(pHandle);

  tqDebug("tmq poll: consumer:0x%" PRIx64 " vgId:%d, topic:%s, set handle idle, pHandle:%p", consumerId, vgId,
          req.subKey, pHandle);

END:
  tDestroySMqPollReq(&req);
  return code;
}
471

UNCOV
472
// Answers a "committed offset" query: decodes the SMqVgOffset that follows the SMsgHead in
// the request, looks up the stored offset for its subKey, and sends that stored offset back
// encoded as an SMqVgOffset.
//
// Returns TSDB_CODE_INVALID_PARA for NULL arguments or when encoding fails,
// TSDB_CODE_OUT_OF_MEMORY when the request cannot be decoded, TSDB_CODE_TMQ_NO_COMMITTED when
// no offset is stored for the subKey, terrno when the response buffer cannot be allocated,
// and 0 once the response has been sent.
int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (pTq == NULL || pMsg == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }
  void*   data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t len = pMsg->contLen - sizeof(SMsgHead);

  SMqVgOffset vgOffset = {0};

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)data, len);
  int32_t decodeRet = tDecodeMqVgOffset(&decoder, &vgOffset);
  // clear the decoder on the failure path as well
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    tOffsetDestroy(&vgOffset.offset.val);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return terrno;
  }

  STqOffset* pSavedOffset = NULL;
  int32_t    code = tqMetaGetOffset(pTq, vgOffset.offset.subKey, &pSavedOffset);
  // Only the subKey of the request was needed. Release the decoded offset value now: it is
  // either abandoned (no committed offset) or overwritten by the stored offset below.
  tOffsetDestroy(&vgOffset.offset.val);
  if (code != 0) {
    return TSDB_CODE_TMQ_NO_COMMITTED;
  }
  // shallow copy of the stored offset; it is only borrowed for encoding and not destroyed here
  vgOffset.offset = *pSavedOffset;

  // first pass computes the encoded size into len, second pass encodes into the rpc buffer
  tEncodeSize(tEncodeMqVgOffset, &vgOffset, len, code);
  if (code < 0) {
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  void* buf = rpcMallocCont(len);
  if (buf == NULL) {
    return terrno;
  }
  SEncoder encoder = {0};
  tEncoderInit(&encoder, buf, len);
  code = tEncodeMqVgOffset(&encoder, &vgOffset);
  tEncoderClear(&encoder);
  if (code < 0) {
    rpcFreeCont(buf);
    return TAOS_GET_TERRNO(TSDB_CODE_INVALID_PARA);
  }

  SRpcMsg rsp = {.info = pMsg->info, .pCont = buf, .contLen = len, .code = 0};

  tmsgSendRsp(&rsp);
  return 0;
}
520

UNCOV
521
// Answers a WAL-info query for a subscription: reports the valid WAL version range of the
// handle's reader together with a log offset chosen from the request offset, the stored
// offset, or the earliest/latest WAL version.
//
// Returns TSDB_CODE_INVALID_PARA for NULL arguments, snapshot requests, non-log stored
// offsets, or unsupported request offset types; TSDB_CODE_INVALID_MSG when the request cannot
// be parsed or the subkey is unknown; TSDB_CODE_TMQ_CONSUMER_MISMATCH when the handle belongs
// to another consumer; the tqInitDataRsp() error if that fails; otherwise the result of
// tqDoSendDataRsp().
int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  if (NULL == pTq || NULL == pMsg) {
    return TSDB_CODE_INVALID_PARA;
  }

  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    return TSDB_CODE_INVALID_MSG;
  }

  int32_t      code = 0;
  int64_t      consumerId = req.consumerId;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
  int64_t      firstVer = 0;
  int64_t      lastVer = 0;

  // 1. find handle
  taosRLockLatch(&pTq->lock);
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (NULL == pHandle) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s not found", consumerId, vgId, req.subKey);
    taosRUnLockLatch(&pTq->lock);
    return TSDB_CODE_INVALID_MSG;
  }

  // 2. check rebalance status
  if (pHandle->consumerId != consumerId) {
    tqDebug("ERROR consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, vgId, req.subKey, pHandle->consumerId);
    taosRUnLockLatch(&pTq->lock);
    return TSDB_CODE_TMQ_CONSUMER_MISMATCH;
  }

  // the version range is read while the handle is still protected by the read lock
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &firstVer, &lastVer);
  taosRUnLockLatch(&pTq->lock);

  SMqDataRsp rsp = {0};
  code = tqInitDataRsp(&rsp, req.reqOffset);
  if (code != 0) {
    return code;
  }

  if (req.useSnapshot == true) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s snapshot not support wal info", consumerId, vgId, req.subKey);
    code = TSDB_CODE_INVALID_PARA;
    goto _out;
  }

  rsp.rspOffset.type = TMQ_OFFSET__LOG;

  if (reqOffset.type == TMQ_OFFSET__LOG) {
    // the consumer supplied an explicit log position: echo it
    rsp.rspOffset.version = reqOffset.version;
  } else if (reqOffset.type >= 0) {
    // neither a log offset nor one of the negative offset types
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey,
            reqOffset.type);
    code = TSDB_CODE_INVALID_PARA;
    goto _out;
  } else {
    // negative offset type: prefer the stored offset, else fall back to the WAL boundaries
    STqOffset* pStored = NULL;
    code = tqMetaGetOffset(pTq, req.subKey, &pStored);
    if (code != 0) {
      if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEST) {
        rsp.rspOffset.version = firstVer;  // not consume yet, set the earliest position
      } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
        rsp.rspOffset.version = lastVer;
      }
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%" PRId64, consumerId, vgId, req.subKey,
             rsp.rspOffset.version);
    } else {
      if (pStored->val.type != TMQ_OFFSET__LOG) {
        tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s, no valid wal info", consumerId, vgId, req.subKey);
        code = TSDB_CODE_INVALID_PARA;
        goto _out;
      }

      rsp.rspOffset.version = pStored->val.version;
      tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from store:%" PRId64, consumerId, vgId,
             req.subKey, rsp.rspOffset.version);
    }
  }

  code = tqDoSendDataRsp(&pMsg->info, &rsp, req.epoch, req.consumerId, TMQ_MSG_TYPE__WALINFO_RSP, firstVer, lastVer);

_out:
  tDeleteMqDataRsp(&rsp);
  return code;
}
608

UNCOV
609
// Handles a delete-subscription request: waits until the handle (if any) is not executing,
// unregisters it from the push manager and removes it from the handle hash, then removes the
// subscription's offset from the offset hash and deletes its entries from the offset store
// and the exec store. Individual failures are logged only.
//
// msg is interpreted as an SMqVDeleteReq; sversion and msgLen are not used.
// Returns TSDB_CODE_INVALID_PARA for NULL pTq/msg, otherwise 0.
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (NULL == pTq || NULL == msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;
  int32_t        vgId = TD_VID(pTq->pVnode);

  tqInfo("vgId:%d, tq process delete sub req %s", vgId, pReq->subKey);

  STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
  if (pHandle) {
    taosWLockLatch(&pTq->lock);
    // back off in 10 ms steps, dropping the lock while a poll is executing on the handle
    while (tqIsHandleExec(pHandle)) {
      tqInfo("vgId:%d, topic:%s, subscription is executing, delete wait for 10ms and retry, pHandle:%p", vgId,
             pHandle->subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      taosMsleep(10);
      taosWLockLatch(&pTq->lock);
    }

    tqUnregisterPushHandle(pTq, pHandle);
    if (taosHashRemove(pTq->pHandle, pReq->subKey, strlen(pReq->subKey)) != 0) {
      tqError("cannot process tq delete req %s, since no such handle", pReq->subKey);
    }
    taosWUnLockLatch(&pTq->lock);
  }

  taosWLockLatch(&pTq->lock);
  if (taosHashRemove(pTq->pOffset, pReq->subKey, strlen(pReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in hash", pReq->subKey);
  }
  if (tqMetaDeleteInfo(pTq, pTq->pOffsetStore, pReq->subKey, strlen(pReq->subKey)) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pReq->subKey);
  }
  if (tqMetaDeleteInfo(pTq, pTq->pExecStore, pReq->subKey, strlen(pReq->subKey)) < 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pReq->subKey);
  }
  taosWUnLockLatch(&pTq->lock);

  return 0;
}
657

UNCOV
658
// Decodes an STqCheckInfo from msg, registers it in the check-info hash under its topic name
// and persists the raw message in the check store. A negative msgLen is treated as 0.
// sversion is not used.
//
// Returns TSDB_CODE_INVALID_PARA for NULL pTq/msg, the decode or hash-put error if one of
// those steps fails (the decoded info is deleted when the put fails), otherwise the result of
// tqMetaSaveInfo().
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (NULL == pTq || NULL == msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  STqCheckInfo checkInfo = {0};

  int32_t code = tqMetaDecodeCheckInfo(&checkInfo, msg, msgLen >= 0 ? msgLen : 0);
  if (code != 0) {
    return code;
  }

  code = taosHashPut(pTq->pCheckInfo, checkInfo.topic, strlen(checkInfo.topic), &checkInfo, sizeof(STqCheckInfo));
  if (code != 0) {
    tDeleteSTqCheckInfo(&checkInfo);
    return code;
  }

  return tqMetaSaveInfo(pTq, pTq->pCheckStore, checkInfo.topic, strlen(checkInfo.topic), msg,
                        msgLen >= 0 ? msgLen : 0);
}
676

UNCOV
677
// Removes the check-info entry keyed by the NUL-terminated string in msg from the check-info
// hash and from the check store. sversion and msgLen are not used.
//
// Returns TSDB_CODE_INVALID_PARA for NULL pTq/msg, TSDB_CODE_TSC_INTERNAL_ERROR when the hash
// removal reports a negative result, otherwise the result of tqMetaDeleteInfo().
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (NULL == pTq || NULL == msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t removed = taosHashRemove(pTq->pCheckInfo, msg, strlen(msg));
  if (removed < 0) {
    return TSDB_CODE_TSC_INTERNAL_ERROR;
  }

  return tqMetaDeleteInfo(pTq, pTq->pCheckStore, msg, strlen(msg));
}
686

UNCOV
687
// Handles a subscribe/rebalance request (SMqRebVgReq).
//
// When no handle exists for the subKey, a new one is created and saved (a request whose new
// consumer id is -1 is rejected). When a handle exists, the function waits until it is not
// executing and, if the consumer changed, switches the handle to the new consumer, resets its
// epoch, unregisters it from the push manager and saves it.
// sversion is not used.
//
// Returns TSDB_CODE_INVALID_PARA for NULL pTq/msg or an invalid new consumer id, a negative
// value when decoding or handle creation fails, otherwise the result of the last
// tqMetaSaveHandle() call (0 when nothing had to be saved).
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (NULL == pTq || NULL == msg) {
    return TSDB_CODE_INVALID_PARA;
  }

  int         ret = 0;
  SMqRebVgReq req = {0};
  SDecoder    dc = {0};

  tDecoderInit(&dc, (uint8_t*)msg, msgLen);
  ret = tDecodeSMqRebVgReq(&dc, &req);
  if (ret < 0) {
    goto _exit;
  }

  tqInfo("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey,
         req.oldConsumerId, req.newConsumerId);

  // look the handle up under the read lock
  STqHandle* pHandle = NULL;
  taosRLockLatch(&pTq->lock);
  int32_t lookupCode = tqMetaGetHandle(pTq, req.subKey, &pHandle);
  if (lookupCode != 0){
    tqInfo("vgId:%d, tq process sub req:%s, no such handle, create new one", pTq->pVnode->config.vgId, req.subKey);
  }
  taosRUnLockLatch(&pTq->lock);

  if (pHandle != NULL) {
    taosWLockLatch(&pTq->lock);
    // back off in 10 ms steps, dropping the lock while a poll is executing on the handle
    while (tqIsHandleExec(pHandle)) {
      tqInfo("vgId:%d, topic:%s, subscription is executing, sub wait for 10ms and retry, pHandle:%p",
             pTq->pVnode->config.vgId, pHandle->subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      taosMsleep(10);
      taosWLockLatch(&pTq->lock);
    }

    if (pHandle->consumerId != req.newConsumerId) {
      tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
             req.newConsumerId);

      atomic_store_64(&pHandle->consumerId, req.newConsumerId);
      atomic_store_32(&pHandle->epoch, 0);
      tqUnregisterPushHandle(pTq, pHandle);
      ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
    } else {  // do nothing
      tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId);
    }
    taosWUnLockLatch(&pTq->lock);
    goto _exit;
  }

  // no handle yet: build a new one for the new consumer
  if (req.oldConsumerId != -1) {
    tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
            req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
  }
  if (req.newConsumerId == -1) {
    tqError("vgId:%d, tq invalid rebalance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
    ret = TSDB_CODE_INVALID_PARA;
    goto _exit;
  }

  STqHandle newHandle = {0};
  ret = tqMetaCreateHandle(pTq, &req, &newHandle);
  if (ret < 0) {
    tqDestroyTqHandle(&newHandle);
    goto _exit;
  }

  taosWLockLatch(&pTq->lock);
  ret = tqMetaSaveHandle(pTq, req.subKey, &newHandle);
  taosWUnLockLatch(&pTq->lock);

_exit:
  tDecoderClear(&dc);
  return ret;
}
761

UNCOV
762
// Free callback for entries that store a pointer: `ptr` addresses the slot that holds the
// pointer, so it is dereferenced once and the memory it refers to is released.
static void freePtr(void* ptr) {
  void* pTarget = *(void**)ptr;
  taosMemoryFree(pTarget);
}
×
763

UNCOV
764
// Finish building a stream task inside this vnode.
//
// Steps: run streamTaskInit, wire the sink callbacks for SMA/table outputs, open a WAL reader
// for source-level tasks, reset the upstream stage info, restore the version info, and log the
// resulting checkpoint/version state.
//
// pTqObj:         the owning STQ, passed as void*.
// pTask:          the task being built.
// nextProcessVer: forwarded to streamTaskInit and logged as "inputVer".
//
// Returns 0 on success. Otherwise returns the code from streamTaskInit, terrno after a failed
// schema build / hash init / WAL reader open, or TSDB_CODE_STREAM_INTERNAL_ERROR when a task
// that is not a fill-history task has checkpointVer > nextProcessVer.
int32_t tqBuildStreamTask(void* pTqObj, SStreamTask* pTask, int64_t nextProcessVer) {
  STQ*    pTq = (STQ*)pTqObj;
  int32_t vgId = TD_VID(pTq->pVnode);

  tqDebug("s-task:0x%x start to build task", pTask->id.taskId);

  int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, nextProcessVer);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pTask->pBackend = NULL;

  // sink: attach the vnode and the result handler that matches the output type
  STaskOutputInfo* pOutput = &pTask->outputInfo;
  switch (pOutput->type) {
    case TASK_OUTPUT__SMA: {
      pOutput->smaSink.vnode = pTq->pVnode;
      pOutput->smaSink.smaSink = smaHandleRes;
      break;
    }

    case TASK_OUTPUT__TABLE: {
      pOutput->tbSink.vnode = pTq->pVnode;
      pOutput->tbSink.tbSinkFunc = tqSinkDataIntoDstTable;

      // schema version defaults to 1 unless the meta lookup for stbUid succeeds
      SMetaInfo metaInfo = {0};
      int32_t   schemaVer = 1;
      if (metaGetInfo(pTq->pVnode->pMeta, pOutput->tbSink.stbUid, &metaInfo, NULL) == TSDB_CODE_SUCCESS) {
        schemaVer = metaInfo.skmVer;
      }

      SSchemaWrapper* pWrapper = pOutput->tbSink.pSchemaWrapper;
      pOutput->tbSink.pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, schemaVer);
      if (pOutput->tbSink.pTSchema == NULL) {
        return terrno;
      }

      pOutput->tbSink.pTbInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
      if (pOutput->tbSink.pTbInfo == NULL) {
        tqError("vgId:%d failed init sink tableInfo, code:%s", vgId, tstrerror(terrno));
        return terrno;
      }

      // entries hold pointers; freePtr releases what each entry points to
      tSimpleHashSetFreeFp(pOutput->tbSink.pTbInfo, freePtr);
      break;
    }

    default:
      break;
  }

  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    // delete msg also extract from wal files
    SWalFilterCond cond = {.deleteMsg = 1, .scanDropCtb = pTask->subtableWithoutMd5 ? true : false};

    pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond, pTask->id.taskId);
    if (pTask->exec.pWalReader == NULL) {
      tqError("vgId:%d failed init wal reader, code:%s", vgId, tstrerror(terrno));
      return terrno;
    }
  }

  streamTaskResetUpstreamStageInfo(pTask);
  tqSetRestoreVersionInfo(pTask);

  SCheckpointInfo* pChkInfo = &pTask->chkInfo;
  char*            pCurStatus = streamTaskGetStatus(pTask).name;
  const char*      pNextStatus = streamTaskGetStatusStr(pTask->status.taskStatus);

  if (!pTask->info.fillHistory) {
    tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
           " nextProcessVer:%" PRId64
           " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x "
           "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
           vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
           pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
           (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam, nextProcessVer);

    // this consistency check is applied only on this (non fill-history) branch
    if (pChkInfo->checkpointVer > pChkInfo->nextProcessVer) {
      tqError("vgId:%d build stream task, s-task:%s, checkpointVer:%" PRId64 " > nextProcessVer:%" PRId64, vgId,
              pTask->id.idStr, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
      return TSDB_CODE_STREAM_INTERNAL_ERROR;
    }
  } else {
    tqInfo("vgId:%d build stream task, s-task:%s, %p checkpointId:%" PRId64 " checkpointVer:%" PRId64
           " nextProcessVer:%" PRId64
           " child id:%d, level:%d, cur-status:%s, next-status:%s fill-history:%d, related stream task:0x%x "
           "delaySched:%" PRId64 " ms, inputVer:%" PRId64,
           vgId, pTask->id.idStr, pTask, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
           pTask->info.selfChildId, pTask->info.taskLevel, pCurStatus, pNextStatus, pTask->info.fillHistory,
           (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam, nextProcessVer);
  }

  return 0;
}
853

UNCOV
854
// Handle a stream-task check request: forwards the message to tqStreamTaskProcessCheckReq
// together with this tq's stream meta and returns its result.
int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessCheckReq(pTq->pStreamMeta, pMsg);
  return code;
}
×
855

UNCOV
856
// Handle a stream-task check response: forwards the message to tqStreamTaskProcessCheckRsp with
// this tq's stream meta and the current leader flag of the vnode, and returns its result.
int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessCheckRsp(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
  return code;
}
859

UNCOV
860
// Handle a stream-task deploy request: forwards the raw message to tqStreamTaskProcessDeployReq
// along with the vnode's message callbacks, its leader flag and its `restored` flag.
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessDeployReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen,
                                              vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored);
  return code;
}
864

UNCOV
865
// Start step 2 (scan from WAL) of a fill-history task.
//
// pTask:       the fill-history task.
// pStreamTask: the related stream task; its hTaskInfo.haltVer is handed to
//              streamHistoryTaskSetVerRangeStep2.
// pTq:         used to trigger an asynchronous WAL scan.
//
// When streamHistoryTaskSetVerRangeStep2 reports that step 2 is already done, a trans-state
// item is queued and the task is executed directly. Otherwise the step-2 scanner parameters
// and WAL reader position are set up, the SCANHIST_DONE event is raised and a WAL scan is
// requested. Failures are logged only; nothing is returned to the caller.
static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) {
  const char*    idStr = pTask->id.idStr;
  int64_t        haltVer = pStreamTask->hTaskInfo.haltVer;
  SVersionRange* pRange = &pTask->step2Range;
  int32_t        vgId = pTask->pMeta->vgId;

  // if it's an source task, extract the last version in wal.
  bool done = streamHistoryTaskSetVerRangeStep2(pTask, haltVer);
  pTask->execInfo.step2Start = taosGetTimestampMs();

  if (done) {
    qDebug("s-task:%s scan wal(step 2) verRange:%" PRId64 "-%" PRId64 " ended, elapsed time:%.2fs", idStr,
           pRange->minVer, pRange->maxVer, 0.0);

    int32_t putCode = streamTaskPutTranstateIntoInputQ(pTask);  // todo: msg lost.
    if (putCode != 0) {
      qError("s-task:%s failed put trans-state into inputQ, code:%s", idStr, tstrerror(putCode));
    }

    (void)streamExecTask(pTask);  // exec directly
    return;
  }

  STimeWindow* pWin = &pTask->dataRange.window;
  tqDebug("s-task:%s level:%d verRange:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64
          ", do secondary scan-history from WAL after halt the related stream task:%s",
          idStr, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pWin->skey, pWin->ekey,
          pStreamTask->id.idStr);

  // an unexpected sched-status is reported but does not stop the procedure
  if (pTask->status.schedStatus != TASK_SCHED_STATUS__WAITING) {
    tqError("s-task:%s level:%d unexpected sched-status:%d", idStr, pTask->info.taskLevel,
            pTask->status.schedStatus);
  }

  int32_t code = streamSetParamForStreamScannerStep2(pTask, pRange, pWin);
  if (code != 0) {
    tqError("s-task:%s level:%d failed to set step2 param", idStr, pTask->info.taskLevel);
  }

  // the WAL scan resumes from the lower bound of the step-2 version range
  int64_t startVer = pRange->minVer;
  pTask->chkInfo.nextProcessVer = startVer;

  walReaderSetSkipToVersion(pTask->exec.pWalReader, startVer);
  tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", idStr,
          startVer, pRange->maxVer, TASK_SCHED_STATUS__INACTIVE);

  (void)streamTaskSetSchedStatusInactive(pTask);

  // now the fill-history task starts to scan data from wal files.
  code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE);
  if (code != TSDB_CODE_SUCCESS) {
    return;
  }

  code = tqScanWalAsync(pTq, false);
  if (code != 0) {
    tqError("vgId:%d failed to start scan wal file, code:%s", vgId, tstrerror(code));
  }
}
×
917

UNCOV
918
// Callback registered with the TASK_EVENT_HALT event of a stream task (see
// tqProcessTaskScanHistory). It acquires the fill-history task recorded in
// pStreamTask->hTaskInfo.id and starts its step 2.
//
// pStreamTask: the stream task the event was raised on.
// param:       the STQ that was supplied when the event was registered.
//
// Always returns TSDB_CODE_SUCCESS; when the fill-history task cannot be acquired a warning is
// logged and nothing else is done.
int32_t handleStep2Async(SStreamTask* pStreamTask, void* param) {
  STQ*         pTq = param;
  SStreamMeta* pMeta = pStreamTask->pMeta;
  STaskId      fillTaskId = pStreamTask->hTaskInfo.id;
  SStreamTask* pFillTask = NULL;

  // only the out-pointer is inspected; the returned code is intentionally not used
  (void)streamMetaAcquireTask(pMeta, fillTaskId.streamId, fillTaskId.taskId, &pFillTask);

  if (pFillTask != NULL) {
    doStartFillhistoryStep2(pFillTask, pStreamTask, pTq);
    streamMetaReleaseTask(pMeta, pFillTask);
  } else {
    tqWarn("s-task:0x%x failed to acquired it to exec step 2, scan wal quit", (int32_t)fillTaskId.taskId);
  }

  return TSDB_CODE_SUCCESS;
}
935

936
// this function should be executed by only one thread, so we set an sentinel to protect this function
UNCOV
937
int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
×
UNCOV
938
  SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont;
×
UNCOV
939
  SStreamMeta*           pMeta = pTq->pStreamMeta;
×
UNCOV
940
  int32_t                code = TSDB_CODE_SUCCESS;
×
UNCOV
941
  SStreamTask*           pTask = NULL;
×
UNCOV
942
  SStreamTask*           pStreamTask = NULL;
×
943

UNCOV
944
  code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask);
×
UNCOV
945
  if (pTask == NULL) {
×
946
    tqError("vgId:%d failed to acquire stream task:0x%x during scan history data, task may have been destroyed",
×
947
            pMeta->vgId, pReq->taskId);
948
    return code;
×
949
  }
950

951
  // do recovery step1
UNCOV
952
  const char* id = pTask->id.idStr;
×
UNCOV
953
  char*       pStatus = streamTaskGetStatus(pTask).name;
×
954

955
  // avoid multi-thread exec
956
  while (1) {
×
UNCOV
957
    int32_t sentinel = atomic_val_compare_exchange_32(&pTask->status.inScanHistorySentinel, 0, 1);
×
UNCOV
958
    if (sentinel != 0) {
×
959
      tqDebug("s-task:%s already in scan-history func, wait for 100ms, and try again", id);
×
960
      taosMsleep(100);
×
961
    } else {
UNCOV
962
      break;
×
963
    }
964
  }
965

966
  // let's decide which step should be executed now
UNCOV
967
  if (pTask->execInfo.step1Start == 0) {
×
UNCOV
968
    int64_t ts = taosGetTimestampMs();
×
UNCOV
969
    pTask->execInfo.step1Start = ts;
×
UNCOV
970
    tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts);
×
971
  } else {
UNCOV
972
    if (pTask->execInfo.step2Start == 0) {
×
UNCOV
973
      tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs",
×
974
              id, pTask->execInfo.step1Start, pTask->execInfo.step1El);
975
    } else {
UNCOV
976
      tqDebug("s-task:%s already in step2, no need to scan-history data, step2 startTs:%" PRId64, id,
×
977
              pTask->execInfo.step2Start);
978

UNCOV
979
      atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
UNCOV
980
      streamMetaReleaseTask(pMeta, pTask);
×
UNCOV
981
      return 0;
×
982
    }
983
  }
984

985
  // we have to continue retrying to successfully execute the scan history task.
UNCOV
986
  if (!streamTaskSetSchedStatusWait(pTask)) {
×
987
    tqError(
×
988
        "s-task:%s failed to start scan-history in first stream time window since already started, unexpected "
989
        "sched-status:%d",
990
        id, pTask->status.schedStatus);
991
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
992
    streamMetaReleaseTask(pMeta, pTask);
×
993
    return 0;
×
994
  }
995

UNCOV
996
  int64_t              st = taosGetTimestampMs();
×
UNCOV
997
  SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask, st);
×
998

UNCOV
999
  double el = (taosGetTimestampMs() - st) / 1000.0;
×
UNCOV
1000
  pTask->execInfo.step1El += el;
×
1001

UNCOV
1002
  if (retInfo.ret == TASK_SCANHISTORY_QUIT || retInfo.ret == TASK_SCANHISTORY_REXEC) {
×
UNCOV
1003
    int8_t status = streamTaskSetSchedStatusInactive(pTask);
×
UNCOV
1004
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
1005

UNCOV
1006
    if (retInfo.ret == TASK_SCANHISTORY_REXEC) {
×
UNCOV
1007
      streamExecScanHistoryInFuture(pTask, retInfo.idleTime);
×
1008
    } else {
UNCOV
1009
      SStreamTaskState p = streamTaskGetStatus(pTask);
×
UNCOV
1010
      ETaskStatus      s = p.state;
×
1011

UNCOV
1012
      if (s == TASK_STATUS__PAUSE) {
×
1013
        tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs total:%.2fs, sched-status:%d", id, el,
×
1014
                pTask->execInfo.step1El, status);
UNCOV
1015
      } else if (s == TASK_STATUS__STOP || s == TASK_STATUS__DROPPING) {
×
UNCOV
1016
        tqDebug("s-task:%s status:%p not continue scan-history data, total elapsed time:%.2fs quit", id, p.name,
×
1017
                pTask->execInfo.step1El);
1018
      }
1019
    }
1020

UNCOV
1021
    streamMetaReleaseTask(pMeta, pTask);
×
UNCOV
1022
    return 0;
×
1023
  }
1024

1025
  // the following procedure should be executed, no matter status is stop/pause or not
UNCOV
1026
  tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, pTask->execInfo.step1El);
×
1027

UNCOV
1028
  if (pTask->info.fillHistory != 1) {
×
1029
    tqError("s-task:%s fill-history is disabled, unexpected", id);
×
1030
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1031
  }
1032

1033
  // 1. get the related stream task
UNCOV
1034
  code = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId, &pStreamTask);
×
UNCOV
1035
  if (pStreamTask == NULL) {
×
UNCOV
1036
    tqError("failed to find s-task:0x%" PRIx64 ", it may have been destroyed, drop related fill-history task:%s",
×
1037
            pTask->streamTaskId.taskId, pTask->id.idStr);
1038

UNCOV
1039
    tqDebug("s-task:%s fill-history task set status to be dropping", id);
×
UNCOV
1040
    code = streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, 0);
×
1041

UNCOV
1042
    atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
UNCOV
1043
    streamMetaReleaseTask(pMeta, pTask);
×
UNCOV
1044
    return code;
×
1045
  }
1046

UNCOV
1047
  if (pStreamTask->info.taskLevel != TASK_LEVEL__SOURCE) {
×
1048
    tqError("s-task:%s fill-history task related stream task level:%d, unexpected", id, pStreamTask->info.taskLevel);
×
1049
    return TSDB_CODE_STREAM_INTERNAL_ERROR;
×
1050
  }
1051

UNCOV
1052
  code = streamTaskHandleEventAsync(pStreamTask->status.pSM, TASK_EVENT_HALT, handleStep2Async, pTq);
×
UNCOV
1053
  streamMetaReleaseTask(pMeta, pStreamTask);
×
1054

UNCOV
1055
  atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
×
UNCOV
1056
  streamMetaReleaseTask(pMeta, pTask);
×
UNCOV
1057
  return code;
×
1058
}
1059

1060
// Handle a stream-task run request.
//
// The body after the SMsgHead is decoded into an SStreamTaskRunReq. A request of type
// STREAM_EXEC_T_EXTRACT_WAL_DATA is served by tqScanWal; every other request goes to
// tqStreamTaskProcessRunReq and, for reqType >= 0 or STREAM_EXEC_T_RESUME_TASK, is followed by
// an asynchronous WAL scan.
//
// Returns TSDB_CODE_SUCCESS when the request cannot be decoded (the failure is only logged),
// otherwise the code of the last operation performed.
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  char*             body = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t           bodyLen = pMsg->contLen - sizeof(SMsgHead);
  SStreamTaskRunReq req = {0};
  SDecoder          decoder;

  tDecoderInit(&decoder, (uint8_t*)body, bodyLen);
  int32_t code = tDecodeStreamTaskRunReq(&decoder, &req);
  tDecoderClear(&decoder);

  if (code < 0) {
    tqError("vgId:%d failed to decode task run req, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
    return TSDB_CODE_SUCCESS;
  }

  // extracted submit data from wal files for all tasks
  if (req.reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) {
    return tqScanWal(pTq);
  }

  code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
  if (code != 0) {
    tqError("vgId:%d failed to create task run req, code:%s", TD_VID(pTq->pVnode), tstrerror(code));
    return code;
  }

  // let's continue scan data in the wal files
  bool continueScan = (req.reqType >= 0) || (req.reqType == STREAM_EXEC_T_RESUME_TASK);
  if (!continueScan) {
    return code;
  }

  code = tqScanWalAsync(pTq, false);  // it's ok to failed
  if (code != 0) {
    tqError("vgId:%d failed to start scan wal file, code:%s", pTq->pStreamMeta->vgId, tstrerror(code));
  }

  return code;
}
1097

UNCOV
1098
// Handle a stream-task dispatch request: forwards the message to tqStreamTaskProcessDispatchReq
// with this tq's stream meta and returns its result.
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessDispatchReq(pTq->pStreamMeta, pMsg);
  return code;
}
1101

UNCOV
1102
// Handle a stream-task dispatch response: forwards the message to
// tqStreamTaskProcessDispatchRsp with this tq's stream meta and returns its result.
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessDispatchRsp(pTq->pStreamMeta, pMsg);
  return code;
}
1105

UNCOV
1106
// Handle a stream-task drop request: forwards the raw message and its length to
// tqStreamTaskProcessDropReq with this tq's stream meta and returns its result.
int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessDropReq(pTq->pStreamMeta, msg, msgLen);
  return code;
}
1109

UNCOV
1110
// Handle an update-checkpoint request: forwards the raw message to
// tqStreamTaskProcessUpdateCheckpointReq together with the vnode's `restored` flag.
// msgLen is accepted for interface compatibility and is not used here.
int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessUpdateCheckpointReq(pTq->pStreamMeta, pTq->pVnode->restored, msg);
  return code;
}
1113

UNCOV
1114
// Handle a consensus-checkpoint-id request: forwards the message to
// tqStreamTaskProcessConsenChkptIdReq with this tq's stream meta and returns its result.
int32_t tqProcessTaskConsenChkptIdReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessConsenChkptIdReq(pTq->pStreamMeta, pMsg);
  return code;
}
1117

UNCOV
1118
// Handle a stream-task pause request: forwards the raw message to
// tqStreamTaskProcessTaskPauseReq with this tq's stream meta.
// sversion and msgLen are accepted for interface compatibility and are not used here.
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessTaskPauseReq(pTq->pStreamMeta, msg);
  return code;
}
1121

UNCOV
1122
// Handle a stream-task resume request: forwards the tq itself, the version and the raw message
// to tqStreamTaskProcessTaskResumeReq, always passing `true` as the last argument.
// msgLen is accepted for interface compatibility and is not used here.
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t code = tqStreamTaskProcessTaskResumeReq(pTq, sversion, msg, true);
  return code;
}
1125

UNCOV
1126
// Handle a stream-task retrieve request: forwards the message to tqStreamTaskProcessRetrieveReq
// with this tq's stream meta and returns its result.
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessRetrieveReq(pTq->pStreamMeta, pMsg);
  return code;
}
1129

UNCOV
1130
// Handle a stream-task retrieve response. Nothing is done with the message; 0 is returned.
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  return 0;
}
×
1131

1132
// Answer a stream-progress request.
//
// The request body (after the SMsgHead) is deserialized into an SStreamProgressReq, the
// progress of the requested stream is fetched with tqGetStreamExecInfo, and a response made of
// an SMsgHead followed by an SStreamProgressRsp is sent back with tmsgSendRsp.
//
// Returns 0 after the response has been sent. On failure no response is sent here and the
// failing code is returned (-1 with terrno = TSDB_CODE_OUT_OF_MEMORY when the response buffer
// cannot be allocated).
int32_t tqStreamProgressRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*               msgStr = pMsg->pCont;
  char*               msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t             msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t             code = 0;
  SStreamProgressReq  req = {0};
  SStreamProgressRsp* pRsp = NULL;
  char*               pRspBuf = taosMemoryCalloc(1, sizeof(SMsgHead) + sizeof(SStreamProgressRsp));
  if (!pRspBuf) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    code = -1;
    goto _OVER;
  }

  // derive the payload pointer only once the allocation is known to be valid
  pRsp = POINTER_SHIFT(pRspBuf, sizeof(SMsgHead));

  code = tDeserializeStreamProgressReq(msgBody, msgLen, &req);
  if (code == TSDB_CODE_SUCCESS) {
    code = tqGetStreamExecInfo(pTq->pVnode, req.streamId, &pRsp->progressDelay, &pRsp->fillHisFinished);
  }
  if (code == TSDB_CODE_SUCCESS) {
    pRsp->fetchIdx = req.fetchIdx;
    pRsp->subFetchIdx = req.subFetchIdx;
    pRsp->vgId = req.vgId;
    pRsp->streamId = req.streamId;

    // NOTE(review): the response is serialized in place (source and destination are both
    // pRsp), and the length passed covers sizeof(SMsgHead) more bytes than remain after
    // pRsp — confirm both are intended.
    // NOTE(review): any non-zero result is treated as a failure here; verify that
    // tSerializeStreamProgressRsp returns 0 (not the encoded length) on success.
    code = tSerializeStreamProgressRsp(pRsp, sizeof(SStreamProgressRsp) + sizeof(SMsgHead), pRsp);
    if (code) {
      goto _OVER;
    }

    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    rsp.pCont = pRspBuf;
    pRspBuf = NULL;  // the buffer now belongs to the response; do not free it below
    rsp.contLen = sizeof(SMsgHead) + sizeof(SStreamProgressRsp);
    tmsgSendRsp(&rsp);
  }

_OVER:
  if (pRspBuf) {
    taosMemoryFree(pRspBuf);
  }
  return code;
}
1173

1174
// always return success to mnode
1175
//todo: handle failure of build and send msg to mnode
UNCOV
1176
static void doSendChkptSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, int32_t code,
×
1177
                                 int32_t taskId) {
UNCOV
1178
  SRpcMsg rsp = {0};
×
UNCOV
1179
  int32_t ret = streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &rsp, code);
×
UNCOV
1180
  if (ret) {  // suppress the error in build checkpoint source rsp
×
1181
    tqError("s-task:0x%x failed to build checkpoint-source rsp, code:%s", taskId, tstrerror(ret));
×
1182
  }
UNCOV
1183
  tmsgSendRsp(&rsp);  // error occurs
×
UNCOV
1184
}
×
1185

1186
// no matter what kinds of error happened, make sure the mnode will receive the success execution code.
UNCOV
1187
int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) {
×
UNCOV
1188
  int32_t                    vgId = TD_VID(pTq->pVnode);
×
UNCOV
1189
  SStreamMeta*               pMeta = pTq->pStreamMeta;
×
UNCOV
1190
  char*                      msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
×
UNCOV
1191
  int32_t                    len = pMsg->contLen - sizeof(SMsgHead);
×
UNCOV
1192
  int32_t                    code = 0;
×
UNCOV
1193
  SStreamCheckpointSourceReq req = {0};
×
UNCOV
1194
  SDecoder                   decoder = {0};
×
UNCOV
1195
  SStreamTask*               pTask = NULL;
×
UNCOV
1196
  int64_t                    checkpointId = 0;
×
1197

1198
  // disable auto rsp to mnode
UNCOV
1199
  pRsp->info.handle = NULL;
×
1200

UNCOV
1201
  tDecoderInit(&decoder, (uint8_t*)msg, len);
×
UNCOV
1202
  if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) {
×
1203
    code = TSDB_CODE_MSG_DECODE_ERROR;
×
1204
    tDecoderClear(&decoder);
×
1205
    tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code));
×
1206
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1207
    return TSDB_CODE_SUCCESS;  // always return success to mnode,
×
1208
  }
1209

UNCOV
1210
  tDecoderClear(&decoder);
×
1211

UNCOV
1212
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
×
UNCOV
1213
    tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId);
×
UNCOV
1214
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
UNCOV
1215
    return TSDB_CODE_SUCCESS;  // always return success to mnode
×
1216
  }
1217

UNCOV
1218
  if (!pTq->pVnode->restored) {
×
UNCOV
1219
    tqDebug("vgId:%d checkpoint-source msg received during restoring, checkpointId:%" PRId64
×
1220
            ", transId:%d s-task:0x%x ignore it",
1221
            vgId, req.checkpointId, req.transId, req.taskId);
UNCOV
1222
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
UNCOV
1223
    return TSDB_CODE_SUCCESS;  // always return success to mnode
×
1224
  }
1225

UNCOV
1226
  code = streamMetaAcquireTask(pMeta, req.streamId, req.taskId, &pTask);
×
UNCOV
1227
  if (pTask == NULL || code != 0) {
×
UNCOV
1228
    tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. checkpointId:%" PRId64
×
1229
            " transId:%d it may have been destroyed",
1230
            vgId, req.taskId, req.checkpointId, req.transId);
UNCOV
1231
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1232
    return TSDB_CODE_SUCCESS;
×
1233
  }
1234

UNCOV
1235
  if (pTask->status.downstreamReady != 1) {
×
1236
    // record the latest failed checkpoint id
1237
    streamTaskSetFailedChkptInfo(pTask, req.transId, req.checkpointId);
×
1238
    tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpointId:%" PRId64
×
1239
            ", transId:%d set it failed",
1240
            pTask->id.idStr, req.checkpointId, req.transId);
1241

1242
    streamMetaReleaseTask(pMeta, pTask);
×
1243
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1244
    return TSDB_CODE_SUCCESS;  // todo retry handle error
×
1245
  }
1246

1247
  // todo save the checkpoint failed info
UNCOV
1248
  streamMutexLock(&pTask->lock);
×
UNCOV
1249
  ETaskStatus status = streamTaskGetStatus(pTask).state;
×
1250

UNCOV
1251
  if (req.mndTrigger == 1) {
×
UNCOV
1252
    if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) {
×
1253
      tqError("s-task:%s not ready for checkpoint, since it is halt, ignore checkpointId:%" PRId64 ", set it failure",
×
1254
              pTask->id.idStr, req.checkpointId);
1255

1256
      streamMutexUnlock(&pTask->lock);
×
1257
      streamMetaReleaseTask(pMeta, pTask);
×
1258
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1259
      return TSDB_CODE_SUCCESS;
×
1260
    }
1261
  } else {
UNCOV
1262
    if (status != TASK_STATUS__HALT) {
×
1263
      tqError("s-task:%s should in halt status, let's halt it directly", pTask->id.idStr);
×
1264
      //      streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_HALT);
1265
    }
1266
  }
1267

1268
  // check if the checkpoint msg already sent or not.
UNCOV
1269
  if (status == TASK_STATUS__CK) {
×
1270
    streamTaskGetActiveCheckpointInfo(pTask, NULL, &checkpointId);
×
1271

1272
    tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1273
           " transId:%d already handled, ignore msg and continue process checkpoint",
1274
           pTask->id.idStr, checkpointId, req.transId);
1275

1276
    streamMutexUnlock(&pTask->lock);
×
1277
    streamMetaReleaseTask(pMeta, pTask);
×
1278
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SYN_PROPOSE_NOT_READY, req.taskId);
×
1279
    return TSDB_CODE_SUCCESS;
×
1280
  } else {  // checkpoint already finished, and not in checkpoint status
UNCOV
1281
    if (req.checkpointId <= pTask->chkInfo.checkpointId) {
×
1282
      tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64
×
1283
             " transId:%d already handled, return success",
1284
             pTask->id.idStr, req.checkpointId, req.transId);
1285

1286
      streamMutexUnlock(&pTask->lock);
×
1287
      streamMetaReleaseTask(pMeta, pTask);
×
1288
      doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1289
      return TSDB_CODE_SUCCESS;
×
1290
    }
1291
  }
1292

UNCOV
1293
  code = streamProcessCheckpointSourceReq(pTask, &req);
×
UNCOV
1294
  streamMutexUnlock(&pTask->lock);
×
1295

UNCOV
1296
  if (code) {
×
1297
    qError("s-task:%s (vgId:%d) failed to process checkpoint-source req, code:%s", pTask->id.idStr, vgId,
×
1298
           tstrerror(code));
1299
    streamMetaReleaseTask(pMeta, pTask);
×
1300
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1301
    return TSDB_CODE_SUCCESS;
×
1302
  }
1303

UNCOV
1304
  if (req.mndTrigger) {
×
UNCOV
1305
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64 ", transId:%d, ",
×
1306
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId);
1307
  } else {
UNCOV
1308
    const char* pPrevStatus = streamTaskGetStatusStr(streamTaskGetPrevStatus(pTask));
×
UNCOV
1309
    tqInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64
×
1310
           ", transId:%d after transfer-state, prev status:%s",
1311
           pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, req.transId, pPrevStatus);
1312
  }
1313

UNCOV
1314
  code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask);
×
UNCOV
1315
  if (code != TSDB_CODE_SUCCESS) {
×
1316
    streamTaskSetCheckpointFailed(pTask);  // set the checkpoint failed
×
1317
    doSendChkptSourceRsp(&req, &pMsg->info, TSDB_CODE_SUCCESS, req.taskId);
×
1318
  }
1319

UNCOV
1320
  streamMetaReleaseTask(pMeta, pTask);
×
UNCOV
1321
  return TSDB_CODE_SUCCESS;
×
1322
}
1323

1324
// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task
UNCOV
1325
int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) {
×
UNCOV
1326
  int32_t vgId = TD_VID(pTq->pVnode);
×
1327

UNCOV
1328
  SStreamCheckpointReadyMsg* pReq = (SStreamCheckpointReadyMsg*)pMsg->pCont;
×
UNCOV
1329
  if (!vnodeIsRoleLeader(pTq->pVnode)) {
×
1330
    tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId,
×
1331
            (int32_t)pReq->downstreamTaskId);
1332
    return TSDB_CODE_STREAM_NOT_LEADER;
×
1333
  }
1334

UNCOV
1335
  return tqStreamTaskProcessCheckpointReadyMsg(pTq->pStreamMeta, pMsg);
×
1336
}
1337

UNCOV
1338
// Handle a stream-task update request: forwards the message to tqStreamTaskProcessUpdateReq
// with this tq's stream meta, the vnode's message callbacks and its `restored` flag.
int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessUpdateReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, pMsg, pTq->pVnode->restored);
  return code;
}
1341

1342
// Handle a stream-task reset request: forwards the message content (pMsg->pCont) to
// tqStreamTaskProcessTaskResetReq with this tq's stream meta and returns its result.
int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg->pCont);
  return code;
}
1345

1346
// Handle a retrieve-checkpoint-trigger request.
//
// On the leader the message is forwarded to tqStreamTaskProcessRetrieveTriggerReq. On a vnode
// that is not the leader the request is decoded only to log which downstream task sent it, and
// TSDB_CODE_STREAM_NOT_LEADER is returned (TSDB_CODE_INVALID_MSG when it cannot be decoded).
int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) {
  int32_t vgId = TD_VID(pTq->pVnode);

  if (vnodeIsRoleLeader(pTq->pVnode)) {
    return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg);
  }

  // not the leader: decode the body after the SMsgHead just to report the sender
  SRetrieveChkptTriggerReq req = {0};
  SDecoder                 decoder = {0};
  char*                    msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t                  len = pMsg->contLen - sizeof(SMsgHead);

  tDecoderInit(&decoder, (uint8_t*)msg, len);
  if (tDecodeRetrieveChkptTriggerReq(&decoder, &req) < 0) {
    tDecoderClear(&decoder);
    tqError("vgId:%d invalid retrieve checkpoint-trigger req received", vgId);
    return TSDB_CODE_INVALID_MSG;
  }
  tDecoderClear(&decoder);

  // the id follows a "0x" prefix, so it is printed in hexadecimal (was decimal)
  tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from s-task:0x%" PRIx64, vgId,
          req.downstreamTaskId);
  return TSDB_CODE_STREAM_NOT_LEADER;
}
1371

1372
// Handle a retrieve-checkpoint-trigger response: forwards the message to
// tqStreamTaskProcessRetrieveTriggerRsp with this tq's stream meta and returns its result.
int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamTaskProcessRetrieveTriggerRsp(pTq->pStreamMeta, pMsg);
  return code;
}
1375

1376
// this function is needed, do not try to remove it.
UNCOV
1377
int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg) { return tqStreamProcessStreamHbRsp(pTq->pStreamMeta, pMsg); }
×
1378

UNCOV
1379
// Handle the response to a checkpoint request: forwards the message to
// tqStreamProcessReqCheckpointRsp with this tq's stream meta and returns its result.
int32_t tqProcessStreamReqCheckpointRsp(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamProcessReqCheckpointRsp(pTq->pStreamMeta, pMsg);
  return code;
}
1382

UNCOV
1383
// Handle a checkpoint-ready response: forwards the message to
// tqStreamProcessCheckpointReadyRsp with this tq's stream meta and returns its result.
int32_t tqProcessTaskCheckpointReadyRsp(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamProcessCheckpointReadyRsp(pTq->pStreamMeta, pMsg);
  return code;
}
1386

UNCOV
1387
// Handle a checkpoint-report response: forwards the message to tqStreamProcessChkptReportRsp
// with this tq's stream meta and returns its result.
int32_t tqProcessTaskChkptReportRsp(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = tqStreamProcessChkptReportRsp(pTq->pStreamMeta, pMsg);
  return code;
}
1390

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc