• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3798

31 Mar 2025 10:39AM UTC coverage: 9.424% (-20.9%) from 30.372%
#3798

push

travis-ci

happyguoxy
test:add test cases

21549 of 307601 branches covered (7.01%)

Branch coverage included in aggregate %.

36084 of 303967 relevant lines covered (11.87%)

58620.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/source/dnode/vnode/src/vnd/vnodeSync.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "sync.h"
18
#include "tq.h"
19
#include "tqCommon.h"
20
#include "tsdb.h"
21
#include "vnd.h"
22

23
#define BATCH_ENABLE 0
24

25
static inline bool vnodeIsMsgWeak(tmsg_t type) { return false; }
×
26

27
static inline void vnodeWaitBlockMsg(SVnode *pVnode, const SRpcMsg *pMsg) {
×
28
  vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, wait block, type:%s sec:%d seq:%" PRId64, pVnode->config.vgId, pMsg,
×
29
          TMSG_INFO(pMsg->msgType), pVnode->blockSec, pVnode->blockSeq);
30
  if (tsem_wait(&pVnode->syncSem) != 0) {
×
31
    vError("vgId:%d, failed to wait sem", pVnode->config.vgId);
×
32
  }
33
}
×
34

35
static inline void vnodePostBlockMsg(SVnode *pVnode, const SRpcMsg *pMsg) {
×
36
  if (vnodeIsMsgBlock(pMsg->msgType)) {
×
37
    (void)taosThreadMutexLock(&pVnode->lock);
×
38
    if (pVnode->blocked) {
×
39
      vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, post block, type:%s sec:%d seq:%" PRId64, pVnode->config.vgId, pMsg,
×
40
              TMSG_INFO(pMsg->msgType), pVnode->blockSec, pVnode->blockSeq);
41
      pVnode->blocked = false;
×
42
      pVnode->blockSec = 0;
×
43
      pVnode->blockSeq = 0;
×
44
      if (tsem_post(&pVnode->syncSem) != 0) {
×
45
        vError("vgId:%d, failed to post sem", pVnode->config.vgId);
×
46
      }
47
    }
48
    (void)taosThreadMutexUnlock(&pVnode->lock);
×
49
  }
50
}
×
51

52
void vnodeRedirectRpcMsg(SVnode *pVnode, SRpcMsg *pMsg, int32_t code) {
×
53
  SEpSet newEpSet = {0};
×
54
  syncGetRetryEpSet(pVnode->sync, &newEpSet);
×
55

56
  vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, is redirect since not leader, numOfEps:%d inUse:%d",
×
57
          pVnode->config.vgId, pMsg, newEpSet.numOfEps, newEpSet.inUse);
58
  for (int32_t i = 0; i < newEpSet.numOfEps; ++i) {
×
59
    vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, redirect:%d ep:%s:%u", pVnode->config.vgId, pMsg, i,
×
60
            newEpSet.eps[i].fqdn, newEpSet.eps[i].port);
61
  }
62
  pMsg->info.hasEpSet = 1;
×
63

64
  if (code == 0) code = TSDB_CODE_SYN_NOT_LEADER;
×
65

66
  SRpcMsg rsp = {.code = code, .info = pMsg->info, .msgType = pMsg->msgType + 1};
×
67
  int32_t contLen = tSerializeSEpSet(NULL, 0, &newEpSet);
×
68

69
  rsp.pCont = rpcMallocCont(contLen);
×
70
  if (rsp.pCont == NULL) {
×
71
    pMsg->code = TSDB_CODE_OUT_OF_MEMORY;
×
72
  } else {
73
    if (tSerializeSEpSet(rsp.pCont, contLen, &newEpSet) < 0) {
×
74
      vError("vgId:%d, failed to serialize ep set", pVnode->config.vgId);
×
75
    }
76
    rsp.contLen = contLen;
×
77
  }
78

79
  tmsgSendRsp(&rsp);
×
80
}
×
81

82
static void inline vnodeHandleWriteMsg(SVnode *pVnode, SRpcMsg *pMsg) {
×
83
  SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info};
×
84
  if (vnodeProcessWriteMsg(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp) < 0) {
×
85
    rsp.code = terrno;
×
86
    vGError(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to apply right now since %s", pVnode->config.vgId, pMsg,
×
87
            terrstr());
88
  }
89
  if (rsp.info.handle != NULL) {
×
90
    tmsgSendRsp(&rsp);
×
91
  } else {
92
    if (rsp.pCont) {
×
93
      rpcFreeCont(rsp.pCont);
×
94
    }
95
  }
96
}
×
97

98
/**
 * Answer a request whose proposal did not go through.
 *
 * - TSDB_CODE_SYN_NOT_LEADER / TSDB_CODE_SYN_RESTORING: reply through
 *   vnodeRedirectRpcMsg().
 * - TSDB_CODE_MSG_PREPROCESSED: reply with TSDB_CODE_SUCCESS, no error log.
 * - anything else: log the failure and reply with the given code.
 *
 * A direct reply is only sent when the message carries an RPC handle.
 *
 * @param pVnode vnode that owns the request
 * @param pMsg   request that failed to be proposed
 * @param code   reason for the failure
 */
static void vnodeHandleProposeError(SVnode *pVnode, SRpcMsg *pMsg, int32_t code) {
  if (code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_RESTORING) {
    vnodeRedirectRpcMsg(pVnode, pMsg, code);
    return;
  }

  int32_t rspCode = code;
  if (code == TSDB_CODE_MSG_PREPROCESSED) {
    rspCode = TSDB_CODE_SUCCESS;
  } else {
    vGError(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to propose since %s, code:0x%x", pVnode->config.vgId, pMsg,
            tstrerror(code), code);
  }

  SRpcMsg reply = {.code = rspCode, .info = pMsg->info};
  if (reply.info.handle != NULL) {
    tmsgSendRsp(&reply);
  }
}
×
115

116
/**
 * Propose one write message to the sync module and handle the immediate result.
 *
 * syncPropose() is called with pVnode->lock held. When it returns 0 for a message
 * type that vnodeIsMsgBlock() reports as blocking, the vnode is marked blocked
 * (blockSec / blockSeq are recorded) and, once the lock has been released, this
 * function waits on pVnode->syncSem through vnodeWaitBlockMsg().
 *
 * @param pVnode vnode the message belongs to
 * @param pMsg   message to propose
 * @param isWeak forwarded unchanged to syncPropose()
 * @return the syncPropose() result; a negative result is replaced by terrno when
 *         terrno is non-zero. TSDB_CODE_INTERNAL_ERROR when a blocking message was
 *         accepted while the vnode was already marked blocked.
 */
static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) {
  int64_t seq = 0;

  (void)taosThreadMutexLock(&pVnode->lock);
  int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq);
  bool    wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType));
  if (wait) {
    if (pVnode->blocked) {
      // Release the lock before bailing out: returning with it held would
      // deadlock every later taosThreadMutexLock(&pVnode->lock) caller.
      (void)taosThreadMutexUnlock(&pVnode->lock);
      return TSDB_CODE_INTERNAL_ERROR;
    }
    pVnode->blocked = true;
    pVnode->blockSec = taosGetTimestampSec();
    pVnode->blockSeq = seq;
  }
  (void)taosThreadMutexUnlock(&pVnode->lock);

  if (code > 0) {
    // positive result: the message is processed right here instead of via sync
    vnodeHandleWriteMsg(pVnode, pMsg);
  } else if (code < 0) {
    if (terrno != 0) code = terrno;
    vnodeHandleProposeError(pVnode, pMsg, code);
  }

  // must happen outside the lock; the poster takes pVnode->lock before tsem_post
  if (wait) vnodeWaitBlockMsg(pVnode, pMsg);
  return code;
}
142

143
/**
 * Issue a TDMT_VND_COMMIT message when vnodeShouldCommit() asks for one.
 *
 * The message consists of a bare SMsgHead and is flagged noResp.
 * - atExit == false: it is proposed directly through vnodeProposeMsg() and its
 *   content buffer is released here afterwards.
 * - atExit == true: it is put on the vnode's WRITE_QUEUE instead.
 *
 * Failures are only logged; nothing is reported to the caller.
 *
 * @param pVnode vnode to commit
 * @param atExit forwarded to vnodeShouldCommit() and selects the delivery path
 */
void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit) {
  if (!vnodeShouldCommit(pVnode, atExit)) {
    return;
  }

  int32_t   contLen = sizeof(SMsgHead);
  SMsgHead *pHead = rpcMallocCont(contLen);
  if (pHead == NULL) {
    // rpcMallocCont() can fail; writing through the NULL header would crash
    vError("vgId:%d, failed to propose vnode commit since out of memory", pVnode->config.vgId);
    return;
  }
  pHead->contLen = contLen;
  pHead->vgId = pVnode->config.vgId;

  SRpcMsg rpcMsg = {0};
  rpcMsg.msgType = TDMT_VND_COMMIT;
  rpcMsg.contLen = contLen;
  rpcMsg.pCont = pHead;
  rpcMsg.info.noResp = 1;

  vInfo("vgId:%d, propose vnode commit", pVnode->config.vgId);
  bool isWeak = false;

  if (!atExit) {
    if (vnodeProposeMsg(pVnode, &rpcMsg, isWeak) < 0) {
      vTrace("vgId:%d, failed to propose vnode commit since %s", pVnode->config.vgId, terrstr());
    }
    rpcFreeCont(rpcMsg.pCont);
    rpcMsg.pCont = NULL;
  } else {
    int32_t code = 0;
    if ((code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &rpcMsg)) < 0) {
      // NOTE(review): pHead is not released on this path; confirm whether
      // tmsgPutToQueue() frees the content itself when it fails.
      vError("vgId:%d, failed to put vnode commit to write_queue since %s", pVnode->config.vgId, tstrerror(code));
    }
  }
}
175

176
#if BATCH_ENABLE
177

178
/**
 * Propose all messages collected so far as one batch, then release them.
 *
 * syncProposeBatch() is called with pVnode->lock held. When it returns 0 and the
 * last message of the batch is a blocking type, the vnode is marked blocked and
 * this function waits through vnodeWaitBlockMsg() after releasing the lock.
 * Every message in the array is freed (content and queue item) before returning,
 * and *arrSize is reset to 0.
 *
 * @param pVnode     vnode the messages belong to
 * @param pMsgArr    array of pending messages
 * @param pIsWeakArr per-message weak flags, parallel to pMsgArr
 * @param arrSize    in: number of pending messages; out: 0 (nothing is done when <= 0)
 */
static void inline vnodeProposeBatchMsg(SVnode *pVnode, SRpcMsg **pMsgArr, bool *pIsWeakArr, int32_t *arrSize) {
  if (*arrSize <= 0) return;
  SRpcMsg *pLastMsg = pMsgArr[*arrSize - 1];

  (void)taosThreadMutexLock(&pVnode->lock);
  int32_t code = syncProposeBatch(pVnode->sync, pMsgArr, pIsWeakArr, *arrSize);
  // use the same predicate name as the rest of this file (vnodeIsMsgBlock)
  bool    wait = (code == 0 && vnodeIsMsgBlock(pLastMsg->msgType));
  if (wait) {
    pVnode->blocked = true;
  }
  (void)taosThreadMutexUnlock(&pVnode->lock);

  if (code > 0) {
    for (int32_t i = 0; i < *arrSize; ++i) {
      vnodeHandleWriteMsg(pVnode, pMsgArr[i]);
    }
  } else if (code < 0) {
    if (terrno != 0) code = terrno;
    for (int32_t i = 0; i < *arrSize; ++i) {
      vnodeHandleProposeError(pVnode, pMsgArr[i], code);
    }
  }

  if (wait) vnodeWaitBlockMsg(pVnode, pLastMsg);
  pLastMsg = NULL;

  for (int32_t i = 0; i < *arrSize; ++i) {
    SRpcMsg *pMsg = pMsgArr[i];
    vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, is freed, code:0x%x", pVnode->config.vgId, pMsg, code);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }

  *arrSize = 0;
}
213

214
/**
 * Batch variant (BATCH_ENABLE): drain the vnode-write queue and propose the
 * messages to sync in batches.
 *
 * Messages are accumulated in pMsgArr. A blocking message flushes whatever is
 * pending, is then proposed on its own, and whatever is still pending after the
 * loop is flushed once at the end. Messages rejected before batching (restore not
 * finished, out of memory, pre-process failure) are answered through
 * vnodeHandleProposeError() and freed immediately.
 *
 * @param pInfo     queue info; pInfo->ahandle is the SVnode
 * @param qall      queue snapshot to read items from
 * @param numOfMsgs number of items to fetch from qall
 */
void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
  SVnode   *pVnode = pInfo->ahandle;
  int32_t   vgId = pVnode->config.vgId;
  int32_t   code = 0;
  SRpcMsg  *pMsg = NULL;
  int32_t   arrayPos = 0;
  SRpcMsg **pMsgArr = taosMemoryCalloc(numOfMsgs, sizeof(SRpcMsg *));
  bool     *pIsWeakArr = taosMemoryCalloc(numOfMsgs, sizeof(bool));
  vTrace("vgId:%d, get %d msgs from vnode-write queue", vgId, numOfMsgs);

  for (int32_t msg = 0; msg < numOfMsgs; msg++) {
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
    bool isWeak = vnodeIsMsgWeak(pMsg->msgType);
    bool isBlock = vnodeIsMsgBlock(pMsg->msgType);

    vGDebug(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-write queue, weak:%d block:%d msg:%d:%d pos:%d, handle:%p", vgId, pMsg,
            isWeak, isBlock, msg, numOfMsgs, arrayPos, pMsg->info.handle);

    if (!pVnode->restored) {
      vGWarn(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to process since restore not finished, type:%s", vgId, pMsg,
             TMSG_INFO(pMsg->msgType));
      terrno = TSDB_CODE_SYN_RESTORING;
      vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
      rpcFreeCont(pMsg->pCont);
      taosFreeQitem(pMsg);
      continue;
    }

    if (pMsgArr == NULL || pIsWeakArr == NULL) {
      vGError(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to process since out of memory, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType));
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      vnodeHandleProposeError(pVnode, pMsg, terrno);
      rpcFreeCont(pMsg->pCont);
      taosFreeQitem(pMsg);
      continue;
    }

    bool atExit = false;
    vnodeProposeCommitOnNeed(pVnode, atExit);

    code = vnodePreProcessWriteMsg(pVnode, pMsg);
    if (code != 0) {
      if (code != TSDB_CODE_MSG_PREPROCESSED) {
        vGError(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to pre-process since %s", vgId, pMsg, tstrerror(code));
      }
      // Answer the requester before dropping the message, as the non-batch
      // variant does; otherwise the request is freed without any response.
      vnodeHandleProposeError(pVnode, pMsg, code);
      rpcFreeCont(pMsg->pCont);
      taosFreeQitem(pMsg);
      continue;
    }

    if (isBlock) {
      // a blocking message must not share a batch with earlier messages
      vnodeProposeBatchMsg(pVnode, pMsgArr, pIsWeakArr, &arrayPos);
    }

    pMsgArr[arrayPos] = pMsg;
    pIsWeakArr[arrayPos] = isWeak;
    arrayPos++;

    if (isBlock) {
      vnodeProposeBatchMsg(pVnode, pMsgArr, pIsWeakArr, &arrayPos);
    }
  }

  // Flush the remainder after the loop rather than on "msg == numOfMsgs - 1":
  // the last queue item may have been skipped by one of the `continue` paths,
  // which would leave earlier batched messages neither proposed nor freed.
  // vnodeProposeBatchMsg() returns immediately when nothing is pending.
  vnodeProposeBatchMsg(pVnode, pMsgArr, pIsWeakArr, &arrayPos);

  taosMemoryFree(pMsgArr);
  taosMemoryFree(pIsWeakArr);
}
278

279
#else
280

281
void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
×
282
  SVnode  *pVnode = pInfo->ahandle;
×
283
  int32_t  vgId = pVnode->config.vgId;
×
284
  int32_t  code = 0;
×
285
  SRpcMsg *pMsg = NULL;
×
286
  vTrace("vgId:%d, get %d msgs from vnode-write queue", vgId, numOfMsgs);
×
287

288
  for (int32_t msg = 0; msg < numOfMsgs; msg++) {
×
289
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
×
290
    bool isWeak = vnodeIsMsgWeak(pMsg->msgType);
×
291

292
    vGDebug(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-write queue, weak:%d block:%d msg:%d:%d, handle:%p",
×
293
            vgId, pMsg, isWeak, vnodeIsMsgBlock(pMsg->msgType), msg, numOfMsgs, pMsg->info.handle);
294

295
    if (!pVnode->restored) {
×
296
      vGWarn(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to process since restore not finished, type:%s", vgId, pMsg,
×
297
             TMSG_INFO(pMsg->msgType));
298
      vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
×
299
      rpcFreeCont(pMsg->pCont);
×
300
      taosFreeQitem(pMsg);
×
301
      continue;
×
302
    }
303

304
    bool atExit = false;
×
305
    vnodeProposeCommitOnNeed(pVnode, atExit);
×
306

307
    code = vnodePreProcessWriteMsg(pVnode, pMsg);
×
308
    if (code != 0) {
×
309
      if (code != TSDB_CODE_MSG_PREPROCESSED) {
×
310
        vGError(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to pre-process since %s", vgId, pMsg, tstrerror(code));
×
311
      }
312
      vnodeHandleProposeError(pVnode, pMsg, code);
×
313
      rpcFreeCont(pMsg->pCont);
×
314
      taosFreeQitem(pMsg);
×
315
      continue;
×
316
    }
317

318
    code = vnodeProposeMsg(pVnode, pMsg, isWeak);
×
319

320
    vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, is freed, code:0x%x", vgId, pMsg, code);
×
321
    rpcFreeCont(pMsg->pCont);
×
322
    taosFreeQitem(pMsg);
×
323
  }
324
}
×
325

326
#endif
327

328
void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
×
329
  SVnode  *pVnode = pInfo->ahandle;
×
330
  int32_t  vgId = pVnode->config.vgId;
×
331
  int32_t  code = 0;
×
332
  SRpcMsg *pMsg = NULL;
×
333

334
  for (int32_t i = 0; i < numOfMsgs; ++i) {
×
335
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
×
336

337
    if (vnodeIsMsgBlock(pMsg->msgType)) {
×
338
      vGDebug(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-apply queue, type:%s handle:%p index:%" PRId64
×
339
              ", blocking msg obtained sec:%d seq:%" PRId64,
340
              vgId, pMsg, TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex, pVnode->blockSec,
341
              pVnode->blockSeq);
342
    } else {
343
      vGDebug(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-apply queue, type:%s handle:%p index:%" PRId64, vgId, pMsg,
×
344
              TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex);
345
    }
346

347
    SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info};
×
348
    if (rsp.code == 0) {
×
349
      if (vnodeProcessWriteMsg(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp) < 0) {
×
350
        rsp.code = terrno;
×
351
        vGError(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to apply since %s, index:%" PRId64, vgId, pMsg, terrstr(),
×
352
                pMsg->info.conn.applyIndex);
353
      }
354
    }
355

356
    vnodePostBlockMsg(pVnode, pMsg);
×
357
    if (rsp.info.handle != NULL) {
×
358
      tmsgSendRsp(&rsp);
×
359
    } else {
360
      if (rsp.pCont) {
×
361
        rpcFreeCont(rsp.pCont);
×
362
      }
363
    }
364

365
    vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, is freed, code:0x%x index:%" PRId64, vgId, pMsg, rsp.code,
×
366
            pMsg->info.conn.applyIndex);
367
    rpcFreeCont(pMsg->pCont);
×
368
    taosFreeQitem(pMsg);
×
369
  }
370
}
×
371

372
int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
×
373
  vGDebug(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-sync queue, type:%s", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType));
×
374

375
  int32_t code = syncProcessMsg(pVnode->sync, pMsg);
×
376
  if (code != 0) {
×
377
    vGError(&pMsg->info.traceId, "vgId:%d, msg:%p, failed to process since %s, type:%s", pVnode->config.vgId, pMsg, tstrerror(code),
×
378
            TMSG_INFO(pMsg->msgType));
379
  }
380

381
  return code;
×
382
}
383

384
static int32_t vnodeSyncEqCtrlMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
×
385
  if (pMsg == NULL || pMsg->pCont == NULL) {
×
386
    return TSDB_CODE_INVALID_PARA;
×
387
  }
388

389
  if (msgcb == NULL || msgcb->putToQueueFp == NULL) {
×
390
    rpcFreeCont(pMsg->pCont);
×
391
    pMsg->pCont = NULL;
×
392
    return TSDB_CODE_INVALID_PARA;
×
393
  }
394

395
  int32_t code = tmsgPutToQueue(msgcb, SYNC_RD_QUEUE, pMsg);
×
396
  if (code != 0) {
×
397
    rpcFreeCont(pMsg->pCont);
×
398
    pMsg->pCont = NULL;
×
399
  }
400
  return code;
×
401
}
402

403
static int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
×
404
  if (pMsg == NULL || pMsg->pCont == NULL) {
×
405
    return TSDB_CODE_INVALID_PARA;
×
406
  }
407

408
  if (msgcb == NULL || msgcb->putToQueueFp == NULL) {
×
409
    rpcFreeCont(pMsg->pCont);
×
410
    pMsg->pCont = NULL;
×
411
    return TSDB_CODE_INVALID_PARA;
×
412
  }
413

414
  int32_t code = tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg);
×
415
  if (code != 0) {
×
416
    rpcFreeCont(pMsg->pCont);
×
417
    pMsg->pCont = NULL;
×
418
  }
419
  return code;
×
420
}
421

422
static int32_t vnodeSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) {
×
423
  int32_t code = tmsgSendSyncReq(pEpSet, pMsg);
×
424
  if (code != 0) {
×
425
    rpcFreeCont(pMsg->pCont);
×
426
    pMsg->pCont = NULL;
×
427
  }
428
  return code;
×
429
}
430

431
static int32_t vnodeSyncGetSnapshotInfo(const SSyncFSM *pFsm, SSnapshot *pSnapshot) {
×
432
  return vnodeGetSnapshot(pFsm->data, pSnapshot);
×
433
}
434

435
static int32_t vnodeSyncApplyMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, const SFsmCbMeta *pMeta) {
×
436
  SVnode *pVnode = pFsm->data;
×
437
  pMsg->info.conn.applyIndex = pMeta->index;
×
438
  pMsg->info.conn.applyTerm = pMeta->term;
×
439

440
  vGDebug(&pMsg->info.traceId,
×
441
          "vgId:%d, index:%" PRId64 ", execute commit cb, fsm:%p, term:%" PRIu64 ", msg-index:%" PRId64
442
          ", weak:%d, code:%d, state:%d %s, type:%s code:0x%x",
443
          pVnode->config.vgId, pMeta->index, pFsm, pMeta->term, pMsg->info.conn.applyIndex, pMeta->isWeak, pMeta->code,
444
          pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType), pMsg->code);
445

446
  int32_t code = tmsgPutToQueue(&pVnode->msgCb, APPLY_QUEUE, pMsg);
×
447
  if (code < 0) vError("vgId:%d, failed to put into apply_queue since %s", pVnode->config.vgId, tstrerror(code));
×
448
  return code;
×
449
}
450

451
static int32_t vnodeSyncCommitMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
×
452
  if (pMsg->code == 0) {
×
453
    return vnodeSyncApplyMsg(pFsm, pMsg, pMeta);
×
454
  }
455

456
  SVnode *pVnode = pFsm->data;
×
457
  vnodePostBlockMsg(pVnode, pMsg);
×
458

459
  SRpcMsg rsp = {
×
460
      .code = pMsg->code,
×
461
      .info = pMsg->info,
462
  };
463

464
  if (rsp.info.handle != NULL) {
×
465
    tmsgSendRsp(&rsp);
×
466
  }
467

468
  vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, is freed, code:0x%x index:%" PRId64, TD_VID(pVnode), pMsg, rsp.code,
×
469
          pMeta->index);
470
  rpcFreeCont(pMsg->pCont);
×
471
  pMsg->pCont = NULL;
×
472
  return 0;
×
473
}
474

475
static int32_t vnodeSyncPreCommitMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
×
476
  if (pMeta->isWeak == 1) {
×
477
    return vnodeSyncApplyMsg(pFsm, pMsg, pMeta);
×
478
  }
479
  return 0;
×
480
}
481

482
static SyncIndex vnodeSyncAppliedIndex(const SSyncFSM *pFSM) {
×
483
  SVnode *pVnode = pFSM->data;
×
484
  return atomic_load_64(&pVnode->state.applied);
×
485
}
486

487
static void vnodeSyncRollBackMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
×
488
  SVnode *pVnode = pFsm->data;
×
489
  vGDebug(&pMsg->info.traceId,
×
490
          "vgId:%d, rollback-cb is excuted, fsm:%p, index:%" PRId64 ", weak:%d, code:%d, state:%d %s, type:%s",
491
          pVnode->config.vgId, pFsm, pMeta->index, pMeta->isWeak, pMeta->code, pMeta->state, syncStr(pMeta->state),
492
          TMSG_INFO(pMsg->msgType));
493
}
×
494

495
static int32_t vnodeSnapshotStartRead(const SSyncFSM *pFsm, void *pParam, void **ppReader) {
×
496
  SVnode *pVnode = pFsm->data;
×
497
  return vnodeSnapReaderOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapReader **)ppReader);
×
498
}
499

500
static void vnodeSnapshotStopRead(const SSyncFSM *pFsm, void *pReader) {
×
501
  SVnode *pVnode = pFsm->data;
×
502
  vnodeSnapReaderClose(pReader);
×
503
}
×
504

505
static int32_t vnodeSnapshotDoRead(const SSyncFSM *pFsm, void *pReader, void **ppBuf, int32_t *len) {
×
506
  SVnode *pVnode = pFsm->data;
×
507
  return vnodeSnapRead(pReader, (uint8_t **)ppBuf, len);
×
508
}
509

510
static int32_t vnodeSnapshotStartWrite(const SSyncFSM *pFsm, void *pParam, void **ppWriter) {
×
511
  SVnode *pVnode = pFsm->data;
×
512

513
  do {
×
514
    int32_t itemSize = tmsgGetQueueSize(&pVnode->msgCb, pVnode->config.vgId, APPLY_QUEUE);
×
515
    if (itemSize == 0) {
×
516
      vInfo("vgId:%d, start write vnode snapshot since apply queue is empty", pVnode->config.vgId);
×
517
      break;
×
518
    } else {
519
      vInfo("vgId:%d, write vnode snapshot later since %d items in apply queue", pVnode->config.vgId, itemSize);
×
520
      taosMsleep(10);
×
521
    }
522
  } while (true);
523

524
  return vnodeSnapWriterOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapWriter **)ppWriter);
×
525
}
526

527
static int32_t vnodeSnapshotStopWrite(const SSyncFSM *pFsm, void *pWriter, bool isApply, SSnapshot *pSnapshot) {
×
528
  SVnode *pVnode = pFsm->data;
×
529
  vInfo("vgId:%d, stop write vnode snapshot, apply:%d, index:%" PRId64 " term:%" PRIu64 " config:%" PRId64,
×
530
        pVnode->config.vgId, isApply, pSnapshot->lastApplyIndex, pSnapshot->lastApplyTerm, pSnapshot->lastConfigIndex);
531

532
  int32_t code = vnodeSnapWriterClose(pWriter, !isApply, pSnapshot);
×
533
  if (code != 0) {
×
534
    vError("vgId:%d, failed to finish applying vnode snapshot since %s, code:0x%x", pVnode->config.vgId, terrstr(),
×
535
           code);
536
  }
537
  return code;
×
538
}
539

540
static int32_t vnodeSnapshotDoWrite(const SSyncFSM *pFsm, void *pWriter, void *pBuf, int32_t len) {
×
541
  SVnode *pVnode = pFsm->data;
×
542
  vDebug("vgId:%d, continue write vnode snapshot, blockLen:%d", pVnode->config.vgId, len);
×
543
  int32_t code = vnodeSnapWrite(pWriter, pBuf, len);
×
544
  vDebug("vgId:%d, continue write vnode snapshot finished, blockLen:%d", pVnode->config.vgId, len);
×
545
  return code;
×
546
}
547

548
static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) {
×
549
  SVnode   *pVnode = pFsm->data;
×
550
  int32_t   vgId = pVnode->config.vgId;
×
551
  SyncIndex appliedIdx = -1;
×
552

553
  do {
554
    appliedIdx = vnodeSyncAppliedIndex(pFsm);
×
555
    if (appliedIdx > commitIdx) {
×
556
      vError("vgId:%d, restore failed since applied-index:%" PRId64 " is larger than commit-index:%" PRId64, vgId,
×
557
             appliedIdx, commitIdx);
558
      break;
×
559
    }
560
    if (appliedIdx == commitIdx) {
×
561
      vInfo("vgId:%d, no items to be applied, restore finish", pVnode->config.vgId);
×
562
      break;
×
563
    } else {
564
      vInfo("vgId:%d, restore not finish since %" PRId64 " items to be applied. commit-index:%" PRId64
×
565
            ", applied-index:%" PRId64,
566
            vgId, commitIdx - appliedIdx, commitIdx, appliedIdx);
567
      taosMsleep(10);
×
568
    }
569
  } while (true);
570

571
  walApplyVer(pVnode->pWal, commitIdx);
×
572
  pVnode->restored = true;
×
573

574
#ifdef USE_STREAM
575
  SStreamMeta *pMeta = pVnode->pTq->pStreamMeta;
×
576
  streamMetaWLock(pMeta);
×
577

578
  if (pMeta->startInfo.tasksWillRestart) {
×
579
    vInfo("vgId:%d, sync restore finished, stream tasks will be launched by other thread", vgId);
×
580
    streamMetaWUnLock(pMeta);
×
581
    return;
×
582
  }
583

584
  if (vnodeIsRoleLeader(pVnode)) {
×
585
    // start to restore all stream tasks
586
    if (tsDisableStream) {
×
587
      vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId);
×
588
    } else {
589
      vInfo("vgId:%d sync restore finished, start to launch stream task(s)", vgId);
×
590
      if (pMeta->startInfo.startAllTasks == 1) {
×
591
        pMeta->startInfo.restartCount += 1;
×
592
        vDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId,
×
593
               pMeta->startInfo.restartCount);
594
      } else {
595
        pMeta->startInfo.startAllTasks = 1;
×
596
        streamMetaWUnLock(pMeta);
×
597

598
        tqInfo("vgId:%d stream task already loaded, start them", vgId);
×
599
        int32_t code = streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_START_ALL_TASKS, false);
×
600
        if (code != 0) {
×
601
          tqError("vgId:%d failed to sched stream task, code:%s", vgId, tstrerror(code));
×
602
        }
603
        return;
×
604
      }
605
    }
606
  } else {
607
    vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId);
×
608
  }
609

610
  streamMetaWUnLock(pMeta);
×
611
#endif
612
}
613

614
static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
×
615
  SVnode *pVnode = pFsm->data;
×
616
  vInfo("vgId:%d, become follower", pVnode->config.vgId);
×
617

618
  (void)taosThreadMutexLock(&pVnode->lock);
×
619
  if (pVnode->blocked) {
×
620
    pVnode->blocked = false;
×
621
    vDebug("vgId:%d, become follower and post block", pVnode->config.vgId);
×
622
    if (tsem_post(&pVnode->syncSem) != 0) {
×
623
      vError("vgId:%d, failed to post sync semaphore", pVnode->config.vgId);
×
624
    }
625
  }
626
  (void)taosThreadMutexUnlock(&pVnode->lock);
×
627

628
#ifdef USE_TQ
629
  if (pVnode->pTq) {
×
630
    tqUpdateNodeStage(pVnode->pTq, false);
×
631
    if (tqStopStreamAllTasksAsync(pVnode->pTq->pStreamMeta, &pVnode->msgCb) != 0) {
×
632
      vError("vgId:%d, failed to stop stream tasks", pVnode->config.vgId);
×
633
    }
634
  }
635
#endif
636
}
×
637

638
static void vnodeBecomeLearner(const SSyncFSM *pFsm) {
×
639
  SVnode *pVnode = pFsm->data;
×
640
  vInfo("vgId:%d, become learner", pVnode->config.vgId);
×
641

642
  (void)taosThreadMutexLock(&pVnode->lock);
×
643
  if (pVnode->blocked) {
×
644
    pVnode->blocked = false;
×
645
    vDebug("vgId:%d, become learner and post block", pVnode->config.vgId);
×
646
    if (tsem_post(&pVnode->syncSem) != 0) {
×
647
      vError("vgId:%d, failed to post sync semaphore", pVnode->config.vgId);
×
648
    }
649
  }
650
  (void)taosThreadMutexUnlock(&pVnode->lock);
×
651
}
×
652

653
static void vnodeBecomeLeader(const SSyncFSM *pFsm) {
×
654
  SVnode *pVnode = pFsm->data;
×
655
  vDebug("vgId:%d, become leader", pVnode->config.vgId);
×
656
#ifdef USE_TQ
657
  if (pVnode->pTq) {
×
658
    tqUpdateNodeStage(pVnode->pTq, true);
×
659
  }
660
#endif
661
}
×
662

663
static void vnodeBecomeAssignedLeader(const SSyncFSM *pFsm) {
×
664
  SVnode *pVnode = pFsm->data;
×
665
  vDebug("vgId:%d, become assigned leader", pVnode->config.vgId);
×
666
#ifdef USE_TQ
667
  if (pVnode->pTq) {
×
668
    tqUpdateNodeStage(pVnode->pTq, true);
×
669
  }
670
#endif
671
}
×
672

673
static bool vnodeApplyQueueEmpty(const SSyncFSM *pFsm) {
×
674
  SVnode *pVnode = pFsm->data;
×
675

676
  if (pVnode != NULL && pVnode->msgCb.qsizeFp != NULL) {
×
677
    int32_t itemSize = tmsgGetQueueSize(&pVnode->msgCb, pVnode->config.vgId, APPLY_QUEUE);
×
678
    return (itemSize == 0);
×
679
  } else {
680
    return true;
×
681
  }
682
}
683

684
static int32_t vnodeApplyQueueItems(const SSyncFSM *pFsm) {
×
685
  SVnode *pVnode = pFsm->data;
×
686

687
  if (pVnode != NULL && pVnode->msgCb.qsizeFp != NULL) {
×
688
    int32_t itemSize = tmsgGetQueueSize(&pVnode->msgCb, pVnode->config.vgId, APPLY_QUEUE);
×
689
    return itemSize;
×
690
  } else {
691
    return TSDB_CODE_INVALID_PARA;
×
692
  }
693
}
694

695
static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) {
×
696
  SSyncFSM *pFsm = taosMemoryCalloc(1, sizeof(SSyncFSM));
×
697
  if (pFsm == NULL) {
×
698
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
699
    return NULL;
×
700
  }
701
  pFsm->data = pVnode;
×
702
  pFsm->FpCommitCb = vnodeSyncCommitMsg;
×
703
  pFsm->FpAppliedIndexCb = vnodeSyncAppliedIndex;
×
704
  pFsm->FpPreCommitCb = vnodeSyncPreCommitMsg;
×
705
  pFsm->FpRollBackCb = vnodeSyncRollBackMsg;
×
706
  pFsm->FpGetSnapshot = NULL;
×
707
  pFsm->FpGetSnapshotInfo = vnodeSyncGetSnapshotInfo;
×
708
  pFsm->FpRestoreFinishCb = vnodeRestoreFinish;
×
709
  pFsm->FpAfterRestoredCb = NULL;
×
710
  pFsm->FpLeaderTransferCb = NULL;
×
711
  pFsm->FpApplyQueueEmptyCb = vnodeApplyQueueEmpty;
×
712
  pFsm->FpApplyQueueItems = vnodeApplyQueueItems;
×
713
  pFsm->FpBecomeLeaderCb = vnodeBecomeLeader;
×
714
  pFsm->FpBecomeAssignedLeaderCb = vnodeBecomeAssignedLeader;
×
715
  pFsm->FpBecomeFollowerCb = vnodeBecomeFollower;
×
716
  pFsm->FpBecomeLearnerCb = vnodeBecomeLearner;
×
717
  pFsm->FpReConfigCb = NULL;
×
718
  pFsm->FpSnapshotStartRead = vnodeSnapshotStartRead;
×
719
  pFsm->FpSnapshotStopRead = vnodeSnapshotStopRead;
×
720
  pFsm->FpSnapshotDoRead = vnodeSnapshotDoRead;
×
721
  pFsm->FpSnapshotStartWrite = vnodeSnapshotStartWrite;
×
722
  pFsm->FpSnapshotStopWrite = vnodeSnapshotStopWrite;
×
723
  pFsm->FpSnapshotDoWrite = vnodeSnapshotDoWrite;
×
724

725
  return pFsm;
×
726
}
727

728
/**
 * Create the sync instance for a vnode and store its id in pVnode->sync.
 *
 * Builds an SSyncInfo from the vnode configuration, attaches the FSM produced by
 * vnodeSyncMakeFsm(), logs the replica layout and calls syncOpen().
 *
 * @param pVnode       vnode to open sync for
 * @param path         vnode directory; the sync directory is "<path><TD_DIRSEP>sync"
 * @param vnodeVersion forwarded unchanged to syncOpen()
 * @return 0 on success, otherwise terrno as left by the failing step
 */
int32_t vnodeSyncOpen(SVnode *pVnode, char *path, int32_t vnodeVersion) {
  SSyncInfo syncInfo = {
      .snapshotStrategy = SYNC_STRATEGY_WAL_FIRST,
      .batchSize = 1,
      .vgId = pVnode->config.vgId,
      .syncCfg = pVnode->config.syncCfg,
      .pWal = pVnode->pWal,
      .msgcb = &pVnode->msgCb,
      .syncSendMSg = vnodeSyncSendMsg,
      .syncEqMsg = vnodeSyncEqMsg,
      .syncEqCtrlMsg = vnodeSyncEqCtrlMsg,
      .pingMs = 5000,
      .electMs = 4000,
      .heartbeatMs = 700,
  };

  snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", path, TD_DIRSEP);
  syncInfo.pFsm = vnodeSyncMakeFsm(pVnode);
  if (syncInfo.pFsm == NULL) {
    // vnodeSyncMakeFsm() returns NULL and sets terrno when its allocation fails;
    // do not hand a NULL FSM to syncOpen().
    vError("vgId:%d, failed to create sync fsm since %s", pVnode->config.vgId, terrstr());
    return terrno;
  }

  SSyncCfg *pCfg = &syncInfo.syncCfg;
  vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex);
  for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
    SNodeInfo *pNode = &pCfg->nodeInfo[i];
    vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn,
          pNode->nodePort, pNode->nodeId, pNode->clusterId);
  }

  pVnode->sync = syncOpen(&syncInfo, vnodeVersion);
  if (pVnode->sync <= 0) {
    vError("vgId:%d, failed to open sync since %s", pVnode->config.vgId, terrstr());
    return terrno;
  }

  return 0;
}
763

764
/**
 * Start the sync instance that belongs to this vnode.
 *
 * @param pVnode vnode whose sync instance (pVnode->sync) is started
 * @return the syncStart() result: 0 on success, otherwise its error code
 *         (which is also logged)
 */
int32_t vnodeSyncStart(SVnode *pVnode) {
  vInfo("vgId:%d, start sync", pVnode->config.vgId);

  int32_t rc = syncStart(pVnode->sync);
  if (rc != 0) {
    vError("vgId:%d, failed to start sync subsystem since %s", pVnode->config.vgId, tstrerror(rc));
  }
  return rc;
}
773

774
void vnodeSyncPreClose(SVnode *pVnode) {
×
775
  vInfo("vgId:%d, sync pre close", pVnode->config.vgId);
×
776
  int32_t code = syncLeaderTransfer(pVnode->sync);
×
777
  if (code) {
×
778
    vError("vgId:%d, failed to transfer leader since %s", pVnode->config.vgId, tstrerror(code));
×
779
  }
780
  syncPreStop(pVnode->sync);
×
781

782
  (void)taosThreadMutexLock(&pVnode->lock);
×
783
  if (pVnode->blocked) {
×
784
    vInfo("vgId:%d, post block after close sync", pVnode->config.vgId);
×
785
    pVnode->blocked = false;
×
786
    if (tsem_post(&pVnode->syncSem) != 0) {
×
787
      vError("vgId:%d, failed to post block", pVnode->config.vgId);
×
788
    }
789
  }
790
  (void)taosThreadMutexUnlock(&pVnode->lock);
×
791
}
×
792

793
void vnodeSyncPostClose(SVnode *pVnode) {
×
794
  vInfo("vgId:%d, sync post close", pVnode->config.vgId);
×
795
  syncPostStop(pVnode->sync);
×
796
}
×
797

798
void vnodeSyncClose(SVnode *pVnode) {
×
799
  vInfo("vgId:%d, close sync", pVnode->config.vgId);
×
800
  syncStop(pVnode->sync);
×
801
}
×
802

803
void vnodeSyncCheckTimeout(SVnode *pVnode) {
×
804
  vTrace("vgId:%d, check sync timeout msg", pVnode->config.vgId);
×
805
  (void)taosThreadMutexLock(&pVnode->lock);
×
806
  if (pVnode->blocked) {
×
807
    int32_t curSec = taosGetTimestampSec();
×
808
    int32_t delta = curSec - pVnode->blockSec;
×
809
    if (delta > VNODE_TIMEOUT_SEC) {
×
810
      vError("vgId:%d, failed to propose since timeout and post block, start:%d cur:%d delta:%d seq:%" PRId64,
×
811
             pVnode->config.vgId, pVnode->blockSec, curSec, delta, pVnode->blockSeq);
812
      if (syncSendTimeoutRsp(pVnode->sync, pVnode->blockSeq) != 0) {
×
813
#if 0
814
        SRpcMsg rpcMsg = {.code = TSDB_CODE_SYN_TIMEOUT, .info = pVnode->blockInfo};
815
        vError("send timeout response since its applyed, seq:%" PRId64 " handle:%p ahandle:%p", pVnode->blockSeq,
816
              rpcMsg.info.handle, rpcMsg.info.ahandle);
817
        rpcSendResponse(&rpcMsg);
818
#endif
819
      }
820
      pVnode->blocked = false;
×
821
      pVnode->blockSec = 0;
×
822
      pVnode->blockSeq = 0;
×
823
      if (tsem_post(&pVnode->syncSem) != 0) {
×
824
        vError("vgId:%d, failed to post block", pVnode->config.vgId);
×
825
      }
826
    }
827
  }
828
  (void)taosThreadMutexUnlock(&pVnode->lock);
×
829
}
×
830

831
bool vnodeIsRoleLeader(SVnode *pVnode) {
×
832
  SSyncState state = syncGetState(pVnode->sync);
×
833
  return state.state == TAOS_SYNC_STATE_LEADER;
×
834
}
835

836
bool vnodeIsLeader(SVnode *pVnode) {
×
837
  terrno = 0;
×
838
  SSyncState state = syncGetState(pVnode->sync);
×
839

840
  if (terrno != 0) {
×
841
    vInfo("vgId:%d, vnode is stopping", pVnode->config.vgId);
×
842
    return false;
×
843
  }
844

845
  if (state.state != TAOS_SYNC_STATE_LEADER) {
×
846
    terrno = TSDB_CODE_SYN_NOT_LEADER;
×
847
    vInfo("vgId:%d, vnode not leader, state:%s", pVnode->config.vgId, syncStr(state.state));
×
848
    return false;
×
849
  }
850

851
  if (!state.restored || !pVnode->restored) {
×
852
    terrno = TSDB_CODE_SYN_RESTORING;
×
853
    vInfo("vgId:%d, vnode not restored:%d:%d", pVnode->config.vgId, state.restored, pVnode->restored);
×
854
    return false;
×
855
  }
856

857
  return true;
×
858
}
859

860
int64_t vnodeClusterId(SVnode *pVnode) {
×
861
  SSyncCfg *syncCfg = &pVnode->config.syncCfg;
×
862
  return syncCfg->nodeInfo[syncCfg->myIndex].clusterId;
×
863
}
864

865
int32_t vnodeNodeId(SVnode *pVnode) {
×
866
  SSyncCfg *syncCfg = &pVnode->config.syncCfg;
×
867
  return syncCfg->nodeInfo[syncCfg->myIndex].nodeId;
×
868
}
869

870
/**
 * Fill a snapshot descriptor from the vnode's committed state.
 *
 * lastApplyIndex / lastApplyTerm come from pVnode->state.committed / commitTerm,
 * lastConfigIndex is set to -1, and the state is SYNC_FSM_STATE_COMPLETE unless
 * tsdbSnapGetFsState() reports something other than TSDB_FS_STATE_NORMAL.
 * For the two PREP_SNAPSHOT message types the description is additionally
 * prepared through tsdbSnapPrepDescription().
 *
 * @param pVnode vnode to describe
 * @param pSnap  in: pSnap->type selects whether a description is prepared;
 *               out: index, term, config index and state
 * @return 0, or the result of tsdbSnapPrepDescription() when it is called
 */
int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) {
  pSnap->lastApplyIndex = pVnode->state.committed;
  pSnap->lastApplyTerm = pVnode->state.commitTerm;
  pSnap->lastConfigIndex = -1;

  bool fsNormal = (tsdbSnapGetFsState(pVnode) == TSDB_FS_STATE_NORMAL);
  pSnap->state = fsNormal ? SYNC_FSM_STATE_COMPLETE : SYNC_FSM_STATE_INCOMPLETE;

  bool isPrep = (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY);
  if (!isPrep) {
    return 0;
  }

  int code = tsdbSnapPrepDescription(pVnode, pSnap);
  return code;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc