• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3633

11 Mar 2025 12:59PM UTC coverage: 0.0% (-60.7%) from 60.719%
#3633

push

travis-ci

web-flow
Merge pull request #30118 from taosdata/wl30

udpate ci workflow

0 of 280412 branches covered (0.0%)

Branch coverage included in aggregate %.

0 of 275582 relevant lines covered (0.0%)

0.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/source/dnode/vnode/src/vnd/vnodeSync.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "sync.h"
18
#include "tq.h"
19
#include "tqCommon.h"
20
#include "tsdb.h"
21
#include "vnd.h"
22

23
#define BATCH_ENABLE 0
24

25
/* Weak-proposal predicate. It returns false for every message type. */
static inline bool vnodeIsMsgWeak(tmsg_t type) {
  (void)type;  // the type is not consulted
  return false;
}
×
26

27
/*
 * Wait on pVnode->syncSem for a blocking message.
 * Traces the message together with the recorded block second/sequence first;
 * a failing tsem_wait() is only logged.
 */
static inline void vnodeWaitBlockMsg(SVnode *pVnode, const SRpcMsg *pMsg) {
  const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
  int32_t         vgId = pVnode->config.vgId;

  vGTrace("vgId:%d, msg:%p wait block, type:%s sec:%d seq:%" PRId64, vgId, pMsg, TMSG_INFO(pMsg->msgType),
          pVnode->blockSec, pVnode->blockSeq);

  int32_t ret = tsem_wait(&pVnode->syncSem);
  if (ret != 0) {
    vError("vgId:%d, failed to wait sem", vgId);
  }
}
×
35

36
/*
 * Release a waiter of a blocking message.
 * Does nothing for non-blocking message types. Otherwise, under pVnode->lock,
 * a set `blocked` flag is cleared together with blockSec/blockSeq and
 * syncSem is posted once.
 */
static inline void vnodePostBlockMsg(SVnode *pVnode, const SRpcMsg *pMsg) {
  if (!vnodeIsMsgBlock(pMsg->msgType)) {
    return;
  }

  const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros

  (void)taosThreadMutexLock(&pVnode->lock);
  if (pVnode->blocked) {
    vGTrace("vgId:%d, msg:%p post block, type:%s sec:%d seq:%" PRId64, pVnode->config.vgId, pMsg,
            TMSG_INFO(pMsg->msgType), pVnode->blockSec, pVnode->blockSeq);

    // reset the block bookkeeping before waking the waiter
    pVnode->blocked = false;
    pVnode->blockSec = 0;
    pVnode->blockSeq = 0;

    int32_t ret = tsem_post(&pVnode->syncSem);
    if (ret != 0) {
      vError("vgId:%d, failed to post sem", pVnode->config.vgId);
    }
  }
  (void)taosThreadMutexUnlock(&pVnode->lock);
}
×
53

54
/*
 * Answer pMsg with a redirect response carrying the retry endpoint set.
 *
 * The endpoint set comes from syncGetRetryEpSet(). A `code` of 0 is replaced
 * by TSDB_CODE_SYN_NOT_LEADER. The response uses msgType + 1 and the
 * serialized endpoint set as its body.
 *
 * NOTE(review): when the body cannot be allocated, the error is written to
 * pMsg->code, not rsp.code, and the response is still sent without a body.
 * Confirm this is intended.
 */
void vnodeRedirectRpcMsg(SVnode *pVnode, SRpcMsg *pMsg, int32_t code) {
  SEpSet newEpSet = {0};
  syncGetRetryEpSet(pVnode->sync, &newEpSet);

  const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
  vGTrace("vgId:%d, msg:%p is redirect since not leader, numOfEps:%d inUse:%d", pVnode->config.vgId, pMsg,
          newEpSet.numOfEps, newEpSet.inUse);
  for (int32_t ep = 0; ep < newEpSet.numOfEps; ++ep) {
    vGTrace("vgId:%d, msg:%p redirect:%d ep:%s:%u", pVnode->config.vgId, pMsg, ep, newEpSet.eps[ep].fqdn,
            newEpSet.eps[ep].port);
  }

  // must be set before pMsg->info is copied into the response below
  pMsg->info.hasEpSet = 1;

  int32_t rspCode = (code == 0) ? TSDB_CODE_SYN_NOT_LEADER : code;
  SRpcMsg rsp = {.code = rspCode, .info = pMsg->info, .msgType = pMsg->msgType + 1};

  // first call only measures the serialized size
  int32_t contLen = tSerializeSEpSet(NULL, 0, &newEpSet);
  rsp.pCont = rpcMallocCont(contLen);
  if (rsp.pCont != NULL) {
    if (tSerializeSEpSet(rsp.pCont, contLen, &newEpSet) < 0) {
      vError("vgId:%d, failed to serialize ep set", pVnode->config.vgId);
    }
    rsp.contLen = contLen;
  } else {
    pMsg->code = TSDB_CODE_OUT_OF_MEMORY;
  }

  tmsgSendRsp(&rsp);
}
×
84

85
/*
 * Process a write message immediately and answer it.
 * On a negative result from vnodeProcessWriteMsg() the response code becomes
 * terrno. The response is sent when the message has an RPC handle; otherwise
 * any response body is freed.
 */
static void inline vnodeHandleWriteMsg(SVnode *pVnode, SRpcMsg *pMsg) {
  SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info};

  int32_t ret = vnodeProcessWriteMsg(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp);
  if (ret < 0) {
    const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
    rsp.code = terrno;
    vGError("vgId:%d, msg:%p failed to apply right now since %s", pVnode->config.vgId, pMsg, terrstr());
  }

  if (rsp.info.handle == NULL) {
    // nobody to answer: only release the response body, if any
    if (rsp.pCont) {
      rpcFreeCont(rsp.pCont);
    }
    return;
  }

  tmsgSendRsp(&rsp);
}
×
100

101
/*
 * React to a failed (or short-circuited) proposal.
 *   - NOT_LEADER / RESTORING: redirect the sender via vnodeRedirectRpcMsg().
 *   - MSG_PREPROCESSED: answer with TSDB_CODE_SUCCESS.
 *   - anything else: log the failure and answer with `code`.
 * A response is only sent when the message has an RPC handle.
 */
static void vnodeHandleProposeError(SVnode *pVnode, SRpcMsg *pMsg, int32_t code) {
  if (code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_RESTORING) {
    vnodeRedirectRpcMsg(pVnode, pMsg, code);
    return;
  }

  int32_t rspCode = TSDB_CODE_SUCCESS;
  if (code != TSDB_CODE_MSG_PREPROCESSED) {
    const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
    vGError("vgId:%d, msg:%p failed to propose since %s, code:0x%x", pVnode->config.vgId, pMsg, tstrerror(code), code);
    rspCode = code;
  }

  SRpcMsg rsp = {.code = rspCode, .info = pMsg->info};
  if (rsp.info.handle != NULL) {
    tmsgSendRsp(&rsp);
  }
}
×
118

119
/*
 * Propose one write message to the sync module and, for blocking message
 * types, wait until the block is released.
 *
 * syncPropose() and the blocked/blockSec/blockSeq bookkeeping run under
 * pVnode->lock. Handling of the propose result:
 *   - code > 0: the message is processed right away by vnodeHandleWriteMsg();
 *   - code < 0: terrno (when non-zero) replaces the code and
 *               vnodeHandleProposeError() answers the sender;
 *   - code == 0 and the type is a blocking one: the vnode is marked blocked
 *               and the caller waits on syncSem via vnodeWaitBlockMsg().
 *
 * Returns the (possibly terrno-adjusted) propose code, or
 * TSDB_CODE_INTERNAL_ERROR when a blocking message was proposed while the
 * vnode was already marked blocked.
 */
static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) {
  int64_t seq = 0;

  (void)taosThreadMutexLock(&pVnode->lock);
  int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq);
  bool    wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType));
  if (wait) {
    if (pVnode->blocked) {
      // Drop the lock before bailing out; returning with it held would stall
      // every later user of pVnode->lock.
      (void)taosThreadMutexUnlock(&pVnode->lock);
      return TSDB_CODE_INTERNAL_ERROR;
    }
    pVnode->blocked = true;
    pVnode->blockSec = taosGetTimestampSec();
    pVnode->blockSeq = seq;
  }
  (void)taosThreadMutexUnlock(&pVnode->lock);

  if (code > 0) {
    vnodeHandleWriteMsg(pVnode, pMsg);
  } else if (code < 0) {
    if (terrno != 0) code = terrno;
    vnodeHandleProposeError(pVnode, pMsg, code);
  }

  // wait outside the lock so the releasing side can take it
  if (wait) vnodeWaitBlockMsg(pVnode, pMsg);
  return code;
}
145

146
/*
 * Issue a TDMT_VND_COMMIT message when vnodeShouldCommit() asks for one.
 *
 * The message body is a bare SMsgHead and is flagged noResp.
 *   - !atExit: the message is proposed through vnodeProposeMsg() and its body
 *              is freed here afterwards.
 *   - atExit:  the message is put on the WRITE_QUEUE instead.
 * Failures are logged; nothing is returned to the caller.
 */
void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit) {
  if (!vnodeShouldCommit(pVnode, atExit)) {
    return;
  }

  int32_t   contLen = sizeof(SMsgHead);
  SMsgHead *pHead = rpcMallocCont(contLen);
  if (pHead == NULL) {
    // without a body there is nothing to propose; skip this commit attempt
    vError("vgId:%d, failed to propose vnode commit since out of memory", pVnode->config.vgId);
    return;
  }
  pHead->contLen = contLen;
  pHead->vgId = pVnode->config.vgId;

  SRpcMsg rpcMsg = {0};
  rpcMsg.msgType = TDMT_VND_COMMIT;
  rpcMsg.contLen = contLen;
  rpcMsg.pCont = pHead;
  rpcMsg.info.noResp = 1;

  vInfo("vgId:%d, propose vnode commit", pVnode->config.vgId);
  bool isWeak = false;

  if (!atExit) {
    if (vnodeProposeMsg(pVnode, &rpcMsg, isWeak) < 0) {
      vTrace("vgId:%d, failed to propose vnode commit since %s", pVnode->config.vgId, terrstr());
    }
    rpcFreeCont(rpcMsg.pCont);
    rpcMsg.pCont = NULL;
  } else {
    // NOTE(review): on failure the body is not freed here — presumably the
    // queue layer owns it; confirm against tmsgPutToQueue().
    int32_t code = 0;
    if ((code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &rpcMsg)) < 0) {
      vError("vgId:%d, failed to put vnode commit to write_queue since %s", pVnode->config.vgId, tstrerror(code));
    }
  }
}
178

179
#if BATCH_ENABLE
180

181
/*
 * Propose the first *arrSize messages of pMsgArr as one batch, then free them.
 * (Only compiled when BATCH_ENABLE is non-zero.)
 *
 * syncProposeBatch() runs under pVnode->lock. When it returns 0 and the last
 * message is a blocking type, the vnode is marked blocked and the caller waits
 * on it after the result handling:
 *   - code > 0: every message is processed through vnodeHandleWriteMsg();
 *   - code < 0: terrno (when non-zero) replaces the code and every message is
 *               answered through vnodeHandleProposeError().
 * On return every message body and queue item has been freed and *arrSize is 0.
 */
static void inline vnodeProposeBatchMsg(SVnode *pVnode, SRpcMsg **pMsgArr, bool *pIsWeakArr, int32_t *arrSize) {
  if (*arrSize <= 0) return;
  SRpcMsg *pLastMsg = pMsgArr[*arrSize - 1];

  (void)taosThreadMutexLock(&pVnode->lock);
  int32_t code = syncProposeBatch(pVnode->sync, pMsgArr, pIsWeakArr, *arrSize);
  // same predicate name as every other call site in this file
  bool    wait = (code == 0 && vnodeIsMsgBlock(pLastMsg->msgType));
  if (wait) {
    pVnode->blocked = true;
  }
  (void)taosThreadMutexUnlock(&pVnode->lock);

  if (code > 0) {
    for (int32_t i = 0; i < *arrSize; ++i) {
      vnodeHandleWriteMsg(pVnode, pMsgArr[i]);
    }
  } else if (code < 0) {
    if (terrno != 0) code = terrno;
    for (int32_t i = 0; i < *arrSize; ++i) {
      vnodeHandleProposeError(pVnode, pMsgArr[i], code);
    }
  }

  if (wait) vnodeWaitBlockMsg(pVnode, pLastMsg);
  pLastMsg = NULL;

  for (int32_t i = 0; i < *arrSize; ++i) {
    SRpcMsg        *pMsg = pMsgArr[i];
    const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
    vGTrace("vgId:%d, msg:%p is freed, code:0x%x", pVnode->config.vgId, pMsg, code);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }

  *arrSize = 0;
}
217

218
/*
 * Batch variant of the vnode-write queue consumer (only compiled when
 * BATCH_ENABLE is non-zero).
 *
 * Messages are collected into pMsgArr and proposed together by
 * vnodeProposeBatchMsg(). A blocking message flushes the pending batch first
 * and is then proposed on its own. Messages that cannot be batched (vnode not
 * restored, out of memory, pre-process failure) are answered through
 * vnodeHandleProposeError() and freed immediately.
 */
void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
  SVnode   *pVnode = pInfo->ahandle;
  int32_t   vgId = pVnode->config.vgId;
  int32_t   code = 0;
  SRpcMsg  *pMsg = NULL;
  int32_t   arrayPos = 0;
  SRpcMsg **pMsgArr = taosMemoryCalloc(numOfMsgs, sizeof(SRpcMsg *));
  bool     *pIsWeakArr = taosMemoryCalloc(numOfMsgs, sizeof(bool));
  vTrace("vgId:%d, get %d msgs from vnode-write queue", vgId, numOfMsgs);

  for (int32_t msg = 0; msg < numOfMsgs; msg++) {
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
    bool isWeak = vnodeIsMsgWeak(pMsg->msgType);
    bool isBlock = vnodeIsMsgBlock(pMsg->msgType);

    const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
    vGTrace("vgId:%d, msg:%p get from vnode-write queue, weak:%d block:%d msg:%d:%d pos:%d, handle:%p", vgId, pMsg,
            isWeak, isBlock, msg, numOfMsgs, arrayPos, pMsg->info.handle);

    if (!pVnode->restored) {
      vGWarn("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
             TMSG_INFO(pMsg->msgType));
      terrno = TSDB_CODE_SYN_RESTORING;
      vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
      rpcFreeCont(pMsg->pCont);
      taosFreeQitem(pMsg);
      continue;
    }

    if (pMsgArr == NULL || pIsWeakArr == NULL) {
      vGError("vgId:%d, msg:%p failed to process since out of memory, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType));
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      vnodeHandleProposeError(pVnode, pMsg, terrno);
      rpcFreeCont(pMsg->pCont);
      taosFreeQitem(pMsg);
      continue;
    }

    bool atExit = false;
    vnodeProposeCommitOnNeed(pVnode, atExit);

    code = vnodePreProcessWriteMsg(pVnode, pMsg);
    if (code != 0) {
      // Mirror the non-batch consumer: MSG_PREPROCESSED is not an error, and
      // the sender is always answered before the message is dropped.
      if (code != TSDB_CODE_MSG_PREPROCESSED) {
        vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, tstrerror(code));
      }
      vnodeHandleProposeError(pVnode, pMsg, code);
      rpcFreeCont(pMsg->pCont);
      taosFreeQitem(pMsg);
      continue;
    }

    if (isBlock) {
      // flush whatever is pending so the blocking message is proposed alone
      vnodeProposeBatchMsg(pVnode, pMsgArr, pIsWeakArr, &arrayPos);
    }

    pMsgArr[arrayPos] = pMsg;
    pIsWeakArr[arrayPos] = isWeak;
    arrayPos++;

    if (isBlock) {
      vnodeProposeBatchMsg(pVnode, pMsgArr, pIsWeakArr, &arrayPos);
    }
  }

  // Flush the tail here rather than on the last loop index: the last iteration
  // may leave through `continue`, which would strand the pending messages.
  // arrayPos stays 0 when the arrays could not be allocated, so this is a no-op then.
  vnodeProposeBatchMsg(pVnode, pMsgArr, pIsWeakArr, &arrayPos);

  taosMemoryFree(pMsgArr);
  taosMemoryFree(pIsWeakArr);
}
283

284
#else
285

286
/*
 * Consumer of the vnode-write queue (non-batch variant).
 *
 * For each dequeued message:
 *   - vnode not restored: answer through vnodeHandleProposeError() with
 *     TSDB_CODE_SYN_RESTORING;
 *   - otherwise trigger a commit proposal if needed, pre-process the message,
 *     and on success propose it with vnodeProposeMsg(); a non-zero pre-process
 *     result is answered through vnodeHandleProposeError().
 * The message body and queue item are freed here in every case.
 */
void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
  SVnode  *pVnode = pInfo->ahandle;
  int32_t  vgId = pVnode->config.vgId;
  SRpcMsg *pMsg = NULL;

  vTrace("vgId:%d, get %d msgs from vnode-write queue", vgId, numOfMsgs);

  for (int32_t idx = 0; idx < numOfMsgs; idx++) {
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;

    bool            isWeak = vnodeIsMsgWeak(pMsg->msgType);
    const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
    vGTrace("vgId:%d, msg:%p get from vnode-write queue, weak:%d block:%d msg:%d:%d, handle:%p", vgId, pMsg, isWeak,
            vnodeIsMsgBlock(pMsg->msgType), idx, numOfMsgs, pMsg->info.handle);

    if (!pVnode->restored) {
      vGWarn("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
             TMSG_INFO(pMsg->msgType));
      vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
    } else {
      vnodeProposeCommitOnNeed(pVnode, false);

      int32_t code = vnodePreProcessWriteMsg(pVnode, pMsg);
      if (code == 0) {
        code = vnodeProposeMsg(pVnode, pMsg, isWeak);
        vGTrace("vgId:%d, msg:%p is freed, code:0x%x", vgId, pMsg, code);
      } else {
        // MSG_PREPROCESSED is not logged as an error
        if (code != TSDB_CODE_MSG_PREPROCESSED) {
          vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, tstrerror(code));
        }
        vnodeHandleProposeError(pVnode, pMsg, code);
      }
    }

    // single release point for every path above
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }
}
×
331

332
#endif
333

334
/*
 * Consumer of the vnode-apply queue.
 *
 * Each dequeued message is applied with vnodeProcessWriteMsg() unless it
 * already carries a non-zero code. Afterwards a possible block is released
 * (vnodePostBlockMsg), the response is sent when the message has an RPC
 * handle (otherwise the response body is freed), and the message is freed.
 */
void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
  SVnode  *pVnode = pInfo->ahandle;
  int32_t  vgId = pVnode->config.vgId;
  SRpcMsg *pMsg = NULL;

  for (int32_t idx = 0; idx < numOfMsgs; ++idx) {
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
    const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros

    if (!vnodeIsMsgBlock(pMsg->msgType)) {
      vGTrace("vgId:%d, msg:%p get from vnode-apply queue, type:%s handle:%p index:%" PRId64, vgId, pMsg,
              TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex);
    } else {
      vGTrace("vgId:%d, msg:%p get from vnode-apply queue, type:%s handle:%p index:%" PRId64
              ", blocking msg obtained sec:%d seq:%" PRId64,
              vgId, pMsg, TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex, pVnode->blockSec,
              pVnode->blockSeq);
    }

    SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info};
    // a message that already carries an error is answered without being applied
    if (rsp.code == 0 && vnodeProcessWriteMsg(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp) < 0) {
      rsp.code = terrno;
      vGError("vgId:%d, msg:%p failed to apply since %s, index:%" PRId64, vgId, pMsg, terrstr(),
              pMsg->info.conn.applyIndex);
    }

    vnodePostBlockMsg(pVnode, pMsg);

    if (rsp.info.handle != NULL) {
      tmsgSendRsp(&rsp);
    } else if (rsp.pCont) {
      rpcFreeCont(rsp.pCont);
    }

    vGTrace("vgId:%d, msg:%p is freed, code:0x%x index:%" PRId64, vgId, pMsg, rsp.code, pMsg->info.conn.applyIndex);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }
}
×
377

378
/*
 * Hand a sync message to syncProcessMsg() and return its result.
 * A non-zero result is logged. pRsp is not used by this function.
 */
int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
  const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros
  vGTrace("vgId:%d, sync msg:%p will be processed, type:%s", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType));

  int32_t code = syncProcessMsg(pVnode->sync, pMsg);
  if (code == 0) {
    return code;
  }

  vGError("vgId:%d, failed to process sync msg:%p type:%s, reason: %s", pVnode->config.vgId, pMsg,
          TMSG_INFO(pMsg->msgType), tstrerror(code));
  return code;
}
390

391
/*
 * Enqueue a sync control message on the SYNC_RD_QUEUE.
 * Returns TSDB_CODE_INVALID_PARA for a missing message/body or an unusable
 * msgcb, otherwise the result of tmsgPutToQueue(). Whenever a body exists and
 * the message was not enqueued, the body is freed and pMsg->pCont is cleared.
 */
static int32_t vnodeSyncEqCtrlMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
  if (pMsg == NULL || pMsg->pCont == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t code;
  if (msgcb == NULL || msgcb->putToQueueFp == NULL) {
    code = TSDB_CODE_INVALID_PARA;
    goto _release;
  }

  code = tmsgPutToQueue(msgcb, SYNC_RD_QUEUE, pMsg);
  if (code == 0) {
    return code;
  }

_release:
  rpcFreeCont(pMsg->pCont);
  pMsg->pCont = NULL;
  return code;
}
409

410
/*
 * Enqueue a sync message on the SYNC_QUEUE.
 * Returns TSDB_CODE_INVALID_PARA for a missing message/body or an unusable
 * msgcb, otherwise the result of tmsgPutToQueue(). Whenever a body exists and
 * the message was not enqueued, the body is freed and pMsg->pCont is cleared.
 */
static int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
  if (pMsg == NULL || pMsg->pCont == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  int32_t code;
  if (msgcb == NULL || msgcb->putToQueueFp == NULL) {
    code = TSDB_CODE_INVALID_PARA;
    goto _release;
  }

  code = tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg);
  if (code == 0) {
    return code;
  }

_release:
  rpcFreeCont(pMsg->pCont);
  pMsg->pCont = NULL;
  return code;
}
428

429
/*
 * Send a sync request to pEpSet via tmsgSendSyncReq().
 * On a non-zero result the message body is freed and pMsg->pCont is cleared.
 * Returns the send result.
 */
static int32_t vnodeSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) {
  int32_t code = tmsgSendSyncReq(pEpSet, pMsg);
  if (code == 0) {
    return code;
  }

  rpcFreeCont(pMsg->pCont);
  pMsg->pCont = NULL;
  return code;
}
437

438
/* FSM callback: fill pSnapshot through vnodeGetSnapshot() for the vnode stored in pFsm->data. */
static int32_t vnodeSyncGetSnapshotInfo(const SSyncFSM *pFsm, SSnapshot *pSnapshot) {
  int32_t code = vnodeGetSnapshot(pFsm->data, pSnapshot);
  return code;
}
441

442
/*
 * Stamp the message with the index/term from pMeta and put it on the
 * APPLY_QUEUE. Returns the enqueue result; a negative result is logged.
 */
static int32_t vnodeSyncApplyMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, const SFsmCbMeta *pMeta) {
  SVnode         *pVnode = pFsm->data;
  const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros

  pMsg->info.conn.applyIndex = pMeta->index;
  pMsg->info.conn.applyTerm = pMeta->term;

  vGTrace("vgId:%d, commit-cb is excuted, fsm:%p, index:%" PRId64 ", term:%" PRIu64 ", msg-index:%" PRId64
          ", weak:%d, code:%d, state:%d %s, type:%s code:0x%x",
          pVnode->config.vgId, pFsm, pMeta->index, pMeta->term, pMsg->info.conn.applyIndex, pMeta->isWeak, pMeta->code,
          pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType), pMsg->code);

  int32_t code = tmsgPutToQueue(&pVnode->msgCb, APPLY_QUEUE, pMsg);
  if (code < 0) {
    vError("vgId:%d, failed to put into apply_queue since %s", pVnode->config.vgId, tstrerror(code));
  }
  return code;
}
457

458
/*
 * FSM commit callback.
 * A message with code 0 is forwarded to the apply queue. A message that
 * carries an error releases a possible block, is answered with that error
 * (when it has an RPC handle), and its body is freed here; 0 is returned.
 */
static int32_t vnodeSyncCommitMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
  if (pMsg->code != 0) {
    SVnode         *pVnode = pFsm->data;
    const STraceId *trace = &pMsg->info.traceId;  // presumably picked up by the vG* log macros

    vnodePostBlockMsg(pVnode, pMsg);

    SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info};
    if (rsp.info.handle != NULL) {
      tmsgSendRsp(&rsp);
    }

    vGTrace("vgId:%d, msg:%p is freed, code:0x%x index:%" PRId64, TD_VID(pVnode), pMsg, rsp.code, pMeta->index);
    rpcFreeCont(pMsg->pCont);
    pMsg->pCont = NULL;
    return 0;
  }

  return vnodeSyncApplyMsg(pFsm, pMsg, pMeta);
}
477

478
/* FSM pre-commit callback: only weak proposals (pMeta->isWeak == 1) are sent to the apply queue here. */
static int32_t vnodeSyncPreCommitMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
  if (pMeta->isWeak != 1) {
    return 0;
  }
  return vnodeSyncApplyMsg(pFsm, pMsg, pMeta);
}
484

485
static SyncIndex vnodeSyncAppliedIndex(const SSyncFSM *pFSM) {
×
486
  SVnode *pVnode = pFSM->data;
×
487
  return atomic_load_64(&pVnode->state.applied);
×
488
}
489

490
/* FSM rollback callback: only traces the rolled-back entry; no state is changed here. */
static void vnodeSyncRollBackMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
  SVnode *pNode = pFsm->data;

  vTrace("vgId:%d, rollback-cb is excuted, fsm:%p, index:%" PRId64 ", weak:%d, code:%d, state:%d %s, type:%s",
         pNode->config.vgId, pFsm, pMeta->index, pMeta->isWeak, pMeta->code, pMeta->state, syncStr(pMeta->state),
         TMSG_INFO(pMsg->msgType));
}
×
496

497
static int32_t vnodeSnapshotStartRead(const SSyncFSM *pFsm, void *pParam, void **ppReader) {
×
498
  SVnode *pVnode = pFsm->data;
×
499
  return vnodeSnapReaderOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapReader **)ppReader);
×
500
}
501

502
/*
 * FSM callback: close the snapshot reader.
 * pFsm is part of the callback signature but is not needed to close the reader.
 */
static void vnodeSnapshotStopRead(const SSyncFSM *pFsm, void *pReader) {
  (void)pFsm;
  vnodeSnapReaderClose(pReader);
}
×
506

507
/*
 * FSM callback: read the next snapshot block through vnodeSnapRead() and
 * return its result. pFsm is part of the callback signature but is not needed
 * for the read.
 */
static int32_t vnodeSnapshotDoRead(const SSyncFSM *pFsm, void *pReader, void **ppBuf, int32_t *len) {
  (void)pFsm;
  return vnodeSnapRead(pReader, (uint8_t **)ppBuf, len);
}
511

512
static int32_t vnodeSnapshotStartWrite(const SSyncFSM *pFsm, void *pParam, void **ppWriter) {
×
513
  SVnode *pVnode = pFsm->data;
×
514

515
  do {
×
516
    int32_t itemSize = tmsgGetQueueSize(&pVnode->msgCb, pVnode->config.vgId, APPLY_QUEUE);
×
517
    if (itemSize == 0) {
×
518
      vInfo("vgId:%d, start write vnode snapshot since apply queue is empty", pVnode->config.vgId);
×
519
      break;
×
520
    } else {
521
      vInfo("vgId:%d, write vnode snapshot later since %d items in apply queue", pVnode->config.vgId, itemSize);
×
522
      taosMsleep(10);
×
523
    }
524
  } while (true);
525

526
  return vnodeSnapWriterOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapWriter **)ppWriter);
×
527
}
528

529
/*
 * FSM callback: close the snapshot writer.
 * vnodeSnapWriterClose() receives !isApply as its second argument. Its result
 * is returned; a non-zero result is logged.
 */
static int32_t vnodeSnapshotStopWrite(const SSyncFSM *pFsm, void *pWriter, bool isApply, SSnapshot *pSnapshot) {
  SVnode *pVnode = pFsm->data;

  vInfo("vgId:%d, stop write vnode snapshot, apply:%d, index:%" PRId64 " term:%" PRIu64 " config:%" PRId64,
        pVnode->config.vgId, isApply, pSnapshot->lastApplyIndex, pSnapshot->lastApplyTerm, pSnapshot->lastConfigIndex);

  int32_t code = vnodeSnapWriterClose(pWriter, !isApply, pSnapshot);
  if (code == 0) {
    return code;
  }

  vError("vgId:%d, failed to finish applying vnode snapshot since %s, code:0x%x", pVnode->config.vgId, terrstr(),
         code);
  return code;
}
541

542
static int32_t vnodeSnapshotDoWrite(const SSyncFSM *pFsm, void *pWriter, void *pBuf, int32_t len) {
×
543
  SVnode *pVnode = pFsm->data;
×
544
  vDebug("vgId:%d, continue write vnode snapshot, blockLen:%d", pVnode->config.vgId, len);
×
545
  int32_t code = vnodeSnapWrite(pWriter, pBuf, len);
×
546
  vDebug("vgId:%d, continue write vnode snapshot finished, blockLen:%d", pVnode->config.vgId, len);
×
547
  return code;
×
548
}
549

550
static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) {
×
551
  SVnode   *pVnode = pFsm->data;
×
552
  int32_t   vgId = pVnode->config.vgId;
×
553
  SyncIndex appliedIdx = -1;
×
554

555
  do {
556
    appliedIdx = vnodeSyncAppliedIndex(pFsm);
×
557
    if (appliedIdx > commitIdx) {
×
558
      vError("vgId:%d, restore failed since applied-index:%" PRId64 " is larger than commit-index:%" PRId64, vgId,
×
559
             appliedIdx, commitIdx);
560
      break;
×
561
    }
562
    if (appliedIdx == commitIdx) {
×
563
      vInfo("vgId:%d, no items to be applied, restore finish", pVnode->config.vgId);
×
564
      break;
×
565
    } else {
566
      vInfo("vgId:%d, restore not finish since %" PRId64 " items to be applied. commit-index:%" PRId64
×
567
            ", applied-index:%" PRId64,
568
            vgId, commitIdx - appliedIdx, commitIdx, appliedIdx);
569
      taosMsleep(10);
×
570
    }
571
  } while (true);
572

573
  walApplyVer(pVnode->pWal, commitIdx);
×
574
  pVnode->restored = true;
×
575

576
  SStreamMeta *pMeta = pVnode->pTq->pStreamMeta;
×
577
  streamMetaWLock(pMeta);
×
578

579
  if (pMeta->startInfo.tasksWillRestart) {
×
580
    vInfo("vgId:%d, sync restore finished, stream tasks will be launched by other thread", vgId);
×
581
    streamMetaWUnLock(pMeta);
×
582
    return;
×
583
  }
584

585
  if (vnodeIsRoleLeader(pVnode)) {
×
586
    // start to restore all stream tasks
587
    if (tsDisableStream) {
×
588
      vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId);
×
589
    } else {
590
      vInfo("vgId:%d sync restore finished, start to launch stream task(s)", vgId);
×
591
      if (pMeta->startInfo.startAllTasks == 1) {
×
592
        pMeta->startInfo.restartCount += 1;
×
593
        vDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId,
×
594
               pMeta->startInfo.restartCount);
595
      } else {
596
        pMeta->startInfo.startAllTasks = 1;
×
597
        streamMetaWUnLock(pMeta);
×
598

599
        tqInfo("vgId:%d stream task already loaded, start them", vgId);
×
600
        int32_t code = streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_START_ALL_TASKS, false);
×
601
        if (code != 0) {
×
602
          tqError("vgId:%d failed to sched stream task, code:%s", vgId, tstrerror(code));
×
603
        }
604
        return;
×
605
      }
606
    }
607
  } else {
608
    vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId);
×
609
  }
610

611
  streamMetaWUnLock(pMeta);
×
612
}
613

614
static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
×
615
  SVnode *pVnode = pFsm->data;
×
616
  vInfo("vgId:%d, become follower", pVnode->config.vgId);
×
617

618
  (void)taosThreadMutexLock(&pVnode->lock);
×
619
  if (pVnode->blocked) {
×
620
    pVnode->blocked = false;
×
621
    vDebug("vgId:%d, become follower and post block", pVnode->config.vgId);
×
622
    if (tsem_post(&pVnode->syncSem) != 0) {
×
623
      vError("vgId:%d, failed to post sync semaphore", pVnode->config.vgId);
×
624
    }
625
  }
626
  (void)taosThreadMutexUnlock(&pVnode->lock);
×
627

628
  if (pVnode->pTq) {
×
629
    tqUpdateNodeStage(pVnode->pTq, false);
×
630
    if (tqStopStreamAllTasksAsync(pVnode->pTq->pStreamMeta, &pVnode->msgCb) != 0) {
×
631
      vError("vgId:%d, failed to stop stream tasks", pVnode->config.vgId);
×
632
    }
633
  }
634
}
×
635

636
static void vnodeBecomeLearner(const SSyncFSM *pFsm) {
×
637
  SVnode *pVnode = pFsm->data;
×
638
  vInfo("vgId:%d, become learner", pVnode->config.vgId);
×
639

640
  (void)taosThreadMutexLock(&pVnode->lock);
×
641
  if (pVnode->blocked) {
×
642
    pVnode->blocked = false;
×
643
    vDebug("vgId:%d, become learner and post block", pVnode->config.vgId);
×
644
    if (tsem_post(&pVnode->syncSem) != 0) {
×
645
      vError("vgId:%d, failed to post sync semaphore", pVnode->config.vgId);
×
646
    }
647
  }
648
  (void)taosThreadMutexUnlock(&pVnode->lock);
×
649
}
×
650

651
static void vnodeBecomeLeader(const SSyncFSM *pFsm) {
×
652
  SVnode *pVnode = pFsm->data;
×
653
  vDebug("vgId:%d, become leader", pVnode->config.vgId);
×
654
  if (pVnode->pTq) {
×
655
    tqUpdateNodeStage(pVnode->pTq, true);
×
656
  }
657
}
×
658

659
static void vnodeBecomeAssignedLeader(const SSyncFSM *pFsm) {
×
660
  SVnode *pVnode = pFsm->data;
×
661
  vDebug("vgId:%d, become assigned leader", pVnode->config.vgId);
×
662
  if (pVnode->pTq) {
×
663
    tqUpdateNodeStage(pVnode->pTq, true);
×
664
  }
665
}
×
666

667
static bool vnodeApplyQueueEmpty(const SSyncFSM *pFsm) {
×
668
  SVnode *pVnode = pFsm->data;
×
669

670
  if (pVnode != NULL && pVnode->msgCb.qsizeFp != NULL) {
×
671
    int32_t itemSize = tmsgGetQueueSize(&pVnode->msgCb, pVnode->config.vgId, APPLY_QUEUE);
×
672
    return (itemSize == 0);
×
673
  } else {
674
    return true;
×
675
  }
676
}
677

678
static int32_t vnodeApplyQueueItems(const SSyncFSM *pFsm) {
×
679
  SVnode *pVnode = pFsm->data;
×
680

681
  if (pVnode != NULL && pVnode->msgCb.qsizeFp != NULL) {
×
682
    int32_t itemSize = tmsgGetQueueSize(&pVnode->msgCb, pVnode->config.vgId, APPLY_QUEUE);
×
683
    return itemSize;
×
684
  } else {
685
    return TSDB_CODE_INVALID_PARA;
×
686
  }
687
}
688

689
/*
 * Allocate an SSyncFSM bound to pVnode and install this file's callbacks.
 * Returns NULL with terrno = TSDB_CODE_OUT_OF_MEMORY when allocation fails.
 */
static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) {
  SSyncFSM *pFsm = taosMemoryCalloc(1, sizeof(SSyncFSM));
  if (pFsm == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pFsm->data = pVnode;

  // log replication callbacks
  pFsm->FpCommitCb = vnodeSyncCommitMsg;
  pFsm->FpAppliedIndexCb = vnodeSyncAppliedIndex;
  pFsm->FpPreCommitCb = vnodeSyncPreCommitMsg;
  pFsm->FpRollBackCb = vnodeSyncRollBackMsg;

  // restore / apply-queue callbacks
  pFsm->FpRestoreFinishCb = vnodeRestoreFinish;
  pFsm->FpApplyQueueEmptyCb = vnodeApplyQueueEmpty;
  pFsm->FpApplyQueueItems = vnodeApplyQueueItems;

  // role-change callbacks
  pFsm->FpBecomeLeaderCb = vnodeBecomeLeader;
  pFsm->FpBecomeAssignedLeaderCb = vnodeBecomeAssignedLeader;
  pFsm->FpBecomeFollowerCb = vnodeBecomeFollower;
  pFsm->FpBecomeLearnerCb = vnodeBecomeLearner;

  // snapshot callbacks
  pFsm->FpGetSnapshotInfo = vnodeSyncGetSnapshotInfo;
  pFsm->FpSnapshotStartRead = vnodeSnapshotStartRead;
  pFsm->FpSnapshotStopRead = vnodeSnapshotStopRead;
  pFsm->FpSnapshotDoRead = vnodeSnapshotDoRead;
  pFsm->FpSnapshotStartWrite = vnodeSnapshotStartWrite;
  pFsm->FpSnapshotStopWrite = vnodeSnapshotStopWrite;
  pFsm->FpSnapshotDoWrite = vnodeSnapshotDoWrite;

  // callbacks this file does not provide
  pFsm->FpGetSnapshot = NULL;
  pFsm->FpAfterRestoredCb = NULL;
  pFsm->FpLeaderTransferCb = NULL;
  pFsm->FpReConfigCb = NULL;

  return pFsm;
}
721

722
/*
 * Open the sync instance of a vnode.
 *
 * Builds an SSyncInfo from the vnode configuration, with "<path><sep>sync" as
 * its directory and a freshly created FSM, logs every replica, and stores the
 * handle returned by syncOpen() in pVnode->sync.
 *
 * Returns 0 on success; terrno when the FSM cannot be created or syncOpen()
 * does not return a positive handle.
 */
int32_t vnodeSyncOpen(SVnode *pVnode, char *path, int32_t vnodeVersion) {
  SSyncInfo syncInfo = {
      .snapshotStrategy = SYNC_STRATEGY_WAL_FIRST,
      .batchSize = 1,
      .vgId = pVnode->config.vgId,
      .syncCfg = pVnode->config.syncCfg,
      .pWal = pVnode->pWal,
      .msgcb = &pVnode->msgCb,
      .syncSendMSg = vnodeSyncSendMsg,
      .syncEqMsg = vnodeSyncEqMsg,
      .syncEqCtrlMsg = vnodeSyncEqCtrlMsg,
      .pingMs = 5000,
      .electMs = 4000,
      .heartbeatMs = 700,
  };

  snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", path, TD_DIRSEP);
  syncInfo.pFsm = vnodeSyncMakeFsm(pVnode);
  if (syncInfo.pFsm == NULL) {
    // vnodeSyncMakeFsm() has set terrno; do not hand a NULL FSM to syncOpen()
    vError("vgId:%d, failed to create sync fsm since %s", pVnode->config.vgId, terrstr());
    return terrno;
  }

  SSyncCfg *pCfg = &syncInfo.syncCfg;
  vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex);
  for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
    SNodeInfo *pNode = &pCfg->nodeInfo[i];
    vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn,
          pNode->nodePort, pNode->nodeId, pNode->clusterId);
  }

  pVnode->sync = syncOpen(&syncInfo, vnodeVersion);
  if (pVnode->sync <= 0) {
    vError("vgId:%d, failed to open sync since %s", pVnode->config.vgId, terrstr());
    return terrno;
  }

  return 0;
}
757

758
/* Start the vnode's sync instance. Returns 0 on success, otherwise syncStart()'s code (logged). */
int32_t vnodeSyncStart(SVnode *pVnode) {
  vInfo("vgId:%d, start sync", pVnode->config.vgId);

  int32_t code = syncStart(pVnode->sync);
  if (code == 0) {
    return 0;
  }

  vError("vgId:%d, failed to start sync subsystem since %s", pVnode->config.vgId, tstrerror(code));
  return code;
}
767

768
/*
 * First step of closing sync: request a leader transfer (a failure is only
 * logged), call syncPreStop(), then, under pVnode->lock, clear a set
 * `blocked` flag and post syncSem.
 */
void vnodeSyncPreClose(SVnode *pVnode) {
  vInfo("vgId:%d, sync pre close", pVnode->config.vgId);

  int32_t code = syncLeaderTransfer(pVnode->sync);
  if (code != 0) {
    vError("vgId:%d, failed to transfer leader since %s", pVnode->config.vgId, tstrerror(code));
  }
  syncPreStop(pVnode->sync);

  (void)taosThreadMutexLock(&pVnode->lock);
  if (pVnode->blocked) {
    vInfo("vgId:%d, post block after close sync", pVnode->config.vgId);
    pVnode->blocked = false;
    int32_t ret = tsem_post(&pVnode->syncSem);
    if (ret != 0) {
      vError("vgId:%d, failed to post block", pVnode->config.vgId);
    }
  }
  (void)taosThreadMutexUnlock(&pVnode->lock);
}
×
786

787
/* Log the step, then call syncPostStop() on the vnode's sync handle. */
void vnodeSyncPostClose(SVnode *pVnode) {
  int32_t vgId = pVnode->config.vgId;

  vInfo("vgId:%d, sync post close", vgId);
  syncPostStop(pVnode->sync);
}
×
791

792
/* Log the step, then call syncStop() on the vnode's sync handle. */
void vnodeSyncClose(SVnode *pVnode) {
  int32_t vgId = pVnode->config.vgId;

  vInfo("vgId:%d, close sync", vgId);
  syncStop(pVnode->sync);
}
×
796

797
/*
 * Release a blocked vnode whose blocking proposal has been pending for more
 * than VNODE_TIMEOUT_SEC seconds.
 *
 * Under pVnode->lock: when blocked and expired, a timeout response is
 * requested from the sync module for blockSeq, the block bookkeeping is
 * reset, and syncSem is posted.
 */
void vnodeSyncCheckTimeout(SVnode *pVnode) {
  vTrace("vgId:%d, check sync timeout msg", pVnode->config.vgId);

  (void)taosThreadMutexLock(&pVnode->lock);
  if (pVnode->blocked) {
    int32_t nowSec = taosGetTimestampSec();
    int32_t elapsed = nowSec - pVnode->blockSec;

    if (elapsed > VNODE_TIMEOUT_SEC) {
      vError("vgId:%d, failed to propose since timeout and post block, start:%d cur:%d delta:%d seq:%" PRId64,
             pVnode->config.vgId, pVnode->blockSec, nowSec, elapsed, pVnode->blockSeq);

      // The result is not acted upon; a fallback that answered the sender
      // directly was compiled out of this function.
      (void)syncSendTimeoutRsp(pVnode->sync, pVnode->blockSeq);

      pVnode->blocked = false;
      pVnode->blockSec = 0;
      pVnode->blockSeq = 0;

      int32_t ret = tsem_post(&pVnode->syncSem);
      if (ret != 0) {
        vError("vgId:%d, failed to post block", pVnode->config.vgId);
      }
    }
  }
  (void)taosThreadMutexUnlock(&pVnode->lock);
}
×
824

825
/* True when syncGetState() reports TAOS_SYNC_STATE_LEADER; the restore status is not considered. */
bool vnodeIsRoleLeader(SVnode *pVnode) {
  SSyncState syncState = syncGetState(pVnode->sync);
  bool       isLeader = (syncState.state == TAOS_SYNC_STATE_LEADER);
  return isLeader;
}
829

830
/*
 * True only when the vnode is a fully restored leader.
 *
 * terrno is cleared first. It is left as set by syncGetState() when that
 * call reports an error, set to TSDB_CODE_SYN_NOT_LEADER when the state is
 * not leader, and to TSDB_CODE_SYN_RESTORING when either the sync state or
 * the vnode is not restored.
 */
bool vnodeIsLeader(SVnode *pVnode) {
  terrno = 0;
  SSyncState state = syncGetState(pVnode->sync);

  bool isLeader = false;
  if (terrno != 0) {
    vInfo("vgId:%d, vnode is stopping", pVnode->config.vgId);
  } else if (state.state != TAOS_SYNC_STATE_LEADER) {
    terrno = TSDB_CODE_SYN_NOT_LEADER;
    vInfo("vgId:%d, vnode not leader, state:%s", pVnode->config.vgId, syncStr(state.state));
  } else if (!(state.restored && pVnode->restored)) {
    terrno = TSDB_CODE_SYN_RESTORING;
    vInfo("vgId:%d, vnode not restored:%d:%d", pVnode->config.vgId, state.restored, pVnode->restored);
  } else {
    isLeader = true;
  }

  return isLeader;
}
853

854
/* Cluster id of this node's own entry (nodeInfo[myIndex]) in the vnode's sync configuration. */
int64_t vnodeClusterId(SVnode *pVnode) {
  const SSyncCfg  *pCfg = &pVnode->config.syncCfg;
  const SNodeInfo *pSelf = &pCfg->nodeInfo[pCfg->myIndex];
  return pSelf->clusterId;
}
858

859
/* Node id of this node's own entry (nodeInfo[myIndex]) in the vnode's sync configuration. */
int32_t vnodeNodeId(SVnode *pVnode) {
  const SSyncCfg  *pCfg = &pVnode->config.syncCfg;
  const SNodeInfo *pSelf = &pCfg->nodeInfo[pCfg->myIndex];
  return pSelf->nodeId;
}
863

864
/*
 * Fill pSnap from the vnode's committed state.
 *
 * lastApplyIndex/lastApplyTerm come from state.committed/state.commitTerm and
 * lastConfigIndex is set to -1. The snapshot state is COMPLETE only when the
 * tsdb file-system state is TSDB_FS_STATE_NORMAL. For the two PREP_SNAPSHOT
 * message types the result of tsdbSnapPrepDescription() is returned;
 * otherwise 0.
 */
int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) {
  pSnap->lastApplyIndex = pVnode->state.committed;
  pSnap->lastApplyTerm = pVnode->state.commitTerm;
  pSnap->lastConfigIndex = -1;
  pSnap->state = (tsdbSnapGetFsState(pVnode) == TSDB_FS_STATE_NORMAL) ? SYNC_FSM_STATE_COMPLETE
                                                                      : SYNC_FSM_STATE_INCOMPLETE;

  bool isPrep = (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY);
  if (!isPrep) {
    return 0;
  }

  int code = tsdbSnapPrepDescription(pVnode, pSnap);
  return code;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc