• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3599

08 Feb 2025 11:23AM UTC coverage: 1.77% (-61.6%) from 63.396%
#3599

push

travis-ci

web-flow
Merge pull request #29712 from taosdata/fix/TD-33652-3.0

fix: reduce write rows from 30w to 3w

3776 of 278949 branches covered (1.35%)

Branch coverage included in aggregate %.

6012 of 274147 relevant lines covered (2.19%)

1642.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/source/dnode/mnode/impl/src/mndVgroup.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17
#include "audit.h"
18
#include "mndArbGroup.h"
19
#include "mndDb.h"
20
#include "mndDnode.h"
21
#include "mndMnode.h"
22
#include "mndPrivilege.h"
23
#include "mndShow.h"
24
#include "mndStb.h"
25
#include "mndStream.h"
26
#include "mndTopic.h"
27
#include "mndTrans.h"
28
#include "mndUser.h"
29
#include "mndVgroup.h"
30
#include "tmisce.h"
31

32
#define VGROUP_VER_NUMBER   1
33
#define VGROUP_RESERVE_SIZE 64
34

35
static int32_t mndVgroupActionInsert(SSdb *pSdb, SVgObj *pVgroup);
36
static int32_t mndVgroupActionDelete(SSdb *pSdb, SVgObj *pVgroup);
37
static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew);
38
static int32_t mndNewVgActionValidate(SMnode *pMnode, STrans *pTrans, SSdbRaw *pRaw);
39

40
static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
41
static void    mndCancelGetNextVgroup(SMnode *pMnode, void *pIter);
42
static int32_t mndRetrieveVnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
43
static void    mndCancelGetNextVnode(SMnode *pMnode, void *pIter);
44

45
static int32_t mndProcessRedistributeVgroupMsg(SRpcMsg *pReq);
46
static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq);
47
static int32_t mndProcessBalanceVgroupMsg(SRpcMsg *pReq);
48
static int32_t mndProcessVgroupBalanceLeaderMsg(SRpcMsg *pReq);
49

50
/*
 * Registers everything the mnode needs to manage vgroups:
 *   - the SDB table callbacks for SDB_VGROUP (encode/decode/insert/update/delete/validate),
 *   - the handlers for vnode-related response messages (all routed to mndTransProcessRsp),
 *   - the handlers for the vgroup management requests,
 *   - the SHOW retrieve / free-iterator handlers for the vgroup and vnode tables.
 *
 * Returns whatever sdbSetTable() returns for the vgroup table.
 */
int32_t mndInitVgroup(SMnode *pMnode) {
  SSdbTable table = {0};
  table.sdbType = SDB_VGROUP;
  table.keyType = SDB_KEY_INT32;
  table.encodeFp = (SdbEncodeFp)mndVgroupActionEncode;
  table.decodeFp = (SdbDecodeFp)mndVgroupActionDecode;
  table.insertFp = (SdbInsertFp)mndVgroupActionInsert;
  table.updateFp = (SdbUpdateFp)mndVgroupActionUpdate;
  table.deleteFp = (SdbDeleteFp)mndVgroupActionDelete;
  table.validateFp = (SdbValidateFp)mndNewVgActionValidate;

  // Responses coming back from dnodes/vnodes are all handled by the transaction layer.
  mndSetMsgHandle(pMnode, TDMT_DND_CREATE_VNODE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_REPLICA_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIG_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIRM_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_ALTER_HASHRANGE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_DND_DROP_VNODE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_VND_DISABLE_WRITE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_SYNC_FORCE_FOLLOWER_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_DND_ALTER_VNODE_TYPE_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_DND_CHECK_VNODE_LEARNER_CATCHUP_RSP, mndTransProcessRsp);
  mndSetMsgHandle(pMnode, TDMT_SYNC_CONFIG_CHANGE_RSP, mndTransProcessRsp);

  // Vgroup management requests handled in this file.
  mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg);
  mndSetMsgHandle(pMnode, TDMT_MND_SPLIT_VGROUP, mndProcessSplitVgroupMsg);
  mndSetMsgHandle(pMnode, TDMT_MND_BALANCE_VGROUP, mndProcessBalanceVgroupMsg);
  mndSetMsgHandle(pMnode, TDMT_MND_BALANCE_VGROUP_LEADER, mndProcessVgroupBalanceLeaderMsg);

  // SHOW support for the vgroup and vnode tables.
  mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndRetrieveVgroups);
  mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndCancelGetNextVgroup);
  mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VNODES, mndRetrieveVnodes);
  mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_VNODES, mndCancelGetNextVnode);

  return sdbSetTable(pMnode->pSdb, table);
}
88

89
/* Counterpart of mndInitVgroup(); currently there is nothing to release. */
void mndCleanupVgroup(SMnode *pMnode) { (void)pMnode; }
×
90

91
/*
 * Serializes a vgroup object into a freshly allocated SDB raw record.
 *
 * Field order written: vgId, createdTime, updateTime, version, hashBegin, hashEnd,
 * dbName, dbUid, isTsma, replica, one dnodeId per replica, syncConfChangeVer, then
 * VGROUP_RESERVE_SIZE reserved bytes.
 *
 * Returns the raw record on success. On any failure the record is freed, NULL is
 * returned and terrno is left non-zero (it is preset to TSDB_CODE_OUT_OF_MEMORY and
 * only cleared once every field has been written).
 */
SSdbRaw *mndVgroupActionEncode(SVgObj *pVgroup) {
  // NOTE(review): code/lino are not used directly here; presumably the SDB_SET_* macros
  // reference them - confirm before removing.
  int32_t  code = 0;
  int32_t  lino = 0;
  int32_t  dataPos = 0;
  SSdbRaw *pRaw = NULL;

  terrno = TSDB_CODE_OUT_OF_MEMORY;

  pRaw = sdbAllocRaw(SDB_VGROUP, VGROUP_VER_NUMBER, sizeof(SVgObj) + VGROUP_RESERVE_SIZE);
  if (pRaw == NULL) goto _OVER;

  SDB_SET_INT32(pRaw, dataPos, pVgroup->vgId, _OVER)
  SDB_SET_INT64(pRaw, dataPos, pVgroup->createdTime, _OVER)
  SDB_SET_INT64(pRaw, dataPos, pVgroup->updateTime, _OVER)
  SDB_SET_INT32(pRaw, dataPos, pVgroup->version, _OVER)
  SDB_SET_INT32(pRaw, dataPos, pVgroup->hashBegin, _OVER)
  SDB_SET_INT32(pRaw, dataPos, pVgroup->hashEnd, _OVER)
  SDB_SET_BINARY(pRaw, dataPos, pVgroup->dbName, TSDB_DB_FNAME_LEN, _OVER)
  SDB_SET_INT64(pRaw, dataPos, pVgroup->dbUid, _OVER)
  SDB_SET_INT8(pRaw, dataPos, pVgroup->isTsma, _OVER)
  SDB_SET_INT8(pRaw, dataPos, pVgroup->replica, _OVER)
  for (int8_t idx = 0; idx < pVgroup->replica; ++idx) {
    SDB_SET_INT32(pRaw, dataPos, pVgroup->vnodeGid[idx].dnodeId, _OVER)
  }
  SDB_SET_INT32(pRaw, dataPos, pVgroup->syncConfChangeVer, _OVER)
  SDB_SET_RESERVE(pRaw, dataPos, VGROUP_RESERVE_SIZE, _OVER)
  SDB_SET_DATALEN(pRaw, dataPos, _OVER)

  terrno = 0;

_OVER:
  if (terrno == 0) {
    mTrace("vgId:%d, encode to raw:%p, row:%p", pVgroup->vgId, pRaw, pVgroup);
    return pRaw;
  }

  mError("vgId:%d, failed to encode to raw:%p since %s", pVgroup->vgId, pRaw, terrstr());
  sdbFreeRaw(pRaw);
  return NULL;
}
130

131
/*
 * Rebuilds a vgroup row from an SDB raw record (inverse of mndVgroupActionEncode()).
 *
 * The soft version of the record must be in [1, VGROUP_VER_NUMBER]; otherwise terrno is
 * set to TSDB_CODE_SDB_INVALID_DATA_VER. syncConfChangeVer is only read when the record
 * is long enough to contain it in front of the reserved area, so shorter records still
 * decode. When the vgroup has a single replica, that replica is marked as the sync
 * leader.
 *
 * The decoded replica count is validated against the capacity of vnodeGid[] before it
 * is used as a loop bound, so a corrupted record cannot cause writes past the array.
 *
 * Returns the new row on success; on failure the row (if any) is freed, NULL is
 * returned and terrno is non-zero.
 */
SSdbRow *mndVgroupActionDecode(SSdbRaw *pRaw) {
  // NOTE(review): code/lino are not used directly here; presumably the SDB_GET_* macros
  // reference them - confirm before removing.
  int32_t code = 0;
  int32_t lino = 0;
  terrno = TSDB_CODE_OUT_OF_MEMORY;
  SSdbRow *pRow = NULL;
  SVgObj  *pVgroup = NULL;

  int8_t sver = 0;
  if (sdbGetRawSoftVer(pRaw, &sver) != 0) goto _OVER;

  if (sver < 1 || sver > VGROUP_VER_NUMBER) {
    terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
    goto _OVER;
  }

  pRow = sdbAllocRow(sizeof(SVgObj));
  if (pRow == NULL) goto _OVER;

  pVgroup = sdbGetRowObj(pRow);
  if (pVgroup == NULL) goto _OVER;

  int32_t dataPos = 0;
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->vgId, _OVER)
  SDB_GET_INT64(pRaw, dataPos, &pVgroup->createdTime, _OVER)
  SDB_GET_INT64(pRaw, dataPos, &pVgroup->updateTime, _OVER)
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->version, _OVER)
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->hashBegin, _OVER)
  SDB_GET_INT32(pRaw, dataPos, &pVgroup->hashEnd, _OVER)
  SDB_GET_BINARY(pRaw, dataPos, pVgroup->dbName, TSDB_DB_FNAME_LEN, _OVER)
  SDB_GET_INT64(pRaw, dataPos, &pVgroup->dbUid, _OVER)
  SDB_GET_INT8(pRaw, dataPos, &pVgroup->isTsma, _OVER)
  SDB_GET_INT8(pRaw, dataPos, &pVgroup->replica, _OVER)

  // vnodeGid[] holds TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA entries (the same size
  // mndVgroupActionUpdate() copies); reject a larger count instead of overrunning it.
  if (pVgroup->replica > TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA) {
    terrno = TSDB_CODE_APP_ERROR;
    goto _OVER;
  }

  for (int8_t i = 0; i < pVgroup->replica; ++i) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[i];
    SDB_GET_INT32(pRaw, dataPos, &pVgid->dnodeId, _OVER)
    if (pVgroup->replica == 1) {
      // A lone replica is recorded as the leader.
      pVgid->syncState = TAOS_SYNC_STATE_LEADER;
    }
  }

  // Only read syncConfChangeVer when the record has room for it before the reserve.
  if (dataPos + sizeof(int32_t) + VGROUP_RESERVE_SIZE <= pRaw->dataLen) {
    SDB_GET_INT32(pRaw, dataPos, &pVgroup->syncConfChangeVer, _OVER)
  }

  SDB_GET_RESERVE(pRaw, dataPos, VGROUP_RESERVE_SIZE, _OVER)

  terrno = 0;

_OVER:
  if (terrno != 0) {
    mError("vgId:%d, failed to decode from raw:%p since %s", pVgroup == NULL ? 0 : pVgroup->vgId, pRaw, terrstr());
    taosMemoryFreeClear(pRow);
    return NULL;
  }

  mTrace("vgId:%d, decode from raw:%p, row:%p", pVgroup->vgId, pRaw, pVgroup);
  return pRow;
}
188

189
/*
 * Validates a raw vgroup record that a transaction wants to insert.
 *
 * The record is decoded and its vgId compared with the current maximum vgroup id
 * reported by sdbGetMaxId(): if that maximum is greater than the new vgId the id is
 * reported as already in use and -1 is returned. If decoding fails, terrno is returned
 * when set, otherwise TSDB_CODE_MND_RETURN_VALUE_NULL. Returns 0 when the record is
 * acceptable. The temporary row is always released before returning.
 */
static int32_t mndNewVgActionValidate(SMnode *pMnode, STrans *pTrans, SSdbRaw *pRaw) {
  SSdb   *pSdb = pMnode->pSdb;
  SVgObj *pVgroup = NULL;
  int     code = -1;

  SSdbRow *pRow = mndVgroupActionDecode(pRaw);
  if (pRow != NULL) {
    pVgroup = sdbGetRowObj(pRow);
  }
  if (pVgroup == NULL) {
    // Either the decode or the row-object lookup failed.
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    goto _OVER;
  }

  int32_t maxVgId = sdbGetMaxId(pSdb, SDB_VGROUP);
  if (maxVgId <= pVgroup->vgId) {
    code = 0;
  } else {
    mError("trans:%d, vgroup id %d already in use. maxVgId:%d", pTrans->id, pVgroup->vgId, maxVgId);
  }

_OVER:
  if (pVgroup != NULL) mndVgroupActionDelete(pSdb, pVgroup);
  taosMemoryFreeClear(pRow);
  TAOS_RETURN(code);
}
220

221
/* SDB insert callback for vgroups: only traces the event and always returns 0. */
static int32_t mndVgroupActionInsert(SSdb *pSdb, SVgObj *pVgroup) {
  (void)pSdb;
  mTrace("vgId:%d, perform insert action, row:%p", pVgroup->vgId, pVgroup);
  return 0;
}
225

226
/* SDB delete callback for vgroups: only traces the event and always returns 0. */
static int32_t mndVgroupActionDelete(SSdb *pSdb, SVgObj *pVgroup) {
  (void)pSdb;
  mTrace("vgId:%d, perform delete action, row:%p", pVgroup->vgId, pVgroup);
  return 0;
}
230

231
/*
 * SDB update callback: merges a new vgroup row into the existing one.
 *
 * Persistent fields (updateTime, version, hash range, replica, isTsma,
 * syncConfChangeVer and the whole vnodeGid array) are taken from pNew. Before the
 * vnodeGid array is copied, the sync state of every replica in pNew whose dnodeId also
 * appears in pOld is overwritten with the state held in pOld. The statistics fields of
 * pNew (tables, time series, storage, points written, compact) are overwritten with
 * the values held in pOld. Always returns 0.
 */
static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew) {
  mTrace("vgId:%d, perform update action, old row:%p new row:%p", pOld->vgId, pOld, pNew);

  pOld->updateTime = pNew->updateTime;
  pOld->version = pNew->version;
  pOld->hashBegin = pNew->hashBegin;
  pOld->hashEnd = pNew->hashEnd;
  pOld->replica = pNew->replica;
  pOld->isTsma = pNew->isTsma;

  // NOTE(review): pOld->replica has already been overwritten with the new count above,
  // so the inner loop scans pNew->replica entries of the old array - confirm intended.
  for (int32_t n = 0; n < pNew->replica; ++n) {
    SVnodeGid *pDst = &pNew->vnodeGid[n];
    for (int32_t o = 0; o < pOld->replica; ++o) {
      SVnodeGid *pSrc = &pOld->vnodeGid[o];
      if (pDst->dnodeId != pSrc->dnodeId) continue;

      pDst->syncState = pSrc->syncState;
      pDst->syncRestore = pSrc->syncRestore;
      pDst->syncCanRead = pSrc->syncCanRead;
    }
  }

  // Carry the statistics held in the old row over to the new row.
  pNew->numOfTables = pOld->numOfTables;
  pNew->numOfTimeSeries = pOld->numOfTimeSeries;
  pNew->totalStorage = pOld->totalStorage;
  pNew->compStorage = pOld->compStorage;
  pNew->pointsWritten = pOld->pointsWritten;
  pNew->compact = pOld->compact;

  memcpy(pOld->vnodeGid, pNew->vnodeGid, (TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA) * sizeof(SVnodeGid));
  pOld->syncConfChangeVer = pNew->syncConfChangeVer;
  return 0;
}
260

261
/*
 * Acquires the vgroup with the given id from the SDB.
 *
 * Returns the object, or NULL on failure. When the SDB reports
 * TSDB_CODE_SDB_OBJ_NOT_THERE, terrno is rewritten to TSDB_CODE_MND_VGROUP_NOT_EXIST.
 * A successful acquire is paired with mndReleaseVgroup().
 */
SVgObj *mndAcquireVgroup(SMnode *pMnode, int32_t vgId) {
  SVgObj *pVgroup = sdbAcquire(pMnode->pSdb, SDB_VGROUP, &vgId);
  if (pVgroup != NULL) {
    return pVgroup;
  }

  if (terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) {
    terrno = TSDB_CODE_MND_VGROUP_NOT_EXIST;
  }
  return NULL;
}
269

270
/* Hands a vgroup obtained from mndAcquireVgroup() back to the SDB via sdbRelease(). */
void mndReleaseVgroup(SMnode *pMnode, SVgObj *pVgroup) {
  sdbRelease(pMnode->pSdb, pVgroup);
}
×
274

275
/*
 * Builds the serialized "create vnode" request that is sent to pDnode for pVgroup.
 *
 * The request copies the database configuration from pDb, the hash range and tsma
 * information from pVgroup, and one SReplica entry (id, port, fqdn) per member of the
 * vgroup. Members whose nodeRole is TAOS_SYNC_ROLE_VOTER go into replicas[], all others
 * into learnerReplicas[]; selfIndex / learnerSelfIndex is the position of pDnode inside
 * the respective array.
 *
 * Side effect: pVgroup->syncConfChangeVer is incremented (even if the function later
 * fails) and the new value is sent as changeVersion.
 *
 * Returns a buffer allocated with taosMemoryMalloc() and stores its length in
 * *pContLen. Returns NULL with terrno set when pDnode is not a member of the vgroup
 * (TSDB_CODE_APP_ERROR), when a member dnode cannot be acquired, or when allocation or
 * serialization fails.
 */
void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen) {
  SCreateVnodeReq createReq = {0};
  createReq.vgId = pVgroup->vgId;
  memcpy(createReq.db, pDb->name, TSDB_DB_FNAME_LEN);
  createReq.dbUid = pDb->uid;
  createReq.vgVersion = pVgroup->version;
  createReq.numOfStables = pDb->cfg.numOfStables;
  createReq.buffer = pDb->cfg.buffer;
  createReq.pageSize = pDb->cfg.pageSize;
  createReq.pages = pDb->cfg.pages;
  createReq.cacheLastSize = pDb->cfg.cacheLastSize;
  createReq.daysPerFile = pDb->cfg.daysPerFile;
  createReq.daysToKeep0 = pDb->cfg.daysToKeep0;
  createReq.daysToKeep1 = pDb->cfg.daysToKeep1;
  createReq.daysToKeep2 = pDb->cfg.daysToKeep2;
  createReq.keepTimeOffset = pDb->cfg.keepTimeOffset;
  createReq.s3ChunkSize = pDb->cfg.s3ChunkSize;
  createReq.s3KeepLocal = pDb->cfg.s3KeepLocal;
  createReq.s3Compact = pDb->cfg.s3Compact;
  createReq.minRows = pDb->cfg.minRows;
  createReq.maxRows = pDb->cfg.maxRows;
  createReq.walFsyncPeriod = pDb->cfg.walFsyncPeriod;
  createReq.walLevel = pDb->cfg.walLevel;
  createReq.precision = pDb->cfg.precision;
  createReq.compression = pDb->cfg.compression;
  createReq.strict = pDb->cfg.strict;
  createReq.cacheLast = pDb->cfg.cacheLast;
  createReq.replica = 0;
  createReq.learnerReplica = 0;
  createReq.selfIndex = -1;
  createReq.learnerSelfIndex = -1;
  createReq.hashBegin = pVgroup->hashBegin;
  createReq.hashEnd = pVgroup->hashEnd;
  createReq.hashMethod = pDb->cfg.hashMethod;
  createReq.numOfRetensions = pDb->cfg.numOfRetensions;
  createReq.pRetensions = pDb->cfg.pRetensions;
  createReq.isTsma = pVgroup->isTsma;
  createReq.pTsma = pVgroup->pTsma;
  createReq.walRetentionPeriod = pDb->cfg.walRetentionPeriod;
  createReq.walRetentionSize = pDb->cfg.walRetentionSize;
  createReq.walRollPeriod = pDb->cfg.walRollPeriod;
  createReq.walSegmentSize = pDb->cfg.walSegmentSize;
  createReq.sstTrigger = pDb->cfg.sstTrigger;
  createReq.hashPrefix = pDb->cfg.hashPrefix;
  createReq.hashSuffix = pDb->cfg.hashSuffix;
  createReq.tsdbPageSize = pDb->cfg.tsdbPageSize;
  // Bump the vgroup's config-change version once; the request carries the new value.
  createReq.changeVersion = ++(pVgroup->syncConfChangeVer);
  createReq.encryptAlgorithm = pDb->cfg.encryptAlgorithm;

  for (int32_t v = 0; v < pVgroup->replica; ++v) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
    bool       isVoter = (pVgid->nodeRole == TAOS_SYNC_ROLE_VOTER);
    SReplica  *pReplica = isVoter ? &createReq.replicas[createReq.replica]
                                  : &createReq.learnerReplicas[createReq.learnerReplica];

    SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
    if (pVgidDnode == NULL) {
      return NULL;
    }

    pReplica->id = pVgidDnode->id;
    pReplica->port = pVgidDnode->port;
    memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
    mndReleaseDnode(pMnode, pVgidDnode);

    // The self index is the slot just filled in the array matching the member's role.
    if (isVoter) {
      if (pDnode->id == pVgid->dnodeId) {
        createReq.selfIndex = createReq.replica;
      }
      createReq.replica++;
    } else {
      if (pDnode->id == pVgid->dnodeId) {
        createReq.learnerSelfIndex = createReq.learnerReplica;
      }
      createReq.learnerReplica++;
    }
  }

  // The target dnode must be a member of the vgroup in one of the two roles.
  if (createReq.selfIndex == -1 && createReq.learnerSelfIndex == -1) {
    terrno = TSDB_CODE_APP_ERROR;
    return NULL;
  }

  mInfo(
      "vgId:%d, build create vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d "
      "changeVersion:%d",
      createReq.vgId, createReq.replica, createReq.selfIndex, createReq.learnerReplica, createReq.learnerSelfIndex,
      createReq.strict, createReq.changeVersion);
  for (int32_t i = 0; i < createReq.replica; ++i) {
    mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.replicas[i].fqdn, createReq.replicas[i].port);
  }
  for (int32_t i = 0; i < createReq.learnerReplica; ++i) {
    // Labelled "learnerReplica" (as the other request builders in this file do) so
    // learner endpoints are distinguishable from voter endpoints in the log.
    mInfo("vgId:%d, learnerReplica:%d ep:%s:%u", createReq.vgId, i, createReq.learnerReplicas[i].fqdn,
          createReq.learnerReplicas[i].port);
  }

  // First pass computes the encoded size, second pass fills the buffer.
  int32_t contLen = tSerializeSCreateVnodeReq(NULL, 0, &createReq);
  if (contLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  if (tSerializeSCreateVnodeReq(pReq, contLen, &createReq) < 0) {
    terrno = TSDB_CODE_APP_ERROR;
    taosMemoryFree(pReq);
    mError("vgId:%d, failed to serialize create vnode req,since %s", createReq.vgId, terrstr());
    return NULL;
  }
  *pContLen = contLen;
  return pReq;
}
404

405
/*
 * Builds the "alter vnode config" message for pVgroup from the configuration of pDb.
 *
 * The returned buffer starts with an SMsgHead (contLen and vgId stored with htonl())
 * followed by the serialized SAlterVnodeConfigReq. *pContLen receives the total length
 * including the header.
 *
 * Returns a buffer allocated with taosMemoryMalloc(), or NULL with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY when sizing, allocation or serialization fails.
 */
static void *mndBuildAlterVnodeConfigReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen) {
  SAlterVnodeConfigReq alterReq = {0};
  alterReq.vgVersion = pVgroup->version;
  alterReq.buffer = pDb->cfg.buffer;
  alterReq.pageSize = pDb->cfg.pageSize;
  alterReq.pages = pDb->cfg.pages;
  alterReq.cacheLastSize = pDb->cfg.cacheLastSize;
  alterReq.daysPerFile = pDb->cfg.daysPerFile;
  alterReq.daysToKeep0 = pDb->cfg.daysToKeep0;
  alterReq.daysToKeep1 = pDb->cfg.daysToKeep1;
  alterReq.daysToKeep2 = pDb->cfg.daysToKeep2;
  alterReq.keepTimeOffset = pDb->cfg.keepTimeOffset;
  alterReq.walFsyncPeriod = pDb->cfg.walFsyncPeriod;
  alterReq.walLevel = pDb->cfg.walLevel;
  alterReq.strict = pDb->cfg.strict;
  alterReq.cacheLast = pDb->cfg.cacheLast;
  alterReq.sttTrigger = pDb->cfg.sstTrigger;
  alterReq.minRows = pDb->cfg.minRows;
  alterReq.walRetentionPeriod = pDb->cfg.walRetentionPeriod;
  alterReq.walRetentionSize = pDb->cfg.walRetentionSize;
  alterReq.s3KeepLocal = pDb->cfg.s3KeepLocal;
  alterReq.s3Compact = pDb->cfg.s3Compact;

  mInfo("vgId:%d, build alter vnode config req", pVgroup->vgId);

  // Size of the serialized request alone, without the message header.
  int32_t bodyLen = tSerializeSAlterVnodeConfigReq(NULL, 0, &alterReq);
  if (bodyLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
  int32_t contLen = bodyLen + (int32_t)sizeof(SMsgHead);

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  SMsgHead *pHead = pReq;
  pHead->contLen = htonl(contLen);
  pHead->vgId = htonl(pVgroup->vgId);

  // The body is written after the header, so only bodyLen bytes are available there;
  // passing the full contLen would overstate the buffer by sizeof(SMsgHead).
  if (tSerializeSAlterVnodeConfigReq((char *)pReq + sizeof(SMsgHead), bodyLen, &alterReq) < 0) {
    taosMemoryFree(pReq);
    mError("vgId:%d, failed to serialize alter vnode config req,since %s", pVgroup->vgId, terrstr());
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
  *pContLen = contLen;
  return pReq;
}
455

456
/*
 * Builds the serialized "alter vnode replica" request addressed to the vgroup member
 * running on dnodeId.
 *
 * Every member of pVgroup is added to replicas[] (nodeRole == TAOS_SYNC_ROLE_VOTER) or
 * to learnerReplicas[] (any other role) with the id, port and fqdn of its dnode.
 *
 * Side effect: pVgroup->syncConfChangeVer is incremented and sent as changeVersion.
 *
 * Returns a buffer allocated with taosMemoryMalloc() and its length in *pContLen, or
 * NULL when a member dnode cannot be acquired, when dnodeId is not a member
 * (terrno = TSDB_CODE_APP_ERROR), or when allocation/serialization fails
 * (terrno = TSDB_CODE_OUT_OF_MEMORY).
 */
static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId,
                                          int32_t *pContLen) {
  SAlterVnodeReplicaReq alterReq = {0};
  alterReq.vgId = pVgroup->vgId;
  alterReq.strict = pDb->cfg.strict;
  alterReq.selfIndex = -1;
  alterReq.learnerSelfIndex = -1;
  alterReq.changeVersion = ++(pVgroup->syncConfChangeVer);

  for (int32_t v = 0; v < pVgroup->replica; ++v) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
    bool       isVoter = (pVgid->nodeRole == TAOS_SYNC_ROLE_VOTER);
    SReplica  *pReplica = NULL;

    if (isVoter) {
      pReplica = &alterReq.replicas[alterReq.replica];
      alterReq.replica++;
    } else {
      pReplica = &alterReq.learnerReplicas[alterReq.learnerReplica];
      alterReq.learnerReplica++;
    }

    SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
    if (pVgidDnode == NULL) return NULL;

    pReplica->id = pVgidDnode->id;
    pReplica->port = pVgidDnode->port;
    memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
    mndReleaseDnode(pMnode, pVgidDnode);

    // NOTE(review): the self index is the position in vnodeGid[] (v), not the position
    // in replicas[]/learnerReplicas[] as mndBuildCreateVnodeReq() uses - confirm that
    // this is intended when voters and learners are interleaved.
    if (dnodeId == pVgid->dnodeId) {
      if (isVoter) {
        alterReq.selfIndex = v;
      } else {
        alterReq.learnerSelfIndex = v;
      }
    }
  }

  mInfo(
      "vgId:%d, build alter vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d "
      "changeVersion:%d",
      alterReq.vgId, alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica, alterReq.learnerSelfIndex,
      alterReq.strict, alterReq.changeVersion);
  for (int32_t i = 0; i < alterReq.replica; ++i) {
    mInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port);
  }
  for (int32_t i = 0; i < alterReq.learnerReplica; ++i) {
    mInfo("vgId:%d, learnerReplica:%d ep:%s:%u", alterReq.vgId, i, alterReq.learnerReplicas[i].fqdn,
          alterReq.learnerReplicas[i].port);
  }

  // The addressed dnode has to be one of the members.
  if (alterReq.selfIndex == -1 && alterReq.learnerSelfIndex == -1) {
    terrno = TSDB_CODE_APP_ERROR;
    return NULL;
  }

  // Size the message first; a negative size and a failed allocation are both reported
  // as out of memory.
  int32_t contLen = tSerializeSAlterVnodeReplicaReq(NULL, 0, &alterReq);
  void   *pReq = (contLen < 0) ? NULL : taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  if (tSerializeSAlterVnodeReplicaReq(pReq, contLen, &alterReq) < 0) {
    mError("vgId:%d, failed to serialize alter vnode req,since %s", alterReq.vgId, terrstr());
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}
538

539
/*
 * Builds the serialized "check learner catch-up" request addressed to the vgroup member
 * running on dnodeId.
 *
 * The member list is filled in as in mndBuildAlterVnodeReplicaReq(): voters go to
 * replicas[], every other role to learnerReplicas[]. The request is encoded with
 * tSerializeSAlterVnodeReplicaReq(). Unlike the alter-replica builder, this function
 * does not touch pVgroup->syncConfChangeVer.
 *
 * Returns a buffer allocated with taosMemoryMalloc() and its length in *pContLen, or
 * NULL when a member dnode cannot be acquired, when dnodeId is not a member
 * (terrno = TSDB_CODE_APP_ERROR), or when allocation/serialization fails
 * (terrno = TSDB_CODE_OUT_OF_MEMORY).
 */
static void *mndBuildCheckLearnCatchupReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId,
                                          int32_t *pContLen) {
  SCheckLearnCatchupReq req = {0};
  req.vgId = pVgroup->vgId;
  req.strict = pDb->cfg.strict;
  req.selfIndex = -1;
  req.learnerSelfIndex = -1;

  for (int32_t v = 0; v < pVgroup->replica; ++v) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
    bool       isVoter = (pVgid->nodeRole == TAOS_SYNC_ROLE_VOTER);
    SReplica  *pReplica = NULL;

    if (isVoter) {
      pReplica = &req.replicas[req.replica];
      req.replica++;
    } else {
      pReplica = &req.learnerReplicas[req.learnerReplica];
      req.learnerReplica++;
    }

    SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
    if (pVgidDnode == NULL) return NULL;

    pReplica->id = pVgidDnode->id;
    pReplica->port = pVgidDnode->port;
    memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
    mndReleaseDnode(pMnode, pVgidDnode);

    // Record where the addressed dnode sits (index within vnodeGid[]).
    if (dnodeId == pVgid->dnodeId) {
      if (isVoter) {
        req.selfIndex = v;
      } else {
        req.learnerSelfIndex = v;
      }
    }
  }

  mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d",
        req.vgId, req.replica, req.selfIndex, req.learnerReplica, req.learnerSelfIndex, req.strict);
  for (int32_t i = 0; i < req.replica; ++i) {
    mInfo("vgId:%d, replica:%d ep:%s:%u", req.vgId, i, req.replicas[i].fqdn, req.replicas[i].port);
  }
  for (int32_t i = 0; i < req.learnerReplica; ++i) {
    mInfo("vgId:%d, learnerReplica:%d ep:%s:%u", req.vgId, i, req.learnerReplicas[i].fqdn, req.learnerReplicas[i].port);
  }

  if (req.selfIndex == -1 && req.learnerSelfIndex == -1) {
    terrno = TSDB_CODE_APP_ERROR;
    return NULL;
  }

  // Size the message first; a negative size and a failed allocation are both reported
  // as out of memory.
  int32_t contLen = tSerializeSAlterVnodeReplicaReq(NULL, 0, &req);
  void   *pReq = (contLen < 0) ? NULL : taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  if (tSerializeSAlterVnodeReplicaReq(pReq, contLen, &req) < 0) {
    mError("vgId:%d, failed to serialize alter vnode req,since %s", req.vgId, terrstr());
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}
616

617
/*
 * Builds the serialized "disable vnode write" request (disable = 1) for vgId.
 *
 * Returns a buffer allocated with taosMemoryMalloc() and its length in *pContLen, or
 * NULL with terrno = TSDB_CODE_OUT_OF_MEMORY when sizing, allocation or serialization
 * fails.
 */
static void *mndBuildDisableVnodeWriteReq(SMnode *pMnode, SDbObj *pDb, int32_t vgId, int32_t *pContLen) {
  SDisableVnodeWriteReq disableReq = {0};
  disableReq.vgId = vgId;
  disableReq.disable = 1;

  mInfo("vgId:%d, build disable vnode write req", vgId);

  // A negative size and a failed allocation are both reported as out of memory.
  int32_t contLen = tSerializeSDisableVnodeWriteReq(NULL, 0, &disableReq);
  void   *pReq = (contLen < 0) ? NULL : taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  if (tSerializeSDisableVnodeWriteReq(pReq, contLen, &disableReq) < 0) {
    mError("vgId:%d, failed to serialize disable vnode write req,since %s", vgId, terrstr());
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}
645

646
/*
 * Builds the serialized "alter vnode hash range" request: source vgroup srcVgId,
 * destination pVgroup->vgId, carrying the hash range of pVgroup.
 *
 * Side effect: pVgroup->syncConfChangeVer is incremented and sent as changeVersion.
 *
 * Returns a buffer allocated with taosMemoryMalloc() and its length in *pContLen, or
 * NULL with terrno = TSDB_CODE_OUT_OF_MEMORY when sizing, allocation or serialization
 * fails.
 */
static void *mndBuildAlterVnodeHashRangeReq(SMnode *pMnode, int32_t srcVgId, SVgObj *pVgroup, int32_t *pContLen) {
  SAlterVnodeHashRangeReq alterReq = {0};
  alterReq.srcVgId = srcVgId;
  alterReq.dstVgId = pVgroup->vgId;
  alterReq.hashBegin = pVgroup->hashBegin;
  alterReq.hashEnd = pVgroup->hashEnd;
  alterReq.changeVersion = ++(pVgroup->syncConfChangeVer);

  mInfo("vgId:%d, build alter vnode hashrange req, dstVgId:%d, hashrange:[%u, %u]", srcVgId, pVgroup->vgId,
        pVgroup->hashBegin, pVgroup->hashEnd);

  // A negative size and a failed allocation are both reported as out of memory.
  int32_t contLen = tSerializeSAlterVnodeHashRangeReq(NULL, 0, &alterReq);
  void   *pReq = (contLen < 0) ? NULL : taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  if (tSerializeSAlterVnodeHashRangeReq(pReq, contLen, &alterReq) < 0) {
    mError("vgId:%d, failed to serialize alter vnode hashrange req,since %s", srcVgId, terrstr());
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}
678

679
/*
 * Builds the serialized "drop vnode" request telling pDnode to drop its vnode of
 * pVgroup; the request also carries the name and uid of pDb.
 *
 * Returns a buffer allocated with taosMemoryMalloc() and its length in *pContLen, or
 * NULL with terrno = TSDB_CODE_OUT_OF_MEMORY when sizing, allocation or serialization
 * fails.
 */
void *mndBuildDropVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen) {
  SDropVnodeReq dropReq = {0};
  dropReq.dnodeId = pDnode->id;
  dropReq.vgId = pVgroup->vgId;
  dropReq.dbUid = pDb->uid;
  memcpy(dropReq.db, pDb->name, TSDB_DB_FNAME_LEN);

  mInfo("vgId:%d, build drop vnode req", dropReq.vgId);

  // A negative size and a failed allocation are both reported as out of memory.
  int32_t contLen = tSerializeSDropVnodeReq(NULL, 0, &dropReq);
  void   *pReq = (contLen < 0) ? NULL : taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  if (tSerializeSDropVnodeReq(pReq, contLen, &dropReq) < 0) {
    mError("vgId:%d, failed to serialize drop vnode req,since %s", dropReq.vgId, terrstr());
    taosMemoryFree(pReq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  *pContLen = contLen;
  return pReq;
}
708

709
/*
 * sdbTraverse() callback: zeroes the vnode and other-node counters of a dnode.
 * Always returns true.
 */
static bool mndResetDnodesArrayFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  SDnodeObj *pTarget = (SDnodeObj *)pObj;

  pTarget->numOfOtherNodes = 0;
  pTarget->numOfVnodes = 0;
  return true;
}
715

716
/*
 * sdbTraverse() callback used by mndBuildDnodesArray() to collect candidate dnodes.
 *
 *   p1 - SArray receiving a copy of every accepted SDnodeObj
 *   p2 - pointer to the dnode id that must be skipped
 *   p3 - optional SArray of dnode ids; when non-NULL and non-empty, only dnodes whose
 *        id appears in it are considered
 *
 * For each considered dnode the vnode count and memory usage are refreshed, and
 * numOfOtherNodes is incremented when the dnode also hosts an mnode. The dnode is
 * pushed to the array only when it is online and supports at least one vnode.
 *
 * Returns false only when pushing to the array fails; true otherwise.
 */
static bool mndBuildDnodesArrayFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  SDnodeObj *pDnode = pObj;
  SArray    *pArray = p1;
  int32_t    exceptDnodeId = *(int32_t *)p2;
  SArray    *dnodeList = p3;

  if (pDnode->id == exceptDnodeId) {
    return true;
  }

  // Apply the optional whitelist: skip dnodes that are not named in it.
  int32_t dnodeListSize = (dnodeList != NULL) ? taosArrayGetSize(dnodeList) : 0;
  if (dnodeListSize > 0) {
    bool inDnodeList = false;
    for (int32_t index = 0; index < dnodeListSize && !inDnodeList; ++index) {
      int32_t listedId = *(int32_t *)taosArrayGet(dnodeList, index);
      inDnodeList = (listedId == pDnode->id);
    }
    if (!inDnodeList) {
      return true;
    }
  }

  int64_t curMs = taosGetTimestampMs();
  bool    online = mndIsDnodeOnline(pDnode, curMs);
  bool    isMnode = mndIsMnode(pMnode, pDnode->id);
  pDnode->numOfVnodes = mndGetVnodesNum(pMnode, pDnode->id);
  pDnode->memUsed = mndGetVnodesMemory(pMnode, pDnode->id);

  mInfo("dnode:%d, vnodes:%d supportVnodes:%d isMnode:%d online:%d memory avail:%" PRId64 " used:%" PRId64, pDnode->id,
        pDnode->numOfVnodes, pDnode->numOfSupportVnodes, isMnode, online, pDnode->memAvail, pDnode->memUsed);

  if (isMnode) {
    pDnode->numOfOtherNodes++;
  }

  if (!online || pDnode->numOfSupportVnodes <= 0) {
    return true;
  }
  return taosArrayPush(pArray, pDnode) != NULL;
}
760

761
/*
 * Collects the dnodes that can host vnodes into a new array of SDnodeObj copies.
 *
 * All dnode counters are reset first (mndResetDnodesArrayFp), then every dnode is
 * offered to mndBuildDnodesArrayFp, which skips exceptDnodeId and, when dnodeList is
 * non-empty, every dnode not listed in it.
 *
 * Returns the array, or NULL with terrno = TSDB_CODE_OUT_OF_MEMORY when the array
 * cannot be created.
 */
SArray *mndBuildDnodesArray(SMnode *pMnode, int32_t exceptDnodeId, SArray *dnodeList) {
  SSdb   *pSdb = pMnode->pSdb;
  SArray *pArray = taosArrayInit(mndGetDnodeSize(pMnode), sizeof(SDnodeObj));
  if (pArray == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  sdbTraverse(pSdb, SDB_DNODE, mndResetDnodesArrayFp, NULL, NULL, NULL);
  sdbTraverse(pSdb, SDB_DNODE, mndBuildDnodesArrayFp, pArray, &exceptDnodeId, dnodeList);

  int32_t numBuilt = (int32_t)taosArrayGetSize(pArray);
  mDebug("build %d dnodes array", numBuilt);
  for (int32_t pos = 0; pos < numBuilt; ++pos) {
    SDnodeObj *pDnode = taosArrayGet(pArray, pos);
    mDebug("dnode:%d, vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes);
  }
  return pArray;
}
781

782
// Three-way comparison of two dnode ids.
// Returns -1, 0 or 1 when *dnode1Id is less than, equal to or greater than
// *dnode2Id respectively.
static int32_t mndCompareDnodeId(int32_t *dnode1Id, int32_t *dnode2Id) {
  const int32_t lhs = *dnode1Id;
  const int32_t rhs = *dnode2Id;

  if (lhs < rhs) {
    return -1;
  }
  if (lhs > rhs) {
    return 1;
  }
  return 0;
}
788

789
// Load score of a dnode:
//   (numOfVnodes + numOfOtherNodes * ratio + additionDnodes) / numOfSupportVnodes
// A higher value means the dnode is more heavily loaded relative to the
// number of vnodes it supports. numOfSupportVnodes is used as the divisor
// without a check here.
static float mndGetDnodeScore(SDnodeObj *pDnode, int32_t additionDnodes, float ratio) {
  float weightedOthers = (float)pDnode->numOfOtherNodes * ratio;
  float load = pDnode->numOfVnodes + weightedOthers + additionDnodes;
  return load / pDnode->numOfSupportVnodes;
}
793

794
// Comparator ordering dnodes by ascending load score (see mndGetDnodeScore,
// called with no additional nodes and a ratio of 0.9).
// Returns 1 when pDnode1 scores higher, 0 when the scores are equal and -1
// in every other case.
static int32_t mndCompareDnodeVnodes(SDnodeObj *pDnode1, SDnodeObj *pDnode2) {
  const float lhs = mndGetDnodeScore(pDnode1, 0, 0.9);
  const float rhs = mndGetDnodeScore(pDnode2, 0, 0.9);

  if (lhs > rhs) {
    return 1;
  }
  return (lhs == rhs) ? 0 : -1;
}
802

803
// Sort the first `replica` entries of pVgroup->vnodeGid in place by
// ascending dnodeId, using a bubble sort that swaps only when the left
// neighbour's dnodeId is strictly greater.
void mndSortVnodeGid(SVgObj *pVgroup) {
  // After each pass the largest remaining dnodeId has settled at index `last`.
  for (int32_t last = pVgroup->replica - 1; last > 0; --last) {
    for (int32_t pos = 0; pos < last; ++pos) {
      SVnodeGid *pLeft = &pVgroup->vnodeGid[pos];
      SVnodeGid *pRight = &pVgroup->vnodeGid[pos + 1];
      if (pLeft->dnodeId > pRight->dnodeId) {
        TSWAP(*pLeft, *pRight);
      }
    }
  }
}
×
812

813
// Assign a dnode to every replica of pVgroup.
//
// pArray (SDnodeObj entries) is sorted by ascending load score and its first
// `replica` entries are used, one per replica. For each chosen dnode the
// vnode count and the memory accounting (memUsed) of the array entry are
// updated; pVgroup->vnodeGid[] receives the dnode id and an initial sync
// state (LEADER for a single replica, FOLLOWER otherwise). Finally the
// vnodeGid entries are sorted by dnode id.
//
// Returns 0 on success, or one of TSDB_CODE_MND_NO_ENOUGH_DNODES,
// TSDB_CODE_MND_NO_ENOUGH_VNODES, TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE.
// Updates applied to earlier replicas are not rolled back on failure.
static int32_t mndGetAvailableDnode(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray) {
  mDebug("start to sort %d dnodes", (int32_t)taosArrayGetSize(pArray));
  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);

  for (int32_t idx = 0; idx < (int32_t)taosArrayGetSize(pArray); ++idx) {
    SDnodeObj *pEntry = taosArrayGet(pArray, idx);
    mDebug("dnode:%d, score:%f", pEntry->id, mndGetDnodeScore(pEntry, 0, 0.9));
  }

  int32_t numCandidates = taosArrayGetSize(pArray);
  if (numCandidates < pVgroup->replica) {
    mError("db:%s, vgId:%d, no enough online dnodes:%d to alloc %d replica", pVgroup->dbName, pVgroup->vgId,
           numCandidates, pVgroup->replica);
    TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_DNODES);
  }

  for (int32_t rep = 0; rep < pVgroup->replica; ++rep) {
    SVnodeGid *pSlot = &pVgroup->vnodeGid[rep];
    SDnodeObj *pTarget = taosArrayGet(pArray, rep);

    if (pTarget == NULL) {
      TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_DNODES);
    }
    if (pTarget->numOfVnodes >= pTarget->numOfSupportVnodes) {
      TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_VNODES);
    }

    // Reject the dnode unless memory is left over after placing this vnode.
    int64_t required = mndGetVgroupMemory(pMnode, pDb, pVgroup);
    if (pTarget->memAvail - required - pTarget->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d, avail:%" PRId64 " used:%" PRId64,
             pVgroup->dbName, pVgroup->vgId, required, pTarget->id, pTarget->memAvail, pTarget->memUsed);
      TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE);
    }
    pTarget->memUsed += required;

    pSlot->dnodeId = pTarget->id;
    pSlot->syncState = (pVgroup->replica == 1) ? TAOS_SYNC_STATE_LEADER : TAOS_SYNC_STATE_FOLLOWER;

    mInfo("db:%s, vgId:%d, vn:%d is alloced, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
          pVgroup->dbName, pVgroup->vgId, rep, required, pSlot->dnodeId, pTarget->memAvail, pTarget->memUsed);
    pTarget->numOfVnodes++;
  }

  mndSortVnodeGid(pVgroup);
  return 0;
}
862

863
// Initialise pVgroup as a single-replica TSMA vgroup of pDb and pick a dnode
// for it.
//
// The vgroup gets the id returned by sdbGetMaxId(SDB_VGROUP), isTsma = 1,
// version 1, createdTime/updateTime set to the current time, and the name
// and uid of pDb.
//
// Returns 0 on success. On failure returns a non-zero code: the code from
// mndGetAvailableDnode, or terrno / TSDB_CODE_MND_RETURN_VALUE_NULL when the
// dnode array cannot be built.
int32_t mndAllocSmaVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup) {
  int32_t code = 0;
  SArray *pArray = mndBuildDnodesArray(pMnode, 0, NULL);
  if (pArray == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    TAOS_RETURN(code);
  }

  pVgroup->vgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP);
  pVgroup->isTsma = 1;
  pVgroup->createdTime = taosGetTimestampMs();
  pVgroup->updateTime = pVgroup->createdTime;
  pVgroup->version = 1;
  memcpy(pVgroup->dbName, pDb->name, TSDB_DB_FNAME_LEN);
  pVgroup->dbUid = pDb->uid;
  pVgroup->replica = 1;

  code = mndGetAvailableDnode(pMnode, pDb, pVgroup, pArray);
  // The dnode array is only needed for the allocation above; destroy it on
  // both the success and the failure path so it is never leaked.
  taosArrayDestroy(pArray);
  if (code != 0) {
    TAOS_RETURN(code);
  }

  mInfo("db:%s, sma vgId:%d is alloced", pDb->name, pVgroup->vgId);
  return 0;
}
887

888
// Allocate and initialise pDb->cfg.numOfVgroups vgroups for pDb.
//
// Vgroup ids start at max(sdbGetMaxId(SDB_VGROUP), 2) and increase by one per
// vgroup. The hash space [0, UINT32_MAX] is split into equal intervals, one
// per vgroup; the last vgroup's range always ends at UINT32_MAX so that the
// remainder of the integer division is covered. Each vgroup gets
// pDb->cfg.replications replicas placed by mndGetAvailableDnode, drawing from
// the dnodes returned by mndBuildDnodesArray(pMnode, 0, dnodeList).
//
// On success returns 0 and stores the calloc'ed array in *ppVgroups (it is
// not freed here). On failure returns a non-zero code, frees the array and
// leaves *ppVgroups untouched.
int32_t mndAllocVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj **ppVgroups, SArray *dnodeList) {
  int32_t code = -1;
  SArray *pArray = NULL;
  SVgObj *pVgroups = NULL;

  pVgroups = taosMemoryCalloc(pDb->cfg.numOfVgroups, sizeof(SVgObj));
  if (pVgroups == NULL) {
    code = terrno;
    goto _OVER;
  }

  pArray = mndBuildDnodesArray(pMnode, 0, dnodeList);
  if (pArray == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    goto _OVER;
  }

  mInfo("db:%s, total %d dnodes used to create %d vgroups (%d vnodes)", pDb->name, (int32_t)taosArrayGetSize(pArray),
        pDb->cfg.numOfVgroups, pDb->cfg.numOfVgroups * pDb->cfg.replications);

  int32_t  maxVgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP);
  uint32_t hashMin = 0;
  uint32_t hashMax = UINT32_MAX;
  uint32_t hashInterval = (hashMax - hashMin) / pDb->cfg.numOfVgroups;

  if (maxVgId < 2) maxVgId = 2;

  for (uint32_t v = 0; v < pDb->cfg.numOfVgroups; v++) {
    SVgObj *pVgroup = &pVgroups[v];
    pVgroup->vgId = maxVgId++;
    pVgroup->createdTime = taosGetTimestampMs();
    // Use this vgroup's own creation time, not the first element's.
    pVgroup->updateTime = pVgroup->createdTime;
    pVgroup->version = 1;
    pVgroup->hashBegin = hashMin + hashInterval * v;
    if (v == pDb->cfg.numOfVgroups - 1) {
      pVgroup->hashEnd = hashMax;
    } else {
      pVgroup->hashEnd = hashMin + hashInterval * (v + 1) - 1;
    }

    memcpy(pVgroup->dbName, pDb->name, TSDB_DB_FNAME_LEN);
    pVgroup->dbUid = pDb->uid;
    pVgroup->replica = pDb->cfg.replications;

    if ((code = mndGetAvailableDnode(pMnode, pDb, pVgroup, pArray)) != 0) {
      goto _OVER;
    }
  }

  *ppVgroups = pVgroups;
  code = 0;

  mInfo("db:%s, total %d vgroups is alloced, replica:%d", pDb->name, pDb->cfg.numOfVgroups, pDb->cfg.replications);

_OVER:
  if (code != 0) taosMemoryFree(pVgroups);
  taosArrayDestroy(pArray);
  TAOS_RETURN(code);
}
951

952
// Build the endpoint set of a vgroup from the fqdn/port of each replica's
// dnode. Replicas whose dnode cannot be acquired are skipped. `inUse` is set
// to the index of a replica in LEADER or ASSIGNED_LEADER state; the set is
// passed through epsetSort before being returned.
SEpSet mndGetVgroupEpset(SMnode *pMnode, const SVgObj *pVgroup) {
  SEpSet result = {0};

  for (int32_t rep = 0; rep < pVgroup->replica; ++rep) {
    const SVnodeGid *pGid = &pVgroup->vnodeGid[rep];
    SDnodeObj       *pHost = mndAcquireDnode(pMnode, pGid->dnodeId);

    if (pHost != NULL) {
      bool isLeader =
          (pGid->syncState == TAOS_SYNC_STATE_LEADER) || (pGid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER);
      if (isLeader) {
        // The endpoint about to be appended lands at index numOfEps.
        result.inUse = result.numOfEps;
      }

      if (addEpIntoEpSet(&result, pHost->fqdn, pHost->port) != 0) {
        mWarn("vgId:%d, failed to add ep:%s:%d into epset", pVgroup->vgId, pHost->fqdn, pHost->port);
      }
      mndReleaseDnode(pMnode, pHost);
    }
  }

  epsetSort(&result);
  return result;
}
973

974
// Look up the vgroup with id vgId and build its endpoint set from the
// fqdn/port of each replica's dnode. Returns an empty (zeroed) set when the
// vgroup cannot be acquired; replicas whose dnode cannot be acquired are
// skipped. `inUse` is set to the index of a replica in LEADER or
// ASSIGNED_LEADER state. The set is returned without sorting.
SEpSet mndGetVgroupEpsetById(SMnode *pMnode, int32_t vgId) {
  SEpSet  result = {0};
  SVgObj *pVg = mndAcquireVgroup(pMnode, vgId);

  if (pVg == NULL) {
    return result;
  }

  for (int32_t rep = 0; rep < pVg->replica; ++rep) {
    const SVnodeGid *pGid = &pVg->vnodeGid[rep];
    SDnodeObj       *pHost = mndAcquireDnode(pMnode, pGid->dnodeId);

    if (pHost != NULL) {
      bool isLeader =
          (pGid->syncState == TAOS_SYNC_STATE_LEADER) || (pGid->syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER);
      if (isLeader) {
        // The endpoint about to be appended lands at index numOfEps.
        result.inUse = result.numOfEps;
      }

      if (addEpIntoEpSet(&result, pHost->fqdn, pHost->port) != 0) {
        mWarn("vgId:%d, failed to add ep:%s:%d into epset", pVg->vgId, pHost->fqdn, pHost->port);
      }
      mndReleaseDnode(pMnode, pHost);
    }
  }

  mndReleaseVgroup(pMnode, pVg);
  return result;
}
998

999
// Fill pBlock with up to `rows` vgroup rows, continuing from pShow->pIter.
//
// When pShow->db is non-empty only vgroups of that database are listed; if
// that database cannot be acquired, 0 rows are returned. Columns written per
// row, in order: vgId, db name, numOfTables, four (dnodeId, role) pairs
// (NULL for slots beyond the replica count), cacheUsage, numOfCachedTables
// and isTsma.
//
// Returns the number of rows written (also added to pShow->numOfRows), or
// the error code of the failing column write. On the error path the vgroup
// currently being processed and the acquired database are released before
// returning.
static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) {
  SMnode *pMnode = pReq->info.node;
  SSdb   *pSdb = pMnode->pSdb;
  int32_t numOfRows = 0;
  SVgObj *pVgroup = NULL;
  int32_t cols = 0;
  int64_t curMs = taosGetTimestampMs();
  int32_t code = 0;

  SDbObj *pDb = NULL;
  if (strlen(pShow->db) > 0) {
    pDb = mndAcquireDb(pMnode, pShow->db);
    if (pDb == NULL) {
      return 0;
    }
  }

  while (numOfRows < rows) {
    pShow->pIter = sdbFetch(pSdb, SDB_VGROUP, pShow->pIter, (void **)&pVgroup);
    if (pShow->pIter == NULL) break;

    if (pDb != NULL && pVgroup->dbUid != pDb->uid) {
      sdbRelease(pSdb, pVgroup);
      continue;
    }

    cols = 0;
    SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    code = colDataSetVal(pColInfo, numOfRows, (const char *)&pVgroup->vgId, false);
    if (code != 0) {
      mError("vgId:%d, failed to set vgId, since %s", pVgroup->vgId, tstrerror(code));
      goto _OVER;
    }

    SName name = {0};
    char  db[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
    code = tNameFromString(&name, pVgroup->dbName, T_NAME_ACCT | T_NAME_DB);
    if (code != 0) {
      mError("vgId:%d, failed to set dbName, since %s", pVgroup->vgId, tstrerror(code));
      goto _OVER;
    }
    (void)tNameGetDbName(&name, varDataVal(db));
    varDataSetLen(db, strlen(varDataVal(db)));

    pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    code = colDataSetVal(pColInfo, numOfRows, (const char *)db, false);
    if (code != 0) {
      mError("vgId:%d, failed to set dbName, since %s", pVgroup->vgId, tstrerror(code));
      goto _OVER;
    }

    pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    code = colDataSetVal(pColInfo, numOfRows, (const char *)&pVgroup->numOfTables, false);
    if (code != 0) {
      mError("vgId:%d, failed to set numOfTables, since %s", pVgroup->vgId, tstrerror(code));
      goto _OVER;
    }

    // default 3 replica, add 1 replica if move vnode
    for (int32_t i = 0; i < 4; ++i) {
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      if (i < pVgroup->replica) {
        int16_t dnodeId = (int16_t)pVgroup->vnodeGid[i].dnodeId;
        code = colDataSetVal(pColInfo, numOfRows, (const char *)&dnodeId, false);
        if (code != 0) {
          mError("vgId:%d, failed to set dnodeId, since %s", pVgroup->vgId, tstrerror(code));
          goto _OVER;
        }

        bool       exist = false;
        bool       online = false;
        SDnodeObj *pDnode = mndAcquireDnode(pMnode, pVgroup->vnodeGid[i].dnodeId);
        if (pDnode != NULL) {
          exist = true;
          online = mndIsDnodeOnline(pDnode, curMs);
          mndReleaseDnode(pMnode, pDnode);
        }

        // Role text: "dropping" when the dnode no longer exists, "offline"
        // when it exists but is not online, otherwise the sync state name.
        char buf1[20] = {0};
        char role[20] = "offline";
        if (!exist) {
          tstrncpy(role, "dropping", sizeof(role));
        } else if (online) {
          // A leader that has not finished restoring is flagged with "*"
          // when it can already serve reads and with "**" when it cannot.
          const char *star = "";
          if (pVgroup->vnodeGid[i].syncState == TAOS_SYNC_STATE_LEADER ||
              pVgroup->vnodeGid[i].syncState == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
            if (!pVgroup->vnodeGid[i].syncRestore) {
              star = pVgroup->vnodeGid[i].syncCanRead ? "*" : "**";
            }
          }
          snprintf(role, sizeof(role), "%s%s", syncStr(pVgroup->vnodeGid[i].syncState), star);
        }
        STR_WITH_MAXSIZE_TO_VARSTR(buf1, role, pShow->pMeta->pSchemas[cols].bytes);

        pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
        code = colDataSetVal(pColInfo, numOfRows, (const char *)buf1, false);
        if (code != 0) {
          mError("vgId:%d, failed to set role, since %s", pVgroup->vgId, tstrerror(code));
          goto _OVER;
        }
      } else {
        // Unused replica slot: both the dnode id and the role column are NULL.
        colDataSetNULL(pColInfo, numOfRows);
        pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
        colDataSetNULL(pColInfo, numOfRows);
      }
    }

    pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    int32_t cacheUsage = (int32_t)pVgroup->cacheUsage;
    code = colDataSetVal(pColInfo, numOfRows, (const char *)&cacheUsage, false);
    if (code != 0) {
      mError("vgId:%d, failed to set cacheUsage, since %s", pVgroup->vgId, tstrerror(code));
      goto _OVER;
    }

    pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    code = colDataSetVal(pColInfo, numOfRows, (const char *)&pVgroup->numOfCachedTables, false);
    if (code != 0) {
      mError("vgId:%d, failed to set numOfCachedTables, since %s", pVgroup->vgId, tstrerror(code));
      goto _OVER;
    }

    pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
    code = colDataSetVal(pColInfo, numOfRows, (const char *)&pVgroup->isTsma, false);
    if (code != 0) {
      mError("vgId:%d, failed to set isTsma, since %s", pVgroup->vgId, tstrerror(code));
      goto _OVER;
    }
    numOfRows++;
    sdbRelease(pSdb, pVgroup);
  }

  if (pDb != NULL) {
    mndReleaseDb(pMnode, pDb);
  }

  pShow->numOfRows += numOfRows;
  return numOfRows;

_OVER:
  // Error path, reached only from inside the loop while pVgroup is held:
  // drop the references that the early returns used to leak.
  sdbRelease(pSdb, pVgroup);
  if (pDb != NULL) {
    mndReleaseDb(pMnode, pDb);
  }
  return code;
}
1164

1165
// Cancel an in-progress SDB_VGROUP fetch identified by pIter.
static void mndCancelGetNextVgroup(SMnode *pMnode, void *pIter) {
  sdbCancelFetchByType(pMnode->pSdb, pIter, SDB_VGROUP);
}
×
1169

1170
// sdbTraverse callback over vgroups: adds to the counter at p2 (int32_t *)
// the number of replicas of the vgroup pObj that sit on the dnode whose id
// is stored at p1 (int32_t *). p3 is unused. Always returns true.
static bool mndGetVnodesNumFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  SVgObj       *pVg = pObj;
  const int32_t wantedDnode = *(int32_t *)p1;
  int32_t      *pCounter = (int32_t *)p2;

  int32_t rep = 0;
  while (rep < pVg->replica) {
    if (pVg->vnodeGid[rep].dnodeId == wantedDnode) {
      *pCounter += 1;
    }
    ++rep;
  }

  return true;
}
1183

1184
// Count the vnodes located on dnode dnodeId by traversing every vgroup in
// the sdb with mndGetVnodesNumFp.
int32_t mndGetVnodesNum(SMnode *pMnode, int32_t dnodeId) {
  int32_t total = 0;
  sdbTraverse(pMnode->pSdb, SDB_VGROUP, mndGetVnodesNumFp, &dnodeId, &total, NULL);
  return total;
}
1189

1190
// Compute the memory figure of one vnode of pVgroup from its database
// configuration:
//   cfg.buffer * 1024 * 1024 + cfg.pages * cfg.pageSize * 1024
//   (+ cfg.cacheLastSize * 1024 * 1024 when cfg.cacheLast > 0)
//
// When pDbInput is NULL the database is acquired by pVgroup->dbName for the
// duration of the call and released before returning. Returns 0 when no
// database is available.
int64_t mndGetVgroupMemory(SMnode *pMnode, SDbObj *pDbInput, SVgObj *pVgroup) {
  const bool acquiredHere = (pDbInput == NULL);
  SDbObj    *pDb = pDbInput;
  if (acquiredHere) {
    pDb = mndAcquireDb(pMnode, pVgroup->dbName);
  }

  int64_t memTotal = 0;
  if (pDb != NULL) {
    int64_t buffer = (int64_t)pDb->cfg.buffer * 1024 * 1024;
    int64_t cache = (int64_t)pDb->cfg.pages * pDb->cfg.pageSize * 1024;
    int64_t cacheLast = (int64_t)pDb->cfg.cacheLastSize * 1024 * 1024;

    memTotal = buffer + cache;
    // The last-value cache only counts when it is enabled.
    memTotal += (pDb->cfg.cacheLast > 0) ? cacheLast : 0;

    mDebug("db:%s, vgroup:%d, buffer:%" PRId64 " cache:%" PRId64 " cacheLast:%" PRId64, pDb->name, pVgroup->vgId,
           buffer, cache, cacheLast);
  }

  if (acquiredHere) {
    mndReleaseDb(pMnode, pDb);
  }
  return memTotal;
}
1214

1215
// sdbTraverse callback over vgroups: for every replica of the vgroup pObj
// that sits on the dnode whose id is stored at p1 (int32_t *), adds the
// vgroup's memory figure (mndGetVgroupMemory) to the accumulator at p2
// (int64_t *). p3 is unused. Always returns true.
static bool mndGetVnodeMemroyFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) {
  SVgObj       *pVg = pObj;
  const int32_t wantedDnode = *(int32_t *)p1;
  int64_t      *pAccum = (int64_t *)p2;

  int32_t rep = 0;
  while (rep < pVg->replica) {
    if (pVg->vnodeGid[rep].dnodeId == wantedDnode) {
      *pAccum += mndGetVgroupMemory(pMnode, NULL, pVg);
    }
    ++rep;
  }

  return true;
}
1228

1229
// Sum the memory figures of all vnodes located on dnode dnodeId by
// traversing every vgroup in the sdb with mndGetVnodeMemroyFp.
int64_t mndGetVnodesMemory(SMnode *pMnode, int32_t dnodeId) {
  int64_t total = 0;
  sdbTraverse(pMnode->pSdb, SDB_VGROUP, mndGetVnodeMemroyFp, &dnodeId, &total, NULL);
  return total;
}
1234

1235
// Fill pBlock with one row per vnode (vgroup replica), continuing from
// pShow->pIter.
//
// A new vgroup is only fetched while fewer than rows - TSDB_MAX_REPLICA rows
// have been written. Columns written per row, in order: dnodeId, vgId,
// db name, sync state, roleTimeMs, startTimeMs and syncRestore. For an
// offline dnode the sync state is reported as TAOS_SYNC_STATE_OFFLINE and
// both times as 0. If a replica's dnode cannot be acquired, the remaining
// replicas of that vgroup are skipped.
//
// Returns the number of rows written (also added to pShow->numOfRows), or
// the error code of the failing column write. On the error path the vgroup
// and, if held, the dnode are released before returning.
static int32_t mndRetrieveVnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) {
  SMnode    *pMnode = pReq->info.node;
  SSdb      *pSdb = pMnode->pSdb;
  int32_t    numOfRows = 0;
  SVgObj    *pVgroup = NULL;
  SDnodeObj *pDnode = NULL;  // non-NULL only while a dnode reference is held
  int32_t    cols = 0;
  int64_t    curMs = taosGetTimestampMs();
  int32_t    code = 0;

  while (numOfRows < rows - TSDB_MAX_REPLICA) {
    pShow->pIter = sdbFetch(pSdb, SDB_VGROUP, pShow->pIter, (void **)&pVgroup);
    if (pShow->pIter == NULL) break;

    for (int32_t i = 0; i < pVgroup->replica && numOfRows < rows; ++i) {
      SVnodeGid       *pGid = &pVgroup->vnodeGid[i];
      SColumnInfoData *pColInfo = NULL;
      cols = 0;

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      code = colDataSetVal(pColInfo, numOfRows, (const char *)&pGid->dnodeId, false);
      if (code != 0) {
        mError("vgId:%d, failed to set dnodeId, since %s", pVgroup->vgId, tstrerror(code));
        goto _OVER;
      }
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      code = colDataSetVal(pColInfo, numOfRows, (const char *)&pVgroup->vgId, false);
      if (code != 0) {
        mError("vgId:%d, failed to set vgId, since %s", pVgroup->vgId, tstrerror(code));
        goto _OVER;
      }

      // db_name
      const char *dbname = mndGetDbStr(pVgroup->dbName);
      char        b1[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
      if (dbname != NULL) {
        STR_WITH_MAXSIZE_TO_VARSTR(b1, dbname, TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE);
      } else {
        STR_WITH_MAXSIZE_TO_VARSTR(b1, "NULL", TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE);
      }
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      code = colDataSetVal(pColInfo, numOfRows, (const char *)b1, false);
      if (code != 0) {
        mError("vgId:%d, failed to set dbName, since %s", pVgroup->vgId, tstrerror(code));
        goto _OVER;
      }

      // dnode is online?
      pDnode = mndAcquireDnode(pMnode, pGid->dnodeId);
      if (pDnode == NULL) {
        mError("failed to acquire dnode. dnodeId:%d", pGid->dnodeId);
        break;
      }
      bool isDnodeOnline = mndIsDnodeOnline(pDnode, curMs);

      char       buf[20] = {0};
      ESyncState syncState = (isDnodeOnline) ? pGid->syncState : TAOS_SYNC_STATE_OFFLINE;
      STR_TO_VARSTR(buf, syncStr(syncState));
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      code = colDataSetVal(pColInfo, numOfRows, (const char *)buf, false);
      if (code != 0) {
        mError("vgId:%d, failed to set syncState, since %s", pVgroup->vgId, tstrerror(code));
        goto _OVER;
      }

      int64_t roleTimeMs = (isDnodeOnline) ? pGid->roleTimeMs : 0;
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      code = colDataSetVal(pColInfo, numOfRows, (const char *)&roleTimeMs, false);
      if (code != 0) {
        mError("vgId:%d, failed to set roleTimeMs, since %s", pVgroup->vgId, tstrerror(code));
        goto _OVER;
      }

      int64_t startTimeMs = (isDnodeOnline) ? pGid->startTimeMs : 0;
      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      code = colDataSetVal(pColInfo, numOfRows, (const char *)&startTimeMs, false);
      if (code != 0) {
        mError("vgId:%d, failed to set startTimeMs, since %s", pVgroup->vgId, tstrerror(code));
        goto _OVER;
      }

      pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
      code = colDataSetVal(pColInfo, numOfRows, (const char *)&pGid->syncRestore, false);
      if (code != 0) {
        mError("vgId:%d, failed to set syncRestore, since %s", pVgroup->vgId, tstrerror(code));
        goto _OVER;
      }

      numOfRows++;
      sdbRelease(pSdb, pDnode);
      pDnode = NULL;
    }

    sdbRelease(pSdb, pVgroup);
  }

  pShow->numOfRows += numOfRows;
  return numOfRows;

_OVER:
  // Error path, reached only from inside the loops while pVgroup is held:
  // drop the references that the early returns used to leak.
  if (pDnode != NULL) {
    sdbRelease(pSdb, pDnode);
  }
  sdbRelease(pSdb, pVgroup);
  return code;
}
1332

1333
// Cancel an in-progress vnode listing; the listing iterates SDB_VGROUP, so
// that is the fetch type being cancelled.
static void mndCancelGetNextVnode(SMnode *pMnode, void *pIter) {
  sdbCancelFetchByType(pMnode->pSdb, pIter, SDB_VGROUP);
}
×
1337

1338
// Add one replica to pVgroup on the least loaded dnode of pArray that does
// not already host a replica of this vgroup.
//
// pArray (SDnodeObj entries) is sorted by ascending load score. Only the
// first dnode not already used by the vgroup is considered: if it has no
// vnode capacity or not enough memory the call fails without trying further
// dnodes. On success the new vnodeGid entry is appended in OFFLINE state,
// the replica count and the dnode's counters are updated, and the encoded
// vgroup is appended to pTrans as a redo log with status SDB_STATUS_READY.
//
// Returns 0 on success or a non-zero error code
// (TSDB_CODE_MND_NO_ENOUGH_DNODES when every dnode is already used).
static int32_t mndAddVnodeToVgroup(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, SArray *pArray) {
  int32_t code = 0;
  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
  for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) {
    SDnodeObj *pDnode = taosArrayGet(pArray, i);
    mInfo("dnode:%d, equivalent vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes);
  }

  SVnodeGid *pVgid = &pVgroup->vnodeGid[pVgroup->replica];
  for (int32_t d = 0; d < taosArrayGetSize(pArray); ++d) {
    SDnodeObj *pDnode = taosArrayGet(pArray, d);
    // Validate the entry before it is dereferenced below.
    if (pDnode == NULL) {
      TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_DNODES);
    }

    bool used = false;
    for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
      if (pDnode->id == pVgroup->vnodeGid[vn].dnodeId) {
        used = true;
        break;
      }
    }
    if (used) continue;

    if (pDnode->numOfVnodes >= pDnode->numOfSupportVnodes) {
      TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_VNODES);
    }

    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
    if (pDnode->memAvail - vgMem - pDnode->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
             pVgroup->dbName, pVgroup->vgId, vgMem, pDnode->id, pDnode->memAvail, pDnode->memUsed);
      TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE);
    } else {
      pDnode->memUsed += vgMem;
    }

    pVgid->dnodeId = pDnode->id;
    pVgid->syncState = TAOS_SYNC_STATE_OFFLINE;
    mInfo("db:%s, vgId:%d, vn:%d is added, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
          pVgroup->dbName, pVgroup->vgId, pVgroup->replica, vgMem, pVgid->dnodeId, pDnode->memAvail, pDnode->memUsed);

    pVgroup->replica++;
    pDnode->numOfVnodes++;

    SSdbRaw *pVgRaw = mndVgroupActionEncode(pVgroup);
    if (pVgRaw == NULL) {
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
      if (terrno != 0) code = terrno;
      TAOS_RETURN(code);
    }
    if ((code = mndTransAppendRedolog(pTrans, pVgRaw)) != 0) {
      // The raw object is freed here only when appending it fails.
      sdbFreeRaw(pVgRaw);
      TAOS_RETURN(code);
    }
    code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
    if (code != 0) {
      mError("vgId:%d, failed to set raw status since %s at line:%d", pVgroup->vgId, tstrerror(code), __LINE__);
    }
    TAOS_RETURN(code);
  }

  code = TSDB_CODE_MND_NO_ENOUGH_DNODES;
  mError("db:%s, failed to add vnode to vgId:%d since %s", pVgroup->dbName, pVgroup->vgId, tstrerror(code));
  TAOS_RETURN(code);
}
1404

1405
static int32_t mndRemoveVnodeFromVgroup(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, SArray *pArray,
×
1406
                                        SVnodeGid *pDelVgid) {
1407
  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
×
1408
  for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) {
×
1409
    SDnodeObj *pDnode = taosArrayGet(pArray, i);
×
1410
    mInfo("dnode:%d, equivalent vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes);
×
1411
  }
1412

1413
  int32_t code = -1;
×
1414
  for (int32_t d = taosArrayGetSize(pArray) - 1; d >= 0; --d) {
×
1415
    SDnodeObj *pDnode = taosArrayGet(pArray, d);
×
1416

1417
    for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
×
1418
      SVnodeGid *pVgid = &pVgroup->vnodeGid[vn];
×
1419
      if (pVgid->dnodeId == pDnode->id) {
×
1420
        int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
×
1421
        pDnode->memUsed -= vgMem;
×
1422
        mInfo("db:%s, vgId:%d, vn:%d is removed, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
×
1423
              pVgroup->dbName, pVgroup->vgId, vn, vgMem, pVgid->dnodeId, pDnode->memAvail, pDnode->memUsed);
1424
        pDnode->numOfVnodes--;
×
1425
        pVgroup->replica--;
×
1426
        *pDelVgid = *pVgid;
×
1427
        *pVgid = pVgroup->vnodeGid[pVgroup->replica];
×
1428
        memset(&pVgroup->vnodeGid[pVgroup->replica], 0, sizeof(SVnodeGid));
×
1429
        code = 0;
×
1430
        goto _OVER;
×
1431
      }
1432
    }
1433
  }
1434

1435
_OVER:
×
1436
  if (code != 0) {
×
1437
    code = TSDB_CODE_APP_ERROR;
×
1438
    mError("db:%s, failed to remove vnode from vgId:%d since %s", pVgroup->dbName, pVgroup->vgId, tstrerror(code));
×
1439
    TAOS_RETURN(code);
×
1440
  }
1441

1442
  for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
×
1443
    SVnodeGid *pVgid = &pVgroup->vnodeGid[vn];
×
1444
    mInfo("db:%s, vgId:%d, vn:%d dnode:%d is reserved", pVgroup->dbName, pVgroup->vgId, vn, pVgid->dnodeId);
×
1445
  }
1446

1447
  SSdbRaw *pVgRaw = mndVgroupActionEncode(pVgroup);
×
1448
  if (pVgRaw == NULL) {
×
1449
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1450
    if (terrno != 0) code = terrno;
×
1451
    TAOS_RETURN(code);
×
1452
  }
1453
  if (mndTransAppendRedolog(pTrans, pVgRaw) != 0) {
×
1454
    sdbFreeRaw(pVgRaw);
×
1455
    TAOS_RETURN(code);
×
1456
  }
1457
  code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
×
1458
  if (code != 0) {
×
1459
    mError("vgId:%d, failed to set raw status since %s at line:%d", pVgroup->vgId, tstrerror(code), __LINE__);
×
1460
  }
1461

1462
  TAOS_RETURN(code);
×
1463
}
1464

1465
static int32_t mndRemoveVnodeFromVgroupWithoutSave(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, SArray *pArray,
×
1466
                                                   SVnodeGid *pDelVgid) {
1467
  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
×
1468
  for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) {
×
1469
    SDnodeObj *pDnode = taosArrayGet(pArray, i);
×
1470
    mInfo("dnode:%d, equivalent vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes);
×
1471
  }
1472

1473
  int32_t code = -1;
×
1474
  for (int32_t d = taosArrayGetSize(pArray) - 1; d >= 0; --d) {
×
1475
    SDnodeObj *pDnode = taosArrayGet(pArray, d);
×
1476

1477
    for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
×
1478
      SVnodeGid *pVgid = &pVgroup->vnodeGid[vn];
×
1479
      if (pVgid->dnodeId == pDnode->id) {
×
1480
        int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
×
1481
        pDnode->memUsed -= vgMem;
×
1482
        mInfo("db:%s, vgId:%d, vn:%d is removed, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
×
1483
              pVgroup->dbName, pVgroup->vgId, vn, vgMem, pVgid->dnodeId, pDnode->memAvail, pDnode->memUsed);
1484
        pDnode->numOfVnodes--;
×
1485
        pVgroup->replica--;
×
1486
        *pDelVgid = *pVgid;
×
1487
        *pVgid = pVgroup->vnodeGid[pVgroup->replica];
×
1488
        memset(&pVgroup->vnodeGid[pVgroup->replica], 0, sizeof(SVnodeGid));
×
1489
        code = 0;
×
1490
        goto _OVER;
×
1491
      }
1492
    }
1493
  }
1494

1495
_OVER:
×
1496
  if (code != 0) {
×
1497
    code = TSDB_CODE_APP_ERROR;
×
1498
    mError("db:%s, failed to remove vnode from vgId:%d since %s", pVgroup->dbName, pVgroup->vgId, tstrerror(code));
×
1499
    TAOS_RETURN(code);
×
1500
  }
1501

1502
  for (int32_t vn = 0; vn < pVgroup->replica; ++vn) {
×
1503
    SVnodeGid *pVgid = &pVgroup->vnodeGid[vn];
×
1504
    mInfo("db:%s, vgId:%d, vn:%d dnode:%d is reserved", pVgroup->dbName, pVgroup->vgId, vn, pVgid->dnodeId);
×
1505
  }
1506

1507
  TAOS_RETURN(code);
×
1508
}
1509

1510
// Append a TDMT_DND_CREATE_VNODE redo action to pTrans, addressed to the
// dnode referenced by pVgid.
//
// The request body comes from mndBuildCreateVnodeReq;
// TSDB_CODE_VND_ALREADY_EXIST is set as the action's acceptable code. The
// request buffer is freed here only when appending the action fails.
//
// Returns 0 on success, -1 when the dnode cannot be acquired or the request
// cannot be built, or the error code from mndTransAppendRedoAction.
int32_t mndAddCreateVnodeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SVnodeGid *pVgid) {
  int32_t      code = 0;
  STransAction action = {0};

  SDnodeObj *pDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
  if (pDnode == NULL) return -1;
  action.epSet = mndGetDnodeEpset(pDnode);

  int32_t contLen = 0;
  void   *pReq = mndBuildCreateVnodeReq(pMnode, pDnode, pDb, pVgroup, &contLen);
  // pDnode is handed to mndBuildCreateVnodeReq above, so the reference is
  // held until the request has been built and only then released.
  mndReleaseDnode(pMnode, pDnode);
  if (pReq == NULL) return -1;

  action.pCont = pReq;
  action.contLen = contLen;
  action.msgType = TDMT_DND_CREATE_VNODE;
  action.acceptableCode = TSDB_CODE_VND_ALREADY_EXIST;

  if ((code = mndTransAppendRedoAction(pTrans, &action)) != 0) {
    taosMemoryFree(pReq);
    TAOS_RETURN(code);
  }

  TAOS_RETURN(code);
}
1535

1536
int32_t mndRestoreAddCreateVnodeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
×
1537
                                       SDnodeObj *pDnode) {
1538
  int32_t      code = 0;
×
1539
  STransAction action = {0};
×
1540

1541
  action.epSet = mndGetDnodeEpset(pDnode);
×
1542

1543
  int32_t contLen = 0;
×
1544
  void   *pReq = mndBuildCreateVnodeReq(pMnode, pDnode, pDb, pVgroup, &contLen);
×
1545
  if (pReq == NULL) {
×
1546
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1547
    if (terrno != 0) code = terrno;
×
1548
    TAOS_RETURN(code);
×
1549
  }
1550

1551
  action.pCont = pReq;
×
1552
  action.contLen = contLen;
×
1553
  action.msgType = TDMT_DND_CREATE_VNODE;
×
1554
  action.acceptableCode = TSDB_CODE_VND_ALREADY_EXIST;
×
1555

1556
  if ((code = mndTransAppendRedoAction(pTrans, &action)) != 0) {
×
1557
    taosMemoryFree(pReq);
×
1558
    TAOS_RETURN(code);
×
1559
  }
1560

1561
  TAOS_RETURN(code);
×
1562
}
1563

1564
// Append a TDMT_VND_ALTER_CONFIRM redo action to pTrans, addressed to the
// endpoint set of pVgroup.
//
// The message consists of a bare SMsgHead whose contLen and vgId are stored
// in network byte order. TSDB_CODE_VND_INVALID_VGROUP_ID is set as the
// action's retry code. The header buffer is freed here only when appending
// the action fails.
//
// Returns 0 on success, terrno when the header cannot be allocated, or the
// error code from mndTransAppendRedoAction.
int32_t mndAddAlterVnodeConfirmAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) {
  STransAction confirmAction = {0};
  confirmAction.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  mInfo("vgId:%d, build alter vnode confirm req", pVgroup->vgId);

  int32_t   msgLen = sizeof(SMsgHead);
  SMsgHead *pMsg = taosMemoryMalloc(msgLen);
  if (pMsg == NULL) {
    TAOS_RETURN(terrno);
  }
  pMsg->contLen = htonl(msgLen);
  pMsg->vgId = htonl(pVgroup->vgId);

  confirmAction.pCont = pMsg;
  confirmAction.contLen = msgLen;
  confirmAction.msgType = TDMT_VND_ALTER_CONFIRM;
  // An incorrect redirect result causes this error.
  confirmAction.retryCode = TSDB_CODE_VND_INVALID_VGROUP_ID;

  int32_t code = mndTransAppendRedoAction(pTrans, &confirmAction);
  if (code != 0) {
    taosMemoryFree(pMsg);
  }
  TAOS_RETURN(code);
}
1592

1593
// Append a TDMT_SYNC_CONFIG_CHANGE redo action to pTrans, addressed to the
// endpoint set of pNewVgroup.
//
// The message is an SMsgHead (contLen and vgId in network byte order)
// followed by the body produced by mndBuildAlterVnodeReplicaReq for
// pNewVgroup and dnodeId. The intermediate body buffer is always freed here;
// the combined message is freed here only when appending the action fails.
// pOldVgroup is not used by this function.
//
// Returns 0 on success, terrno (or TSDB_CODE_MND_RETURN_VALUE_NULL when
// terrno is 0) if the body cannot be built, terrno when the message cannot
// be allocated, or the error code from mndTransAppendRedoAction.
int32_t mndAddChangeConfigAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pOldVgroup, SVgObj *pNewVgroup,
                                 int32_t dnodeId) {
  STransAction changeAction = {0};
  changeAction.epSet = mndGetVgroupEpset(pMnode, pNewVgroup);

  int32_t bodyLen = 0;
  void   *pBody = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pNewVgroup, dnodeId, &bodyLen);
  if (pBody == NULL) {
    int32_t failCode = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(failCode);
  }

  int32_t   msgLen = bodyLen + sizeof(SMsgHead);
  SMsgHead *pMsg = taosMemoryMalloc(msgLen);
  if (pMsg == NULL) {
    taosMemoryFree(pBody);
    TAOS_RETURN(terrno);
  }

  pMsg->contLen = htonl(msgLen);
  pMsg->vgId = htonl(pNewVgroup->vgId);

  // The body goes immediately after the header; the standalone copy is no
  // longer needed afterwards.
  memcpy((void *)(pMsg + 1), pBody, bodyLen);
  taosMemoryFree(pBody);

  changeAction.pCont = pMsg;
  changeAction.contLen = msgLen;
  changeAction.msgType = TDMT_SYNC_CONFIG_CHANGE;

  int32_t code = mndTransAppendRedoAction(pTrans, &changeAction);
  if (code != 0) {
    taosMemoryFree(pMsg);
  }
  TAOS_RETURN(code);
}
1632

1633
/*
 * Append a TDMT_VND_ALTER_HASHRANGE redo action to pTrans, addressed to the
 * endpoint set of pVgroup. The request body comes from
 * mndBuildAlterVnodeHashRangeReq(srcVgId, pVgroup); the action's acceptableCode
 * is set to TSDB_CODE_VND_ALREADY_EXIST.
 *
 * Returns 0 on success (and logs the added action). On failure returns terrno
 * (or TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the request could
 * not be built, or the error from mndTransAppendRedoAction, in which case the
 * request buffer is freed here.
 */
static int32_t mndAddAlterVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, int32_t srcVgId, SVgObj *pVgroup) {
  int32_t      code = 0;
  int32_t      bodyLen = 0;
  STransAction action = {0};

  action.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  void *pBody = mndBuildAlterVnodeHashRangeReq(pMnode, srcVgId, pVgroup, &bodyLen);
  if (pBody == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  action.msgType = TDMT_VND_ALTER_HASHRANGE;
  action.acceptableCode = TSDB_CODE_VND_ALREADY_EXIST;
  action.pCont = pBody;
  action.contLen = bodyLen;

  code = mndTransAppendRedoAction(pTrans, &action);
  if (code != 0) {
    taosMemoryFree(pBody);
    TAOS_RETURN(code);
  }

  mInfo("trans:%d, add alter vnode hash range action for from vgId:%d to vgId:%d", pTrans->id, srcVgId, pVgroup->vgId);
  TAOS_RETURN(code);
}
1659

1660
/*
 * Append a TDMT_VND_ALTER_CONFIG redo action to pTrans, addressed to the
 * endpoint set of pVgroup. The request body comes from
 * mndBuildAlterVnodeConfigReq(pDb, pVgroup).
 *
 * Returns 0 on success. On failure returns terrno (or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the request could not be
 * built, or the error from mndTransAppendRedoAction, in which case the request
 * buffer is freed here.
 */
int32_t mndAddAlterVnodeConfigAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) {
  int32_t      code = 0;
  int32_t      bodyLen = 0;
  STransAction action = {0};

  action.epSet = mndGetVgroupEpset(pMnode, pVgroup);

  void *pBody = mndBuildAlterVnodeConfigReq(pMnode, pDb, pVgroup, &bodyLen);
  if (pBody == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  action.msgType = TDMT_VND_ALTER_CONFIG;
  action.pCont = pBody;
  action.contLen = bodyLen;

  code = mndTransAppendRedoAction(pTrans, &action);
  if (code != 0) {
    taosMemoryFree(pBody);
  }
  TAOS_RETURN(code);
}
1684

1685
/*
 * Encode pVg, append the encoded raw to the prepare log of pTrans and mark the
 * raw as SDB_STATUS_CREATING.
 *
 * Returns 0 on success. On failure returns:
 *  - terrno (or TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if encoding
 *    returned NULL;
 *  - the error from mndTransAppendPrepareLog (the raw is freed here);
 *  - the error from sdbSetRawStatus. The raw is NOT freed on this path because
 *    it has already been appended to the transaction.
 */
int32_t mndAddNewVgPrepareAction(SMnode *pMnode, STrans *pTrans, SVgObj *pVg) {
  int32_t  code = 0;
  SSdbRaw *pRaw = mndVgroupActionEncode(pVg);
  if (pRaw == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    goto _err;
  }

  TAOS_CHECK_GOTO(mndTransAppendPrepareLog(pTrans, pRaw), NULL, _err);

  // Capture the status result so a failure is actually reported to the caller;
  // previously the return value was only compared and then discarded.
  code = sdbSetRawStatus(pRaw, SDB_STATUS_CREATING);
  if (code != 0) {
    mError("vgId:%d, failed to set raw status since %s at line:%d", pVg->vgId, tstrerror(code), __LINE__);
  }
  TAOS_RETURN(code);

_err:
  sdbFreeRaw(pRaw);
  TAOS_RETURN(code);
}
1709

1710
/*
 * Append a TDMT_VND_ALTER_REPLICA redo action to pTrans, addressed to the
 * endpoint set of dnode dnodeId. The request body comes from
 * mndBuildAlterVnodeReplicaReq(pDb, pVgroup, dnodeId).
 *
 * Returns 0 on success. On failure returns terrno (or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the dnode could not be
 * acquired or the request could not be built, or the error from
 * mndTransAppendRedoAction, in which case the request buffer is freed here.
 */
int32_t mndAddAlterVnodeReplicaAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId) {
  int32_t      code = 0;
  int32_t      bodyLen = 0;
  STransAction action = {0};

  // the dnode is only needed long enough to copy its endpoint set
  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }
  action.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  void *pBody = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pVgroup, dnodeId, &bodyLen);
  if (pBody == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  action.msgType = TDMT_VND_ALTER_REPLICA;
  action.pCont = pBody;
  action.contLen = bodyLen;

  code = mndTransAppendRedoAction(pTrans, &action);
  if (code != 0) {
    taosMemoryFree(pBody);
  }
  TAOS_RETURN(code);
}
1742

1743
/*
 * Append a TDMT_DND_CHECK_VNODE_LEARNER_CATCHUP redo action to pTrans,
 * addressed to the endpoint set of dnode dnodeId. The request body comes from
 * mndBuildCheckLearnCatchupReq(pDb, pVgroup, dnodeId). The action's
 * acceptableCode is TSDB_CODE_VND_ALREADY_IS_VOTER and its retryCode is
 * TSDB_CODE_VND_NOT_CATCH_UP.
 *
 * Returns 0 on success. On failure returns terrno (or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the dnode could not be
 * acquired or the request could not be built, or the error from
 * mndTransAppendRedoAction, in which case the request buffer is freed here.
 */
int32_t mndAddCheckLearnerCatchupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId) {
  int32_t      code = 0;
  int32_t      bodyLen = 0;
  STransAction action = {0};

  // the dnode is only needed long enough to copy its endpoint set
  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }
  action.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  void *pBody = mndBuildCheckLearnCatchupReq(pMnode, pDb, pVgroup, dnodeId, &bodyLen);
  if (pBody == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  action.msgType = TDMT_DND_CHECK_VNODE_LEARNER_CATCHUP;
  action.acceptableCode = TSDB_CODE_VND_ALREADY_IS_VOTER;
  action.retryCode = TSDB_CODE_VND_NOT_CATCH_UP;
  action.pCont = pBody;
  action.contLen = bodyLen;

  code = mndTransAppendRedoAction(pTrans, &action);
  if (code != 0) {
    taosMemoryFree(pBody);
  }
  TAOS_RETURN(code);
}
1777

1778
/*
 * Append a TDMT_DND_ALTER_VNODE_TYPE redo action to pTrans, addressed to the
 * endpoint set of dnode dnodeId. The request body comes from
 * mndBuildAlterVnodeReplicaReq(pDb, pVgroup, dnodeId). The action's
 * acceptableCode is TSDB_CODE_VND_ALREADY_IS_VOTER and its retryCode is
 * TSDB_CODE_VND_NOT_CATCH_UP.
 *
 * Returns 0 on success. On failure returns terrno (or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the dnode could not be
 * acquired or the request could not be built, or the error from
 * mndTransAppendRedoAction, in which case the request buffer is freed here.
 */
int32_t mndAddAlterVnodeTypeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId) {
  int32_t      code = 0;
  int32_t      bodyLen = 0;
  STransAction action = {0};

  // the dnode is only needed long enough to copy its endpoint set
  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }
  action.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  void *pBody = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pVgroup, dnodeId, &bodyLen);
  if (pBody == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  action.msgType = TDMT_DND_ALTER_VNODE_TYPE;
  action.acceptableCode = TSDB_CODE_VND_ALREADY_IS_VOTER;
  action.retryCode = TSDB_CODE_VND_NOT_CATCH_UP;
  action.pCont = pBody;
  action.contLen = bodyLen;

  code = mndTransAppendRedoAction(pTrans, &action);
  if (code != 0) {
    taosMemoryFree(pBody);
  }
  TAOS_RETURN(code);
}
1812

1813
int32_t mndRestoreAddAlterVnodeTypeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
×
1814
                                          SDnodeObj *pDnode) {
1815
  int32_t      code = 0;
×
1816
  STransAction action = {0};
×
1817
  action.epSet = mndGetDnodeEpset(pDnode);
×
1818

1819
  int32_t contLen = 0;
×
1820
  void   *pReq = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pVgroup, pDnode->id, &contLen);
×
1821
  if (pReq == NULL) {
×
1822
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1823
    if (terrno != 0) code = terrno;
×
1824
    TAOS_RETURN(code);
×
1825
  }
1826

1827
  action.pCont = pReq;
×
1828
  action.contLen = contLen;
×
1829
  action.msgType = TDMT_DND_ALTER_VNODE_TYPE;
×
1830
  action.acceptableCode = TSDB_CODE_VND_ALREADY_IS_VOTER;
×
1831
  action.retryCode = TSDB_CODE_VND_NOT_CATCH_UP;
×
1832

1833
  if ((code = mndTransAppendRedoAction(pTrans, &action)) != 0) {
×
1834
    taosMemoryFree(pReq);
×
1835
    TAOS_RETURN(code);
×
1836
  }
1837

1838
  TAOS_RETURN(code);
×
1839
}
1840

1841
/*
 * Append a TDMT_VND_DISABLE_WRITE redo action to pTrans, addressed to the
 * endpoint set of dnode dnodeId. The request body comes from
 * mndBuildDisableVnodeWriteReq(pDb, pVgroup->vgId).
 *
 * Returns 0 on success. On failure returns terrno (or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the dnode could not be
 * acquired or the request could not be built, or the error from
 * mndTransAppendRedoAction, in which case the request buffer is freed here.
 */
static int32_t mndAddDisableVnodeWriteAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                             int32_t dnodeId) {
  int32_t      code = 0;
  int32_t      bodyLen = 0;
  STransAction action = {0};

  // the dnode is only needed long enough to copy its endpoint set
  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }
  action.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  void *pBody = mndBuildDisableVnodeWriteReq(pMnode, pDb, pVgroup->vgId, &bodyLen);
  if (pBody == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  action.msgType = TDMT_VND_DISABLE_WRITE;
  action.pCont = pBody;
  action.contLen = bodyLen;

  code = mndTransAppendRedoAction(pTrans, &action);
  if (code != 0) {
    taosMemoryFree(pBody);
  }
  TAOS_RETURN(code);
}
1874

1875
/*
 * Append a TDMT_DND_DROP_VNODE action to pTrans, addressed to the endpoint set
 * of the dnode named by pVgid->dnodeId. The action's acceptableCode is
 * TSDB_CODE_VND_NOT_EXIST.
 *
 * isRedo selects the list the action is appended to: the redo actions when
 * true, the undo actions when false.
 *
 * Returns 0 on success. On failure returns terrno (or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the dnode could not be
 * acquired or the request could not be built, or the error from the append
 * call, in which case the request buffer is freed here.
 */
int32_t mndAddDropVnodeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SVnodeGid *pVgid,
                              bool isRedo) {
  int32_t      code = 0;
  STransAction action = {0};

  SDnodeObj *pDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
  if (pDnode == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    TAOS_RETURN(code);
  }
  action.epSet = mndGetDnodeEpset(pDnode);

  // pDnode is still passed to the request builder, so it must stay acquired
  // until the request has been built (it used to be released before this call).
  int32_t contLen = 0;
  void   *pReq = mndBuildDropVnodeReq(pMnode, pDnode, pDb, pVgroup, &contLen);
  if (pReq == NULL) {
    // read terrno before any further call can touch it
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
  }
  mndReleaseDnode(pMnode, pDnode);
  if (pReq == NULL) {
    TAOS_RETURN(code);
  }

  action.pCont = pReq;
  action.contLen = contLen;
  action.msgType = TDMT_DND_DROP_VNODE;
  action.acceptableCode = TSDB_CODE_VND_NOT_EXIST;

  if (isRedo) {
    code = mndTransAppendRedoAction(pTrans, &action);
  } else {
    code = mndTransAppendUndoAction(pTrans, &action);
  }
  if (code != 0) {
    taosMemoryFree(pReq);
  }

  TAOS_RETURN(code);
}
1916

1917
int32_t mndSetMoveVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t vnIndex,
×
1918
                                    SArray *pArray, bool force, bool unsafe) {
1919
  int32_t code = 0;
×
1920
  SVgObj  newVg = {0};
×
1921
  memcpy(&newVg, pVgroup, sizeof(SVgObj));
×
1922

1923
  mInfo("vgId:%d, vgroup info before move, replica:%d", newVg.vgId, newVg.replica);
×
1924
  for (int32_t i = 0; i < newVg.replica; ++i) {
×
1925
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId);
×
1926
  }
1927

1928
  if (!force) {
×
1929
#if 1
1930
    {
1931
#else
1932
    if (newVg.replica == 1) {
1933
#endif
1934
      mInfo("vgId:%d, will add 1 vnode, replca:%d", pVgroup->vgId, newVg.replica);
×
1935
      TAOS_CHECK_RETURN(mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray));
×
1936
      for (int32_t i = 0; i < newVg.replica - 1; ++i) {
×
1937
        TAOS_CHECK_RETURN(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId));
×
1938
      }
1939
      TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg, &newVg.vnodeGid[newVg.replica - 1]));
×
1940
      TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg));
×
1941

1942
      mInfo("vgId:%d, will remove 1 vnode, replca:2", pVgroup->vgId);
×
1943
      newVg.replica--;
×
1944
      SVnodeGid del = newVg.vnodeGid[vnIndex];
×
1945
      newVg.vnodeGid[vnIndex] = newVg.vnodeGid[newVg.replica];
×
1946
      memset(&newVg.vnodeGid[newVg.replica], 0, sizeof(SVnodeGid));
×
1947
      {
1948
        SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
×
1949
        if (pRaw == NULL) {
×
1950
          code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
1951
          if (terrno != 0) code = terrno;
×
1952
          TAOS_RETURN(code);
×
1953
        }
1954
        if ((code = mndTransAppendRedolog(pTrans, pRaw)) != 0) {
×
1955
          sdbFreeRaw(pRaw);
×
1956
          TAOS_RETURN(code);
×
1957
        }
1958
        code = sdbSetRawStatus(pRaw, SDB_STATUS_READY);
×
1959
        if (code != 0) {
×
1960
          mError("vgId:%d, failed to set raw status since %s at line:%d", newVg.vgId, tstrerror(code), __LINE__);
×
1961
          return code;
×
1962
        }
1963
      }
1964

1965
      TAOS_CHECK_RETURN(mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg, &del, true));
×
1966
      for (int32_t i = 0; i < newVg.replica; ++i) {
×
1967
        TAOS_CHECK_RETURN(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId));
×
1968
      }
1969
      TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg));
×
1970
#if 1
1971
    }
1972
#else
1973
    } else {  // new replica == 3
1974
      mInfo("vgId:%d, will add 1 vnode, replca:3", pVgroup->vgId);
1975
      if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray) != 0) return -1;
1976
      mInfo("vgId:%d, will remove 1 vnode, replca:4", pVgroup->vgId);
1977
      newVg.replica--;
1978
      SVnodeGid del = newVg.vnodeGid[vnIndex];
1979
      newVg.vnodeGid[vnIndex] = newVg.vnodeGid[newVg.replica];
1980
      memset(&newVg.vnodeGid[newVg.replica], 0, sizeof(SVnodeGid));
1981
      {
1982
        SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
1983
        if (pRaw == NULL) return -1;
1984
        if (mndTransAppendRedolog(pTrans, pRaw) != 0) {
1985
          sdbFreeRaw(pRaw);
1986
          return -1;
1987
        }
1988
      }
1989

1990
      if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg, &del, true) != 0) return -1;
1991
      for (int32_t i = 0; i < newVg.replica; ++i) {
1992
        if (i == vnIndex) continue;
1993
        if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId) != 0) return -1;
1994
      }
1995
      if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg, &newVg.vnodeGid[vnIndex]) != 0) return -1;
1996
      if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg) != 0) return -1;
1997
    }
1998
#endif
1999
  } else {
2000
    mInfo("vgId:%d, will add 1 vnode and force remove 1 vnode", pVgroup->vgId);
×
2001
    TAOS_CHECK_RETURN(mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray));
×
2002
    newVg.replica--;
×
2003
    // SVnodeGid del = newVg.vnodeGid[vnIndex];
2004
    newVg.vnodeGid[vnIndex] = newVg.vnodeGid[newVg.replica];
×
2005
    memset(&newVg.vnodeGid[newVg.replica], 0, sizeof(SVnodeGid));
×
2006
    {
2007
      SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
×
2008
      if (pRaw == NULL) {
×
2009
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2010
        if (terrno != 0) code = terrno;
×
2011
        TAOS_RETURN(code);
×
2012
      }
2013
      if ((code = mndTransAppendRedolog(pTrans, pRaw)) != 0) {
×
2014
        sdbFreeRaw(pRaw);
×
2015
        TAOS_RETURN(code);
×
2016
      }
2017
      code = sdbSetRawStatus(pRaw, SDB_STATUS_READY);
×
2018
      if (code != 0) {
×
2019
        mError("vgId:%d, failed to set raw status since %s at line:%d", newVg.vgId, tstrerror(code), __LINE__);
×
2020
        return code;
×
2021
      }
2022
    }
2023

2024
    for (int32_t i = 0; i < newVg.replica; ++i) {
×
2025
      if (i != vnIndex) {
×
2026
        TAOS_CHECK_RETURN(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId));
×
2027
      }
2028
    }
2029
    TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg, &newVg.vnodeGid[vnIndex]));
×
2030
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg));
×
2031

2032
    if (newVg.replica == 1) {
×
2033
      if (force && !unsafe) {
×
2034
        TAOS_RETURN(TSDB_CODE_VND_META_DATA_UNSAFE_DELETE);
×
2035
      }
2036

2037
      SSdb *pSdb = pMnode->pSdb;
×
2038
      void *pIter = NULL;
×
2039

2040
      while (1) {
×
2041
        SStbObj *pStb = NULL;
×
2042
        pIter = sdbFetch(pSdb, SDB_STB, pIter, (void **)&pStb);
×
2043
        if (pIter == NULL) break;
×
2044

2045
        if (strcmp(pStb->db, pDb->name) == 0) {
×
2046
          if ((code = mndSetForceDropCreateStbRedoActions(pMnode, pTrans, &newVg, pStb)) != 0) {
×
2047
            sdbCancelFetch(pSdb, pIter);
×
2048
            sdbRelease(pSdb, pStb);
×
2049
            TAOS_RETURN(code);
×
2050
          }
2051
        }
2052

2053
        sdbRelease(pSdb, pStb);
×
2054
      }
2055

2056
      mInfo("vgId:%d, all data is dropped since replica=1", pVgroup->vgId);
×
2057
    }
2058
  }
2059

2060
  {
2061
    SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
×
2062
    if (pRaw == NULL) {
×
2063
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2064
      if (terrno != 0) code = terrno;
×
2065
      TAOS_RETURN(code);
×
2066
    }
2067
    if ((code = mndTransAppendCommitlog(pTrans, pRaw)) != 0) {
×
2068
      sdbFreeRaw(pRaw);
×
2069
      TAOS_RETURN(code);
×
2070
    }
2071
    code = sdbSetRawStatus(pRaw, SDB_STATUS_READY);
×
2072
    if (code != 0) {
×
2073
      mError("vgId:%d, failed to set raw status since %s at line:%d", newVg.vgId, tstrerror(code), __LINE__);
×
2074
      return code;
×
2075
    }
2076
  }
2077

2078
  mInfo("vgId:%d, vgroup info after move, replica:%d", newVg.vgId, newVg.replica);
×
2079
  for (int32_t i = 0; i < newVg.replica; ++i) {
×
2080
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId);
×
2081
  }
2082
  TAOS_RETURN(code);
×
2083
}
2084

2085
/*
 * For every vgroup that has a vnode on dnode delDnodeId, add the actions that
 * move that vnode elsewhere (see mndSetMoveVgroupInfoToTrans) to pTrans.
 * Candidate dnodes come from mndBuildDnodesArray(pMnode, delDnodeId, NULL).
 *
 * Iteration stops at the first failure. Returns 0 when all affected vgroups
 * were processed, otherwise the first error: terrno (or
 * TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0) if the dnode array or a
 * vgroup's db could not be obtained, or the error from
 * mndSetMoveVgroupInfoToTrans.
 */
int32_t mndSetMoveVgroupsInfoToTrans(SMnode *pMnode, STrans *pTrans, int32_t delDnodeId, bool force, bool unsafe) {
  int32_t code = 0;
  SArray *pArray = mndBuildDnodesArray(pMnode, delDnodeId, NULL);
  if (pArray == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    TAOS_RETURN(code);
  }

  void *pIter = NULL;
  while (1) {
    SVgObj *pVgroup = NULL;
    pIter = sdbFetch(pMnode->pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
    if (pIter == NULL) break;

    // locate the replica slot that lives on the dnode being removed
    int32_t vnIndex = -1;
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
      if (pVgroup->vnodeGid[i].dnodeId == delDnodeId) {
        vnIndex = i;
        break;
      }
    }

    code = 0;
    if (vnIndex != -1) {
      mInfo("vgId:%d, vnode:%d will be removed from dnode:%d, force:%d", pVgroup->vgId, vnIndex, delDnodeId, force);
      SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
      if (pDb == NULL) {
        // the move routine dereferences pDb, so a missing db must abort here
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
        if (terrno != 0) code = terrno;
        mError("vgId:%d, failed to acquire db:%s since %s", pVgroup->vgId, pVgroup->dbName, tstrerror(code));
      } else {
        code = mndSetMoveVgroupInfoToTrans(pMnode, pTrans, pDb, pVgroup, vnIndex, pArray, force, unsafe);
        mndReleaseDb(pMnode, pDb);
      }
    }

    sdbRelease(pMnode->pSdb, pVgroup);

    if (code != 0) {
      sdbCancelFetch(pMnode->pSdb, pIter);
      break;
    }
  }

  taosArrayDestroy(pArray);
  TAOS_RETURN(code);
}
2127

2128
/*
 * Grow pVgroup by one vnode on dnode newDnodeId and queue the matching work on
 * pTrans. pVgroup is modified in place (replica count and the new vnodeGid
 * slot), including when a later step fails.
 *
 * Sequence:
 *   - fill the next free vnodeGid slot (offline, learner role) and bump replica;
 *   - write the updated vgroup to the redo log with status SDB_STATUS_READY;
 *   - alter-replica action for each pre-existing member, then create the new
 *     vnode;
 *   - switch the new member's role to voter, queue an alter-vnode-type action
 *     for it and alter-replica actions for the pre-existing members again;
 *   - queue the alter-confirm action.
 *
 * No bound check is made on the vnodeGid index; NOTE(review): callers
 * presumably guarantee there is a free slot -- confirm.
 *
 * Returns 0 on success or the first error encountered.
 */
static int32_t mndAddIncVgroupReplicaToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                             int32_t newDnodeId) {
  int32_t code = 0;
  mInfo("vgId:%d, will add 1 vnode, replica:%d dnode:%d", pVgroup->vgId, pVgroup->replica, newDnodeId);

  // assoc dnode: take the first unused slot, then count it as a replica
  SVnodeGid *pAdded = &pVgroup->vnodeGid[pVgroup->replica];
  pVgroup->replica += 1;
  pAdded->dnodeId = newDnodeId;
  pAdded->syncState = TAOS_SYNC_STATE_OFFLINE;
  pAdded->nodeRole = TAOS_SYNC_ROLE_LEARNER;

  SSdbRaw *pEncoded = mndVgroupActionEncode(pVgroup);
  if (pEncoded == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }
  code = mndTransAppendRedolog(pTrans, pEncoded);
  if (code != 0) {
    sdbFreeRaw(pEncoded);
    TAOS_RETURN(code);
  }
  code = sdbSetRawStatus(pEncoded, SDB_STATUS_READY);
  if (code != 0) {
    mError("vgId:%d, failed to set raw status since %s at line:%d", pVgroup->vgId, tstrerror(code), __LINE__);
    TAOS_RETURN(code);
  }

  // learner: tell the existing members, then create the new vnode
  for (int32_t idx = 0; idx < pVgroup->replica - 1; ++idx) {
    TAOS_CHECK_RETURN(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, pVgroup, pVgroup->vnodeGid[idx].dnodeId));
  }
  TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pDb, pVgroup, pAdded));

  // voter: promote the new member and tell the existing members again
  pAdded->nodeRole = TAOS_SYNC_ROLE_VOTER;
  TAOS_CHECK_RETURN(mndAddAlterVnodeTypeAction(pMnode, pTrans, pDb, pVgroup, pAdded->dnodeId));
  for (int32_t idx = 0; idx < pVgroup->replica - 1; ++idx) {
    TAOS_CHECK_RETURN(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, pVgroup, pVgroup->vnodeGid[idx].dnodeId));
  }

  // confirm
  TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, pVgroup));

  TAOS_RETURN(code);
}
2174

2175
/*
 * Shrink pVgroup by removing its vnode on dnode delDnodeId and queue the
 * matching work on pTrans. pVgroup is modified in place: the last vnodeGid
 * entry is moved into the removed slot, the last entry is zeroed and replica
 * is decremented.
 *
 * If pVgroup has no vnode on delDnodeId nothing is changed and 0 is returned.
 *
 * Otherwise the updated vgroup is written to the redo log with status
 * SDB_STATUS_READY, followed by a drop-vnode redo action for the removed
 * member, alter-replica actions for every remaining member and the
 * alter-confirm action.
 *
 * Returns 0 on success or the first error encountered.
 */
static int32_t mndAddDecVgroupReplicaFromTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                               int32_t delDnodeId) {
  int32_t code = 0;
  mInfo("vgId:%d, will remove 1 vnode, replica:%d dnode:%d", pVgroup->vgId, pVgroup->replica, delDnodeId);

  // find the slot that belongs to the dnode being removed
  int32_t victim = -1;
  for (int32_t idx = 0; idx < pVgroup->replica; ++idx) {
    if (pVgroup->vnodeGid[idx].dnodeId == delDnodeId) {
      victim = idx;
      break;
    }
  }
  if (victim < 0) return 0;

  // keep a copy of the removed member, then compact the array
  SVnodeGid removed = {0};
  pVgroup->replica--;
  memcpy(&removed, &pVgroup->vnodeGid[victim], sizeof(SVnodeGid));
  pVgroup->vnodeGid[victim] = pVgroup->vnodeGid[pVgroup->replica];
  memset(&pVgroup->vnodeGid[pVgroup->replica], 0, sizeof(SVnodeGid));

  SSdbRaw *pEncoded = mndVgroupActionEncode(pVgroup);
  if (pEncoded == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }
  code = mndTransAppendRedolog(pTrans, pEncoded);
  if (code != 0) {
    sdbFreeRaw(pEncoded);
    TAOS_RETURN(code);
  }
  code = sdbSetRawStatus(pEncoded, SDB_STATUS_READY);
  if (code != 0) {
    mError("vgId:%d, failed to set raw status since %s at line:%d", pVgroup->vgId, tstrerror(code), __LINE__);
    TAOS_RETURN(code);
  }

  TAOS_CHECK_RETURN(mndAddDropVnodeAction(pMnode, pTrans, pDb, pVgroup, &removed, true));
  for (int32_t idx = 0; idx < pVgroup->replica; ++idx) {
    TAOS_CHECK_RETURN(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, pVgroup, pVgroup->vnodeGid[idx].dnodeId));
  }
  TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, pVgroup));

  TAOS_RETURN(code);
}
2220

2221
static int32_t mndRedistributeVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup, SDnodeObj *pNew1,
×
2222
                                     SDnodeObj *pOld1, SDnodeObj *pNew2, SDnodeObj *pOld2, SDnodeObj *pNew3,
2223
                                     SDnodeObj *pOld3) {
2224
  int32_t code = -1;
×
2225
  STrans *pTrans = NULL;
×
2226

2227
  pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "red-vgroup");
×
2228
  if (pTrans == NULL) {
×
2229
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2230
    if (terrno != 0) code = terrno;
×
2231
    goto _OVER;
×
2232
  }
2233

2234
  mndTransSetDbName(pTrans, pVgroup->dbName, NULL);
×
2235
  TAOS_CHECK_GOTO(mndTransCheckConflictWithCompact(pMnode, pTrans), NULL, _OVER);
×
2236

2237
  mndTransSetSerial(pTrans);
×
2238
  mInfo("trans:%d, used to redistribute vgroup, vgId:%d", pTrans->id, pVgroup->vgId);
×
2239

2240
  SVgObj newVg = {0};
×
2241
  memcpy(&newVg, pVgroup, sizeof(SVgObj));
×
2242
  mInfo("vgId:%d, vgroup info before redistribute, replica:%d", newVg.vgId, newVg.replica);
×
2243
  for (int32_t i = 0; i < newVg.replica; ++i) {
×
2244
    mInfo("vgId:%d, vnode:%d dnode:%d role:%s", newVg.vgId, i, newVg.vnodeGid[i].dnodeId,
×
2245
          syncStr(newVg.vnodeGid[i].syncState));
2246
  }
2247

2248
  if (pNew1 != NULL && pOld1 != NULL) {
×
2249
    int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew1->id);
×
2250
    if (numOfVnodes >= pNew1->numOfSupportVnodes) {
×
2251
      mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew1->id, numOfVnodes,
×
2252
             pNew1->numOfSupportVnodes);
2253
      code = TSDB_CODE_MND_NO_ENOUGH_VNODES;
×
2254
      goto _OVER;
×
2255
    }
2256

2257
    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
×
2258
    if (pNew1->memAvail - vgMem - pNew1->memUsed <= 0) {
×
2259
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
×
2260
             pVgroup->dbName, pVgroup->vgId, vgMem, pNew1->id, pNew1->memAvail, pNew1->memUsed);
2261
      code = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
×
2262
      goto _OVER;
×
2263
    } else {
2264
      pNew1->memUsed += vgMem;
×
2265
    }
2266

2267
    TAOS_CHECK_GOTO(mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew1->id), NULL, _OVER);
×
2268
    TAOS_CHECK_GOTO(mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld1->id), NULL, _OVER);
×
2269
  }
2270

2271
  if (pNew2 != NULL && pOld2 != NULL) {
×
2272
    int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew2->id);
×
2273
    if (numOfVnodes >= pNew2->numOfSupportVnodes) {
×
2274
      mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew2->id, numOfVnodes,
×
2275
             pNew2->numOfSupportVnodes);
2276
      code = TSDB_CODE_MND_NO_ENOUGH_VNODES;
×
2277
      goto _OVER;
×
2278
    }
2279
    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
×
2280
    if (pNew2->memAvail - vgMem - pNew2->memUsed <= 0) {
×
2281
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
×
2282
             pVgroup->dbName, pVgroup->vgId, vgMem, pNew2->id, pNew2->memAvail, pNew2->memUsed);
2283
      code = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
×
2284
      goto _OVER;
×
2285
    } else {
2286
      pNew2->memUsed += vgMem;
×
2287
    }
2288
    TAOS_CHECK_GOTO(mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew2->id), NULL, _OVER);
×
2289
    TAOS_CHECK_GOTO(mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld2->id), NULL, _OVER);
×
2290
  }
2291

2292
  if (pNew3 != NULL && pOld3 != NULL) {
×
2293
    int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew3->id);
×
2294
    if (numOfVnodes >= pNew3->numOfSupportVnodes) {
×
2295
      mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew3->id, numOfVnodes,
×
2296
             pNew3->numOfSupportVnodes);
2297
      code = TSDB_CODE_MND_NO_ENOUGH_VNODES;
×
2298
      goto _OVER;
×
2299
    }
2300
    int64_t vgMem = mndGetVgroupMemory(pMnode, NULL, pVgroup);
×
2301
    if (pNew3->memAvail - vgMem - pNew3->memUsed <= 0) {
×
2302
      mError("db:%s, vgId:%d, no enough memory:%" PRId64 " in dnode:%d avail:%" PRId64 " used:%" PRId64,
×
2303
             pVgroup->dbName, pVgroup->vgId, vgMem, pNew3->id, pNew3->memAvail, pNew3->memUsed);
2304
      code = TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE;
×
2305
      goto _OVER;
×
2306
    } else {
2307
      pNew3->memUsed += vgMem;
×
2308
    }
2309
    TAOS_CHECK_GOTO(mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew3->id), NULL, _OVER);
×
2310
    TAOS_CHECK_GOTO(mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld3->id), NULL, _OVER);
×
2311
  }
2312

2313
  {
2314
    SSdbRaw *pRaw = mndVgroupActionEncode(&newVg);
×
2315
    if (pRaw == NULL) {
×
2316
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2317
      if (terrno != 0) code = terrno;
×
2318
      goto _OVER;
×
2319
    }
2320
    if ((code = mndTransAppendCommitlog(pTrans, pRaw)) != 0) {
×
2321
      sdbFreeRaw(pRaw);
×
2322
      goto _OVER;
×
2323
    }
2324
    code = sdbSetRawStatus(pRaw, SDB_STATUS_READY);
×
2325
    if (code != 0) {
×
2326
      mError("vgId:%d, failed to set raw status since %s at line:%d", newVg.vgId, tstrerror(code), __LINE__);
×
2327
      goto _OVER;
×
2328
    }
2329
  }
2330

2331
  mInfo("vgId:%d, vgroup info after redistribute, replica:%d", newVg.vgId, newVg.replica);
×
2332
  for (int32_t i = 0; i < newVg.replica; ++i) {
×
2333
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId);
×
2334
  }
2335

2336
  TAOS_CHECK_GOTO(mndTransPrepare(pMnode, pTrans), NULL, _OVER);
×
2337
  code = 0;
×
2338

2339
_OVER:
×
2340
  mndTransDrop(pTrans);
×
2341
  mndReleaseDb(pMnode, pDb);
×
2342
  TAOS_RETURN(code);
×
2343
}
2344

2345
static int32_t mndProcessRedistributeVgroupMsg(SRpcMsg *pReq) {
×
2346
  SMnode    *pMnode = pReq->info.node;
×
2347
  SDnodeObj *pNew1 = NULL;
×
2348
  SDnodeObj *pNew2 = NULL;
×
2349
  SDnodeObj *pNew3 = NULL;
×
2350
  SDnodeObj *pOld1 = NULL;
×
2351
  SDnodeObj *pOld2 = NULL;
×
2352
  SDnodeObj *pOld3 = NULL;
×
2353
  SVgObj    *pVgroup = NULL;
×
2354
  SDbObj    *pDb = NULL;
×
2355
  int32_t    code = -1;
×
2356
  int64_t    curMs = taosGetTimestampMs();
×
2357
  int32_t    newDnodeId[3] = {0};
×
2358
  int32_t    oldDnodeId[3] = {0};
×
2359
  int32_t    newIndex = -1;
×
2360
  int32_t    oldIndex = -1;
×
2361

2362
  SRedistributeVgroupReq req = {0};
×
2363
  if (tDeserializeSRedistributeVgroupReq(pReq->pCont, pReq->contLen, &req) != 0) {
×
2364
    code = TSDB_CODE_INVALID_MSG;
×
2365
    goto _OVER;
×
2366
  }
2367

2368
  mInfo("vgId:%d, start to redistribute vgroup to dnode %d:%d:%d", req.vgId, req.dnodeId1, req.dnodeId2, req.dnodeId3);
×
2369
  if ((code = mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_REDISTRIBUTE_VGROUP)) != 0) {
×
2370
    goto _OVER;
×
2371
  }
2372

2373
  pVgroup = mndAcquireVgroup(pMnode, req.vgId);
×
2374
  if (pVgroup == NULL) {
×
2375
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2376
    if (terrno != 0) code = terrno;
×
2377
    goto _OVER;
×
2378
  }
2379

2380
  pDb = mndAcquireDb(pMnode, pVgroup->dbName);
×
2381
  if (pDb == NULL) {
×
2382
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2383
    if (terrno != 0) code = terrno;
×
2384
    goto _OVER;
×
2385
  }
2386

2387
  if (pVgroup->replica == 1) {
×
2388
    if (req.dnodeId1 <= 0 || req.dnodeId2 > 0 || req.dnodeId3 > 0) {
×
2389
      code = TSDB_CODE_MND_INVALID_REPLICA;
×
2390
      goto _OVER;
×
2391
    }
2392

2393
    if (req.dnodeId1 == pVgroup->vnodeGid[0].dnodeId) {
×
2394
      // terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED;
2395
      code = 0;
×
2396
      goto _OVER;
×
2397
    }
2398

2399
    pNew1 = mndAcquireDnode(pMnode, req.dnodeId1);
×
2400
    if (pNew1 == NULL) {
×
2401
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2402
      if (terrno != 0) code = terrno;
×
2403
      goto _OVER;
×
2404
    }
2405
    if (!mndIsDnodeOnline(pNew1, curMs)) {
×
2406
      code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2407
      goto _OVER;
×
2408
    }
2409

2410
    pOld1 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[0].dnodeId);
×
2411
    if (pOld1 == NULL) {
×
2412
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2413
      if (terrno != 0) code = terrno;
×
2414
      goto _OVER;
×
2415
    }
2416
    if (!mndIsDnodeOnline(pOld1, curMs)) {
×
2417
      code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2418
      goto _OVER;
×
2419
    }
2420

2421
    code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, NULL, NULL, NULL, NULL);
×
2422

2423
  } else if (pVgroup->replica == 3) {
×
2424
    if (req.dnodeId1 <= 0 || req.dnodeId2 <= 0 || req.dnodeId3 <= 0) {
×
2425
      code = TSDB_CODE_MND_INVALID_REPLICA;
×
2426
      goto _OVER;
×
2427
    }
2428

2429
    if (req.dnodeId1 == req.dnodeId2 || req.dnodeId1 == req.dnodeId3 || req.dnodeId2 == req.dnodeId3) {
×
2430
      code = TSDB_CODE_MND_INVALID_REPLICA;
×
2431
      goto _OVER;
×
2432
    }
2433

2434
    if (req.dnodeId1 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId1 != pVgroup->vnodeGid[1].dnodeId &&
×
2435
        req.dnodeId1 != pVgroup->vnodeGid[2].dnodeId) {
×
2436
      newDnodeId[++newIndex] = req.dnodeId1;
×
2437
      mInfo("vgId:%d, dnode:%d will be added, index:%d", pVgroup->vgId, newDnodeId[newIndex], newIndex);
×
2438
    }
2439

2440
    if (req.dnodeId2 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[1].dnodeId &&
×
2441
        req.dnodeId2 != pVgroup->vnodeGid[2].dnodeId) {
×
2442
      newDnodeId[++newIndex] = req.dnodeId2;
×
2443
      mInfo("vgId:%d, dnode:%d will be added, index:%d", pVgroup->vgId, newDnodeId[newIndex], newIndex);
×
2444
    }
2445

2446
    if (req.dnodeId3 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId3 != pVgroup->vnodeGid[1].dnodeId &&
×
2447
        req.dnodeId3 != pVgroup->vnodeGid[2].dnodeId) {
×
2448
      newDnodeId[++newIndex] = req.dnodeId3;
×
2449
      mInfo("vgId:%d, dnode:%d will be added, index:%d", pVgroup->vgId, newDnodeId[newIndex], newIndex);
×
2450
    }
2451

2452
    if (req.dnodeId1 != pVgroup->vnodeGid[0].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[0].dnodeId &&
×
2453
        req.dnodeId3 != pVgroup->vnodeGid[0].dnodeId) {
×
2454
      oldDnodeId[++oldIndex] = pVgroup->vnodeGid[0].dnodeId;
×
2455
      mInfo("vgId:%d, dnode:%d will be removed, index:%d", pVgroup->vgId, oldDnodeId[oldIndex], oldIndex);
×
2456
    }
2457

2458
    if (req.dnodeId1 != pVgroup->vnodeGid[1].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[1].dnodeId &&
×
2459
        req.dnodeId3 != pVgroup->vnodeGid[1].dnodeId) {
×
2460
      oldDnodeId[++oldIndex] = pVgroup->vnodeGid[1].dnodeId;
×
2461
      mInfo("vgId:%d, dnode:%d will be removed, index:%d", pVgroup->vgId, oldDnodeId[oldIndex], oldIndex);
×
2462
    }
2463

2464
    if (req.dnodeId1 != pVgroup->vnodeGid[2].dnodeId && req.dnodeId2 != pVgroup->vnodeGid[2].dnodeId &&
×
2465
        req.dnodeId3 != pVgroup->vnodeGid[2].dnodeId) {
×
2466
      oldDnodeId[++oldIndex] = pVgroup->vnodeGid[2].dnodeId;
×
2467
      mInfo("vgId:%d, dnode:%d will be removed, index:%d", pVgroup->vgId, oldDnodeId[oldIndex], oldIndex);
×
2468
    }
2469

2470
    if (newDnodeId[0] != 0) {
×
2471
      pNew1 = mndAcquireDnode(pMnode, newDnodeId[0]);
×
2472
      if (pNew1 == NULL) {
×
2473
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2474
        if (terrno != 0) code = terrno;
×
2475
        goto _OVER;
×
2476
      }
2477
      if (!mndIsDnodeOnline(pNew1, curMs)) {
×
2478
        code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2479
        goto _OVER;
×
2480
      }
2481
    }
2482

2483
    if (newDnodeId[1] != 0) {
×
2484
      pNew2 = mndAcquireDnode(pMnode, newDnodeId[1]);
×
2485
      if (pNew2 == NULL) {
×
2486
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2487
        if (terrno != 0) code = terrno;
×
2488
        goto _OVER;
×
2489
      }
2490
      if (!mndIsDnodeOnline(pNew2, curMs)) {
×
2491
        code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2492
        goto _OVER;
×
2493
      }
2494
    }
2495

2496
    if (newDnodeId[2] != 0) {
×
2497
      pNew3 = mndAcquireDnode(pMnode, newDnodeId[2]);
×
2498
      if (pNew3 == NULL) {
×
2499
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2500
        if (terrno != 0) code = terrno;
×
2501
        goto _OVER;
×
2502
      }
2503
      if (!mndIsDnodeOnline(pNew3, curMs)) {
×
2504
        code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2505
        goto _OVER;
×
2506
      }
2507
    }
2508

2509
    if (oldDnodeId[0] != 0) {
×
2510
      pOld1 = mndAcquireDnode(pMnode, oldDnodeId[0]);
×
2511
      if (pOld1 == NULL) {
×
2512
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2513
        if (terrno != 0) code = terrno;
×
2514
        goto _OVER;
×
2515
      }
2516
      if (!mndIsDnodeOnline(pOld1, curMs)) {
×
2517
        code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2518
        goto _OVER;
×
2519
      }
2520
    }
2521

2522
    if (oldDnodeId[1] != 0) {
×
2523
      pOld2 = mndAcquireDnode(pMnode, oldDnodeId[1]);
×
2524
      if (pOld2 == NULL) {
×
2525
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2526
        if (terrno != 0) code = terrno;
×
2527
        goto _OVER;
×
2528
      }
2529
      if (!mndIsDnodeOnline(pOld2, curMs)) {
×
2530
        code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2531
        goto _OVER;
×
2532
      }
2533
    }
2534

2535
    if (oldDnodeId[2] != 0) {
×
2536
      pOld3 = mndAcquireDnode(pMnode, oldDnodeId[2]);
×
2537
      if (pOld3 == NULL) {
×
2538
        code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2539
        if (terrno != 0) code = terrno;
×
2540
        goto _OVER;
×
2541
      }
2542
      if (!mndIsDnodeOnline(pOld3, curMs)) {
×
2543
        code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
×
2544
        goto _OVER;
×
2545
      }
2546
    }
2547

2548
    if (pNew1 == NULL && pOld1 == NULL && pNew2 == NULL && pOld2 == NULL && pNew3 == NULL && pOld3 == NULL) {
×
2549
      // terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED;
2550
      code = 0;
×
2551
      goto _OVER;
×
2552
    }
2553

2554
    code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, pNew2, pOld2, pNew3, pOld3);
×
2555

2556
  } else {
2557
    code = TSDB_CODE_MND_REQ_REJECTED;
×
2558
    goto _OVER;
×
2559
  }
2560

2561
  if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS;
×
2562

2563
  char obj[33] = {0};
×
2564
  (void)tsnprintf(obj, sizeof(obj), "%d", req.vgId);
×
2565

2566
  auditRecord(pReq, pMnode->clusterId, "RedistributeVgroup", "", obj, req.sql, req.sqlLen);
×
2567

2568
_OVER:
×
2569
  if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
×
2570
    mError("vgId:%d, failed to redistribute to dnode %d:%d:%d since %s", req.vgId, req.dnodeId1, req.dnodeId2,
×
2571
           req.dnodeId3, tstrerror(code));
2572
  }
2573

2574
  mndReleaseDnode(pMnode, pNew1);
×
2575
  mndReleaseDnode(pMnode, pNew2);
×
2576
  mndReleaseDnode(pMnode, pNew3);
×
2577
  mndReleaseDnode(pMnode, pOld1);
×
2578
  mndReleaseDnode(pMnode, pOld2);
×
2579
  mndReleaseDnode(pMnode, pOld3);
×
2580
  mndReleaseVgroup(pMnode, pVgroup);
×
2581
  mndReleaseDb(pMnode, pDb);
×
2582
  tFreeSRedistributeVgroupReq(&req);
×
2583

2584
  TAOS_RETURN(code);
×
2585
}
2586

2587
// Builds the wire message for a "force become follower" request addressed to `pVgroup`.
//
// The message is an SMsgHead (contLen and vgId in network byte order) followed by the
// serialized SForceBecomeFollowerReq. `pMnode` and `dnodeId` are not used by this function.
//
// On success returns a buffer allocated with taosMemoryMalloc() that the caller owns, and
// stores its total length (header + body) in *pContLen.
// On failure returns NULL and sets terrno to TSDB_CODE_OUT_OF_MEMORY.
static void *mndBuildSForceBecomeFollowerReq(SMnode *pMnode, SVgObj *pVgroup, int32_t dnodeId, int32_t *pContLen) {
  SForceBecomeFollowerReq balanceReq = {
      .vgId = pVgroup->vgId,
  };

  // First pass with a NULL buffer only computes the serialized body size.
  int32_t bodyLen = tSerializeSForceBecomeFollowerReq(NULL, 0, &balanceReq);
  if (bodyLen < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }
  int32_t contLen = bodyLen + (int32_t)sizeof(SMsgHead);

  void *pReq = taosMemoryMalloc(contLen);
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  SMsgHead *pHead = pReq;
  pHead->contLen = htonl(contLen);
  pHead->vgId = htonl(pVgroup->vgId);

  // The body starts right after the header, so only bodyLen bytes are available there.
  // Passing contLen here would overstate the writable space by sizeof(SMsgHead).
  if (tSerializeSForceBecomeFollowerReq((char *)pReq + sizeof(SMsgHead), bodyLen, &balanceReq) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    taosMemoryFree(pReq);
    return NULL;
  }
  *pContLen = contLen;
  return pReq;
}
2617

2618
// Appends a TDMT_SYNC_FORCE_FOLLOWER redo action for `pVgroup`, addressed to dnode `dnodeId`,
// to transaction `pTrans`.
//
// Returns 0 on success. If the dnode cannot be acquired or the request cannot be built, returns
// terrno (or TSDB_CODE_MND_RETURN_VALUE_NULL when terrno is 0). If appending the action fails,
// the request buffer is freed and the append error is returned.
int32_t mndAddBalanceVgroupLeaderAction(SMnode *pMnode, STrans *pTrans, SVgObj *pVgroup, int32_t dnodeId) {
  int32_t      code = 0;
  STransAction action = {0};

  SDnodeObj *pTarget = mndAcquireDnode(pMnode, dnodeId);
  if (pTarget == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  // Only the endpoint set is needed from the dnode object; release it right away.
  action.epSet = mndGetDnodeEpset(pTarget);
  mndReleaseDnode(pMnode, pTarget);

  int32_t msgLen = 0;
  void   *pMsg = mndBuildSForceBecomeFollowerReq(pMnode, pVgroup, dnodeId, &msgLen);
  if (pMsg == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  action.pCont = pMsg;
  action.contLen = msgLen;
  action.msgType = TDMT_SYNC_FORCE_FOLLOWER;

  code = mndTransAppendRedoAction(pTrans, &action);
  if (code != 0) {
    // The action was not appended, so the buffer is still ours to free.
    taosMemoryFree(pMsg);
  }

  TAOS_RETURN(code);
}
2650

2651
// Adds a leader-balance action for `pVgroup` to `pTrans`.
//
// The target is the dnode whose replica currently reports TAOS_SYNC_STATE_LEADER (dnode id 0 is
// used when no replica does). The action is added only if that dnode exists and is online;
// otherwise the situation is logged and the function returns 0.
//
// Returns -1 when the vgroup has at most one replica, 0 on success or when the dnode is not
// usable, otherwise the error from adding the action or from acquiring the vgroup's database.
int32_t mndAddVgroupBalanceToTrans(SMnode *pMnode, SVgObj *pVgroup, STrans *pTrans) {
  int32_t code = 0;
  int32_t vgid = pVgroup->vgId;
  int8_t  replica = pVgroup->replica;

  if (replica <= 1) {
    mInfo("trans:%d, vgid:%d no need to balance, replica:%d", pTrans->id, vgid, replica);
    return -1;
  }

  // Pick the first replica that reports itself as leader.
  int32_t dnodeId = 0;
  for (int i = 0; i < replica; i++) {
    SVnodeGid *pGid = &pVgroup->vnodeGid[i];
    if (pGid->syncState != TAOS_SYNC_STATE_LEADER) continue;
    dnodeId = pGid->dnodeId;
    break;
  }

  bool       exist = false;
  bool       online = false;
  int64_t    curMs = taosGetTimestampMs();
  SDnodeObj *pLeader = mndAcquireDnode(pMnode, dnodeId);
  if (pLeader != NULL) {
    exist = true;
    online = mndIsDnodeOnline(pLeader, curMs);
    mndReleaseDnode(pMnode, pLeader);
  }

  if (!exist || !online) {
    mInfo("trans:%d, vgid:%d cant be balanced to dnode:%d, exist:%d, online:%d", pTrans->id, vgid, dnodeId, exist,
          online);
    TAOS_RETURN(code);
  }

  mInfo("trans:%d, vgid:%d leader to dnode:%d", pTrans->id, vgid, dnodeId);

  code = mndAddBalanceVgroupLeaderAction(pMnode, pTrans, pVgroup, dnodeId);
  if (code != 0) {
    mError("trans:%d, vgid:%d failed to be balanced to dnode:%d", pTrans->id, vgid, dnodeId);
    TAOS_RETURN(code);
  }

  // The database is acquired only to verify that it still exists.
  SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
  if (pDb == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    mError("trans:%d, vgid:%d failed to be balanced to dnode:%d, because db not exist", pTrans->id, vgid, dnodeId);
    TAOS_RETURN(code);
  }
  mndReleaseDb(pMnode, pDb);

  TAOS_RETURN(code);
}
2706

2707
extern int32_t mndProcessVgroupBalanceLeaderMsgImp(SRpcMsg *pReq);
2708

2709
// Public entry point for the "balance vgroup leader" message; forwards the request unchanged
// to mndProcessVgroupBalanceLeaderMsgImp() and returns its result.
int32_t mndProcessVgroupBalanceLeaderMsg(SRpcMsg *pReq) {
  int32_t code = mndProcessVgroupBalanceLeaderMsgImp(pReq);
  return code;
}
×
2710

2711
#ifndef TD_ENTERPRISE
2712
int32_t mndProcessVgroupBalanceLeaderMsgImp(SRpcMsg *pReq) { return 0; }
2713
#endif
2714

2715
// Re-accounts vgroup memory on every dnode in `pArray` and checks that each still has room.
//
// For each dnode, the memory of the old vgroup layout (if the dnode hosts a replica of
// `pOldVgroup`) is subtracted from memUsed and the memory of the new layout (if it hosts a
// replica of `pNewVgroup`) is added. The dnode's memUsed field is updated in place.
//
// Returns TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE as soon as a dnode ends up with
// memAvail - memUsed <= 0 (dnodes processed before it keep their updated memUsed),
// otherwise 0. Note that the check is applied to every dnode in the array, including
// dnodes that host no replica of either vgroup.
static int32_t mndCheckDnodeMemory(SMnode *pMnode, SDbObj *pOldDb, SDbObj *pNewDb, SVgObj *pOldVgroup,
                                   SVgObj *pNewVgroup, SArray *pArray) {
  int32_t numOfDnodes = (int32_t)taosArrayGetSize(pArray);

  for (int32_t idx = 0; idx < numOfDnodes; ++idx) {
    SDnodeObj *pDnode = taosArrayGet(pArray, idx);
    bool       inVgroup = false;
    int64_t    oldMemUsed = 0;
    int64_t    newMemUsed = 0;
    mDebug("db:%s, vgId:%d, check dnode:%d, avail:%" PRId64 " used:%" PRId64, pNewVgroup->dbName, pNewVgroup->vgId,
           pDnode->id, pDnode->memAvail, pDnode->memUsed);

    for (int32_t r = 0; r < pOldVgroup->replica; ++r) {
      if (pOldVgroup->vnodeGid[r].dnodeId != pDnode->id) continue;
      oldMemUsed = mndGetVgroupMemory(pMnode, pOldDb, pOldVgroup);
      inVgroup = true;
    }

    for (int32_t r = 0; r < pNewVgroup->replica; ++r) {
      if (pNewVgroup->vnodeGid[r].dnodeId != pDnode->id) continue;
      newMemUsed = mndGetVgroupMemory(pMnode, pNewDb, pNewVgroup);
      inVgroup = true;
    }

    mDebug("db:%s, vgId:%d, memory in dnode:%d, oldUsed:%" PRId64 ", newUsed:%" PRId64, pNewVgroup->dbName,
           pNewVgroup->vgId, pDnode->id, oldMemUsed, newMemUsed);

    pDnode->memUsed = pDnode->memUsed - oldMemUsed + newMemUsed;

    if (pDnode->memAvail - pDnode->memUsed <= 0) {
      mError("db:%s, vgId:%d, no enough memory in dnode:%d, avail:%" PRId64 " used:%" PRId64, pNewVgroup->dbName,
             pNewVgroup->vgId, pDnode->id, pDnode->memAvail, pDnode->memUsed);
      TAOS_RETURN(TSDB_CODE_MND_NO_ENOUGH_MEM_IN_DNODE);
    }

    if (inVgroup) {
      mInfo("db:%s, vgId:%d, memory in dnode:%d, avail:%" PRId64 " used:%" PRId64, pNewVgroup->dbName, pNewVgroup->vgId,
            pDnode->id, pDnode->memAvail, pDnode->memUsed);
    }
  }

  return 0;
}
2755

2756
int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb, SDbObj *pNewDb, SVgObj *pVgroup,
×
2757
                                  SArray *pArray, SVgObj *pNewVgroup) {
2758
  int32_t code = 0;
×
2759
  memcpy(pNewVgroup, pVgroup, sizeof(SVgObj));
×
2760

2761
  if (pVgroup->replica <= 0 || pVgroup->replica == pNewDb->cfg.replications) {
×
2762
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfigAction(pMnode, pTrans, pNewDb, pVgroup));
×
2763
    TAOS_CHECK_RETURN(mndCheckDnodeMemory(pMnode, pOldDb, pNewDb, pNewVgroup, pVgroup, pArray));
×
2764
    return 0;
×
2765
  }
2766

2767
  mndTransSetSerial(pTrans);
×
2768

2769
  if (pNewDb->cfg.replications == 3) {
×
2770
    mInfo("db:%s, vgId:%d, will add 2 vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId,
×
2771
          pVgroup->vnodeGid[0].dnodeId);
2772

2773
    // add second
2774
    if (pNewVgroup->replica == 1){
×
2775
      TAOS_CHECK_RETURN(mndAddVnodeToVgroup(pMnode, pTrans, pNewVgroup, pArray));
×
2776
    }
2777

2778
    // learner stage
2779
    pNewVgroup->vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2780
    pNewVgroup->vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
×
2781
    TAOS_CHECK_RETURN(
×
2782
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[0].dnodeId));
2783

2784
    TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, pNewVgroup, &pNewVgroup->vnodeGid[1]));
×
2785

2786
    // follower stage
2787
    pNewVgroup->vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2788
    TAOS_CHECK_RETURN(mndAddAlterVnodeTypeAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[1].dnodeId));
×
2789
    TAOS_CHECK_RETURN(
×
2790
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[0].dnodeId));
2791

2792
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, pNewVgroup));
×
2793

2794
    // add third
2795
    if (pNewVgroup->replica == 2){
×
2796
      TAOS_CHECK_RETURN (mndAddVnodeToVgroup(pMnode, pTrans, pNewVgroup, pArray));
×
2797
    }
2798

2799
    pNewVgroup->vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2800
    pNewVgroup->vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2801
    pNewVgroup->vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2802
    TAOS_CHECK_RETURN(
×
2803
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[0].dnodeId));
2804
    TAOS_CHECK_RETURN(
×
2805
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[1].dnodeId));
2806
    TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, pNewVgroup, &pNewVgroup->vnodeGid[2]));
×
2807

2808
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, pNewVgroup));
×
2809
  } else if (pNewDb->cfg.replications == 1) {
×
2810
    mInfo("db:%s, vgId:%d, will remove 2 vnodes, vn:0 dnode:%d vn:1 dnode:%d vn:2 dnode:%d", pVgroup->dbName,
×
2811
          pVgroup->vgId, pVgroup->vnodeGid[0].dnodeId, pVgroup->vnodeGid[1].dnodeId, pVgroup->vnodeGid[2].dnodeId);
2812

2813
    SVnodeGid del1 = {0};
×
2814
    SVnodeGid del2 = {0};
×
2815
    TAOS_CHECK_RETURN(mndRemoveVnodeFromVgroup(pMnode, pTrans, pNewVgroup, pArray, &del1));
×
2816
    TAOS_CHECK_RETURN(mndAddDropVnodeAction(pMnode, pTrans, pNewDb, pNewVgroup, &del1, true));
×
2817
    TAOS_CHECK_RETURN(
×
2818
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[0].dnodeId));
2819
    TAOS_CHECK_RETURN(
×
2820
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[1].dnodeId));
2821
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, pNewVgroup));
×
2822

2823
    TAOS_CHECK_RETURN(mndRemoveVnodeFromVgroup(pMnode, pTrans, pNewVgroup, pArray, &del2));
×
2824
    TAOS_CHECK_RETURN(mndAddDropVnodeAction(pMnode, pTrans, pNewDb, pNewVgroup, &del2, true));
×
2825
    TAOS_CHECK_RETURN(
×
2826
      mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[0].dnodeId));
2827
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, pNewVgroup));
×
2828
  } else if (pNewDb->cfg.replications == 2) {
×
2829
    mInfo("db:%s, vgId:%d, will add 1 vnode, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId,
×
2830
          pVgroup->vnodeGid[0].dnodeId);
2831

2832
    // add second
2833
    TAOS_CHECK_RETURN(mndAddVnodeToVgroup(pMnode, pTrans, pNewVgroup, pArray));
×
2834

2835
    // learner stage
2836
    pNewVgroup->vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2837
    pNewVgroup->vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
×
2838
    TAOS_CHECK_RETURN(
×
2839
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[0].dnodeId));
2840

2841
    TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, pNewVgroup, &pNewVgroup->vnodeGid[1]));
×
2842

2843
    // follower stage
2844
    pNewVgroup->vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2845
    TAOS_CHECK_RETURN(mndAddAlterVnodeTypeAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[1].dnodeId));
×
2846
    TAOS_CHECK_RETURN(
×
2847
        mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, pNewVgroup, pNewVgroup->vnodeGid[0].dnodeId));
2848

2849
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, pNewVgroup));
×
2850
  } else {
2851
    return -1;
×
2852
  }
2853

2854
  mndSortVnodeGid(pNewVgroup);
×
2855

2856
  {
2857
    SSdbRaw *pVgRaw = mndVgroupActionEncode(pNewVgroup);
×
2858
    if (pVgRaw == NULL) {
×
2859
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2860
      if (terrno != 0) code = terrno;
×
2861
      TAOS_RETURN(code);
×
2862
    }
2863
    if ((code = mndTransAppendCommitlog(pTrans, pVgRaw)) != 0) {
×
2864
      sdbFreeRaw(pVgRaw);
×
2865
      TAOS_RETURN(code);
×
2866
    }
2867
    code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
×
2868
    if (code != 0) {
×
2869
      mError("vgId:%d, failed to set raw status since %s at line:%d", pNewVgroup->vgId, tstrerror(code), __LINE__);
×
2870
      TAOS_RETURN(code);
×
2871
    }
2872
  }
2873

2874
  TAOS_RETURN(code);
×
2875
}
2876

2877
int32_t mndBuildRaftAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb, SDbObj *pNewDb, SVgObj *pVgroup,
×
2878
                                      SArray *pArray) {
2879
  int32_t code = 0;
×
2880
  SVgObj  newVgroup = {0};
×
2881
  memcpy(&newVgroup, pVgroup, sizeof(SVgObj));
×
2882

2883
  if (pVgroup->replica <= 0 || pVgroup->replica == pNewDb->cfg.replications) {
×
2884
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfigAction(pMnode, pTrans, pNewDb, pVgroup));
×
2885
    TAOS_CHECK_RETURN(mndCheckDnodeMemory(pMnode, pOldDb, pNewDb, &newVgroup, pVgroup, pArray));
×
2886
    return 0;
×
2887
  }
2888

2889
  mndTransSetSerial(pTrans);
×
2890

2891
  mInfo("trans:%d, vgId:%d, alter vgroup, syncConfChangeVer:%d, version:%d, replica:%d", pTrans->id, pVgroup->vgId,
×
2892
        pVgroup->syncConfChangeVer, pVgroup->version, pVgroup->replica);
2893

2894
  if (newVgroup.replica == 1 && pNewDb->cfg.replications == 3) {
×
2895
    mInfo("db:%s, vgId:%d, will add 2 vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId,
×
2896
          pVgroup->vnodeGid[0].dnodeId);
2897

2898
    // add second
2899
    TAOS_CHECK_RETURN(mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray));
×
2900
    // add third
2901
    TAOS_CHECK_RETURN(mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray));
×
2902

2903
    // add learner stage
2904
    newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2905
    newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
×
2906
    newVgroup.vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_LEARNER;
×
2907
    TAOS_CHECK_RETURN(
×
2908
        mndAddChangeConfigAction(pMnode, pTrans, pNewDb, pVgroup, &newVgroup, newVgroup.vnodeGid[0].dnodeId));
2909
    mInfo("trans:%d, vgId:%d, add change config, syncConfChangeVer:%d, version:%d, replica:%d", pTrans->id,
×
2910
          pVgroup->vgId, newVgroup.syncConfChangeVer, pVgroup->version, pVgroup->replica);
2911
    TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[1]));
×
2912
    mInfo("trans:%d, vgId:%d, create vnode, syncConfChangeVer:%d, version:%d, replica:%d", pTrans->id, pVgroup->vgId,
×
2913
          newVgroup.syncConfChangeVer, pVgroup->version, pVgroup->replica);
2914
    TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[2]));
×
2915
    mInfo("trans:%d, vgId:%d, create vnode, syncConfChangeVer:%d, version:%d, replica:%d", pTrans->id, pVgroup->vgId,
×
2916
          newVgroup.syncConfChangeVer, pVgroup->version, pVgroup->replica);
2917

2918
    // check learner
2919
    newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2920
    newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2921
    newVgroup.vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2922
    TAOS_CHECK_RETURN(
×
2923
        mndAddCheckLearnerCatchupAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId));
2924
    TAOS_CHECK_RETURN(
×
2925
        mndAddCheckLearnerCatchupAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[2].dnodeId));
2926

2927
    // change raft type
2928
    newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2929
    newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2930
    newVgroup.vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_LEARNER;
×
2931
    TAOS_CHECK_RETURN(
×
2932
        mndAddChangeConfigAction(pMnode, pTrans, pNewDb, pVgroup, &newVgroup, newVgroup.vnodeGid[0].dnodeId));
2933

2934
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup));
×
2935

2936
    newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2937
    newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2938
    newVgroup.vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_VOTER;
×
2939
    TAOS_CHECK_RETURN(
×
2940
        mndAddChangeConfigAction(pMnode, pTrans, pNewDb, pVgroup, &newVgroup, newVgroup.vnodeGid[0].dnodeId));
2941

2942
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup));
×
2943

2944
    SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup);
×
2945
    if (pVgRaw == NULL) {
×
2946
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2947
      if (terrno != 0) code = terrno;
×
2948
      TAOS_RETURN(code);
×
2949
    }
2950
    if ((code = mndTransAppendRedolog(pTrans, pVgRaw)) != 0) {
×
2951
      sdbFreeRaw(pVgRaw);
×
2952
      TAOS_RETURN(code);
×
2953
    }
2954
    code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
×
2955
    if (code != 0) {
×
2956
      mError("vgId:%d, failed to set raw status to ready, error:%s, line:%d", newVgroup.vgId, tstrerror(code),
×
2957
             __LINE__);
2958
      TAOS_RETURN(code);
×
2959
    }
2960
  } else if (newVgroup.replica == 3 && pNewDb->cfg.replications == 1) {
×
2961
    mInfo("db:%s, vgId:%d, will remove 2 vnodes, vn:0 dnode:%d vn:1 dnode:%d vn:2 dnode:%d", pVgroup->dbName,
×
2962
          pVgroup->vgId, pVgroup->vnodeGid[0].dnodeId, pVgroup->vnodeGid[1].dnodeId, pVgroup->vnodeGid[2].dnodeId);
2963

2964
    SVnodeGid del1 = {0};
×
2965
    TAOS_CHECK_RETURN(mndRemoveVnodeFromVgroupWithoutSave(pMnode, pTrans, &newVgroup, pArray, &del1));
×
2966

2967
    TAOS_CHECK_RETURN(
×
2968
        mndAddChangeConfigAction(pMnode, pTrans, pNewDb, pVgroup, &newVgroup, newVgroup.vnodeGid[0].dnodeId));
2969

2970
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup));
×
2971

2972
    TAOS_CHECK_RETURN(mndAddDropVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &del1, true));
×
2973

2974
    SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup);
×
2975
    if (pVgRaw == NULL) {
×
2976
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
2977
      if (terrno != 0) code = terrno;
×
2978
      TAOS_RETURN(code);
×
2979
    }
2980
    if ((code = mndTransAppendRedolog(pTrans, pVgRaw)) != 0) {
×
2981
      sdbFreeRaw(pVgRaw);
×
2982
      TAOS_RETURN(code);
×
2983
    }
2984
    code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
×
2985
    if (code != 0) {
×
2986
      mError("vgId:%d, failed to set raw status to ready, error:%s, line:%d", newVgroup.vgId, tstrerror(code),
×
2987
             __LINE__);
2988
      TAOS_RETURN(code);
×
2989
    }
2990

2991
    SVnodeGid del2 = {0};
×
2992
    TAOS_CHECK_RETURN(mndRemoveVnodeFromVgroupWithoutSave(pMnode, pTrans, &newVgroup, pArray, &del2));
×
2993

2994
    TAOS_CHECK_RETURN(
×
2995
        mndAddChangeConfigAction(pMnode, pTrans, pNewDb, pVgroup, &newVgroup, newVgroup.vnodeGid[0].dnodeId));
2996

2997
    TAOS_CHECK_RETURN(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup));
×
2998

2999
    TAOS_CHECK_RETURN(mndAddDropVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &del2, true));
×
3000

3001
    pVgRaw = mndVgroupActionEncode(&newVgroup);
×
3002
    if (pVgRaw == NULL) {
×
3003
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
3004
      if (terrno != 0) code = terrno;
×
3005
      TAOS_RETURN(code);
×
3006
    }
3007
    if ((code = mndTransAppendRedolog(pTrans, pVgRaw)) != 0) {
×
3008
      sdbFreeRaw(pVgRaw);
×
3009
      TAOS_RETURN(code);
×
3010
    }
3011
    code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
×
3012
    if (code != 0) {
×
3013
      mError("vgId:%d, failed to set raw status to ready, error:%s, line:%d", newVgroup.vgId, tstrerror(code),
×
3014
             __LINE__);
3015
      TAOS_RETURN(code);
×
3016
    }
3017
  } else {
3018
    return -1;
×
3019
  }
3020

3021
  mndSortVnodeGid(&newVgroup);
×
3022

3023
  {
3024
    SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup);
×
3025
    if (pVgRaw == NULL) {
×
3026
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
3027
      if (terrno != 0) code = terrno;
×
3028
      TAOS_RETURN(code);
×
3029
    }
3030
    if ((code = mndTransAppendCommitlog(pTrans, pVgRaw)) != 0) {
×
3031
      sdbFreeRaw(pVgRaw);
×
3032
      TAOS_RETURN(code);
×
3033
    }
3034
    code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
×
3035
    if (code != 0) {
×
3036
      mError("vgId:%d, failed to set raw status to ready, error:%s, line:%d", newVgroup.vgId, tstrerror(code),
×
3037
             __LINE__);
3038
      TAOS_RETURN(code);
×
3039
    }
3040
  }
3041

3042
  TAOS_RETURN(code);
×
3043
}
3044

3045
// Marks the replica of `pVg` located on dnode `learnerDnodeId` as learner and all others as voter.
static void mndRestoreMarkLearner(SVgObj *pVg, int32_t learnerDnodeId) {
  for (int i = 0; i < pVg->replica; i++) {
    SVnodeGid *pGid = &pVg->vnodeGid[i];
    pGid->nodeRole = (pGid->dnodeId == learnerDnodeId) ? TAOS_SYNC_ROLE_LEARNER : TAOS_SYNC_ROLE_VOTER;
  }
}

// Marks every replica of `pVg` as voter.
static void mndRestoreMarkAllVoters(SVgObj *pVg) {
  for (int i = 0; i < pVg->replica; i++) {
    pVg->vnodeGid[i].nodeRole = TAOS_SYNC_ROLE_VOTER;
  }
}

// Builds the transaction actions that restore the vnode of `pVgroup` located on `pDnode`.
//
// Works on a local copy of the vgroup:
//   replica 1: the vnode on `pDnode` is re-created as a voter;
//   replica 2: the vnode on `pDnode` is marked learner, the type is altered on `pAnotherDnode`,
//              the vnode is re-created, then both dnodes are altered back to all-voter;
//   replica 3: the vnode on `pDnode` is re-created as learner and then altered to voter.
// Any other replica count adds no actions. In all cases the resulting vgroup copy is written
// to the commit log with status READY.
//
// Returns 0 on success, otherwise the first error.
int32_t mndBuildRestoreAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *db, SVgObj *pVgroup, SDnodeObj *pDnode,
                                         SDnodeObj *pAnotherDnode) {
  int32_t code = 0;
  SVgObj  newVgroup = {0};
  memcpy(&newVgroup, pVgroup, sizeof(SVgObj));

  mInfo("db:%s, vgId:%d, restore vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId, pVgroup->vnodeGid[0].dnodeId);

  switch (newVgroup.replica) {
    case 1: {
      // Falls back to index 0 when no replica matches pDnode.
      int target = 0;
      for (int i = 0; i < newVgroup.replica; i++) {
        newVgroup.vnodeGid[i].nodeRole = TAOS_SYNC_ROLE_VOTER;
        if (newVgroup.vnodeGid[i].dnodeId == pDnode->id) target = i;
      }
      TAOS_CHECK_RETURN(mndAddCreateVnodeAction(pMnode, pTrans, db, &newVgroup, &newVgroup.vnodeGid[target]));
      break;
    }

    case 2:
      mndRestoreMarkLearner(&newVgroup, pDnode->id);
      TAOS_CHECK_RETURN(mndRestoreAddAlterVnodeTypeAction(pMnode, pTrans, db, &newVgroup, pAnotherDnode));

      // Roles are re-applied before the create action, as in the step above.
      mndRestoreMarkLearner(&newVgroup, pDnode->id);
      TAOS_CHECK_RETURN(mndRestoreAddCreateVnodeAction(pMnode, pTrans, db, &newVgroup, pDnode));

      mndRestoreMarkAllVoters(&newVgroup);
      TAOS_CHECK_RETURN(mndRestoreAddAlterVnodeTypeAction(pMnode, pTrans, db, &newVgroup, pDnode));
      TAOS_CHECK_RETURN(mndRestoreAddAlterVnodeTypeAction(pMnode, pTrans, db, &newVgroup, pAnotherDnode));
      break;

    case 3:
      mndRestoreMarkLearner(&newVgroup, pDnode->id);
      TAOS_CHECK_RETURN(mndRestoreAddCreateVnodeAction(pMnode, pTrans, db, &newVgroup, pDnode));

      mndRestoreMarkAllVoters(&newVgroup);
      TAOS_CHECK_RETURN(mndRestoreAddAlterVnodeTypeAction(pMnode, pTrans, db, &newVgroup, pDnode));
      break;

    default:
      break;
  }

  SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup);
  if (pVgRaw == NULL) {
    code = (terrno != 0) ? terrno : TSDB_CODE_MND_RETURN_VALUE_NULL;
    TAOS_RETURN(code);
  }

  code = mndTransAppendCommitlog(pTrans, pVgRaw);
  if (code != 0) {
    // Not appended, so the raw object is still ours to free.
    sdbFreeRaw(pVgRaw);
    TAOS_RETURN(code);
  }

  code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
  if (code != 0) {
    mError("vgId:%d, failed to set raw status to ready, error:%s, line:%d", newVgroup.vgId, tstrerror(code), __LINE__);
    TAOS_RETURN(code);
  }

  TAOS_RETURN(code);
}
3123

3124
// Placeholder hook for adjusting a vnode hash range: it registers nothing and always reports success.
// None of the parameters are read.
static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) {
  return 0;
}
3127

3128
// Callback type for appending an encoded sdb record to a transaction; used below to pick between
// mndTransAppendCommitlog and mndTransAppendRedolog depending on the requested stage.
typedef int32_t (*FpTransActionCb)(STrans *pTrans, SSdbRaw *pRaw);
3129

3130
// Encode a vgroup object and append it to the transaction with the given sdb status.
//
// pTrans   - transaction receiving the encoded record
// pVg      - vgroup object to encode
// vgStatus - status stamped on the encoded record
// stage    - TRN_STAGE_COMMIT_ACTION appends to the commit log, any other value appends to the redo log
//
// Returns 0 on success; otherwise the failing step's error code (terrno is preferred when the encoder set it).
static int32_t mndAddVgStatusAction(STrans *pTrans, SVgObj *pVg, ESdbStatus vgStatus, ETrnStage stage) {
  int32_t         code = 0;
  FpTransActionCb appendActionCb = (stage == TRN_STAGE_COMMIT_ACTION) ? mndTransAppendCommitlog : mndTransAppendRedolog;

  SSdbRaw *pRaw = mndVgroupActionEncode(pVg);
  if (pRaw == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    TAOS_RETURN(code);
  }

  if ((code = appendActionCb(pTrans, pRaw)) != 0) {
    // The append failed, so the record was not handed over and is still ours to release.
    sdbFreeRaw(pRaw);
    TAOS_RETURN(code);
  }

  // Once appended, the record is held by the transaction. Like the other append sites in this file, do not
  // free it here on failure: releasing it would leave the transaction with a dangling pointer.
  code = sdbSetRawStatus(pRaw, vgStatus);
  if (code != 0) {
    mError("vgId:%d, failed to set raw status to ready, error:%s, line:%d", pVg->vgId, tstrerror(code), __LINE__);
  }
  TAOS_RETURN(code);
}
3151

3152
// Encode a database object and append it to the transaction with the given sdb status.
//
// pTrans   - transaction receiving the encoded record
// pDb      - database object to encode
// dbStatus - status stamped on the encoded record
// stage    - TRN_STAGE_COMMIT_ACTION appends to the commit log, any other value appends to the redo log
//
// Returns 0 on success; otherwise the failing step's error code (terrno is preferred when the encoder set it).
static int32_t mndAddDbStatusAction(STrans *pTrans, SDbObj *pDb, ESdbStatus dbStatus, ETrnStage stage) {
  int32_t         code = 0;
  FpTransActionCb appendActionCb = (stage == TRN_STAGE_COMMIT_ACTION) ? mndTransAppendCommitlog : mndTransAppendRedolog;

  SSdbRaw *pRaw = mndDbActionEncode(pDb);
  if (pRaw == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    TAOS_RETURN(code);
  }

  if ((code = appendActionCb(pTrans, pRaw)) != 0) {
    // The append failed, so the record was not handed over and is still ours to release.
    sdbFreeRaw(pRaw);
    TAOS_RETURN(code);
  }

  // Once appended, the record is held by the transaction. Like the other append sites in this file, do not
  // free it here on failure: releasing it would leave the transaction with a dangling pointer.
  code = sdbSetRawStatus(pRaw, dbStatus);
  if (code != 0) {
    mError("db:%s, failed to set raw status to ready, error:%s, line:%d", pDb->name, tstrerror(code), __LINE__);
  }
  TAOS_RETURN(code);
}
3173

3174
// Build and prepare a "split-vgroup" transaction that turns one vgroup into two single-replica vgroups,
// each covering half of the original hash range, and then brings each back to the db's replica count.
//
// pMnode  - mnode context
// pReq    - originating rpc request, attached to the transaction
// pDb     - database owning the vgroup
// pVgroup - vgroup to split; it is appended with SDB_STATUS_DROPPED at the end of the transaction
//
// The split is refused when the db has streams, when S3 is enabled (USE_S3 builds), when the db uses an
// arbitrator, or when the vgroup's replica count is neither 1 nor 3.
//
// Returns 0 once the transaction has been prepared, otherwise an error code.
int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) {
  int32_t code = -1;
  STrans *pTrans = NULL;
  SDbObj  dbObj = {0};
  SArray *pArray = mndBuildDnodesArray(pMnode, 0, NULL);
  if (pArray == NULL) {
    // Same handling as mndProcessBalanceVgroupMsg: without the dnode list no vnode can be placed.
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    goto _OVER;
  }

  int32_t numOfStreams = 0;
  if ((code = mndGetNumOfStreams(pMnode, pDb->name, &numOfStreams)) != 0) {
    goto _OVER;
  }
  if (numOfStreams > 0) {
    code = TSDB_CODE_MND_STREAM_MUST_BE_DELETED;
    goto _OVER;
  }

#if defined(USE_S3)
  extern int8_t tsS3Enabled;
  if (tsS3Enabled) {
    code = TSDB_CODE_OPS_NOT_SUPPORT;
    mError("vgId:%d, db:%s, s3 exists, split vgroup not allowed", pVgroup->vgId, pVgroup->dbName);
    goto _OVER;
  }
#endif

  if (pDb->cfg.withArbitrator) {
    code = TSDB_CODE_OPS_NOT_SUPPORT;
    mError("vgId:%d, db:%s, with arbitrator, split vgroup not allowed", pVgroup->vgId, pVgroup->dbName);
    goto _OVER;
  }

  pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, pReq, "split-vgroup");
  if (pTrans == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    goto _OVER;
  }
  mndTransSetSerial(pTrans);
  mInfo("trans:%d, used to split vgroup, vgId:%d", pTrans->id, pVgroup->vgId);

  mndTransSetDbName(pTrans, pDb->name, NULL);
  TAOS_CHECK_GOTO(mndTransCheckConflictWithCompact(pMnode, pTrans), NULL, _OVER);

  // Work on a private copy so the caller's vgroup object is left untouched.
  SVgObj newVg1 = {0};
  memcpy(&newVg1, pVgroup, sizeof(SVgObj));
  mInfo("vgId:%d, vgroup info before split, replica:%d hashBegin:%u hashEnd:%u", newVg1.vgId, newVg1.replica,
        newVg1.hashBegin, newVg1.hashEnd);
  for (int32_t i = 0; i < newVg1.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg1.vgId, i, newVg1.vnodeGid[i].dnodeId);
  }

  // Both branches leave vnodeGid[0] and vnodeGid[1] populated; they become the two halves below.
  if (newVg1.replica == 1) {
    TAOS_CHECK_GOTO(mndAddVnodeToVgroup(pMnode, pTrans, &newVg1, pArray), NULL, _OVER);

    // The added vnode is created as a learner first, then switched to voter.
    newVg1.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
    TAOS_CHECK_GOTO(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[0].dnodeId), NULL,
                    _OVER);
    TAOS_CHECK_GOTO(mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg1, &newVg1.vnodeGid[1]), NULL, _OVER);

    newVg1.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
    TAOS_CHECK_GOTO(mndAddAlterVnodeTypeAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[1].dnodeId), NULL, _OVER);
    TAOS_CHECK_GOTO(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[0].dnodeId), NULL,
                    _OVER);

    TAOS_CHECK_GOTO(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1), NULL, _OVER);
  } else if (newVg1.replica == 3) {
    SVnodeGid del1 = {0};
    TAOS_CHECK_GOTO(mndRemoveVnodeFromVgroup(pMnode, pTrans, &newVg1, pArray, &del1), NULL, _OVER);
    TAOS_CHECK_GOTO(mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg1, &del1, true), NULL, _OVER);
    TAOS_CHECK_GOTO(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[0].dnodeId), NULL,
                    _OVER);
    TAOS_CHECK_GOTO(mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[1].dnodeId), NULL,
                    _OVER);
  } else {
    // Only 1- and 3-replica vgroups are handled. Report it instead of leaving with code == 0, which would
    // tell the caller the split succeeded although no transaction was prepared.
    code = TSDB_CODE_OPS_NOT_SUPPORT;
    mError("vgId:%d, db:%s, replica:%d, split vgroup not allowed", pVgroup->vgId, pVgroup->dbName, newVg1.replica);
    goto _OVER;
  }

  for (int32_t i = 0; i < newVg1.replica; ++i) {
    TAOS_CHECK_GOTO(mndAddDisableVnodeWriteAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[i].dnodeId), NULL,
                    _OVER);
  }
  TAOS_CHECK_GOTO(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1), NULL, _OVER);

  // newVg1 keeps vnode 0 and the lower half of the hash range; newVg2 takes vnode 1 and the upper half.
  SVgObj newVg2 = {0};
  memcpy(&newVg2, &newVg1, sizeof(SVgObj));
  newVg1.replica = 1;
  newVg1.hashEnd = newVg1.hashBegin / 2 + newVg1.hashEnd / 2;
  memset(&newVg1.vnodeGid[1], 0, sizeof(SVnodeGid));

  newVg2.replica = 1;
  newVg2.hashBegin = newVg1.hashEnd + 1;
  memcpy(&newVg2.vnodeGid[0], &newVg2.vnodeGid[1], sizeof(SVnodeGid));
  memset(&newVg2.vnodeGid[1], 0, sizeof(SVnodeGid));

  mInfo("vgId:%d, vgroup info after split, replica:%d hashrange:[%u, %u] vnode:0 dnode:%d", newVg1.vgId, newVg1.replica,
        newVg1.hashBegin, newVg1.hashEnd, newVg1.vnodeGid[0].dnodeId);
  for (int32_t i = 0; i < newVg1.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg1.vgId, i, newVg1.vnodeGid[i].dnodeId);
  }
  mInfo("vgId:%d, vgroup info after split, replica:%d hashrange:[%u, %u] vnode:0 dnode:%d", newVg2.vgId, newVg2.replica,
        newVg2.hashBegin, newVg2.hashEnd, newVg2.vnodeGid[0].dnodeId);
  for (int32_t i = 0; i < newVg2.replica; ++i) {
    mInfo("vgId:%d, vnode:%d dnode:%d", newVg2.vgId, i, newVg2.vnodeGid[i].dnodeId);
  }

  // alter vgId and hash range
  int32_t maxVgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP);
  int32_t srcVgId = newVg1.vgId;
  newVg1.vgId = maxVgId;
  TAOS_CHECK_GOTO(mndAddNewVgPrepareAction(pMnode, pTrans, &newVg1), NULL, _OVER);
  TAOS_CHECK_GOTO(mndAddAlterVnodeHashRangeAction(pMnode, pTrans, srcVgId, &newVg1), NULL, _OVER);

  maxVgId++;
  srcVgId = newVg2.vgId;
  newVg2.vgId = maxVgId;
  TAOS_CHECK_GOTO(mndAddNewVgPrepareAction(pMnode, pTrans, &newVg2), NULL, _OVER);
  TAOS_CHECK_GOTO(mndAddAlterVnodeHashRangeAction(pMnode, pTrans, srcVgId, &newVg2), NULL, _OVER);

  TAOS_CHECK_GOTO(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1), NULL, _OVER);
  TAOS_CHECK_GOTO(mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg2), NULL, _OVER);

  TAOS_CHECK_GOTO(mndAddVgStatusAction(pTrans, &newVg1, SDB_STATUS_READY, TRN_STAGE_REDO_ACTION), NULL, _OVER);
  TAOS_CHECK_GOTO(mndAddVgStatusAction(pTrans, &newVg2, SDB_STATUS_READY, TRN_STAGE_REDO_ACTION), NULL, _OVER);
  TAOS_CHECK_GOTO(mndAddVgStatusAction(pTrans, pVgroup, SDB_STATUS_DROPPED, TRN_STAGE_REDO_ACTION), NULL, _OVER);

  // update db status
  // dbObj is a shallow copy; pRetensions is duplicated so the copy owns its array and _OVER can destroy it.
  memcpy(&dbObj, pDb, sizeof(SDbObj));
  if (dbObj.cfg.pRetensions != NULL) {
    dbObj.cfg.pRetensions = taosArrayDup(pDb->cfg.pRetensions, NULL);
    if (dbObj.cfg.pRetensions == NULL) {
      code = terrno;
      goto _OVER;
    }
  }
  dbObj.vgVersion++;
  dbObj.updateTime = taosGetTimestampMs();
  dbObj.cfg.numOfVgroups++;
  TAOS_CHECK_GOTO(mndAddDbStatusAction(pTrans, &dbObj, SDB_STATUS_READY, TRN_STAGE_REDO_ACTION), NULL, _OVER);

  // adjust vgroup replica
  if (pDb->cfg.replications != newVg1.replica) {
    SVgObj tmpGroup = {0};
    TAOS_CHECK_GOTO(mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg1, pArray, &tmpGroup), NULL, _OVER);
  } else {
    TAOS_CHECK_GOTO(mndAddVgStatusAction(pTrans, &newVg1, SDB_STATUS_READY, TRN_STAGE_COMMIT_ACTION), NULL, _OVER);
  }

  if (pDb->cfg.replications != newVg2.replica) {
    SVgObj tmpGroup = {0};
    TAOS_CHECK_GOTO(mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg2, pArray, &tmpGroup), NULL, _OVER);
  } else {
    TAOS_CHECK_GOTO(mndAddVgStatusAction(pTrans, &newVg2, SDB_STATUS_READY, TRN_STAGE_COMMIT_ACTION), NULL, _OVER);
  }

  TAOS_CHECK_GOTO(mndAddVgStatusAction(pTrans, pVgroup, SDB_STATUS_DROPPED, TRN_STAGE_COMMIT_ACTION), NULL, _OVER);

  // commit db status
  dbObj.vgVersion++;
  dbObj.updateTime = taosGetTimestampMs();
  TAOS_CHECK_GOTO(mndAddDbStatusAction(pTrans, &dbObj, SDB_STATUS_READY, TRN_STAGE_COMMIT_ACTION), NULL, _OVER);

  TAOS_CHECK_GOTO(mndTransPrepare(pMnode, pTrans), NULL, _OVER);
  code = 0;

_OVER:
  taosArrayDestroy(pArray);
  mndTransDrop(pTrans);
  taosArrayDestroy(dbObj.cfg.pRetensions);
  TAOS_RETURN(code);
}
3343

3344
extern int32_t mndProcessSplitVgroupMsgImp(SRpcMsg *pReq);
3345

3346
// Split-vgroup message handler: forwards the request unchanged to mndProcessSplitVgroupMsgImp.
static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) {
  return mndProcessSplitVgroupMsgImp(pReq);
}
×
3347

3348
#ifndef TD_ENTERPRISE
3349
// Fallback compiled only when TD_ENTERPRISE is not defined: ignores the request and reports success.
int32_t mndProcessSplitVgroupMsgImp(SRpcMsg *pReq) {
  return 0;
}
3350
#endif
3351

3352
// Append to pTrans the actions that move one replica of pVgroup from dnode pSrc to dnode pDst:
// first add a replica on pDst, then remove the one on pSrc, then record the resulting vgroup in the
// commit log with SDB_STATUS_READY. The caller's pVgroup is not modified; a local copy is used.
//
// Returns 0 on success, otherwise the error code of the first failing step.
static int32_t mndSetBalanceVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
                                              SDnodeObj *pSrc, SDnodeObj *pDst) {
  int32_t code = 0;
  SVgObj  movedVg = {0};
  memcpy(&movedVg, pVgroup, sizeof(SVgObj));

  mInfo("vgId:%d, vgroup info before balance, replica:%d", movedVg.vgId, movedVg.replica);
  for (int32_t idx = 0; idx < movedVg.replica; ++idx) {
    mInfo("vgId:%d, vnode:%d dnode:%d", movedVg.vgId, idx, movedVg.vnodeGid[idx].dnodeId);
  }

  TAOS_CHECK_RETURN(mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &movedVg, pDst->id));
  TAOS_CHECK_RETURN(mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &movedVg, pSrc->id));

  SSdbRaw *pVgRaw = mndVgroupActionEncode(&movedVg);
  if (pVgRaw == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) {
      code = terrno;
    }
    TAOS_RETURN(code);
  }

  code = mndTransAppendCommitlog(pTrans, pVgRaw);
  if (code != 0) {
    // Not appended, so the encoded record is released here.
    sdbFreeRaw(pVgRaw);
    TAOS_RETURN(code);
  }

  code = sdbSetRawStatus(pVgRaw, SDB_STATUS_READY);
  if (code != 0) {
    mError("vgId:%d, failed to set raw status to ready, error:%s, line:%d", movedVg.vgId, tstrerror(code), __LINE__);
    TAOS_RETURN(code);
  }

  mInfo("vgId:%d, vgroup info after balance, replica:%d", movedVg.vgId, movedVg.replica);
  for (int32_t idx = 0; idx < movedVg.replica; ++idx) {
    mInfo("vgId:%d, vnode:%d dnode:%d", movedVg.vgId, idx, movedVg.vnodeGid[idx].dnodeId);
  }
  TAOS_RETURN(code);
}
3389

3390
static int32_t mndBalanceVgroupBetweenDnode(SMnode *pMnode, STrans *pTrans, SDnodeObj *pSrc, SDnodeObj *pDst,
×
3391
                                            SHashObj *pBalancedVgroups) {
3392
  void   *pIter = NULL;
×
3393
  int32_t code = -1;
×
3394
  SSdb   *pSdb = pMnode->pSdb;
×
3395

3396
  while (1) {
×
3397
    SVgObj *pVgroup = NULL;
×
3398
    pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup);
×
3399
    if (pIter == NULL) break;
×
3400
    if (taosHashGet(pBalancedVgroups, &pVgroup->vgId, sizeof(int32_t)) != NULL) {
×
3401
      sdbRelease(pSdb, pVgroup);
×
3402
      continue;
×
3403
    }
3404

3405
    bool existInSrc = false;
×
3406
    bool existInDst = false;
×
3407
    for (int32_t i = 0; i < pVgroup->replica; ++i) {
×
3408
      SVnodeGid *pGid = &pVgroup->vnodeGid[i];
×
3409
      if (pGid->dnodeId == pSrc->id) existInSrc = true;
×
3410
      if (pGid->dnodeId == pDst->id) existInDst = true;
×
3411
    }
3412

3413
    if (!existInSrc || existInDst) {
×
3414
      sdbRelease(pSdb, pVgroup);
×
3415
      continue;
×
3416
    }
3417

3418
    SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName);
×
3419
    if (pDb == NULL) {
×
3420
      code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
3421
      if (terrno != 0) code = terrno;
×
3422
      mError("vgId:%d, balance vgroup can't find db obj dbName:%s", pVgroup->vgId, pVgroup->dbName);
×
3423
      goto _OUT;
×
3424
    }
3425

3426
    if (pDb->cfg.withArbitrator) {
×
3427
      mInfo("vgId:%d, db:%s, with arbitrator, balance vgroup not allowed", pVgroup->vgId, pVgroup->dbName);
×
3428
      goto _OUT;
×
3429
    }
3430

3431
    code = mndSetBalanceVgroupInfoToTrans(pMnode, pTrans, pDb, pVgroup, pSrc, pDst);
×
3432
    if (code == 0) {
×
3433
      code = taosHashPut(pBalancedVgroups, &pVgroup->vgId, sizeof(int32_t), &pVgroup->vgId, sizeof(int32_t));
×
3434
    }
3435

3436
  _OUT:
×
3437
    mndReleaseDb(pMnode, pDb);
×
3438
    sdbRelease(pSdb, pVgroup);
×
3439
    sdbCancelFetch(pSdb, pIter);
×
3440
    break;
×
3441
  }
3442

3443
  return code;
×
3444
}
3445

3446
// Build a "balance-vgroup" transaction that repeatedly moves one vgroup from the last dnode of the
// sorted array to the first one while the score comparison below still asks for a move.
//
// pMnode - mnode context
// pReq   - originating rpc request, attached to the transaction
// pArray - dnode array (SDnodeObj elements); it is re-sorted with mndCompareDnodeVnodes on every round and
//          the numOfVnodes counters of its elements are updated as moves are planned. The caller in this
//          file only calls with at least two elements.
//
// Returns 0 when nothing needs to be moved, TSDB_CODE_ACTION_IN_PROGRESS when the transaction was
// prepared, otherwise an error code.
static int32_t mndBalanceVgroup(SMnode *pMnode, SRpcMsg *pReq, SArray *pArray) {
  int32_t   code = -1;
  int32_t   numOfVgroups = 0;
  STrans   *pTrans = NULL;
  SHashObj *pBalancedVgroups = NULL;

  pBalancedVgroups = taosHashInit(16, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  if (pBalancedVgroups == NULL) {
    // Report a real error code rather than a bare -1, matching the other NULL-return checks here.
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    goto _OVER;
  }

  pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "balance-vgroup");
  if (pTrans == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    goto _OVER;
  }
  mndTransSetSerial(pTrans);
  mInfo("trans:%d, used to balance vgroup", pTrans->id);
  TAOS_CHECK_GOTO(mndTransCheckConflict(pMnode, pTrans), NULL, _OVER);
  TAOS_CHECK_GOTO(mndTransCheckConflictWithCompact(pMnode, pTrans), NULL, _OVER);

  while (1) {
    taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
    for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) {
      SDnodeObj *pDnode = taosArrayGet(pArray, i);
      // Argument order follows the labels in the format string: vnodes, others, support, score.
      mInfo("dnode:%d, equivalent vnodes:%d others:%d support:%d, score:%f", pDnode->id, pDnode->numOfVnodes,
            pDnode->numOfOtherNodes, pDnode->numOfSupportVnodes, mndGetDnodeScore(pDnode, 0, 1));
    }

    // After sorting, the move source is the last element and the destination is the first.
    SDnodeObj *pSrc = taosArrayGet(pArray, taosArrayGetSize(pArray) - 1);
    SDnodeObj *pDst = taosArrayGet(pArray, 0);

    // Scores as they would be after moving one vnode from pSrc to pDst.
    float srcScore = mndGetDnodeScore(pSrc, -1, 1);
    float dstScore = mndGetDnodeScore(pDst, 1, 1);
    mInfo("trans:%d, after balance, src dnode:%d score:%f, dst dnode:%d score:%f", pTrans->id, pSrc->id, srcScore,
          pDst->id, dstScore);

    if (srcScore > dstScore - 0.000001) {
      code = mndBalanceVgroupBetweenDnode(pMnode, pTrans, pSrc, pDst, pBalancedVgroups);
      if (code == 0) {
        pSrc->numOfVnodes--;
        pDst->numOfVnodes++;
        numOfVgroups++;
        continue;
      } else {
        mInfo("trans:%d, no vgroup need to balance from dnode:%d to dnode:%d", pTrans->id, pSrc->id, pDst->id);
        break;
      }
    } else {
      mInfo("trans:%d, no vgroup need to balance any more", pTrans->id);
      break;
    }
  }

  if (numOfVgroups <= 0) {
    mInfo("no need to balance vgroup");
    code = 0;
  } else {
    mInfo("start to balance vgroup, numOfVgroups:%d", numOfVgroups);
    // Capture the prepare result: the loop may have left code == 0, which would otherwise be returned
    // as success even though the transaction failed to prepare.
    code = mndTransPrepare(pMnode, pTrans);
    if (code != 0) goto _OVER;
    code = TSDB_CODE_ACTION_IN_PROGRESS;
  }

_OVER:
  taosHashCleanup(pBalancedVgroups);
  mndTransDrop(pTrans);
  TAOS_RETURN(code);
}
3513

3514
// Handler for the balance-vgroup request.
//
// Steps: deserialize the request, check the MND_OPER_BALANCE_VGROUP privilege, refuse if any dnode is
// offline, build the dnode array and, when it holds at least two dnodes, run mndBalanceVgroup. An audit
// record is written when this point is reached (the early-exit paths jump past it).
//
// Returns 0 when nothing had to be done, the result of mndBalanceVgroup otherwise, or an error code.
static int32_t mndProcessBalanceVgroupMsg(SRpcMsg *pReq) {
  SMnode *pMnode = pReq->info.node;
  int32_t code = -1;
  SArray *pArray = NULL;
  void   *pIter = NULL;
  int64_t curMs = taosGetTimestampMs();

  SBalanceVgroupReq req = {0};
  if (tDeserializeSBalanceVgroupReq(pReq->pCont, pReq->contLen, &req) != 0) {
    code = TSDB_CODE_INVALID_MSG;
    goto _OVER;
  }

  mInfo("start to balance vgroup");
  if ((code = mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_BALANCE_VGROUP)) != 0) {
    goto _OVER;
  }

  while (1) {
    SDnodeObj *pDnode = NULL;
    pIter = sdbFetch(pMnode->pSdb, SDB_DNODE, pIter, (void **)&pDnode);
    if (pIter == NULL) break;
    if (!mndIsDnodeOnline(pDnode, curMs)) {
      sdbCancelFetch(pMnode->pSdb, pIter);
      code = TSDB_CODE_MND_HAS_OFFLINE_DNODE;
      // Only the local code is set here, so print its text instead of whatever terrno currently holds.
      mError("failed to balance vgroup since %s, dnode:%d", tstrerror(code), pDnode->id);
      sdbRelease(pMnode->pSdb, pDnode);
      goto _OVER;
    }

    sdbRelease(pMnode->pSdb, pDnode);
  }

  pArray = mndBuildDnodesArray(pMnode, 0, NULL);
  if (pArray == NULL) {
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
    if (terrno != 0) code = terrno;
    goto _OVER;
  }

  if (taosArrayGetSize(pArray) < 2) {
    mInfo("no need to balance vgroup since dnode num less than 2");
    code = 0;
  } else {
    code = mndBalanceVgroup(pMnode, pReq, pArray);
  }

  auditRecord(pReq, pMnode->clusterId, "balanceVgroup", "", "", req.sql, req.sqlLen);

_OVER:
  if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
    mError("failed to balance vgroup since %s", tstrerror(code));
  }

  taosArrayDestroy(pArray);
  tFreeSBalanceVgroupReq(&req);
  TAOS_RETURN(code);
}
3572

3573
// True when the vgroup's isTsma flag is not set and its dbUid equals the given dbUid.
bool mndVgroupInDb(SVgObj *pVgroup, int64_t dbUid) {
  if (pVgroup->isTsma) {
    return false;
  }
  return pVgroup->dbUid == dbUid;
}
×
3574

3575
// True when any of the vgroup's first `replica` vnodes is placed on the given dnode.
bool mndVgroupInDnode(SVgObj *pVgroup, int32_t dnodeId) {
  int idx = 0;
  // Advance until a matching vnode is found or all replicas have been examined.
  while (idx < pVgroup->replica && pVgroup->vnodeGid[idx].dnodeId != dnodeId) {
    idx++;
  }
  return idx < pVgroup->replica;
}
3581

3582
static void *mndBuildCompactVnodeReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen, int64_t compactTs,
×
3583
                                     STimeWindow tw) {
3584
  SCompactVnodeReq compactReq = {0};
×
3585
  compactReq.dbUid = pDb->uid;
×
3586
  compactReq.compactStartTime = compactTs;
×
3587
  compactReq.tw = tw;
×
3588
  tstrncpy(compactReq.db, pDb->name, TSDB_DB_FNAME_LEN);
×
3589

3590
  mInfo("vgId:%d, build compact vnode config req", pVgroup->vgId);
×
3591
  int32_t contLen = tSerializeSCompactVnodeReq(NULL, 0, &compactReq);
×
3592
  if (contLen < 0) {
×
3593
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
3594
    return NULL;
×
3595
  }
3596
  contLen += sizeof(SMsgHead);
×
3597

3598
  void *pReq = taosMemoryMalloc(contLen);
×
3599
  if (pReq == NULL) {
×
3600
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
3601
    return NULL;
×
3602
  }
3603

3604
  SMsgHead *pHead = pReq;
×
3605
  pHead->contLen = htonl(contLen);
×
3606
  pHead->vgId = htonl(pVgroup->vgId);
×
3607

3608
  if (tSerializeSCompactVnodeReq((char *)pReq + sizeof(SMsgHead), contLen, &compactReq) < 0) {
×
3609
    taosMemoryFree(pReq);
×
3610
    terrno = TSDB_CODE_OUT_OF_MEMORY;
×
3611
    return NULL;
×
3612
  }
3613
  *pContLen = contLen;
×
3614
  return pReq;
×
3615
}
3616

3617
static int32_t mndAddCompactVnodeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int64_t compactTs,
×
3618
                                        STimeWindow tw) {
3619
  int32_t      code = 0;
×
3620
  STransAction action = {0};
×
3621
  action.epSet = mndGetVgroupEpset(pMnode, pVgroup);
×
3622

3623
  int32_t contLen = 0;
×
3624
  void   *pReq = mndBuildCompactVnodeReq(pMnode, pDb, pVgroup, &contLen, compactTs, tw);
×
3625
  if (pReq == NULL) {
×
3626
    code = TSDB_CODE_MND_RETURN_VALUE_NULL;
×
3627
    if (terrno != 0) code = terrno;
×
3628
    TAOS_RETURN(code);
×
3629
  }
3630

3631
  action.pCont = pReq;
×
3632
  action.contLen = contLen;
×
3633
  action.msgType = TDMT_VND_COMPACT;
×
3634

3635
  if ((code = mndTransAppendRedoAction(pTrans, &action)) != 0) {
×
3636
    taosMemoryFree(pReq);
×
3637
    TAOS_RETURN(code);
×
3638
  }
3639

3640
  TAOS_RETURN(code);
×
3641
}
3642

3643
int32_t mndBuildCompactVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int64_t compactTs,
×
3644
                                    STimeWindow tw) {
3645
  TAOS_CHECK_RETURN(mndAddCompactVnodeAction(pMnode, pTrans, pDb, pVgroup, compactTs, tw));
×
3646
  return 0;
×
3647
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc