• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #4720

08 Sep 2025 08:43AM UTC coverage: 58.139% (-0.6%) from 58.762%
#4720

push

travis-ci

web-flow
Merge pull request #32881 from taosdata/enh/add-new-windows-ci

fix(ci): update workflow reference to use new Windows CI YAML

133181 of 292179 branches covered (45.58%)

Branch coverage included in aggregate %.

201691 of 283811 relevant lines covered (71.07%)

5442780.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

62.61
/source/libs/tdb/src/db/tdbBtree.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#include "tdbInt.h"
17

18
#define TDB_BTREE_ROOT 0x1
19
#define TDB_BTREE_LEAF 0x2
20
#define TDB_BTREE_OVFL 0x4
21
#define TDB_BTREE_STACK 0x8  // this is a stack (for free page management)
22

23
struct SBTree {
24
  SPgno         root;
25
  int           keyLen;
26
  int           valLen;
27
  SPager       *pPager;
28
  tdb_cmpr_fn_t kcmpr;
29
  int           pageSize;
30
  int           maxLocal;
31
  int           minLocal;
32
  int           maxLeaf;
33
  int           minLeaf;
34
  SBtInfo       info;
35
  char         *tbname;
36
  void         *pBuf;
37
};
38

39
#define TDB_BTREE_PAGE_COMMON_HDR u8 flags;
40

41
#define TDB_BTREE_PAGE_GET_FLAGS(PAGE)        (PAGE)->pData[0]
42
#define TDB_BTREE_PAGE_SET_FLAGS(PAGE, flags) ((PAGE)->pData[0] = (flags))
43
#define TDB_BTREE_PAGE_IS_ROOT(PAGE)          (TDB_BTREE_PAGE_GET_FLAGS(PAGE) & TDB_BTREE_ROOT)
44
#define TDB_BTREE_PAGE_IS_LEAF(PAGE)          (TDB_BTREE_PAGE_GET_FLAGS(PAGE) & TDB_BTREE_LEAF)
45
#define TDB_BTREE_PAGE_IS_OVFL(PAGE)          (TDB_BTREE_PAGE_GET_FLAGS(PAGE) & TDB_BTREE_OVFL)
46
#define TDB_BTREE_PAGE_IS_STACK(PAGE)          (TDB_BTREE_PAGE_GET_FLAGS(PAGE) & TDB_BTREE_STACK)
47

48
#pragma pack(push, 1)
49
typedef struct {
50
  TDB_BTREE_PAGE_COMMON_HDR
51
} SLeafHdr;
52

53
typedef struct {
54
  TDB_BTREE_PAGE_COMMON_HDR
55
  SPgno pgno;  // right-most child
56
} SIntHdr;
57
#pragma pack(pop)
58

59
static int tdbDefaultKeyCmprFn(const void *pKey1, int keyLen1, const void *pKey2, int keyLen2);
60
static int tdbBtreeOpenImpl(SBTree *pBt);
61
// static int tdbBtreeInitPage(SPage *pPage, void *arg, int init);
62
static int tdbBtreeEncodeCell(SPage *pPage, const void *pKey, int kLen, const void *pVal, int vLen, SCell *pCell,
63
                              int *szCell, TXN *pTxn, SBTree *pBt);
64
static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pDecoder, TXN *pTxn, SBTree *pBt);
65
static int tdbBtreeBalance(SBTC *pBtc);
66
static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int *pFullSize);
67
static int tdbBtcMoveDownward(SBTC *pBtc);
68
static int tdbBtcMoveUpward(SBTC *pBtc);
69

70
int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr, TDB *pEnv,
235,799✔
71
                 SBTree **ppBt) {
72
  SBTree *pBt;
73
  int     ret;
74

75
  if (keyLen == 0) {
235,799!
76
    tdbError("tdb/btree-open: key len cannot be zero.");
×
77
    return TSDB_CODE_INVALID_PARA;
×
78
  }
79

80
  *ppBt = NULL;
235,799✔
81

82
  pBt = (SBTree *)tdbOsCalloc(1, sizeof(*pBt));
235,799!
83
  if (pBt == NULL) {
235,936!
84
    return terrno;
×
85
  }
86

87
  // pBt->keyLen
88
  pBt->keyLen = keyLen < 0 ? TDB_VARIANT_LEN : keyLen;
235,936✔
89
  // pBt->valLen
90
  pBt->valLen = valLen < 0 ? TDB_VARIANT_LEN : valLen;
235,936✔
91
  // pBt->pPager
92
  pBt->pPager = pPager;
235,936✔
93
  // pBt->kcmpr
94
  pBt->kcmpr = kcmpr ? kcmpr : tdbDefaultKeyCmprFn;
235,936✔
95
  // pBt->pageSize
96
  pBt->pageSize = pPager->pageSize;
235,936✔
97
  // pBt->maxLocal
98
  pBt->maxLocal = tdbPageCapacity(pBt->pageSize, sizeof(SIntHdr)) / 4;
235,936✔
99
  // pBt->minLocal: Should not be allowed smaller than 15, which is [nPayload][nKey][nData]
100
  pBt->minLocal = pBt->maxLocal / 2;
235,930✔
101
  // pBt->maxLeaf
102
  pBt->maxLeaf = tdbPageCapacity(pBt->pageSize, sizeof(SLeafHdr));
235,930✔
103
  // pBt->minLeaf
104
  pBt->minLeaf = pBt->minLocal;
235,931✔
105

106
  // if pgno == 0 fetch new btree root leaf page
107
  if (pgno == 0) {
235,931✔
108
    // fetch page & insert into main db
109
    SPage *pPage;
110
    TXN   *txn;
111

112
    ret = tdbBegin(pEnv, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
185,195✔
113
    if (ret < 0) {
185,124!
114
      tdbOsFree(pBt);
×
115
      return ret;
×
116
    }
117

118
    SBtreeInitPageArg zArg;
119
    zArg.flags = 0x1 | 0x2;  // root leaf node;
185,124✔
120
    zArg.pBt = pBt;
185,124✔
121
    ret = tdbPagerFetchPage(pPager, &pgno, &pPage, tdbBtreeInitPage, &zArg, txn);
185,124✔
122
    if (ret < 0) {
185,088!
123
      tdbAbort(pEnv, txn);
×
124
      tdbOsFree(pBt);
×
125
      return ret;
×
126
    }
127

128
    ret = tdbPagerWrite(pPager, pPage);
185,088✔
129
    if (ret < 0) {
185,029!
130
      tdbError("failed to write page since %s", terrstr());
×
131
      tdbAbort(pEnv, txn);
×
132
      tdbOsFree(pBt);
×
133
      return ret;
×
134
    }
135

136
    if (strcmp(TDB_MAINDB_NAME, tbname)) {
185,029✔
137
      pBt->info.root = pgno;
165,613✔
138
      pBt->info.nLevel = 1;
165,613✔
139
      pBt->info.nData = 0;
165,613✔
140
      pBt->tbname = (char *)tbname;
165,613✔
141

142
      ret = tdbTbInsert(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pBt->info, sizeof(pBt->info), txn);
165,613✔
143
      if (ret < 0) {
165,601!
144
        tdbAbort(pEnv, txn);
×
145
        tdbOsFree(pBt);
×
146
        return ret;
×
147
      }
148
    }
149

150
    tdbPCacheRelease(pPager->pCache, pPage, txn);
185,017✔
151

152
    ret = tdbCommit(pPager->pEnv, txn);
185,180✔
153
    if (ret) return ret;
185,189!
154

155
    ret = tdbPostCommit(pPager->pEnv, txn);
185,189✔
156
    if (ret) return ret;
185,161!
157
  }
158

159
  if (pgno == 0) {
235,897!
160
    tdbError("tdb/btree-open: pgno cannot be zero.");
×
161
    tdbOsFree(pBt);
×
162
    return TSDB_CODE_INTERNAL_ERROR;
×
163
  }
164
  pBt->root = pgno;
235,897✔
165
  /*
166
  // TODO: pBt->root
167
  ret = tdbBtreeOpenImpl(pBt);
168
  if (ret < 0) {
169
    tdbOsFree(pBt);
170
    return -1;
171
  }
172
  */
173
  *ppBt = pBt;
235,897✔
174
  return 0;
235,897✔
175
}
176

177
void tdbBtreeClose(SBTree *pBt) {
235,946✔
178
  if (pBt) {
235,946!
179
    tdbFree(pBt->pBuf);
235,947✔
180
    tdbOsFree(pBt);
235,945!
181
  }
182
}
235,949✔
183

184
int tdbBtreeInsert(SBTree *pBt, const void *pKey, int kLen, const void *pVal, int vLen, TXN *pTxn) {
1,509,704✔
185
  SBTC   btc;
186
  int    ret;
187
  int    c;
188

189
  ret = tdbBtcOpen(&btc, pBt, pTxn);
1,509,704✔
190
  if (ret) {
1,509,742!
191
    tdbError("tdb/btree-insert: btc open failed with ret: %d.", ret);
×
192
    return ret;
×
193
  }
194

195
  tdbTrace("tdb insert, btc: %p, pTxn: %p", &btc, pTxn);
1,509,742✔
196

197
  // move to the position to insert
198
  ret = tdbBtcMoveTo(&btc, pKey, kLen, &c);
1,509,742✔
199
  if (ret < 0) {
1,509,272✔
200
    tdbBtcClose(&btc);
29✔
201
    tdbError("tdb/btree-insert: btc move to failed with ret: %d.", ret);
×
202
    return ret;
×
203
  }
204

205
  if (btc.idx == -1) {
1,509,243✔
206
    btc.idx = 0;
108,434✔
207
  } else {
208
    if (c > 0) {
1,400,809✔
209
      btc.idx++;
1,046,230✔
210
    } else if (c == 0) {
354,579!
211
      // dup key not allowed with insert
212
      tdbBtcClose(&btc);
×
213
      tdbError("tdb/btree-insert: dup key. pKey: %p, kLen: %d, btc: %p, pTxn: %p", pKey, kLen, &btc, pTxn);
×
214
      return TSDB_CODE_DUP_KEY;
×
215
    }
216
  }
217

218
  ret = tdbBtcUpsert(&btc, pKey, kLen, pVal, vLen, 1);
1,509,243✔
219
  if (ret < 0) {
1,508,173!
220
    tdbBtcClose(&btc);
×
221
    tdbError("tdb/btree-insert: btc upsert failed with ret: %d.", ret);
×
222
    return ret;
×
223
  }
224

225
  tdbBtcClose(&btc);
1,508,173✔
226
  return 0;
1,510,410✔
227
}
228

229
int tdbBtreeDelete(SBTree *pBt, const void *pKey, int kLen, TXN *pTxn) {
244,883✔
230
  SBTC btc;
231
  int  c;
232
  int  ret;
233

234
  ret = tdbBtcOpen(&btc, pBt, pTxn);
244,883✔
235
  if (ret) {
244,874!
236
    tdbError("tdb/btree-delete: btc open failed with ret: %d.", ret);
×
237
    return ret;
×
238
  }
239

240
  tdbTrace("tdb delete, btc: %p, pTxn: %p", &btc, pTxn);
244,874✔
241

242
  // move the cursor
243
  ret = tdbBtcMoveTo(&btc, pKey, kLen, &c);
244,875✔
244
  if (ret < 0) {
245,149!
245
    tdbBtcClose(&btc);
×
246
    tdbError("tdb/btree-delete: btc move to failed with ret: %d.", ret);
×
247
    return ret;
×
248
  }
249

250
  if (btc.idx < 0 || c != 0) {
245,149✔
251
    tdbBtcClose(&btc);
93,204✔
252
    return TSDB_CODE_NOT_FOUND;
93,215✔
253
  }
254

255
  // delete the key
256
  ret = tdbBtcDelete(&btc);
151,945✔
257
  if (ret < 0) {
151,907!
258
    tdbBtcClose(&btc);
×
259
    return ret;
×
260
  }
261

262
  tdbBtcClose(&btc);
151,907✔
263
  return 0;
151,971✔
264
}
265

266
#if 0
267
int tdbBtreeUpsert(SBTree *pBt, const void *pKey, int nKey, const void *pData, int nData, TXN *pTxn) {
268
  SBTC btc = {0};
269
  int  c;
270
  int  ret;
271

272
  tdbBtcOpen(&btc, pBt, pTxn);
273

274
  tdbTrace("tdb upsert, btc: %p, pTxn: %p", &btc, pTxn);
275

276
  // move the cursor
277
  ret = tdbBtcMoveTo(&btc, pKey, nKey, &c);
278
  if (ret < 0) {
279
    tdbError("tdb/btree-upsert: btc move to failed with ret: %d.", ret);
280
    if (TDB_CELLDECODER_FREE_KEY(&btc.coder)) {
281
      tdbFree(btc.coder.pKey);
282
    }
283
    tdbBtcClose(&btc);
284
    return -1;
285
  }
286

287
  if (TDB_CELLDECODER_FREE_KEY(&btc.coder)) {
288
    tdbFree(btc.coder.pKey);
289
  }
290

291
  if (btc.idx == -1) {
292
    btc.idx = 0;
293
    c = 1;
294
  } else {
295
    if (c > 0) {
296
      btc.idx = btc.idx + 1;
297
    }
298
  }
299

300
  ret = tdbBtcUpsert(&btc, pKey, nKey, pData, nData, c);
301
  if (ret < 0) {
302
    tdbBtcClose(&btc);
303
    tdbError("tdb/btree-upsert: btc upsert failed with ret: %d.", ret);
304
    return -1;
305
  }
306

307
  tdbBtcClose(&btc);
308
  return 0;
309
}
310
#endif
311

312
int tdbBtreeGet(SBTree *pBt, const void *pKey, int kLen, void **ppVal, int *vLen) {
10,521,248✔
313
  return tdbBtreePGet(pBt, pKey, kLen, NULL, NULL, ppVal, vLen);
10,521,248✔
314
}
315

316
int tdbBtreePGet(SBTree *pBt, const void *pKey, int kLen, void **ppKey, int *pkLen, void **ppVal, int *vLen) {
10,521,262✔
317
  SBTC         btc;
318
  SCell       *pCell;
319
  int          cret;
320
  int          ret;
321
  void        *pTKey = NULL;
10,521,262✔
322
  void        *pTVal = NULL;
10,521,262✔
323
  SCellDecoder cd = {0};
10,521,262✔
324

325
  ret = tdbBtcOpen(&btc, pBt, NULL);
10,521,262✔
326
  if (ret) {
10,520,757!
327
    tdbError("tdb/btree-pget: btc open failed with ret: %d.", ret);
×
328
    return ret;
×
329
  }
330

331
  tdbTrace("tdb pget, btc: %p", &btc);
10,520,757✔
332

333
  ret = tdbBtcMoveTo(&btc, pKey, kLen, &cret);
10,520,757✔
334
  if (ret < 0) {
10,514,327!
335
    tdbBtcClose(&btc);
×
336
    tdbError("tdb/btree-pget: btc move to failed with ret: %d.", ret);
×
337
    return ret;
×
338
  }
339

340
  if (btc.idx < 0 || cret) {
10,514,327✔
341
    tdbBtcClose(&btc);
784,574✔
342
    return TSDB_CODE_NOT_FOUND;
785,688✔
343
  }
344

345
  pCell = tdbPageGetCell(btc.pPage, btc.idx);
9,729,753✔
346
  ret = tdbBtreeDecodeCell(btc.pPage, pCell, &cd, btc.pTxn, pBt);
9,727,222✔
347
  if (ret < 0) {
9,730,119!
348
    tdbBtcClose(&btc);
×
349
    tdbError("tdb/btree-pget: decode cell failed with ret: %d.", ret);
×
350
    return ret;
×
351
  }
352

353
  if (ppKey) {
9,730,119!
354
    pTKey = tdbRealloc(*ppKey, cd.kLen);
×
355
    if (pTKey == NULL) {
×
356
      tdbBtcClose(&btc);
×
357
      tdbError("tdb/btree-pget: realloc pTKey failed.");
×
358
      return terrno;
×
359
    }
360
    *ppKey = pTKey;
×
361
    *pkLen = cd.kLen;
×
362
    memcpy(*ppKey, cd.pKey, (size_t)cd.kLen);
×
363
  }
364

365
  if (ppVal) {
9,730,119✔
366
    pTVal = tdbRealloc(*ppVal, cd.vLen);
9,212,841✔
367
    if (pTVal == NULL) {
9,216,752✔
368
      tdbBtcClose(&btc);
2,713✔
369
      tdbError("tdb/btree-pget: realloc pTVal failed.");
×
370
      return terrno;
×
371
    }
372
    *ppVal = pTVal;
9,214,039✔
373
    *vLen = cd.vLen;
9,214,039✔
374
    memcpy(*ppVal, cd.pVal, (size_t)cd.vLen);
9,214,039✔
375
  }
376

377
  if (TDB_CELLDECODER_FREE_KEY(&cd)) {
9,731,317!
378
    tdbFree(cd.pKey);
×
379
  }
380

381
  if (TDB_CELLDECODER_FREE_VAL(&cd)) {
9,731,317✔
382
    tdbTrace("tdb btc/pget/2 decoder: %p pVal free: %p", &cd, cd.pVal);
18,293✔
383

384
    tdbFree(cd.pVal);
18,293✔
385
  }
386

387
  tdbTrace("tdb pget end, btc decoder: %p/0x%x, local decoder:%p", &btc.coder, btc.coder.freeKV, &cd);
9,731,316✔
388

389
  tdbBtcClose(&btc);
9,731,316✔
390

391
  return 0;
9,739,927✔
392
}
393

394
// tdbBtreeToStack, tdbBtreePushFreePage, tdbBtreePopFreePage are only for free page management,
395
// they should only be called on the b-tree of the free page table, never call them for other
396
// purpose.
397
static int btreeToStack(SBTree* pBt, TXN* pTxn) {
24,843✔
398
  SPage *pRoot = NULL;
24,843✔
399
  SBtreeInitPageArg arg = {.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF};
24,843✔
400
  int ret = tdbPagerFetchPage(pBt->pPager, &pBt->root, &pRoot, tdbBtreeInitPage, &arg, pTxn);
24,843✔
401
  if (ret < 0) {
24,847!
402
    
403
    return ret;
×
404
  }
405

406
  // already a stack
407
  if (TDB_BTREE_PAGE_IS_STACK(pRoot)) {
24,847✔
408
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
5,342✔
409
    return 0;
5,344✔
410
  }
411

412
  ret = tdbPagerWrite(pBt->pPager, pRoot);
19,505✔
413
  if (ret < 0) {
19,503!
414
    tdbError("tdb/btree-to-stack: failed to mark root page dirty since %s", terrstr());
×
415
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
416
    return ret;
×
417
  }
418

419
  // if root page is not a leaf page, it means a re-balance is alreay happened and the
420
  // data is already corrupted, we have no way to recover from this, so discard all
421
  // existing free pages. note, discarding free pages only 'fixes' the corruption of the
422
  // free page table, but there can still be data corruption in other places.
423
  //
424
  // and there's another case: the b-tree has been re-balanced more than once, the root
425
  // page becomes a non-leaf page and then a leaf page again, this also means data
426
  // corruption, it is a rare case, but we are unable to detect and handle it correctly.
427
  if (!TDB_BTREE_PAGE_IS_LEAF(pRoot)) {
19,504!
428
    tdbWarn("tdb/btree-to-stack: root page is not a leaf page, all existing free pages are discarded");
×
429
    tdbBtreeInitPage(pRoot, &arg, 0); // re-initialize the root page
×
430
    TDB_BTREE_PAGE_SET_FLAGS(pRoot, TDB_BTREE_STACK|TDB_BTREE_LEAF|TDB_BTREE_ROOT);
×
431
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
432
    return 0;
×
433
  }
434

435
  // if the root page still have enough space for a new cell, then we only need to
436
  // mark it as a stack.
437
  if (TDB_PAGE_FREE_SIZE(pRoot) >= sizeof(SPgno) + TDB_PAGE_OFFSET_SIZE(pRoot)) {
19,504!
438
    TDB_BTREE_PAGE_SET_FLAGS(pRoot, TDB_BTREE_STACK|TDB_BTREE_LEAF|TDB_BTREE_ROOT);
19,504✔
439
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
19,504✔
440
    tdbInfo("tdb/btree-to-stack: btree has been marked as a stack.");
19,505!
441
    return 0;
19,506✔
442
  }
443

444
  // the root page is full, we need to construct the linked list for extra free pages.
445
  SPgno pgno = *((SPgno*)tdbPageGetCell(pRoot, 0));
×
446
  SPage *pPage = NULL;
×
447
  ret = tdbPagerFetchPage(pBt->pPager, &pgno, &pPage, tdbBtreeInitPage, &arg, pTxn);
×
448
  if (ret < 0) {
×
449
    tdbError("tdb/btree-to-stack: fetch free page %d failed with ret: %d.", pgno, ret);
×
450
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
451
    return ret;
×
452
  }
453

454
  ret = tdbPagerWrite(pBt->pPager, pPage);
×
455
  if (ret < 0) {
×
456
    tdbError("tdb/btree-to-stack: failed to mark free page %d dirty since %s", pgno, terrstr());
×
457
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
458
    tdbPagerReturnPage(pBt->pPager, pPage, pTxn);
×
459
    return ret;
×
460
  }
461

462
  // in the linked list, [pPage->pData] holds the next free page number, and 0 means
463
  // there's no next free page.
464
  *((SPgno*)pPage->pData) = 0;
×
465
  tdbPagerReturnPage(pBt->pPager, pPage, pTxn);
×
466

467
  // mark the root page as free page management page, then upgrade is complete.
468
  TDB_BTREE_PAGE_SET_FLAGS(pRoot, TDB_BTREE_STACK|TDB_BTREE_LEAF|TDB_BTREE_ROOT);
×
469
  tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
470
  tdbInfo("tdb/btree-to-stack: btree has been converted to a stack.");
×
471

472
  return 0;
×
473
}
474

475
// convert the btree to a stack, after this function, cells of the root page of the btree
476
// will be used to store page numbers of free pages, and when there's not enough space for
477
// a new cell(a new free page number), more free pages become a linked list based stack,
478
// and the first cell of the root page becomes the head of the stack.
479
int tdbBtreeToStack(SBTree *pBt) {
24,842✔
480
  TDB* pEnv = pBt->pPager->pEnv;
24,842✔
481
  TXN* pTxn = NULL;
24,842✔
482

483
  int ret = tdbBegin(pEnv, &pTxn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
24,842✔
484
  if (ret < 0) {
24,846!
485
    return ret;
×
486
  }
487

488
  ret = btreeToStack(pBt, pTxn);
24,846✔
489
  if (ret < 0) {
24,849!
490
    tdbAbort(pEnv, pTxn);
×
491
    return ret;
×
492
  }
493

494
  ret = tdbCommit(pEnv, pTxn);
24,849✔
495
  if (ret) return ret;
24,850!
496

497
  return tdbPostCommit(pEnv, pTxn);
24,850✔
498
}
499

500
int tdbBtreePushFreePage(SBTree *pBt, SPage *pPage, TXN* pTxn) {
14,232✔
501
  SPage *pRoot = NULL;
14,232✔
502
  SBtreeInitPageArg arg = {.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF};
14,232✔
503
  int ret = tdbPagerFetchPage(pBt->pPager, &pBt->root, &pRoot, tdbBtreeInitPage, &arg, pTxn);
14,232✔
504
  if (ret < 0) {
14,231!
505
    tdbError("tdb/btree-push-free-page: fetch root page failed with ret: %d.", ret);
×
506
    return ret;
×
507
  }
508

509
  ret = tdbPagerWrite(pBt->pPager, pRoot);
14,231✔
510
  if (ret < 0) {
14,231!
511
    tdbError("tdb/btree-push-free-page: failed to mark root page dirty since %s", terrstr());
×
512
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
513
    return ret;
×
514
  }
515

516
  SPgno pgno = TDB_PAGE_PGNO(pPage), prev = 0;
14,231✔
517
  int szCell = sizeof(pgno);
14,231✔
518

519
  // if there's not enough space for the new cell, then we need to push the new pgno to the
520
  // linked list based stack, so save the content of the stack head (the 1st cell of the root page)
521
  // and drop it.
522
  if (TDB_PAGE_FREE_SIZE(pRoot) < szCell + TDB_PAGE_OFFSET_SIZE(pRoot)) {
14,231!
523
    prev = *((SPgno*)tdbPageGetCell(pRoot, 0));
×
524
    ret = tdbPageDropCell(pRoot, 0, pTxn, pBt);
×
525
    if (ret < 0) {
×
526
      tdbError("tdb/btree-push-free-page: drop cell failed with ret: %d.", ret);
×
527
      tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
528
      return ret;
×
529
    }
530
  }
531

532
  // insert the new cell, it becomes the new stack head.
533
  ret = tdbPageInsertCell(pRoot, 0, (SCell*)&pgno, szCell, 0);
14,232✔
534
  if (ret < 0) {
14,230!
535
    tdbError("tdb/btree-push-free-page: insert cell failed with ret: %d.", ret);
×
536
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
537
    return ret;
×
538
  }
539
  
540
  // if there's not enough space for another new cell, save the value of the previous stack head
541
  // to [pPage->pData], this finishes the push operation to the linked list based stack.
542
  if (TDB_PAGE_FREE_SIZE(pRoot) < szCell + TDB_PAGE_OFFSET_SIZE(pRoot)) {
14,230!
543
    // mark the page dirty
544
    ret = tdbPagerWrite(pBt->pPager, pPage);
×
545
    if (ret < 0) {
×
546
      tdbError("tdb/btree-push-free-page: failed to mark page dirty since %s", terrstr());
×
547
      tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
548
      return ret;
×
549
    }
550
    *((SPgno*)pPage->pData) = prev;
×
551
  }
552

553
  tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
14,230✔
554
  return 0;
14,232✔
555
}
556

557

558
int tdbBtreePopFreePage(SBTree *pBt, SPgno *pgno, TXN* pTxn) {
1,283,584✔
559
  SPage* pRoot = NULL;
1,283,584✔
560
  SBtreeInitPageArg arg = {.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF};
1,283,584✔
561
  int ret = tdbPagerFetchPage(pBt->pPager, &pBt->root, &pRoot, tdbBtreeInitPage, &arg, pTxn);
1,283,584✔
562
  if (ret < 0) {
1,283,616!
563
    tdbError("tdb/btree-pop-free-page: fetch root page failed with ret: %d.", ret);
×
564
    return ret;
×
565
  }
566

567
  if (TDB_PAGE_TOTAL_CELLS(pRoot) == 0) {
1,283,616✔
568
    *pgno = 0;
1,269,830✔
569
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
1,269,830✔
570
    return 0;
1,269,877✔
571
  }
572

573
  ret = tdbPagerWrite(pBt->pPager, pRoot);
13,818✔
574
  if (ret < 0) {
13,826!
575
    tdbError("tdb/btree-pop-free-page: failed to mark root page dirty since %s", terrstr());
×
576
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
577
    return ret;
×
578
  }
579

580
  *pgno = *((SPgno*)tdbPageGetCell(pRoot, 0));
13,826✔
581

582
  SPgno next = 0;
13,826✔
583
  int szCell = sizeof(next);
13,826✔
584
  // if there's not enough space for a new cell, then the free pages is a linked list based stack,
585
  // we need to fetch the first free page to get the page number of the next free page.
586
  if (TDB_PAGE_FREE_SIZE(pRoot) < (szCell + TDB_PAGE_OFFSET_SIZE(pRoot))) {
13,826!
587
    SPage* pPage = NULL;
×
588
    ret = tdbPagerFetchFreePage(pBt->pPager, *pgno, &pPage, pTxn);
×
589
    if (ret < 0) {
×
590
      tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
591
      tdbError("tdb/btree-pop-free-page: fetch page failed with ret: %d.", ret);
×
592
      return ret;
×
593
    }
594
    next = *((SPgno*)pPage->pData);
×
595
    tdbPagerReturnPage(pBt->pPager, pPage, pTxn);
×
596
  }
597

598
  // drop the first cell
599
  ret = tdbPageDropCell(pRoot, 0, pTxn, pBt);
13,826✔
600
  if (ret < 0) {
13,826!
601
    tdbError("tdb/btree-pop-free-page: drop cell failed with ret: %d.", ret);
×
602
    tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
603
    return ret;
×
604
  }
605

606
  // if we have a next page, insert a new cell at index 0 to make it the stack head.
607
  if (next != 0) {
13,826!
608
    ret = tdbPageInsertCell(pRoot, 0, (SCell*)&next, szCell, 0);
×
609
    if (ret < 0) {
×
610
      tdbError("tdb/btree-pop-free-page: insert cell failed with ret: %d.", ret);
×
611
      tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
×
612
      return ret;
×
613
    }
614
  }
615

616
  tdbPagerReturnPage(pBt->pPager, pRoot, pTxn);
13,826✔
617
  return 0;
13,827✔
618
}
619

620
static int tdbDefaultKeyCmprFn(const void *pKey1, int keyLen1, const void *pKey2, int keyLen2) {
7,419,066✔
621
  int mlen;
622
  int cret;
623

624
  mlen = keyLen1 < keyLen2 ? keyLen1 : keyLen2;
7,419,066✔
625
  cret = memcmp(pKey1, pKey2, mlen);
7,419,066✔
626
  if (cret == 0) {
7,419,066✔
627
    if (keyLen1 < keyLen2) {
395,823✔
628
      cret = -1;
9✔
629
    } else if (keyLen1 > keyLen2) {
395,814✔
630
      cret = 1;
15✔
631
    } else {
632
      cret = 0;
395,799✔
633
    }
634
  }
635
  return cret;
7,419,066✔
636
}
637

638
int tdbBtreeInitPage(SPage *pPage, void *arg, int init) {
4,181,649✔
639
  SBTree *pBt;
640
  u8      flags;
641
  u8      leaf;
642

643
  pBt = ((SBtreeInitPageArg *)arg)->pBt;
4,181,649✔
644

645
  if (init) {
4,181,649✔
646
    // init page
647
    flags = TDB_BTREE_PAGE_GET_FLAGS(pPage);
1,535,846✔
648
    leaf = TDB_BTREE_PAGE_IS_LEAF(pPage);
1,535,846✔
649

650
    tdbPageInit(pPage, leaf ? sizeof(SLeafHdr) : sizeof(SIntHdr), tdbBtreeCellSize);
1,535,846✔
651
  } else {
652
    // zero page
653
    flags = ((SBtreeInitPageArg *)arg)->flags;
2,645,803✔
654
    leaf = flags & TDB_BTREE_LEAF;
2,645,803✔
655

656
    tdbPageZero(pPage, leaf ? sizeof(SLeafHdr) : sizeof(SIntHdr), tdbBtreeCellSize);
2,645,803✔
657

658
    if (leaf) {
2,646,150✔
659
      SLeafHdr *pLeafHdr = (SLeafHdr *)(pPage->pData);
1,576,234✔
660
      pLeafHdr->flags = flags;
1,576,234✔
661

662
    } else {
663
      SIntHdr *pIntHdr = (SIntHdr *)(pPage->pData);
1,069,916✔
664
      pIntHdr->flags = flags;
1,069,916✔
665
      pIntHdr->pgno = 0;
1,069,916✔
666
    }
667
  }
668

669
  if (leaf) {
4,181,965✔
670
    pPage->kLen = pBt->keyLen;
3,040,134✔
671
    pPage->vLen = pBt->valLen;
3,040,134✔
672
    pPage->maxLocal = pBt->maxLeaf;
3,040,134✔
673
    pPage->minLocal = pBt->minLeaf;
3,040,134✔
674
  } else if (TDB_BTREE_PAGE_IS_OVFL(pPage)) {
1,141,831✔
675
    pPage->kLen = pBt->keyLen;
998,234✔
676
    pPage->vLen = pBt->valLen;
998,234✔
677
    pPage->maxLocal = tdbPageCapacity(pBt->pageSize, sizeof(SIntHdr));
998,234✔
678
    pPage->minLocal = pBt->minLocal;
998,233✔
679
  } else {
680
    pPage->kLen = pBt->keyLen;
143,597✔
681
    pPage->vLen = sizeof(SPgno);
143,597✔
682
    pPage->maxLocal = pBt->maxLocal;
143,597✔
683
    pPage->minLocal = pBt->minLocal;
143,597✔
684
  }
685

686
  return 0;
4,181,964✔
687
}
688

689
// TDB_BTREE_BALANCE =====================
690
static int tdbBtreeBalanceDeeper(SBTree *pBt, SPage *pRoot, SPage **ppChild, TXN *pTxn) {
2,499✔
691
  SPager           *pPager;
692
  SPage            *pChild;
693
  SPgno             pgnoChild;
694
  int               ret;
695
  u8                flags;
696
  SIntHdr          *pIntHdr;
697
  SBtreeInitPageArg zArg;
698
  u8                leaf;
699

700
  pPager = pRoot->pPager;
2,499✔
701
  flags = TDB_BTREE_PAGE_GET_FLAGS(pRoot);
2,499✔
702
  leaf = TDB_BTREE_PAGE_IS_LEAF(pRoot);
2,499✔
703

704
  // allocate a new child page
705
  pgnoChild = 0;
2,499✔
706
  zArg.flags = TDB_FLAG_REMOVE(flags, TDB_BTREE_ROOT);
2,499✔
707
  zArg.pBt = pBt;
2,499✔
708
  ret = tdbPagerFetchPage(pPager, &pgnoChild, &pChild, tdbBtreeInitPage, &zArg, pTxn);
2,499✔
709
  if (ret < 0) {
2,500!
710
    return ret;
×
711
  }
712

713
  if (!leaf) {
2,500✔
714
    ((SIntHdr *)pChild->pData)->pgno = ((SIntHdr *)(pRoot->pData))->pgno;
37✔
715
  }
716

717
  ret = tdbPagerWrite(pPager, pChild);
2,500✔
718
  if (ret < 0) {
2,500!
719
    tdbError("failed to write page since %s", terrstr());
×
720
    return ret;
×
721
  }
722

723
  // Copy the root page content to the child page
724
  ret = tdbPageCopy(pRoot, pChild, 0);
2,500✔
725
  if (ret < 0) {
2,498!
726
    return ret;
×
727
  }
728

729
  // Reinitialize the root page
730
  zArg.flags = TDB_BTREE_ROOT;
2,498✔
731
  zArg.pBt = pBt;
2,498✔
732
  ret = tdbBtreeInitPage(pRoot, &zArg, 0);
2,498✔
733
  if (ret < 0) {
2,499!
734
    return ret;
×
735
  }
736

737
  pIntHdr = (SIntHdr *)(pRoot->pData);
2,499✔
738
  pIntHdr->pgno = pgnoChild;
2,499✔
739

740
  *ppChild = pChild;
2,499✔
741
  return 0;
2,499✔
742
}
743

744
static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTxn) {
199,670✔
745
  int ret;
746

747
  typedef struct {
748
    int     kLen;
749
    u8     *pKey;
750
    int     vLen;
751
    u8     *pVal;
752
  } SDecodedCell;
753

754
  int    nOlds, pageIdx;
755
  SPage *pOlds[3] = {0};
199,670✔
756
  SDecodedCell divCells[3] = {0};
199,670✔
757
  int    sIdx;
758
  u8     childNotLeaf;
759
  SPgno  rPgno;
760

761
  {  // Find 3 child pages at most to do balance
762
    int    nCells = TDB_PAGE_TOTAL_CELLS(pParent);
199,670✔
763
    SCell *pCell;
764

765
    if (nCells <= 2) {
199,708✔
766
      sIdx = 0;
11,135✔
767
      nOlds = nCells + 1;
11,135✔
768
    } else {
769
      // has more than three child pages
770
      if (idx == 0) {
188,573✔
771
        sIdx = 0;
120,941✔
772
      } else if (idx == nCells) {
67,632✔
773
        sIdx = idx - 2;
63,957✔
774
      } else {
775
        sIdx = idx - 1;
3,675✔
776
      }
777
      nOlds = 3;
188,573✔
778
    }
779
    for (int i = 0; i < nOlds; i++) {
788,714✔
780
      if (!(sIdx + i <= nCells)) {
588,979!
781
        return TSDB_CODE_FAILED;
×
782
      }
783

784
      SPgno pgno;
785
      if (sIdx + i == nCells) {
588,979✔
786
        if (TDB_BTREE_PAGE_IS_LEAF(pParent)) {
75,747!
787
          return TSDB_CODE_INTERNAL_ERROR;
×
788
        }
789
        pgno = ((SIntHdr *)(pParent->pData))->pgno;
75,747✔
790
      } else {
791
        pCell = tdbPageGetCell(pParent, sIdx + i);
513,232✔
792
        pgno = *(SPgno *)pCell;
513,241✔
793
      }
794

795
      ret = tdbPagerFetchPage(pBt->pPager, &pgno, pOlds + i, tdbBtreeInitPage,
588,988✔
796
                              &((SBtreeInitPageArg){.pBt = pBt, .flags = 0}), pTxn);
588,988✔
797
      if (ret < 0) {
589,009!
798
        tdbError("tdb/btree-balance: fetch page failed with ret: %d.", ret);
×
799
        return TSDB_CODE_FAILED;
×
800
      }
801

802
      ret = tdbPagerWrite(pBt->pPager, pOlds[i]);
589,009✔
803
      if (ret < 0) {
589,007✔
804
        tdbError("failed to write page since %s", terrstr());
1!
805
        return TSDB_CODE_FAILED;
×
806
      }
807
    }
808
    // copy the parent key out if child pages are not leaf page
809
    // childNotLeaf = !(TDB_BTREE_PAGE_IS_LEAF(pOlds[0]) || TDB_BTREE_PAGE_IS_OVFL(pOlds[0]));
810
    childNotLeaf = !TDB_BTREE_PAGE_IS_LEAF(pOlds[0]);
199,735✔
811
    if (childNotLeaf) {
199,735✔
812
      for (int i = 0; i < nOlds; i++) {
48,997✔
813
        if (sIdx + i < TDB_PAGE_TOTAL_CELLS(pParent)) {
36,665✔
814
          SCellDecoder cd = { 0 };
34,413✔
815
          pCell = tdbPageGetCell(pParent, sIdx + i);
34,413✔
816
          ret = tdbBtreeDecodeCell(pParent, pCell, &cd, pTxn, pBt);
34,413✔
817
          if (ret < 0) {
34,413!
818
            tdbError("tdb/btree-balance: decode cell failed with ret: %d.", ret);
×
819
            return TSDB_CODE_FAILED;
×
820
          }
821

822
          divCells[i].kLen = cd.kLen;
34,413✔
823
          if (TDB_CELLDECODER_FREE_KEY(&cd)) {
34,413✔
824
            divCells[i].pKey = cd.pKey;
2,249✔
825
          } else {
826
            divCells[i].pKey = tdbRealloc(NULL, cd.kLen);
32,164✔
827
            if (divCells[i].pKey == NULL) {
32,164!
828
              return terrno;
×
829
            }
830
            memcpy(divCells[i].pKey, cd.pKey, cd.kLen);
32,164✔
831
          }
832

833
          divCells[i].vLen = cd.vLen;
34,413✔
834
          if (TDB_CELLDECODER_FREE_VAL(&cd)) {
34,413!
835
            divCells[i].pVal = cd.pVal;
×
836
          } else {
837
            divCells[i].pVal = tdbRealloc(NULL, cd.vLen);
34,413✔
838
            if (divCells[i].pVal == NULL) {
34,413!
839
              return terrno;
×
840
            }
841
            memcpy(divCells[i].pVal, cd.pVal, cd.vLen);
34,413✔
842
          }
843
        }
844

845
        if (i < nOlds - 1) {
36,665✔
846
          int szDivCell;
847
          SCell* pDivCell = tdbOsMalloc(divCells[i].kLen + divCells[i].vLen + sizeof(SIntHdr));
24,333!
848
          if(pDivCell == NULL) {
24,333!
849
            return terrno;
×
850
          }
851

852
          ret = tdbBtreeEncodeCell(pOlds[i], divCells[i].pKey, divCells[i].kLen, divCells[i].pVal, divCells[i].vLen,
24,333✔
853
                                   pDivCell, &szDivCell, pTxn, pBt);
854
          if (ret < 0) {
24,333!
855
            tdbOsFree(pDivCell);
×
856
            tdbError("tdb/btree-balance: encode cell failed with ret: %d.", ret);
×
857
            return TSDB_CODE_FAILED;
×
858
          }
859

860
          ((SPgno *)pDivCell)[0] = ((SIntHdr *)pOlds[i]->pData)->pgno;
24,333✔
861
          ((SIntHdr *)pOlds[i]->pData)->pgno = 0;
24,333✔
862
          ret = tdbPageInsertCell(pOlds[i], TDB_PAGE_TOTAL_CELLS(pOlds[i]), pDivCell, szDivCell, 1);
24,333✔
863
          tdbOsFree(pDivCell);
24,333!
864
          if (ret < 0) {
24,333!
865
            tdbError("tdb/btree-balance: insert cell failed with ret: %d.", ret);
×
866
            return TSDB_CODE_FAILED;
×
867
          }
868
        }
869
      }
870
      rPgno = ((SIntHdr *)pOlds[nOlds - 1]->pData)->pgno;
12,332✔
871
    }
872

873
    ret = tdbPagerWrite(pBt->pPager, pParent);
199,735✔
874
    if (ret < 0) {
199,725!
875
      tdbError("failed to write page since %s", terrstr());
×
876
      return ret;
×
877
    }
878

879
    // drop the cells on parent page
880
    for (int i = 0; i < nOlds; i++) {
788,639✔
881
      nCells = TDB_PAGE_TOTAL_CELLS(pParent);
588,924✔
882
      if (sIdx < nCells) {
588,944✔
883
        ret = tdbPageDropCell(pParent, sIdx, pTxn, pBt);
513,209✔
884
        if (ret < 0) {
513,182✔
885
          tdbError("tdb/btree-balance: drop cell failed with ret: %d.", ret);
7!
886
          return TSDB_CODE_FAILED;
×
887
        }
888
      } else {
889
        ((SIntHdr *)pParent->pData)->pgno = 0;
75,735✔
890
      }
891
    }
892
  }
893

894
  int nNews = 0;
199,715✔
895
  struct {
896
    int cnt;
897
    int size;
898
    int iPage;
899
    int oIdx;
900
  } infoNews[5] = {0};
199,715✔
901

902
  {  // Get how many new pages are needed and the new distribution
903

904
    // first loop to find minimum number of pages needed
905
    for (int oPage = 0; oPage < nOlds; oPage++) {
788,694✔
906
      SPage *pPage = pOlds[oPage];
588,933✔
907
      SCell *pCell;
908
      int    cellBytes;
909
      int    oIdx;
910

911
      for (oIdx = 0; oIdx < TDB_PAGE_TOTAL_CELLS(pPage); oIdx++) {
20,076,141✔
912
        pCell = tdbPageGetCell(pPage, oIdx);
19,483,692✔
913
        cellBytes = TDB_BYTES_CELL_TAKEN(pPage, pCell);
19,482,764✔
914

915
        if (infoNews[nNews].size + cellBytes > TDB_PAGE_USABLE_SIZE(pPage)) {
19,487,208✔
916
          // page is full, use a new page
917
          nNews++;
532,392✔
918

919
          if (childNotLeaf) {
532,392✔
920
            // for non-child page, this cell is used as the right-most child,
921
            // the divider cell to parent as well
922
            continue;
25,491✔
923
          }
924
        }
925
        infoNews[nNews].cnt++;
19,461,717✔
926
        infoNews[nNews].size += cellBytes;
19,461,717✔
927
        infoNews[nNews].iPage = oPage;
19,461,717✔
928
        infoNews[nNews].oIdx = oIdx;
19,461,717✔
929
      }
930
    }
931

932
    nNews++;
199,761✔
933

934
    // back loop to make the distribution even
935
    for (int iNew = nNews - 1; iNew > 0; iNew--) {
732,163✔
936
      SCell *pCell;
937
      int    szLCell, szRCell;
938

939
      // balance page (iNew) and (iNew-1)
940
      for (;;) {
941
        pCell = tdbPageGetCell(pOlds[infoNews[iNew - 1].iPage], infoNews[iNew - 1].oIdx);
1,860,996✔
942

943
        szLCell = tdbBtreeCellSize(pOlds[infoNews[iNew - 1].iPage], pCell, NULL);
1,860,785✔
944
        if (!childNotLeaf) {
1,860,733✔
945
          szRCell = szLCell;
1,424,539✔
946
        } else {
947
          int    iPage = infoNews[iNew - 1].iPage;
436,194✔
948
          int    oIdx = infoNews[iNew - 1].oIdx + 1;
436,194✔
949
          SPage *pPage;
950
          for (;;) {
951
            pPage = pOlds[iPage];
444,908✔
952
            if (oIdx < TDB_PAGE_TOTAL_CELLS(pPage)) {
444,908✔
953
              break;
436,194✔
954
            }
955

956
            iPage++;
8,714✔
957
            oIdx = 0;
8,714✔
958
          }
959

960
          pCell = tdbPageGetCell(pPage, oIdx);
436,194✔
961
          szRCell = tdbBtreeCellSize(pPage, pCell, NULL);
436,194✔
962
        }
963

964
        if (!(infoNews[iNew - 1].cnt > 0)) {
1,860,721!
965
          return TSDB_CODE_FAILED;
×
966
        }
967

968
        if (infoNews[iNew].size + szRCell >= infoNews[iNew - 1].size - szRCell) {
1,860,721✔
969
          break;
532,402✔
970
        }
971

972
        // Move a cell right forward
973
        infoNews[iNew - 1].cnt--;
1,328,319✔
974
        infoNews[iNew - 1].size -= szLCell;
1,328,319✔
975
        infoNews[iNew - 1].oIdx--;
1,328,319✔
976
        for (;;) {
977
          if (infoNews[iNew - 1].oIdx >= 0) {
1,338,286✔
978
            break;
1,328,559✔
979
          }
980

981
          infoNews[iNew - 1].iPage--;
9,727✔
982
          infoNews[iNew - 1].oIdx = TDB_PAGE_TOTAL_CELLS(pOlds[infoNews[iNew - 1].iPage]) - 1;
9,727✔
983
        }
984

985
        infoNews[iNew].cnt++;
1,328,559✔
986
        infoNews[iNew].size += szRCell;
1,328,559✔
987
      }
988
    }
989
  }
990

991
  SPage *pNews[5] = {0};
199,726✔
992
  {  // Allocate new pages, reuse the old page when possible
993

994
    SPgno             pgno;
995
    SBtreeInitPageArg iarg;
996
    u8                flags;
997

998
    flags = TDB_BTREE_PAGE_GET_FLAGS(pOlds[0]);
199,726✔
999

1000
    for (int iNew = 0; iNew < nNews; iNew++) {
931,801✔
1001
      if (iNew < nOlds) {
732,084✔
1002
        pNews[iNew] = pOlds[iNew];
588,850✔
1003
      } else {
1004
        pgno = 0;
143,234✔
1005
        iarg.pBt = pBt;
143,234✔
1006
        iarg.flags = flags;
143,234✔
1007
        ret = tdbPagerFetchPage(pBt->pPager, &pgno, pNews + iNew, tdbBtreeInitPage, &iarg, pTxn);
143,234✔
1008
        if (ret < 0) {
143,243!
1009
          tdbError("tdb/btree-balance: fetch page failed with ret: %d.", ret);
×
1010
          return ret;
×
1011
        }
1012

1013
        ret = tdbPagerWrite(pBt->pPager, pNews[iNew]);
143,243✔
1014
        if (ret < 0) {
143,235✔
1015
          tdbError("failed to write page since %s", terrstr());
10!
1016
          return ret;
×
1017
        }
1018
      }
1019
    }
1020

1021
    // TODO: sort the page according to the page number
1022
  }
1023

1024
  {  // Do the real cell distribution
1025
    SPage            *pOldsCopy[3] = {0};
199,717✔
1026
    SCell            *pCell;
1027
    int               szCell;
1028
    SBtreeInitPageArg iarg;
1029
    int               iNew, nNewCells;
1030
    SCellDecoder      cd = {0};
199,717✔
1031

1032
    iarg.pBt = pBt;
199,717✔
1033
    iarg.flags = TDB_BTREE_PAGE_GET_FLAGS(pOlds[0]);
199,717✔
1034
    for (int i = 0; i < nOlds; i++) {
788,703✔
1035
      ret = tdbPageCreate(pOlds[0]->pageSize, &pOldsCopy[i], tdbDefaultMalloc, NULL);
588,941✔
1036
      if (ret < 0) {
588,986!
1037
        tdbError("tdb/btree-balance: create page failed with ret: %d.", ret);
×
1038
        return TSDB_CODE_FAILED;
×
1039
      }
1040
      ret = tdbBtreeInitPage(pOldsCopy[i], &iarg, 0);
588,986✔
1041
      if (ret < 0) {
588,969!
1042
        tdbError("tdb/btree-balance: init page failed with ret: %d.", ret);
×
1043
        return TSDB_CODE_FAILED;
×
1044
      }
1045
      ret = tdbPageCopy(pOlds[i], pOldsCopy[i], 0);
588,969✔
1046
      if (ret < 0) {
588,969!
1047
        tdbError("tdb/btree-balance: copy page failed with ret: %d.", ret);
×
1048
        return TSDB_CODE_FAILED;
×
1049
      }
1050
      pOlds[i]->nOverflow = 0;
588,986✔
1051
    }
1052

1053
    iNew = 0;
199,762✔
1054
    nNewCells = 0;
199,762✔
1055
    ret = tdbBtreeInitPage(pNews[iNew], &iarg, 0);
199,762✔
1056
    if (ret < 0) {
199,724!
1057
      tdbError("tdb/btree-balance: init page failed with ret: %d.", ret);
×
1058
      return TSDB_CODE_FAILED;
×
1059
    }
1060

1061
    for (int iOld = 0; iOld < nOlds; iOld++) {
788,695✔
1062
      SPage *pPage;
1063

1064
      pPage = pOldsCopy[iOld];
588,956✔
1065

1066
      for (int oIdx = 0; oIdx < TDB_PAGE_TOTAL_CELLS(pPage); oIdx++) {
19,991,058✔
1067
        pCell = tdbPageGetCell(pPage, oIdx);
19,398,281✔
1068
        szCell = tdbBtreeCellSize(pPage, pCell, NULL);
19,377,407✔
1069

1070
        if (!(nNewCells <= infoNews[iNew].cnt)) {
19,394,077!
1071
          return TSDB_CODE_FAILED;
×
1072
        }
1073
        if (!(iNew < nNews)) {
19,394,077!
1074
          return TSDB_CODE_FAILED;
×
1075
        }
1076

1077
        if (nNewCells < infoNews[iNew].cnt) {
19,394,077✔
1078
          ret = tdbPageInsertCell(pNews[iNew], nNewCells, pCell, szCell, 0);
19,369,939✔
1079
          if (ret < 0) {
19,375,525!
1080
            tdbError("tdb/btree-balance: insert cell failed with ret: %d.", ret);
×
1081
            return TSDB_CODE_FAILED;
×
1082
          }
1083
          nNewCells++;
19,375,525✔
1084

1085
          // insert parent page
1086
          if (!childNotLeaf && nNewCells == infoNews[iNew].cnt) {
19,375,525✔
1087
            SIntHdr *pIntHdr = (SIntHdr *)pParent->pData;
694,300✔
1088

1089
            if (iNew == nNews - 1 && pIntHdr->pgno == 0) {
694,300✔
1090
              pIntHdr->pgno = TDB_PAGE_PGNO(pNews[iNew]);
73,497✔
1091
            } else {
1092
              ret = tdbBtreeDecodeCell(pPage, pCell, &cd, pTxn, pBt);
620,803✔
1093
              if (ret < 0) {
620,823!
1094
                tdbError("tdb/btree-balance: decode cell failed with ret: %d.", ret);
×
1095
                return TSDB_CODE_FAILED;
×
1096
              }
1097

1098
              // TODO: pCell here may be inserted as an overflow cell, handle it
1099
              SCell *pNewCell = tdbOsMalloc(cd.kLen + 9);
620,823!
1100
              if (pNewCell == NULL) {
620,808!
1101
                return terrno;
×
1102
              }
1103
              int   szNewCell;
1104
              SPgno pgno;
1105
              pgno = TDB_PAGE_PGNO(pNews[iNew]);
620,808✔
1106
              ret = tdbBtreeEncodeCell(pParent, cd.pKey, cd.kLen, (void *)&pgno, sizeof(SPgno), pNewCell, &szNewCell,
620,808✔
1107
                                       pTxn, pBt);
1108
              if (TDB_CELLDECODER_FREE_VAL(&cd)) {
620,818✔
1109
                tdbFree(cd.pVal);
468,666✔
1110
                cd.pVal = NULL;
468,606✔
1111
              }
1112
              if (ret < 0) {
620,758!
1113
                tdbOsFree(pNewCell);
×
1114
                tdbError("tdb/btree-balance: encode cell failed with ret: %d.", ret);
×
1115
                return TSDB_CODE_FAILED;
×
1116
              }
1117
              ret = tdbPageInsertCell(pParent, sIdx++, pNewCell, szNewCell, 0);
620,758✔
1118
              tdbOsFree(pNewCell);
620,808!
1119
              if (ret) {
620,757!
1120
                tdbError("tdb/btree-balance: insert cell failed with ret: %d.", ret);
×
1121
                return TSDB_CODE_FAILED;
×
1122
              }
1123
            }
1124

1125
            // move to next new page
1126
            iNew++;
694,254✔
1127
            nNewCells = 0;
694,254✔
1128
            if (iNew < nNews) {
694,254✔
1129
              ret = tdbBtreeInitPage(pNews[iNew], &iarg, 0);
506,880✔
1130
              if (ret < 0) {
506,919!
1131
                tdbError("tdb/btree-balance: init page failed with ret: %d.", ret);
×
1132
                return TSDB_CODE_FAILED;
×
1133
              }
1134
            }
1135
          }
1136
        } else {
1137
          if (!(childNotLeaf)) {
24,138!
1138
            return TSDB_CODE_FAILED;
×
1139
          }
1140
          if (!(iNew < nNews - 1)) {
24,138!
1141
            return TSDB_CODE_FAILED;
×
1142
          }
1143

1144
          // set current new page right-most child
1145
          ((SIntHdr *)pNews[iNew]->pData)->pgno = ((SPgno *)pCell)[0];
24,138✔
1146

1147
          // insert to parent as divider cell
1148
          if (!(iNew < nNews - 1)) {
24,138!
1149
            return TSDB_CODE_FAILED;
×
1150
          }
1151
          ((SPgno *)pCell)[0] = TDB_PAGE_PGNO(pNews[iNew]);
24,138✔
1152
          ret = tdbPageInsertCell(pParent, sIdx++, pCell, szCell, 0);
24,138✔
1153
          if (ret) {
25,491!
1154
            tdbError("tdb/btree-balance: insert cell failed with ret: %d.", ret);
×
1155
            return TSDB_CODE_FAILED;
×
1156
          }
1157

1158
          // move to next new page
1159
          iNew++;
25,491✔
1160
          nNewCells = 0;
25,491✔
1161
          if (iNew < nNews) {
25,491!
1162
            ret = tdbBtreeInitPage(pNews[iNew], &iarg, 0);
25,491✔
1163
            if (ret < 0) {
25,491!
1164
              tdbError("tdb/btree-balance: init page failed with ret: %d.", ret);
×
1165
              return TSDB_CODE_FAILED;
×
1166
            }
1167
          }
1168
        }
1169
      }
1170
    }
1171

1172
    if (childNotLeaf) {
199,739✔
1173
      if (!(TDB_PAGE_TOTAL_CELLS(pNews[nNews - 1]) == infoNews[nNews - 1].cnt)) {
12,332!
1174
        return TSDB_CODE_FAILED;
×
1175
      }
1176
      ((SIntHdr *)(pNews[nNews - 1]->pData))->pgno = rPgno;
12,332✔
1177

1178
      SIntHdr *pIntHdr = (SIntHdr *)pParent->pData;
12,332✔
1179
      if (pIntHdr->pgno == 0) {
12,332✔
1180
        pIntHdr->pgno = TDB_PAGE_PGNO(pNews[nNews - 1]);
2,252✔
1181
      } else {
1182
        int szDivCell;
1183
        SDecodedCell *pdc = &divCells[nOlds - 1];
10,080✔
1184
        SCell* pDivCell = tdbOsMalloc(pdc->kLen + pdc->vLen + sizeof(SIntHdr));
10,080!
1185
        if(pDivCell == NULL) {
10,080!
1186
          return terrno;
×
1187
        }
1188

1189
        ret = tdbBtreeEncodeCell(pParent, pdc->pKey, pdc->kLen, pdc->pVal, pdc->vLen, pDivCell, &szDivCell, pTxn, pBt);
10,080✔
1190
        if (ret < 0) {
10,080!
1191
          tdbOsFree(pDivCell);
×
1192
          tdbError("tdb/btree-balance: encode cell failed with ret: %d.", ret);
×
1193
          return TSDB_CODE_FAILED;
×
1194
        }
1195

1196
        ((SPgno *)pDivCell)[0] = TDB_PAGE_PGNO(pNews[nNews - 1]);
10,080✔
1197
        ret = tdbPageInsertCell(pParent, sIdx, pDivCell, szDivCell, 0);
10,080✔
1198
        tdbOsFree(pDivCell);
10,080!
1199
        if (ret) {
10,080!
1200
          tdbError("tdb/btree-balance: insert cell failed with ret: %d.", ret);
×
1201
          return TSDB_CODE_FAILED;
×
1202
        }
1203
      }
1204
    }
1205

1206
    for (int i = 0; i < nOlds; i++) {
788,747✔
1207
      tdbPageDestroy(pOldsCopy[i], tdbDefaultFree, NULL);
588,974✔
1208
    }
1209
  }
1210

1211
  if (TDB_BTREE_PAGE_IS_ROOT(pParent) && TDB_PAGE_TOTAL_CELLS(pParent) == 0) {
199,773✔
1212
    i8 flags = TDB_BTREE_ROOT | TDB_BTREE_PAGE_IS_LEAF(pNews[0]);
154✔
1213
    // copy content to the parent page
1214
    ret = tdbBtreeInitPage(pParent, &(SBtreeInitPageArg){.flags = flags, .pBt = pBt}, 0);
154✔
1215
    if (ret < 0) {
83!
1216
      return ret;
×
1217
    }
1218
    ret = tdbPageCopy(pNews[0], pParent, 1);
83✔
1219
    if (ret < 0) {
83!
1220
      return ret;
×
1221
    }
1222

1223
    if (!TDB_BTREE_PAGE_IS_LEAF(pNews[0])) {
83!
1224
      ((SIntHdr *)(pParent->pData))->pgno = ((SIntHdr *)(pNews[0]->pData))->pgno;
×
1225
    }
1226

1227
    ret = tdbPagerInsertFreePage(pBt->pPager, pNews[0], pTxn);
83✔
1228
    if (ret < 0) {
83!
1229
      return ret;
×
1230
    }
1231
  }
1232

1233
  for (int i = 0; i < sizeof(divCells) / sizeof(divCells[0]); i++) {
798,813✔
1234
    if (divCells[i].pKey) {
599,097✔
1235
      tdbFree(divCells[i].pKey);
34,413✔
1236
    }
1237
    if (divCells[i].pVal) {
599,097✔
1238
      tdbFree(divCells[i].pVal);
34,413✔
1239
    }
1240
  }
1241

1242
  for (pageIdx = 0; pageIdx < nOlds; ++pageIdx) {
788,722✔
1243
    if (pageIdx >= nNews) {
588,986✔
1244
      ret = tdbPagerInsertFreePage(pBt->pPager, pOlds[pageIdx], pTxn);
100✔
1245
      if (ret < 0) {
99!
1246
        return ret;
×
1247
      }
1248
    }
1249
    tdbPagerReturnPage(pBt->pPager, pOlds[pageIdx], pTxn);
588,985✔
1250
  }
1251
  for (; pageIdx < nNews; ++pageIdx) {
342,984✔
1252
    tdbPagerReturnPage(pBt->pPager, pNews[pageIdx], pTxn);
143,247✔
1253
  }
1254

1255
  return 0;
199,737✔
1256
}
1257

1258
static int tdbBtreeBalance(SBTC *pBtc) {
417,852✔
1259
  int    iPage;
1260
  int    ret;
1261
  int    nFree;
1262
  SPage *pParent;
1263
  SPage *pPage;
1264
  u8     flags;
1265
  u8     leaf;
1266
  u8     root;
1267

1268
  // Main loop to balance the BTree
1269
  for (;;) {
1270
    iPage = pBtc->iPage;
417,852✔
1271
    pPage = pBtc->pPage;
417,852✔
1272
    leaf = TDB_BTREE_PAGE_IS_LEAF(pPage);
417,852✔
1273
    root = TDB_BTREE_PAGE_IS_ROOT(pPage);
417,852✔
1274
    nFree = TDB_PAGE_FREE_SIZE(pPage);
417,852✔
1275

1276
    // when the page is not overflow and not too empty, the balance work
1277
    // is finished. Just break out the balance loop.
1278
    if (pPage->nOverflow == 0 && nFree < TDB_PAGE_USABLE_SIZE(pPage) * 2 / 3) {
417,882✔
1279
      break;
134,588✔
1280
    }
1281

1282
    if (iPage == 0) {
283,294✔
1283
      // For the root page, only balance when the page is overfull,
1284
      // ignore the case of empty
1285
      if (pPage->nOverflow == 0) break;
83,592✔
1286

1287
      ret = tdbBtreeBalanceDeeper(pBtc->pBt, pPage, &(pBtc->pgStack[1]), pBtc->pTxn);
2,501✔
1288
      if (ret < 0) {
2,498!
1289
        return ret;
×
1290
      }
1291

1292
      pBtc->idx = 0;
2,498✔
1293
      pBtc->idxStack[0] = 0;
2,498✔
1294
      pBtc->pgStack[0] = pBtc->pPage;
2,498✔
1295
      pBtc->iPage = 1;
2,498✔
1296
      pBtc->pPage = pBtc->pgStack[1];
2,498✔
1297
    } else {
1298
      // Generalized balance step
1299
      pParent = pBtc->pgStack[iPage - 1];
199,702✔
1300

1301
      ret = tdbBtreeBalanceNonRoot(pBtc->pBt, pParent, pBtc->idxStack[pBtc->iPage - 1], pBtc->pTxn);
199,702✔
1302
      if (ret < 0) {
199,727!
1303
        return ret;
×
1304
      }
1305

1306
      tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pPage, pBtc->pTxn);
199,727✔
1307

1308
      pBtc->iPage--;
199,734✔
1309
      pBtc->pPage = pBtc->pgStack[pBtc->iPage];
199,734✔
1310
    }
1311
  }
1312

1313
  return 0;
215,679✔
1314
}
1315
// TDB_BTREE_BALANCE
1316

1317
static int tdbFetchOvflPage(SPgno *pPgno, SPage **ppOfp, TXN *pTxn, SBTree *pBt) {
991,764✔
1318
  int ret = 0;
991,764✔
1319

1320
  *pPgno = 0;
991,764✔
1321
  SBtreeInitPageArg iArg;
1322
  iArg.pBt = pBt;
991,764✔
1323
  iArg.flags = TDB_FLAG_ADD(0, TDB_BTREE_OVFL);
991,764✔
1324
  ret = tdbPagerFetchPage(pBt->pPager, pPgno, ppOfp, tdbBtreeInitPage, &iArg, pTxn);
991,764✔
1325
  if (ret < 0) {
991,763!
1326
    return ret;
×
1327
  }
1328

1329
  // mark dirty
1330
  ret = tdbPagerWrite(pBt->pPager, *ppOfp);
991,763✔
1331
  if (ret < 0) {
991,746!
1332
    tdbError("failed to write page since %s", terrstr());
×
1333
    return ret;
×
1334
  }
1335

1336
  return ret;
991,747✔
1337
}
1338

1339
static int tdbLoadOvflPage(SPgno *pPgno, SPage **ppOfp, TXN *pTxn, SBTree *pBt) {
5,316,882✔
1340
  int ret = 0;
5,316,882✔
1341

1342
  SBtreeInitPageArg iArg;
1343
  iArg.pBt = pBt;
5,316,882✔
1344
  iArg.flags = TDB_FLAG_ADD(0, TDB_BTREE_OVFL);
5,316,882✔
1345
  ret = tdbPagerFetchPage(pBt->pPager, pPgno, ppOfp, tdbBtreeInitPage, &iArg, pTxn);
5,316,882✔
1346
  if (ret < 0) {
5,316,893!
1347
    return ret;
×
1348
  }
1349

1350
  return ret;
5,316,893✔
1351
}
1352

1353

1354
int tdbFreeOvflPage(SPgno pgno, int nSize, TXN *pTxn, SBTree *pBt) {
13,378✔
1355
  int ret = 0;
13,378✔
1356

1357
  while (pgno != 0) {
27,427✔
1358
    SPage *ofp;
1359
    int    bytes;
1360
    int ret = tdbLoadOvflPage(&pgno, &ofp, pTxn, pBt);
14,049✔
1361
    if (ret < 0) {
14,048!
1362
      return ret;
×
1363
    }
1364

1365
    SCell *cell = tdbPageGetCell(ofp, 0);
14,048✔
1366

1367
    if (nSize <= ofp->maxLocal - sizeof(SPgno)) {
14,048✔
1368
      bytes = nSize;
13,378✔
1369
    } else {
1370
      bytes = ofp->maxLocal - sizeof(SPgno);
670✔
1371
    }
1372

1373
    memcpy(&pgno, cell + bytes, sizeof(pgno));
14,048✔
1374

1375
    ret = tdbPagerWrite(pBt->pPager, ofp);
14,048✔
1376
    if (ret < 0) {
14,049!
1377
      tdbError("failed to write page since %s", terrstr());
×
1378
      return ret;
×
1379
    }
1380

1381
    tdbPagerInsertFreePage(pBt->pPager, ofp, pTxn);
14,049✔
1382
    tdbPagerReturnPage(pBt->pPager, ofp, pTxn);
14,049✔
1383

1384
    nSize -= bytes;
14,049✔
1385
  }
1386

1387
  return ret;
13,378✔
1388
}
1389

1390

1391
// TDB_BTREE_CELL =====================
1392
static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const void *pKey, int kLen, const void *pVal,
2,164,687✔
1393
                                 int vLen, int *szPayload, TXN *pTxn, SBTree *pBt) {
1394
  int ret = 0;
2,164,687✔
1395
  int nPayload = kLen + vLen;
2,164,687✔
1396
  int maxLocal = pPage->maxLocal;
2,164,687✔
1397

1398
  if (nPayload + nHeader <= maxLocal) {
2,164,687✔
1399
    // no overflow page is needed
1400
    memcpy(pCell + nHeader, pKey, kLen);
2,024,852✔
1401
    if (pVal) {
2,024,852✔
1402
      memcpy(pCell + nHeader + kLen, pVal, vLen);
1,032,772✔
1403
    }
1404

1405
    *szPayload = nPayload;
2,024,852✔
1406
    return 0;
2,024,852✔
1407
  }
1408

1409
  // handle overflow case
1410
  // calc local storage size
1411
  int minLocal = pPage->minLocal;
139,835✔
1412
  int surplus = minLocal + (nPayload + nHeader - minLocal) % (maxLocal - sizeof(SPgno));
139,835✔
1413
  int nLocal = surplus <= maxLocal ? surplus : minLocal;
139,835✔
1414

1415
  // fetch a new ofp and make it dirty
1416
  SPgno  pgno = 0;
139,835✔
1417
  SPage *ofp = NULL;
139,835✔
1418

1419
  ret = tdbFetchOvflPage(&pgno, &ofp, pTxn, pBt);
139,835✔
1420
  if (ret < 0) {
140,101!
1421
    return ret;
×
1422
  }
1423

1424
  // local buffer for cell
1425
  SCell *pBuf = tdbRealloc(NULL, pBt->pageSize);
140,101✔
1426
  if (pBuf == NULL) {
140,104!
1427
    tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
×
1428
    return ret;
×
1429
  }
1430

1431
  int nLeft = nPayload;
140,104✔
1432
  int bytes;
1433
  if (nLocal >= nHeader + kLen + sizeof(SPgno)) {
140,104✔
1434
    // pack key to local
1435
    memcpy(pCell + nHeader, pKey, kLen);
124,478✔
1436
    nLeft -= kLen;
124,478✔
1437
    // pack partial val to local if any space left
1438
    if (nLocal > nHeader + kLen + sizeof(SPgno)) {
124,478!
1439
      if (!(pVal != NULL && vLen != 0)) {
124,478!
1440
        tdbFree(pBuf);
×
1441
        tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
×
1442
        return TSDB_CODE_FAILED;
×
1443
      }
1444
      memcpy(pCell + nHeader + kLen, pVal, nLocal - nHeader - kLen - sizeof(SPgno));
124,478✔
1445
      nLeft -= nLocal - nHeader - kLen - sizeof(SPgno);
124,478✔
1446
    }
1447

1448
    // pack nextPgno
1449
    memcpy(pCell + nHeader + nPayload - nLeft, &pgno, sizeof(pgno));
124,478✔
1450

1451
  } else {
1452

1453
    int nLeftKey = kLen;
15,626✔
1454
    // pack partial key and nextPgno
1455
    memcpy(pCell + nHeader, pKey, nLocal - nHeader - sizeof(pgno));
15,626✔
1456
    nLeft -= nLocal - nHeader - sizeof(pgno);
15,626✔
1457
    nLeftKey -= nLocal - nHeader - sizeof(pgno);
15,626✔
1458

1459
    memcpy(pCell + nLocal - sizeof(pgno), &pgno, sizeof(pgno));
15,626✔
1460

1461
    size_t lastKeyPageSpace = 0;
15,626✔
1462
    // pack left key & val to ovpages
1463
    do {
1464
      // cal key to cpy
1465
      int lastKeyPage = 0;
15,785✔
1466
      if (nLeftKey <= ofp->maxLocal - sizeof(SPgno)) {
15,785✔
1467
        bytes = nLeftKey;
15,626✔
1468
        lastKeyPage = 1;
15,626✔
1469
        lastKeyPageSpace = ofp->maxLocal - sizeof(SPgno) - nLeftKey;
15,626✔
1470
      } else {
1471
        bytes = ofp->maxLocal - sizeof(SPgno);
159✔
1472
      }
1473

1474
      // cpy key
1475
      memcpy(pBuf, ((SCell *)pKey) + kLen - nLeftKey, bytes);
15,785✔
1476

1477
      SPage* nextOfp = NULL;
15,785✔
1478
      if (lastKeyPage) {
15,785✔
1479
        if (lastKeyPageSpace >= vLen) {
15,626!
1480
          if (vLen > 0) {
15,626!
1481
            memcpy(pBuf + bytes, pVal, vLen);
×
1482
            bytes += vLen;
×
1483
          }
1484
          pgno = 0;
15,626✔
1485
        } else {
1486
          memcpy(pBuf + bytes, pVal, lastKeyPageSpace);
×
1487
          bytes += lastKeyPageSpace;
×
1488

1489
          // fetch next ofp, a new ofp and make it dirty
1490
          ret = tdbFetchOvflPage(&pgno, &nextOfp, pTxn, pBt);
×
1491
          if (ret < 0) {
×
1492
            tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
×
1493
            tdbFree(pBuf);
×
1494
            return ret;
×
1495
          }
1496
        }
1497
      } else {
1498
        // fetch next ofp, a new ofp and make it dirty
1499
        ret = tdbFetchOvflPage(&pgno, &nextOfp, pTxn, pBt);
159✔
1500
        if (ret < 0) {
159!
1501
          tdbFree(pBuf);
×
1502
          tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
×
1503
          return ret;
×
1504
        }
1505
      }
1506

1507
      memcpy(pBuf + bytes, &pgno, sizeof(pgno));
15,785✔
1508

1509
      ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0);
15,785✔
1510
      tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
15,785✔
1511
      if (ret < 0) {
15,787!
1512
        return ret;
×
1513
      }
1514

1515
      ofp = nextOfp;
15,787✔
1516
      nLeftKey -= bytes;
15,787✔
1517
      nLeft -= bytes;
15,787✔
1518
    } while (nLeftKey > 0);
15,787✔
1519
  }
1520

1521
  while (nLeft > 0) {
1,116,087✔
1522
    SPage* nextOfp = NULL;
975,981✔
1523

1524
    // pack left val data to ovpages
1525
    int lastPage = 0;
975,981✔
1526
    if (nLeft <= ofp->maxLocal - sizeof(SPgno)) {
975,981✔
1527
      bytes = nLeft;
124,480✔
1528
      lastPage = 1;
124,480✔
1529
    } else {
1530
      bytes = ofp->maxLocal - sizeof(SPgno);
851,501✔
1531
    }
1532

1533
    // fetch next ofp if not last page
1534
    if (!lastPage) {
975,981✔
1535
      // fetch a new ofp and make it dirty
1536
      ret = tdbFetchOvflPage(&pgno, &nextOfp, pTxn, pBt);
851,501✔
1537
      if (ret < 0) {
851,486!
1538
        tdbFree(pBuf);
×
1539
        tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
×
1540
        return ret;
×
1541
      }
1542
    } else {
1543
      pgno = 0;
124,480✔
1544
    }
1545

1546
    memcpy(pBuf, ((SCell *)pVal) + vLen - nLeft, bytes);
975,966✔
1547
    memcpy(pBuf + bytes, &pgno, sizeof(pgno));
975,966✔
1548

1549
    ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0);
975,966✔
1550
    tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
975,965✔
1551
    if (ret < 0) {
975,981!
1552
      tdbFree(pBuf);
×
1553
      return ret;
×
1554
    }
1555

1556
    ofp = nextOfp;
975,981✔
1557
    nLeft -= bytes;
975,981✔
1558
  }
1559

1560
  if (ofp) {
140,106!
1561
    tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
×
1562
  }
1563
  // free local buffer
1564
  tdbFree(pBuf);
140,106✔
1565
  *szPayload = nLocal - nHeader;
140,106✔
1566

1567
  return 0;
140,106✔
1568
}
1569

1570
static int tdbBtreeEncodeCell(SPage *pPage, const void *pKey, int kLen, const void *pVal, int vLen, SCell *pCell,
2,164,444✔
1571
                              int *szCell, TXN *pTxn, SBTree *pBt) {
1572
  u8  leaf;
1573
  int nHeader;
1574
  int nPayload;
1575
  int ret;
1576

1577
  if (!(pPage->kLen == TDB_VARIANT_LEN || pPage->kLen == kLen)) {
2,164,444!
1578
    return TSDB_CODE_INVALID_PARA;
×
1579
  }
1580
  if (!(pPage->vLen == TDB_VARIANT_LEN || pPage->vLen == vLen)) {
2,164,444!
1581
    return TSDB_CODE_INVALID_PARA;
×
1582
  }
1583
  if (!(pKey != NULL && kLen > 0)) {
2,164,444!
1584
    return TSDB_CODE_INVALID_PARA;
×
1585
  }
1586

1587
  nPayload = 0;
2,164,720✔
1588
  nHeader = 0;
2,164,720✔
1589
  leaf = TDB_BTREE_PAGE_IS_LEAF(pPage);
2,164,720✔
1590

1591
  // 1. Encode Header part
1592
  /* Encode SPgno if interior page */
1593
  if (!leaf) {
2,164,720✔
1594
    if (pPage->vLen != sizeof(SPgno)) {
655,535!
1595
      tdbError("tdb/btree-encode-cell: invalid cell.");
×
1596
      return TSDB_CODE_INVALID_PARA;
×
1597
    }
1598

1599
    ((SPgno *)(pCell + nHeader))[0] = ((SPgno *)pVal)[0];
655,535✔
1600
    nHeader = nHeader + sizeof(SPgno);
655,535✔
1601
  }
1602

1603
  /* Encode kLen if need */
1604
  if (pPage->kLen == TDB_VARIANT_LEN) {
2,164,720✔
1605
    nHeader += tdbPutVarInt(pCell + nHeader, kLen);
1,141,221✔
1606
  }
1607

1608
  /* Encode vLen if need */
1609
  if (pPage->vLen == TDB_VARIANT_LEN) {
2,164,722✔
1610
    nHeader += tdbPutVarInt(pCell + nHeader, vLen);
576,783✔
1611
  }
1612

1613
  // 2. Encode payload part
1614
  if ((!leaf) || pPage->vLen == 0) {
2,164,700✔
1615
    pVal = NULL;
1,008,251✔
1616
    vLen = 0;
1,008,251✔
1617
  }
1618

1619
  ret = tdbBtreeEncodePayload(pPage, pCell, nHeader, pKey, kLen, pVal, vLen, &nPayload, pTxn, pBt);
2,164,700✔
1620
  if (ret < 0) {
2,164,850✔
1621
    // TODO
1622
    tdbError("tdb/btree-encode-cell: encode payload failed with ret: %d.", ret);
33!
1623
    return ret;
×
1624
  }
1625

1626
  *szCell = nHeader + nPayload;
2,164,817✔
1627
  return 0;
2,164,817✔
1628
}
1629

1630
static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader, SCellDecoder *pDecoder, TXN *pTxn,
70,367,653✔
1631
                                 SBTree *pBt) {
1632
  int ret = 0;
70,367,653✔
1633
  int nPayload;
1634
  int maxLocal = pPage->maxLocal;
70,367,653✔
1635

1636
  int kLen = pDecoder->kLen;
70,367,653✔
1637
  int vLen = pDecoder->vLen;
70,367,653✔
1638

1639
  if (pDecoder->pVal) {
70,367,653✔
1640
    if (TDB_BTREE_PAGE_IS_LEAF(pPage)) {
16,280,252!
1641
      tdbError("tdb/btree-decode-payload: leaf page with non-null pVal.");
×
1642
      return TSDB_CODE_INVALID_DATA_FMT;
×
1643
    }
1644
    nPayload = pDecoder->kLen;
16,280,252✔
1645
  } else {
1646
    nPayload = pDecoder->kLen + pDecoder->vLen;
54,087,401✔
1647
  }
1648

1649
  if (nHeader + nPayload <= maxLocal) {
70,367,653✔
1650
    // no over flow case
1651
    pDecoder->pKey = (SCell *)pCell + nHeader;
69,693,022✔
1652
    if (pDecoder->pVal == NULL && pDecoder->vLen > 0) {
69,693,022✔
1653
      pDecoder->pVal = (SCell *)pCell + nHeader + pDecoder->kLen;
52,083,504✔
1654
    }
1655
    return 0;
69,693,022✔
1656
  }
1657

1658
  // handle overflow case
1659
  // calc local storage size
1660
  int minLocal = pPage->minLocal;
674,631✔
1661
  int surplus = minLocal + (nPayload + nHeader - minLocal) % (maxLocal - sizeof(SPgno));
674,631✔
1662
  int nLocal = surplus <= maxLocal ? surplus : minLocal;
674,631✔
1663

1664
  int    nLeft = nPayload;
674,631✔
1665
  SPgno  pgno = 0;
674,631✔
1666
  SPage *ofp;
1667
  SCell *ofpCell;
1668
  int    bytes;
1669
  int    lastPage = 0;
674,631✔
1670

1671
  if (nLocal >= pDecoder->kLen + nHeader + sizeof(SPgno)) {
674,631✔
1672
    pDecoder->pKey = (SCell *)pCell + nHeader;
666,174✔
1673
    nLeft -= kLen;
666,174✔
1674
    if (nLocal > kLen + nHeader + sizeof(SPgno)) {
666,174✔
1675
      // read partial val to local
1676
      pDecoder->pVal = tdbRealloc(pDecoder->pVal, vLen);
666,173✔
1677
      if (pDecoder->pVal == NULL) {
666,174!
1678
        return terrno;
×
1679
      }
1680
      TDB_CELLDECODER_SET_FREE_VAL(pDecoder);
666,174✔
1681

1682
      tdbDebug("tdb btc decoder: %p/0x%x pVal: %p ", pDecoder, pDecoder->freeKV, pDecoder->pVal);
666,174✔
1683

1684
      memcpy(pDecoder->pVal, pCell + nHeader + kLen, nLocal - nHeader - kLen - sizeof(SPgno));
666,170✔
1685

1686
      nLeft -= nLocal - nHeader - kLen - sizeof(SPgno);
666,170✔
1687
    }
1688

1689
    memcpy(&pgno, pCell + nHeader + nPayload - nLeft, sizeof(pgno));
666,171✔
1690

1691
    // unpack left val data from ovpages
1692
    while (pgno != 0) {
5,899,144✔
1693
      ret = tdbLoadOvflPage(&pgno, &ofp, pTxn, pBt);
5,232,970✔
1694
      if (ret < 0) {
5,232,981!
1695
        return ret;
×
1696
      }
1697
      ofpCell = tdbPageGetCell(ofp, 0);
5,232,981✔
1698
      if (ofpCell == NULL) {
5,232,936!
1699
        return TSDB_CODE_INVALID_DATA_FMT;
×
1700
      }
1701

1702
      if (nLeft <= ofp->maxLocal - sizeof(SPgno)) {
5,232,936✔
1703
        bytes = nLeft;
666,174✔
1704
        lastPage = 1;
666,174✔
1705
      } else {
1706
        bytes = ofp->maxLocal - sizeof(SPgno);
4,566,762✔
1707
      }
1708

1709
      memcpy(pDecoder->pVal + vLen - nLeft, ofpCell, bytes);
5,232,936✔
1710
      nLeft -= bytes;
5,232,936✔
1711

1712
      memcpy(&pgno, ofpCell + bytes, sizeof(pgno));
5,232,936✔
1713

1714
      tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
5,232,936✔
1715
    }
1716
  } else {
1717
    int nLeftKey = kLen;
8,457✔
1718
    // load partial key and nextPgno
1719
    pDecoder->pKey = tdbRealloc(pDecoder->pKey, kLen);
8,457✔
1720
    if (pDecoder->pKey == NULL) {
69,678!
1721
      return terrno;
×
1722
    }
1723
    TDB_CELLDECODER_SET_FREE_KEY(pDecoder);
69,678✔
1724

1725
    memcpy(pDecoder->pKey, pCell + nHeader, nLocal - nHeader - sizeof(pgno));
69,678✔
1726
    nLeft -= nLocal - nHeader - sizeof(pgno);
69,678✔
1727
    nLeftKey -= nLocal - nHeader - sizeof(pgno);
69,678✔
1728

1729
    memcpy(&pgno, pCell + nLocal - sizeof(pgno), sizeof(pgno));
69,678✔
1730

1731
    int lastKeyPageSpace = 0;
69,678✔
1732
    // load left key & val to ovpages
1733
    while (pgno != 0) {
139,542✔
1734
      tdbTrace("tdb decode-ofp, pTxn: %p, pgno:%u by cell:%p", pTxn, pgno, pCell);
69,864✔
1735
      // printf("tdb decode-ofp, pTxn: %p, pgno:%u by cell:%p\n", pTxn, pgno, pCell);
1736
      ret = tdbLoadOvflPage(&pgno, &ofp, pTxn, pBt);
69,864✔
1737
      if (ret < 0) {
69,864!
1738
        return ret;
×
1739
      }
1740
      ofpCell = tdbPageGetCell(ofp, 0);
69,864✔
1741

1742
      int lastKeyPage = 0;
69,864✔
1743
      if (nLeftKey <= ofp->maxLocal - sizeof(SPgno)) {
69,864✔
1744
        bytes = nLeftKey;
69,678✔
1745
        lastKeyPage = 1;
69,678✔
1746
        lastKeyPageSpace = ofp->maxLocal - sizeof(SPgno) - nLeftKey;
69,678✔
1747
      } else {
1748
        bytes = ofp->maxLocal - sizeof(SPgno);
186✔
1749
      }
1750

1751
      // cpy key
1752
      memcpy(pDecoder->pKey + kLen - nLeftKey, ofpCell, bytes);
69,864✔
1753

1754
      if (lastKeyPage) {
69,864✔
1755
        if (lastKeyPageSpace >= vLen) {
69,678!
1756
          if (vLen > 0) {
69,678✔
1757
            pDecoder->pVal = ofpCell + kLen - nLeftKey;
69,616✔
1758

1759
            nLeft -= vLen;
69,616✔
1760
          }
1761
          pgno = 0;
69,678✔
1762
        } else {
1763
          // read partial val to local
1764
          pDecoder->pVal = tdbRealloc(pDecoder->pVal, vLen);
×
1765
          if (pDecoder->pVal == NULL) {
×
1766
            return terrno;
×
1767
          }
1768
          TDB_CELLDECODER_SET_FREE_VAL(pDecoder);
×
1769

1770
          memcpy(pDecoder->pVal, ofpCell + kLen - nLeftKey, lastKeyPageSpace);
×
1771
          nLeft -= lastKeyPageSpace;
×
1772
        }
1773
      }
1774

1775
      memcpy(&pgno, ofpCell + bytes, sizeof(pgno));
69,864✔
1776

1777
      tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
69,864✔
1778

1779
      nLeftKey -= bytes;
69,864✔
1780
      nLeft -= bytes;
69,864✔
1781
    }
1782

1783
    while (nLeft > 0) {
69,678!
1784
      ret = tdbLoadOvflPage(&pgno, &ofp, pTxn, pBt);
×
1785
      if (ret < 0) {
×
1786
        return ret;
×
1787
      }
1788

1789
      ofpCell = tdbPageGetCell(ofp, 0);
×
1790

1791
      // load left val data to ovpages
1792
      lastPage = 0;
×
1793
      if (nLeft <= ofp->maxLocal - sizeof(SPgno)) {
×
1794
        bytes = nLeft;
×
1795
        lastPage = 1;
×
1796
      } else {
1797
        bytes = ofp->maxLocal - sizeof(SPgno);
×
1798
      }
1799

1800
      if (lastPage) {
×
1801
        pgno = 0;
×
1802
      }
1803

1804
      if (!pDecoder->pVal) {
×
1805
        pDecoder->pVal = tdbRealloc(pDecoder->pVal, vLen);
×
1806
        if (pDecoder->pVal == NULL) {
×
1807
          return terrno;
×
1808
        }
1809
        TDB_CELLDECODER_SET_FREE_VAL(pDecoder);
×
1810
      }
1811

1812
      memcpy(pDecoder->pVal, ofpCell + vLen - nLeft, bytes);
×
1813
      nLeft -= bytes;
×
1814

1815
      memcpy(&pgno, ofpCell + vLen - nLeft + bytes, sizeof(pgno));
×
1816

1817
      tdbPCacheRelease(pBt->pPager->pCache, ofp, pTxn);
×
1818

1819
      nLeft -= bytes;
×
1820
    }
1821
  }
1822

1823
  return 0;
735,852✔
1824
}
1825

1826
static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pDecoder, TXN *pTxn, SBTree *pBt) {
70,317,193✔
1827
  u8  leaf;
1828
  int nHeader;
1829
  int ret;
1830

1831
  nHeader = 0;
70,317,193✔
1832
  leaf = TDB_BTREE_PAGE_IS_LEAF(pPage);
70,317,193✔
1833

1834
  // Clear the state of decoder
1835
  if (TDB_CELLDECODER_FREE_VAL(pDecoder)) {
70,317,193✔
1836
    tdbFree(pDecoder->pVal);
371,122✔
1837
    TDB_CELLDECODER_CLZ_FREE_VAL(pDecoder);
371,122✔
1838
    // tdbTrace("tdb btc decoder val set nil: %p/0x%x ", pDecoder, pDecoder->freeKV);
1839
  }
1840
  if (TDB_CELLDECODER_FREE_KEY(pDecoder)) {
70,317,193✔
1841
    tdbFree(pDecoder->pKey);
67,392✔
1842
    TDB_CELLDECODER_CLZ_FREE_KEY(pDecoder);
67,392✔
1843
    // tdbTrace("tdb btc decoder key set nil: %p/0x%x ", pDecoder, pDecoder->freeKV);
1844
  }
1845
  pDecoder->kLen = -1;
70,317,193✔
1846
  pDecoder->pKey = NULL;
70,317,193✔
1847
  pDecoder->vLen = -1;
70,317,193✔
1848
  pDecoder->pVal = NULL;
70,317,193✔
1849
  pDecoder->pgno = 0;
70,317,193✔
1850

1851
  // 1. Decode header part
1852
  if (!leaf) {
70,317,193✔
1853
    if (pPage->vLen != sizeof(SPgno)) {
16,276,163!
1854
      tdbError("tdb/btree-decode-cell: invalid cell.");
×
1855
      return TSDB_CODE_INVALID_DATA_FMT;
×
1856
    }
1857

1858
    pDecoder->pgno = ((SPgno *)(pCell + nHeader))[0];
16,276,163✔
1859
    pDecoder->pVal = (u8 *)(&(pDecoder->pgno));
16,276,163✔
1860
    nHeader = nHeader + sizeof(SPgno);
16,276,163✔
1861
  }
1862

1863
  if (pPage->kLen == TDB_VARIANT_LEN) {
70,317,193✔
1864
    nHeader += tdbGetVarInt(pCell + nHeader, &(pDecoder->kLen));
9,647,844✔
1865
  } else {
1866
    pDecoder->kLen = pPage->kLen;
60,669,349✔
1867
  }
1868

1869
  if (pPage->vLen == TDB_VARIANT_LEN) {
70,317,905✔
1870
    if (!leaf) {
30,980,833!
1871
      tdbError("tdb/btree-decode-cell: not a leaf page.");
×
1872
      return TSDB_CODE_INVALID_DATA_FMT;
×
1873
    }
1874
    nHeader += tdbGetVarInt(pCell + nHeader, &(pDecoder->vLen));
30,980,833✔
1875
  } else {
1876
    pDecoder->vLen = pPage->vLen;
39,337,072✔
1877
  }
1878

1879
  // 2. Decode payload part
1880
  ret = tdbBtreeDecodePayload(pPage, pCell, nHeader, pDecoder, pTxn, pBt);
70,319,275✔
1881
  if (ret < 0) {
70,483,114!
1882
    return ret;
×
1883
  }
1884

1885
  return 0;
70,483,114✔
1886
}
1887

1888

1889
// tdbBtreeCellSize returns the local size (number of bytes in [pPage]) of [pCell].
1890
// if [pFullSize] is not NULL, it return the full size of the cell, which also includes
1891
// the number of bytes of [pCell] in the overflow pages.
1892
static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int *pFullSize) {
43,769,531✔
1893
  u8  leaf;
1894
  int kLen = 0, vLen = 0, nHeader = 0;
43,769,531✔
1895

1896
  leaf = TDB_BTREE_PAGE_IS_LEAF(pPage);
43,769,531✔
1897

1898
  if (!leaf) {
43,769,531✔
1899
    nHeader += sizeof(SPgno);
19,877,691✔
1900
  }
1901

1902
  if (pPage->kLen == TDB_VARIANT_LEN) {
43,769,531✔
1903
    nHeader += tdbGetVarInt(pCell + nHeader, &kLen);
28,273,018✔
1904
  } else {
1905
    kLen = pPage->kLen;
15,496,513✔
1906
  }
1907

1908
  if (pPage->vLen == TDB_VARIANT_LEN) {
43,774,365✔
1909
    if (!leaf) {
9,498,009!
1910
      tdbError("tdb/btree-cell-size: not a leaf page:%p, pgno:%" PRIu32 ".", pPage, TDB_PAGE_PGNO(pPage));
×
1911
      // return -1;
1912
    }
1913
    nHeader += tdbGetVarInt(pCell + nHeader, &vLen);
9,498,009✔
1914
  } else if (leaf) {
34,276,356✔
1915
    vLen = pPage->vLen;
14,565,326✔
1916
  }
1917

1918
  int nSize = kLen + vLen + nHeader;
43,775,964✔
1919
  if (nSize <= pPage->maxLocal) {
43,775,964✔
1920
    // cell size should be at least the size of a free cell, otherwise it
1921
    // cannot be added to the free list when dropped.
1922
    if (nSize < pPage->pPageMethods->szFreeCell) {
42,333,444!
1923
      nSize = pPage->pPageMethods->szFreeCell;
×
1924
    }
1925
    if (pFullSize) {
42,333,444✔
1926
      *pFullSize = nSize;
665,911✔
1927
    }
1928
    return nSize;
42,333,444✔
1929
  }
1930

1931
  // handle overflow pages
1932
  if (pFullSize) {
1,442,520✔
1933
    *pFullSize = nSize;
13,378✔
1934
  }
1935

1936
  int maxLocal = pPage->maxLocal;
1,442,520✔
1937

1938
  // calc local storage size
1939
  int minLocal = pPage->minLocal;
1,442,520✔
1940
  int surplus = minLocal + (nSize - minLocal) % (maxLocal - sizeof(SPgno));
1,442,520✔
1941
  int nLocal = surplus <= maxLocal ? surplus : minLocal;
1,442,520✔
1942

1943
  return nLocal;
1,442,520✔
1944
}
1945
// TDB_BTREE_CELL
1946

1947
// TDB_BTREE_CURSOR =====================
1948
int tdbBtcOpen(SBTC *pBtc, SBTree *pBt, TXN *pTxn) {
13,235,976✔
1949
  pBtc->pBt = pBt;
13,235,976✔
1950
  pBtc->iPage = -1;
13,235,976✔
1951
  pBtc->pPage = NULL;
13,235,976✔
1952
  pBtc->idx = -1;
13,235,976✔
1953
  memset(&pBtc->coder, 0, sizeof(SCellDecoder));
13,235,976✔
1954

1955
  if (pTxn == NULL) {
13,235,976✔
1956
    TXN *pTxn = tdbOsCalloc(1, sizeof(*pTxn));
11,482,670!
1957
    if (!pTxn) {
11,487,439!
1958
      pBtc->pTxn = NULL;
×
1959
      return terrno;
×
1960
    }
1961

1962
    int32_t ret = tdbTxnOpen(pTxn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0);
11,487,439✔
1963
    if (ret < 0) {
11,483,246✔
1964
      tdbOsFree(pTxn);
281!
1965
      pBtc->pTxn = NULL;
×
1966
      return ret;
×
1967
    }
1968

1969
    pBtc->pTxn = pTxn;
11,482,965✔
1970
    pBtc->freeTxn = 1;
11,482,965✔
1971
  } else {
1972
    pBtc->pTxn = pTxn;
1,753,306✔
1973
    pBtc->freeTxn = 0;
1,753,306✔
1974
  }
1975

1976
  return 0;
13,236,271✔
1977
}
1978

1979
int tdbBtcMoveToFirst(SBTC *pBtc) {
64,280✔
1980
  int     ret;
1981
  SBTree *pBt;
1982
  SPager *pPager;
1983
  SCell  *pCell;
1984
  SPgno   pgno;
1985

1986
  pBt = pBtc->pBt;
64,280✔
1987
  pPager = pBt->pPager;
64,280✔
1988

1989
  if (pBtc->iPage < 0) {
64,280!
1990
    // move a clean cursor
1991
    ret = tdbPagerFetchPage(pPager, &pBt->root, &(pBtc->pPage), tdbBtreeInitPage,
64,290✔
1992
                            &((SBtreeInitPageArg){.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF}), pBtc->pTxn);
64,290✔
1993
    if (ret < 0) {
64,280!
1994
      tdbError("tdb/btc-move-tofirst: fetch page failed with ret: %d.", ret);
×
1995
      return ret;
44,078✔
1996
    }
1997

1998
    if (!TDB_BTREE_PAGE_IS_ROOT(pBtc->pPage)) {
64,285!
1999
      tdbError("tdb/btc-move-tofirst: not a root page");
×
2000
      return ret;
×
2001
    }
2002

2003
    pBtc->iPage = 0;
64,285✔
2004
    if (TDB_PAGE_TOTAL_CELLS(pBtc->pPage) > 0) {
64,285✔
2005
      pBtc->idx = 0;
20,189✔
2006
    } else {
2007
      // no any data, point to an invalid position
2008
      if (!TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
44,078!
2009
        tdbError("tdb/btc-move-to-first: not a leaf page.");
×
2010
        return TSDB_CODE_FAILED;
×
2011
      }
2012

2013
      pBtc->idx = -1;
44,078✔
2014
      return 0;
44,078✔
2015
    }
2016
  } else {
2017
    // TODO
2018
    tdbError("tdb/btc-move-to-first: move from a dirty cursor.");
×
2019
    return TSDB_CODE_FAILED;
×
2020
#if 0
2021
    // move from a position
2022
    int iPage = 0;
2023

2024
    for (; iPage < pBtc->iPage; iPage++) {
2025
      if (pBtc->idxStack[iPage] < 0) {
2026
        tdbError("tdb/btc-move-to-first: invalid idx: %d.", pBtc->idxStack[iPage]);
2027
        return -1;
2028
      }
2029

2030
      if (pBtc->idxStack[iPage]) break;
2031
    }
2032

2033
    // move upward
2034
    for (;;) {
2035
      if (pBtc->iPage == iPage) {
2036
        pBtc->idx = 0;
2037
        break;
2038
      }
2039

2040
      tdbBtcMoveUpward(pBtc);
2041
    }
2042
#endif
2043
  }
2044

2045
  // move downward
2046
  for (;;) {
2047
    if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) break;
22,455✔
2048

2049
    ret = tdbBtcMoveDownward(pBtc);
2,265✔
2050
    if (ret < 0) {
2,266!
2051
      tdbError("tdb/btc-move-tofirst: btc move downward failed with ret: %d.", ret);
×
2052
      return ret;
×
2053
    }
2054

2055
    pBtc->idx = 0;
2,266✔
2056
  }
2057

2058
  return 0;
20,190✔
2059
}
2060

2061
int tdbBtcMoveToLast(SBTC *pBtc) {
×
2062
  int     ret;
2063
  int     nCells;
2064
  SBTree *pBt;
2065
  SPager *pPager;
2066
  SPgno   pgno;
2067

2068
  pBt = pBtc->pBt;
×
2069
  pPager = pBt->pPager;
×
2070

2071
  if (pBtc->iPage < 0) {
×
2072
    // move a clean cursor
2073
    ret = tdbPagerFetchPage(pPager, &pBt->root, &(pBtc->pPage), tdbBtreeInitPage,
×
2074
                            &((SBtreeInitPageArg){.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF}), pBtc->pTxn);
×
2075
    if (ret < 0) {
×
2076
      tdbError("tdb/btc-move-tolast: fetch page failed with ret: %d.", ret);
×
2077
      return ret;
×
2078
    }
2079

2080
    nCells = TDB_PAGE_TOTAL_CELLS(pBtc->pPage);
×
2081
    pBtc->iPage = 0;
×
2082
    if (nCells > 0) {
×
2083
      pBtc->idx = TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage) ? nCells - 1 : nCells;
×
2084
    } else {
2085
      // no data at all, point to an invalid position
2086
      if (!TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
×
2087
        tdbError("tdb/btc-move-to-last: not a leaf page.");
×
2088
        return TSDB_CODE_FAILED;
×
2089
      }
2090

2091
      pBtc->idx = -1;
×
2092
      return 0;
×
2093
    }
2094
  } else {
2095
    // TODO
2096
    tdbError("tdb/btc-move-to-last: move from a dirty cursor.");
×
2097
    return TSDB_CODE_FAILED;
×
2098
#if 0
2099
    int iPage = 0;
2100

2101
    // downward search
2102
    for (; iPage < pBtc->iPage; iPage++) {
2103
      if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pgStack[iPage])) {
2104
        tdbError("tdb/btc-move-to-last: leaf page in cursor stack.");
2105
        return -1;
2106
      }
2107

2108
      nCells = TDB_PAGE_TOTAL_CELLS(pBtc->pgStack[iPage]);
2109
      if (pBtc->idxStack[iPage] != nCells) break;
2110
    }
2111

2112
    // move upward
2113
    for (;;) {
2114
      if (pBtc->iPage == iPage) {
2115
        if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
2116
          pBtc->idx = TDB_PAGE_TOTAL_CELLS(pBtc->pPage) - 1;
2117
        } else {
2118
          pBtc->idx = TDB_PAGE_TOTAL_CELLS(pBtc->pPage);
2119
        }
2120
        break;
2121
      }
2122

2123
      tdbBtcMoveUpward(pBtc);
2124
    }
2125
#endif
2126
  }
2127

2128
  // move downward
2129
  for (;;) {
2130
    if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) break;
×
2131

2132
    ret = tdbBtcMoveDownward(pBtc);
×
2133
    if (ret < 0) {
×
2134
      tdbError("tdb/btc-move-tolast: btc move downward failed with ret: %d.", ret);
×
2135
      return ret;
×
2136
    }
2137

2138
    nCells = TDB_PAGE_TOTAL_CELLS(pBtc->pPage);
×
2139
    if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
×
2140
      pBtc->idx = nCells - 1;
×
2141
    } else {
2142
      pBtc->idx = nCells;
×
2143
    }
2144
  }
2145

2146
  return 0;
×
2147
}
2148

2149
int tdbBtreeNext(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen) {
5,220,396✔
2150
  SCell       *pCell;
2151
  SCellDecoder cd = {0};
5,220,396✔
2152
  void        *pKey, *pVal;
2153
  int          ret;
2154

2155
  // current cursor points to an invalid position
2156
  if (pBtc->idx < 0) {
5,220,396✔
2157
    return TSDB_CODE_FAILED;
550,253✔
2158
  }
2159

2160
  pCell = tdbPageGetCell(pBtc->pPage, pBtc->idx);
4,670,143✔
2161

2162
  ret = tdbBtreeDecodeCell(pBtc->pPage, pCell, &cd, pBtc->pTxn, pBtc->pBt);
4,666,382✔
2163
  if (ret < 0) {
4,672,437!
2164
    tdbError("tdb/btree-next: decode cell failed with ret: %d.", ret);
×
2165
    return ret;
×
2166
  }
2167

2168
  pKey = tdbRealloc(*ppKey, cd.kLen);
4,672,437✔
2169
  if (pKey == NULL) {
4,669,025✔
2170
    return terrno;
239✔
2171
  }
2172

2173
  *ppKey = pKey;
4,668,786✔
2174
  *kLen = cd.kLen;
4,668,786✔
2175
  memcpy(pKey, cd.pKey, (size_t)cd.kLen);
4,668,786✔
2176

2177
  if (ppVal) {
4,668,786✔
2178
    if (cd.vLen > 0) {
4,665,982✔
2179
      pVal = tdbRealloc(*ppVal, cd.vLen);
4,221,234✔
2180
      if (pVal == NULL) {
4,214,238!
2181
        return terrno;
×
2182
      }
2183

2184
      memcpy(pVal, cd.pVal, cd.vLen);
4,214,238✔
2185
      if (TDB_CELLDECODER_FREE_VAL(&cd)) {
4,214,238✔
2186
        tdbTrace("tdb/btree-next decoder: %p pVal free: %p", &cd, cd.pVal);
626✔
2187
        tdbFree(cd.pVal);
626✔
2188
      }
2189
    } else {
2190
      pVal = NULL;
444,748✔
2191
    }
2192

2193
    *ppVal = pVal;
4,660,155✔
2194
    *vLen = cd.vLen;
4,660,155✔
2195
  } else {
2196
    if (TDB_CELLDECODER_FREE_VAL(&cd)) {
2,804✔
2197
      tdbTrace("tdb/btree-next2 decoder: %p pVal free: %p", &cd, cd.pVal);
26✔
2198
      tdbFree(cd.pVal);
26✔
2199
    }
2200
  }
2201

2202
  ret = tdbBtcMoveToNext(pBtc);
4,662,959✔
2203
  if (ret < 0) {
4,664,464!
2204
    tdbError("tdb/btree-next: btc move to next failed with ret: %d.", ret);
×
2205
    return ret;
×
2206
  }
2207

2208
  return 0;
4,664,833✔
2209
}
2210

2211
int tdbBtreePrev(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen) {
8,273✔
2212
  SCell       *pCell;
2213
  SCellDecoder cd = {0};
8,273✔
2214
  void        *pKey, *pVal;
2215
  int          ret;
2216

2217
  // current cursor points to an invalid position
2218
  if (pBtc->idx < 0) {
8,273✔
2219
    return TSDB_CODE_FAILED;
298✔
2220
  }
2221

2222
  pCell = tdbPageGetCell(pBtc->pPage, pBtc->idx);
7,975✔
2223

2224
  ret = tdbBtreeDecodeCell(pBtc->pPage, pCell, &cd, pBtc->pTxn, pBtc->pBt);
7,838✔
2225
  if (ret < 0) {
7,826!
2226
    tdbError("tdb/btree-prev: decode cell failed with ret: %d.", ret);
×
2227
    return ret;
×
2228
  }
2229

2230
  pKey = tdbRealloc(*ppKey, cd.kLen);
7,826✔
2231
  if (pKey == NULL) {
7,797!
2232
    return terrno;
×
2233
  }
2234

2235
  *ppKey = pKey;
7,809✔
2236
  *kLen = cd.kLen;
7,809✔
2237
  memcpy(pKey, cd.pKey, (size_t)cd.kLen);
7,809✔
2238

2239
  if (ppVal) {
7,809!
2240
    if (cd.vLen > 0) {
7,809!
2241
      pVal = tdbRealloc(*ppVal, cd.vLen);
7,811✔
2242
      if (pVal == NULL) {
7,728!
2243
        tdbFree(pKey);
×
2244
        return terrno;
×
2245
      }
2246

2247
      memcpy(pVal, cd.pVal, (size_t)cd.vLen);
7,728✔
2248
      if (TDB_CELLDECODER_FREE_VAL(&cd)) {
7,728!
2249
        tdbTrace("tdb/btree-next decoder: %p pVal free: %p", &cd, cd.pVal);
×
2250
        tdbFree(cd.pVal);
×
2251
      }
2252
    } else {
2253
      pVal = NULL;
×
2254
    }
2255

2256
    *ppVal = pVal;
7,806✔
2257
    *vLen = cd.vLen;
7,806✔
2258
  } else {
2259
    if (TDB_CELLDECODER_FREE_VAL(&cd)) {
×
2260
      tdbTrace("tdb/btree-next2 decoder: %p pVal free: %p", &cd, cd.pVal);
×
2261
      tdbFree(cd.pVal);
×
2262
    }
2263
  }
2264

2265
  ret = tdbBtcMoveToPrev(pBtc);
7,806✔
2266
  if (ret < 0) {
7,805!
2267
    tdbError("tdb/btree-prev: btc move to prev failed with ret: %d.", ret);
×
2268
    return ret;
×
2269
  }
2270

2271
  return 0;
7,807✔
2272
}
2273

2274
int tdbBtcMoveToNext(SBTC *pBtc) {
4,793,240✔
2275
  int    nCells;
2276
  int    ret;
2277
  SCell *pCell;
2278

2279
  if (!TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
4,793,240!
2280
    tdbError("tdb/btc-move-to-next: not a leaf page.");
×
2281
    return TSDB_CODE_FAILED;
×
2282
  }
2283

2284
  if (pBtc->idx < 0) return TSDB_CODE_FAILED;
4,793,240!
2285

2286
  pBtc->idx++;
4,793,240✔
2287
  if (pBtc->idx < TDB_PAGE_TOTAL_CELLS(pBtc->pPage)) {
4,793,240✔
2288
    return 0;
4,270,492✔
2289
  }
2290

2291
  // move upward
2292
  for (;;) {
2293
    if (pBtc->iPage == 0) {
536,153✔
2294
      pBtc->idx = -1;
491,165✔
2295
      return 0;
491,165✔
2296
    }
2297

2298
    ret = tdbBtcMoveUpward(pBtc);
44,988✔
2299
    if (ret < 0) {
45,053✔
2300
      tdbError("tdb/btc-move-to-next: btc move upward failed with ret: %d.", ret);
3!
2301
      return ret;
×
2302
    }
2303
    pBtc->idx++;
45,050✔
2304

2305
    if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
45,050!
2306
      tdbError("tdb/btree-decode-cell: should not be a leaf page here.");
×
2307
      return TSDB_CODE_FAILED;
×
2308
    }
2309
    if (pBtc->idx <= TDB_PAGE_TOTAL_CELLS(pBtc->pPage)) {
45,050✔
2310
      break;
32,615✔
2311
    }
2312
  }
2313

2314
  // move downward
2315
  for (;;) {
2316
    if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) break;
72,600✔
2317

2318
    ret = tdbBtcMoveDownward(pBtc);
36,306✔
2319
    if (ret < 0) {
36,310!
2320
      tdbError("tdb/btc-move-tonext: btc move downward failed with ret: %d.", ret);
×
2321
      return ret;
×
2322
    }
2323

2324
    pBtc->idx = 0;
39,985✔
2325
  }
2326

2327
  return 0;
36,294✔
2328
}
2329

2330
int tdbBtcMoveToPrev(SBTC *pBtc) {
66,217✔
2331
  int32_t ret = 0;
66,217✔
2332
  if (pBtc->idx < 0) return TSDB_CODE_FAILED;
66,217!
2333

2334
  pBtc->idx--;
66,217✔
2335
  if (pBtc->idx >= 0) {
66,217✔
2336
    return 0;
65,109✔
2337
  }
2338

2339
  // move upward
2340
  for (;;) {
2341
    if (pBtc->iPage == 0) {
1,272✔
2342
      pBtc->idx = -1;
867✔
2343
      return 0;
867✔
2344
    }
2345

2346
    ret = tdbBtcMoveUpward(pBtc);
405✔
2347
    if (ret < 0) {
405!
2348
      tdbError("tdb/btc-move-to-prev: btc move upward failed with ret: %d.", ret);
×
2349
      return ret;
×
2350
    }
2351
    pBtc->idx--;
523✔
2352
    if (pBtc->idx >= 0) {
523✔
2353
      break;
359✔
2354
    }
2355
  }
2356

2357
  // move downward
2358
  for (;;) {
2359
    if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) break;
718✔
2360

2361
    ret = tdbBtcMoveDownward(pBtc);
359✔
2362
    if (ret < 0) {
359!
2363
      tdbError("tdb/btc-move-to-prev: btc move downward failed with ret: %d.", ret);
×
2364
      return ret;
×
2365
    }
2366
    if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
359!
2367
      pBtc->idx = TDB_PAGE_TOTAL_CELLS(pBtc->pPage) - 1;
359✔
2368
    } else {
2369
      pBtc->idx = TDB_PAGE_TOTAL_CELLS(pBtc->pPage);
×
2370
    }
2371
  }
2372

2373
  return 0;
359✔
2374
}
2375

2376
static int tdbBtcMoveDownward(SBTC *pBtc) {
6,218,627✔
2377
  int    ret;
2378
  SPgno  pgno;
2379
  SCell *pCell;
2380

2381
  if (pBtc->idx < 0) {
6,218,627!
2382
    tdbError("tdb/btc-move-downward: invalid idx: %d.", pBtc->idx);
×
2383
    return TSDB_CODE_FAILED;
×
2384
  }
2385

2386
  if (TDB_BTREE_PAGE_IS_LEAF(pBtc->pPage)) {
6,218,627!
2387
    tdbError("tdb/btc-move-downward: should not be a leaf page here.");
×
2388
    return TSDB_CODE_FAILED;
×
2389
  }
2390

2391
  if (TDB_BTREE_PAGE_IS_OVFL(pBtc->pPage)) {
6,218,627!
2392
    tdbError("tdb/btc-move-downward: should not be a ovfl page here.");
×
2393
    return TSDB_CODE_FAILED;
×
2394
  }
2395

2396
  if (pBtc->idx < TDB_PAGE_TOTAL_CELLS(pBtc->pPage)) {
6,218,627✔
2397
    pCell = tdbPageGetCell(pBtc->pPage, pBtc->idx);
4,199,243✔
2398
    pgno = ((SPgno *)pCell)[0];
4,198,489✔
2399
  } else {
2400
    pgno = ((SIntHdr *)pBtc->pPage->pData)->pgno;
2,018,733✔
2401
  }
2402

2403
  if (!pgno) {
6,217,222!
2404
    tdbError("tdb/btc-move-downward: invalid pgno.");
×
2405
    return TSDB_CODE_FAILED;
×
2406
  }
2407

2408
  pBtc->pgStack[pBtc->iPage] = pBtc->pPage;
6,217,222✔
2409
  pBtc->idxStack[pBtc->iPage] = pBtc->idx;
6,217,222✔
2410
  pBtc->iPage++;
6,217,222✔
2411
  pBtc->pPage = NULL;
6,217,222✔
2412
  pBtc->idx = -1;
6,217,222✔
2413

2414
  ret = tdbPagerFetchPage(pBtc->pBt->pPager, &pgno, &pBtc->pPage, tdbBtreeInitPage,
6,217,222✔
2415
                          &((SBtreeInitPageArg){.pBt = pBtc->pBt, .flags = 0}), pBtc->pTxn);
6,217,222✔
2416
  if (ret < 0) {
6,232,476!
2417
    tdbError("tdb/btc-move-downward: fetch page failed with ret: %d.", ret);
×
2418
    return TSDB_CODE_FAILED;
×
2419
  }
2420

2421
  return 0;
6,232,511✔
2422
}
2423

2424
static int tdbBtcMoveUpward(SBTC *pBtc) {
45,393✔
2425
  if (pBtc->iPage == 0) return TSDB_CODE_FAILED;
45,393!
2426

2427
  tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pPage, pBtc->pTxn);
45,393✔
2428

2429
  pBtc->iPage--;
45,458✔
2430
  pBtc->pPage = pBtc->pgStack[pBtc->iPage];
45,458✔
2431
  pBtc->idx = pBtc->idxStack[pBtc->iPage];
45,458✔
2432

2433
  return 0;
45,458✔
2434
}
2435

2436
int tdbBtcGet(SBTC *pBtc, const void **ppKey, int *kLen, const void **ppVal, int *vLen) {
55,615,829✔
2437
  SCell *pCell;
2438

2439
  if (pBtc->idx < 0 || pBtc->idx >= TDB_PAGE_TOTAL_CELLS(pBtc->pPage)) {
55,615,829✔
2440
    return TSDB_CODE_FAILED;
3,781✔
2441
  }
2442

2443
  pCell = tdbPageGetCell(pBtc->pPage, pBtc->idx);
55,580,979✔
2444
  int32_t ret = tdbBtreeDecodeCell(pBtc->pPage, pCell, &pBtc->coder, pBtc->pTxn, pBtc->pBt);
55,453,771✔
2445
  if (ret < 0) {
55,550,228!
2446
    tdbError("tdb/btc-get: decode cell failed with ret: %d.", ret);
×
2447
    return ret;
×
2448
  }
2449

2450
  if (ppKey) {
55,550,228✔
2451
    *ppKey = (void *)pBtc->coder.pKey;
55,541,073✔
2452
    *kLen = pBtc->coder.kLen;
55,541,073✔
2453
  }
2454

2455
  if (ppVal) {
55,550,228✔
2456
    *ppVal = (void *)pBtc->coder.pVal;
107,940✔
2457
    *vLen = pBtc->coder.vLen;
107,940✔
2458
  }
2459

2460
  return 0;
55,550,228✔
2461
}
2462

2463
int tdbBtcDelete(SBTC *pBtc) {
151,947✔
2464
  int         idx = pBtc->idx;
151,947✔
2465
  int         nCells = TDB_PAGE_TOTAL_CELLS(pBtc->pPage);
151,947✔
2466
  SPager     *pPager = pBtc->pBt->pPager;
151,940✔
2467
  const void *pKey;
2468
  i8          iPage;
2469
  SPage      *pPage;
2470
  SPgno       pgno;
2471
  SCell      *pCell;
2472
  int         szCell;
2473
  int         nKey;
2474
  int         ret;
2475

2476
  if (idx < 0 || idx >= nCells) {
151,940!
2477
    tdbError("tdb/btc-delete: idx: %d out of range[%d, %d).", idx, 0, nCells);
×
2478
    return TSDB_CODE_FAILED;
×
2479
  }
2480

2481
  // drop the cell on the leaf
2482
  ret = tdbPagerWrite(pPager, pBtc->pPage);
151,941✔
2483
  if (ret < 0) {
151,937!
2484
    tdbError("failed to write page since %s", terrstr());
×
2485
    return ret;
×
2486
  }
2487

2488
  ret = tdbPageDropCell(pBtc->pPage, idx, pBtc->pTxn, pBtc->pBt);
151,937✔
2489
  if (ret < 0) {
151,920!
2490
    tdbError("tdb/btc-delete: page drop cell failed with ret: %d.", ret);
×
2491
  }
2492

2493
  // update interior page or do balance
2494
  if (idx == nCells - 1) {
151,883✔
2495
    if (idx) {
43,824✔
2496
      pBtc->idx--;
15,454✔
2497
      ret = tdbBtcGet(pBtc, &pKey, &nKey, NULL, NULL);
15,454✔
2498
      if (ret) {
15,455!
2499
        tdbError("tdb/btc-delete: btc get failed with ret: %d.", ret);
×
2500
        return ret;
×
2501
      }
2502

2503
      // loop to update the interial page
2504
      pgno = TDB_PAGE_PGNO(pBtc->pPage);
15,455✔
2505
      for (iPage = pBtc->iPage - 1; iPage >= 0; iPage--) {
17,243✔
2506
        pPage = pBtc->pgStack[iPage];
2,091✔
2507
        idx = pBtc->idxStack[iPage];
2,091✔
2508
        nCells = TDB_PAGE_TOTAL_CELLS(pPage);
2,091✔
2509

2510
        if (idx < nCells) {
2,091✔
2511
          ret = tdbPagerWrite(pPager, pPage);
303✔
2512
          if (ret < 0) {
303!
2513
            tdbError("failed to write page since %s", terrstr());
×
2514
            return ret;
×
2515
          }
2516

2517
          // update the cell with new key
2518
          if ((pCell = tdbOsMalloc(nKey + 9)) == NULL) {
303!
2519
            tdbError("tdb/btc-delete: malloc failed.");
×
2520
            return terrno;
×
2521
          }
2522
          ret = tdbBtreeEncodeCell(pPage, pKey, nKey, &pgno, sizeof(pgno), pCell, &szCell, pBtc->pTxn, pBtc->pBt);
303✔
2523
          if (ret < 0) {
303!
2524
            tdbError("tdb/btc-delete: btree encode cell failed with ret: %d.", ret);
×
2525
          }
2526

2527
          ret = tdbPageUpdateCell(pPage, idx, pCell, szCell, pBtc->pTxn, pBtc->pBt);
303✔
2528
          if (ret < 0) {
303!
2529
            tdbOsFree(pCell);
×
2530
            tdbError("tdb/btc-delete: page update cell failed with ret: %d.", ret);
×
2531
            return ret;
×
2532
          }
2533
          tdbOsFree(pCell);
303!
2534

2535
          if (pPage->nOverflow > 0) {
303!
2536
            tdbDebug("tdb/btc-delete: btree balance after update cell, pPage/nOverflow/pgno: %p/%d/%" PRIu32 ".", pPage,
×
2537
                     pPage->nOverflow, TDB_PAGE_PGNO(pPage));
2538

2539
            tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pPage, pBtc->pTxn);
×
2540
            while (--pBtc->iPage != iPage) {
×
2541
              tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pgStack[pBtc->iPage], pBtc->pTxn);
×
2542
            }
2543

2544
            // pBtc->iPage = iPage;
2545
            pBtc->pPage = pPage;
×
2546
            ret = tdbBtreeBalance(pBtc);
×
2547
            if (ret < 0) {
×
2548
              tdbError("tdb/btc-delete: btree balance failed with ret: %d.", ret);
×
2549
              return ret;
×
2550
            }
2551
          }
2552

2553
          break;
303✔
2554
        } else {
2555
          pgno = TDB_PAGE_PGNO(pPage);
1,788✔
2556
        }
2557
      }
2558
    } else {
2559
      // delete the leaf page and do balance
2560
      if (TDB_PAGE_TOTAL_CELLS(pBtc->pPage) != 0) {
28,370!
2561
        tdbError("tdb/btc-delete: page to be deleted should be empty.");
×
2562
        return TSDB_CODE_FAILED;
×
2563
      }
2564

2565
      // printf("tdb/btc-delete: btree balance delete pgno: %d.\n", TDB_PAGE_PGNO(pBtc->pPage));
2566

2567
      ret = tdbBtreeBalance(pBtc);
28,372✔
2568
      if (ret < 0) {
28,375!
2569
        tdbError("tdb/btc-delete: btree balance failed with ret: %d.", ret);
×
2570
        return ret;
×
2571
      }
2572
    }
2573
  }
2574

2575
  return 0;
151,889✔
2576
}
2577

2578
int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int nData, int insert) {
1,509,008✔
2579
  SCell *pCell;
2580
  int    szCell;
2581
  int    nCells = TDB_PAGE_TOTAL_CELLS(pBtc->pPage);
1,509,008✔
2582
  int    szBuf;
2583
  void  *pBuf;
2584
  int    ret;
2585

2586
  if (pBtc->idx < 0) {
1,509,006!
2587
    tdbError("tdb/btc-upsert: invalid idx: %d.", pBtc->idx);
×
2588
    return TSDB_CODE_FAILED;
×
2589
  }
2590

2591
  // alloc space
2592
  szBuf = kLen + nData + 14;
1,509,006✔
2593
  pBuf = tdbRealloc(pBtc->pBt->pBuf, pBtc->pBt->pageSize > szBuf ? szBuf : pBtc->pBt->pageSize);
1,509,006✔
2594
  if (pBuf == NULL) {
1,509,106!
2595
    tdbError("tdb/btc-upsert: realloc pBuf failed.");
×
2596
    return terrno;
×
2597
  }
2598
  pBtc->pBt->pBuf = pBuf;
1,509,106✔
2599
  pCell = (SCell *)pBtc->pBt->pBuf;
1,509,106✔
2600

2601
  // encode cell
2602
  ret = tdbBtreeEncodeCell(pBtc->pPage, pKey, kLen, pData, nData, pCell, &szCell, pBtc->pTxn, pBtc->pBt);
1,509,106✔
2603
  if (ret < 0) {
1,509,266!
2604
    tdbError("tdb/btc-upsert: btree encode cell failed with ret: %d.", ret);
×
2605
    return ret;
×
2606
  }
2607

2608
  // mark dirty
2609
  ret = tdbPagerWrite(pBtc->pBt->pPager, pBtc->pPage);
1,509,266✔
2610
  if (ret < 0) {
1,509,264!
2611
    tdbError("failed to write page since %s", terrstr());
×
2612
    return ret;
×
2613
  }
2614

2615
  // insert or update
2616
  if (insert) {
1,509,277!
2617
    if (pBtc->idx > nCells) {
1,509,277!
2618
      tdbError("tdb/btc-upsert: invalid idx: %d, nCells: %d.", pBtc->idx, nCells);
×
2619
      return TSDB_CODE_FAILED;
×
2620
    }
2621

2622
    ret = tdbPageInsertCell(pBtc->pPage, pBtc->idx, pCell, szCell, 0);
1,509,277✔
2623
  } else {
2624
    if (pBtc->idx >= nCells) {
×
2625
      tdbError("tdb/btc-upsert: invalid idx: %d, nCells: %d.", pBtc->idx, nCells);
×
2626
      return TSDB_CODE_FAILED;
×
2627
    }
2628

2629
    ret = tdbPageUpdateCell(pBtc->pPage, pBtc->idx, pCell, szCell, pBtc->pTxn, pBtc->pBt);
×
2630
  }
2631
  if (ret < 0) {
1,508,087!
2632
    tdbError("tdb/btc-upsert: page insert/update cell failed with ret: %d.", ret);
×
2633
    return ret;
×
2634
  }
2635

2636
  // check balance
2637
  if (pBtc->pPage->nOverflow > 0) {
1,508,087✔
2638
    ret = tdbBtreeBalance(pBtc);
187,256✔
2639
    if (ret < 0) {
187,290!
2640
      tdbError("tdb/btc-upsert: btree balance failed with ret: %d.", ret);
×
2641
      return ret;
×
2642
    }
2643
  }
2644
  
2645
  return 0;
1,508,205✔
2646
}
2647

2648
int tdbBtcMoveTo(SBTC *pBtc, const void *pKey, int kLen, int *pCRst) {
13,169,360✔
2649
  int         ret;
2650
  int         nCells;
2651
  int         c;
2652
  SCell      *pCell;
2653
  SBTree     *pBt = pBtc->pBt;
13,169,360✔
2654
  SPager     *pPager = pBt->pPager;
13,169,360✔
2655
  const void *pTKey;
2656
  int         tkLen;
2657

2658
  tdbTrace("tdb moveto, pager:%p, ipage:%d", pPager, pBtc->iPage);
13,169,360✔
2659
  if (pBtc->iPage < 0) {
13,172,065!
2660
    // move from a clear cursor
2661
    ret = tdbPagerFetchPage(pPager, &pBt->root, &(pBtc->pPage), tdbBtreeInitPage,
13,172,065✔
2662
                            &((SBtreeInitPageArg){.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF}), pBtc->pTxn);
13,172,065✔
2663
    if (ret < 0) {
13,174,746!
2664
      // TODO
2665
      tdbError("tdb/btc-move-to: fetch page failed with ret: %d.", ret);
×
2666
      return ret;
333,885✔
2667
    }
2668

2669
    pBtc->iPage = 0;
13,174,746✔
2670
    pBtc->idx = -1;
13,174,746✔
2671
    // for empty tree, just return with an invalid position
2672
    if (TDB_PAGE_TOTAL_CELLS(pBtc->pPage) == 0) return 0;
13,174,746✔
2673
  } else {
2674
    // TODO
2675
    tdbError("tdb/btc-move-to: move from a dirty cursor.");
×
2676
    return TSDB_CODE_FAILED;
×
2677
#if 0
2678
    SPage *pPage;
2679
    int    idx;
2680
    int    iPage = 0;
2681

2682
    // downward search
2683
    for (; iPage < pBtc->iPage; iPage++) {
2684
      pPage = pBtc->pgStack[iPage];
2685
      idx = pBtc->idxStack[iPage];
2686
      nCells = TDB_PAGE_TOTAL_CELLS(pPage);
2687

2688
      if (TDB_BTREE_PAGE_IS_LEAF(pPage)) {
2689
        tdbError("tdb/btc-move-to: leaf page in cursor stack.");
2690
        return -1;
2691
      }
2692

2693
      // check if key <= current position
2694
      if (idx < nCells) {
2695
        pCell = tdbPageGetCell(pPage, idx);
2696
        tdbBtreeDecodeCell(pPage, pCell, &cd);
2697
        c = pBt->kcmpr(pKey, kLen, cd.pKey, cd.kLen);
2698
        if (c > 0) break;
2699
      }
2700

2701
      // check if key > current - 1 position
2702
      if (idx > 0) {
2703
        pCell = tdbPageGetCell(pPage, idx - 1);
2704
        tdbBtreeDecodeCell(pPage, pCell, &cd);
2705
        c = pBt->kcmpr(pKey, kLen, cd.pKey, cd.kLen, pBtc->pTxn, pBtc->pBt);
2706
        if (c <= 0) break;
2707
      }
2708
    }
2709

2710
    // move upward
2711
    for (;;) {
2712
      if (pBtc->iPage == iPage) break;
2713
      tdbBtcMoveUpward(pBtc);
2714
    }
2715
#endif
2716
  }
2717

2718
  // search downward to the leaf
2719
  tdbTrace("tdb search downward, pager:%p, ipage:%d", pPager, pBtc->iPage);
12,837,493✔
2720
  for (;;) {
6,193,096✔
2721
    int    lidx, ridx;
2722
    SPage *pPage;
2723

2724
    pPage = pBtc->pPage;
19,030,589✔
2725
    nCells = TDB_PAGE_TOTAL_CELLS(pPage);
19,030,589✔
2726
    lidx = 0;
19,021,808✔
2727
    ridx = nCells - 1;
19,021,808✔
2728

2729
    if (nCells <= 0) {
19,021,808!
2730
      tdbError("tdb/btc-move-to: empty page.");
×
2731
      return TSDB_CODE_FAILED;
×
2732
    }
2733

2734
    // compare first cell
2735
    pBtc->idx = lidx;
19,021,808✔
2736
    ret = tdbBtcGet(pBtc, &pTKey, &tkLen, NULL, NULL);
19,021,808✔
2737
    if (ret < 0) {
18,975,207!
2738
      tdbError("tdb/btc-move-to: btc get failed with ret: %d.", ret);
×
2739
      return ret;
×
2740
    }
2741
    c = pBt->kcmpr(pKey, kLen, pTKey, tkLen);
18,975,207✔
2742
    if (c <= 0) {
18,969,323✔
2743
      ridx = lidx - 1;
6,962,737✔
2744
    } else {
2745
      lidx = lidx + 1;
12,006,586✔
2746
    }
2747
    // compare last cell
2748
    if (lidx <= ridx) {
18,969,323✔
2749
      pBtc->idx = ridx;
11,436,221✔
2750
      ret = tdbBtcGet(pBtc, &pTKey, &tkLen, NULL, NULL);
11,436,221✔
2751
      if (ret < 0) {
11,432,692!
2752
        tdbError("tdb/btc-move-to: btc get failed with ret: %d.", ret);
×
2753
        return ret;
×
2754
      }
2755
      c = pBt->kcmpr(pKey, kLen, pTKey, tkLen);
11,432,692✔
2756
      if (c >= 0) {
11,431,138✔
2757
        lidx = ridx + 1;
4,233,295✔
2758
      } else {
2759
        ridx = ridx - 1;
7,197,843✔
2760
      }
2761
    }
2762

2763
    // binary search
2764
    tdbTrace("tdb binary search, pager:%p, ipage:%d", pPager, pBtc->iPage);
18,964,240✔
2765
    for (;;) {
2766
      if (lidx > ridx) break;
39,757,416✔
2767

2768
      pBtc->idx = (lidx + ridx) >> 1;
25,178,206✔
2769
      ret = tdbBtcGet(pBtc, &pTKey, &tkLen, NULL, NULL);
25,178,206✔
2770
      if (ret < 0) {
25,178,064!
2771
        tdbError("tdb/btc-move-to: btc get failed with ret: %d.", ret);
×
2772
        return ret;
×
2773
      }
2774
      c = pBt->kcmpr(pKey, kLen, pTKey, tkLen);
25,178,064✔
2775
      if (c < 0) {
25,177,663✔
2776
        // pKey < cd.pKey
2777
        ridx = pBtc->idx - 1;
9,015,741✔
2778
      } else if (c > 0) {
16,161,922✔
2779
        // pKey > cd.pKey
2780
        lidx = pBtc->idx + 1;
11,822,163✔
2781
      } else {
2782
        // pKey == cd.pKey
2783
        break;
4,339,759✔
2784
      }
2785
    }
2786

2787
    // keep search downward or break
2788
    if (TDB_BTREE_PAGE_IS_LEAF(pPage)) {
18,918,969✔
2789
      *pCRst = c;
12,828,375✔
2790
      break;
12,828,375✔
2791
    } else {
2792
      if (c > 0) {
6,090,594✔
2793
        pBtc->idx += 1;
2,511,994✔
2794
      }
2795
      if (tdbBtcMoveDownward(pBtc) < 0) {
6,090,594✔
2796
        tdbError("tdb/btc-move-to: btc move downward failed.");
279!
2797
        return TSDB_CODE_FAILED;
×
2798
      }
2799
    }
2800
  }
2801

2802
  tdbTrace("tdb moveto end, pager:%p, ipage:%d", pPager, pBtc->iPage);
12,828,375✔
2803

2804
  return 0;
12,827,917✔
2805
}
2806

2807
void tdbBtcClose(SBTC *pBtc) {
13,226,758✔
2808
  if (pBtc->iPage < 0) {
13,226,758!
2809
    if (pBtc->freeTxn) {
×
2810
      tdbTxnClose(pBtc->pTxn);
×
2811
    }
2812
    return;
×
2813
  }
2814

2815
  for (;;) {
2816
    if (NULL == pBtc->pPage) {
19,214,669!
2817
      tdbError("tdb/btc-close: null ptr pPage.");
×
2818
      return;
×
2819
    }
2820

2821
    tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pPage, pBtc->pTxn);
19,214,669✔
2822

2823
    pBtc->iPage--;
19,240,574✔
2824
    if (pBtc->iPage < 0) break;
19,240,574✔
2825

2826
    pBtc->pPage = pBtc->pgStack[pBtc->iPage];
5,987,911✔
2827
    pBtc->idx = pBtc->idxStack[pBtc->iPage];
5,987,911✔
2828
  }
2829

2830
  if (TDB_CELLDECODER_FREE_KEY(&pBtc->coder)) {
13,252,663✔
2831
    tdbFree(pBtc->coder.pKey);
37✔
2832
  }
2833

2834
  if (TDB_CELLDECODER_FREE_VAL(&pBtc->coder)) {
13,252,663✔
2835
    tdbDebug("tdb btc/close decoder: %p pVal free: %p", &pBtc->coder, pBtc->coder.pVal);
156,392✔
2836

2837
    tdbFree(pBtc->coder.pVal);
156,392✔
2838
  }
2839

2840
  if (pBtc->freeTxn) {
13,247,977✔
2841
    tdbTxnClose(pBtc->pTxn);
11,493,417✔
2842
  }
2843

2844
  return;
13,242,588✔
2845
}
2846

2847
int tdbBtcIsValid(SBTC *pBtc) {
×
2848
  if (pBtc->idx < 0) {
×
2849
    return 0;
×
2850
  } else {
2851
    return 1;
×
2852
  }
2853
}
2854
// TDB_BTREE_CURSOR
2855

2856
// TDB_BTREE_DEBUG =====================
2857
#ifndef NODEBUG
2858
typedef struct {
2859
  SPgno pgno;
2860
  u8    root;
2861
  u8    leaf;
2862
  SPgno rChild;
2863
  int   nCells;
2864
  int   nOvfl;
2865
} SBtPageInfo;
2866

2867
SBtPageInfo btPageInfos[20];
2868

2869
void tdbBtPageInfo(SPage *pPage, int idx) {
×
2870
  SBtPageInfo *pBtPageInfo;
2871

2872
  pBtPageInfo = btPageInfos + idx;
×
2873

2874
  pBtPageInfo->pgno = TDB_PAGE_PGNO(pPage);
×
2875

2876
  pBtPageInfo->root = TDB_BTREE_PAGE_IS_ROOT(pPage);
×
2877
  pBtPageInfo->leaf = TDB_BTREE_PAGE_IS_LEAF(pPage);
×
2878

2879
  pBtPageInfo->rChild = 0;
×
2880
  if (!pBtPageInfo->leaf) {
×
2881
    pBtPageInfo->rChild = *(SPgno *)(pPage->pData + 1);
×
2882
  }
2883

2884
  pBtPageInfo->nCells = TDB_PAGE_TOTAL_CELLS(pPage) - pPage->nOverflow;
×
2885
  pBtPageInfo->nOvfl = pPage->nOverflow;
×
2886
}
×
2887
#endif
2888
// TDB_BTREE_DEBUG
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc