• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

taosdata / TDengine / #3535

23 Nov 2024 02:07AM UTC coverage: 60.85% (+0.03%) from 60.825%
#3535

push

travis-ci

web-flow
Merge pull request #28893 from taosdata/doc/internal

refact: rename taos lib name

120252 of 252737 branches covered (47.58%)

Branch coverage included in aggregate %.

201187 of 275508 relevant lines covered (73.02%)

15886166.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.15
/source/util/src/tbloomfilter.c
1
/*
2
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
3
 *
4
 * This program is free software: you can use, redistribute, and/or modify
5
 * it under the terms of the GNU Affero General Public License, version 3
6
 * or later ("AGPL"), as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.
11
 *
12
 * You should have received a copy of the GNU Affero General Public License
13
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14
 */
15

16
#define _DEFAULT_SOURCE
17

18
#include "tbloomfilter.h"
19
#include "taos.h"
20
#include "taoserror.h"
21

22
#define UNIT_NUM_BITS      64ULL
23
#define UNIT_ADDR_NUM_BITS 6ULL
24

25
static FORCE_INLINE bool setBit(uint64_t* buf, uint64_t index) {
26
  uint64_t unitIndex = index >> UNIT_ADDR_NUM_BITS;
115,812,070✔
27
  uint64_t old = buf[unitIndex];
115,812,070✔
28
  buf[unitIndex] |= (1ULL << (index % UNIT_NUM_BITS));
115,812,070✔
29
  return buf[unitIndex] != old;
115,812,070✔
30
}
31

32
static FORCE_INLINE bool getBit(uint64_t* buf, uint64_t index) {
33
  uint64_t unitIndex = index >> UNIT_ADDR_NUM_BITS;
25,485,343✔
34
  uint64_t mask = 1ULL << (index % UNIT_NUM_BITS);
25,485,343✔
35
  return buf[unitIndex] & mask;
25,485,343✔
36
}
37

38
int32_t tBloomFilterInit(uint64_t expectedEntries, double errorRate, SBloomFilter** ppBF) {
127,069,547✔
39
  int32_t code = 0;
127,069,547✔
40
  int32_t lino = 0;
127,069,547✔
41
  if (expectedEntries < 1 || errorRate <= 0 || errorRate >= 1.0) {
127,069,547!
42
    code = TSDB_CODE_FAILED;
6,604✔
43
    QUERY_CHECK_CODE(code, lino, _error);
6,604!
44
  }
45
  SBloomFilter* pBF = taosMemoryCalloc(1, sizeof(SBloomFilter));
127,069,547✔
46
  if (pBF == NULL) {
127,541,011!
47
    code = terrno;
×
48
    QUERY_CHECK_CODE(code, lino, _error);
×
49
  }
50
  pBF->expectedEntries = expectedEntries;
127,541,011✔
51
  pBF->errorRate = errorRate;
127,541,011✔
52

53
  double lnRate = fabs(log(errorRate));
127,541,011✔
54
  // ln(2)^2 = 0.480453013918201
55
  // m = - n * ln(P) / ( ln(2) )^2
56
  // m is the size of bloom filter, n is expected entries, P is false positive probability
57
  pBF->numUnits = (uint64_t)ceil(expectedEntries * lnRate / 0.480453013918201 / UNIT_NUM_BITS);
127,541,011✔
58
  pBF->numBits = pBF->numUnits * 64;
127,541,011✔
59
  pBF->size = 0;
127,541,011✔
60

61
  // ln(2) = 0.693147180559945
62
  pBF->hashFunctions = (uint32_t)ceil(lnRate / 0.693147180559945);
127,541,011✔
63
  pBF->hashFn1 = HASH_FUNCTION_1;
127,541,011✔
64
  pBF->hashFn2 = HASH_FUNCTION_2;
127,541,011✔
65
  pBF->buffer = taosMemoryCalloc(pBF->numUnits, sizeof(uint64_t));
127,541,011✔
66
  if (pBF->buffer == NULL) {
122,710,733!
67
    tBloomFilterDestroy(pBF);
×
68
    code = terrno;
×
69
    QUERY_CHECK_CODE(code, lino, _error);
342,273!
70
  }
71
  (*ppBF) = pBF;
123,053,006✔
72

73
_error:
123,053,006✔
74
  if (code != TSDB_CODE_SUCCESS) {
123,053,006!
75
    uError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
×
76
  }
77
  return code;
122,862,994✔
78
}
79

80
int32_t tBloomFilterPutHash(SBloomFilter* pBF, uint64_t hash1, uint64_t hash2) {
10,562,450✔
81
  if (tBloomFilterIsFull(pBF)) {
10,562,450!
82
    uError("%s failed at line %d since %s", __func__, __LINE__, tstrerror(TSDB_CODE_INVALID_PARA));
×
83
    return TSDB_CODE_FAILED;
×
84
  }
85
  bool                    hasChange = false;
10,529,351✔
86
  const register uint64_t size = pBF->numBits;
10,529,351✔
87
  uint64_t                cbHash = hash1;
10,529,351✔
88
  for (uint32_t i = 0; i < pBF->hashFunctions; ++i) {
101,847,105✔
89
    hasChange |= setBit(pBF->buffer, cbHash % size);
91,317,754✔
90
    cbHash += hash2;
91,317,754✔
91
  }
92
  if (hasChange) {
10,529,351!
93
    pBF->size++;
10,561,543✔
94
    return TSDB_CODE_SUCCESS;
10,561,543✔
95
  }
96
  return TSDB_CODE_FAILED;
×
97
}
98

99
int32_t tBloomFilterPut(SBloomFilter* pBF, const void* keyBuf, uint32_t len) {
2,463,702✔
100
  uint64_t                h1 = (uint64_t)pBF->hashFn1(keyBuf, len);
2,463,702✔
101
  uint64_t                h2 = (uint64_t)pBF->hashFn2(keyBuf, len);
2,463,894✔
102
  bool                    hasChange = false;
2,463,852✔
103
  const register uint64_t size = pBF->numBits;
2,463,852✔
104
  uint64_t                cbHash = h1;
2,463,852✔
105
  for (uint32_t i = 0; i < pBF->hashFunctions; ++i) {
26,958,168✔
106
    hasChange |= setBit(pBF->buffer, cbHash % size);
24,494,316✔
107
    cbHash += h2;
24,494,316✔
108
  }
109
  if (hasChange) {
2,463,852✔
110
    pBF->size++;
2,442,306✔
111
    return TSDB_CODE_SUCCESS;
2,442,306✔
112
  }
113
  return TSDB_CODE_FAILED;
21,546✔
114
}
115

116
int32_t tBloomFilterNoContain(const SBloomFilter* pBF, uint64_t hash1, uint64_t hash2) {
8,195,572✔
117
  const register uint64_t size = pBF->numBits;
8,195,572✔
118
  uint64_t                cbHash = hash1;
8,195,572✔
119
  for (uint32_t i = 0; i < pBF->hashFunctions; ++i) {
25,692,837✔
120
    if (!getBit(pBF->buffer, cbHash % size)) {
50,970,686✔
121
      return TSDB_CODE_SUCCESS;
7,988,078✔
122
    }
123
    cbHash += hash2;
17,497,265✔
124
  }
125
  return TSDB_CODE_FAILED;
207,494✔
126
}
127

128
void tBloomFilterDestroy(SBloomFilter* pBF) {
127,950,738✔
129
  if (pBF == NULL) {
127,950,738!
130
    return;
×
131
  }
132
  taosMemoryFree(pBF->buffer);
127,950,738✔
133
  taosMemoryFree(pBF);
128,861,702✔
134
}
135

136
int32_t tBloomFilterEncode(const SBloomFilter* pBF, SEncoder* pEncoder) {
50,952✔
137
  TAOS_CHECK_RETURN(tEncodeU32(pEncoder, pBF->hashFunctions));
101,904!
138
  TAOS_CHECK_RETURN(tEncodeU64(pEncoder, pBF->expectedEntries));
101,904!
139
  TAOS_CHECK_RETURN(tEncodeU64(pEncoder, pBF->numUnits));
101,904!
140
  TAOS_CHECK_RETURN(tEncodeU64(pEncoder, pBF->numBits));
101,904!
141
  TAOS_CHECK_RETURN(tEncodeU64(pEncoder, pBF->size));
101,904!
142
  for (uint64_t i = 0; i < pBF->numUnits; i++) {
79,079,416✔
143
    uint64_t* pUnits = (uint64_t*)pBF->buffer;
79,028,464✔
144
    TAOS_CHECK_RETURN(tEncodeU64(pEncoder, pUnits[i]));
158,056,928!
145
  }
146
  TAOS_CHECK_RETURN(tEncodeDouble(pEncoder, pBF->errorRate));
101,904!
147
  return 0;
50,952✔
148
}
149

150
int32_t tBloomFilterDecode(SDecoder* pDecoder, SBloomFilter** ppBF) {
25,454✔
151
  int32_t       code = 0;
25,454✔
152
  int32_t       lino = 0;
25,454✔
153
  SBloomFilter* pBF = taosMemoryCalloc(1, sizeof(SBloomFilter));
25,454✔
154
  if (!pBF) {
25,454!
155
    code = terrno;
×
156
    QUERY_CHECK_CODE(code, lino, _error);
×
157
  }
158
  pBF->buffer = NULL;
25,454✔
159
  if (tDecodeU32(pDecoder, &pBF->hashFunctions) < 0) {
50,908!
160
    code = TSDB_CODE_FAILED;
×
161
    QUERY_CHECK_CODE(code, lino, _error);
×
162
  }
163
  if (tDecodeU64(pDecoder, &pBF->expectedEntries) < 0) {
50,907!
164
    code = TSDB_CODE_FAILED;
×
165
    QUERY_CHECK_CODE(code, lino, _error);
×
166
  }
167
  if (tDecodeU64(pDecoder, &pBF->numUnits) < 0) {
50,906!
168
    code = TSDB_CODE_FAILED;
×
169
    QUERY_CHECK_CODE(code, lino, _error);
×
170
  }
171
  if (tDecodeU64(pDecoder, &pBF->numBits) < 0) {
50,906!
172
    code = TSDB_CODE_FAILED;
×
173
    QUERY_CHECK_CODE(code, lino, _error);
×
174
  }
175
  if (tDecodeU64(pDecoder, &pBF->size) < 0) {
50,906!
176
    code = TSDB_CODE_FAILED;
×
177
    QUERY_CHECK_CODE(code, lino, _error);
×
178
  }
179
  pBF->buffer = taosMemoryCalloc(pBF->numUnits, sizeof(uint64_t));
25,453✔
180
  QUERY_CHECK_NULL(pBF->buffer, code, lino, _error, terrno);
25,442!
181

182
  for (int32_t i = 0; i < pBF->numUnits; i++) {
41,408,411✔
183
    uint64_t* pUnits = (uint64_t*)pBF->buffer;
41,408,353✔
184
    if (tDecodeU64(pDecoder, pUnits + i) < 0) {
82,791,321!
185
      code = TSDB_CODE_FAILED;
×
186
      QUERY_CHECK_CODE(code, lino, _error);
×
187
    }
188
  }
189
  if (tDecodeDouble(pDecoder, &pBF->errorRate) < 0) {
25,511!
190
    code = TSDB_CODE_FAILED;
×
191
    QUERY_CHECK_CODE(code, lino, _error);
×
192
  }
193
  pBF->hashFn1 = HASH_FUNCTION_1;
25,453✔
194
  pBF->hashFn2 = HASH_FUNCTION_2;
25,453✔
195
  (*ppBF) = pBF;
25,453✔
196
  return TSDB_CODE_SUCCESS;
25,453✔
197

198
_error:
×
199
  tBloomFilterDestroy(pBF);
×
200
  uError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
×
201
  return code;
×
202
}
203

204
bool tBloomFilterIsFull(const SBloomFilter* pBF) { return pBF->size >= pBF->expectedEntries; }
23,429,638✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc