• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

stacks-network / stacks-core / 25903914664-1

15 May 2026 06:28AM UTC coverage: 47.122% (-38.8%) from 85.959%
25903914664-1

Pull #7199

github

94e391
web-flow
Merge 109f2828c into 1c7b8e6ac
Pull Request #7199: Feat: L1 and L2 early unlocks, updating signer

103343 of 219309 relevant lines covered (47.12%)

12880462.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

26.33
/stackslib/src/util_lib/bloom.rs
1
// Copyright (C) 2013-2020 Blockstack PBC, a public benefit corporation
2
// Copyright (C) 2020-2021 Stacks Open Internet Foundation
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16

17
use std::cmp;
18
use std::hash::Hasher;
19
use std::io::{Read, Seek, SeekFrom, Write};
20

21
use rand::prelude::*;
22
use rand::thread_rng;
23
use rusqlite::blob::Blob;
24
use rusqlite::{params, Error as sqlite_error};
25
use siphasher::sip::SipHasher; // this is SipHash-2-4
26
use stacks_common::codec::{
27
    read_next, write_next, Error as codec_error, StacksMessageCodec, MAX_MESSAGE_LEN,
28
};
29
use stacks_common::types::sqlite::NO_PARAMS;
30
use stacks_common::util::hash::{to_hex, Sha512Trunc256Sum};
31

32
use crate::util_lib::db::{query_expect_row, DBConn, DBTx, Error as db_error};
33

34
/// A field of bits of known length!
35
#[derive(Debug, Clone, PartialEq)]
36
struct BitField(Vec<u8>, u32);
37

38
impl BitField {
39
    /// Make a new bitfield with sz bits represented (rounded up to the nearest byte in space used)
40
    pub fn new(sz: u32) -> BitField {
×
41
        BitField(vec![0u8; BITVEC_LEN!(sz) as usize], sz)
×
42
    }
×
43

44
    pub fn num_bits(&self) -> u32 {
×
45
        self.1
×
46
    }
×
47

48
    #[allow(clippy::indexing_slicing)]
49
    pub fn test(&self, bit: u32) -> bool {
×
50
        if bit >= self.1 {
×
51
            panic!("Attempted to read beyond end of bitfield");
×
52
        }
×
53
        self.0[(bit / 8) as usize] & (1u8 << ((bit % 8) as u8)) != 0
×
54
    }
×
55

56
    #[allow(clippy::indexing_slicing)]
57
    pub fn set(&mut self, bit: u32) {
×
58
        if bit >= self.1 {
×
59
            panic!("Attempted to write beyond end of bitfield");
×
60
        }
×
61
        self.0[(bit / 8) as usize] |= 1u8 << ((bit % 8) as u8);
×
62
    }
×
63

64
    #[allow(clippy::indexing_slicing)]
65
    pub fn clear(&mut self, bit: u32) {
×
66
        if bit >= self.1 {
×
67
            panic!("Attempted to write beyond end of bitfield");
×
68
        }
×
69
        self.0[(bit / 8) as usize] &= !(1u8 << ((bit % 8) as u8));
×
70
    }
×
71
}
72

73
/// Codec enum for how a bloom filter bitfield's fields are encoded
74
#[repr(u8)]
75
#[derive(Debug, Clone, PartialEq)]
76
enum BitFieldEncoding {
77
    Sparse = 0x01,
78
    Full = 0x02,
79
}
80

81
fn should_use_sparse_encoding(non_zero_bytes: usize, total_bytes: usize) -> bool {
×
82
    non_zero_bytes * 5 + 4 < total_bytes
×
83
}
×
84

85
/// Encode the inner count array, using a sparse representation if it would save space
86
fn encode_bitfield<W: Write>(fd: &mut W, bytes: &[u8]) -> Result<(), codec_error> {
×
87
    let mut num_filled = 0;
×
88
    for bits in bytes.iter() {
×
89
        if *bits > 0 {
×
90
            num_filled += 1;
×
91
        }
×
92
    }
93

94
    if should_use_sparse_encoding(num_filled, bytes.len()) {
×
95
        // more efficient to encode as (4-byte-index, 1-byte-value) pairs, with an extra 4-byte header
96
        write_next(fd, &(BitFieldEncoding::Sparse as u8))?;
×
97
        write_next(fd, &(bytes.len() as u32))?;
×
98
        write_next(fd, &(num_filled as u32))?;
×
99
        for (i, bits) in bytes.iter().enumerate() {
×
100
            if *bits > 0 {
×
101
                write_next(fd, &(i as u32))?;
×
102
                write_next(fd, bits)?;
×
103
            }
×
104
        }
105
    } else {
106
        // more efficient to encode as-is
107
        // (note that the array has a 4-byte length prefix)
108
        write_next(fd, &(BitFieldEncoding::Full as u8))?;
×
109
        write_next(fd, &bytes.to_vec())?;
×
110
    }
111
    Ok(())
×
112
}
×
113

114
/// Decode the inner count array, depending on whether or not it's sparse
115
fn decode_bitfield<R: Read>(fd: &mut R) -> Result<Vec<u8>, codec_error> {
×
116
    let encoding: u8 = read_next(fd)?;
×
117
    match encoding {
×
118
        x if x == BitFieldEncoding::Sparse as u8 => {
×
119
            // sparse encoding
120
            let vec_len: u32 = read_next(fd)?;
×
121
            if vec_len > MAX_MESSAGE_LEN.saturating_sub(5) {
×
122
                return Err(codec_error::OverflowError("vec_len is too big".into()));
×
123
            }
×
124
            let num_filled: u32 = read_next(fd)?;
×
125

126
            if !should_use_sparse_encoding(num_filled as usize, vec_len as usize) {
×
127
                return Err(codec_error::OverflowError(
×
128
                    "Non-sparse bitfield should not use sparse encoding.".into(),
×
129
                ));
×
130
            }
×
131

132
            let mut ret = vec![0u8; vec_len as usize];
×
133
            for _ in 0..num_filled {
×
134
                let idx: u32 = read_next(fd)?;
×
135
                let slot = ret.get_mut(idx as usize).ok_or_else(|| {
×
136
                    codec_error::DeserializeError(format!("Index overflow: {idx} >= {vec_len}"))
×
137
                })?;
×
138
                let value: u8 = read_next(fd)?;
×
139
                *slot = value;
×
140
            }
141

142
            Ok(ret)
×
143
        }
144
        x if x == BitFieldEncoding::Full as u8 => {
×
145
            // full encoding
146
            let ret: Vec<u8> = read_next(fd)?;
×
147
            Ok(ret)
×
148
        }
149
        _ => Err(codec_error::DeserializeError(format!(
×
150
            "Unrecognized bloom count encoding: {}",
×
151
            encoding
×
152
        ))),
×
153
    }
154
}
×
155

156
impl StacksMessageCodec for BitField {
157
    fn consensus_serialize<W: Write>(&self, fd: &mut W) -> Result<(), codec_error> {
×
158
        write_next(fd, &self.1)?;
×
159
        // no need to write the array length prefix -- we already know it, per the above
160
        encode_bitfield(fd, &self.0)?;
×
161
        Ok(())
×
162
    }
×
163

164
    fn consensus_deserialize<R: Read>(fd: &mut R) -> Result<BitField, codec_error> {
×
165
        let num_bits: u32 = read_next(fd)?;
×
166
        let bits: Vec<u8> = decode_bitfield(fd)?;
×
167
        let expected_length = BITVEC_LEN!(num_bits) as usize;
×
168
        let actual_length = bits.len();
×
169
        if expected_length != actual_length {
×
170
            return Err(codec_error::DeserializeError(format!(
×
171
                "Incorrect data size for bitfield of length {num_bits}, expected {expected_length} but got {actual_length}."
×
172
            )));
×
173
        }
×
174
        Ok(BitField(bits, num_bits))
×
175
    }
×
176
}
177

178
/// A node-specific collection of Bloom function hashes.
179
/// Works by using a node-local salt to ensure that the hash functions used to insert data into the
180
/// bloom structure will be unique (w.h.p.) to this node.
181
#[derive(Debug, Clone, PartialEq)]
182
pub struct BloomNodeHasher {
183
    seed: [u8; 32],
184
}
185

186
impl std::fmt::Display for BloomNodeHasher {
187
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
×
188
        write!(f, "BloomNodeHasher({})", to_hex(&self.seed))
×
189
    }
×
190
}
191

192
impl BloomNodeHasher {
193
    pub fn new(node_seed: &[u8]) -> BloomNodeHasher {
2,730✔
194
        let seed = Sha512Trunc256Sum::from_data(node_seed).0;
2,730✔
195
        BloomNodeHasher { seed }
2,730✔
196
    }
2,730✔
197

198
    pub fn new_random() -> BloomNodeHasher {
2,730✔
199
        let mut seed = [0u8; 32];
2,730✔
200
        thread_rng().fill(&mut seed[..]);
2,730✔
201
        BloomNodeHasher::new(&seed)
2,730✔
202
    }
2,730✔
203
}
204

205
impl StacksMessageCodec for BloomNodeHasher {
206
    fn consensus_serialize<W: Write>(&self, fd: &mut W) -> Result<(), codec_error> {
2,730✔
207
        write_next(fd, &(BloomHashID::BloomNodeHasher as u8))?;
2,730✔
208
        write_next(fd, &self.seed)?;
2,730✔
209
        Ok(())
2,730✔
210
    }
2,730✔
211

212
    fn consensus_deserialize<R: Read>(fd: &mut R) -> Result<BloomNodeHasher, codec_error> {
1,977,348✔
213
        let hasher_type_u8: u8 = read_next(fd)?;
1,977,348✔
214
        match hasher_type_u8 as u8 {
1,977,348✔
215
            x if x == BloomHashID::BloomNodeHasher as u8 => {
1,977,348✔
216
                let seed: [u8; 32] = read_next(fd)?;
1,977,348✔
217
                Ok(BloomNodeHasher { seed })
1,977,348✔
218
            }
219
            _ => Err(codec_error::DeserializeError(format!(
×
220
                "Not a supported bloom hasher type ID: {}",
×
221
                hasher_type_u8
×
222
            ))),
×
223
        }
224
    }
1,977,348✔
225
}
226

227
/// A trait for picking a bin that will be set in a bloom struct
228
pub trait BloomHash {
229
    fn get_seed(&self) -> &[u8; 32];
230
    fn pick_bin(&self, count: u32, data: &[u8], num_bins: u32) -> u32;
231
}
232

233
/// Basic bloom filter with a given hash implementation that can suitably provide a given number of
234
/// distinct hash functions.
235
#[derive(Debug, Clone, PartialEq)]
236
pub struct BloomFilter<H: BloomHash> {
237
    hasher: H,
238
    bits: BitField,
239
    num_hashes: u32,
240
}
241

242
impl std::fmt::Display for BloomFilter<BloomNodeHasher> {
243
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
×
244
        write!(
×
245
            f,
×
246
            "BloomFilter({},nbits={},bits={})",
247
            &self.hasher,
×
248
            self.bits.1,
249
            Sha512Trunc256Sum::from_data(&self.bits.0)
×
250
        )
251
    }
×
252
}
253

254
/// Parameter calculation for bloom filters.
255
/// Returns (number of bins, number of hash functions)
256
fn bloom_hash_count(error_rate: f64, max_items: u32) -> (u32, u32) {
2,730✔
257
    // https://stackoverflow.com/questions/658439/how-many-hash-functions-does-my-bloom-filter-need
258
    let num_slots =
2,730✔
259
        (((-(max_items as f64)) * error_rate.ln()) / (2.0f64.ln() * 2.0f64.ln())).ceil() as u32;
2,730✔
260
    let num_hashes = ((num_slots as f64) / (max_items as f64) * 2.0f64.ln()).round() as u32;
2,730✔
261
    (num_slots, num_hashes)
2,730✔
262
}
2,730✔
263

264
/// Codec enum for the types of hashers we support
265
#[repr(u8)]
266
#[derive(Debug, Clone, PartialEq)]
267
enum BloomHashID {
268
    BloomNodeHasher = 0x01,
269
}
270

271
impl<H: BloomHash> BloomFilter<H> {
272
    /// Make a new bloom filter with a given error rate and expected maximum size
273
    pub fn new(error_rate: f64, max_items: u32, hasher: H) -> BloomFilter<H> {
×
274
        let (num_bits, num_hashes) = bloom_hash_count(error_rate, max_items);
×
275
        BloomFilter {
×
276
            hasher,
×
277
            bits: BitField::new(num_bits),
×
278
            num_hashes,
×
279
        }
×
280
    }
×
281

282
    /// Add a raw item, represented as a byte array (e.g. a serialized struct, perhaps)
283
    pub fn insert_raw(&mut self, item: &[u8]) -> bool {
×
284
        let mut false_positive = true;
×
285
        for i in 0..self.num_hashes {
×
286
            let slot = self.hasher.pick_bin(i, item, self.bits.num_bits());
×
287
            assert!(
×
288
                slot < self.bits.num_bits(),
×
289
                "BUG: hasher selected a slot outside the bitfield: {}",
290
                slot
291
            );
292

293
            if false_positive && !self.bits.test(slot) {
×
294
                false_positive = false;
×
295
            }
×
296

297
            self.bits.set(slot);
×
298
        }
299
        false_positive
×
300
    }
×
301

302
    /// Test to see if a given item (a byte array) is likely present
303
    pub fn contains_raw(&self, item: &[u8]) -> bool {
×
304
        for i in 0..self.num_hashes {
×
305
            let slot = self.hasher.pick_bin(i, item, self.bits.num_bits());
×
306
            assert!(
×
307
                slot < self.bits.num_bits(),
×
308
                "BUG: hasher selected a slot outside the bitfield: {}",
309
                slot
310
            );
311

312
            if !self.bits.test(slot) {
×
313
                // definitely not here
314
                return false;
×
315
            }
×
316
        }
317
        true
×
318
    }
×
319
}
320

321
impl StacksMessageCodec for BloomFilter<BloomNodeHasher> {
322
    fn consensus_serialize<W: Write>(&self, fd: &mut W) -> Result<(), codec_error> {
×
323
        write_next(fd, &(BloomHashID::BloomNodeHasher as u8))?;
×
324
        write_next(fd, &self.hasher.seed)?;
×
325
        write_next(fd, &self.num_hashes)?;
×
326
        write_next(fd, &self.bits)?;
×
327
        Ok(())
×
328
    }
×
329

330
    fn consensus_deserialize<R: Read>(
×
331
        fd: &mut R,
×
332
    ) -> Result<BloomFilter<BloomNodeHasher>, codec_error> {
×
333
        let hasher_type_u8: u8 = read_next(fd)?;
×
334
        match hasher_type_u8 as u8 {
×
335
            x if x == BloomHashID::BloomNodeHasher as u8 => {
×
336
                let seed: [u8; 32] = read_next(fd)?;
×
337
                let num_hashes: u32 = read_next(fd)?;
×
338
                let bits: BitField = read_next(fd)?;
×
339
                if bits.num_bits() == 0 {
×
340
                    return Err(codec_error::DeserializeError(
×
341
                        "Bloom filter must have non-zero bin count".into(),
×
342
                    ));
×
343
                }
×
344
                Ok(BloomFilter {
×
345
                    hasher: BloomNodeHasher { seed },
×
346
                    bits,
×
347
                    num_hashes,
×
348
                })
×
349
            }
350
            _ => Err(codec_error::DeserializeError(format!(
×
351
                "Not a supported bloom hasher type ID: {}",
×
352
                hasher_type_u8
×
353
            ))),
×
354
        }
355
    }
×
356
}
357

358
/// Disk-backed counting bloom filter with a given set of hash functions.  Uses a sqlite3 blob of
359
/// 32-bit bins to count things.  Meant to work alongside an existing database, in its own table
360
/// (e.g. the mempool).
361
#[derive(Debug, Clone, PartialEq)]
362
pub struct BloomCounter<H: BloomHash + Clone + StacksMessageCodec> {
363
    hasher: H,
364
    table_name: String,
365
    num_bins: u32,
366
    num_hashes: u32,
367
    counts_rowid: u32,
368
}
369

370
impl<H: BloomHash + Clone + StacksMessageCodec> BloomCounter<H> {
371
    /// Make a new bloom counter with the given error rate and expected number of items
372
    pub fn new(
2,730✔
373
        tx: &mut DBTx,
2,730✔
374
        table_name: &str,
2,730✔
375
        error_rate: f64,
2,730✔
376
        max_items: u32,
2,730✔
377
        hasher: H,
2,730✔
378
    ) -> Result<BloomCounter<H>, db_error> {
2,730✔
379
        let sql = format!("CREATE TABLE IF NOT EXISTS {table_name}(counts BLOB NOT NULL, num_bins INTEGER NOT NULL, num_hashes INTEGER NOT NULL, hasher BLOB NOT NULL);");
2,730✔
380
        tx.execute(&sql, NO_PARAMS).map_err(db_error::SqliteError)?;
2,730✔
381

382
        let (num_bins, num_hashes) = bloom_hash_count(error_rate, max_items);
2,730✔
383
        let counts_vec = vec![0u8; (num_bins * 4) as usize];
2,730✔
384
        let hasher_vec = hasher.serialize_to_vec();
2,730✔
385

386
        let sql = format!(
2,730✔
387
            "INSERT INTO {} (counts, num_bins, num_hashes, hasher) VALUES (?1, ?2, ?3, ?4)",
388
            table_name
389
        );
390
        let args = params![counts_vec, num_bins, num_hashes, hasher_vec];
2,730✔
391

392
        tx.execute(&sql, args).map_err(db_error::SqliteError)?;
2,730✔
393

394
        let sql = format!("SELECT rowid FROM {table_name}");
2,730✔
395
        let counts_rowid: u64 = query_expect_row(tx, &sql, NO_PARAMS)?
2,730✔
396
            .expect("BUG: inserted bloom counter but can't find row ID");
2,730✔
397

398
        Ok(BloomCounter {
2,730✔
399
            hasher,
2,730✔
400
            table_name: table_name.to_string(),
2,730✔
401
            num_bins,
2,730✔
402
            num_hashes,
2,730✔
403
            counts_rowid: counts_rowid as u32,
2,730✔
404
        })
2,730✔
405
    }
2,730✔
406

407
    pub fn try_load(conn: &DBConn, table_name: &str) -> Result<Option<BloomCounter<H>>, db_error> {
1,977,348✔
408
        let sql = format!("SELECT rowid,* FROM {table_name}");
1,977,348✔
409
        let result = conn.query_row_and_then(&sql, NO_PARAMS, |row| {
1,977,348✔
410
            let mut hasher_blob = row
1,977,348✔
411
                .get_ref("hasher")?
1,977,348✔
412
                .as_blob()
1,977,348✔
413
                .expect("Unable to read hasher as blob");
1,977,348✔
414
            let hasher =
1,977,348✔
415
                H::consensus_deserialize(&mut hasher_blob).map_err(|_| db_error::ParseError)?;
1,977,348✔
416
            let num_bins: u32 = row.get_unwrap("num_bins");
1,977,348✔
417
            let num_hashes: u32 = row.get_unwrap("num_hashes");
1,977,348✔
418
            let counts_rowid: u32 = row.get_unwrap("rowid");
1,977,348✔
419
            Ok(BloomCounter {
1,977,348✔
420
                hasher,
1,977,348✔
421
                table_name: table_name.to_string(),
1,977,348✔
422
                num_bins,
1,977,348✔
423
                num_hashes,
1,977,348✔
424
                counts_rowid,
1,977,348✔
425
            })
1,977,348✔
426
        });
1,977,348✔
427
        match result {
×
428
            Ok(x) => Ok(Some(x)),
1,977,348✔
429
            Err(db_error::SqliteError(sqlite_error::QueryReturnedNoRows)) => Ok(None),
×
430
            Err(e) => Err(e),
×
431
        }
432
    }
1,977,348✔
433

434
    pub fn get_seed(&self) -> &[u8; 32] {
41,962✔
435
        self.hasher.get_seed()
41,962✔
436
    }
41,962✔
437

438
    /// Get a handle to the underlying bins list
439
    fn open_counts_blob<'a>(
69,570✔
440
        &self,
69,570✔
441
        conn: &'a DBConn,
69,570✔
442
        readwrite: bool,
69,570✔
443
    ) -> Result<Blob<'a>, db_error> {
69,570✔
444
        let blob = conn.blob_open(
69,570✔
445
            rusqlite::DatabaseName::Main,
69,570✔
446
            &self.table_name,
69,570✔
447
            "counts",
69,570✔
448
            self.counts_rowid.into(),
69,570✔
449
            !readwrite,
69,570✔
450
        )?;
×
451
        Ok(blob)
69,570✔
452
    }
69,570✔
453

454
    /// Get the 32-bit counter at a particular slot.  It's loaded from a big-endian representation
455
    /// within the readable handle, at offset 4*slot.
456
    fn get_counts_bin<R: Read + Seek>(counts_blob: &mut R, slot: u32) -> u32 {
695,700✔
457
        counts_blob
695,700✔
458
            .seek(SeekFrom::Start((slot as u64) * 4))
695,700✔
459
            .expect("BUG: failed to seek on counts blob");
695,700✔
460

461
        let mut bytes = [0u8; 4];
695,700✔
462
        counts_blob
695,700✔
463
            .read_exact(&mut bytes[..])
695,700✔
464
            .expect("BUG: failed to read from counts blob");
695,700✔
465

466
        u32::from_be_bytes(bytes)
695,700✔
467
    }
695,700✔
468

469
    /// Write the 32-bit counter at a particular slot.  It's stored in a big-endian representation
470
    /// within the writable handle, at offset 4*slot.
471
    fn set_counts_bin<W: Write + Seek>(counts_blob: &mut W, slot: u32, count: u32) {
524,340✔
472
        counts_blob
524,340✔
473
            .seek(SeekFrom::Start((slot as u64) * 4))
524,340✔
474
            .expect("BUG: failed to seek on counts blob");
524,340✔
475

476
        let bytes = count.to_be_bytes();
524,340✔
477
        counts_blob
524,340✔
478
            .write_all(&bytes)
524,340✔
479
            .expect("BUG: failed to write to counts blob");
524,340✔
480
    }
524,340✔
481

482
    /// Add a raw item to the bloom counter, and return the count it likely has (as an upper bound)
483
    /// Returns 0 if this item is absolutely new.
484
    /// Returns >0 if this item appears represented already.
485
    pub fn insert_raw(&self, tx: &mut DBTx, item: &[u8]) -> Result<u32, db_error> {
35,298✔
486
        let mut count = u32::MAX;
35,298✔
487
        let mut fd = self.open_counts_blob(tx, true)?;
35,298✔
488

489
        for i in 0..self.num_hashes {
352,980✔
490
            let slot = self.hasher.pick_bin(i, item, self.num_bins);
352,980✔
491
            assert!(
352,980✔
492
                slot < self.num_bins,
352,980✔
493
                "BUG: hasher selected a slot outside the bloom counters"
494
            );
495

496
            let bin = BloomCounter::<H>::get_counts_bin(&mut fd, slot);
352,980✔
497
            count = cmp::min(bin, count);
352,980✔
498
            BloomCounter::<H>::set_counts_bin(&mut fd, slot, bin.saturating_add(1));
352,980✔
499
        }
500

501
        Ok(count)
35,298✔
502
    }
35,298✔
503

504
    /// Return the upper bound on the number of times this item has been inserted.
505
    /// It will be 0 if it was never inserted (or was inserted and removed).
506
    pub fn count_raw(&self, conn: &DBConn, item: &[u8]) -> Result<u32, db_error> {
17,136✔
507
        let mut count = u32::MAX;
17,136✔
508
        let mut fd = self.open_counts_blob(conn, false)?;
17,136✔
509

510
        for i in 0..self.num_hashes {
171,360✔
511
            let slot = self.hasher.pick_bin(i, item, self.num_bins);
171,360✔
512
            assert!(
171,360✔
513
                slot < self.num_bins,
171,360✔
514
                "BUG: hasher selected a slot outside the bloom counters"
515
            );
516

517
            let bin = BloomCounter::<H>::get_counts_bin(&mut fd, slot);
171,360✔
518
            if bin == 0 {
171,360✔
519
                return Ok(0);
×
520
            } else {
171,360✔
521
                count = cmp::min(bin, count);
171,360✔
522
            }
171,360✔
523
        }
524
        Ok(count)
17,136✔
525
    }
17,136✔
526

527
    /// Remove an item from the bloom filter.  In order to use this correctly, you must ensure that
528
    /// it was actually inserted via insert_raw() earlier.  Returns the new lower bound on how many
529
    /// times this item was inserted.
530
    pub fn remove_raw(&self, tx: &mut DBTx, item: &[u8]) -> Result<u32, db_error> {
17,136✔
531
        if self.count_raw(tx, item)? == 0 {
17,136✔
532
            return Ok(0);
×
533
        }
17,136✔
534

535
        let mut count = u32::MAX;
17,136✔
536
        let mut fd = self.open_counts_blob(tx, true)?;
17,136✔
537

538
        for i in 0..self.num_hashes {
171,360✔
539
            let slot = self.hasher.pick_bin(i, item, self.num_bins);
171,360✔
540
            assert!(
171,360✔
541
                slot < self.num_bins,
171,360✔
542
                "BUG: hasher selected a slot outside the bloom counters"
543
            );
544

545
            let bin = BloomCounter::<H>::get_counts_bin(&mut fd, slot);
171,360✔
546
            if bin > 0 {
171,360✔
547
                let new_bin = bin - 1;
171,360✔
548
                BloomCounter::<H>::set_counts_bin(&mut fd, slot, new_bin);
171,360✔
549
                count = cmp::min(new_bin, count);
171,360✔
550
            }
171,360✔
551
        }
552

553
        Ok(count)
17,136✔
554
    }
17,136✔
555

556
    /// Extract a bloom filter from the bloom counter.
557
    /// There will be a 1-bit if the counter is positive
558
    pub fn to_bloom_filter(&self, conn: &DBConn) -> Result<BloomFilter<H>, db_error> {
×
559
        let new_hasher = self.hasher.clone();
×
560
        let mut bf = BitField::new(self.num_bins);
×
561

562
        let mut counts_blob = vec![0u8; (self.num_bins as usize) * 4];
×
563
        let mut fd = self.open_counts_blob(conn, false)?;
×
564

565
        fd.read_exact(&mut counts_blob).map_err(db_error::IOError)?;
×
566

567
        for (i, counts_chunk) in counts_blob.chunks_exact(4).enumerate() {
×
568
            if counts_chunk.iter().any(|x| *x > 0) {
×
569
                bf.set(i as u32);
×
570
            }
×
571
        }
572

573
        Ok(BloomFilter {
×
574
            hasher: new_hasher,
×
575
            bits: bf,
×
576
            num_hashes: self.num_hashes,
×
577
        })
×
578
    }
×
579
}
580

581
impl BloomHash for BloomNodeHasher {
582
    /// Pick a bin using the node seed and the count.
583
    /// Uses SipHash-2-4, with the count and seed used to set up the hash's initial state (thereby
584
    /// ensuring that a different initial state -- tantamount to a different hash function --
585
    /// will be used for each of the bloom struct's bins).
586
    /// A cryptographic hash isn't helpful here (and would be considerably slower), since the
587
    /// number of different bins is small enough that someone who's hell-bent on selecting items to
588
    /// create false positives would be able to do so no matter what we do (so why pay a
589
    /// performance penalty if it won't help?).
590
    fn pick_bin(&self, count: u32, data: &[u8], num_bins: u32) -> u32 {
695,700✔
591
        let mut initial_state = Vec::with_capacity(36 + data.len());
695,700✔
592
        initial_state.extend_from_slice(&count.to_be_bytes());
695,700✔
593
        initial_state.extend_from_slice(&self.seed);
695,700✔
594
        initial_state.extend_from_slice(data);
695,700✔
595

596
        let mut hasher = SipHasher::new();
695,700✔
597
        hasher.write(&initial_state);
695,700✔
598

599
        // be sure to remove modulus bias
600
        loop {
601
            let result_64 = hasher.finish();
695,700✔
602
            let result = (result_64 & 0x00000000ffffffff) as u32;
695,700✔
603
            if result < u32::MAX - (u32::MAX % num_bins) {
695,700✔
604
                return result % num_bins;
695,700✔
605
            } else {
×
606
                hasher.write_u64(result_64);
×
607
            }
×
608
        }
609
    }
695,700✔
610

611
    fn get_seed(&self) -> &[u8; 32] {
41,962✔
612
        &self.seed
41,962✔
613
    }
41,962✔
614
}
615

616
#[cfg(test)]
617
pub mod test {
618
    use std::fs;
619

620
    use rand::prelude::*;
621
    use rand::thread_rng;
622
    use rusqlite::OpenFlags;
623

624
    use super::*;
625
    use crate::util_lib::db::{sql_pragma, tx_begin_immediate, tx_busy_handler, DBConn};
626

627
    pub fn setup_bloom_counter(db_name: &str) -> DBConn {
×
628
        let db_path = format!("/tmp/test_bloom_filter_{}.db", db_name);
×
629
        if fs::metadata(&db_path).is_ok() {
×
630
            fs::remove_file(&db_path).unwrap();
×
631
        }
×
632
        let open_flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_CREATE;
×
633

634
        let conn = DBConn::open_with_flags(&db_path, open_flags).unwrap();
×
635

636
        conn.busy_handler(Some(tx_busy_handler)).unwrap();
×
637
        sql_pragma(&conn, "journal_mode", &"WAL".to_string()).unwrap();
×
638
        conn
×
639
    }
×
640

641
    #[test]
642
    fn test_bloom_hash_count() {
×
643
        // https://hur.st/bloomfilter/?n=8192&p=0.001&m=&k=8
644
        let (num_bits, num_hashes) = bloom_hash_count(0.001, 8192);
×
645
        assert_eq!(num_bits, 117_782);
×
646
        assert_eq!(num_hashes, 10);
×
647

648
        // https://hur.st/bloomfilter/?n=8192&p=1.0E-7&m=&k=
649
        let (num_bits, num_hashes) = bloom_hash_count(0.0000001, 8192);
×
650
        assert_eq!(num_bits, 274_823);
×
651
        assert_eq!(num_hashes, 23);
×
652
    }
×
653

654
    #[test]
655
    fn test_bloom_filter_has_all_inserted_items_with_error_rate() {
×
656
        let num_items = 8192;
×
657
        let err_rate = 0.001;
×
658

659
        let hasher = BloomNodeHasher::new(&[0u8; 32]);
×
660
        let mut bf = BloomFilter::new(err_rate, num_items, hasher);
×
661

662
        let mut fp_count = 0; // false positives
×
663

664
        for i in 0..num_items {
×
665
            let mut random_data = [0u8; 32];
×
666
            thread_rng().fill(&mut random_data[..]);
×
667

668
            if bf.contains_raw(&random_data) {
×
669
                fp_count += 1;
×
670
            }
×
671

672
            bf.insert_raw(&random_data);
×
673
            assert!(bf.contains_raw(&random_data));
×
674
        }
675

676
        let calculated_error_rate = (fp_count as f64) / (num_items as f64);
×
677
        eprintln!(
×
678
            "fp_count = {}, num_items = {}, err_rate = {}, calculated_error_rate = {}",
679
            fp_count, num_items, err_rate, calculated_error_rate
680
        );
681
        assert!(calculated_error_rate <= err_rate);
×
682
    }
×
683

684
    #[test]
685
    fn test_bloom_counter_has_all_inserted_items_with_error_rate() {
×
686
        let num_items = 8192;
×
687
        let err_rate = 0.001;
×
688

689
        let mut db = setup_bloom_counter(function_name!());
×
690
        let hasher = BloomNodeHasher::new(&[0u8; 32]);
×
691

692
        let bf = {
×
693
            let mut tx = tx_begin_immediate(&mut db).unwrap();
×
694
            let bf =
×
695
                BloomCounter::new(&mut tx, "bloom_counter", err_rate, num_items, hasher).unwrap();
×
696
            tx.commit().unwrap();
×
697
            bf
×
698
        };
699

700
        let mut fp_count = 0; // false positives
×
701

702
        let mut tx = tx_begin_immediate(&mut db).unwrap();
×
703
        for i in 0..num_items {
×
704
            let mut random_data = [0u8; 32];
×
705
            thread_rng().fill(&mut random_data[..]);
×
706

707
            if bf.count_raw(&tx, &random_data).unwrap() > 0 {
×
708
                fp_count += 1;
×
709
            }
×
710

711
            bf.insert_raw(&mut tx, &random_data).unwrap();
×
712
            assert!(bf.count_raw(&tx, &random_data).unwrap() > 0);
×
713
        }
714
        tx.commit().unwrap();
×
715

716
        let calculated_error_rate = (fp_count as f64) / (num_items as f64);
×
717
        eprintln!(
×
718
            "fp_count = {}, num_items = {}, err_rate = {}, calculated_error_rate = {}",
719
            fp_count, num_items, err_rate, calculated_error_rate
720
        );
721
        assert!(calculated_error_rate <= err_rate);
×
722
    }
×
723

724
    #[test]
725
    fn test_bloom_counter_is_invertible() {
×
726
        let num_items = 8192;
×
727
        let err_rate = 0.001;
×
728

729
        let mut db = setup_bloom_counter(function_name!());
×
730

731
        let hasher = BloomNodeHasher::new(&[0u8; 32]);
×
732

733
        let bf = {
×
734
            let mut tx = tx_begin_immediate(&mut db).unwrap();
×
735
            let bf =
×
736
                BloomCounter::new(&mut tx, "bloom_counter", err_rate, num_items, hasher).unwrap();
×
737
            tx.commit().unwrap();
×
738
            bf
×
739
        };
740

741
        let mut data = vec![];
×
742
        let mut fp_count = 0; // false positives
×
743

744
        let mut tx = tx_begin_immediate(&mut db).unwrap();
×
745
        for i in 0..num_items {
×
746
            let mut random_data = [0u8; 32];
×
747
            thread_rng().fill(&mut random_data[..]);
×
748

749
            if bf.count_raw(&tx, &random_data).unwrap() > 0 {
×
750
                fp_count += 1;
×
751
            }
×
752

753
            bf.insert_raw(&mut tx, &random_data).unwrap();
×
754
            assert!(bf.count_raw(&tx, &random_data).unwrap() > 0);
×
755

756
            data.push(random_data);
×
757
        }
758
        tx.commit().unwrap();
×
759

760
        let calculated_error_rate = (fp_count as f64) / (num_items as f64);
×
761
        eprintln!(
×
762
            "fp_count = {}, num_items = {}, err_rate = {}, calculated_error_rate = {}",
763
            fp_count, num_items, err_rate, calculated_error_rate
764
        );
765
        assert!(calculated_error_rate <= err_rate);
×
766

767
        let mut tx = tx_begin_immediate(&mut db).unwrap();
×
768

769
        fp_count = 0;
×
770
        for random_data in data.iter() {
×
771
            bf.remove_raw(&mut tx, random_data).unwrap();
×
772
            if bf.count_raw(&tx, random_data).unwrap() > 0 {
×
773
                fp_count += 1;
×
774
            }
×
775
        }
776
        tx.commit().unwrap();
×
777

778
        let calculated_error_rate = (fp_count as f64) / (num_items as f64);
×
779
        eprintln!(
×
780
            "fp_count = {}, num_items = {}, err_rate = {}, calculated_error_rate = {}",
781
            fp_count, num_items, err_rate, calculated_error_rate
782
        );
783
        assert!(calculated_error_rate <= err_rate);
×
784

785
        // everything is removed
786
        for random_data in data.iter() {
×
787
            assert_eq!(bf.count_raw(&db, random_data).unwrap(), 0);
×
788
        }
789
    }
×
790

791
    #[test]
792
    fn test_bloom_counter_is_invertible_over_iterations() {
×
793
        let num_items = 8192;
×
794
        let err_rate = 0.001;
×
795

796
        let mut db = setup_bloom_counter(function_name!());
×
797

798
        let hasher = BloomNodeHasher::new(&[0u8; 32]);
×
799

800
        let bf = {
×
801
            let mut tx = tx_begin_immediate(&mut db).unwrap();
×
802
            let bf =
×
803
                BloomCounter::new(&mut tx, "bloom_counter", err_rate, num_items, hasher).unwrap();
×
804
            tx.commit().unwrap();
×
805
            bf
×
806
        };
807

808
        let mut data = vec![];
×
809
        let mut fp_count = 0; // false positives
×
810
        let remove_delay = 2;
×
811

812
        for i in 0..(remove_delay * 10) {
×
813
            eprintln!("Add {} items for pass {}", num_items / remove_delay, i);
×
814
            let mut tx = tx_begin_immediate(&mut db).unwrap();
×
815
            for i in 0..(num_items / remove_delay) {
×
816
                let mut random_data = [0u8; 32];
×
817
                thread_rng().fill(&mut random_data[..]);
×
818

819
                if bf.count_raw(&tx, &random_data).unwrap() > 0 {
×
820
                    fp_count += 1;
×
821
                }
×
822

823
                bf.insert_raw(&mut tx, &random_data).unwrap();
×
824
                assert!(bf.count_raw(&tx, &random_data).unwrap() > 0);
×
825

826
                data.push(random_data);
×
827
            }
828
            tx.commit().unwrap();
×
829

830
            let calculated_error_rate = (fp_count as f64) / (num_items as f64);
×
831
            eprintln!(
×
832
                "fp_count = {}, num_items = {}, err_rate = {}, calculated_error_rate = {}",
833
                fp_count, num_items, err_rate, calculated_error_rate
834
            );
835
            assert!(calculated_error_rate <= err_rate);
×
836

837
            let mut tx = tx_begin_immediate(&mut db).unwrap();
×
838

839
            if i + 1 >= remove_delay {
×
840
                let remove_start = ((num_items / remove_delay) * (i + 1 - remove_delay)) as usize;
×
841
                let remove_end = remove_start + ((num_items / remove_delay) as usize);
×
842

843
                // this leaves $num_items in the bloom filter
844
                assert_eq!(data.len() - remove_start, num_items as usize);
×
845

846
                let remove_data = &data[remove_start..remove_end];
×
847
                eprintln!(
×
848
                    "Remove {} items from pass {}",
849
                    remove_data.len(),
×
850
                    i + 1 - remove_delay
×
851
                );
852
                fp_count = 0;
×
853
                for random_data in remove_data.iter() {
×
854
                    bf.remove_raw(&mut tx, random_data).unwrap();
×
855
                    if bf.count_raw(&tx, random_data).unwrap() > 0 {
×
856
                        fp_count += 1;
×
857
                    }
×
858
                }
859
                tx.commit().unwrap();
×
860

861
                let calculated_error_rate = (fp_count as f64) / (num_items as f64);
×
862
                eprintln!(
×
863
                    "fp_count = {}, num_items = {}, err_rate = {}, calculated_error_rate = {}",
864
                    fp_count, num_items, err_rate, calculated_error_rate
865
                );
866
                assert!(calculated_error_rate <= err_rate);
×
867

868
                // everything is removed, up to fp_rate
869
                let mut check_fp_count = 0;
×
870
                for random_data in remove_data.iter() {
×
871
                    if bf.count_raw(&db, random_data).unwrap() > 0 {
×
872
                        check_fp_count += 1;
×
873
                    }
×
874
                }
875
                assert!(check_fp_count <= fp_count);
×
876
            }
×
877
        }
878
    }
×
879

880
    #[test]
881
    fn test_bloom_bitfield_codec() {
×
882
        // aligned, full
883
        let bitfield = BitField(
×
884
            vec![
×
885
                0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd,
×
886
                0xee, 0xff,
×
887
            ],
×
888
            128,
×
889
        );
×
890
        let bytes = bitfield.serialize_to_vec();
×
891
        assert_eq!(
×
892
            bytes,
893
            vec![
×
894
                0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x11, 0x22, 0x33, 0x44,
895
                0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
896
            ]
897
        );
898

899
        assert_eq!(
×
900
            BitField::consensus_deserialize(&mut &bytes[..]).unwrap(),
×
901
            bitfield
902
        );
903

904
        // unaligned, full
905
        let bitfield = BitField(
×
906
            vec![
×
907
                0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd,
×
908
                0xee, 0x01,
×
909
            ],
×
910
            121,
×
911
        );
×
912
        let bytes = bitfield.serialize_to_vec();
×
913
        assert_eq!(
×
914
            bytes,
915
            vec![
×
916
                0x00, 0x00, 0x00, 0x79, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x11, 0x22, 0x33, 0x44,
917
                0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0x01
918
            ]
919
        );
920

921
        assert_eq!(
×
922
            BitField::consensus_deserialize(&mut &bytes[..]).unwrap(),
×
923
            bitfield
924
        );
925

926
        // aligned, sparse
927
        let bitfield = BitField(
×
928
            vec![
×
929
                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
×
930
                0x00, 0x08,
×
931
            ],
×
932
            128,
×
933
        );
×
934
        let bytes = bitfield.serialize_to_vec();
×
935
        assert_eq!(
×
936
            bytes,
937
            vec![
×
938
                0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00,
939
                0x00, 0x00, 0x0f, 0x08
940
            ]
941
        );
942

943
        // unaligned, sparse
944
        let bitfield = BitField(
×
945
            vec![
×
946
                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
×
947
                0x00, 0x08,
×
948
            ],
×
949
            121,
×
950
        );
×
951
        let bytes = bitfield.serialize_to_vec();
×
952
        assert_eq!(
×
953
            bytes,
954
            vec![
×
955
                0x00, 0x00, 0x00, 0x79, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00,
956
                0x00, 0x00, 0x0f, 0x08
957
            ]
958
        );
959

960
        // vec_len too big
961
        let mut bitfield_too_big = vec![];
×
962
        encode_bitfield(&mut bitfield_too_big, &vec![0u8; MAX_MESSAGE_LEN as usize]).unwrap();
×
963
        let e = decode_bitfield(&mut &bitfield_too_big[..]);
×
964
        if let Err(codec_error::OverflowError(s)) = e {
×
965
            assert_eq!(s, "vec_len is too big".to_string());
×
966
        } else {
967
            error!("Unexpected decode_bitfield result: {e:?}");
×
968
            panic!();
×
969
        }
970
    }
×
971

972
    #[test]
973
    fn test_bloom_bitfield_sparse_threshold() {
×
974
        // a bitfield that has only two bits set, one each in the first two bytes
975
        let bitfield = BitField(
×
976
            vec![
×
977
                0x01, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
×
978
                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
×
979
                0x00, 0x00, 0x00, 0x00,
×
980
            ],
×
981
            256,
×
982
        );
×
983
        let mut bytes = bitfield.serialize_to_vec();
×
984

985
        // 4 for bit count, 1 for the encoding marker, 4 for byte length,
986
        // 4 for `num_filled`, two times 5 for index/byte pairs
987
        assert_eq!(bytes.len(), 23);
×
988

989
        assert_eq!(bytes[4], BitFieldEncoding::Sparse as u8);
×
990

991
        // Change the bit count in the serialization to 16. Technically that is
992
        // still a valid representation, but at that size, it should've been
993
        // serialized using full, not sparse, encoding.
994
        bytes[2] = 0; // bit count 2nd-LSB
×
995
        bytes[3] = 16; // bit count LSB
×
996
        bytes[8] = 2; // byte length LSB
×
997

998
        let result = BitField::consensus_deserialize(&mut &bytes[..]);
×
999

1000
        if let Err(codec_error::OverflowError(message)) = result {
×
1001
            assert_eq!(
×
1002
                message,
1003
                "Non-sparse bitfield should not use sparse encoding."
1004
            );
1005
        } else {
1006
            error!("Unexpected BitField::consensus_deserialize result: {result:?}");
×
1007
            panic!();
×
1008
        }
1009
    }
×
1010

1011
    #[test]
1012
    fn test_bloom_filter_codec() {
×
1013
        let num_items = 8192;
×
1014
        let err_rate = 0.001;
×
1015

1016
        let hasher = BloomNodeHasher::new(&[0u8; 32]);
×
1017
        let mut bf = BloomFilter::new(err_rate, num_items, hasher);
×
1018

1019
        for i in 0..num_items {
×
1020
            let encoded_bf = bf.serialize_to_vec();
×
1021
            let decoded_bf =
×
1022
                BloomFilter::<BloomNodeHasher>::consensus_deserialize(&mut &encoded_bf[..])
×
1023
                    .unwrap();
×
1024
            assert_eq!(decoded_bf, bf);
×
1025

1026
            let mut random_data = [0u8; 32];
×
1027
            thread_rng().fill(&mut random_data[..]);
×
1028

1029
            bf.insert_raw(&random_data);
×
1030
            assert!(bf.contains_raw(&random_data));
×
1031
        }
1032
    }
×
1033

1034
    #[test]
1035
    fn test_bloom_counter_to_filter() {
×
1036
        let num_items = 8192;
×
1037
        let err_rate = 0.001;
×
1038

1039
        let mut db = setup_bloom_counter(function_name!());
×
1040

1041
        let hasher = BloomNodeHasher::new(&[0u8; 32]);
×
1042

1043
        let bc = {
×
1044
            let mut tx = tx_begin_immediate(&mut db).unwrap();
×
1045
            let bc =
×
1046
                BloomCounter::new(&mut tx, "bloom_counter", err_rate, num_items, hasher).unwrap();
×
1047
            tx.commit().unwrap();
×
1048
            bc
×
1049
        };
1050

1051
        let mut tx = tx_begin_immediate(&mut db).unwrap();
×
1052
        let mut data = vec![];
×
1053
        for i in 0..num_items {
×
1054
            let mut random_data = [0u8; 32];
×
1055
            thread_rng().fill(&mut random_data[..]);
×
1056

1057
            bc.insert_raw(&mut tx, &random_data).unwrap();
×
1058
            assert!(bc.count_raw(&tx, &random_data).unwrap() > 0);
×
1059

1060
            data.push(random_data);
×
1061

1062
            if i % 128 == 0 {
×
1063
                let bf = bc.to_bloom_filter(&tx).unwrap();
×
1064

1065
                for random_data in data.iter() {
×
1066
                    assert!(bf.contains_raw(random_data));
×
1067
                }
1068
            }
×
1069
        }
1070
        tx.commit().unwrap();
×
1071
    }
×
1072
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc