• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.21
/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.theta;
21

22
import static java.lang.Math.max;
23
import static java.lang.Math.min;
24
import static java.lang.Math.sqrt;
25
import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
26
import static org.apache.datasketches.common.Util.checkBounds;
27
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
28
import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
29
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
30
import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs;
31
import static org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor;
32
import static org.apache.datasketches.theta.PreambleUtil.extractP;
33
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
34
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
35
import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountIncremented;
36
import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountNotIncremented;
37
import static org.apache.datasketches.theta.UpdateReturnState.RejectedDuplicate;
38
import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta;
39
import static org.apache.datasketches.thetacommon.HashOperations.STRIDE_MASK;
40

41
import java.util.Objects;
42

43
import org.apache.datasketches.common.Family;
44
import org.apache.datasketches.common.ResizeFactor;
45
import org.apache.datasketches.common.SketchesArgumentException;
46
import org.apache.datasketches.memory.Memory;
47
import org.apache.datasketches.memory.WritableMemory;
48
import org.apache.datasketches.thetacommon.HashOperations;
49
import org.apache.datasketches.thetacommon.ThetaUtil;
50

51
/**
52
 * This sketch uses the
53
 * <a href="{@docRoot}/resources/dictionary.html#thetaSketch">Theta Sketch Framework</a>
54
 * and the
55
 * <a href="{@docRoot}/resources/dictionary.html#alphaTCF">Alpha TCF</a> algorithm
56
 * with a single cache.
57
 *
58
 * @author Lee Rhodes
59
 * @author Kevin Lang
60
 */
61
final class HeapAlphaSketch extends HeapUpdateSketch {
1✔
62
  private static final int ALPHA_MIN_LG_NOM_LONGS = 9; //The smallest Log2 k allowed => 512.
63
  private final double alpha_;  // computed from lgNomLongs
64
  private final long split1_;   // computed from alpha and p
65

66
  private int lgArrLongs_;
67
  private int hashTableThreshold_;  //never serialized
68
  private int curCount_ = 0;
1✔
69
  private long thetaLong_;
70
  private boolean empty_ = true;
1✔
71

72
  private long[] cache_;
73
  private boolean dirty_ = false;
1✔
74

75
  private HeapAlphaSketch(final int lgNomLongs, final long seed, final float p,
76
      final ResizeFactor rf, final double alpha, final long split1) {
77
    super(lgNomLongs, seed, p, rf);
1✔
78
    alpha_ = alpha;
1✔
79
    split1_ = split1;
1✔
80
  }
1✔
81

82
  /**
83
   * Get a new sketch instance on the java heap.
84
   *
85
   * @param lgNomLongs <a href="{@docRoot}/resources/dictionary.html#lgNomLongs">See lgNomLongs</a>
86
   * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>
87
   * @param p <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability, <i>p</i></a>
88
   * @param rf <a href="{@docRoot}/resources/dictionary.html#resizeFactor">See Resize Factor</a>
89
   * @return instance of this sketch
90
   */
91
  static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, final float p,
92
      final ResizeFactor rf) {
93

94
    if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) {
1✔
95
      throw new SketchesArgumentException(
1✔
96
        "This sketch requires a minimum nominal entries of " + (1 << ALPHA_MIN_LG_NOM_LONGS));
97
    }
98

99
    final double nomLongs = (1L << lgNomLongs);
1✔
100
    final double alpha = nomLongs / (nomLongs + 1.0);
1✔
101
    final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE);
1✔
102

103
    final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, seed, p, rf, alpha, split1);
1✔
104

105
    final int lgArrLongs = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
1✔
106
    has.lgArrLongs_ = lgArrLongs;
1✔
107
    has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
1✔
108
    has.curCount_ = 0;
1✔
109
    has.thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE);
1✔
110
    has.empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
1✔
111
    has.cache_ = new long[1 << lgArrLongs];
1✔
112
    return has;
1✔
113
  }
114

115
  /**
116
   * Heapify a sketch from a Memory object containing sketch data.
117
   * @param srcMem The source Memory object.
118
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
119
   * It must have a size of at least 24 bytes.
120
   * @param expectedSeed the seed used to validate the given Memory image.
121
   * <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a>
122
   * @return instance of this sketch
123
   */
124
  static HeapAlphaSketch heapifyInstance(final Memory srcMem, final long expectedSeed) {
125
    Objects.requireNonNull(srcMem, "Source Memory must not be null");
1✔
126
    checkBounds(0, 24, srcMem.getCapacity());
1✔
127
    final int preambleLongs = extractPreLongs(srcMem);            //byte 0
1✔
128
    final int lgNomLongs = extractLgNomLongs(srcMem);             //byte 3
1✔
129
    final int lgArrLongs = extractLgArrLongs(srcMem);             //byte 4
1✔
130

131
    checkAlphaFamily(srcMem, preambleLongs, lgNomLongs);
1✔
132
    checkMemIntegrity(srcMem, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs);
1✔
133

134
    final float p = extractP(srcMem);                             //bytes 12-15
1✔
135
    final int memlgRF = extractLgResizeFactor(srcMem);            //byte 0
1✔
136
    ResizeFactor memRF = ResizeFactor.getRF(memlgRF);
1✔
137

138
    final double nomLongs = (1L << lgNomLongs);
1✔
139
    final double alpha = nomLongs / (nomLongs + 1.0);
1✔
140
    final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE);
1✔
141

142
    if (isResizeFactorIncorrect(srcMem, lgNomLongs, lgArrLongs)) {
1✔
143
      memRF = ResizeFactor.X2; //X2 always works.
1✔
144
    }
145

146
    final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, memRF, alpha, split1);
1✔
147
    has.lgArrLongs_ = lgArrLongs;
1✔
148
    has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
1✔
149
    has.curCount_ = extractCurCount(srcMem);
1✔
150
    has.thetaLong_ = extractThetaLong(srcMem);
1✔
151
    has.empty_ = PreambleUtil.isEmptyFlag(srcMem);
1✔
152
    has.cache_ = new long[1 << lgArrLongs];
1✔
153
    srcMem.getLongArray(preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table
1✔
154
    return has;
1✔
155
  }
156

157
  //Sketch
158

159
  @Override
160
  public Family getFamily() {
161
    return Family.ALPHA;
1✔
162
  }
163

164
  @Override
165
  public HashIterator iterator() {
166
    return new HeapHashIterator(cache_, thetaLong_);
1✔
167
  }
168

169
  @Override
170
  public double getEstimate() {
171
    return (thetaLong_ > split1_)
1✔
172
        ? Sketch.estimate(thetaLong_, curCount_)
1✔
173
        : (1 << lgNomLongs_) * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong_);
174
  }
175

176
  @Override
177
  public double getLowerBound(final int numStdDev) {
178
    if ((numStdDev < 1) || (numStdDev > 3)) {
1✔
179
      throw new SketchesArgumentException("numStdDev can only be the values 1, 2 or 3.");
1✔
180
    }
181
    double lb;
182
    if (isEstimationMode()) {
1✔
183
      final int validCount = getRetainedEntries(true);
1✔
184
      if (validCount > 0) {
1✔
185
        final double est = getEstimate();
1✔
186
        final double var = getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), validCount);
1✔
187
        lb = est - (numStdDev * sqrt(var));
1✔
188
        lb = max(lb, 0.0);
1✔
189
      }
1✔
190
      else {
191
        lb = 0.0;
1✔
192
      }
193
    }
1✔
194
    else {
195
      lb = curCount_;
1✔
196
    }
197
    return lb;
1✔
198
  }
199

200
  @Override
201
  public int getRetainedEntries(final boolean valid) {
202
    if (curCount_ > 0) {
1✔
203
      if (valid && isDirty()) {
1✔
204
        final int curCount = HashOperations.countPart(getCache(), getLgArrLongs(), getThetaLong());
1✔
205
        return curCount;
1✔
206
      }
207
    }
208
    return curCount_;
1✔
209
  }
210

211
  @Override
212
  public long getThetaLong() {
213
    return thetaLong_;
1✔
214
  }
215

216
  @Override
217
  public double getUpperBound(final int numStdDev) {
218
    if ((numStdDev < 1) || (numStdDev > 3)) {
1✔
219
      throw new SketchesArgumentException("numStdDev can only be the values 1, 2 or 3.");
1✔
220
    }
221
    if (isEstimationMode()) {
1✔
222
      final double var =
1✔
223
          getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), getRetainedEntries(true));
1✔
224
      return getEstimate() + (numStdDev * sqrt(var));
1✔
225
    }
226
    return curCount_;
1✔
227
  }
228

229
  @Override
230
  public boolean isEmpty() {
231
    return empty_;
1✔
232
  }
233

234
  /*
235
   * Alpha Sketch Preamble Layout ( same as Theta UpdateSketch )
236
   * <pre>
237
   * Long || Start Byte Adr:
238
   * Adr:
239
   *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |        0           |
240
   *  0   ||    Seed Hash    | Flags  |  LgArr | LgNom  | FamID  | SerVer | lgRF | PreLongs=3  |
241
   *
242
   *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
243
   *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
244
   *
245
   *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
246
   *  2   ||---------------------------------Theta---------------------------------------------|
247
   * </pre>
248
   */
249

250
  @Override
251
  public byte[] toByteArray() {
252
    return toByteArray(Family.ALPHA.getMinPreLongs(), (byte) Family.ALPHA.getID());
1✔
253
  }
254

255
  //UpdateSketch
256

257
  @Override
258
  public UpdateSketch rebuild() {
259
    if (isDirty()) {
1✔
260
      rebuildDirty();
1✔
261
    }
262
    return this;
1✔
263
  }
264

265
  @Override
266
  public final void reset() {
267
    final int lgArrLongs =
1✔
268
        ThetaUtil.startingSubMultiple(lgNomLongs_ + 1, getResizeFactor().lg(), ThetaUtil.MIN_LG_ARR_LONGS);
1✔
269
    if (lgArrLongs == lgArrLongs_) {
1✔
270
      final int arrLongs = cache_.length;
1✔
271
      assert (1 << lgArrLongs_) == arrLongs;
1✔
272
      java.util.Arrays.fill(cache_, 0L);
1✔
273
    }
1✔
274
    else {
275
      cache_ = new long[1 << lgArrLongs];
1✔
276
      lgArrLongs_ = lgArrLongs;
1✔
277
    }
278
    hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
1✔
279
    empty_ = true;
1✔
280
    curCount_ = 0;
1✔
281
    thetaLong_ =  (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE);
1✔
282
    dirty_ = false;
1✔
283
  }
1✔
284

285
  //restricted methods
286

287
  @Override
288
  int getCompactPreambleLongs() {
289
    return CompactOperations.computeCompactPreLongs(empty_, curCount_, thetaLong_);
1✔
290
  }
291

292
  @Override
293
  int getCurrentPreambleLongs() {
294
    return Family.ALPHA.getMinPreLongs();
1✔
295
  }
296

297
  @Override
298
  WritableMemory getMemory() {
299
    return null;
1✔
300
  }
301

302
  @Override
303
  long[] getCache() {
304
    return cache_;
1✔
305
  }
306

307
  @Override
308
  boolean isDirty() {
309
    return dirty_;
1✔
310
  }
311

312
  @Override
313
  boolean isOutOfSpace(final int numEntries) {
314
    return numEntries > hashTableThreshold_;
1✔
315
  }
316

317
  @Override
318
  int getLgArrLongs() {
319
    return lgArrLongs_;
1✔
320
  }
321

322
  @Override
323
  UpdateReturnState hashUpdate(final long hash) {
324
    HashOperations.checkHashCorruption(hash);
1✔
325
    empty_ = false;
1✔
326

327
    //The over-theta test
328
    if (HashOperations.continueCondition(thetaLong_, hash)) {
1✔
329
      return RejectedOverTheta; //signal that hash was rejected due to theta.
1✔
330
    }
331

332
    //The duplicate/inserted tests
333
    if (dirty_) { //may have dirty values, must be at tgt size
1✔
334
      return enhancedHashInsert(cache_, hash);
1✔
335
    }
336

337
    //NOT dirty, the other duplicate or inserted test
338
    if (HashOperations.hashSearchOrInsert(cache_, lgArrLongs_, hash) >= 0) {
1✔
339
      return UpdateReturnState.RejectedDuplicate;
1✔
340
    }
341
    //insertion occurred, must increment
342
    curCount_++;
1✔
343
    final int r = (thetaLong_ > split1_) ? 0 : 1; //are we in sketch mode? (i.e., seen k+1 inserts?)
1✔
344
    if (r == 0) { //not yet sketch mode (has not seen k+1 inserts), but could be sampling
1✔
345
      if (curCount_ > (1 << lgNomLongs_)) { // > k
1✔
346
        //Reached the k+1 insert. Must be at tgt size or larger.
347
        //Transition to Sketch Mode. Happens only once.
348
        //Decrement theta, make dirty, don't bother check size, already not-empty.
349
        thetaLong_ = (long) (thetaLong_ * alpha_);
1✔
350
        dirty_ = true; //now may have dirty values
1✔
351
      }
352
      else {
353
        //inserts (not entries!) <= k. It may not be at tgt size.
354
        //Check size, don't decrement theta. cnt already ++, empty_ already false;
355
        if (isOutOfSpace(curCount_)) {
1✔
356
          resizeClean(); //not dirty, not at tgt size.
1✔
357
        }
358
      }
359
    }
360
    else { //r > 0: sketch mode and not dirty (e.g., after a rebuild).
361
      //dec theta, make dirty, cnt already ++, must be at tgt size or larger. check for rebuild
362
      assert (lgArrLongs_ > lgNomLongs_) : "lgArr: " + lgArrLongs_ + ", lgNom: " + lgNomLongs_;
1✔
363
      thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
1✔
364
      dirty_ = true; //now may have dirty values
1✔
365
      if (isOutOfSpace(curCount_)) {
1✔
366
        rebuildDirty(); // at tgt size and maybe dirty
×
367
      }
368
    }
369
    return UpdateReturnState.InsertedCountIncremented;
1✔
370
  }
371

372
  /**
373
   * Enhanced Knuth-style Open Addressing, Double Hash insert.
374
   * The insertion process will overwrite an already existing, dirty (over-theta) value if one is
375
   * found in the search.
376
   * If an empty cell is found first, it will be inserted normally.
377
   *
378
   * @param hashTable the hash table to insert into
379
   * @param hash must not be 0. If not a duplicate, it will be inserted into the hash array
380
   * @return <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
381
   */
382
  final UpdateReturnState enhancedHashInsert(final long[] hashTable, final long hash) {
383
    final int arrayMask = (1 << lgArrLongs_) - 1; // arrayLongs -1
1✔
384
    // make odd and independent of curProbe:
385
    final int stride = (2 * (int) ((hash >>> lgArrLongs_) & STRIDE_MASK)) + 1;
1✔
386
    int curProbe = (int) (hash & arrayMask);
1✔
387
    long curTableHash = hashTable[curProbe];
1✔
388
    final int loopIndex = curProbe;
1✔
389

390
    // This is the enhanced part
391
    // Search for duplicate or zero, or opportunity to replace garbage.
392
    while ((curTableHash != hash) && (curTableHash != 0)) {
1✔
393
      // curHash is not a duplicate and not zero
394

395
      if (curTableHash >= thetaLong_) { // curTableHash is garbage, do enhanced insert
1✔
396
        final int rememberPos = curProbe; // remember its position.
1✔
397
        // Now we must make sure there are no duplicates in this search path,
398
        //   so we keep searching
399
        curProbe = (curProbe + stride) & arrayMask; // move forward
1✔
400
        curTableHash = hashTable[curProbe];
1✔
401
        while ((curTableHash != hash) && (curTableHash != 0)) {
1✔
402
          curProbe = (curProbe + stride) & arrayMask;
1✔
403
          curTableHash = hashTable[curProbe];
1✔
404
        }
405
        // curTableHash is a duplicate or zero
406
        if (curTableHash == hash) {
1✔
407
          return RejectedDuplicate; // duplicate, just return
1✔
408
        }
409
        assert (curTableHash == 0); // must be zero
1✔
410
        // Now that we know there are no duplicates we can
411
        // go back and insert at first garbage value position
412
        hashTable[rememberPos] = hash;
1✔
413
        thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
1✔
414
        dirty_ = true; //the decremented theta could have produced a new dirty value
1✔
415
        return InsertedCountNotIncremented;
1✔
416
      }
417

418
      // curTableHash was not a duplicate, not zero, and NOT garbage,
419
      // so we keep searching
420
      assert (curTableHash < thetaLong_);
1✔
421
      curProbe = (curProbe + stride) & arrayMask;
1✔
422
      curTableHash = hashTable[curProbe];
1✔
423

424
      // ensure no infinite loop
425
      if (curProbe == loopIndex) {
1✔
426
        throw new SketchesArgumentException("No empty slot in table!");
1✔
427
      }
428
      // end of Enhanced insert
429
    } // end while and search
430

431
    // curTableHash is a duplicate or zero and NOT garbage
432
    if (curTableHash == hash) {
1✔
433
      return RejectedDuplicate; // duplicate, just return
1✔
434
    }
435
    // must be zero, so insert and increment
436
    assert (curTableHash == 0);
1✔
437
    hashTable[curProbe] = hash;
1✔
438
    thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
1✔
439
    dirty_ = true; //the decremented theta could have produced a new dirty value
1✔
440
    if (++curCount_ > hashTableThreshold_) {
1✔
441
      rebuildDirty(); //at tgt size and maybe dirty
1✔
442
    }
443
    return InsertedCountIncremented;
1✔
444
  }
445

446
  //At tgt size or greater
447
  //Checks for rare lockup condition
448
  // Used by hashUpdate(), rebuild()
449
  private final void rebuildDirty() {
450
    final int curCountBefore = curCount_;
1✔
451
    forceRebuildDirtyCache(); //changes curCount_ only
1✔
452
    if (curCountBefore == curCount_) {
1✔
453
      //clean but unsuccessful at reducing count, must take drastic measures, very rare.
454
      forceResizeCleanCache(1);
×
455
    }
456
  }
1✔
457

458
  //curCount > hashTableThreshold
459
  //Checks for rare lockup condition
460
  // Used by hashUpdate()
461
  private final void resizeClean() {
462
    //must resize, but are we at tgt size?
463
    final int lgTgtLongs = lgNomLongs_ + 1;
1✔
464
    if (lgTgtLongs > lgArrLongs_) {
1✔
465
      //not yet at tgt size
466
      final ResizeFactor rf = getResizeFactor();
1✔
467
      final int lgDeltaLongs = lgTgtLongs - lgArrLongs_; //must be > 0
1✔
468
      final int lgResizeFactor = max(min(rf.lg(), lgDeltaLongs), 1); //rf_.lg() could be 0
1✔
469
      forceResizeCleanCache(lgResizeFactor);
1✔
470
    }
1✔
471
    else {
472
      //at tgt size or larger, no dirty values, must take drastic measures, very rare.
473
      forceResizeCleanCache(1);
×
474
    }
475
  }
1✔
476

477
  //Force resize. Changes lgArrLongs_ only. Theta doesn't change, count doesn't change.
478
  // Used by rebuildDirty(), resizeClean()
479
  private final void forceResizeCleanCache(final int lgResizeFactor) {
480
    assert (!dirty_); // Should never be dirty before a resize.
1✔
481
    lgArrLongs_ += lgResizeFactor; // new tgt size
1✔
482
    final long[] tgtArr = new long[1 << lgArrLongs_];
1✔
483
    final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_);
1✔
484
    assert (curCount_ == newCount);
1✔
485
    curCount_ = newCount;
1✔
486
    cache_ = tgtArr;
1✔
487
    hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
1✔
488
  }
1✔
489

490
  //Cache stays the same size. Must be dirty. Theta doesn't change, count will change.
491
  // Used by rebuildDirtyAtTgtSize()
492
  private final void forceRebuildDirtyCache() {
493
    final long[] tgtArr = new long[1 << lgArrLongs_];
1✔
494
    curCount_ = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_);
1✔
495
    cache_ = tgtArr;
1✔
496
    dirty_ = false;
1✔
497
    //hashTableThreshold stays the same
498
  }
1✔
499

500
  // @formatter:off
501
  /**
502
   * Computes an estimate of the error variance based on Historic Inverse Probability (HIP)
503
   * estimators.  See Cohen: All-Distances Sketches, Revisited: HIP Estimators for Massive Graph
504
   * Analysis, Nov 2014.
505
   * <pre>
506
   * Table of sketch states and how Upper and Lower Bounds are computed
507
   *
508
   * Theta P    Count  Empty  EstMode Est   UB  LB   Comments
509
   * 1.0   1.0  0      T      F       0     0   0    Empty Sketch-mode only sketch
510
   * 1.0   1.0  N      F      F       N     N   N    Degenerate Sketch-mode only sketch
511
   * &lt;1.0  1.0  -      F      T       est   HIP HIP  Normal Sketch-mode only sketch
512
   *  P    &lt;1.0 0      T      F       0     0   0    Virgin sampling sketch
513
   *  P    &lt;1.0 N      F      T       est   HIP HIP  Degenerate sampling sketch
514
   *  &lt;P   &lt;1.0 N      F      T       est   HIP HIP  Sampling sketch also in sketch-mode
515
   * </pre>
516
   * @param k alias for nominal entries.
517
   * @param p <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability, <i>p</i></a>.
518
   * @param alpha the value of alpha for this sketch
519
   * @param theta <a href="{@docRoot}/resources/dictionary.html#theta">See <i>theta</i></a>.
520
   * @param count the current valid count.
521
   * @return the variance.
522
   */
523
  // @formatter:on
524
  private static final double getVariance(final double k, final double p, final double alpha,
525
      final double theta, final int count) {
526
    final double kPlus1 = k + 1.0;
1✔
527
    final double y = 1.0 / p;
1✔
528
    final double ySq = y * y;
1✔
529
    final double ySqMinusY = ySq - y;
1✔
530
    final int r = getR(theta, alpha, p);
1✔
531
    final double result;
532
    if (r == 0) {
1✔
533
      result = count * ySqMinusY;
1✔
534
    }
535
    else if (r == 1) {
1✔
536
      result = kPlus1 * ySqMinusY; //term1
×
537
    }
538
    else { //r > 1
539
      final double b = 1.0 / alpha;
1✔
540
      final double bSq = b * b;
1✔
541
      final double x = p / theta;
1✔
542
      final double xSq = x * x;
1✔
543
      final double term1 = kPlus1 * ySqMinusY;
1✔
544
      final double term2 = y / (1.0 - bSq);
1✔
545
      final double term3 = (((y * bSq) - (y * xSq) - b - bSq) + x + (x * b));
1✔
546
      result = term1 + (term2 * term3);
1✔
547
    }
548
    final double term4 = (1 - theta) / (theta * theta);
1✔
549
    return result + term4;
1✔
550
  }
551

552
  /**
553
   * Computes whether there have been 0, 1, or 2 or more actual insertions into the cache in a
554
   * numerically safe way.
555
   * @param theta <a href="{@docRoot}/resources/dictionary.html#theta">See Theta</a>.
556
   * @param alpha internal computed value alpha.
557
   * @param p <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability, <i>p</i></a>.
558
   * @return R.
559
   */
560
  private static final int getR(final double theta, final double alpha, final double p) {
561
    final double split1 = (p * (alpha + 1.0)) / 2.0;
1✔
562
    if (theta > split1) { return 0; }
1✔
563
    if (theta > (alpha * split1)) { return 1; }
1✔
564
    return 2;
1✔
565
  }
566

567
  /**
568
   * Returns the cardinality limit given the current size of the hash table array.
569
   *
570
   * @param lgNomLongs <a href="{@docRoot}/resources/dictionary.html#lgNomLongs">See lgNomLongs</a>.
571
   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
572
   * @return the hash table threshold
573
   */
574
  private static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
575
    final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
1✔
576
    return (int) Math.floor(fraction * (1 << lgArrLongs));
1✔
577
  }
578

579
  static void checkAlphaFamily(final Memory mem, final int preambleLongs, final int lgNomLongs) {
580
    //Check Family
581
    final int familyID = extractFamilyID(mem);                       //byte 2
1✔
582
    final Family family = Family.idToFamily(familyID);
1✔
583
    if (family.equals(Family.ALPHA)) {
1✔
584
      if (preambleLongs != Family.ALPHA.getMinPreLongs()) {
1✔
585
        throw new SketchesArgumentException(
1✔
586
            "Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs);
587
      }
588
    }
589
    else {
590
      throw new SketchesArgumentException(
1✔
591
          "Possible corruption: Invalid Family: " + family.toString());
1✔
592
    }
593

594
    //Check lgNomLongs
595
    if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) {
1✔
596
      throw new SketchesArgumentException(
1✔
597
        "Possible corruption: This sketch requires a minimum nominal entries of "
598
            + (1 << ALPHA_MIN_LG_NOM_LONGS));
599
    }
600
  }
1✔
601

602
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc