• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.16
/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.tuple;
21

22
import static org.apache.datasketches.common.Util.ceilingPowerOf2;
23
import static org.apache.datasketches.common.Util.checkBounds;
24
import static org.apache.datasketches.common.Util.exactLog2OfLong;
25
import static org.apache.datasketches.thetacommon.HashOperations.count;
26

27
import java.lang.reflect.Array;
28
import java.nio.ByteOrder;
29
import java.util.Objects;
30

31
import org.apache.datasketches.common.ByteArrayUtil;
32
import org.apache.datasketches.common.Family;
33
import org.apache.datasketches.common.ResizeFactor;
34
import org.apache.datasketches.common.SketchesArgumentException;
35
import org.apache.datasketches.memory.Memory;
36
import org.apache.datasketches.thetacommon.HashOperations;
37
import org.apache.datasketches.thetacommon.QuickSelect;
38
import org.apache.datasketches.thetacommon.ThetaUtil;
39

40
/**
41
 * A generic tuple sketch using the QuickSelect algorithm.
42
 *
43
 * @param <S> type of Summary
44
 */
45
class QuickSelectSketch<S extends Summary> extends Sketch<S> {
46
  private static final byte serialVersionUID = 2;
47

48
  private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
1✔
49

50
  private static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg();
1✔
51
  private final int nomEntries_;
52
  private final int lgResizeFactor_;
53
  private final float samplingProbability_;
54
  private int lgCurrentCapacity_;
55
  private int retEntries_;
56
  private int rebuildThreshold_;
57
  private long[] hashTable_;
58
  S[] summaryTable_;
59

60
  /**
61
   * This is to create a new instance of a QuickSelectSketch with default resize factor.
62
   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
63
   * given value.
64
   * @param summaryFactory An instance of a SummaryFactory.
65
   */
66
  QuickSelectSketch(
67
      final int nomEntries,
68
      final SummaryFactory<S> summaryFactory) {
69
    this(nomEntries, DEFAULT_LG_RESIZE_FACTOR, summaryFactory);
1✔
70
  }
1✔
71

72
  /**
73
   * This is to create a new instance of a QuickSelectSketch with custom resize factor
74
   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
75
   * given value.
76
   * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
77
   * <pre>
78
   * 0 - no resizing (max size allocated),
79
   * 1 - double internal hash table each time it reaches a threshold
80
   * 2 - grow four times
81
   * 3 - grow eight times (default)
82
   * </pre>
83
   * @param summaryFactory An instance of a SummaryFactory.
84
   */
85
  QuickSelectSketch(
86
      final int nomEntries,
87
      final int lgResizeFactor,
88
      final SummaryFactory<S> summaryFactory) {
89
    this(nomEntries, lgResizeFactor, 1f, summaryFactory);
1✔
90
  }
1✔
91

92
  /**
93
   * This is to create a new instance of a QuickSelectSketch with custom resize factor and sampling
94
   * probability
95
   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
96
   * or equal to the given value.
97
   * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
98
   * <pre>
99
   * 0 - no resizing (max size allocated),
100
   * 1 - double internal hash table each time it reaches a threshold
101
   * 2 - grow four times
102
   * 3 - grow eight times (default)
103
   * </pre>
104
   * @param samplingProbability the given sampling probability
105
   * @param summaryFactory An instance of a SummaryFactory.
106
   */
107
  QuickSelectSketch(
108
      final int nomEntries,
109
      final int lgResizeFactor,
110
      final float samplingProbability,
111
      final SummaryFactory<S> summaryFactory) {
112
    this(
1✔
113
      nomEntries,
114
      lgResizeFactor,
115
      samplingProbability,
116
      summaryFactory,
117
      Util.getStartingCapacity(nomEntries, lgResizeFactor)
1✔
118
    );
119
  }
1✔
120

121
  /**
122
   * Target constructor for above constructors for a new instance.
123
   * @param nomEntries Nominal number of entries.
124
   * @param lgResizeFactor log2(resizeFactor)
125
   * @param samplingProbability the given sampling probability
126
   * @param summaryFactory An instance of a SummaryFactory.
127
   * @param startingSize starting size of the sketch.
128
   */
129
  private QuickSelectSketch(
130
      final int nomEntries,
131
      final int lgResizeFactor,
132
      final float samplingProbability,
133
      final SummaryFactory<S> summaryFactory,
134
      final int startingSize) {
135
    super(
1✔
136
        (long) (Long.MAX_VALUE * (double) samplingProbability),
137
        true,
138
        summaryFactory);
139
    nomEntries_ = ceilingPowerOf2(nomEntries);
1✔
140
    lgResizeFactor_ = lgResizeFactor;
1✔
141
    samplingProbability_ = samplingProbability;
1✔
142
    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingSize);
1✔
143
    retEntries_ = 0;
1✔
144
    hashTable_ = new long[startingSize]; //must be before setRebuildThreshold
1✔
145
    rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_);
1✔
146
    summaryTable_ = null; // wait for the first summary to call Array.newInstance()
1✔
147
  }
1✔
148

149
  /**
150
   * Copy constructor
151
   * @param sketch the QuickSelectSketch to be deep copied.
152
   */
153
  QuickSelectSketch(final QuickSelectSketch<S> sketch) {
154
    super(
1✔
155
        sketch.thetaLong_,
156
        sketch.empty_,
157
        sketch.summaryFactory_);
158
    nomEntries_ = sketch.nomEntries_;
1✔
159
    lgResizeFactor_ = sketch.lgResizeFactor_;
1✔
160
    samplingProbability_ = sketch.samplingProbability_;
1✔
161
    lgCurrentCapacity_ = sketch.lgCurrentCapacity_;
1✔
162
    retEntries_ = sketch.retEntries_;
1✔
163
    hashTable_ = sketch.hashTable_.clone();
1✔
164
    rebuildThreshold_ = sketch.rebuildThreshold_;
1✔
165
    summaryTable_ = Util.copySummaryArray(sketch.summaryTable_);
1✔
166
  }
1✔
167

168
  /**
169
   * This is to create an instance of a QuickSelectSketch given a serialized form
170
   * @param mem Memory object with serialized QuickSelectSketch
171
   * @param deserializer the SummaryDeserializer
172
   * @param summaryFactory the SummaryFactory
173
   * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
174
   * This capability will be removed in a future release.
175
   * Heapifying a CompactSketch is not deprecated.
176
   */
177
  @Deprecated
178
  QuickSelectSketch(
179
      final Memory mem,
180
      final SummaryDeserializer<S> deserializer,
181
      final SummaryFactory<S> summaryFactory) {
182
    this(new Validate<>(), mem, deserializer, summaryFactory);
1✔
183
  }
1✔
184

185
  /*
186
   * This private constructor is used to protect against "Finalizer attacks".
187
   * The private static inner class Validate performs validation and deserialization
188
   * from the input Memory and may throw exceptions. In order to protect against the attack, we must
189
   * perform this validation prior to the constructor's super reaches the Object class.
190
   * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass.
191
   * Using an empty final finalizer() is not recommended and is deprecated as of Java9.
192
   */
193
  private QuickSelectSketch(
194
      final Validate<S> val,
195
      final Memory mem,
196
      final SummaryDeserializer<S> deserializer,
197
      final SummaryFactory<S> summaryFactory) {
198
    super(val.validate(mem, deserializer), val.myEmpty, summaryFactory);
1✔
199
    nomEntries_ = val.myNomEntries;
1✔
200
    lgResizeFactor_ = val.myLgResizeFactor;
1✔
201
    samplingProbability_ = val.mySamplingProbability;
1✔
202
    lgCurrentCapacity_ = val.myLgCurrentCapacity;
1✔
203
    retEntries_ = val.myRetEntries;
1✔
204
    rebuildThreshold_ = val.myRebuildThreshold;
1✔
205
    hashTable_ = val.myHashTable;
1✔
206
    summaryTable_ = val.mySummaryTable;
1✔
207
  }
1✔
208

209
  private static final class Validate<S> {
210
    //super fields
211
    long myThetaLong;
212
    boolean myEmpty;
213
    //this fields
214
    int myNomEntries;
215
    int myLgResizeFactor;
216
    float mySamplingProbability;
217
    int myLgCurrentCapacity;
218
    int myRetEntries;
219
    int myRebuildThreshold;
220
    long[] myHashTable;
221
    S[] mySummaryTable;
222

223
    @SuppressWarnings("unchecked")
224
    long validate(
225
        final Memory mem,
226
        final SummaryDeserializer<?> deserializer) {
227
      Objects.requireNonNull(mem, "SourceMemory must not be null.");
1✔
228
      Objects.requireNonNull(deserializer, "Deserializer must not be null.");
1✔
229
      checkBounds(0, 8, mem.getCapacity());
1✔
230

231
      int offset = 0;
1✔
232
      final byte preambleLongs = mem.getByte(offset++); //byte 0 PreLongs
1✔
233
      final byte version = mem.getByte(offset++);       //byte 1 SerVer
1✔
234
      final byte familyId = mem.getByte(offset++);      //byte 2 FamID
1✔
235
      SerializerDeserializer.validateFamily(familyId, preambleLongs);
1✔
236
      if (version > serialVersionUID) {
1✔
237
        throw new SketchesArgumentException(
1✔
238
            "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: "
239
                + version);
240
      }
241
      SerializerDeserializer.validateType(mem.getByte(offset++), //byte 3
1✔
242
          SerializerDeserializer.SketchType.QuickSelectSketch);
243
      final byte flags = mem.getByte(offset++); //byte 4
1✔
244
      final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0;
1✔
245
      if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
1✔
246
        throw new SketchesArgumentException("Endian byte order mismatch");
×
247
      }
248
      myNomEntries = 1 << mem.getByte(offset++); //byte 5
1✔
249
      myLgCurrentCapacity = mem.getByte(offset++); //byte 6
1✔
250
      myLgResizeFactor = mem.getByte(offset++); //byte 7
1✔
251

252
      checkBounds(0, preambleLongs * 8L, mem.getCapacity());
1✔
253
      final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0;
1✔
254
      mySamplingProbability = isInSamplingMode ? mem.getFloat(offset) : 1f; //bytes 8 - 11
1✔
255
      if (isInSamplingMode) {
1✔
256
        offset += Float.BYTES;
1✔
257
      }
258

259
      final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0;
1✔
260
      if (isThetaIncluded) {
1✔
261
        myThetaLong = mem.getLong(offset);
1✔
262
        offset += Long.BYTES;
1✔
263
      } else {
264
        myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability);
1✔
265
      }
266

267
      int count = 0;
1✔
268
      final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0;
1✔
269
      if (hasEntries) {
1✔
270
        count = mem.getInt(offset);
1✔
271
        offset += Integer.BYTES;
1✔
272
      }
273
      final int currentCapacity = 1 << myLgCurrentCapacity;
1✔
274
      myHashTable = new long[currentCapacity];
1✔
275
      for (int i = 0; i < count; i++) {
1✔
276
        final long hash = mem.getLong(offset);
1✔
277
        offset += Long.BYTES;
1✔
278
        final Memory memRegion = mem.region(offset, mem.getCapacity() - offset);
1✔
279
        final DeserializeResult<?> summaryResult = deserializer.heapifySummary(memRegion);
1✔
280
        final S summary = (S) summaryResult.getObject();
1✔
281
        offset += summaryResult.getSize();
1✔
282
        //in-place equivalent to insert(hash, summary):
283
        final int index = HashOperations.hashInsertOnly(myHashTable, myLgCurrentCapacity, hash);
1✔
284
        if (mySummaryTable == null) {
1✔
285
          mySummaryTable = (S[]) Array.newInstance(summary.getClass(), myHashTable.length);
1✔
286
        }
287
        mySummaryTable[index] = summary;
1✔
288
        myRetEntries++;
1✔
289
        myEmpty = false;
1✔
290
      }
291
      myEmpty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
1✔
292
      myRebuildThreshold = setRebuildThreshold(myHashTable, myNomEntries);
1✔
293
      return myThetaLong;
1✔
294
    }
295

296
  } //end class Validate
297

298
  /**
299
   * @return a deep copy of this sketch
300
   */
301
  QuickSelectSketch<S> copy() {
302
    return new QuickSelectSketch<>(this);
×
303
  }
304

305
  long[] getHashTable() {
306
    return hashTable_;
1✔
307
  }
308

309
  @Override
310
  public int getRetainedEntries() {
311
    return retEntries_;
1✔
312
  }
313

314
  @Override
315
  public int getCountLessThanThetaLong(final long thetaLong) {
316
    return count(hashTable_, thetaLong);
1✔
317
  }
318

319
  S[] getSummaryTable() {
320
    return summaryTable_;
1✔
321
  }
322

323
  /**
324
   * Get configured nominal number of entries
325
   * @return nominal number of entries
326
   */
327
  public int getNominalEntries() {
328
    return nomEntries_;
1✔
329
  }
330

331
  /**
332
   * Get log_base2 of Nominal Entries
333
   * @return log_base2 of Nominal Entries
334
   */
335
  public int getLgK() {
336
    return exactLog2OfLong(nomEntries_);
1✔
337
  }
338

339
  /**
340
   * Get configured sampling probability
341
   * @return sampling probability
342
   */
343
  public float getSamplingProbability() {
344
    return samplingProbability_;
1✔
345
  }
346

347
  /**
348
   * Get current capacity
349
   * @return current capacity
350
   */
351
  public int getCurrentCapacity() {
352
    return 1 << lgCurrentCapacity_;
1✔
353
  }
354

355
  /**
356
   * Get configured resize factor
357
   * @return resize factor
358
   */
359
  public ResizeFactor getResizeFactor() {
360
    return ResizeFactor.getRF(lgResizeFactor_);
1✔
361
  }
362

363
  /**
364
   * Rebuilds reducing the actual number of entries to the nominal number of entries if needed
365
   */
366
  public void trim() {
367
    if (retEntries_ > nomEntries_) {
1✔
368
      updateTheta();
1✔
369
      resize(hashTable_.length);
1✔
370
    }
371
  }
1✔
372

373
  /**
374
   * Resets this sketch an empty state.
375
   */
376
  public void reset() {
377
    empty_ = true;
1✔
378
    retEntries_ = 0;
1✔
379
    thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_);
1✔
380
    final int startingCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_);
1✔
381
    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
1✔
382
    hashTable_ = new long[startingCapacity];
1✔
383
    summaryTable_ = null; // wait for the first summary to call Array.newInstance()
1✔
384
    rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_);
1✔
385
  }
1✔
386

387
  /**
388
   * Converts the current state of the sketch into a compact sketch
389
   * @return compact sketch
390
   */
391
  @Override
392
  @SuppressWarnings("unchecked")
393
  public CompactSketch<S> compact() {
394
    if (getRetainedEntries() == 0) {
1✔
395
      if (empty_) { return new CompactSketch<>(null, null, Long.MAX_VALUE, true); }
1✔
396
      return new CompactSketch<>(null, null, thetaLong_, false);
1✔
397
    }
398
    final long[] hashArr = new long[getRetainedEntries()];
1✔
399
    final S[] summaryArr = Util.newSummaryArray(summaryTable_, getRetainedEntries());
1✔
400
    int i = 0;
1✔
401
    for (int j = 0; j < hashTable_.length; j++) {
1✔
402
      if (summaryTable_[j] != null) {
1✔
403
        hashArr[i] = hashTable_[j];
1✔
404
        summaryArr[i] = (S)summaryTable_[j].copy();
1✔
405
        i++;
1✔
406
      }
407
    }
408
    return new CompactSketch<>(hashArr, summaryArr, thetaLong_, empty_);
1✔
409
  }
410

411
  // Layout of first 8 bytes:
412
  // Long || Start Byte Adr:
413
  // Adr:
414
  //      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
415
  //  0   ||   RF   |  lgArr | lgNom  |  Flags | SkType | FamID  | SerVer |  Preamble_Longs    |
416
  /**
417
   * This serializes an UpdatableSketch (QuickSelectSketch).
418
   * @return serialized representation of an UpdatableSketch (QuickSelectSketch).
419
   * @deprecated As of 3.0.0, serializing an UpdatableSketch is deprecated.
420
   * This capability will be removed in a future release.
421
   * Serializing a CompactSketch is not deprecated.
422
   */
423
  @Deprecated
424
  @Override
425
  public byte[] toByteArray() {
426
    byte[][] summariesBytes = null;
1✔
427
    int summariesBytesLength = 0;
1✔
428
    if (retEntries_ > 0) {
1✔
429
      summariesBytes = new byte[retEntries_][];
1✔
430
      int i = 0;
1✔
431
      for (int j = 0; j < summaryTable_.length; j++) {
1✔
432
        if (summaryTable_[j] != null) {
1✔
433
          summariesBytes[i] = summaryTable_[j].toByteArray();
1✔
434
          summariesBytesLength += summariesBytes[i].length;
1✔
435
          i++;
1✔
436
        }
437
      }
438
    }
439
    int sizeBytes =
1✔
440
        Byte.BYTES // preamble longs
441
      + Byte.BYTES // serial version
442
      + Byte.BYTES // family
443
      + Byte.BYTES // sketch type
444
      + Byte.BYTES // flags
445
      + Byte.BYTES // log2(nomEntries)
446
      + Byte.BYTES // log2(currentCapacity)
447
      + Byte.BYTES; // log2(resizeFactor)
448
    if (isInSamplingMode()) {
1✔
449
      sizeBytes += Float.BYTES; // samplingProbability
1✔
450
    }
451
    final boolean isThetaIncluded = isInSamplingMode()
1✔
452
        ? thetaLong_ < samplingProbability_ : thetaLong_ < Long.MAX_VALUE;
453
    if (isThetaIncluded) {
1✔
454
      sizeBytes += Long.BYTES;
1✔
455
    }
456
    if (retEntries_ > 0) {
1✔
457
      sizeBytes += Integer.BYTES; // count
1✔
458
    }
459
    sizeBytes += Long.BYTES * retEntries_ + summariesBytesLength;
1✔
460
    final byte[] bytes = new byte[sizeBytes];
1✔
461
    int offset = 0;
1✔
462
    bytes[offset++] = PREAMBLE_LONGS;
1✔
463
    bytes[offset++] = serialVersionUID;
1✔
464
    bytes[offset++] = (byte) Family.TUPLE.getID();
1✔
465
    bytes[offset++] = (byte) SerializerDeserializer.SketchType.QuickSelectSketch.ordinal();
1✔
466
    final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
1✔
467
    bytes[offset++] = (byte) (
1✔
468
      (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
1✔
469
      | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0)
1✔
470
      | (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
1✔
471
      | (retEntries_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)
1✔
472
      | (isThetaIncluded ? 1 << Flags.IS_THETA_INCLUDED.ordinal() : 0)
1✔
473
    );
474
    bytes[offset++] = (byte) Integer.numberOfTrailingZeros(nomEntries_);
1✔
475
    bytes[offset++] = (byte) lgCurrentCapacity_;
1✔
476
    bytes[offset++] = (byte) lgResizeFactor_;
1✔
477
    if (samplingProbability_ < 1f) {
1✔
478
      ByteArrayUtil.putFloatLE(bytes, offset, samplingProbability_);
1✔
479
      offset += Float.BYTES;
1✔
480
    }
481
    if (isThetaIncluded) {
1✔
482
      ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
1✔
483
      offset += Long.BYTES;
1✔
484
    }
485
    if (retEntries_ > 0) {
1✔
486
      ByteArrayUtil.putIntLE(bytes, offset, retEntries_);
1✔
487
      offset += Integer.BYTES;
1✔
488
    }
489
    if (retEntries_ > 0) {
1✔
490
      int i = 0;
1✔
491
      for (int j = 0; j < hashTable_.length; j++) {
1✔
492
        if (summaryTable_[j] != null) {
1✔
493
          ByteArrayUtil.putLongLE(bytes, offset, hashTable_[j]);
1✔
494
          offset += Long.BYTES;
1✔
495
          System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
1✔
496
          offset += summariesBytes[i].length;
1✔
497
          i++;
1✔
498
        }
499
      }
500
    }
501
    return bytes;
1✔
502
  }
503

504
  // non-public methods below
505

506
  // this is a special back door insert for merging
507
  // not sufficient by itself without keeping track of theta of another sketch
508
  @SuppressWarnings("unchecked")
509
  void merge(final long hash, final S summary, final SummarySetOperations<S> summarySetOps) {
510
    empty_ = false;
1✔
511
    if (hash > 0 && hash < thetaLong_) {
1✔
512
      final int index = findOrInsert(hash);
1✔
513
      if (index < 0) {
1✔
514
        insertSummary(~index, (S)summary.copy()); //did not find, so insert
1✔
515
      } else {
516
        insertSummary(index, summarySetOps.union(summaryTable_[index], (S) summary.copy()));
1✔
517
      }
518
      rebuildIfNeeded();
1✔
519
    }
520
  }
1✔
521

522
  boolean isInSamplingMode() {
523
    return samplingProbability_ < 1f;
1✔
524
  }
525

526
  void setThetaLong(final long theta) {
527
    thetaLong_ = theta;
1✔
528
  }
1✔
529

530
  void setEmpty(final boolean value) {
531
    empty_ = value;
1✔
532
  }
1✔
533

534
  int findOrInsert(final long hash) {
535
    final int index = HashOperations.hashSearchOrInsert(hashTable_, lgCurrentCapacity_, hash);
1✔
536
    if (index < 0) {
1✔
537
      retEntries_++;
1✔
538
    }
539
    return index;
1✔
540
  }
541

542
  boolean rebuildIfNeeded() {
543
    if (retEntries_ <= rebuildThreshold_) {
1✔
544
      return false;
1✔
545
    }
546
    if (hashTable_.length > nomEntries_) {
1✔
547
      updateTheta();
1✔
548
      rebuild();
1✔
549
    } else {
550
      resize(hashTable_.length * (1 << lgResizeFactor_));
1✔
551
    }
552
    return true;
1✔
553
  }
554

555
  void rebuild() {
556
    resize(hashTable_.length);
1✔
557
  }
1✔
558

559
  void insert(final long hash, final S summary) {
560
    final int index = HashOperations.hashInsertOnly(hashTable_, lgCurrentCapacity_, hash);
1✔
561
    insertSummary(index, summary);
1✔
562
    retEntries_++;
1✔
563
    empty_ = false;
1✔
564
  }
1✔
565

566
  private void updateTheta() {
567
    final long[] hashArr = new long[retEntries_];
1✔
568
    int i = 0;
1✔
569
    //Because of the association of the hashTable with the summaryTable we cannot destroy the
570
    // hashTable structure. So we must copy. May as well compact at the same time.
571
    // Might consider a whole table clone and use the selectExcludingZeros method instead.
572
    // Not sure if there would be any speed advantage.
573
    for (int j = 0; j < hashTable_.length; j++) {
1✔
574
      if (summaryTable_[j] != null) {
1✔
575
        hashArr[i++] = hashTable_[j];
1✔
576
      }
577
    }
578
    thetaLong_ = QuickSelect.select(hashArr, 0, retEntries_ - 1, nomEntries_);
1✔
579
  }
1✔
580

581
  private void resize(final int newSize) {
582
    final long[] oldHashTable = hashTable_;
1✔
583
    final S[] oldSummaryTable = summaryTable_;
1✔
584
    hashTable_ = new long[newSize];
1✔
585
    summaryTable_ = Util.newSummaryArray(summaryTable_, newSize);
1✔
586
    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newSize);
1✔
587
    retEntries_ = 0;
1✔
588
    for (int i = 0; i < oldHashTable.length; i++) {
1✔
589
      if (oldSummaryTable[i] != null && oldHashTable[i] < thetaLong_) {
1✔
590
        insert(oldHashTable[i], oldSummaryTable[i]);
1✔
591
      }
592
    }
593
    rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_);
1✔
594
  }
1✔
595

596
  private static int setRebuildThreshold(final long[] hashTable, final int nomEntries) {
597
    if (hashTable.length > nomEntries) {
1✔
598
      return (int) (hashTable.length * ThetaUtil.REBUILD_THRESHOLD);
1✔
599
    } else {
600
      return (int) (hashTable.length * ThetaUtil.RESIZE_THRESHOLD);
1✔
601
    }
602
  }
603

604
  @SuppressWarnings("unchecked")
605
  protected void insertSummary(final int index, final S summary) {
606
    if (summaryTable_ == null) {
1✔
607
      summaryTable_ = (S[]) Array.newInstance(summary.getClass(), hashTable_.length);
1✔
608
    }
609
    summaryTable_[index] = summary;
1✔
610
  }
1✔
611

612
  @Override
613
  public TupleSketchIterator<S> iterator() {
614
    return new TupleSketchIterator<>(hashTable_, summaryTable_);
1✔
615
  }
616

617
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc