• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.1
/src/main/java/org/apache/datasketches/theta/Sketch.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.theta;
21

22
import static org.apache.datasketches.common.Family.idToFamily;
23
import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
24
import static org.apache.datasketches.common.Util.LS;
25
import static org.apache.datasketches.common.Util.ceilingPowerOf2;
26
import static org.apache.datasketches.common.Util.zeroPad;
27
import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
28
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
29
import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
30
import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE;
31
import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
32
import static org.apache.datasketches.thetacommon.HashOperations.count;
33

34
import org.apache.datasketches.common.Family;
35
import org.apache.datasketches.common.SketchesArgumentException;
36
import org.apache.datasketches.memory.Memory;
37
import org.apache.datasketches.memory.WritableMemory;
38
import org.apache.datasketches.thetacommon.BinomialBoundsN;
39
import org.apache.datasketches.thetacommon.ThetaUtil;
40

41
/**
42
 * The top-level class for all theta sketches. This class is never constructed directly.
43
 * Use the UpdateSketch.builder() methods to create UpdateSketches.
44
 *
45
 * @author Lee Rhodes
46
 */
47
public abstract class Sketch {
48
  static final int DEFAULT_LG_RESIZE_FACTOR = 3;   //Unique to Heap
49

50
  Sketch() {}
1✔
51

52
  //public static factory constructor-type methods
53

54
  /**
55
   * Heapify takes the sketch image in Memory and instantiates an on-heap Sketch.
56
   *
57
   * <p>The resulting sketch will not retain any link to the source Memory.</p>
58
   *
59
   * <p>For Update Sketches this method checks if the
60
   * <a href="{@docRoot}/resources/dictionary.html#defaultUpdateSeed">Default Update Seed</a></p>
61
   * was used to create the source Memory image.
62
   *
63
   * <p>For Compact Sketches this method assumes that the sketch image was created with the
64
   * correct hash seed, so it is not checked.</p>
65
   *
66
   * @param srcMem an image of a Sketch.
67
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
68
   * @return a Sketch on the heap.
69
   */
70
  public static Sketch heapify(final Memory srcMem) {
71
    final byte familyID = srcMem.getByte(FAMILY_BYTE);
1✔
72
    final Family family = idToFamily(familyID);
1✔
73
    if (family == Family.COMPACT) {
1✔
74
      return CompactSketch.heapify(srcMem);
1✔
75
    }
76
    return heapifyUpdateFromMemory(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED);
1✔
77
  }
78

79
  /**
80
   * Heapify takes the sketch image in Memory and instantiates an on-heap Sketch.
81
   *
82
   * <p>The resulting sketch will not retain any link to the source Memory.</p>
83
   *
84
   * <p>For Update and Compact Sketches this method checks if the given expectedSeed was used to
85
   * create the source Memory image.  However, SerialVersion 1 sketches cannot be checked.</p>
86
   *
87
   * @param srcMem an image of a Sketch that was created using the given expectedSeed.
88
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
89
   * @param expectedSeed the seed used to validate the given Memory image.
90
   *  <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
91
   * Compact sketches store a 16-bit hash of the seed, but not the seed itself.
92
   * @return a Sketch on the heap.
93
   */
94
  public static Sketch heapify(final Memory srcMem, final long expectedSeed) {
95
    final byte familyID = srcMem.getByte(FAMILY_BYTE);
1✔
96
    final Family family = idToFamily(familyID);
1✔
97
    if (family == Family.COMPACT) {
1✔
98
      return CompactSketch.heapify(srcMem, expectedSeed);
1✔
99
    }
100
    return heapifyUpdateFromMemory(srcMem, expectedSeed);
1✔
101
  }
102

103
  /**
104
   * Wrap takes the sketch image in the given Memory and refers to it directly.
105
   * There is no data copying onto the java heap.
106
   * The wrap operation enables fast read-only merging and access to all the public read-only API.
107
   *
108
   * <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
109
   * been explicitly stored as direct sketches can be wrapped.
110
   * Wrapping earlier serial version sketches will result in a on-heap CompactSketch
111
   * where all data will be copied to the heap. These early versions were never designed to
112
   * "wrap".</p>
113
   *
114
   * <p>Wrapping any subclass of this class that is empty or contains only a single item will
115
   * result in on-heap equivalent forms of empty and single item sketch respectively.
116
   * This is actually faster and consumes less overall memory.</p>
117
   *
118
   * <p>For Update Sketches this method checks if the
119
   * <a href="{@docRoot}/resources/dictionary.html#defaultUpdateSeed">Default Update Seed</a></p>
120
   * was used to create the source Memory image.
121
   *
122
   * <p>For Compact Sketches this method assumes that the sketch image was created with the
123
   * correct hash seed, so it is not checked.</p>
124
   *
125
   * @param srcMem an image of a Sketch.
126
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
127
   * @return a Sketch backed by the given Memory
128
   */
129
  public static Sketch wrap(final Memory srcMem) {
130
    final int  preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F;
1✔
131
    final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
1✔
132
    final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
1✔
133
    final Family family = Family.idToFamily(familyID);
1✔
134
    if (family == Family.QUICKSELECT) {
1✔
135
      if (serVer == 3 && preLongs == 3) {
1✔
136
        return DirectQuickSelectSketchR.readOnlyWrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED);
1✔
137
      } else {
138
        throw new SketchesArgumentException(
1✔
139
            "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3");
140
      }
141
    }
142
    if (family == Family.COMPACT) {
1✔
143
      return CompactSketch.wrap(srcMem);
1✔
144
    }
145
    throw new SketchesArgumentException(
1✔
146
        "Cannot wrap family: " + family + " as a Sketch");
147
  }
148

149
  /**
150
   * Wrap takes the sketch image in the given Memory and refers to it directly.
151
   * There is no data copying onto the java heap.
152
   * The wrap operation enables fast read-only merging and access to all the public read-only API.
153
   *
154
   * <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
155
   * been explicitly stored as direct sketches can be wrapped.
156
   * Wrapping earlier serial version sketches will result in a on-heap CompactSketch
157
   * where all data will be copied to the heap. These early versions were never designed to
158
   * "wrap".</p>
159
   *
160
   * <p>Wrapping any subclass of this class that is empty or contains only a single item will
161
   * result in on-heap equivalent forms of empty and single item sketch respectively.
162
   * This is actually faster and consumes less overall memory.</p>
163
   *
164
   * <p>For Update and Compact Sketches this method checks if the given expectedSeed was used to
165
   * create the source Memory image.  However, SerialVersion 1 sketches cannot be checked.</p>
166
   *
167
   * @param srcMem an image of a Sketch.
168
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
169
   * @param expectedSeed the seed used to validate the given Memory image.
170
   * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
171
   * @return a UpdateSketch backed by the given Memory except as above.
172
   */
173
  public static Sketch wrap(final Memory srcMem, final long expectedSeed) {
174
    final int  preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F;
1✔
175
    final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
1✔
176
    final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
1✔
177
    final Family family = Family.idToFamily(familyID);
1✔
178
    if (family == Family.QUICKSELECT) {
1✔
179
      if (serVer == 3 && preLongs == 3) {
×
180
        return DirectQuickSelectSketchR.readOnlyWrap(srcMem, expectedSeed);
×
181
      } else {
182
        throw new SketchesArgumentException(
×
183
            "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3");
184
      }
185
    }
186
    if (family == Family.COMPACT) {
1✔
187
      return CompactSketch.wrap(srcMem, expectedSeed);
1✔
188
    }
189
    throw new SketchesArgumentException(
×
190
        "Cannot wrap family: " + family + " as a Sketch");
191
  }
192

193
  //Sketch interface
194

195
  /**
196
   * Converts this sketch to a ordered CompactSketch.
197
   *
198
   * <p>If <i>this.isCompact() == true</i> this method returns <i>this</i>,
199
   * otherwise, this method is equivalent to
200
   * {@link #compact(boolean, WritableMemory) compact(true, null)}.
201
   *
202
   * <p>A CompactSketch is always immutable.</p>
203
   *
204
   * @return this sketch as an ordered CompactSketch.
205
   */
206
  public CompactSketch compact() {
207
    return (this.isCompact()) ? (CompactSketch)this : compact(true, null);
1✔
208
  }
209

210
  /**
211
   * Convert this sketch to a <i>CompactSketch</i>.
212
   *
213
   * <p>If this sketch is a type of <i>UpdateSketch</i>, the compacting process converts the hash table
214
   * of the <i>UpdateSketch</i> to a simple list of the valid hash values.
215
   * Any hash values of zero or equal-to or greater than theta will be discarded.
216
   * The number of valid values remaining in the <i>CompactSketch</i> depends on a number of factors,
217
   * but may be larger or smaller than <i>Nominal Entries</i> (or <i>k</i>).
218
   * It will never exceed 2<i>k</i>.
219
   * If it is critical to always limit the size to no more than <i>k</i>,
220
   * then <i>rebuild()</i> should be called on the <i>UpdateSketch</i> prior to calling this method.</p>
221
   *
222
   * <p>A <i>CompactSketch</i> is always immutable.</p>
223
   *
224
   * <p>A new <i>CompactSketch</i> object is created:</p>
225
   * <ul><li>if <i>dstMem != null</i></li>
226
   * <li>if <i>dstMem == null</i> and <i>this.hasMemory() == true</i></li>
227
   * <li>if <i>dstMem == null</i> and <i>this</i> has more than 1 item and <i>this.isOrdered() == false</i>
228
   * and <i>dstOrdered == true</i>.</li>
229
   *</ul>
230
   *
231
   * <p>Otherwise, this operation returns <i>this</i>.</p>
232
   *
233
   * @param dstOrdered assumed true if this sketch is empty or has only one value
234
   * <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
235
   *
236
   * @param dstMem
237
   * <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
238
   *
239
   * @return this sketch as a <i>CompactSketch</i>.
240
   */
241
  public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem);
242

243
  /**
244
   * Returns the number of storage bytes required for this Sketch if its current state were
245
   * compacted. It this sketch is already in the compact form this is equivalent to
246
   * calling {@link #getCurrentBytes()}.
247
   * @return number of compact bytes
248
   */
249
  public abstract int getCompactBytes();
250

251
  /**
252
   * Gets the number of hash values less than the given theta expressed as a long.
253
   * @param thetaLong the given theta as a long between zero and <i>Long.MAX_VALUE</i>.
254
   * @return the number of hash values less than the given thetaLong.
255
   */
256
  public int getCountLessThanThetaLong(final long thetaLong) {
257
    return count(getCache(), thetaLong);
1✔
258
  }
259

260
  /**
261
   * Returns the number of storage bytes required for this sketch in its current state.
262
   *
263
   * @return the number of storage bytes required for this sketch
264
   */
265
  public abstract int getCurrentBytes();
266

267
  /**
268
   * Gets the unique count estimate.
269
   * @return the sketch's best estimate of the cardinality of the input stream.
270
   */
271
  public abstract double getEstimate();
272

273
  /**
274
   * Returns the Family that this sketch belongs to
275
   * @return the Family that this sketch belongs to
276
   */
277
  public abstract Family getFamily();
278

279
  /**
280
   * Gets the approximate lower error bound given the specified number of Standard Deviations.
281
   * This will return getEstimate() if isEmpty() is true.
282
   *
283
   * @param numStdDev
284
   * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
285
   * @return the lower bound.
286
   */
287
  public double getLowerBound(final int numStdDev) {
288
    return isEstimationMode()
1✔
289
        ? lowerBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty())
1✔
290
        : getRetainedEntries(true);
1✔
291
  }
292

293
  /**
294
   * Returns the maximum number of storage bytes required for a CompactSketch with the given
295
   * number of actual entries. Note that this assumes the worse case of the sketch in
296
   * estimation mode, which requires storing theta and count.
297
   * @param numberOfEntries the actual number of entries stored with the CompactSketch.
298
   * @return the maximum number of storage bytes required for a CompactSketch with the given number
299
   * of entries.
300
   */
301
  public static int getMaxCompactSketchBytes(final int numberOfEntries) {
302
    if (numberOfEntries == 0) { return 8; }
1✔
303
    if (numberOfEntries == 1) { return 16; }
1✔
304
    return (numberOfEntries << 3) + 24;
1✔
305
  }
306

307
  /**
308
   * Returns the maximum number of storage bytes required for an UpdateSketch with the given
309
   * number of nominal entries (power of 2).
310
   * @param nomEntries <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a>
311
   * This will become the ceiling power of 2 if it is not.
312
   * @return the maximum number of storage bytes required for a UpdateSketch with the given
313
   * nomEntries
314
   */
315
  public static int getMaxUpdateSketchBytes(final int nomEntries) {
316
    final int nomEnt = ceilingPowerOf2(nomEntries);
1✔
317
    return (nomEnt << 4) + (Family.QUICKSELECT.getMaxPreLongs() << 3);
1✔
318
  }
319

320
  /**
321
   * Returns the number of valid entries that have been retained by the sketch.
322
   * @return the number of valid retained entries
323
   */
324
  public int getRetainedEntries() {
325
    return getRetainedEntries(true);
1✔
326
  }
327

328
  /**
329
   * Returns the number of entries that have been retained by the sketch.
330
   * @param valid if true, returns the number of valid entries, which are less than theta and used
331
   * for estimation.
332
   * Otherwise, return the number of all entries, valid or not, that are currently in the internal
333
   * sketch cache.
334
   * @return the number of retained entries
335
   */
336
  public abstract int getRetainedEntries(boolean valid);
337

338
  /**
339
   * Returns the serialization version from the given Memory
340
   * @param mem the sketch Memory
341
   * @return the serialization version from the Memory
342
   */
343
  public static int getSerializationVersion(final Memory mem) {
344
    return mem.getByte(SER_VER_BYTE);
1✔
345
  }
346

347
  /**
348
   * Gets the value of theta as a double with a value between zero and one
349
   * @return the value of theta as a double
350
   */
351
  public double getTheta() {
352
    return getThetaLong() / LONG_MAX_VALUE_AS_DOUBLE;
1✔
353
  }
354

355
  /**
356
   * Gets the value of theta as a long
357
   * @return the value of theta as a long
358
   */
359
  public abstract long getThetaLong();
360

361
  /**
362
   * Gets the approximate upper error bound given the specified number of Standard Deviations.
363
   * This will return getEstimate() if isEmpty() is true.
364
   *
365
   * @param numStdDev
366
   * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
367
   * @return the upper bound.
368
   */
369
  public double getUpperBound(final int numStdDev) {
370
    return isEstimationMode()
1✔
371
        ? upperBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty())
1✔
372
        : getRetainedEntries(true);
1✔
373
  }
374

375
  /**
376
   * Returns true if this sketch's data structure is backed by Memory or WritableMemory.
377
   * @return true if this sketch's data structure is backed by Memory or WritableMemory.
378
   */
379
  public abstract boolean hasMemory();
380

381
  /**
382
   * Returns true if this sketch is in compact form.
383
   * @return true if this sketch is in compact form.
384
   */
385
  public abstract boolean isCompact();
386

387
  /**
388
   * Returns true if the this sketch's internal data structure is backed by direct (off-heap)
389
   * Memory.
390
   * @return true if the this sketch's internal data structure is backed by direct (off-heap)
391
   * Memory.
392
   */
393
  public abstract boolean isDirect();
394

395
  /**
396
   * <a href="{@docRoot}/resources/dictionary.html#empty">See Empty</a>
397
   * @return true if empty.
398
   */
399
  public abstract boolean isEmpty();
400

401
  /**
402
   * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode).
403
   * This is true if theta &lt; 1.0 AND isEmpty() is false.
404
   * @return true if the sketch is in estimation mode.
405
   */
406
  public boolean isEstimationMode() {
407
    return estMode(getThetaLong(), isEmpty());
1✔
408
  }
409

410
  /**
411
   * Returns true if internal cache is ordered
412
   * @return true if internal cache is ordered
413
   */
414
  public abstract boolean isOrdered();
415

416
  /**
417
   * Returns true if the backing resource of <i>this</i> is identical with the backing resource
418
   * of <i>that</i>. The capacities must be the same.  If <i>this</i> is a region,
419
   * the region offset must also be the same.
420
   * @param that A different non-null object
421
   * @return true if the backing resource of <i>this</i> is the same as the backing resource
422
   * of <i>that</i>.
423
   */
424
  public boolean isSameResource(final Memory that) {
425
    return false;
1✔
426
  }
427

428
  /**
429
   * Returns a HashIterator that can be used to iterate over the retained hash values of the
430
   * Theta sketch.
431
   * @return a HashIterator that can be used to iterate over the retained hash values of the
432
   * Theta sketch.
433
   */
434
  public abstract HashIterator iterator();
435

436
  /**
437
   * Serialize this sketch to a byte array form.
438
   * @return byte array of this sketch
439
   */
440
  public abstract byte[] toByteArray();
441

442
  /**
443
   * Returns a human readable summary of the sketch.  This method is equivalent to the parameterized
444
   * call:<br>
445
   * <i>Sketch.toString(sketch, true, false, 8, true);</i>
446
   * @return summary
447
   */
448
  @Override
449
  public String toString() {
450
    return toString(true, false, 8, true);
1✔
451
  }
452

453
  /**
454
   * Gets a human readable listing of contents and summary of the given sketch.
455
   * This can be a very long string.  If this sketch is in a "dirty" state there
456
   * may be values in the dataDetail view that are &ge; theta.
457
   *
458
   * @param sketchSummary If true the sketch summary will be output at the end.
459
   * @param dataDetail If true, includes all valid hash values in the sketch.
460
   * @param width The number of columns of hash values. Default is 8.
461
   * @param hexMode If true, hashes will be output in hex.
462
   * @return The result string, which can be very long.
463
   */
464
  public String toString(final boolean sketchSummary, final boolean dataDetail, final int width,
465
      final boolean hexMode) {
466
    final StringBuilder sb = new StringBuilder();
1✔
467

468
    final long[] cache = getCache();
1✔
469
    int nomLongs = 0;
1✔
470
    int arrLongs = cache.length;
1✔
471
    float p = 0;
1✔
472
    int rf = 0;
1✔
473
    final boolean updateSketch = this instanceof UpdateSketch;
1✔
474

475
    final long thetaLong = getThetaLong();
1✔
476
    final int curCount = this.getRetainedEntries(true);
1✔
477

478
    if (updateSketch) {
1✔
479
      final UpdateSketch uis = (UpdateSketch)this;
1✔
480
      nomLongs = 1 << uis.getLgNomLongs();
1✔
481
      arrLongs = 1 << uis.getLgArrLongs();
1✔
482
      p = uis.getP();
1✔
483
      rf = uis.getResizeFactor().getValue();
1✔
484
    }
485

486
    if (dataDetail) {
1✔
487
      final int w = width > 0 ? width : 8; // default is 8 wide
1✔
488
      if (curCount > 0) {
1✔
489
        sb.append("### SKETCH DATA DETAIL");
1✔
490
        for (int i = 0, j = 0; i < arrLongs; i++ ) {
1✔
491
          final long h;
492
          h = cache[i];
1✔
493
          if (h <= 0 || h >= thetaLong) {
1✔
494
            continue;
×
495
          }
496
          if (j % w == 0) {
1✔
497
            sb.append(LS).append(String.format("   %6d", j + 1));
1✔
498
          }
499
          if (hexMode) {
1✔
500
            sb.append(" " + zeroPad(Long.toHexString(h), 16) + ",");
1✔
501
          }
502
          else {
503
            sb.append(String.format(" %20d,", h));
1✔
504
          }
505
          j++ ;
1✔
506
        }
507
        sb.append(LS).append("### END DATA DETAIL").append(LS + LS);
1✔
508
      }
509
    }
510

511
    if (sketchSummary) {
1✔
512
      final double thetaDbl = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
1✔
513
      final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
1✔
514
      final String thisSimpleName = this.getClass().getSimpleName();
1✔
515
      final int seedHash = Short.toUnsignedInt(getSeedHash());
1✔
516

517
      sb.append(LS);
1✔
518
      sb.append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS);
1✔
519
      if (updateSketch) {
1✔
520
        sb.append("   Nominal Entries (k)     : ").append(nomLongs).append(LS);
1✔
521
      }
522
      sb.append("   Estimate                : ").append(getEstimate()).append(LS);
1✔
523
      sb.append("   Upper Bound, 95% conf   : ").append(getUpperBound(2)).append(LS);
1✔
524
      sb.append("   Lower Bound, 95% conf   : ").append(getLowerBound(2)).append(LS);
1✔
525
      if (updateSketch) {
1✔
526
        sb.append("   p                       : ").append(p).append(LS);
1✔
527
      }
528
      sb.append("   Theta (double)          : ").append(thetaDbl).append(LS);
1✔
529
      sb.append("   Theta (long)            : ").append(thetaLong).append(LS);
1✔
530
      sb.append("   Theta (long) hex        : ").append(thetaHex).append(LS);
1✔
531
      sb.append("   EstMode?                : ").append(isEstimationMode()).append(LS);
1✔
532
      sb.append("   Empty?                  : ").append(isEmpty()).append(LS);
1✔
533
      sb.append("   Ordered?                : ").append(isOrdered()).append(LS);
1✔
534
      if (updateSketch) {
1✔
535
        sb.append("   Resize Factor           : ").append(rf).append(LS);
1✔
536
        sb.append("   Array Size Entries      : ").append(arrLongs).append(LS);
1✔
537
      }
538
      sb.append("   Retained Entries        : ").append(curCount).append(LS);
1✔
539
      sb.append("   Seed Hash               : ").append(Integer.toHexString(seedHash))
1✔
540
        .append(" | ").append(seedHash).append(LS);
1✔
541
      sb.append("### END SKETCH SUMMARY").append(LS);
1✔
542

543
    }
544
    return sb.toString();
1✔
545
  }
546

547
  /**
548
   * Returns a human readable string of the preamble of a byte array image of a Theta Sketch.
549
   * @param byteArr the given byte array
550
   * @return a human readable string of the preamble of a byte array image of a Theta Sketch.
551
   */
552
  public static String toString(final byte[] byteArr) {
553
    return PreambleUtil.preambleToString(byteArr);
1✔
554
  }
555

556
  /**
557
   * Returns a human readable string of the preamble of a Memory image of a Theta Sketch.
558
   * @param mem the given Memory object
559
   * @return a human readable string of the preamble of a Memory image of a Theta Sketch.
560
   */
561
  public static String toString(final Memory mem) {
562
    return PreambleUtil.preambleToString(mem);
1✔
563
  }
564

565
  //Restricted methods
566

567
  /**
568
   * Gets the internal cache array. For on-heap sketches this will return a reference to the actual
569
   * cache array. For Memory-based sketches this returns a copy.
570
   * @return the internal cache array.
571
   */
572
  abstract long[] getCache();
573

574
  /**
575
   * Gets preamble longs if stored in compact form. If this sketch is already in compact form,
576
   * this is identical to the call {@link #getCurrentPreambleLongs()}.
577
   * @return preamble longs if stored in compact form.
578
   */
579
  abstract int getCompactPreambleLongs();
580

581
  /**
582
   * Gets the number of data longs if stored in current state.
583
   * @return the number of data longs if stored in current state.
584
   */
585
  abstract int getCurrentDataLongs();
586

587
  /**
588
   * Returns preamble longs if stored in current state.
589
   * @return number of preamble longs if stored.
590
   */
591
  abstract int getCurrentPreambleLongs();
592

593
  /**
594
   * Returns the Memory object if it exists, otherwise null.
595
   * @return the Memory object if it exists, otherwise null.
596
   */
597
  abstract Memory getMemory();
598

599
  /**
600
   * Gets the 16-bit seed hash
601
   * @return the seed hash
602
   */
603
  abstract short getSeedHash();
604

605
  /**
606
   * Returns true if given Family id is one of the theta sketches
607
   * @param id the given Family id
608
   * @return true if given Family id is one of the theta sketches
609
   */
610
  static final boolean isValidSketchID(final int id) {
611
    return id == Family.ALPHA.getID()
1✔
612
        || id == Family.QUICKSELECT.getID()
1✔
613
        || id == Family.COMPACT.getID();
1✔
614
  }
615

616
  /**
617
   * Checks Ordered and Compact flags for integrity between sketch and Memory
618
   * @param sketch the given sketch
619
   */
620
  static final void checkSketchAndMemoryFlags(final Sketch sketch) {
621
    final Memory mem = sketch.getMemory();
1✔
622
    if (mem == null) { return; }
1✔
623
    final int flags = PreambleUtil.extractFlags(mem);
1✔
624
    if ((flags & COMPACT_FLAG_MASK) > 0 ^ sketch.isCompact()) {
1✔
625
      throw new SketchesArgumentException("Possible corruption: "
1✔
626
          + "Memory Compact Flag inconsistent with Sketch");
627
    }
628
    if ((flags & ORDERED_FLAG_MASK) > 0 ^ sketch.isOrdered()) {
1✔
629
      throw new SketchesArgumentException("Possible corruption: "
×
630
          + "Memory Ordered Flag inconsistent with Sketch");
631
    }
632
  }
1✔
633

634
  static final double estimate(final long thetaLong, final int curCount) {
635
    return curCount * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong);
1✔
636
  }
637

638
  static final double lowerBound(final int curCount, final long thetaLong, final int numStdDev,
639
      final boolean empty) {
640
    final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
1✔
641
    return BinomialBoundsN.getLowerBound(curCount, theta, numStdDev, empty);
1✔
642
  }
643

644
  static final double upperBound(final int curCount, final long thetaLong, final int numStdDev,
645
      final boolean empty) {
646
    final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
1✔
647
    return BinomialBoundsN.getUpperBound(curCount, theta, numStdDev, empty);
1✔
648
  }
649

650
  private static final boolean estMode(final long thetaLong, final boolean empty) {
651
    return thetaLong < Long.MAX_VALUE && !empty;
1✔
652
  }
653

654
  /**
655
   * Instantiates a Heap Update Sketch from Memory. Only SerVer3. SerVer 1 & 2 already handled.
656
   * @param srcMem <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
657
   * @param expectedSeed the seed used to validate the given Memory image.
658
   * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
659
   * @return a Sketch
660
   */
661
  private static final Sketch heapifyUpdateFromMemory(final Memory srcMem, final long expectedSeed) {
662
    final long cap = srcMem.getCapacity();
1✔
663
    if (cap < 8) {
1✔
664
      throw new SketchesArgumentException(
×
665
          "Corrupted: valid sketch must be at least 8 bytes.");
666
    }
667
    final byte familyID = srcMem.getByte(FAMILY_BYTE);
1✔
668
    final Family family = idToFamily(familyID);
1✔
669

670
    if (family == Family.ALPHA) {
1✔
671
      final int flags = PreambleUtil.extractFlags(srcMem);
1✔
672
      final boolean compactFlag = (flags & COMPACT_FLAG_MASK) != 0;
1✔
673
      if (compactFlag) {
1✔
674
        throw new SketchesArgumentException(
1✔
675
            "Corrupted: ALPHA family image: cannot be compact");
676
      }
677
      return HeapAlphaSketch.heapifyInstance(srcMem, expectedSeed);
1✔
678
    }
679
    if (family == Family.QUICKSELECT) {
1✔
680
      return HeapQuickSelectSketch.heapifyInstance(srcMem, expectedSeed);
1✔
681
    }
682
    throw new SketchesArgumentException(
1✔
683
        "Sketch cannot heapify family: " + family + " as a Sketch");
684
  }
685

686
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc