• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.46
/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.frequencies;
21

22
import static org.apache.datasketches.common.Util.LS;
23
import static org.apache.datasketches.common.Util.checkBounds;
24
import static org.apache.datasketches.common.Util.exactLog2OfInt;
25
import static org.apache.datasketches.common.Util.isPowerOf2;
26
import static org.apache.datasketches.frequencies.PreambleUtil.EMPTY_FLAG_MASK;
27
import static org.apache.datasketches.frequencies.PreambleUtil.SER_VER;
28
import static org.apache.datasketches.frequencies.PreambleUtil.extractActiveItems;
29
import static org.apache.datasketches.frequencies.PreambleUtil.extractFamilyID;
30
import static org.apache.datasketches.frequencies.PreambleUtil.extractFlags;
31
import static org.apache.datasketches.frequencies.PreambleUtil.extractLgCurMapSize;
32
import static org.apache.datasketches.frequencies.PreambleUtil.extractLgMaxMapSize;
33
import static org.apache.datasketches.frequencies.PreambleUtil.extractPreLongs;
34
import static org.apache.datasketches.frequencies.PreambleUtil.extractSerVer;
35
import static org.apache.datasketches.frequencies.PreambleUtil.insertActiveItems;
36
import static org.apache.datasketches.frequencies.PreambleUtil.insertFamilyID;
37
import static org.apache.datasketches.frequencies.PreambleUtil.insertFlags;
38
import static org.apache.datasketches.frequencies.PreambleUtil.insertLgCurMapSize;
39
import static org.apache.datasketches.frequencies.PreambleUtil.insertLgMaxMapSize;
40
import static org.apache.datasketches.frequencies.PreambleUtil.insertPreLongs;
41
import static org.apache.datasketches.frequencies.PreambleUtil.insertSerVer;
42
import static org.apache.datasketches.frequencies.Util.LG_MIN_MAP_SIZE;
43
import static org.apache.datasketches.frequencies.Util.SAMPLE_SIZE;
44

45
import java.lang.reflect.Array;
46
import java.util.ArrayList;
47
import java.util.Comparator;
48
import java.util.Objects;
49

50
import org.apache.datasketches.common.ArrayOfItemsSerDe;
51
import org.apache.datasketches.common.Family;
52
import org.apache.datasketches.common.SketchesArgumentException;
53
import org.apache.datasketches.common.SketchesStateException;
54
import org.apache.datasketches.memory.Memory;
55
import org.apache.datasketches.memory.WritableMemory;
56

57
/**
58
 * <p>This sketch is useful for tracking approximate frequencies of items of type <i>&lt;T&gt;</i>
59
 * with optional associated counts (<i>&lt;T&gt;</i> item, <i>long</i> count) that are members of a
60
 * multiset of such items. The true frequency of an item is defined to be the sum of associated
61
 * counts.</p>
62
 *
63
 * <p>This implementation provides the following capabilities:</p>
64
 * <ul>
65
 * <li>Estimate the frequency of an item.</li>
66
 * <li>Return upper and lower bounds of any item, such that the true frequency is always
67
 * between the upper and lower bounds.</li>
68
 * <li>Return a global maximum error that holds for all items in the stream.</li>
69
 * <li>Return an array of frequent items that qualify either a NO_FALSE_POSITIVES or a
70
 * NO_FALSE_NEGATIVES error type.</li>
71
 * <li>Merge itself with another sketch object created from this class.</li>
72
 * <li>Serialize/Deserialize to/from a byte array.</li>
73
 * </ul>
74
 *
75
 * <p><b>Space Usage</b></p>
76
 *
77
 * <p>The sketch is initialized with a <i>maxMapSize</i> that specifies the maximum physical
78
 * length of the internal hash map of the form (<i>&lt;T&gt;</i> item, <i>long</i> count).
79
 * The <i>maxMapSize</i> must be a power of 2.</p>
80
 *
81
 * <p>The hash map starts at a very small size (8 entries), and grows as needed up to the
82
 * specified <i>maxMapSize</i>.</p>
83
 *
84
 * <p>Excluding external space required for the item objects, the internal memory space usage of
85
 * this sketch is 18 * <i>mapSize</i> bytes (assuming 8 bytes for each Java reference), plus a small
86
 * constant number of additional bytes. The internal memory space usage of this sketch will never
87
 * exceed 18 * <i>maxMapSize</i> bytes, plus a small constant number of additional bytes.</p>
88
 *
89
 * <p><b>Maximum Capacity of the Sketch</b></p>
90
 *
91
 * <p>The LOAD_FACTOR for the hash map is internally set at 75%,
92
 * which means at any time the map capacity of (item, count) pairs is <i>mapCap</i> = 0.75 *
93
 * <i>mapSize</i>.
94
 * The maximum capacity of (item, count) pairs of the sketch is <i>maxMapCap</i> = 0.75 *
95
 * <i>maxMapSize</i>.</p>
96
 *
97
 * <p><b>Updating the sketch with (item, count) pairs</b></p>
98
 *
99
 * <p>If the item is found in the hash map, the mapped count field (the "counter") is
100
 * incremented by the incoming count, otherwise, a new counter "(item, count) pair" is
101
 * created. If the number of tracked counters reaches the maximum capacity of the hash map
102
 * the sketch decrements all of the counters (by an approximately computed median), and
103
 * removes any non-positive counters.</p>
104
 *
105
 * <p><b>Accuracy</b></p>
106
 *
107
 * <p>If fewer than 0.75 * <i>maxMapSize</i> different items are inserted into the sketch the
108
 * estimated frequencies returned by the sketch will be exact.</p>
109
 *
110
 * <p>The logic of the frequent items sketch is such that the stored counts and true counts are
111
 * never too different.
112
 * More specifically, for any <i>item</i>, the sketch can return an estimate of the
113
 * true frequency of <i>item</i>, along with upper and lower bounds on the frequency
114
 * (that hold deterministically).</p>
115
 *
116
 * <p>For this implementation and for a specific active <i>item</i>, it is guaranteed that
117
 * the true frequency will be between the Upper Bound (UB) and the Lower Bound (LB) computed for
118
 * that <i>item</i>.  Specifically, <i>(UB- LB) &le; W * epsilon</i>, where <i>W</i> denotes the
119
 * sum of all item counts, and <i>epsilon = 3.5/M</i>, where <i>M</i> is the <i>maxMapSize</i>.</p>
120
 *
121
 * <p>This is a worst case guarantee that applies to arbitrary inputs.<sup>1</sup>
122
 * For inputs typically seen in practice <i>(UB-LB)</i> is usually much smaller.
123
 * </p>
124
 *
125
 * <p><b>Background</b></p>
126
 *
127
 * <p>This code implements a variant of what is commonly known as the "Misra-Gries
128
 * algorithm". Variants of it were discovered and rediscovered and redesigned several times
129
 * over the years:</p>
130
 * <ul><li>"Finding repeated elements", Misra, Gries, 1982</li>
131
 * <li>"Frequency estimation of Internet packet streams with limited space" Demaine,
132
 * Lopez-Ortiz, Munro, 2002</li>
133
 * <li>"A simple algorithm for finding frequent elements in streams and bags" Karp, Shenker,
134
 * Papadimitriou, 2003</li>
135
 * <li>"Efficient Computation of Frequent and Top-k Elements in Data Streams" Metwally,
136
 * Agrawal, Abbadi, 2006</li>
137
 * </ul>
138
 *
139
 * <sup>1</sup> For speed we do employ some randomization that introduces a small probability that
140
 * our proof of the worst-case bound might not apply to a given run. However, we have ensured
141
 * that this probability is extremely small. For example, if the stream causes one table purge
142
 * (rebuild), our proof of the worst case bound applies with probability at least 1 - 1E-14.
143
 * If the stream causes 1E9 purges, our proof applies with probability at least 1 - 1E-5.
144
 *
145
 * @param <T> The type of item to be tracked by this sketch
146
 *
147
 * @author Justin Thaler
148
 * @author Alexander Saydakov
149
 */
150
public class ItemsSketch<T> {
151

152
  /**
153
   * Log2 Maximum length of the arrays internal to the hash map supported by the data
154
   * structure.
155
   */
156
  private int lgMaxMapSize;
157

158
  /**
159
   * The current number of counters supported by the hash map.
160
   */
161
  private int curMapCap; //the threshold to purge
162

163
  /**
164
   * Tracks the total of decremented counts.
165
   */
166
  private long offset;
167

168
  /**
169
   * The sum of all frequencies of the stream so far.
170
   */
171
  private long streamWeight = 0;
1✔
172

173
  /**
174
   * The maximum number of samples used to compute approximate median of counters when doing
175
   * decrement
176
   */
177
  private int sampleSize;
178

179
  /**
180
   * Hash map mapping stored items to approximate counts
181
   */
182
  private ReversePurgeItemHashMap<T> hashMap;
183

184
  /**
185
   * Construct this sketch with the parameter maxMapSize and the default initialMapSize (8).
186
   *
187
   * @param maxMapSize Determines the physical size of the internal hash map managed by this
188
   * sketch and must be a power of 2. The maximum capacity of this internal hash map is
189
   * 0.75 times * maxMapSize. Both the ultimate accuracy and size of this sketch are
190
   * functions of maxMapSize.
191
   */
192
  public ItemsSketch(final int maxMapSize) {
193
    this(exactLog2OfInt(maxMapSize, "maxMapSize"), LG_MIN_MAP_SIZE);
1✔
194
  }
1✔
195

196
  /**
197
   * Construct this sketch with parameter lgMaxMapSize and lgCurMapSize. This internal
198
   * constructor is used when deserializing the sketch.
199
   *
200
   * @param lgMaxMapSize Log2 of the physical size of the internal hash map managed by this
201
   * sketch. The maximum capacity of this internal hash map is 0.75 times 2^lgMaxMapSize.
202
   * Both the ultimate accuracy and size of this sketch are functions of lgMaxMapSize.
203
   *
204
   * @param lgCurMapSize Log2 of the starting (current) physical size of the internal hash
205
   * map managed by this sketch.
206
   */
207
  ItemsSketch(final int lgMaxMapSize, final int lgCurMapSize) {
1✔
208
    //set initial size of hash map
209
    this.lgMaxMapSize = Math.max(lgMaxMapSize, LG_MIN_MAP_SIZE);
1✔
210
    final int lgCurMapSz = Math.max(lgCurMapSize, LG_MIN_MAP_SIZE);
1✔
211
    hashMap = new ReversePurgeItemHashMap<>(1 << lgCurMapSz);
1✔
212
    this.curMapCap = hashMap.getCapacity();
1✔
213
    final int maxMapCap =
1✔
214
        (int) ((1 << lgMaxMapSize) * ReversePurgeItemHashMap.getLoadFactor());
1✔
215
    offset = 0;
1✔
216
    sampleSize = Math.min(SAMPLE_SIZE, maxMapCap);
1✔
217
  }
1✔
218

219
  /**
220
   * Returns a sketch instance of this class from the given srcMem,
221
   * which must be a Memory representation of this sketch class.
222
   *
223
   * @param <T> The type of item that this sketch will track
224
   * @param srcMem a Memory representation of a sketch of this class.
225
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
226
   * @param serDe an instance of ArrayOfItemsSerDe
227
   * @return a sketch instance of this class.
228
   */
229
  public static <T> ItemsSketch<T> getInstance(final Memory srcMem,
230
      final ArrayOfItemsSerDe<T> serDe) {
231
    Objects.requireNonNull(srcMem, "srcMem must not be null.");
1✔
232
    Objects.requireNonNull(serDe, "serDe must not be null.");
1✔
233

234
    final long pre0 = PreambleUtil.checkPreambleSize(srcMem); //make sure preamble will fit
1✔
235
    final int maxPreLongs = Family.FREQUENCY.getMaxPreLongs();
1✔
236

237
    final int preLongs = extractPreLongs(pre0);         //Byte 0
1✔
238
    final int serVer = extractSerVer(pre0);             //Byte 1
1✔
239
    final int familyID = extractFamilyID(pre0);         //Byte 2
1✔
240
    final int lgMaxMapSize = extractLgMaxMapSize(pre0); //Byte 3
1✔
241
    final int lgCurMapSize = extractLgCurMapSize(pre0); //Byte 4
1✔
242
    final boolean empty = (extractFlags(pre0) & EMPTY_FLAG_MASK) != 0; //Byte 5
1✔
243

244
    // Checks
245
    final boolean preLongsEq1 = (preLongs == 1);        //Byte 0
1✔
246
    final boolean preLongsEqMax = (preLongs == maxPreLongs);
1✔
247
    if (!preLongsEq1 && !preLongsEqMax) {
1✔
248
      throw new SketchesArgumentException(
1✔
249
          "Possible Corruption: PreLongs must be 1 or " + maxPreLongs + ": " + preLongs);
250
    }
251
    if (serVer != SER_VER) {                            //Byte 1
1✔
252
      throw new SketchesArgumentException(
1✔
253
          "Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
254
    }
255
    final int actFamID = Family.FREQUENCY.getID();      //Byte 2
1✔
256
    if (familyID != actFamID) {
1✔
257
      throw new SketchesArgumentException(
1✔
258
          "Possible Corruption: FamilyID must be " + actFamID + ": " + familyID);
259
    }
260
    if (empty ^ preLongsEq1) {                          //Byte 5 and Byte 0
1✔
261
      throw new SketchesArgumentException(
1✔
262
          "Possible Corruption: (PreLongs == 1) ^ Empty == True.");
263
    }
264

265
    if (empty) {
1✔
266
      return new ItemsSketch<>(lgMaxMapSize, LG_MIN_MAP_SIZE);
1✔
267
    }
268
    //get full preamble
269
    final long[] preArr = new long[preLongs];
1✔
270
    srcMem.getLongArray(0, preArr, 0, preLongs);
1✔
271

272
    final ItemsSketch<T> fis = new ItemsSketch<>(lgMaxMapSize, lgCurMapSize);
1✔
273
    fis.streamWeight = 0; //update after
1✔
274
    fis.offset = preArr[3];
1✔
275

276
    final int preBytes = preLongs << 3;
1✔
277
    final int activeItems = extractActiveItems(preArr[1]);
1✔
278

279
    //Get countArray
280
    final long[] countArray = new long[activeItems];
1✔
281
    final int reqBytes = preBytes + activeItems * Long.BYTES; //count Arr only
1✔
282
    checkBounds(0, reqBytes, srcMem.getCapacity()); //check Memory capacity
1✔
283
    srcMem.getLongArray(preBytes, countArray, 0, activeItems);
1✔
284

285
    //Get itemArray
286
    final int itemsOffset = preBytes + (Long.BYTES * activeItems);
1✔
287
    final T[] itemArray = serDe.deserializeFromMemory(
1✔
288
        srcMem.region(itemsOffset, srcMem.getCapacity() - itemsOffset), 0, activeItems);
1✔
289
    //update the sketch
290
    for (int i = 0; i < activeItems; i++) {
1✔
291
      fis.update(itemArray[i], countArray[i]);
1✔
292
    }
293
    fis.streamWeight = preArr[2]; //override streamWeight due to updating
1✔
294
    return fis;
1✔
295
  }
296

297
  /**
298
   * Returns the estimated <i>a priori</i> error given the maxMapSize for the sketch and the
299
   * estimatedTotalStreamWeight.
300
   * @param maxMapSize the planned map size to be used when constructing this sketch.
301
   * @param estimatedTotalStreamWeight the estimated total stream weight.
302
   * @return the estimated <i>a priori</i> error.
303
   */
304
  public static double getAprioriError(final int maxMapSize, final long estimatedTotalStreamWeight) {
305
    return getEpsilon(maxMapSize) * estimatedTotalStreamWeight;
1✔
306
  }
307

308
  /**
309
   * Returns the current number of counters the sketch is configured to support.
310
   *
311
   * @return the current number of counters the sketch is configured to support.
312
   */
313
  public int getCurrentMapCapacity() {
314
    return this.curMapCap;
1✔
315
  }
316

317
  /**
318
   * Returns epsilon used to compute <i>a priori</i> error.
319
   * This is just the value <i>3.5 / maxMapSize</i>.
320
   * @param maxMapSize the planned map size to be used when constructing this sketch.
321
   * @return epsilon used to compute <i>a priori</i> error.
322
   */
323
  public static double getEpsilon(final int maxMapSize) {
324
    if (!isPowerOf2(maxMapSize)) {
1✔
325
      throw new SketchesArgumentException("maxMapSize is not a power of 2.");
1✔
326
    }
327
    return 3.5 / maxMapSize;
1✔
328
  }
329

330
  /**
331
   * Gets the estimate of the frequency of the given item.
332
   * Note: The true frequency of a item would be the sum of the counts as a result of the
333
   * two update functions.
334
   *
335
   * @param item the given item
336
   * @return the estimate of the frequency of the given item
337
   */
338
  public long getEstimate(final T item) {
339
    // If item is tracked:
340
    // Estimate = itemCount + offset; Otherwise it is 0.
341
    final long itemCount = hashMap.get(item);
1✔
342
    return (itemCount > 0) ? itemCount + offset : 0;
1✔
343
  }
344

345
  /**
346
   * Gets the guaranteed lower bound frequency of the given item, which can never be
347
   * negative.
348
   *
349
   * @param item the given item.
350
   * @return the guaranteed lower bound frequency of the given item. That is, a number which
351
   * is guaranteed to be no larger than the real frequency.
352
   */
353
  public long getLowerBound(final T item) {
354
    //LB = itemCount or 0
355
    return hashMap.get(item);
1✔
356
  }
357

358
  /**
359
   * Returns an array of Rows that include frequent items, estimates, upper and lower bounds
360
   * given a threshold and an ErrorCondition. If the threshold is lower than getMaximumError(),
361
   * then getMaximumError() will be used instead.
362
   *
363
   * <p>The method first examines all active items in the sketch (items that have a counter).
364
   *
365
   * <p>If <i>ErrorType = NO_FALSE_NEGATIVES</i>, this will include an item in the result
366
   * list if getUpperBound(item) &gt; threshold.
367
   * There will be no false negatives, i.e., no Type II error.
368
   * There may be items in the set with true frequencies less than the threshold
369
   * (false positives).</p>
370
   *
371
   * <p>If <i>ErrorType = NO_FALSE_POSITIVES</i>, this will include an item in the result
372
   * list if getLowerBound(item) &gt; threshold.
373
   * There will be no false positives, i.e., no Type I error.
374
   * There may be items omitted from the set with true frequencies greater than the
375
   * threshold (false negatives).</p>
376
   *
377
   * @param threshold to include items in the result list
378
   * @param errorType determines whether no false positives or no false negatives are
379
   * desired.
380
   * @return an array of frequent items
381
   */
382
  public Row<T>[] getFrequentItems(final long threshold, final ErrorType errorType) {
383
    return sortItems(threshold > getMaximumError() ? threshold : getMaximumError(), errorType);
1✔
384
  }
385

386
  /**
387
   * Returns an array of Rows that include frequent items, estimates, upper and lower bounds
388
   * given an ErrorCondition and the default threshold.
389
   * This is the same as getFrequentItems(getMaximumError(), errorType)
390
   *
391
   * @param errorType determines whether no false positives or no false negatives are
392
   * desired.
393
   * @return an array of frequent items
394
   */
395
  public Row<T>[] getFrequentItems(final ErrorType errorType) {
396
    return sortItems(getMaximumError(), errorType);
1✔
397
  }
398

399
  /**
400
   * @return An upper bound on the maximum error of getEstimate(item) for any item.
401
   * This is equivalent to the maximum distance between the upper bound and the lower bound
402
   * for any item.
403
   */
404
  public long getMaximumError() {
405
    return offset;
1✔
406
  }
407

408
  /**
409
   * Returns the maximum number of counters the sketch is configured to support.
410
   *
411
   * @return the maximum number of counters the sketch is configured to support.
412
   */
413
  public int getMaximumMapCapacity() {
414
    return (int) ((1 << lgMaxMapSize) * ReversePurgeLongHashMap.getLoadFactor());
1✔
415
  }
416

417
  /**
418
   * @return the number of active items in the sketch.
419
   */
420
  public int getNumActiveItems() {
421
    return hashMap.getNumActive();
1✔
422
  }
423

424
  /**
425
   * Returns the sum of the frequencies in the stream seen so far by the sketch
426
   *
427
   * @return the sum of the frequencies in the stream seen so far by the sketch
428
   */
429
  public long getStreamLength() {
430
    return this.streamWeight;
1✔
431
  }
432

433
  /**
434
   * Gets the guaranteed upper bound frequency of the given item.
435
   *
436
   * @param item the given item
437
   * @return the guaranteed upper bound frequency of the given item. That is, a number which
438
   * is guaranteed to be no smaller than the real frequency.
439
   */
440
  public long getUpperBound(final T item) {
441
    // UB = itemCount + offset
442
    return hashMap.get(item) + offset;
1✔
443
  }
444

445
  /**
446
   * Returns true if this sketch is empty
447
   *
448
   * @return true if this sketch is empty
449
   */
450
  public boolean isEmpty() {
451
    return getNumActiveItems() == 0;
1✔
452
  }
453

454
  /**
455
   * This function merges the other sketch into this one.
456
   * The other sketch may be of a different size.
457
   *
458
   * @param other sketch of this class
459
   * @return a sketch whose estimates are within the guarantees of the
460
   * largest error tolerance of the two merged sketches.
461
   */
462
  public ItemsSketch<T> merge(final ItemsSketch<T> other) {
463
    if (other == null) { return this; }
1✔
464
    if (other.isEmpty()) { return this; }
1✔
465

466
    final long streamLen = this.streamWeight + other.streamWeight; //capture before merge
1✔
467

468
    final ReversePurgeItemHashMap.Iterator<T> iter = other.hashMap.iterator();
1✔
469
    while (iter.next()) { //this may add to offset during rebuilds
1✔
470
      this.update(iter.getKey(), iter.getValue());
1✔
471
    }
472
    this.offset += other.offset;
1✔
473
    this.streamWeight = streamLen; //corrected streamWeight
1✔
474
    return this;
1✔
475
  }
476

477
  /**
478
   * Resets this sketch to a virgin state.
479
   */
480
  public void reset() {
481
    hashMap = new ReversePurgeItemHashMap<>(1 << LG_MIN_MAP_SIZE);
1✔
482
    this.curMapCap = hashMap.getCapacity();
1✔
483
    this.offset = 0;
1✔
484
    this.streamWeight = 0;
1✔
485
  }
1✔
486

487
  //Serialization
488

489
  /**
490
   * Returns a byte array representation of this sketch
491
   * @param serDe an instance of ArrayOfItemsSerDe
492
   * @return a byte array representation of this sketch
493
   */
494
  public byte[] toByteArray(final ArrayOfItemsSerDe<T> serDe) {
495
    final int preLongs;
496
    final int outBytes;
497
    final boolean empty = isEmpty();
1✔
498
    final int activeItems = getNumActiveItems();
1✔
499
    byte[] bytes = null;
1✔
500
    if (empty) {
1✔
501
      preLongs = 1;
1✔
502
      outBytes = 8;
1✔
503
    } else {
504
      preLongs = Family.FREQUENCY.getMaxPreLongs();
1✔
505
      bytes = serDe.serializeToByteArray(hashMap.getActiveKeys());
1✔
506
      outBytes = ((preLongs + activeItems) << 3) + bytes.length;
1✔
507
    }
508
    final byte[] outArr = new byte[outBytes];
1✔
509
    final WritableMemory mem = WritableMemory.writableWrap(outArr);
1✔
510

511
    // build first preLong empty or not
512
    long pre0 = 0L;
1✔
513
    pre0 = insertPreLongs(preLongs, pre0);                  //Byte 0
1✔
514
    pre0 = insertSerVer(SER_VER, pre0);                     //Byte 1
1✔
515
    pre0 = insertFamilyID(Family.FREQUENCY.getID(), pre0);  //Byte 2
1✔
516
    pre0 = insertLgMaxMapSize(lgMaxMapSize, pre0);          //Byte 3
1✔
517
    pre0 = insertLgCurMapSize(hashMap.getLgLength(), pre0); //Byte 4
1✔
518
    pre0 = empty ? insertFlags(EMPTY_FLAG_MASK, pre0) : insertFlags(0, pre0); //Byte 5
1✔
519

520
    if (empty) {
1✔
521
      mem.putLong(0, pre0);
1✔
522
    } else {
523
      final long pre = 0;
1✔
524
      final long[] preArr = new long[preLongs];
1✔
525
      preArr[0] = pre0;
1✔
526
      preArr[1] = insertActiveItems(activeItems, pre);
1✔
527
      preArr[2] = this.streamWeight;
1✔
528
      preArr[3] = this.offset;
1✔
529
      mem.putLongArray(0, preArr, 0, preLongs);
1✔
530
      final int preBytes = preLongs << 3;
1✔
531
      mem.putLongArray(preBytes, hashMap.getActiveValues(), 0, activeItems);
1✔
532
      mem.putByteArray(preBytes + (this.getNumActiveItems() << 3), bytes, 0, bytes.length);
1✔
533
    }
534
    return outArr;
1✔
535
  }
536

537
  /**
538
   * Returns a human readable summary of this sketch.
539
   * @return a human readable summary of this sketch.
540
   */
541
  @Override
542
  public String toString() {
543
    final StringBuilder sb = new StringBuilder();
1✔
544
    sb.append("FrequentItemsSketch<T>:").append(LS);
1✔
545
    sb.append("  Stream Length    : " + streamWeight).append(LS);
1✔
546
    sb.append("  Max Error Offset : " + offset).append(LS);
1✔
547
    sb.append(hashMap.toString());
1✔
548
    return sb.toString();
1✔
549
  }
550

551
  /**
552
   * Returns a human readable string of the preamble of a byte array image of a ItemsSketch.
553
   * @param byteArr the given byte array
554
   * @return a human readable string of the preamble of a byte array image of a ItemsSketch.
555
   */
556
  public static String toString(final byte[] byteArr) {
557
    return toString(Memory.wrap(byteArr));
1✔
558
  }
559

560
  /**
561
   * Returns a human readable string of the preamble of a Memory image of a ItemsSketch.
562
   * @param mem the given Memory object
563
   * @return a human readable string of the preamble of a Memory image of a ItemsSketch.
564
   */
565
  public static String toString(final Memory mem) {
566
    return PreambleUtil.preambleToString(mem);
1✔
567
  }
568

569
  /**
570
   * Update this sketch with an item and a frequency count of one.
571
   * @param item for which the frequency should be increased.
572
   */
573
  public void update(final T item) {
574
    update(item, 1);
1✔
575
  }
1✔
576

577
  /**
578
   * Update this sketch with an item and a positive frequency count.
579
   * @param item for which the frequency should be increased. The sketch uses
580
   * hashCode() and equals() methods of the type T.
581
   * @param count the amount by which the frequency of the item should be increased.
582
   * A count of zero is a no-op, and a negative count will throw an exception.
583
   */
584
  public void update(final T item, final long count) {
585
    if ((item == null) || (count == 0)) {
1✔
586
      return;
1✔
587
    }
588
    if (count < 0) {
1✔
589
      throw new SketchesArgumentException("Count may not be negative");
1✔
590
    }
591
    this.streamWeight += count;
1✔
592
    hashMap.adjustOrPutValue(item, count);
1✔
593

594
    if (getNumActiveItems() > curMapCap) { //over the threshold, we need to do something
1✔
595
      if (hashMap.getLgLength() < lgMaxMapSize) { //below tgt size, we can grow
1✔
596
        hashMap.resize(2 * hashMap.getLength());
1✔
597
        curMapCap = hashMap.getCapacity();
1✔
598
      } else { //At tgt size, must purge
599
        offset += hashMap.purge(sampleSize);
1✔
600
        if (getNumActiveItems() > getMaximumMapCapacity()) {
1✔
601
          throw new SketchesStateException("Purge did not reduce active items.");
×
602
        }
603
      }
604
    }
605
  }
1✔
606

607
  /**
608
   * Row class that defines the return values from a getFrequentItems query.
609
   * @param <T> type of item
610
   */
611
  public static class Row<T> implements Comparable<Row<T>> {
612
    final T item;
613
    final long est;
614
    final long ub;
615
    final long lb;
616
    private static final String FMT =  "  %12d%12d%12d %s";
617
    private static final String HFMT = "  %12s%12s%12s %s";
618

619
    Row(final T item, final long estimate, final long ub, final long lb) {
1✔
620
      this.item = item;
1✔
621
      this.est = estimate;
1✔
622
      this.ub = ub;
1✔
623
      this.lb = lb;
1✔
624
    }
1✔
625

626
    /**
627
     * @return item of type T
628
     */
629
    public T getItem() { return item; }
1✔
630

631
    /**
632
     * @return the estimate
633
     */
634
    public long getEstimate() { return est; }
1✔
635

636
    /**
637
     * @return the upper bound
638
     */
639
    public long getUpperBound() { return ub; }
1✔
640

641
    /**
642
     * @return return the lower bound
643
     */
644
    public long getLowerBound() { return lb; }
1✔
645

646
    /**
647
     * @return the descriptive row header
648
     */
649
    public static String getRowHeader() {
650
      return String.format(HFMT,"Est", "UB", "LB", "Item");
1✔
651
    }
652

653
    @Override
654
    public String toString() {
655
      return String.format(FMT,  est, ub, lb, item.toString());
1✔
656
    }
657

658
    /**
659
     * This compareTo is strictly limited to the Row.getEstimate() value and does not imply any
660
     * ordering whatsoever to the other elements of the row: item and upper and lower bounds.
661
     * Defined this way, this compareTo will be consistent with hashCode() and equals(Object).
662
     * @param that the other row to compare to.
663
     * @return a negative integer, zero, or a positive integer as this.getEstimate() is less than,
664
     * equal to, or greater than that.getEstimate().
665
     */
666
    @Override
667
    public int compareTo(final Row<T> that) {
668
      return (this.est < that.est) ? -1 : (this.est > that.est) ? 1 : 0;
1✔
669
    }
670

671
    /**
672
     * This hashCode is computed only from the Row.getEstimate() value.
673
     * Defined this way, this hashCode will be consistent with equals(Object):<br>
674
     * If (x.equals(y)) implies: x.hashCode() == y.hashCode().<br>
675
     * If (!x.equals(y)) does NOT imply: x.hashCode() != y.hashCode().
676
     * @return the hashCode computed from getEstimate().
677
     */
678
    @Override
679
    public int hashCode() {
680
      final int prime = 31;
1✔
681
      int result = 1;
1✔
682
      result = (prime * result) + (int) (est ^ (est >>> 32));
1✔
683
      return result;
1✔
684
    }
685

686
    /**
687
     * This equals is computed only from the Row.getEstimate() value and does not imply equality
688
     * of the other items within the row: item and upper and lower bounds.
689
     * Defined this way, this equals will be consistent with compareTo(Row).
690
     * @param obj the other row to determine equality with.
691
     * @return true if this.getEstimate() equals ((Row&lt;T&gt;)obj).getEstimate().
692
     */
693
    @SuppressWarnings("unchecked")
694
    @Override
695
    public boolean equals(final Object obj) {
696
      if (this == obj) { return true; }
1✔
697
      if (obj == null) { return false; }
1✔
698
      if ( !(obj instanceof Row)) { return false; }
1✔
699
      final Row<T> that = (Row<T>) obj;
1✔
700
      if (est != that.est) { return false; }
1✔
701
      return true;
1✔
702
    }
703

704
  } //End of class Row<T>
705

706
  Row<T>[] sortItems(final long threshold, final ErrorType errorType) {
707
    final ArrayList<Row<T>> rowList = new ArrayList<>();
1✔
708
    final ReversePurgeItemHashMap.Iterator<T> iter = hashMap.iterator();
1✔
709
    if (errorType == ErrorType.NO_FALSE_NEGATIVES) {
1✔
710
      while (iter.next()) {
1✔
711
        final long est = getEstimate(iter.getKey());
1✔
712
        final long ub = getUpperBound(iter.getKey());
1✔
713
        final long lb = getLowerBound(iter.getKey());
1✔
714
        if (ub >= threshold) {
1✔
715
          final Row<T> row = new Row<>(iter.getKey(), est, ub, lb);
1✔
716
          rowList.add(row);
1✔
717
        }
718
      }
1✔
719
    } else { //NO_FALSE_POSITIVES
720
      while (iter.next()) {
1✔
721
        final long est = getEstimate(iter.getKey());
1✔
722
        final long ub = getUpperBound(iter.getKey());
1✔
723
        final long lb = getLowerBound(iter.getKey());
1✔
724
        if (lb >= threshold) {
1✔
725
          final Row<T> row = new Row<>(iter.getKey(), est, ub, lb);
1✔
726
          rowList.add(row);
1✔
727
        }
728
      }
1✔
729
    }
730

731
    // descending order
732
    rowList.sort(new Comparator<Row<T>>() {
1✔
733
      @Override
734
      public int compare(final Row<T> r1, final Row<T> r2) {
735
        return r2.compareTo(r1);
1✔
736
      }
737
    });
738

739
    @SuppressWarnings("unchecked")
740
    final Row<T>[] rowsArr =
1✔
741
      rowList.toArray((Row<T>[]) Array.newInstance(Row.class, rowList.size()));
1✔
742
    return rowsArr;
1✔
743
  }
744

745
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc