• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.85
/src/main/java/org/apache/datasketches/kll/KllHelper.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.kll;
21

22
import static java.lang.Math.abs;
23
import static java.lang.Math.ceil;
24
import static java.lang.Math.exp;
25
import static java.lang.Math.log;
26
import static java.lang.Math.max;
27
import static java.lang.Math.min;
28
import static java.lang.Math.pow;
29
import static java.lang.Math.round;
30
import static org.apache.datasketches.common.Family.KLL;
31
import static org.apache.datasketches.common.Util.LS;
32
import static org.apache.datasketches.common.Util.bitAt;
33
import static org.apache.datasketches.common.Util.floorPowerOf2;
34
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
35
import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK;
36
import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK;
37
import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK;
38
import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_EMPTY;
39
import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_FULL;
40
import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_SINGLE;
41
import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
42
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
43
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
44
import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH;
45
import static org.apache.datasketches.quantilescommon.QuantilesAPI.UNSUPPORTED_MSG;
46

47
import java.nio.ByteOrder;
48
import java.util.Arrays;
49

50
import org.apache.datasketches.common.ArrayOfItemsSerDe;
51
import org.apache.datasketches.common.SketchesArgumentException;
52
import org.apache.datasketches.kll.KllSketch.SketchStructure;
53
import org.apache.datasketches.kll.KllSketch.SketchType;
54
import org.apache.datasketches.memory.WritableBuffer;
55
import org.apache.datasketches.memory.WritableMemory;
56

57
/**
58
 * This class provides some useful sketch analysis tools that are used internally.
59
 *
60
 * @author Lee Rhodes
61
 */
62
final class KllHelper {
×
63
  static final double EPS_DELTA_THRESHOLD = 1E-6;
64
  static final double MIN_EPS = 4.7634E-5;
65
  static final double PMF_COEF = 2.446;
66
  static final double PMF_EXP = 0.9433;
67
  static final double CDF_COEF = 2.296;
68
  static final double CDF_EXP = 0.9723;
69

70
  static class GrowthStats {
1✔
71
    SketchType sketchType;
72
    int k;
73
    int m;
74
    long givenN;
75
    long maxN;
76
    int numLevels;
77
    int maxItems;
78
    int compactBytes;
79
    int updatableBytes;
80
  }
81

82
  static class LevelStats {
83
    long n;
84
    public int numLevels;
85
    int numItems;
86

87
    LevelStats(final long n, final int numLevels, final int numItems) {
1✔
88
      this.n = n;
1✔
89
      this.numLevels = numLevels;
1✔
90
      this.numItems = numItems;
1✔
91
    }
1✔
92
  }
93

94
  /**
95
   * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index
96
   */
97
  static long[] powersOfThree =
1✔
98
      new long[] {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441,
99
  1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467,
100
  3486784401L, 10460353203L, 31381059609L, 94143178827L, 282429536481L,
101
  847288609443L, 2541865828329L, 7625597484987L, 22876792454961L, 68630377364883L,
102
  205891132094649L};
103

104
  /**
105
   * Checks the validity of the given k
106
   * @param k must be greater than 7 and less than 65536.
107
   */
108
  static void checkK(final int k, final int m) {
109
    if (k < m || k > KllSketch.MAX_K) {
1✔
110
      throw new SketchesArgumentException(
1✔
111
          "K must be >= " + m + " and <= " + KllSketch.MAX_K + ": " + k);
112
    }
113
  }
1✔
114

115
  static void checkM(final int m) {
116
    if (m < KllSketch.MIN_M || m > KllSketch.MAX_M || ((m & 1) == 1)) {
1✔
117
      throw new SketchesArgumentException(
1✔
118
          "M must be >= 2, <= 8 and even: " + m);
119
    }
120
  }
1✔
121

122
  /**
123
   * Returns the approximate maximum number of items that this sketch can handle
124
   * @param k The sizing / accuracy parameter of the sketch in items.
125
   * Note: this method actually works for k items up to k = 2^29 and 61 levels,
126
   * however only k items up to (2^16 - 1) are currently used by the sketch.
127
   * @param m the size of the smallest level in items. Default is 8.
128
   * @param numLevels the upper bound number of levels based on <i>n</i> items.
129
   * @return the total item capacity of the sketch.
130
   */
131
  static int computeTotalItemCapacity(final int k, final int m, final int numLevels) {
132
    long total = 0;
1✔
133
    for (int level = 0; level < numLevels; level++) {
1✔
134
      total += levelCapacity(k, numLevels, level, m);
1✔
135
    }
136
    return (int) total;
1✔
137
  }
138

139
  /**
140
   * Convert the individual weights into cumulative weights.
141
   * An array of {1,1,1,1} becomes {1,2,3,4}
142
   * @param array of actual weights from the sketch, where first element is not zero.
143
   * @return total weight
144
   */
145
  public static long convertToCumulative(final long[] array) {
146
    long subtotal = 0;
1✔
147
    for (int i = 0; i < array.length; i++) {
1✔
148
      final long newSubtotal = subtotal + array[i];
1✔
149
      subtotal = array[i] = newSubtotal;
1✔
150
    }
151
    return subtotal;
1✔
152
  }
153

154
  /**
155
   * Create the Levels Array from given weight
156
   * Used with weighted update only.
157
   * @param weight the given weight
158
   * @return the Levels Array
159
   */
160
  static int[] createLevelsArray(final long weight) {
161
    final int numLevels = 64 - Long.numberOfLeadingZeros(weight);
1✔
162
    if (numLevels > 61) {
1✔
163
      throw new SketchesArgumentException("The requested weight must not exceed 2^61");
×
164
    }
165
    final int[] levelsArr = new int[numLevels + 1]; //always one more than numLevels
1✔
166
    int itemsArrIndex = 0;
1✔
167
    levelsArr[0] = itemsArrIndex;
1✔
168
    for (int level = 0; level < numLevels; level++) {
1✔
169
      levelsArr[level + 1] = itemsArrIndex += bitAt(weight, level);
1✔
170
    }
171
    return levelsArr;
1✔
172
  }
173

174
  static int currentLevelSizeItems(final int level, final int numLevels, final int[] levels) {
175
    if (level >= numLevels) { return 0; }
1✔
176
    return levels[level + 1] - levels[level];
1✔
177
  }
178

179
  /**
180
   * Given k, m, and numLevels, this computes and optionally prints the structure of the sketch when the given
181
   * number of levels are completely filled.
182
   * @param k the given user configured sketch parameter
183
   * @param m the given user configured sketch parameter
184
   * @param numLevels the given number of levels of the sketch
185
   * @param printSketchStructure if true will print the details of the sketch structure at the given numLevels.
186
   * @return LevelStats with the final summary of the sketch's cumulative N,
187
   * and cumulative items at the given numLevels.
188
   */
189
  static LevelStats getFinalSketchStatsAtNumLevels(
190
      final int k,
191
      final int m,
192
      final int numLevels,
193
      final boolean printSketchStructure) {
194
    int cumItems = 0;
1✔
195
    long cumN = 0;
1✔
196
    if (printSketchStructure) {
1✔
197
      println("SKETCH STRUCTURE:");
1✔
198
      println("Given K        : " + k);
1✔
199
      println("Given M        : " + m);
1✔
200
      println("Given NumLevels: " + numLevels);
1✔
201
      printf("%6s %8s %12s %18s %18s" + LS, "Level", "Items", "CumItems", "N at Level", "CumN");
1✔
202
    }
203
    for (int level = 0; level < numLevels; level++) {
1✔
204
      final int items = KllHelper.levelCapacity(k, numLevels, level, m);
1✔
205
      final long n = (long)items << level;
1✔
206
      final LevelStats lvlStats = new LevelStats(n, numLevels, items);
1✔
207
      cumItems += lvlStats.numItems;
1✔
208
      cumN += lvlStats.n;
1✔
209
      if (printSketchStructure) {
1✔
210
        printf("%6d %,8d %,12d %,18d %,18d" + LS, level, lvlStats.numItems, cumItems, lvlStats.n, cumN);
1✔
211
      }
212
    }
213
    return new LevelStats(cumN, numLevels, cumItems);
1✔
214
  }
215

216
  /**
217
   * This method is for direct Double and Float sketches only.
218
   * Given k, m, n, and the sketch type, this computes (and optionally prints) the growth scheme for a sketch as it
219
   * grows large enough to accommodate a stream length of n items.
220
   * @param k the given user configured sketch parameter
221
   * @param m the given user configured sketch parameter
222
   * @param n the desired stream length
223
   * @param sketchType the given sketch type: either DOUBLES_SKETCH or FLOATS_SKETCH.
224
   * @param printGrowthScheme if true the entire growth scheme of the sketch will be printed.
225
   * @return GrowthStats with the final numItems of the growth scheme
226
   */
227
  static GrowthStats getGrowthSchemeForGivenN(
228
      final int k,
229
      final int m,
230
      final long n,
231
      final SketchType sketchType,
232
      final boolean printGrowthScheme) {
233
    if (sketchType == ITEMS_SKETCH) { throw new SketchesArgumentException(UNSUPPORTED_MSG); }
1✔
234
    LevelStats lvlStats;
235
    final GrowthStats gStats = new GrowthStats();
1✔
236
    gStats.numLevels = 0;
1✔
237
    gStats.k = k;
1✔
238
    gStats.m = m;
1✔
239
    gStats.givenN = n;
1✔
240
    gStats.sketchType = sketchType;
1✔
241
    if (printGrowthScheme) {
1✔
242
      println("GROWTH SCHEME:");
1✔
243
      println("Given SketchType: " + gStats.sketchType.toString());
1✔
244
      println("Given K         : " + gStats.k);
1✔
245
      println("Given M         : " + gStats.m);
1✔
246
      println("Given N         : " + gStats.givenN);
1✔
247
      printf("%10s %10s %20s %13s %15s" + LS, "NumLevels", "MaxItems", "MaxN", "CompactBytes", "UpdatableBytes");
1✔
248
    }
249
    final int typeBytes = sketchType.getBytes();
1✔
250
    do {
251
      gStats.numLevels++; //
1✔
252
      lvlStats = getFinalSketchStatsAtNumLevels(gStats.k, gStats.m, gStats.numLevels, false);
1✔
253
      gStats.maxItems = lvlStats.numItems;
1✔
254
      gStats.maxN = lvlStats.n; //
1✔
255
      gStats.compactBytes =
1✔
256
          gStats.maxItems * typeBytes + gStats.numLevels * Integer.BYTES + 2 * typeBytes + DATA_START_ADR;
257
      gStats.updatableBytes = gStats.compactBytes + Integer.BYTES;
1✔
258
      if (printGrowthScheme) {
1✔
259
        printf("%10d %,10d %,20d %,13d %,15d" + LS,
1✔
260
            gStats.numLevels, gStats.maxItems, gStats.maxN, gStats.compactBytes, gStats.updatableBytes);
1✔
261
      }
262
    } while (lvlStats.n < n);
1✔
263

264
    //gStats.numLevels = lvlStats.numLevels; //
265
    //gStats.maxItems = lvlStats.numItems; //
266
    return gStats;
1✔
267
  }
268

269
  // constants were derived as the best fit to 99 percentile empirically measured max error in
270
  // thousands of trials
271
  static int getKFromEpsilon(final double epsilon, final boolean pmf) {
272
    //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false.
273
    final double eps = max(epsilon, MIN_EPS);
1✔
274
    final double kdbl = pmf
1✔
275
        ? exp(log(PMF_COEF / eps) / PMF_EXP)
1✔
276
        : exp(log(CDF_COEF / eps) / CDF_EXP);
1✔
277
    final double krnd = round(kdbl);
1✔
278
    final double del = abs(krnd - kdbl);
1✔
279
    final int k = (int) (del < EPS_DELTA_THRESHOLD ? krnd : ceil(kdbl));
1✔
280
    return max(KllSketch.MIN_M, min(KllSketch.MAX_K, k));
1✔
281
  }
282

283
  /**
284
   * Gets the normalized rank error given k and pmf.
285
   * Static method version of the <i>getNormalizedRankError(boolean)</i>.
286
   * @param k the configuration parameter
287
   * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
288
   * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
289
   * @return if pmf is true, the normalized rank error for the getPMF() function.
290
   * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
291
   * @see KllHeapDoublesSketch
292
   */
293
  // constants were derived as the best fit to 99 percentile empirically measured max error in
294
  // thousands of trials
295
  static double getNormalizedRankError(final int k, final boolean pmf) {
296
    return pmf
1✔
297
        ? PMF_COEF / pow(k, PMF_EXP)
1✔
298
        : CDF_COEF / pow(k, CDF_EXP);
1✔
299
  }
300

301
  static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) {
302
    return levels[numLevels] - levels[1];
1✔
303
  }
304

305
  /**
306
   * Returns the item capacity of a specific level.
307
   * @param k the accuracy parameter of the sketch. Because of the Java limits on array sizes,
308
   * the theoretical maximum k is 2^29. However, this implementation of the KLL sketch
309
   * limits k to 2^16 -1.
310
   * @param numLevels the number of current levels in the sketch. Maximum is 61.
311
   * @param level the zero-based index of a level. This varies from 0 to 60.
312
   * @param m the minimum level width. Default is 8.
313
   * @return the capacity of a specific level
314
   */
315
  static int levelCapacity(final int k, final int numLevels, final int level, final int m) {
316
    assert (k <= (1 << 29)) : "The given k is > 2^29.";
1✔
317
    assert (numLevels >= 1) && (numLevels <= 61) : "The given numLevels is < 1 or > 61";
1✔
318
    assert (level >= 0) && (level < numLevels) : "The given level is < 0 or >= numLevels.";
1✔
319
    final int depth = numLevels - level - 1; //depth is # levels from the top level (= 0)
1✔
320
    return (int) Math.max(m, intCapAux(k, depth));
1✔
321
  }
322

323
  /**
324
   * This method is for direct Double and Float sketches only and does the following:
325
   * <ul>
326
   * <li>Determines if the required sketch bytes will fit in the current Memory.
327
   * If so, it will stretch the positioning of the arrays to fit. Otherwise:
328
   * <li>Allocates a new WritableMemory of the required size</li>
329
   * <li>Copies over the preamble as is (20 bytes)</li>
330
   * <li>The caller is responsible for filling the remainder and updating the preamble.</li>
331
   * </ul>
332
   *
333
   * @param sketch The current sketch that needs to be expanded.
334
   * @param newLevelsArrLen the element length of the new Levels array.
335
   * @param newItemsArrLen the element length of the new Items array.
336
   * @return the new expanded memory with preamble.
337
   */
338
  static WritableMemory memorySpaceMgmt(
339
      final KllSketch sketch,
340
      final int newLevelsArrLen,
341
      final int newItemsArrLen) {
342
    final KllSketch.SketchType sketchType = sketch.sketchType;
1✔
343
    if (sketchType == ITEMS_SKETCH) { throw new SketchesArgumentException(UNSUPPORTED_MSG); }
1✔
344
    final WritableMemory wmem = sketch.getWritableMemory();
1✔
345
    if (wmem == null) { return null; }
1✔
346
    final WritableMemory oldWmem = wmem;
1✔
347
    final int typeBytes = sketchType.getBytes();
1✔
348
    final int requiredSketchBytes =  DATA_START_ADR
1✔
349
      + newLevelsArrLen * Integer.BYTES
350
      + 2 * typeBytes
351
      + newItemsArrLen * typeBytes;
352
    final WritableMemory newWmem;
353

354
    if (requiredSketchBytes > oldWmem.getCapacity()) { //Acquire new WritableMemory
1✔
355
      newWmem = sketch.getMemoryRequestServer().request(oldWmem, requiredSketchBytes);
1✔
356
      oldWmem.copyTo(0, newWmem, 0, DATA_START_ADR); //copy preamble (first 20 bytes)
1✔
357
    }
358
    else { //Expand or contract in current memory
359
      newWmem = oldWmem;
1✔
360
    }
361
    assert requiredSketchBytes <= newWmem.getCapacity();
1✔
362
    return newWmem;
1✔
363
  }
364

365
  private static String outputDataDetail(final KllSketch sketch) {
366
    final int[] levelsArr = sketch.getLevelsArray(SketchStructure.UPDATABLE);
1✔
367
    final int numLevels = sketch.getNumLevels();
1✔
368
    final int k = sketch.getK();
1✔
369
    final int m = sketch.getM();
1✔
370
    final StringBuilder sb =  new StringBuilder();
1✔
371
    sb.append(LS + "### KLL ItemsArray & LevelsArray Detail:").append(LS);
1✔
372
    sb.append("Index, Value").append(LS);
1✔
373
    if (levelsArr[0] > 0) {
1✔
374
      final String gbg = " Free Space, Size = " + levelsArr[0];
1✔
375
      for (int i = 0; i < levelsArr[0]; i++) {
1✔
376
        sb.append("    ").append(i + ", ").append(sketch.getItemAsString(i));
1✔
377
        if (i == 0) { sb.append(gbg); }
1✔
378
        sb.append(LS);
1✔
379
      }
380
    }
381
    int level = 0;
1✔
382
    while (level < numLevels) {
1✔
383
      final int fromIndex = levelsArr[level];
1✔
384
      final int toIndex = levelsArr[level + 1]; // exclusive
1✔
385
      String lvlData = "";
1✔
386
      if (fromIndex < toIndex) {
1✔
387
        lvlData = " Level[" + level + "]=" + levelsArr[level]
1✔
388
            + ", Cap=" + KllHelper.levelCapacity(k, numLevels, level, m)
1✔
389
            + ", Size=" + KllHelper.currentLevelSizeItems(level, numLevels, levelsArr)
1✔
390
            + ", Wt=" + (1 << level) + LS;
391
      }
392

393
      for (int i = fromIndex; i < toIndex; i++) {
1✔
394
        sb.append("    ").append(i + ", ").append(sketch.getItemAsString(i));
1✔
395
        if (i == fromIndex) { sb.append(lvlData); } else { sb.append(LS); }
1✔
396
      }
397
      level++;
1✔
398
    }
1✔
399
    sb.append("   ----------Level[" + level + "]=" + levelsArr[level] + ": ItemsArray[].length");
1✔
400
    sb.append(LS);
1✔
401
    sb.append("### End ItemsArray & LevelsArray Detail").append(LS);
1✔
402
    return sb.toString();
1✔
403
  }
404

405
  private static String outputLevels(final int k, final int m, final int numLevels, final int[] levelsArr) {
406
    final StringBuilder sb =  new StringBuilder();
1✔
407
    sb.append(LS + "### KLL Levels Array:").append(LS)
1✔
408
    .append(" Level, Offset: Nominal Capacity, Actual Capacity").append(LS);
1✔
409
    int level = 0;
1✔
410
    for ( ; level < numLevels; level++) {
1✔
411
      sb.append("     ").append(level).append(", ").append(levelsArr[level]).append(": ")
1✔
412
      .append(KllHelper.levelCapacity(k, numLevels, level, m))
1✔
413
      .append(", ").append(KllHelper.currentLevelSizeItems(level, numLevels, levelsArr)).append(LS);
1✔
414
    }
415
    sb.append("     ").append(level).append(", ").append(levelsArr[level]).append(": ----ItemsArray[].length")
1✔
416
    .append(LS);
1✔
417
    sb.append("### End Levels Array").append(LS);
1✔
418
    return sb.toString();
1✔
419
  }
420

421
  static long sumTheSampleWeights(final int num_levels, final int[] levels) {
422
    long total = 0;
1✔
423
    long weight = 1;
1✔
424
    for (int i = 0; i < num_levels; i++) {
1✔
425
      total += weight * (levels[i + 1] - levels[i]);
1✔
426
      weight *= 2;
1✔
427
    }
428
    return total;
1✔
429
  }
430

431
  static  byte[] toByteArray(final KllSketch srcSk, final boolean updatable) {
432
    //ITEMS_SKETCH byte array is never updatable
433
    final boolean myUpdatable = srcSk.sketchType == ITEMS_SKETCH ? false : updatable;
1✔
434
    final long srcN = srcSk.getN();
1✔
435
    final SketchStructure tgtStructure;
436
    if (myUpdatable) { tgtStructure = UPDATABLE; }
1✔
437
    else if (srcN == 0) { tgtStructure = COMPACT_EMPTY; }
1✔
438
    else if (srcN == 1) { tgtStructure = COMPACT_SINGLE; }
1✔
439
    else { tgtStructure = COMPACT_FULL; }
1✔
440
    final int totalBytes = srcSk.currentSerializedSizeBytes(myUpdatable);
1✔
441
    final byte[] bytesOut = new byte[totalBytes];
1✔
442
    final WritableBuffer wbuf = WritableMemory.writableWrap(bytesOut).asWritableBuffer(ByteOrder.LITTLE_ENDIAN);
1✔
443

444
    //ints 0,1
445
    final byte preInts = (byte)tgtStructure.getPreInts();
1✔
446
    final byte serVer = (byte)tgtStructure.getSerVer();
1✔
447
    final byte famId = (byte)(KLL.getID());
1✔
448
    final byte flags = (byte) ((srcSk.isEmpty() ? EMPTY_BIT_MASK : 0)
1✔
449
        | (srcSk.isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0)
1✔
450
        | (srcSk.getN() == 1 ? SINGLE_ITEM_BIT_MASK : 0));
1✔
451
    final short k = (short) srcSk.getK();
1✔
452
    final byte m = (byte) srcSk.getM();
1✔
453

454
    //load first 8 bytes
455
    wbuf.putByte(preInts); //byte 0
1✔
456
    wbuf.putByte(serVer);
1✔
457
    wbuf.putByte(famId);
1✔
458
    wbuf.putByte(flags);
1✔
459
    wbuf.putShort(k);
1✔
460
    wbuf.putByte(m);
1✔
461
    wbuf.incrementPosition(1); //byte 7 is unused
1✔
462

463
    if (tgtStructure == COMPACT_EMPTY) {
1✔
464
      return bytesOut;
1✔
465
    }
466

467
    if (tgtStructure == COMPACT_SINGLE) {
1✔
468
      final byte[] siByteArr = srcSk.getSingleItemByteArr();
1✔
469
      final int len = siByteArr.length;
1✔
470
      wbuf.putByteArray(siByteArr, 0, len);
1✔
471
      wbuf.incrementPosition(-len);
1✔
472
      return bytesOut;
1✔
473
    }
474

475
    // Tgt is either COMPACT_FULL or UPDATABLE
476
    //ints 2,3
477
    final long n = srcSk.getN();
1✔
478
    //ints 4
479
    final short minK = (short) srcSk.getMinK();
1✔
480
    final byte numLevels = (byte) srcSk.getNumLevels();
1✔
481
    //end of full preamble
482
    final int[] lvlsArr = srcSk.getLevelsArray(tgtStructure);
1✔
483
    final byte[] minMaxByteArr = srcSk.getMinMaxByteArr();
1✔
484
    final byte[] itemsByteArr = tgtStructure == COMPACT_FULL
1✔
485
        ? srcSk.getRetainedItemsByteArr()
1✔
486
        : srcSk.getTotalItemsByteArr();
1✔
487

488
    wbuf.putLong(n);
1✔
489
    wbuf.putShort(minK);
1✔
490
    wbuf.putByte(numLevels);
1✔
491
    wbuf.incrementPosition(1);
1✔
492
    wbuf.putIntArray(lvlsArr, 0, lvlsArr.length);
1✔
493
    wbuf.putByteArray(minMaxByteArr, 0, minMaxByteArr.length);
1✔
494
    wbuf.putByteArray(itemsByteArr, 0, itemsByteArr.length);
1✔
495
    return bytesOut;
1✔
496
  }
497

498
  static <T> String toStringImpl(final KllSketch sketch, final boolean withLevels, final boolean withLevelsAndItems,
499
      final ArrayOfItemsSerDe<T> serDe) {
500
    final StringBuilder sb = new StringBuilder();
1✔
501
    final int k = sketch.getK();
1✔
502
    final int m = sketch.getM();
1✔
503
    final int numLevels = sketch.getNumLevels();
1✔
504
    final int[] fullLevelsArr = sketch.getLevelsArray(UPDATABLE);
1✔
505

506
    final SketchType sketchType = sketch.sketchType;
1✔
507
    final boolean hasMemory = sketch.hasMemory();
1✔
508
    final long n = sketch.getN();
1✔
509
    final String epsPct = String.format("%.3f%%", sketch.getNormalizedRankError(false) * 100);
1✔
510
    final String epsPMFPct = String.format("%.3f%%", sketch.getNormalizedRankError(true) * 100);
1✔
511
    final boolean compact = sketch.isCompactMemoryFormat();
1✔
512

513
    final String directStr = hasMemory ? "Direct" : "";
1✔
514
    final String compactStr = compact ? "Compact" : "";
1✔
515
    final String readOnlyStr = sketch.isReadOnly() ? "true" + ("(" + (compact ? "Format" : "Memory") + ")") : "false";
1✔
516
    final String skTypeStr = sketchType.getName();
1✔
517
    final String className = "Kll" + directStr + compactStr + skTypeStr;
1✔
518

519
    sb.append(LS + "### ").append(className).append(" Summary:").append(LS);
1✔
520
    sb.append("   K                      : ").append(k).append(LS);
1✔
521
    sb.append("   Dynamic min K          : ").append(sketch.getMinK()).append(LS);
1✔
522
    sb.append("   M                      : ").append(m).append(LS);
1✔
523
    sb.append("   N                      : ").append(n).append(LS);
1✔
524
    sb.append("   Epsilon                : ").append(epsPct).append(LS);
1✔
525
    sb.append("   Epsilon PMF            : ").append(epsPMFPct).append(LS);
1✔
526
    sb.append("   Empty                  : ").append(sketch.isEmpty()).append(LS);
1✔
527
    sb.append("   Estimation Mode        : ").append(sketch.isEstimationMode()).append(LS);
1✔
528
    sb.append("   Levels                 : ").append(numLevels).append(LS);
1✔
529
    sb.append("   Level 0 Sorted         : ").append(sketch.isLevelZeroSorted()).append(LS);
1✔
530
    sb.append("   Capacity Items         : ").append(fullLevelsArr[numLevels]).append(LS);
1✔
531
    sb.append("   Retained Items         : ").append(sketch.getNumRetained()).append(LS);
1✔
532
    sb.append("   Free Space             : ").append(sketch.levelsArr[0]).append(LS);
1✔
533
    sb.append("   ReadOnly               : ").append(readOnlyStr).append(LS);
1✔
534
    if (sketchType != ITEMS_SKETCH) {
1✔
535
      sb.append("   Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true)).append(LS);
1✔
536
    }
537
    sb.append("   Compact Storage Bytes  : ").append(sketch.currentSerializedSizeBytes(false)).append(LS);
1✔
538

539
    final String emptyStr = (sketchType == ITEMS_SKETCH) ? "Null" : "NaN";
1✔
540

541
    sb.append("   Min Item               : ").append(sketch.isEmpty() ? emptyStr : sketch.getMinItemAsString())
1✔
542
        .append(LS);
1✔
543
    sb.append("   Max Item               : ").append(sketch.isEmpty() ? emptyStr : sketch.getMaxItemAsString())
1✔
544
        .append(LS);
1✔
545
    sb.append("### End sketch summary").append(LS);
1✔
546

547
    if (withLevels) {
1✔
548
      sb.append(outputLevels(k, m, numLevels, fullLevelsArr));
1✔
549
    }
550

551
    if (withLevelsAndItems) {
1✔
552
      sb.append(outputDataDetail(sketch));
1✔
553
    }
554
    return sb.toString();
1✔
555
  }
556

557
  /**
558
   * Returns very conservative upper bound of the number of levels based on <i>n</i>.
559
   * @param n the length of the stream
560
   * @return floor( log_2(n) )
561
   */
562
  static int ubOnNumLevels(final long n) {
563
    return 1 + Long.numberOfTrailingZeros(floorPowerOf2(n));
1✔
564
  }
565

566
  /**
567
   * This grows the levels arr by 1 (if needed) and increases the capacity of the items array
568
   * at the bottom.  Only numLevels, the levels array and the items array are affected.
569
   * This assumes sketch is writable and UPDATABLE.
570
   * @param sketch the current sketch
571
   */
572
  static void addEmptyTopLevelToCompletelyFullSketch(final KllSketch sketch) {
573
    final SketchType sketchType = sketch.sketchType;
1✔
574

575
    final int[] myCurLevelsArr = sketch.getLevelsArray(sketch.sketchStructure);
1✔
576
    final int myCurNumLevels = sketch.getNumLevels();
1✔
577
    final int myCurTotalItemsCapacity = myCurLevelsArr[myCurNumLevels];
1✔
578

579
    final int myNewNumLevels;
580
    final int[] myNewLevelsArr;
581
    final int myNewTotalItemsCapacity;
582

583

584
    double[] myCurDoubleItemsArr = null;
1✔
585
    double[] myNewDoubleItemsArr = null;
1✔
586
    double minDouble = Double.NaN;
1✔
587
    double maxDouble = Double.NaN;
1✔
588

589
    float[] myCurFloatItemsArr = null;
1✔
590
    float[] myNewFloatItemsArr = null;
1✔
591
    float minFloat = Float.NaN;
1✔
592
    float maxFloat = Float.NaN;
1✔
593

594
    Object[] myCurItemsArr = null;
1✔
595
    Object[] myNewItemsArr = null;
1✔
596
    Object minItem = null;
1✔
597
    Object maxItem = null;
1✔
598

599
    if (sketchType == DOUBLES_SKETCH) {
1✔
600
      final KllDoublesSketch dblSk = (KllDoublesSketch) sketch;
1✔
601
      myCurDoubleItemsArr = dblSk.getDoubleItemsArray();
1✔
602
      minDouble = dblSk.getMinItem();
1✔
603
      maxDouble = dblSk.getMaxItem();
1✔
604
      //assert we are following a certain growth scheme
605
      assert myCurDoubleItemsArr.length == myCurTotalItemsCapacity;
1✔
606
    }
1✔
607
    else if (sketchType == FLOATS_SKETCH) {
1✔
608
      final KllFloatsSketch fltSk = (KllFloatsSketch) sketch;
1✔
609
      myCurFloatItemsArr = fltSk.getFloatItemsArray();
1✔
610
      minFloat = fltSk.getMinItem();
1✔
611
      maxFloat = fltSk.getMaxItem();
1✔
612
      //assert we are following a certain growth scheme
613
      assert myCurFloatItemsArr.length == myCurTotalItemsCapacity;
1✔
614
    }
1✔
615
    else { //sketchType == ITEMS_SKETCH
616
      final KllItemsSketch<?> itmSk = (KllItemsSketch<?>) sketch;
1✔
617
      myCurItemsArr = itmSk.getTotalItemsArray();
1✔
618
      minItem = itmSk.getMinItem();
1✔
619
      maxItem = itmSk.getMaxItem();
1✔
620
    }
621
    assert myCurLevelsArr[0] == 0; //definition of full is part of the growth scheme
1✔
622

623
    final int deltaItemsCap = levelCapacity(sketch.getK(), myCurNumLevels + 1, 0, sketch.getM());
1✔
624
    myNewTotalItemsCapacity = myCurTotalItemsCapacity + deltaItemsCap;
1✔
625

626
    // Check if growing the levels arr if required.
627
    // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it
628
    final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2;
1✔
629

630
    // GROW LEVELS ARRAY
631
    if (growLevelsArr) {
1✔
632
      //grow levels arr by one and copy the old data to the new array, extra space at the top.
633
      myNewLevelsArr = Arrays.copyOf(myCurLevelsArr, myCurNumLevels + 2);
1✔
634
      assert myNewLevelsArr.length == myCurLevelsArr.length + 1;
1✔
635
      myNewNumLevels = myCurNumLevels + 1;
1✔
636
      sketch.incNumLevels(); //increment for off-heap
1✔
637
    } else {
638
      myNewLevelsArr = myCurLevelsArr;
×
639
      myNewNumLevels = myCurNumLevels;
×
640
    }
641
    // This loop updates all level indices EXCLUDING the "extra" index at the top
642
    for (int level = 0; level <= myNewNumLevels - 1; level++) {
1✔
643
      myNewLevelsArr[level] += deltaItemsCap;
1✔
644
    }
645
    myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCapacity; // initialize the new "extra" index at the top
1✔
646

647
    // GROW items ARRAY
648
    if (sketchType == DOUBLES_SKETCH) {
1✔
649
      myNewDoubleItemsArr = new double[myNewTotalItemsCapacity];
1✔
650
      // copy and shift the current data into the new array
651
      System.arraycopy(myCurDoubleItemsArr, 0, myNewDoubleItemsArr, deltaItemsCap, myCurTotalItemsCapacity);
1✔
652
    }
653
    else if (sketchType == FLOATS_SKETCH) {
1✔
654
      myNewFloatItemsArr = new float[myNewTotalItemsCapacity];
1✔
655
      // copy and shift the current items data into the new array
656
      System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, myCurTotalItemsCapacity);
1✔
657
    }
658
    else { //sketchType == ITEMS_SKETCH
659
      myNewItemsArr = new Object[myNewTotalItemsCapacity];
1✔
660
      // copy and shift the current items data into the new array
661
      System.arraycopy(myCurItemsArr, 0, myNewItemsArr, deltaItemsCap, myCurTotalItemsCapacity);
1✔
662
    }
663

664
    //MEMORY SPACE MANAGEMENT
665
    if (sketch.getWritableMemory() != null) {
1✔
666
      final WritableMemory wmem = memorySpaceMgmt(sketch, myNewLevelsArr.length, myNewTotalItemsCapacity);
1✔
667
      sketch.setWritableMemory(wmem);
1✔
668
    }
669

670
    //update our sketch with new expanded spaces
671
    sketch.setNumLevels(myNewNumLevels);   //for off-heap only
1✔
672
    sketch.setLevelsArray(myNewLevelsArr); //the KllSketch copy
1✔
673
    if (sketchType == DOUBLES_SKETCH) {
1✔
674
      final KllDoublesSketch dblSk = (KllDoublesSketch) sketch;
1✔
675
      dblSk.setMinItem(minDouble);
1✔
676
      dblSk.setMaxItem(maxDouble);
1✔
677
      dblSk.setDoubleItemsArray(myNewDoubleItemsArr);
1✔
678
    }
1✔
679
    else if (sketchType == FLOATS_SKETCH) {
1✔
680
      final KllFloatsSketch fltSk = (KllFloatsSketch) sketch;
1✔
681
      fltSk.setMinItem(minFloat);
1✔
682
      fltSk.setMaxItem(maxFloat);
1✔
683
      fltSk.setFloatItemsArray(myNewFloatItemsArr);
1✔
684
    }
1✔
685
    else { //sketchType == ITEMS_SKETCH
686
      final KllItemsSketch<?> itmSk = (KllItemsSketch<?>) sketch;
1✔
687
      itmSk.setMinItem(minItem);
1✔
688
      itmSk.setMaxItem(maxItem);
1✔
689
      itmSk.setItemsArray(myNewItemsArr);
1✔
690
    }
691

692
  }
1✔
693

694
  /**
695
   * Finds the first level starting with level 0 that exceeds its nominal capacity
696
   * @param k configured size of sketch. Range [m, 2^16]
697
   * @param m minimum level size. Default is 8.
698
   * @param numLevels one-based number of current levels
699
   * @return level to compact
700
   */
701
  static int findLevelToCompact(final int k, final int m, final int numLevels, final int[] levels) {
702
    int level = 0;
1✔
703
    while (true) {
704
      assert level < numLevels;
1✔
705
      final int pop = levels[level + 1] - levels[level];
1✔
706
      final int cap = KllHelper.levelCapacity(k, numLevels, level, m);
1✔
707
      if (pop >= cap) {
1✔
708
        return level;
1✔
709
      }
710
      level++;
1✔
711
    }
1✔
712
  }
713

714
  /**
715
   * Computes the actual item capacity of a given level given its depth index.
716
   * If the depth of levels exceeds 30, this uses a folding technique to accurately compute the
717
   * actual level capacity up to a depth of 60 (or 61 levels).
718
   * Without folding, the internal calculations would exceed the capacity of a long.
719
   * This method just decides whether folding is required or not.
720
   * @param k the configured k of the sketch
721
   * @param depth the zero-based index of the level being computed.
722
   * @return the actual capacity of a given level given its depth index.
723
   */
724
  static long intCapAux(final int k, final int depth) {
725
    if (depth <= 30) { return intCapAuxAux(k, depth); }
1✔
726
    final int half = depth / 2;
1✔
727
    final int rest = depth - half;
1✔
728
    final long tmp = intCapAuxAux(k, half);
1✔
729
    return intCapAuxAux(tmp, rest);
1✔
730
  }
731

732
  /**
733
   * Performs the integer based calculation of an individual level (or folded level).
734
   * @param k the configured k of the sketch
735
   * @param depth the zero-based index of the level being computed. The max depth is 30!
736
   * @return the actual capacity of a given level given its depth index.
737
   */
738
  static long intCapAuxAux(final long k, final int depth) {
739
    final long twok = k << 1; // for rounding at the end, pre-multiply by 2 here, divide by 2 during rounding.
1✔
740
    final long tmp = ((twok << depth) / powersOfThree[depth]); //2k* (2/3)^depth. 2k also keeps the fraction larger.
1✔
741
    final long result = ((tmp + 1L) >>> 1); // (tmp + 1)/2. If odd, round up. This guarantees an integer.
1✔
742
    assert (result <= k);
1✔
743
    return result;
1✔
744
  }
745

746
  private final static boolean enablePrinting = false;
747

748
  /**
749
   * @param format the format
750
   * @param args the args
751
   */
752
  private static final void printf(final String format, final Object ... args) {
753
    if (enablePrinting) { System.out.printf(format, args); }
754
  }
1✔
755

756
  /**
757
   * @param o the Object to println
758
   */
759
  private static final void println(final Object o) {
760
    if (enablePrinting) { System.out.println(o.toString()); }
761
  }
1✔
762

763
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc