• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.46
/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.quantiles;
21

22
import static java.lang.Math.max;
23
import static java.lang.Math.min;
24
import static java.lang.System.arraycopy;
25
import static org.apache.datasketches.common.Util.ceilingPowerOf2;
26
import static org.apache.datasketches.quantiles.ClassicUtil.MAX_PRELONGS;
27
import static org.apache.datasketches.quantiles.ClassicUtil.MIN_K;
28
import static org.apache.datasketches.quantiles.ClassicUtil.checkIsCompactMemory;
29
import static org.apache.datasketches.quantiles.ClassicUtil.checkK;
30
import static org.apache.datasketches.quantiles.ClassicUtil.computeNumLevelsNeeded;
31
import static org.apache.datasketches.quantiles.ClassicUtil.computeRetainedItems;
32
import static org.apache.datasketches.quantiles.DoublesSketchAccessor.BB_LVL_IDX;
33

34
import java.util.Arrays;
35
import java.util.Random;
36

37
import org.apache.datasketches.common.SketchesArgumentException;
38
import org.apache.datasketches.common.SketchesStateException;
39
import org.apache.datasketches.memory.Memory;
40
import org.apache.datasketches.memory.WritableMemory;
41
import org.apache.datasketches.quantilescommon.DoublesSketchSortedView;
42
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
43
import org.apache.datasketches.quantilescommon.QuantilesAPI;
44
import org.apache.datasketches.quantilescommon.QuantilesDoublesAPI;
45
import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator;
46

47
/**
48
 * This is an implementation of the Low Discrepancy Mergeable Quantiles Sketch, using doubles,
49
 * described in section 3.2 of the journal version of the paper "Mergeable Summaries"
50
 * by Agarwal, Cormode, Huang, Phillips, Wei, and Yi:
51
 *
52
 * <p>Reference: <a href="http://dblp.org/rec/html/journals/tods/AgarwalCHPWY13"></a></p>
53
 *
54
 * <p>A <i>k</i> of 128 produces a normalized, rank error of about 1.7%.
55
 * For example, the median returned from getQuantile(0.5) will be between the actual quantiles
56
 * from the hypothetically sorted array of input quantiles at normalized ranks of 0.483 and 0.517, with
57
 * a confidence of about 99%.</p>
58
 *
59
 * <pre>
60
Table Guide for DoublesSketch Size in Bytes and Approximate Error:
61
          K =&gt; |      16      32      64     128     256     512   1,024
62
    ~ Error =&gt; | 12.145%  6.359%  3.317%  1.725%  0.894%  0.463%  0.239%
63
             N | Size in Bytes -&gt;
64
------------------------------------------------------------------------
65
             0 |       8       8       8       8       8       8       8
66
             1 |      72      72      72      72      72      72      72
67
             3 |      72      72      72      72      72      72      72
68
             7 |     104     104     104     104     104     104     104
69
            15 |     168     168     168     168     168     168     168
70
            31 |     296     296     296     296     296     296     296
71
            63 |     424     552     552     552     552     552     552
72
           127 |     552     808   1,064   1,064   1,064   1,064   1,064
73
           255 |     680   1,064   1,576   2,088   2,088   2,088   2,088
74
           511 |     808   1,320   2,088   3,112   4,136   4,136   4,136
75
         1,023 |     936   1,576   2,600   4,136   6,184   8,232   8,232
76
         2,047 |   1,064   1,832   3,112   5,160   8,232  12,328  16,424
77
         4,095 |   1,192   2,088   3,624   6,184  10,280  16,424  24,616
78
         8,191 |   1,320   2,344   4,136   7,208  12,328  20,520  32,808
79
        16,383 |   1,448   2,600   4,648   8,232  14,376  24,616  41,000
80
        32,767 |   1,576   2,856   5,160   9,256  16,424  28,712  49,192
81
        65,535 |   1,704   3,112   5,672  10,280  18,472  32,808  57,384
82
       131,071 |   1,832   3,368   6,184  11,304  20,520  36,904  65,576
83
       262,143 |   1,960   3,624   6,696  12,328  22,568  41,000  73,768
84
       524,287 |   2,088   3,880   7,208  13,352  24,616  45,096  81,960
85
     1,048,575 |   2,216   4,136   7,720  14,376  26,664  49,192  90,152
86
     2,097,151 |   2,344   4,392   8,232  15,400  28,712  53,288  98,344
87
     4,194,303 |   2,472   4,648   8,744  16,424  30,760  57,384 106,536
88
     8,388,607 |   2,600   4,904   9,256  17,448  32,808  61,480 114,728
89
    16,777,215 |   2,728   5,160   9,768  18,472  34,856  65,576 122,920
90
    33,554,431 |   2,856   5,416  10,280  19,496  36,904  69,672 131,112
91
    67,108,863 |   2,984   5,672  10,792  20,520  38,952  73,768 139,304
92
   134,217,727 |   3,112   5,928  11,304  21,544  41,000  77,864 147,496
93
   268,435,455 |   3,240   6,184  11,816  22,568  43,048  81,960 155,688
94
   536,870,911 |   3,368   6,440  12,328  23,592  45,096  86,056 163,880
95
 1,073,741,823 |   3,496   6,696  12,840  24,616  47,144  90,152 172,072
96
 2,147,483,647 |   3,624   6,952  13,352  25,640  49,192  94,248 180,264
97
 4,294,967,295 |   3,752   7,208  13,864  26,664  51,240  98,344 188,456
98
 * </pre>
99
 *
100
 * @see QuantilesAPI
101
 */
102
public abstract class DoublesSketch implements QuantilesDoublesAPI {
103
  /**
104
   * Setting the seed makes the results of the sketch deterministic if the input quantiles are
105
   * received in exactly the same order. This is only useful when performing test comparisons,
106
   * otherwise is not recommended.
107
   */
108
  static Random rand = new Random();
1✔
109

110
  /**
111
   * Parameter that controls space usage of sketch and accuracy of estimates.
112
   */
113
  final int k_;
114

115
  DoublesSketchSortedView doublesSV = null;
1✔
116

117
  DoublesSketch(final int k) {
1✔
118
    checkK(k);
1✔
119
    k_ = k;
1✔
120
  }
1✔
121

122
  synchronized static void setRandom(final long seed) {
123
    DoublesSketch.rand = new Random(seed);
1✔
124
  }
1✔
125

126
  /**
127
   * Returns a new builder
128
   * @return a new builder
129
   */
130
  public static final DoublesSketchBuilder builder() {
131
    return new DoublesSketchBuilder();
1✔
132
  }
133

134
  /**
135
   * Heapify takes the sketch image in Memory and instantiates an on-heap Sketch.
136
   * The resulting sketch will not retain any link to the source Memory.
137
   * @param srcMem a Memory image of a Sketch.
138
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
139
   * @return a heap-based Sketch based on the given Memory
140
   */
141
  public static DoublesSketch heapify(final Memory srcMem) {
142
    if (checkIsCompactMemory(srcMem)) {
1✔
143
      return CompactDoublesSketch.heapify(srcMem);
1✔
144
    }
145
    return UpdateDoublesSketch.heapify(srcMem);
1✔
146
  }
147

148
  /**
149
   * Wrap this sketch around the given Memory image of a DoublesSketch, compact or updatable.
150
   * A DirectUpdateDoublesSketch can only wrap an updatable array, and a
151
   * DirectCompactDoublesSketch can only wrap a compact array.
152
   *
153
   * @param srcMem the given Memory image of a DoublesSketch that may have data,
154
   * @return a sketch that wraps the given srcMem
155
   */
156
  public static DoublesSketch wrap(final Memory srcMem) {
157
    if (checkIsCompactMemory(srcMem)) {
1✔
158
      return DirectCompactDoublesSketch.wrapInstance(srcMem);
1✔
159
    }
160
    return DirectUpdateDoublesSketchR.wrapInstance(srcMem);
1✔
161
  }
162

163
  @Override
164
  public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
165
  if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
1✔
166
    refreshSortedView();
1✔
167
    return doublesSV.getCDF(splitPoints, searchCrit);
1✔
168
  }
169

170
  @Override
171
  public abstract double getMaxItem();
172

173
  @Override
174
  public abstract double getMinItem();
175

176
  @Override
177
  public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
178
  if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
1✔
179
    refreshSortedView();
1✔
180
    return doublesSV.getPMF(splitPoints, searchCrit);
1✔
181
  }
182

183
  @Override
184
  public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
185
  if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
1✔
186
    refreshSortedView();
1✔
187
    return doublesSV.getQuantile(rank, searchCrit);
1✔
188
  }
189

190
  @Override
191
  public double[] getQuantiles(final double[] ranks, final QuantileSearchCriteria searchCrit) {
192
    if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
1✔
193
    refreshSortedView();
1✔
194
    final int len = ranks.length;
1✔
195
    final double[] quantiles = new double[len];
1✔
196
    for (int i = 0; i < len; i++) {
1✔
197
      quantiles[i] = doublesSV.getQuantile(ranks[i], searchCrit);
1✔
198
    }
199
    return quantiles;
1✔
200
  }
201

202
  /**
203
   * {@inheritDoc}
204
   * The approximate probability that the true quantile is within the confidence interval
205
   * specified by the upper and lower quantile bounds for this sketch is 0.99.
206
   */
207
  @Override
208
  public double getQuantileLowerBound(final double rank) {
209
    return getQuantile(max(0, rank - getNormalizedRankError(k_, false)));
1✔
210
  }
211

212
  /**
213
   * {@inheritDoc}
214
   * The approximate probability that the true quantile is within the confidence interval
215
   * specified by the upper and lower quantile bounds for this sketch is 0.99.
216
   */
217
  @Override
218
  public double getQuantileUpperBound(final double rank) {
219
    return getQuantile(min(1.0, rank + getNormalizedRankError(k_, false)));
1✔
220
  }
221

222
  @Override
223
  public double getRank(final double quantile, final QuantileSearchCriteria searchCrit) {
224
    if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
1✔
225
    refreshSortedView();
1✔
226
    return doublesSV.getRank(quantile, searchCrit);
1✔
227
  }
228

229
  /**
230
   * {@inheritDoc}
231
   * The approximate probability that the true rank is within the confidence interval
232
   * specified by the upper and lower rank bounds for this sketch is 0.99.
233
   */
234
  @Override
235
  public double getRankLowerBound(final double rank) {
236
    return max(0.0, rank - getNormalizedRankError(k_, false));
1✔
237
  }
238

239
  /**
240
   * {@inheritDoc}
241
   * The approximate probability that the true rank is within the confidence interval
242
   * specified by the upper and lower rank bounds for this sketch is 0.99.
243
   */
244
  @Override
245
  public double getRankUpperBound(final double rank) {
246
    return min(1.0, rank + getNormalizedRankError(k_, false));
1✔
247
  }
248

249
  @Override
250
  public double[] getRanks(final double[] quantiles, final QuantileSearchCriteria searchCrit) {
251
    if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
1✔
252
    refreshSortedView();
1✔
253
    final int len = quantiles.length;
1✔
254
    final double[] ranks = new double[len];
1✔
255
    for (int i = 0; i < len; i++) {
1✔
256
      ranks[i] = doublesSV.getRank(quantiles[i], searchCrit);
1✔
257
    }
258
    return ranks;
1✔
259
  }
260

261
  @Override
262
  public int getK() {
263
    return k_;
1✔
264
  }
265

266
  @Override
267
  public abstract long getN();
268

269
  @Override
270
  public double getNormalizedRankError(final boolean pmf) {
271
    return getNormalizedRankError(k_, pmf);
1✔
272
  }
273

274
  /**
275
   * Gets the normalized rank error given k and pmf.
276
   * Static method version of the <i>getNormalizedRankError(boolean)</i>.
277
   * The epsilon returned is a best fit to 99 percent confidence empirically measured max error
278
   * in thousands of trials.
279
   * @param k the configuration parameter
280
   * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
281
   * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
282
   * @return if pmf is true, the normalized rank error for the getPMF() function.
283
   * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
284
   */
285
  public static double getNormalizedRankError(final int k, final boolean pmf) {
286
    return ClassicUtil.getNormalizedRankError(k, pmf);
1✔
287
  }
288

289
  /**
290
   * Gets the approximate <em>k</em> to use given epsilon, the normalized rank error.
291
   * @param epsilon the normalized rank error between zero and one.
292
   * @param pmf if true, this function returns <em>k</em> assuming the input epsilon
293
   * is the desired "double-sided" epsilon for the getPMF() function. Otherwise, this function
294
   * returns <em>k</em> assuming the input epsilon is the desired "single-sided"
295
   * epsilon for all the other queries.
296
   * @return <i>k</i> given epsilon.
297
   */
298
  public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
299
    return ClassicUtil.getKFromEpsilon(epsilon, pmf);
1✔
300
  }
301

302
  @Override
303
  public abstract boolean hasMemory();
304

305
  @Override
306
  public abstract boolean isDirect();
307

308
  @Override
309
  public boolean isEmpty() {
310
    return getN() == 0;
1✔
311
  }
312

313
  @Override
314
  public boolean isEstimationMode() {
315
    return getN() >= 2L * k_;
1✔
316
  }
317

318
  @Override
319
  public abstract boolean isReadOnly();
320

321
  /**
322
   * Returns true if the backing resource of <i>this</i> is identical with the backing resource
323
   * of <i>that</i>. The capacities must be the same.  If <i>this</i> is a region,
324
   * the region offset must also be the same.
325
   * @param that A different non-null object
326
   * @return true if the backing resource of <i>this</i> is the same as the backing resource
327
   * of <i>that</i>.
328
   */
329
  public boolean isSameResource(final Memory that) { //Overridden by direct sketches
330
    return false;
1✔
331
  }
332

333
  @Override
334
  public byte[] toByteArray() {
335
    if (isCompact()) {
1✔
336
      return toByteArray(true);
1✔
337
    }
338
    return toByteArray(false);
1✔
339
  }
340

341
  /**
342
   * Serialize this sketch in a byte array form.
343
   * @param compact if true the sketch will be serialized in compact form.
344
   * DirectCompactDoublesSketch can wrap() only a compact byte array;
345
   * DirectUpdateDoublesSketch can wrap() only a updatable byte array.
346
   * @return this sketch in a byte array form.
347
   */
348
  public byte[] toByteArray(final boolean compact) {
349
    return DoublesByteArrayImpl.toByteArray(this, compact, compact);
1✔
350
  }
351

352
  /**
353
   * Returns human readable summary information about this sketch.
354
   * Used for debugging.
355
   */
356
  @Override
357
  public String toString() {
358
    return toString(true, false);
1✔
359
  }
360

361
  /**
362
   * Returns human readable summary information about this sketch.
363
   * Used for debugging.
364
   * @param withLevels if true includes sketch levels array summary information
365
   * @param withLevelsAndItems if true include detail of levels array and items array together
366
   * @return human readable summary information about this sketch.
367
   */
368
  public String toString(final boolean withLevels, final boolean withLevelsAndItems) {
369
    return DoublesUtil.toString(withLevels, withLevelsAndItems, this);
1✔
370
  }
371

372
  /**
373
   * Returns a human readable string of the preamble of a byte array image of a DoublesSketch.
374
   * Used for debugging.
375
   * @param byteArr the given byte array
376
   * @return a human readable string of the preamble of a byte array image of a DoublesSketch.
377
   */
378
  public static String toString(final byte[] byteArr) {
379
    return PreambleUtil.toString(byteArr, true);
1✔
380
  }
381

382
  /**
383
   * Returns a human readable string of the preamble of a Memory image of a DoublesSketch.
384
   * Used for debugging.
385
   * @param mem the given Memory
386
   * @return a human readable string of the preamble of a Memory image of a DoublesSketch.
387
   */
388
  public static String toString(final Memory mem) {
389
    return PreambleUtil.toString(mem, true);
1✔
390
  }
391

392
  /**
393
   * From an source sketch, create a new sketch that must have a smaller K.
394
   * The original sketch is not modified.
395
   *
396
   * @param srcSketch the sourcing sketch
397
   * @param smallerK the new sketch's K that must be smaller than this K.
398
   * It is required that this.getK() = smallerK * 2^(nonnegative integer).
399
   * @param dstMem the destination Memory.  It must not overlap the Memory of this sketch.
400
   * If null, a heap sketch will be returned, otherwise it will be off-heap.
401
   *
402
   * @return the new sketch.
403
   */
404
  public DoublesSketch downSample(final DoublesSketch srcSketch, final int smallerK,
405
        final WritableMemory dstMem) {
406
    return downSampleInternal(srcSketch, smallerK, dstMem);
1✔
407
  }
408

409
  @Override
410
  public int getNumRetained() {
411
    return computeRetainedItems(getK(), getN());
1✔
412
  }
413

414
  /**
415
   * Returns the current number of bytes this sketch would require to store in the compact Memory Format.
416
   * @return the current number of bytes this sketch would require to store in the compact Memory Format.
417
   */
418
  public int getCurrentCompactSerializedSizeBytes() {
419
    return getCompactSerialiedSizeBytes(getK(), getN());
1✔
420
  }
421

422
  /**
423
   * Returns the number of bytes a DoublesSketch would require to store in compact form
424
   * given <i>k</i> and <i>n</i>. The compact form is not updatable.
425
   * @param k the size configuration parameter for the sketch
426
   * @param n the number of quantiles input into the sketch
427
   * @return the number of bytes required to store this sketch in compact form.
428
   */
429
  public static int getCompactSerialiedSizeBytes(final int k, final long n) {
430
    if (n == 0) { return 8; }
1✔
431
    final int metaPreLongs = MAX_PRELONGS + 2; //plus min, max
1✔
432
    return metaPreLongs + computeRetainedItems(k, n) << 3;
1✔
433
  }
434

435
  @Override
436
  public int getSerializedSizeBytes() {
437
    if (isCompact()) { return getCurrentCompactSerializedSizeBytes(); }
1✔
438
    return getCurrentUpdatableSerializedSizeBytes();
1✔
439
  }
440

441
  /**
442
   * Returns the current number of bytes this sketch would require to store in the updatable Memory Format.
443
   * @return the current number of bytes this sketch would require to store in the updatable Memory Format.
444
   */
445
  public int getCurrentUpdatableSerializedSizeBytes() {
446
    return getUpdatableStorageBytes(getK(), getN());
1✔
447
  }
448

449
  /**
450
   * Returns the number of bytes a sketch would require to store in updatable form.
451
   * This uses roughly 2X the storage of the compact form
452
   * given <i>k</i> and <i>n</i>.
453
   * @param k the size configuration parameter for the sketch
454
   * @param n the number of quantiles input into the sketch
455
   * @return the number of bytes this sketch would require to store in updatable form.
456
   */
457
  public static int getUpdatableStorageBytes(final int k, final long n) {
458
    if (n == 0) { return 8; }
1✔
459
    final int metaPre = MAX_PRELONGS + 2; //plus min, max
1✔
460
    final int totLevels = computeNumLevelsNeeded(k, n);
1✔
461
    if (n <= k) {
1✔
462
      final int ceil = Math.max(ceilingPowerOf2((int)n), MIN_K * 2);
1✔
463
      return metaPre + ceil << 3;
1✔
464
    }
465
    return metaPre + (2 + totLevels) * k << 3;
1✔
466
  }
467

468
  /**
469
   * Puts the current sketch into the given Memory in compact form if there is sufficient space,
470
   * otherwise, it throws an error.
471
   *
472
   * @param dstMem the given memory.
473
   */
474
  public void putMemory(final WritableMemory dstMem) {
475
    putMemory(dstMem, true);
1✔
476
  }
1✔
477

478
  /**
479
   * Puts the current sketch into the given Memory if there is sufficient space, otherwise,
480
   * throws an error.
481
   *
482
   * @param dstMem the given memory.
483
   * @param compact if true, compacts and sorts the base buffer, which optimizes merge
484
   *                performance at the cost of slightly increased serialization time.
485
   */
486
  public void putMemory(final WritableMemory dstMem, final boolean compact) {
487
    if (hasMemory() && isCompact() == compact) {
1✔
488
      final Memory srcMem = getMemory();
1✔
489
      srcMem.copyTo(0, dstMem, 0, getSerializedSizeBytes());
1✔
490
    } else {
1✔
491
      final byte[] byteArr = toByteArray(compact);
1✔
492
      final int arrLen = byteArr.length;
1✔
493
      final long memCap = dstMem.getCapacity();
1✔
494
      if (memCap < arrLen) {
1✔
495
        throw new SketchesArgumentException(
1✔
496
           "Destination Memory not large enough: " + memCap + " < " + arrLen);
497
      }
498
      dstMem.putByteArray(0, byteArr, 0, arrLen);
1✔
499
    }
500
  }
1✔
501

502
  @Override
503
  public QuantilesDoublesSketchIterator iterator() {
504
    return new DoublesSketchIterator(this, getBitPattern());
1✔
505
  }
506

507
  /**
508
   * {@inheritDoc}
509
   * <p>The parameter <i>k</i> will not change.</p>
510
   */
511
  @Override
512
  public abstract void reset();
513

514
  //Restricted
515

516
  /*
517
   * DoublesMergeImpl.downSamplingMergeInto requires the target sketch to implement update(), so
518
   * we ensure that the target is an UpdateSketch. The public API, on the other hand, just
519
   * specifies a DoublesSketch. This lets us be more specific about the type without changing the
520
   * public API.
521
   */
522
  UpdateDoublesSketch downSampleInternal(final DoublesSketch srcSketch, final int smallerK,
523
                                         final WritableMemory dstMem) {
524
    final UpdateDoublesSketch newSketch = dstMem == null
1✔
525
            ? HeapUpdateDoublesSketch.newInstance(smallerK)
1✔
526
            : DirectUpdateDoublesSketch.newInstance(smallerK, dstMem);
1✔
527
    if (srcSketch.isEmpty()) { return newSketch; }
1✔
528
    DoublesMergeImpl.downSamplingMergeInto(srcSketch, newSketch);
1✔
529
    return newSketch;
1✔
530
  }
531

532
  //Restricted abstract
533

534
  /**
535
   * Returns true if this sketch is compact
536
   * @return true if this sketch is compact
537
   */
538
  abstract boolean isCompact();
539

540
  /**
541
   * Returns the base buffer count
542
   * @return the base buffer count
543
   */
544
  abstract int getBaseBufferCount();
545

546
  /**
547
   * Returns the bit pattern for valid log levels
548
   * @return the bit pattern for valid log levels
549
   */
550
  abstract long getBitPattern();
551

552
  /**
553
   * Returns the capacity for the combined base buffer
554
   * @return the capacity for the combined base buffer
555
   */
556
  abstract int getCombinedBufferItemCapacity();
557

558
  /**
559
   * Returns the combined buffer, in non-compact form.
560
   * @return the combined buffer, in non-compact form.
561
   */
562
  abstract double[] getCombinedBuffer();
563

564
  /**
565
   * Gets the Memory if it exists, otherwise returns null.
566
   * @return the Memory if it exists, otherwise returns null.
567
   */
568
  abstract WritableMemory getMemory();
569

570
  //************SORTED VIEW****************************
571

572
  @Override
573
  public final DoublesSketchSortedView getSortedView() {
574
    return refreshSortedView();
1✔
575
  }
576

577
  private final DoublesSketchSortedView refreshSortedView() {
578
    return (doublesSV == null) ? (doublesSV = getSV()) : doublesSV;
1✔
579
  }
580

581
  private DoublesSketchSortedView getSV() {
582
    final long totalN = getN();
1✔
583
    if (isEmpty() || (totalN == 0)) { throw new SketchesArgumentException(EMPTY_MSG); }
1✔
584
    final int numQuantiles = getNumRetained();
1✔
585
    final double[] svQuantiles = new double[numQuantiles];
1✔
586
    final long[] svCumWeights = new long[numQuantiles];
1✔
587
    final DoublesSketchAccessor sketchAccessor = DoublesSketchAccessor.wrap(this);
1✔
588

589
    // Populate from DoublesSketch:
590
    //  copy over the "levels" and then the base buffer, all with appropriate weights
591
    populateFromDoublesSketch(getK(), totalN, getBitPattern(), sketchAccessor, svQuantiles, svCumWeights);
1✔
592

593
    // Sort the first "numSamples" slots of the two arrays in tandem,
594
    // taking advantage of the already sorted blocks of length k
595
    blockyTandemMergeSort(svQuantiles, svCumWeights, numQuantiles, getK());
1✔
596

597
    if (convertToCumulative(svCumWeights) != totalN) {
1✔
598
      throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights.");
×
599
    }
600
    return new DoublesSketchSortedView(svQuantiles, svCumWeights, this);
1✔
601
  }
602

603
  private final static void populateFromDoublesSketch(
604
      final int k, final long totalN, final long bitPattern,
605
      final DoublesSketchAccessor sketchAccessor,
606
      final double[] svQuantiles, final long[] svCumWeights) {
607
    long weight = 1;
1✔
608
    int index = 0;
1✔
609
    long bits = bitPattern;
1✔
610
    assert bits == (totalN / (2L * k)); // internal consistency check
1✔
611
    for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) {
1✔
612
      weight <<= 1; // X2
1✔
613
      if ((bits & 1L) > 0L) {
1✔
614
        sketchAccessor.setLevel(lvl);
1✔
615
        for (int i = 0; i < sketchAccessor.numItems(); i++) {
1✔
616
          svQuantiles[index] = sketchAccessor.get(i);
1✔
617
          svCumWeights[index] = weight;
1✔
618
          index++;
1✔
619
        }
620
      }
621
    }
622

623
    weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer
1✔
624
    final int startOfBaseBufferBlock = index;
1✔
625

626
    // Copy BaseBuffer over, along with weight = 1
627
    sketchAccessor.setLevel(BB_LVL_IDX);
1✔
628
    for (int i = 0; i < sketchAccessor.numItems(); i++) {
1✔
629
      svQuantiles[index] = sketchAccessor.get(i);
1✔
630
      svCumWeights[index] = weight;
1✔
631
      index++;
1✔
632
    }
633
    assert index == svQuantiles.length;
1✔
634

635
    // Must sort the items that came from the base buffer.
636
    // Don't need to sort the corresponding weights because they are all the same.
637
    final int numSamples = index;
1✔
638
    Arrays.sort(svQuantiles, startOfBaseBufferBlock, numSamples);
1✔
639
  }
1✔
640

641
  /**
642
   * blockyTandemMergeSort() is an implementation of top-down merge sort specialized
643
   * for the case where the input contains successive equal-length blocks
644
   * that have already been sorted, so that only the top part of the
645
   * merge tree remains to be executed. Also, two arrays are sorted in tandem,
646
   * as discussed below.
647
   * @param svQuantiles array of quantiles for sorted view
648
   * @param svCumWts array for the cumulative weights (but not yet cumulative) for sorted view
649
   * @param arrLen length of quantiles array and cumWts array
650
   * @param blkSize size of internal sorted blocks, equal to k
651
   */
652
  //used by this and UtilTest
653
  static void blockyTandemMergeSort(final double[] svQuantiles, final long[] svCumWts, final int arrLen,
654
      final int blkSize) {
655
    assert blkSize >= 1;
1✔
656
    if (arrLen <= blkSize) { return; }
1✔
657
    int numblks = arrLen / blkSize;
1✔
658
    if ((numblks * blkSize) < arrLen) { numblks += 1; }
1✔
659
    assert ((numblks * blkSize) >= arrLen);
1✔
660

661
    // duplication of the input arrays is preparation for the "ping-pong" copy reduction strategy.
662
    final double[] qSrc = Arrays.copyOf(svQuantiles, arrLen);
1✔
663
    final long[] cwSrc   = Arrays.copyOf(svCumWts, arrLen);
1✔
664

665
    blockyTandemMergeSortRecursion(qSrc, cwSrc,
1✔
666
                                   svQuantiles, svCumWts,
667
                                   0, numblks,
668
                                   blkSize, arrLen);
669
  }
1✔
670

671
  /**
672
   *  blockyTandemMergeSortRecursion() is called by blockyTandemMergeSort().
673
   *  In addition to performing the algorithm's top down recursion,
674
   *  it manages the buffer swapping that eliminates most copying.
675
   *  It also maps the input's pre-sorted blocks into the subarrays
676
   *  that are processed by tandemMerge().
677
   * @param qSrc source array of quantiles
678
   * @param cwSrc source weights array
679
   * @param qDst destination quantiles array
680
   * @param cwDst destination weights array
681
   * @param grpStart group start, refers to pre-sorted blocks such as block 0, block 1, etc.
682
   * @param grpLen group length, refers to pre-sorted blocks such as block 0, block 1, etc.
683
   * @param blkSize block size
684
   * @param arrLim array limit
685
   */
686
  private static void blockyTandemMergeSortRecursion(final double[] qSrc, final long[] cwSrc,
687
      final double[] qDst, final long[] cwDst, final int grpStart, final int grpLen,
688
      /* indices of blocks */ final int blkSize, final int arrLim) {
689
    // Important note: grpStart and grpLen do NOT refer to positions in the underlying array.
690
    // Instead, they refer to the pre-sorted blocks, such as block 0, block 1, etc.
691

692
    assert (grpLen > 0);
1✔
693
    if (grpLen == 1) { return; }
1✔
694
    final int grpLen1 = grpLen / 2;
1✔
695
    final int grpLen2 = grpLen - grpLen1;
1✔
696
    assert (grpLen1 >= 1);
1✔
697
    assert (grpLen2 >= grpLen1);
1✔
698

699
    final int grpStart1 = grpStart;
1✔
700
    final int grpStart2 = grpStart + grpLen1;
1✔
701

702
    //swap roles of src and dst
703
    blockyTandemMergeSortRecursion(qDst, cwDst,
1✔
704
                           qSrc, cwSrc,
705
                           grpStart1, grpLen1, blkSize, arrLim);
706

707
    //swap roles of src and dst
708
    blockyTandemMergeSortRecursion(qDst, cwDst,
1✔
709
                           qSrc, cwSrc,
710
                           grpStart2, grpLen2, blkSize, arrLim);
711

712
    // here we convert indices of blocks into positions in the underlying array.
713
    final int arrStart1 = grpStart1 * blkSize;
1✔
714
    final int arrStart2 = grpStart2 * blkSize;
1✔
715
    final int arrLen1   = grpLen1   * blkSize;
1✔
716
    int arrLen2         = grpLen2   * blkSize;
1✔
717

718
    // special case for the final block which might be shorter than blkSize.
719
    if ((arrStart2 + arrLen2) > arrLim) { arrLen2 = arrLim - arrStart2; }
1✔
720

721
    tandemMerge(qSrc, cwSrc,
1✔
722
                arrStart1, arrLen1,
723
                arrStart2, arrLen2,
724
                qDst, cwDst,
725
                arrStart1); // which will be arrStart3
726
  }
1✔
727

728
  /**
729
   *  Performs two merges in tandem. One of them provides the sort keys
730
   *  while the other one passively undergoes the same data motion.
731
   * @param qSrc quantiles source
732
   * @param cwSrc cumulative weights source
733
   * @param arrStart1 Array 1 start offset
734
   * @param arrLen1 Array 1 length
735
   * @param arrStart2 Array 2 start offset
736
   * @param arrLen2 Array 2 length
737
   * @param qDst quantiles destination
738
   * @param cwDst cumulative weights destination
739
   * @param arrStart3 Array 3 start offset
740
   */
741
  private static void tandemMerge(final double[] qSrc, final long[] cwSrc,
742
                                  final int arrStart1, final int arrLen1,
743
                                  final int arrStart2, final int arrLen2,
744
                                  final double[] qDst, final long[] cwDst,
745
                                  final int arrStart3) {
746
    final int arrStop1 = arrStart1 + arrLen1;
1✔
747
    final int arrStop2 = arrStart2 + arrLen2;
1✔
748

749
    int i1 = arrStart1;
1✔
750
    int i2 = arrStart2;
1✔
751
    int i3 = arrStart3;
1✔
752
    while ((i1 < arrStop1) && (i2 < arrStop2)) {
1✔
753
      if (qSrc[i2] < qSrc[i1]) {
1✔
754
        qDst[i3] = qSrc[i2];
1✔
755
        cwDst[i3] = cwSrc[i2];
1✔
756
        i2++;
1✔
757
      } else {
758
        qDst[i3] = qSrc[i1];
1✔
759
        cwDst[i3] = cwSrc[i1];
1✔
760
        i1++;
1✔
761
      }
762
      i3++;
1✔
763
    }
764

765
    if (i1 < arrStop1) {
1✔
766
      arraycopy(qSrc, i1, qDst, i3, arrStop1 - i1);
1✔
767
      arraycopy(cwSrc, i1, cwDst, i3, arrStop1 - i1);
1✔
768
    } else {
769
      assert i2 < arrStop2;
1✔
770
      arraycopy(qSrc, i2, qDst, i3, arrStop2 - i2);
1✔
771
      arraycopy(cwSrc, i2, cwDst, i3, arrStop2 - i2);
1✔
772
    }
773
  }
1✔
774

775
  /**
776
   * Convert the individual weights into cumulative weights.
777
   * An array of {1,1,1,1} becomes {1,2,3,4}
778
   * @param array of actual weights from the sketch, none of the weights may be zero
779
   * @return total weight
780
   */
781
  private static long convertToCumulative(final long[] array) {
782
    long subtotal = 0;
1✔
783
    for (int i = 0; i < array.length; i++) {
1✔
784
      final long newSubtotal = subtotal + array[i];
1✔
785
      subtotal = array[i] = newSubtotal;
1✔
786
    }
787
    return subtotal;
1✔
788
  }
789

790
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc