• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.49
/src/main/java/org/apache/datasketches/hll/Union.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.hll;
21

22
import static org.apache.datasketches.common.Util.invPow2;
23
import static org.apache.datasketches.hll.HllUtil.AUX_TOKEN;
24
import static org.apache.datasketches.hll.HllUtil.EMPTY;
25
import static org.apache.datasketches.hll.HllUtil.loNibbleMask;
26
import static org.apache.datasketches.hll.PreambleUtil.HLL_BYTE_ARR_START;
27
import static org.apache.datasketches.hll.PreambleUtil.extractTgtHllType;
28
import static org.apache.datasketches.hll.TgtHllType.HLL_4;
29
import static org.apache.datasketches.hll.TgtHllType.HLL_8;
30

31
import org.apache.datasketches.common.SketchesArgumentException;
32
import org.apache.datasketches.memory.Memory;
33
import org.apache.datasketches.memory.WritableMemory;
34

35
/**
36
 * This performs union operations for all HllSketches. This union operator can be configured to be
37
 * on or off heap.  The source sketch given to this union using the {@link #update(HllSketch)} can
38
 * be configured with any precision value <i>lgConfigK</i> (from 4 to 21), any <i>TgtHllType</i>
39
 * (HLL_4, HLL_6, HLL_8), and either on or off-heap; and it can be in either of the sparse modes
40
 * (<i>LIST</i> or <i>SET</i>), or the dense mode (<i>HLL</i>).
41
 *
42
 * <p>Although the API for this union operator parallels many of the methods of the
43
 * <i>HllSketch</i>, the behavior of the union operator has some fundamental differences.</p>
44
 *
45
 * <p>First, this union operator is configured with a <i>lgMaxK</i> instead of the normal
46
 * <i>lgConfigK</i>.  Generally, this union operator will inherit the lowest <i>lgConfigK</i>
47
 * less than <i>lgMaxK</i> that it has seen. However, the <i>lgConfigK</i> of incoming sketches that
48
 * are still in sparse are ignored. The <i>lgMaxK</i> provides the user the ability to specify the
49
 * largest maximum size for the union operation.
50
 *
51
 * <p>Second, the user cannot specify the {@link TgtHllType} as an input parameter to the union.
52
 * Instead, it is specified for the sketch returned with {@link #getResult(TgtHllType)}.
53
 *
54
 * <p>The following graph illustrates the HLL Merge speed.</p>
55
 *
56
 * <p><img src="doc-files/HLL_UnionTime4_6_8_Java_CPP.png" width="500" alt="HLL LgK12 Union Speed"></p>
57
 * This graph illustrates the relative merging speed of the HLL 4,6,8 Java HLL sketches compared to
58
 * the DataSketches C++ implementations of the same sketches. With this particular test (merging 32 relative large
59
 * sketches together), the Java HLL 8 is the fastest and the Java HLL 4 the slowest, with a mixed cluster in the middle.
60
 * Union / Merging speed is somewhat difficult to measure as the performance is very dependent on the mix of sketch
61
 * sizes (and types) you are merging. So your mileage will vary!
62
 *
63
 * <p>For a complete example of using the Union operator
64
 * see <a href="https://datasketches.apache.org/docs/HLL/HllJavaExample.html">Union Example</a>.</p>
65
 *
66
 * @author Lee Rhodes
67
 * @author Kevin Lang
68
 */
69
public class Union extends BaseHllSketch {
1✔
70
  final int lgMaxK;
71
  private final HllSketch gadget;
72

73
  /**
74
   * Construct this Union operator with the default maximum log-base-2 of <i>K</i>.
75
   */
76
  public Union() {
1✔
77
    lgMaxK = HllSketch.DEFAULT_LG_K;
1✔
78
    gadget = new HllSketch(lgMaxK, HLL_8);
1✔
79
  }
1✔
80

81
  /**
82
   * Construct this Union operator with a given maximum log-base-2 of <i>K</i>.
83
   * @param lgMaxK the desired maximum log-base-2 of <i>K</i>.  This value must be
84
   * between 4 and 21 inclusively.
85
   */
86
  public Union(final int lgMaxK) {
1✔
87
    this.lgMaxK = HllUtil.checkLgK(lgMaxK);
1✔
88
    gadget = new HllSketch(lgMaxK, HLL_8);
1✔
89
  }
1✔
90

91
  /**
92
   * Construct this Union operator with a given maximum log-base-2 of <i>K</i> and the given
93
   * WritableMemory as the destination for this Union. This WritableMemory is usually configured
94
   * for off-heap memory. What remains on the java heap is a thin wrapper object that reads and
95
   * writes to the given WritableMemory.
96
   *
97
   * <p>The given <i>dstMem</i> is checked for the required capacity as determined by
98
   * {@link HllSketch#getMaxUpdatableSerializationBytes(int, TgtHllType)}.
99
   * @param lgMaxK the desired maximum log-base-2 of <i>K</i>.  This value must be
100
   * between 4 and 21 inclusively.
101
   * @param dstWmem the destination writable memory for the sketch.
102
   */
103
  public Union(final int lgMaxK, final WritableMemory dstWmem) {
1✔
104
    this.lgMaxK = HllUtil.checkLgK(lgMaxK);
1✔
105
    gadget = new HllSketch(lgMaxK, HLL_8, dstWmem);
1✔
106
  }
1✔
107

108
  //used only by writableWrap
109
  private Union(final HllSketch sketch) {
1✔
110
    lgMaxK = sketch.getLgConfigK();
1✔
111
    gadget = sketch;
1✔
112
  }
1✔
113

114
  /**
115
   * Construct a union operator populated with the given byte array image of an HllSketch.
116
   * @param byteArray the given byte array
117
   * @return a union operator populated with the given byte array image of an HllSketch.
118
   */
119
  public static final Union heapify(final byte[] byteArray) {
120
    return heapify(Memory.wrap(byteArray));
1✔
121
  }
122

123
  /**
124
   * Construct a union operator populated with the given Memory image of an HllSketch.
125
   * @param mem the given Memory
126
   * @return a union operator populated with the given Memory image of an HllSketch.
127
   */
128
  public static final Union heapify(final Memory mem) {
129
    final int lgK = HllUtil.checkLgK(mem.getByte(PreambleUtil.LG_K_BYTE));
1✔
130
    final HllSketch sk = HllSketch.heapify(mem, false); //allows non-finalized image
1✔
131
    final Union union = new Union(lgK);
1✔
132
    union.update(sk);
1✔
133
    return union;
1✔
134
  }
135

136
  /**
137
   * Wraps the given WritableMemory, which must be a image of a valid updatable HLL_8 sketch,
138
   * and may have data. What remains on the java heap is a
139
   * thin wrapper object that reads and writes to the given WritableMemory, which, depending on
140
   * how the user configures the WritableMemory, may actually reside on the Java heap or off-heap.
141
   *
142
   * <p>The given <i>dstMem</i> is checked for the required capacity as determined by
143
   * {@link HllSketch#getMaxUpdatableSerializationBytes(int, TgtHllType)}, and for the correct type.
144
   * @param srcWmem an writable image of a valid sketch with data.
145
   * @return a Union operator where the sketch data is in the given dstMem.
146
   */
147
  public static final Union writableWrap(final WritableMemory srcWmem) {
148
    final TgtHllType tgtHllType = extractTgtHllType(srcWmem);
1✔
149
    if (tgtHllType != TgtHllType.HLL_8) {
1✔
150
      throw new SketchesArgumentException(
1✔
151
          "Union can only wrap writable HLL_8 sketches that were the Gadget of a Union.");
152
    }
153
    //allows writableWrap of non-finalized image
154
    return new Union(HllSketch.writableWrap(srcWmem, false));
1✔
155
  }
156

157
  @Override
158
  public double getCompositeEstimate() {
159
    checkRebuildCurMinNumKxQ(gadget);
1✔
160
    return gadget.hllSketchImpl.getCompositeEstimate();
1✔
161
  }
162

163
  @Override
164
  CurMode getCurMode() {
165
    return gadget.getCurMode();
1✔
166
  }
167

168
  @Override
169
  public int getCompactSerializationBytes() {
170
    return gadget.getCompactSerializationBytes();
1✔
171
  }
172

173
  @Override
174
  public double getEstimate() {
175
    checkRebuildCurMinNumKxQ(gadget);
1✔
176
    return gadget.getEstimate();
1✔
177
  }
178

179
  /**
180
   * Gets the effective <i>lgConfigK</i> for the union operator, which may be less than
181
   * <i>lgMaxK</i>.
182
   * @return the <i>lgConfigK</i>.
183
   */
184
  @Override
185
  public int getLgConfigK() {
186
    return gadget.getLgConfigK();
1✔
187
  }
188

189
  @Override
190
  public double getLowerBound(final int numStdDev) {
191
    checkRebuildCurMinNumKxQ(gadget);
1✔
192
    return gadget.getLowerBound(numStdDev);
1✔
193
  }
194

195
  /**
196
   * Returns the maximum size in bytes that this union operator can grow to given a lgK.
197
   *
198
   * @param lgK The maximum Log2 of K for this union operator. This value must be
199
   * between 4 and 21 inclusively.
200
   * @return the maximum size in bytes that this union operator can grow to.
201
   */
202
  public static int getMaxSerializationBytes(final int lgK) {
203
    return HllSketch.getMaxUpdatableSerializationBytes(lgK, TgtHllType.HLL_8);
1✔
204
  }
205

206
  /**
207
   * Return the result of this union operator as an HLL_4 sketch.
208
   * @return the result of this union operator as an HLL_4 sketch.
209
   */
210
  public HllSketch getResult() {
211
    return getResult(HllSketch.DEFAULT_HLL_TYPE);
1✔
212
  }
213

214
  /**
215
   * Return the result of this union operator with the specified {@link TgtHllType}
216
   * @param tgtHllType the TgtHllType enum
217
   * @return the result of this union operator with the specified TgtHllType
218
   */
219
  public HllSketch getResult(final TgtHllType tgtHllType) {
220
    checkRebuildCurMinNumKxQ(gadget);
1✔
221
    return gadget.copyAs(tgtHllType);
1✔
222
  }
223

224
  @Override
225
  public TgtHllType getTgtHllType() {
226
    return TgtHllType.HLL_8;
1✔
227
  }
228

229
  @Override
230
  public int getUpdatableSerializationBytes() {
231
    return gadget.getUpdatableSerializationBytes();
1✔
232
  }
233

234
  @Override
235
  public double getUpperBound(final int numStdDev) {
236
    checkRebuildCurMinNumKxQ(gadget);
1✔
237
    return gadget.getUpperBound(numStdDev);
1✔
238
  }
239

240
  @Override
241
  public boolean isCompact() {
242
    return gadget.isCompact();
1✔
243
  }
244

245
  @Override
246
  public boolean isEmpty() {
247
    return gadget.isEmpty();
1✔
248
  }
249

250
  @Override
251
  public boolean isMemory() {
252
    return gadget.isMemory();
1✔
253
  }
254

255
  @Override
256
  public boolean isOffHeap() {
257
    return gadget.isOffHeap();
1✔
258
  }
259

260
  @Override
261
  boolean isOutOfOrder() {
262
    return gadget.isOutOfOrder();
1✔
263
  }
264

265
  @Override
266
  public boolean isSameResource(final Memory mem) {
267
    return gadget.isSameResource(mem);
1✔
268
  }
269

270
  boolean isRebuildCurMinNumKxQFlag() {
271
    return gadget.hllSketchImpl.isRebuildCurMinNumKxQFlag();
1✔
272
  }
273

274
  void putRebuildCurMinNumKxQFlag(final boolean rebuild) {
275
    gadget.hllSketchImpl.putRebuildCurMinNumKxQFlag(rebuild);
1✔
276
  }
1✔
277

278
  /**
279
   * Resets to empty and retains the current lgK, but does not change the configured value of
280
   * lgMaxK.
281
   */
282
  @Override
283
  public void reset() {
284
    gadget.reset();
1✔
285
  }
1✔
286

287
  /**
288
   * Gets the serialization of this union operator as a byte array in compact form, which is
289
   * designed to be heapified only. It is not directly updatable.
290
   * For the Union operator, this is the serialization of the internal state of
291
   * the union operator as a sketch.
292
   * @return the serialization of this union operator as a byte array.
293
   */
294
  @Override
295
  public byte[] toCompactByteArray() {
296
    checkRebuildCurMinNumKxQ(gadget);
1✔
297
    return gadget.toCompactByteArray();
1✔
298
  }
299

300
  @Override
301
  public byte[] toUpdatableByteArray() {
302
    checkRebuildCurMinNumKxQ(gadget);
1✔
303
    return gadget.toUpdatableByteArray();
1✔
304
  }
305

306
  @Override
307
  public String toString(final boolean summary, final boolean hllDetail,
308
      final boolean auxDetail, final boolean all) {
309
    final HllSketch clone = gadget.copy();
1✔
310
    checkRebuildCurMinNumKxQ(clone);
1✔
311
    return clone.toString(summary, hllDetail, auxDetail, all);
1✔
312
  }
313

314
  /**
315
   * Update this union operator with the given sketch.
316
   * @param sketch the given sketch.
317
   */
318
  public void update(final HllSketch sketch) {
319
    gadget.hllSketchImpl = unionImpl(sketch, gadget, lgMaxK);
1✔
320
  }
1✔
321

322
  @Override
323
  void couponUpdate(final int coupon) {
324
    if (coupon == EMPTY) { return; }
1✔
325
    gadget.hllSketchImpl = gadget.hllSketchImpl.couponUpdate(coupon);
1✔
326
  }
1✔
327

328
  // Union operator logic
329

330
  /**
331
   * Union the given source and destination sketches. This static method examines the state of
332
   * the current internal gadget and the incoming sketch and determines the optimum way to
333
   * perform the union. This may involve swapping the merge order, downsampling, transforming,
334
   * and / or copying one of the arguments and may completely replace the internals of the union.
335
   *
336
   * <p>If the union gadget is empty, the source sketch is effectively copied to the union gadget
337
   * after any required transformations.
338
   *
339
   * <p>The direction of the merge is reversed if the union gadget is in LIST or SET mode, and the
340
   * source sketch is in HLL mode. This is done to maintain maximum accuracy of the union process.
341
   *
342
   * <p>The source sketch is downsampled if the source LgK is larger than maxLgK and in HLL mode.
343
   *
344
   * <p>The union gadget is downsampled if both source and union gadget are in HLL mode
345
   * and the source LgK <b>less than</b> the union gadget LgK.
346
   *
347
   * @param source the given incoming sketch, which cannot be modified.
348
   * @param gadget the given gadget sketch, which has a target of HLL_8 and holds the result.
349
   * @param lgMaxK the maximum value of log2 K for this union.
350
   * @return the union of the two sketches in the form of the internal HllSketchImpl, which is
351
   * always in HLL_8 form.
352
   */
353
  private static HllSketchImpl unionImpl(final HllSketch source, final HllSketch gadget,
354
      final int lgMaxK) {
355
    assert gadget.getTgtHllType() == HLL_8;
1✔
356
    if ((source == null) || source.isEmpty()) {
1✔
357
      return gadget.hllSketchImpl;
1✔
358
    }
359

360
    final CurMode srcMode = source.getCurMode();
1✔
361
    if (srcMode == CurMode.LIST ) {
1✔
362
      source.mergeTo(gadget);
1✔
363
      return gadget.hllSketchImpl;
1✔
364
    }
365

366
    final int srcLgK = source.getLgConfigK();
1✔
367
    final int gadgetLgK = gadget.getLgConfigK();
1✔
368
    final boolean srcIsMem = source.isMemory();
1✔
369
    final boolean gdtIsMem = gadget.isMemory();
1✔
370
    final boolean gdtEmpty = gadget.isEmpty();
1✔
371

372
    if (srcMode == CurMode.SET ) {
1✔
373
      if (gdtEmpty && (srcLgK == gadgetLgK) && (!srcIsMem) && (!gdtIsMem)) {
1✔
374
        gadget.hllSketchImpl = source.copyAs(HLL_8).hllSketchImpl;
1✔
375
        return gadget.hllSketchImpl;
1✔
376
      }
377
      source.mergeTo(gadget);
1✔
378
      return gadget.hllSketchImpl;
1✔
379
    }
380

381
    //Hereafter, the source is in HLL mode.
382
    final int bit0 = gdtIsMem ? 1 : 0;
1✔
383
    final int bits1_2 = (gdtEmpty ? 3 : gadget.getCurMode().ordinal()) << 1;
1✔
384
    final int bit3 = (srcLgK < gadgetLgK) ? 8 : 0;
1✔
385
    final int bit4 = (srcLgK > lgMaxK) ? 16 : 0;
1✔
386
    final int sw = bit4 | bit3 | bits1_2 | bit0;
1✔
387
    HllSketchImpl hllSketchImpl = null; //never returned as null
1✔
388

389
    switch (sw) {
1✔
390
      case 0: //src <= max, src >= gdt, gdtLIST, gdtHeap
391
      case 8: //src <= max, src <  gdt, gdtLIST, gdtHeap
392
      case 2: //src <= max, src >= gdt, gdtSET,  gdtHeap
393
      case 10://src <= max, src <  gdt, gdtSET,  gdtHeap
394
      { //Action: copy src, reverse merge w/autofold, ooof=src
395
        final HllSketch srcHll8Heap = source.copyAs(HLL_8);
1✔
396
        gadget.mergeTo(srcHll8Heap); //merge gdt(Hll8,heap,list/set) -> src(Hll8,heap,hll)
1✔
397
        hllSketchImpl = srcHll8Heap.hllSketchImpl;
1✔
398
        break;
1✔
399
      }
400
      case 16://src >  max, src >= gdt, gdtList, gdtHeap
401
      case 18://src >  max, src >= gdt, gdtSet,  gdtHeap
402
      { //Action: downsample src to MaxLgK, reverse merge w/autofold, ooof=src
403
        final HllSketch srcHll8Heap = downsample(source, lgMaxK);
1✔
404
        gadget.mergeTo(srcHll8Heap); //merge gdt(Hll8,heap,list/set) -> src(Hll8,heap,hll)
1✔
405
        hllSketchImpl = srcHll8Heap.hllSketchImpl;
1✔
406
        break;
1✔
407
      }
408

409
      case 1: //src <= max, src >= gdt, gdtLIST, gdtMemory
410
      case 9: //src <= max, src <  gdt, gdtLIST, gdtMemory
411
      case 3: //src <= max, src >= gdt, gdtSET,  gdtMemory
412
      case 11://src <= max, src <  gdt, gdtSET,  gdtMemory
413
      { //Action: copy src, reverse merge w/autofold, use gdt memory, ooof=src
414
        final HllSketch srcHll8Heap = source.copyAs(HLL_8);
1✔
415
        gadget.mergeTo(srcHll8Heap);  //merge gdt(Hll8,mem,list/set) -> src(Hll8,heap,hll)
1✔
416
        hllSketchImpl = useGadgetMemory(gadget, srcHll8Heap, false).hllSketchImpl;
1✔
417
        break;
1✔
418
      }
419
      case 17://src >  max, src >= gdt, gdtList, gdtMemory
420
      case 19://src >  max, src >= gdt, gdtSet,  gdtMemory
421
      { //Action: downsample src to MaxLgK, reverse merge w/autofold, use gdt memory, ooof=src
422
        final HllSketch srcHll8Heap = downsample(source, lgMaxK);
1✔
423
        gadget.mergeTo(srcHll8Heap); //merge gdt(Hll8,mem,list/set) -> src(Hll8,heap,hll), autofold
1✔
424
        hllSketchImpl = useGadgetMemory(gadget, srcHll8Heap, false).hllSketchImpl;
1✔
425
        break;
1✔
426
      }
427

428
      case 4: //src <= max, src >= gdt, gdtHLL, gdtHeap
429
      case 20://src >  max, src >= gdt, gdtHLL, gdtHeap
430
      case 5: //src <= max, src >= gdt, gdtHLL, gdtMemory
431
      case 21://src >  max, src >= gdt, gdtHLL, gdtMemory
432
      { //Action: forward HLL merge w/autofold, ooof=True
433
        //merge src(Hll4,6,8,heap/mem,Mode=HLL) -> gdt(Hll8,heap,Mode=HLL)
434
        mergeHlltoHLLmode(source, gadget, srcLgK, gadgetLgK, srcIsMem, gdtIsMem);
1✔
435
        hllSketchImpl = gadget.putOutOfOrderFlag(true).hllSketchImpl;
1✔
436
        break;
1✔
437
      }
438
      case 12://src <= max, src <  gdt, gdtHLL, gdtHeap
439
      { //Action: downsample gdt to srcLgK, forward HLL merge w/autofold, ooof=True
440
        final HllSketch gdtHll8Heap = downsample(gadget, srcLgK);
1✔
441
        //merge src(Hll4,6,8;heap/mem,Mode=HLL) -> gdt(Hll8,heap,hll)
442
        mergeHlltoHLLmode(source, gdtHll8Heap, srcLgK, gadgetLgK, srcIsMem, false);
1✔
443
        hllSketchImpl = gdtHll8Heap.putOutOfOrderFlag(true).hllSketchImpl;
1✔
444
        break;
1✔
445
      }
446
      case 13://src <= max, src < gdt, gdtHLL, gdtMemory
447
      { //Action: downsample gdt to srcLgK, forward HLL merge w/autofold, use gdt memory, ooof=True
448
        final HllSketch gdtHll8Heap = downsample(gadget, srcLgK);
1✔
449
        //merge src(Hll4,6,8;heap/mem;Mode=HLL) -> gdt(Hll8,heap,Mode=HLL)
450
        mergeHlltoHLLmode(source, gdtHll8Heap, srcLgK, gadgetLgK, srcIsMem, false);
1✔
451
        hllSketchImpl = useGadgetMemory(gadget, gdtHll8Heap, true).hllSketchImpl;
1✔
452
        break;
1✔
453
      }
454

455
      case 6: //src <= max, src >= gdt, gdtEmpty, gdtHeap
456
      case 14://src <= max, src <  gdt, gdtEmpty, gdtHeap
457
      { //Action: copy src, replace gdt, ooof=src
458
        final HllSketch srcHll8Heap = source.copyAs(HLL_8);
1✔
459
        hllSketchImpl = srcHll8Heap.hllSketchImpl;
1✔
460
        break;
1✔
461
      }
462
      case 22://src >  max, src >= gdt, gdtEmpty, gdtHeap
463
      { //Action: downsample src to lgMaxK, replace gdt, ooof=src
464
        final HllSketch srcHll8Heap = downsample(source, lgMaxK);
1✔
465
        hllSketchImpl = srcHll8Heap.hllSketchImpl;
1✔
466
        break;
1✔
467
      }
468

469
      case 7: //src <= max, src >= gdt, gdtEmpty, gdtMemory
470
      case 15://src <= max, src <  gdt, gdtEmpty, gdtMemory
471
      { //Action: copy src, use gdt memory, ooof=src
472
        final HllSketch srcHll8Heap = source.copyAs(HLL_8);
1✔
473
        hllSketchImpl = useGadgetMemory(gadget, srcHll8Heap, false).hllSketchImpl;
1✔
474
        break;
1✔
475
      }
476
      case 23://src >  max, src >= gdt, gdtEmpty, gdtMemory, replace mem, downsample src, ooof=src
477
      { //Action: downsample src to lgMaxK, use gdt memory, ooof=src
478
        final HllSketch srcHll8Heap = downsample(source, lgMaxK);
1✔
479
        hllSketchImpl = useGadgetMemory(gadget, srcHll8Heap, false).hllSketchImpl;
1✔
480
        break;
1✔
481
      }
482
      default: return gadget.hllSketchImpl; //not possible
×
483
    }
484
    return hllSketchImpl;
1✔
485
  }
486

487
  private static final HllSketch useGadgetMemory(
488
      final HllSketch gadget, final HllSketch hll8Heap, final boolean setOooFlag) {
489
    final WritableMemory wmem = gadget.getWritableMemory();    //use the gdt wmem
1✔
490
    final byte[] byteArr = hll8Heap.toUpdatableByteArray();    //serialize srcCopy
1✔
491
    wmem.putByteArray(0, byteArr, 0, byteArr.length);          //replace old data with new
1✔
492
    return (setOooFlag)
1✔
493
        ? HllSketch.writableWrap(wmem, false).putOutOfOrderFlag(true) //wrap, set oooflag, return
1✔
494
        : HllSketch.writableWrap(wmem, false);                        //wrap & return
1✔
495
  }
496

497
  private static final void mergeHlltoHLLmode(final HllSketch src, final HllSketch tgt,
498
      final int srcLgK, final int tgtLgK, final boolean srcIsMem, final boolean tgtIsMem) {
499
      final int sw = (tgtIsMem ? 1 : 0) | (srcIsMem ? 2 : 0)
1✔
500
          | ((srcLgK > tgtLgK) ? 4 : 0) | ((src.getTgtHllType() != HLL_8) ? 8 : 0);
1✔
501
      final int srcK = 1 << srcLgK;
1✔
502

503
      switch (sw) {
1✔
504
        case 0: { //HLL_8, srcLgK=tgtLgK, src=heap, tgt=heap
505
          final byte[] srcArr = ((Hll8Array) src.hllSketchImpl).hllByteArr;
1✔
506
          final byte[] tgtArr = ((Hll8Array) tgt.hllSketchImpl).hllByteArr;
1✔
507
          for (int i = 0; i < srcK; i++) {
1✔
508
            final byte srcV = srcArr[i];
1✔
509
            final byte tgtV = tgtArr[i];
1✔
510
            tgtArr[i] = (byte) Math.max(srcV, tgtV);
1✔
511
          }
512
          break;
1✔
513
        }
514
        case 1: { //HLL_8, srcLgK=tgtLgK, src=heap, tgt=mem
515
          final byte[] srcArr = ((Hll8Array) src.hllSketchImpl).hllByteArr;
1✔
516
          final WritableMemory tgtMem = tgt.getWritableMemory();
1✔
517
          for (int i = 0; i < srcK; i++) {
1✔
518
            final byte srcV = srcArr[i];
1✔
519
            final byte tgtV = tgtMem.getByte(HLL_BYTE_ARR_START + i);
1✔
520
            tgtMem.putByte(HLL_BYTE_ARR_START + i, (byte) Math.max(srcV, tgtV));
1✔
521
          }
522
          break;
1✔
523
        }
524
        case 2: { //HLL_8, srcLgK=tgtLgK, src=mem,  tgt=heap
525
          final Memory srcMem = src.getMemory();
1✔
526
          final byte[] tgtArr = ((Hll8Array) tgt.hllSketchImpl).hllByteArr;
1✔
527
          for (int i = 0; i < srcK; i++) {
1✔
528
            final byte srcV = srcMem.getByte(HLL_BYTE_ARR_START + i);
1✔
529
            final byte tgtV = tgtArr[i];
1✔
530
            tgtArr[i] = (byte) Math.max(srcV, tgtV);
1✔
531
          }
532
          break;
1✔
533
        }
534
        case 3: { //HLL_8, srcLgK=tgtLgK, src=mem,  tgt=mem
535
          final Memory srcMem = src.getMemory();
1✔
536
          final WritableMemory tgtMem = tgt.getWritableMemory();
1✔
537
          for (int i = 0; i < srcK; i++) {
1✔
538
            final byte srcV = srcMem.getByte(HLL_BYTE_ARR_START + i);
1✔
539
            final byte tgtV = tgtMem.getByte(HLL_BYTE_ARR_START + i);
1✔
540
            tgtMem.putByte(HLL_BYTE_ARR_START + i, (byte) Math.max(srcV, tgtV));
1✔
541
          }
542
          break;
1✔
543
        }
544
        case 4: { //HLL_8, srcLgK>tgtLgK, src=heap, tgt=heap
545
          final int tgtKmask = (1 << tgtLgK) - 1;
1✔
546
          final byte[] srcArr = ((Hll8Array) src.hllSketchImpl).hllByteArr;
1✔
547
          final byte[] tgtArr = ((Hll8Array) tgt.hllSketchImpl).hllByteArr;
1✔
548
          for (int i = 0; i < srcK; i++) {
1✔
549
            final byte srcV = srcArr[i];
1✔
550
            final int j = i & tgtKmask;
1✔
551
            final byte tgtV = tgtArr[j];
1✔
552
            tgtArr[j] = (byte) Math.max(srcV, tgtV);
1✔
553
          }
554
          break;
1✔
555
        }
556
        case 5: { //HLL_8, srcLgK>tgtLgK, src=heap, tgt=mem
557
          final int tgtKmask = (1 << tgtLgK) - 1;
1✔
558
          final byte[] srcArr = ((Hll8Array) src.hllSketchImpl).hllByteArr;
1✔
559
          final WritableMemory tgtMem = tgt.getWritableMemory();
1✔
560
          for (int i = 0; i < srcK; i++) {
1✔
561
            final byte srcV = srcArr[i];
1✔
562
            final int j = i & tgtKmask;
1✔
563
            final byte tgtV = tgtMem.getByte(HLL_BYTE_ARR_START + j);
1✔
564
            tgtMem.putByte(HLL_BYTE_ARR_START + j, (byte) Math.max(srcV, tgtV));
1✔
565
          }
566
          break;
1✔
567
        }
568
        case 6: { //HLL_8, srcLgK>tgtLgK, src=mem,  tgt=heap
569
          final int tgtKmask = (1 << tgtLgK) - 1;
1✔
570
          final Memory srcMem = src.getMemory();
1✔
571
          final byte[] tgtArr = ((Hll8Array) tgt.hllSketchImpl).hllByteArr;
1✔
572
          for (int i = 0; i < srcK; i++) {
1✔
573
            final byte srcV = srcMem.getByte(HLL_BYTE_ARR_START + i);
1✔
574
            final int j = i & tgtKmask;
1✔
575
            final byte tgtV = tgtArr[j];
1✔
576
            tgtArr[j] = (byte) Math.max(srcV, tgtV);
1✔
577
          }
578
          break;
1✔
579
        }
580
        case 7: { //HLL_8, srcLgK>tgtLgK, src=mem,  tgt=mem
581
          final int tgtKmask = (1 << tgtLgK) - 1;
1✔
582
          final Memory srcMem = src.getMemory();
1✔
583
          final WritableMemory tgtMem = tgt.getWritableMemory();
1✔
584
          for (int i = 0; i < srcK; i++) {
1✔
585
            final byte srcV = srcMem.getByte(HLL_BYTE_ARR_START + i);
1✔
586
            final int j = i & tgtKmask;
1✔
587
            final byte tgtV = tgtMem.getByte(HLL_BYTE_ARR_START + j);
1✔
588
            tgtMem.putByte(HLL_BYTE_ARR_START + j, (byte) Math.max(srcV, tgtV));
1✔
589
          }
590
          break;
1✔
591
        }
592
        case 8: case 9:
593
        {
594
          //!HLL_8, srcLgK=tgtLgK, src=heap, tgt=heap/mem
595
          final AbstractHllArray tgtAbsHllArr = (AbstractHllArray)(tgt.hllSketchImpl);
1✔
596
          if (src.getTgtHllType() == HLL_4) {
1✔
597
            final Hll4Array src4 = (Hll4Array) src.hllSketchImpl;
1✔
598
            final AuxHashMap auxHashMap = src4.getAuxHashMap();
1✔
599
            final int curMin = src4.getCurMin();
1✔
600
            int i = 0;
1✔
601
            int j = 0;
1✔
602
            while (j < srcK) {
1✔
603
              final byte b = src4.hllByteArr[i++];
1✔
604
              int value = Byte.toUnsignedInt(b) & loNibbleMask;
1✔
605
              tgtAbsHllArr.updateSlotNoKxQ(j, value == AUX_TOKEN ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
606
              j++;
1✔
607
              value = Byte.toUnsignedInt(b) >>> 4;
1✔
608
              tgtAbsHllArr.updateSlotNoKxQ(j, value == AUX_TOKEN ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
609
              j++;
1✔
610
            }
1✔
611
          } else {
1✔
612
            final Hll6Array src6 = (Hll6Array) src.hllSketchImpl;
1✔
613
            int i = 0;
1✔
614
            int j = 0;
1✔
615
            while (j < srcK) {
1✔
616
              final byte b1 = src6.hllByteArr[i++];
1✔
617
              final byte b2 = src6.hllByteArr[i++];
1✔
618
              final byte b3 = src6.hllByteArr[i++];
1✔
619
              int value = Byte.toUnsignedInt(b1) & 0x3f;
1✔
620
              tgtAbsHllArr.updateSlotNoKxQ(j++, value);
1✔
621
              value = Byte.toUnsignedInt(b1) >>> 6;
1✔
622
              value |= (Byte.toUnsignedInt(b2) & 0x0f) << 2;
1✔
623
              tgtAbsHllArr.updateSlotNoKxQ(j++, value);
1✔
624
              value = Byte.toUnsignedInt(b2) >>> 4;
1✔
625
              value |= (Byte.toUnsignedInt(b3) & 3) << 4;
1✔
626
              tgtAbsHllArr.updateSlotNoKxQ(j++, value);
1✔
627
              value = Byte.toUnsignedInt(b3) >>> 2;
1✔
628
              tgtAbsHllArr.updateSlotNoKxQ(j++, value);
1✔
629
            }
1✔
630
          }
631
          break;
1✔
632
        }
633
        case 10: case 11:
634
        {
635
          //!HLL_8, srcLgK=tgtLgK, src=mem, tgt=heap/mem
636
          final AbstractHllArray tgtAbsHllArr = (AbstractHllArray)(tgt.hllSketchImpl);
1✔
637
          if (src.getTgtHllType() == HLL_4) {
1✔
638
            final DirectHll4Array src4 = (DirectHll4Array) src.hllSketchImpl;
1✔
639
            final AuxHashMap auxHashMap = src4.getAuxHashMap();
1✔
640
            final int curMin = src4.getCurMin();
1✔
641
            int i = 0;
1✔
642
            int j = 0;
1✔
643
            while (j < srcK) {
1✔
644
              final byte b = src4.mem.getByte(HLL_BYTE_ARR_START + i++);
1✔
645
              int value = Byte.toUnsignedInt(b) & loNibbleMask;
1✔
646
              tgtAbsHllArr.updateSlotNoKxQ(j, value == AUX_TOKEN ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
647
              j++;
1✔
648
              value = Byte.toUnsignedInt(b) >>> 4;
1✔
649
              tgtAbsHllArr.updateSlotNoKxQ(j, value == AUX_TOKEN ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
650
              j++;
1✔
651
            }
1✔
652
          } else {
1✔
653
            final DirectHll6Array src6 = (DirectHll6Array) src.hllSketchImpl;
1✔
654
            int i = 0;
1✔
655
            int offset = HLL_BYTE_ARR_START;
1✔
656
            while (i < srcK) {
1✔
657
              final byte b1 = src6.mem.getByte(offset++);
1✔
658
              final byte b2 = src6.mem.getByte(offset++);
1✔
659
              final byte b3 = src6.mem.getByte(offset++);
1✔
660
              int value = Byte.toUnsignedInt(b1) & 0x3f;
1✔
661
              tgtAbsHllArr.updateSlotNoKxQ(i++, value);
1✔
662
              value = Byte.toUnsignedInt(b1) >>> 6;
1✔
663
              value |= (Byte.toUnsignedInt(b2) & 0x0f) << 2;
1✔
664
              tgtAbsHllArr.updateSlotNoKxQ(i++, value);
1✔
665
              value = Byte.toUnsignedInt(b2) >>> 4;
1✔
666
              value |= (Byte.toUnsignedInt(b3) & 3) << 4;
1✔
667
              tgtAbsHllArr.updateSlotNoKxQ(i++, value);
1✔
668
              value = Byte.toUnsignedInt(b3) >>> 2;
1✔
669
              tgtAbsHllArr.updateSlotNoKxQ(i++, value);
1✔
670
            }
1✔
671
          }
672
          break;
1✔
673
        }
674
        case 12: case 13:
675
        {
676
          //!HLL_8, srcLgK>tgtLgK, src=heap, tgt=heap/mem
677
          final int tgtKmask = (1 << tgtLgK) - 1;
1✔
678
          final AbstractHllArray tgtAbsHllArr = (AbstractHllArray)(tgt.hllSketchImpl);
1✔
679
          if (src.getTgtHllType() == HLL_4) {
1✔
680
            final Hll4Array src4 = (Hll4Array) src.hllSketchImpl;
1✔
681
            final AuxHashMap auxHashMap = src4.getAuxHashMap();
1✔
682
            final int curMin = src4.getCurMin();
1✔
683
            int i = 0;
1✔
684
            int j = 0;
1✔
685
            while (j < srcK) {
1✔
686
              final byte b = src4.hllByteArr[i++];
1✔
687
              int value = Byte.toUnsignedInt(b) & loNibbleMask;
1✔
688
              tgtAbsHllArr.updateSlotNoKxQ(j & tgtKmask, value == AUX_TOKEN
1✔
689
                  ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
690
              j++;
1✔
691
              value = Byte.toUnsignedInt(b) >>> 4;
1✔
692
              tgtAbsHllArr.updateSlotNoKxQ(j & tgtKmask, value == AUX_TOKEN
1✔
693
                  ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
694
              j++;
1✔
695
            }
1✔
696
          } else {
1✔
697
            final Hll6Array src6 = (Hll6Array) src.hllSketchImpl;
1✔
698
            int i = 0;
1✔
699
            int j = 0;
1✔
700
            while (j < srcK) {
1✔
701
              final byte b1 = src6.hllByteArr[i++];
1✔
702
              final byte b2 = src6.hllByteArr[i++];
1✔
703
              final byte b3 = src6.hllByteArr[i++];
1✔
704
              int value = Byte.toUnsignedInt(b1) & 0x3f;
1✔
705
              tgtAbsHllArr.updateSlotNoKxQ(j++ & tgtKmask, value);
1✔
706
              value = Byte.toUnsignedInt(b1) >>> 6;
1✔
707
              value |= (Byte.toUnsignedInt(b2) & 0x0f) << 2;
1✔
708
              tgtAbsHllArr.updateSlotNoKxQ(j++ & tgtKmask, value);
1✔
709
              value = Byte.toUnsignedInt(b2) >>> 4;
1✔
710
              value |= (Byte.toUnsignedInt(b3) & 3) << 4;
1✔
711
              tgtAbsHllArr.updateSlotNoKxQ(j++ & tgtKmask, value);
1✔
712
              value = Byte.toUnsignedInt(b3) >>> 2;
1✔
713
              tgtAbsHllArr.updateSlotNoKxQ(j++ & tgtKmask, value);
1✔
714
            }
1✔
715
          }
716
          break;
1✔
717
        }
718
        case 14: case 15:
719
        {
720
          //!HLL_8, srcLgK>tgtLgK, src=mem, tgt=heap/mem
721
          final int tgtKmask = (1 << tgtLgK) - 1;
1✔
722
          final AbstractHllArray tgtAbsHllArr = (AbstractHllArray)(tgt.hllSketchImpl);
1✔
723
          if (src.getTgtHllType() == HLL_4) {
1✔
724
            final DirectHll4Array src4 = (DirectHll4Array) src.hllSketchImpl;
1✔
725
            final AuxHashMap auxHashMap = src4.getAuxHashMap();
1✔
726
            final int curMin = src4.getCurMin();
1✔
727
            int i = 0;
1✔
728
            int j = 0;
1✔
729
            while (j < srcK) {
1✔
730
              final byte b = src4.mem.getByte(HLL_BYTE_ARR_START + i++);
1✔
731
              int value = Byte.toUnsignedInt(b) & loNibbleMask;
1✔
732
              tgtAbsHllArr.updateSlotNoKxQ(j & tgtKmask, value == AUX_TOKEN
1✔
733
                  ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
734
              j++;
1✔
735
              value = Byte.toUnsignedInt(b) >>> 4;
1✔
736
              tgtAbsHllArr.updateSlotNoKxQ(j & tgtKmask, value == AUX_TOKEN
1✔
737
                  ? auxHashMap.mustFindValueFor(j) : value + curMin);
1✔
738
              j++;
1✔
739
            }
1✔
740
          } else {
1✔
741
            final DirectHll6Array src6 = (DirectHll6Array) src.hllSketchImpl;
1✔
742
            int i = 0;
1✔
743
            int offset = HLL_BYTE_ARR_START;
1✔
744
            while (i < srcK) {
1✔
745
              final byte b1 = src6.mem.getByte(offset++);
1✔
746
              final byte b2 = src6.mem.getByte(offset++);
1✔
747
              final byte b3 = src6.mem.getByte(offset++);
1✔
748
              int value = Byte.toUnsignedInt(b1) & 0x3f;
1✔
749
              tgtAbsHllArr.updateSlotNoKxQ(i++ & tgtKmask, value);
1✔
750
              value = Byte.toUnsignedInt(b1) >>> 6;
1✔
751
              value |= (Byte.toUnsignedInt(b2) & 0x0f) << 2;
1✔
752
              tgtAbsHllArr.updateSlotNoKxQ(i++ & tgtKmask, value);
1✔
753
              value = Byte.toUnsignedInt(b2) >>> 4;
1✔
754
              value |= (Byte.toUnsignedInt(b3) & 3) << 4;
1✔
755
              tgtAbsHllArr.updateSlotNoKxQ(i++ & tgtKmask, value);
1✔
756
              value = Byte.toUnsignedInt(b3) >>> 2;
1✔
757
              tgtAbsHllArr.updateSlotNoKxQ(i++ & tgtKmask, value);
1✔
758
            }
1✔
759
          }
760
          break;
1✔
761
        }
762
        default: break; //not possible
763
      }
764
      tgt.hllSketchImpl.putRebuildCurMinNumKxQFlag(true);
1✔
765
  }
1✔
766

767
  //Used by union operator. Always copies or downsamples to Heap HLL_8.
768
  //Caller must ultimately manage oooFlag, as caller has more context.
769
  /**
770
   * Copies or downsamples the given candidate HLLmode sketch to tgtLgK, HLL_8, on the heap.
771
   *
772
   * @param candidate the HllSketch to downsample, must be in HLL mode.
773
   * @param tgtLgK the LgK to downsample to.
774
   * @return the downsampled HllSketch.
775
   */
776
  private static final HllSketch downsample(final HllSketch candidate, final int tgtLgK) {
777
    final AbstractHllArray candArr = (AbstractHllArray) candidate.hllSketchImpl;
1✔
778
    final HllArray tgtHllArr = HllArray.newHeapHll(tgtLgK, TgtHllType.HLL_8);
1✔
779
    final PairIterator candItr = candArr.iterator();
1✔
780
    while (candItr.nextValid()) {
1✔
781
      tgtHllArr.couponUpdate(candItr.getPair()); //rebuilds KxQ, etc.
1✔
782
    }
783
    //both of these are required for isomorphism
784
    tgtHllArr.putHipAccum(candArr.getHipAccum());
1✔
785
    tgtHllArr.putOutOfOrder(candidate.isOutOfOrder());
1✔
786
    tgtHllArr.putRebuildCurMinNumKxQFlag(false);
1✔
787
    return new HllSketch(tgtHllArr);
1✔
788
  }
789

790
  //Used to rebuild curMin, numAtCurMin and KxQ registers, due to high performance merge operation
791
  static final void checkRebuildCurMinNumKxQ(final HllSketch sketch) {
792
    final HllSketchImpl hllSketchImpl = sketch.hllSketchImpl;
1✔
793
    final CurMode curMode = sketch.getCurMode();
1✔
794
    final TgtHllType tgtHllType = sketch.getTgtHllType();
1✔
795
    final boolean rebuild = hllSketchImpl.isRebuildCurMinNumKxQFlag();
1✔
796
    if ( !rebuild || (curMode != CurMode.HLL) || (tgtHllType != HLL_8) ) { return; }
1✔
797
    final AbstractHllArray absHllArr = (AbstractHllArray)(hllSketchImpl);
1✔
798
    int curMin = 64;
1✔
799
    int numAtCurMin = 0;
1✔
800
    double kxq0 = 1 << absHllArr.getLgConfigK();
1✔
801
    double kxq1 = 0;
1✔
802
    final PairIterator itr = absHllArr.iterator();
1✔
803
    while (itr.nextAll()) {
1✔
804
      final int v = itr.getValue();
1✔
805
      if (v > 0) {
1✔
806
        if (v < 32) { kxq0 += invPow2(v) - 1.0; }
1✔
807
        else        { kxq1 += invPow2(v) - 1.0; }
×
808
      }
809
      if (v > curMin) { continue; }
1✔
810
      if (v < curMin) {
1✔
811
        curMin = v;
1✔
812
        numAtCurMin = 1;
1✔
813
      } else {
814
        numAtCurMin++;
1✔
815
      }
816
    }
1✔
817
    absHllArr.putKxQ0(kxq0);
1✔
818
    absHllArr.putKxQ1(kxq1);
1✔
819
    absHllArr.putCurMin(curMin);
1✔
820
    absHllArr.putNumAtCurMin(numAtCurMin);
1✔
821
    absHllArr.putRebuildCurMinNumKxQFlag(false);
1✔
822
    //HipAccum is not affected
823
  }
1✔
824

825
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc