• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.19
/src/main/java/org/apache/datasketches/tuple/AnotB.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.tuple;
21

22
import static java.lang.Math.min;
23
import static org.apache.datasketches.common.Util.exactLog2OfLong;
24
import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable;
25
import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
26

27
import java.lang.reflect.Method;
28
import java.util.Arrays;
29

30
import org.apache.datasketches.common.SketchesArgumentException;
31
import org.apache.datasketches.common.SketchesStateException;
32
import org.apache.datasketches.common.SuppressFBWarnings;
33
import org.apache.datasketches.thetacommon.SetOperationCornerCases;
34
import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction;
35
import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase;
36
import org.apache.datasketches.thetacommon.ThetaUtil;
37

38
/**
39
 * Computes a set difference, A-AND-NOT-B, of two generic tuple sketches.
40
 * This class includes both stateful and stateless operations.
41
 *
42
 * <p>The stateful operation is as follows:</p>
43
 * <pre><code>
44
 * AnotB anotb = new AnotB();
45
 *
46
 * anotb.setA(Sketch skA); //The first argument.
47
 * anotb.notB(Sketch skB); //The second (subtraction) argument.
48
 * anotb.notB(Sketch skC); // ...any number of additional subtractions...
49
 * anotb.getResult(false); //Get an interim result.
50
 * anotb.notB(Sketch skD); //Additional subtractions.
51
 * anotb.getResult(true);  //Final result and resets the AnotB operator.
52
 * </code></pre>
53
 *
54
 * <p>The stateless operation is as follows:</p>
55
 * <pre><code>
56
 * AnotB anotb = new AnotB();
57
 *
58
 * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
59
 * </code></pre>
60
 *
61
 * <p>Calling the <i>setA</i> operation a second time essentially clears the internal state and loads
62
 * the new sketch.</p>
63
 *
64
 * <p>The stateless and stateful operations are independent of each other.</p>
65
 *
66
 * @param <S> Type of Summary
67
 *
68
 * @author Lee Rhodes
69
 */
70
@SuppressFBWarnings(value = "DP_DO_INSIDE_DO_PRIVILEGED", justification = "Defer fix")
71
public final class AnotB<S extends Summary> {
1✔
72
  //State of the stateful operator. reset() restores exactly these initial values.
  private boolean empty_ = true;
  private long thetaLong_ = Long.MAX_VALUE;
  private long[] hashArr_ = null;   //always in compact form, not necessarily sorted
  private S[] summaryArr_ = null; //always in compact form, not necessarily sorted
  private int curCount_ = 0;

  //Reflective handle on theta Sketch.getCache(), looked up once when this class is initialized.
  private static final Method GET_CACHE;

  static {
    final Method getCache;
    try {
      getCache = org.apache.datasketches.theta.Sketch.class.getDeclaredMethod("getCache");
      getCache.setAccessible(true);
    } catch (final Exception e) {
      //Any lookup or access failure is reported as a state exception.
      throw new SketchesStateException("Could not reflect getCache(): " + e);
    }
    GET_CACHE = getCache;
  }
1✔
88

89
  /**
90
   * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the
91
   * first argument <i>A</i> of <i>A-AND-NOT-B</i>. This overwrites the internal state of this
92
   * AnotB operator with the contents of the given sketch.
93
   * This sets the stage for multiple following <i>notB</i> steps.
94
   *
95
   * <p>An input argument of null will throw an exception.</p>
96
   *
97
   * <p>Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
98
   * That is distinctly different from the java <i>null</i>, which represents a nonexistent object.
99
   * In most cases it is a programming error due to some object that was not properly initialized.
100
   * With a null as the first argument, we cannot know what the user's intent is.
101
   * Since it is very likely that a <i>null</i> is a programming error, we throw an exception.</p>
102
   *
103
   * <p>An empty input argument will set the internal state to empty.</p>
104
   *
105
   * <p>Rationale: An empty set is a mathematically legal concept. Although it makes any subsequent,
106
   * valid argument for B irrelevant, we must allow this and assume the user knows what they are
107
   * doing.</p>
108
   *
109
   * <p>Performing {@link #getResult(boolean)} just after this step will return a compact form of
110
   * the given argument.</p>
111
   *
112
   * @param skA The incoming sketch for the first argument, <i>A</i>.
113
   */
114
  public void setA(final Sketch<S> skA) {
115
    if (skA == null) {
1✔
116
      reset();
1✔
117
      throw new SketchesArgumentException("The input argument <i>A</i> may not be null");
1✔
118
    }
119

120
    empty_ = skA.isEmpty();
1✔
121
    thetaLong_ = skA.getThetaLong();
1✔
122
    final DataArrays<S> da = getCopyOfDataArraysTuple(skA);
1✔
123
    summaryArr_ = da.summaryArr;  //it may be null
1✔
124
    hashArr_ = da.hashArr;        //it may be null
1✔
125
    curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
1✔
126
  }
1✔
127

128
  /**
129
   * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the
130
   * second (or <i>n+1</i>th) argument <i>B</i> of <i>A-AND-NOT-B</i>.
131
   * Performs an <i>AND NOT</i> operation with the existing internal state of this AnotB operator.
132
   *
133
   * <p>An input argument of null or empty is ignored.</p>
134
   *
135
   * <p>Rationale: A <i>null</i> for the second or following arguments is more tolerable because
136
   * <i>A NOT null</i> is still <i>A</i> even if we don't know exactly what the null represents. It
137
   * clearly does not have any content that overlaps with <i>A</i>. Also, because this can be part of
138
   * a multistep operation with multiple <i>notB</i> steps. Other following steps can still produce
139
   * a valid result.</p>
140
   *
141
   * <p>Use {@link #getResult(boolean)} to obtain the result.</p>
142
   *
143
   * @param skB The incoming Tuple sketch for the second (or following) argument <i>B</i>.
144
   */
145
  public void notB(final Sketch<S> skB) {
146
    if (skB == null) { return; } //ignore
1✔
147

148
    final long thetaLongB = skB.getThetaLong();
1✔
149
    final int countB = skB.getRetainedEntries();
1✔
150
    final boolean emptyB = skB.isEmpty();
1✔
151

152
    final int id =
1✔
153
        SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB);
1✔
154
    final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
1✔
155
    final AnotbAction anotbAction = cCase.getAnotbAction();
1✔
156

157
    switch (anotbAction) {
1✔
158
      case EMPTY_1_0_T: {
159
        reset();
1✔
160
        break;
1✔
161
      }
162
      case DEGEN_MIN_0_F: {
163
        reset();
1✔
164
        thetaLong_ = min(thetaLong_, thetaLongB);
1✔
165
        empty_ = false;
1✔
166
        break;
1✔
167
      }
168
      case DEGEN_THA_0_F: {
169
        empty_ = false;
1✔
170
        curCount_ = 0;
1✔
171
        //thetaLong_ is ok
172
        break;
1✔
173
      }
174
      case TRIM_A: {
175
        thetaLong_ = min(thetaLong_, thetaLongB);
1✔
176
        final DataArrays<S> da = trimAndCopyDataArrays(hashArr_, summaryArr_, thetaLong_, true);
1✔
177
        hashArr_ = da.hashArr;
1✔
178
        curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
1✔
179
        summaryArr_ = da.summaryArr;
1✔
180
        //empty_ = is whatever SkA is,
181
        break;
1✔
182
      }
183
      case SKETCH_A: {
184
        break; //result is already in A
1✔
185
      }
186
      case FULL_ANOTB: { //both A and B should have valid entries.
187
        thetaLong_ = min(thetaLong_, thetaLongB);
1✔
188
        final DataArrays<S> daR = getCopyOfResultArraysTuple(thetaLong_, curCount_, hashArr_, summaryArr_, skB);
1✔
189
        hashArr_ = daR.hashArr;
1✔
190
        curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
1✔
191
        summaryArr_ = daR.summaryArr;
1✔
192
        //empty_ = is whatever SkA is,
193
      }
194
      //default: not possible
195
    }
196
  }
1✔
197

198
  /**
199
   * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the
200
   * second (or <i>n+1</i>th) argument <i>B</i> of <i>A-AND-NOT-B</i>.
201
   * Performs an <i>AND NOT</i> operation with the existing internal state of this AnotB operator.
202
   * Calls to this method can be intermingled with calls to
203
   * {@link #notB(Sketch)}.
204
   *
205
   * <p>An input argument of null or empty is ignored.</p>
206
   *
207
   * <p>Rationale: A <i>null</i> for the second or following arguments is more tolerable because
208
   * <i>A NOT null</i> is still <i>A</i> even if we don't know exactly what the null represents. It
209
   * clearly does not have any content that overlaps with <i>A</i>. Also, because this can be part of
210
   * a multistep operation with multiple <i>notB</i> steps. Other following steps can still produce
211
   * a valid result.</p>
212
   *
213
   * <p>Use {@link #getResult(boolean)} to obtain the result.</p>
214
   *
215
   * @param skB The incoming Theta sketch for the second (or following) argument <i>B</i>.
216
   */
217
  public void notB(final org.apache.datasketches.theta.Sketch skB) {
218
    if (skB == null) { return; } //ignore
1✔
219

220
    final long thetaLongB = skB.getThetaLong();
1✔
221
    final int countB = skB.getRetainedEntries();
1✔
222
    final boolean emptyB = skB.isEmpty();
1✔
223

224
    final int id =
1✔
225
        SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB);
1✔
226
    final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
1✔
227
    final AnotbAction anotbAction = cCase.getAnotbAction();
1✔
228

229
    switch (anotbAction) {
1✔
230
      case EMPTY_1_0_T: {
231
        reset();
1✔
232
        break;
1✔
233
      }
234
      case DEGEN_MIN_0_F: {
235
        reset();
1✔
236
        thetaLong_ = min(thetaLong_, thetaLongB);
1✔
237
        empty_ = false;
1✔
238
        break;
1✔
239
      }
240
      case DEGEN_THA_0_F: {
241
        empty_ = false;
1✔
242
        curCount_ = 0;
1✔
243
        //thetaLong_ is ok
244
        break;
1✔
245
      }
246
      case TRIM_A: {
247
        thetaLong_ = min(thetaLong_, thetaLongB);
1✔
248
        final DataArrays<S> da = trimAndCopyDataArrays(hashArr_, summaryArr_,thetaLong_, true);
1✔
249
        hashArr_ = da.hashArr;
1✔
250
        curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
1✔
251
        summaryArr_ = da.summaryArr;
1✔
252
        break;
1✔
253
      }
254
      case SKETCH_A: {
255
        break; //result is already in A
1✔
256
      }
257
      case FULL_ANOTB: { //both A and B should have valid entries.
258
        thetaLong_ = min(thetaLong_, thetaLongB);
1✔
259
        final DataArrays<S> daB = getCopyOfResultArraysTheta(thetaLong_, curCount_, hashArr_, summaryArr_, skB);
1✔
260
        hashArr_ = daB.hashArr;
1✔
261
        curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
1✔
262
        summaryArr_ = daB.summaryArr;
1✔
263
        //empty_ = is whatever SkA is,
264
      }
265
      //default: not possible
266
    }
267
  }
1✔
268

269
  /**
270
   * Gets the result of the multistep, stateful operation AnotB that has been executed with calls
271
   * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or
272
   * {@link #notB(org.apache.datasketches.theta.Sketch)}).
273
   *
274
   * @param reset If <i>true</i>, clears this operator to the empty state after this result is
275
   * returned. Set this to <i>false</i> if you wish to obtain an intermediate result.
276
   * @return the result of this operation as an unordered {@link CompactSketch}.
277
   */
278
  public CompactSketch<S> getResult(final boolean reset) {
279
    final CompactSketch<S> result;
280
    if (curCount_ == 0) {
1✔
281
      result = new CompactSketch<>(null, null, thetaLong_, thetaLong_ == Long.MAX_VALUE);
1✔
282
    } else {
283

284
      result = new CompactSketch<>(hashArr_, Util.copySummaryArray(summaryArr_), thetaLong_, false);
1✔
285
    }
286
    if (reset) { reset(); }
1✔
287
    return result;
1✔
288
  }
289

290
  /**
291
   * Returns the A-and-not-B set operation on the two given Tuple sketches.
292
   *
293
   * <p>This is a stateless operation and has no impact on the internal state of this operator.
294
   * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)},
295
   * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta.Sketch)}, and
296
   * {@link #getResult(boolean)} methods.</p>
297
   *
298
   * <p>If either argument is null an exception is thrown.</p>
299
   *
300
   * <p>Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
301
   * That is distinctly different from the java <i>null</i>, which represents a nonexistent object.
302
   * In most cases it is a programming error due to some object that was not properly initialized.
303
   * With a null as the first argument, we cannot know what the user's intent is.
304
   * With a null as the second argument, we can't ignore it as we must return a result and there is
305
   * no following possible viable arguments for the second argument.
306
   * Since it is very likely that a <i>null</i> is a programming error, we throw an exception.</p>
307
   *
308
   * @param skA The incoming Tuple sketch for the first argument
309
   * @param skB The incoming Tuple sketch for the second argument
310
   * @param <S> Type of Summary
311
   * @return the result as an unordered {@link CompactSketch}
312
   */
313
  @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
314
      justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase")
315
  public static <S extends Summary> CompactSketch<S> aNotB(
316
      final Sketch<S> skA,
317
      final Sketch<S> skB) {
318
    if (skA == null || skB == null) {
1✔
319
      throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
1✔
320
    }
321

322
    final long thetaLongA = skA.getThetaLong();
1✔
323
    final int countA = skA.getRetainedEntries();
1✔
324
    final boolean emptyA = skA.isEmpty();
1✔
325

326
    final long thetaLongB = skB.getThetaLong();
1✔
327
    final int countB = skB.getRetainedEntries();
1✔
328
    final boolean emptyB = skB.isEmpty();
1✔
329

330
    final int id =
1✔
331
        SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
1✔
332
    final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
1✔
333
    final AnotbAction anotbAction = cCase.getAnotbAction();
1✔
334

335
    CompactSketch<S> result = null;
1✔
336

337
    switch (anotbAction) {
1✔
338
      case EMPTY_1_0_T: {
339
        result = new CompactSketch<>(null, null, Long.MAX_VALUE, true);
1✔
340
        break;
1✔
341
      }
342
      case DEGEN_MIN_0_F: {
343
        final long thetaLong = min(thetaLongA, thetaLongB);
1✔
344
        result = new CompactSketch<>(null, null, thetaLong, false);
1✔
345
        break;
1✔
346
      }
347
      case DEGEN_THA_0_F: {
348
        result = new CompactSketch<>(null, null, thetaLongA, false);
1✔
349
        break;
1✔
350
      }
351
      case TRIM_A: {
352
        final DataArrays<S> daA = getCopyOfDataArraysTuple(skA);
1✔
353
        final long[] hashArrA = daA.hashArr;
1✔
354
        final S[] summaryArrA = daA.summaryArr;
1✔
355
        final long minThetaLong =  min(thetaLongA, thetaLongB);
1✔
356
        final DataArrays<S> da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false);
1✔
357
        result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_);
1✔
358
        break;
1✔
359
      }
360
      case SKETCH_A: {
361
        final DataArrays<S> daA = getCopyOfDataArraysTuple(skA);
1✔
362
        result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_);
1✔
363
        break;
1✔
364
      }
365
      case FULL_ANOTB: { //both A and B should have valid entries.
366
        final DataArrays<S> daA = getCopyOfDataArraysTuple(skA);
1✔
367
        final long minThetaLong = min(thetaLongA, thetaLongB);
1✔
368
        final DataArrays<S> daR =
1✔
369
            getCopyOfResultArraysTuple(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB);
1✔
370
        final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length;
1✔
371
        if (countR == 0) {
1✔
372
          result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE);
1✔
373
        } else {
374
          result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false);
1✔
375
        }
376
      }
377
      //default: not possible
378
    }
379
    return result;
1✔
380
  }
381

382
  /**
383
   * Returns the A-and-not-B set operation on a Tuple sketch and a Theta sketch.
384
   *
385
   * <p>This is a stateless operation and has no impact on the internal state of this operator.
386
   * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)},
387
   * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta.Sketch)}, and
388
   * {@link #getResult(boolean)} methods.</p>
389
   *
390
   * <p>If either argument is null an exception is thrown.</p>
391
   *
392
   * <p>Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
393
   * That is distinctly different from the java <i>null</i>, which represents a nonexistent object.
394
   * In most cases it is a programming error due to some object that was not properly initialized.
395
   * With a null as the first argument, we cannot know what the user's intent is.
396
   * With a null as the second argument, we can't ignore it as we must return a result and there is
397
   * no following possible viable arguments for the second argument.
398
   * Since it is very likely that a <i>null</i> is a programming error for either argument
399
   * we throw an exception.</p>
400
   *
401
   * @param skA The incoming Tuple sketch for the first argument
402
   * @param skB The incoming Theta sketch for the second argument
403
   * @param <S> Type of Summary
404
   * @return the result as an unordered {@link CompactSketch}
405
   */
406
  @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
407
      justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase")
408
  public static <S extends Summary> CompactSketch<S> aNotB(
409
      final Sketch<S> skA,
410
      final org.apache.datasketches.theta.Sketch skB) {
411
    if (skA == null || skB == null) {
1✔
412
      throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
1✔
413
    }
414

415
    final long thetaLongA = skA.getThetaLong();
1✔
416
    final int countA = skA.getRetainedEntries();
1✔
417
    final boolean emptyA = skA.isEmpty();
1✔
418

419
    final long thetaLongB = skB.getThetaLong();
1✔
420
    final int countB = skB.getRetainedEntries();
1✔
421
    final boolean emptyB = skB.isEmpty();
1✔
422

423
    final int id =
1✔
424
        SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
1✔
425
    final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
1✔
426
    final AnotbAction anotbAction = cCase.getAnotbAction();
1✔
427

428
    CompactSketch<S> result = null;
1✔
429

430
    switch (anotbAction) {
1✔
431
      case EMPTY_1_0_T: {
432
        result = new CompactSketch<>(null, null, Long.MAX_VALUE, true);
1✔
433
        break;
1✔
434
      }
435
      case DEGEN_MIN_0_F: {
436
        final long thetaLong = min(thetaLongA, thetaLongB);
1✔
437
        result = new CompactSketch<>(null, null, thetaLong, false);
1✔
438
        break;
1✔
439
      }
440
      case DEGEN_THA_0_F: {
441
        result = new CompactSketch<>(null, null, thetaLongA, false);
1✔
442
        break;
1✔
443
      }
444
      case TRIM_A: {
445
        final DataArrays<S> daA = getCopyOfDataArraysTuple(skA);
1✔
446
        final long[] hashArrA = daA.hashArr;
1✔
447
        final S[] summaryArrA = daA.summaryArr;
1✔
448
        final long minThetaLong = min(thetaLongA, thetaLongB);
1✔
449
        final DataArrays<S> da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false);
1✔
450
        result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_);
1✔
451
        break;
1✔
452
      }
453
      case SKETCH_A: {
454
        final DataArrays<S> daA = getCopyOfDataArraysTuple(skA);
1✔
455
        result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_);
1✔
456
        break;
1✔
457
      }
458
      case FULL_ANOTB: { //both A and B have valid entries.
459
        final DataArrays<S> daA = getCopyOfDataArraysTuple(skA);
1✔
460
        final long minThetaLong = min(thetaLongA, thetaLongB);
1✔
461
        @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
462
            justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase")
463
        final DataArrays<S> daR =
1✔
464
            getCopyOfResultArraysTheta(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB);
1✔
465
        final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length;
1✔
466
        if (countR == 0) {
1✔
467
          result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE);
1✔
468
        } else {
469
          result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false);
1✔
470
        }
471
      }
472
      //default: not possible
473
    }
474
    return result;
1✔
475
  }
476

477
  //restricted
478

479
  static class DataArrays<S extends Summary> {
480
    DataArrays() {}
1✔
481

482
    long[] hashArr;
483
    S[] summaryArr;
484
  }
485

486
  private static <S extends Summary> DataArrays<S> getCopyOfDataArraysTuple(
487
      final Sketch<S> sk) {
488
    final CompactSketch<S> csk;
489
    final DataArrays<S> da = new DataArrays<>();
1✔
490
    if (sk instanceof CompactSketch) {
1✔
491
      csk = (CompactSketch<S>) sk;
1✔
492
    } else {
493
      csk = ((QuickSelectSketch<S>)sk).compact();
1✔
494
    }
495
    final int count = csk.getRetainedEntries();
1✔
496
    if (count == 0) {
1✔
497
      da.hashArr = null;
1✔
498
      da.summaryArr = null;
1✔
499
    } else {
500
      da.hashArr = csk.getHashArr().clone();       //deep copy, may not be sorted
1✔
501
      da.summaryArr = Util.copySummaryArray(csk.getSummaryArr());
1✔
502
    }
503
    return da;
1✔
504
  }
505

506
  @SuppressWarnings("unchecked")
507
  //Both skA and skB must have entries (count > 0)
508
  private static <S extends Summary> DataArrays<S> getCopyOfResultArraysTuple(
509
      final long minThetaLong,
510
      final int countA,
511
      final long[] hashArrA,
512
      final S[] summaryArrA,
513
      final Sketch<S> skB) {
514
    final DataArrays<S> daR = new DataArrays<>();
1✔
515

516
    //Rebuild/get hashtable of skB
517
    final long[] hashTableB;
518

519
    if (skB instanceof CompactSketch) {
1✔
520
      final CompactSketch<S> cskB = (CompactSketch<S>) skB;
1✔
521
      final int countB = skB.getRetainedEntries();
1✔
522
      hashTableB = convertToHashTable(cskB.getHashArr(), countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD);
1✔
523
    } else {
1✔
524
      final QuickSelectSketch<S> qskB = (QuickSelectSketch<S>) skB;
1✔
525
      hashTableB = qskB.getHashTable();
1✔
526
    }
527

528
    //build temporary arrays of skA
529
    final long[] tmpHashArrA = new long[countA];
1✔
530
    final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA);
1✔
531

532
    //search for non matches and build temp arrays
533
    final int lgHTBLen = exactLog2OfLong(hashTableB.length);
1✔
534
    int nonMatches = 0;
1✔
535
    for (int i = 0; i < countA; i++) {
1✔
536
      final long hash = hashArrA[i];
1✔
537
      if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta
1✔
538
        final int index = hashSearch(hashTableB, lgHTBLen, hash);
1✔
539
        if (index == -1) {
1✔
540
          tmpHashArrA[nonMatches] = hash;
1✔
541
          tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy();
1✔
542
          nonMatches++;
1✔
543
        }
544
      }
545
    }
546
    daR.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
1✔
547
    daR.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches);
1✔
548
    return daR;
1✔
549
  }
550

551
  @SuppressWarnings("unchecked")
552
  private static <S extends Summary> DataArrays<S> getCopyOfResultArraysTheta(
553
      final long minThetaLong,
554
      final int countA,
555
      final long[] hashArrA,
556
      final S[] summaryArrA,
557
      final org.apache.datasketches.theta.Sketch skB) {
558
    final DataArrays<S> daB = new DataArrays<>();
1✔
559

560
    //Rebuild/get hashtable of skB
561
    final long[] hashTableB; //read only
562

563
    final long[] hashCacheB;
564
    try { hashCacheB = (long[])GET_CACHE.invoke(skB);
1✔
565
    } catch (final Exception e) { throw new SketchesStateException("Reflection Exception " + e); }
1✔
566

567
    if (skB instanceof org.apache.datasketches.theta.CompactSketch) {
1✔
568
      final int countB = skB.getRetainedEntries(true);
1✔
569
      hashTableB = convertToHashTable(hashCacheB, countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD);
1✔
570
    } else {
1✔
571
      hashTableB = hashCacheB;
1✔
572
    }
573

574
    //build temporary result arrays of skA
575
    final long[] tmpHashArrA = new long[countA];
1✔
576
    final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA);
1✔
577

578
    //search for non matches and build temp arrays
579
    final int lgHTBLen = exactLog2OfLong(hashTableB.length);
1✔
580
    int nonMatches = 0;
1✔
581
    for (int i = 0; i < countA; i++) {
1✔
582
      final long hash = hashArrA[i];
1✔
583
      if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta
1✔
584
        final int index = hashSearch(hashTableB, lgHTBLen, hash);
1✔
585
        if (index == -1) { //not found
1✔
586
          tmpHashArrA[nonMatches] = hash;
1✔
587
          tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy();
1✔
588
          nonMatches++;
1✔
589
        }
590
      }
591
    }
592
    //trim the arrays
593
    daB.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
1✔
594
    daB.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches);
1✔
595
    return daB;
1✔
596
  }
597

598
  @SuppressWarnings("unchecked")
599
  private static <S extends Summary> DataArrays<S> trimAndCopyDataArrays(
600
      final long[] hashArr,
601
      final S[] summaryArr,
602
      final long minThetaLong,
603
      final boolean copy) {
604

605
    //build temporary arrays
606
    final int countIn = hashArr.length;
1✔
607
    final long[] tmpHashArr = new long[countIn];
1✔
608
    final S[] tmpSummaryArr = Util.newSummaryArray(summaryArr, countIn);
1✔
609
    int countResult = 0;
1✔
610
    for (int i = 0; i < countIn; i++) {
1✔
611
      final long hash = hashArr[i];
1✔
612
      if (hash < minThetaLong) {
1✔
613
        tmpHashArr[countResult] = hash;
1✔
614
        tmpSummaryArr[countResult] = (S) (copy ? summaryArr[i].copy() : summaryArr[i]);
1✔
615
        countResult++;
1✔
616
      } else { continue; }
617
    }
618
    //Remove empty slots
619
    final DataArrays<S> da = new DataArrays<>();
1✔
620
    da.hashArr = Arrays.copyOfRange(tmpHashArr, 0, countResult);
1✔
621
    da.summaryArr = Arrays.copyOfRange(tmpSummaryArr, 0, countResult);
1✔
622
    return da;
1✔
623
  }
624

625
  /**
626
   * Resets this operation back to the empty state.
627
   */
628
  public void reset() {
629
    empty_ = true;
1✔
630
    thetaLong_ = Long.MAX_VALUE;
1✔
631
    hashArr_ = null;
1✔
632
    summaryArr_ = null;
1✔
633
    curCount_ = 0;
1✔
634
  }
1✔
635

636
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc