• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.31
/src/main/java/org/apache/datasketches/theta/CompactOperations.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.theta;
21

22
import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
23
import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
24
import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE;
25
import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
26
import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK;
27
import static org.apache.datasketches.theta.PreambleUtil.SER_VER;
28
import static org.apache.datasketches.theta.PreambleUtil.SINGLEITEM_FLAG_MASK;
29
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
30
import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
31
import static org.apache.datasketches.theta.PreambleUtil.extractFlags;
32
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
33
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
34
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
35
import static org.apache.datasketches.theta.PreambleUtil.extractSerVer;
36
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
37
import static org.apache.datasketches.theta.PreambleUtil.insertCurCount;
38
import static org.apache.datasketches.theta.PreambleUtil.insertFamilyID;
39
import static org.apache.datasketches.theta.PreambleUtil.insertFlags;
40
import static org.apache.datasketches.theta.PreambleUtil.insertP;
41
import static org.apache.datasketches.theta.PreambleUtil.insertPreLongs;
42
import static org.apache.datasketches.theta.PreambleUtil.insertSeedHash;
43
import static org.apache.datasketches.theta.PreambleUtil.insertSerVer;
44
import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong;
45

46
import java.util.Arrays;
47

48
import org.apache.datasketches.common.Family;
49
import org.apache.datasketches.common.SketchesArgumentException;
50
import org.apache.datasketches.common.SketchesStateException;
51
import org.apache.datasketches.memory.Memory;
52
import org.apache.datasketches.memory.WritableMemory;
53

54
/**
55
 * @author Lee Rhodes
56
 */
57
final class CompactOperations {
1✔
58

59
  private CompactOperations() {}
60

61
  static CompactSketch componentsToCompact( //No error checking
62
      final long thetaLong,
63
      final int curCount,
64
      final short seedHash,
65
      final boolean srcEmpty,
66
      final boolean srcCompact,
67
      final boolean srcOrdered,
68
      final boolean dstOrdered,
69
      final WritableMemory dstMem,
70
      final long[] hashArr) //may not be compacted, ordered or unordered, may be null
71
  {
72
    final boolean direct = dstMem != null;
1✔
73
    final boolean empty = srcEmpty || ((curCount == 0) && (thetaLong == Long.MAX_VALUE));
1✔
74
    final boolean single = (curCount == 1) && (thetaLong == Long.MAX_VALUE);
1✔
75
    final long[] hashArrOut;
76
    if (!srcCompact) {
1✔
77
      hashArrOut = CompactOperations.compactCache(hashArr, curCount, thetaLong, dstOrdered);
1✔
78
    } else {
79
      hashArrOut = hashArr;
1✔
80
    }
81
    if (!srcOrdered && dstOrdered && !empty && !single) {
1✔
82
      Arrays.sort(hashArrOut);
1✔
83
    }
84
    //Note: for empty or single we always output the ordered form.
85
    final boolean dstOrderedOut = (empty || single) ? true : dstOrdered;
1✔
86
    if (direct) {
1✔
87
      final int preLongs = computeCompactPreLongs(empty, curCount, thetaLong);
1✔
88
      int flags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK; //always LE
1✔
89
      flags |=  empty ? EMPTY_FLAG_MASK : 0;
1✔
90
      flags |= dstOrderedOut ? ORDERED_FLAG_MASK : 0;
1✔
91
      flags |= single ? SINGLEITEM_FLAG_MASK : 0;
1✔
92

93
      final Memory mem =
1✔
94
          loadCompactMemory(hashArrOut, seedHash, curCount, thetaLong, dstMem, (byte)flags, preLongs);
1✔
95
      return new DirectCompactSketch(mem);
1✔
96

97
    } else { //Heap
98
      if (empty) {
1✔
99
        return EmptyCompactSketch.getInstance();
1✔
100
      }
101
      if (single) {
1✔
102
        return new SingleItemSketch(hashArrOut[0], seedHash);
1✔
103
      }
104
      return new HeapCompactSketch(hashArrOut, empty, seedHash, curCount, thetaLong, dstOrderedOut);
1✔
105
    }
106
  }
107

108
  /**
109
   * Heapify or convert a source Theta Sketch Memory image into a heap or target Memory CompactSketch.
110
   * This assumes hashSeed is OK; serVer = 3.
111
   * @param srcMem the given input source Memory image
112
   * @param dstOrdered the desired ordering of the resulting CompactSketch
113
   * @param dstMem Used for the target CompactSketch if it is Memory-based.
114
   * @return a CompactSketch of the correct form.
115
   */
116
  @SuppressWarnings("unused")
117
  static CompactSketch memoryToCompact(
118
      final Memory srcMem,
119
      final boolean dstOrdered,
120
      final WritableMemory dstMem)
121
  {
122
    //extract Pre0 fields and Flags from srcMem
123
    final int srcPreLongs = extractPreLongs(srcMem);
1✔
124
    final int srcSerVer = extractSerVer(srcMem); //not used
1✔
125
    final int srcFamId = extractFamilyID(srcMem);
1✔
126
    final int srcLgArrLongs = extractLgArrLongs(srcMem);
1✔
127
    final int srcFlags = extractFlags(srcMem);
1✔
128
    final short srcSeedHash = (short) extractSeedHash(srcMem);
1✔
129

130
    //srcFlags
131
    final boolean srcReadOnlyFlag = (srcFlags & READ_ONLY_FLAG_MASK) > 0;
1✔
132
    final boolean srcEmptyFlag = (srcFlags & EMPTY_FLAG_MASK) > 0;
1✔
133
    final boolean srcCompactFlag = (srcFlags & COMPACT_FLAG_MASK) > 0;
1✔
134
    final boolean srcOrderedFlag = (srcFlags & ORDERED_FLAG_MASK) > 0;
1✔
135
    final boolean srcSingleFlag = (srcFlags & SINGLEITEM_FLAG_MASK) > 0;
1✔
136

137
    final boolean single = srcSingleFlag
1✔
138
        || SingleItemSketch.otherCheckForSingleItem(srcPreLongs, srcSerVer, srcFamId, srcFlags);
1✔
139

140
    //extract pre1 and pre2 fields
141
    final int curCount = single ? 1 : (srcPreLongs > 1) ? extractCurCount(srcMem) : 0;
1✔
142
    final long thetaLong = (srcPreLongs > 2) ? extractThetaLong(srcMem) : Long.MAX_VALUE;
1✔
143

144
    //do some basic checks ...
145
    if (srcEmptyFlag)  { assert (curCount == 0) && (thetaLong == Long.MAX_VALUE); }
1✔
146
    if (single) { assert (curCount == 1) && (thetaLong == Long.MAX_VALUE); }
1✔
147
    checkFamilyAndFlags(srcFamId, srcCompactFlag, srcReadOnlyFlag);
1✔
148

149
    //dispatch empty and single cases
150
    //Note: for empty and single we always output the ordered form.
151
    final boolean dstOrderedOut = (srcEmptyFlag || single) ? true : dstOrdered;
1✔
152
    if (srcEmptyFlag) {
1✔
153
      if (dstMem != null) {
1✔
154
        dstMem.putByteArray(0, EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, 8);
1✔
155
        return new DirectCompactSketch(dstMem);
1✔
156
      } else {
157
        return EmptyCompactSketch.getInstance();
1✔
158
      }
159
    }
160
    if (single) {
1✔
161
      final long hash = srcMem.getLong(srcPreLongs << 3);
1✔
162
      final SingleItemSketch sis = new SingleItemSketch(hash, srcSeedHash);
1✔
163
      if (dstMem != null) {
1✔
164
        dstMem.putByteArray(0, sis.toByteArray(),0, 16);
1✔
165
        return new DirectCompactSketch(dstMem);
1✔
166
      } else { //heap
167
        return sis;
1✔
168
      }
169
    }
170

171
    //extract hashArr > 1
172
    final long[] hashArr;
173
    if (srcCompactFlag) {
1✔
174
      hashArr = new long[curCount];
1✔
175
      srcMem.getLongArray(srcPreLongs << 3, hashArr, 0, curCount);
1✔
176
    } else { //update sketch, thus hashTable form
177
      final int srcCacheLen = 1 << srcLgArrLongs;
1✔
178
      final long[] tempHashArr = new long[srcCacheLen];
1✔
179
      srcMem.getLongArray(srcPreLongs << 3, tempHashArr, 0, srcCacheLen);
1✔
180
      hashArr = compactCache(tempHashArr, curCount, thetaLong, dstOrderedOut);
1✔
181
    }
182

183
    final int flagsOut = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK
1✔
184
                         | ((dstOrderedOut) ? ORDERED_FLAG_MASK : 0);
185

186
    //load the destination.
187
    if (dstMem != null) {
1✔
188
      final Memory tgtMem = loadCompactMemory(hashArr, srcSeedHash, curCount, thetaLong, dstMem,
1✔
189
          (byte)flagsOut, srcPreLongs);
190
      return new DirectCompactSketch(tgtMem);
1✔
191
    } else { //heap
192
      return new HeapCompactSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong,
1✔
193
          dstOrderedOut);
194
    }
195
  }
196

197
  private static final void checkFamilyAndFlags(
198
      final int srcFamId,
199
      final boolean srcCompactFlag,
200
      final boolean srcReadOnlyFlag) {
201
    final Family srcFamily = Family.idToFamily(srcFamId);
1✔
202
    if (srcCompactFlag) {
1✔
203
      if ((srcFamily == Family.COMPACT) && srcReadOnlyFlag) { return; }
1✔
204
    } else {
205
      if (srcFamily == Family.ALPHA) { return; }
1✔
206
      if (srcFamily == Family.QUICKSELECT) { return; }
1✔
207
    }
208
    throw new SketchesArgumentException(
1✔
209
        "Possible Corruption: Family does not match flags: Family: "
210
            + srcFamily.toString()
1✔
211
            + ", Compact Flag: " + srcCompactFlag
212
            + ", ReadOnly Flag: " + srcReadOnlyFlag);
213
  }
214

215
  //All arguments must be valid and correct including flags.
216
  // Used as helper to create byte arrays as well as loading Memory for direct compact sketches
217
  static final Memory loadCompactMemory(
218
      final long[] compactHashArr,
219
      final short seedHash,
220
      final int curCount,
221
      final long thetaLong,
222
      final WritableMemory dstMem,
223
      final byte flags,
224
      final int preLongs)
225
  {
226
    assert (dstMem != null) && (compactHashArr != null);
1✔
227
    final int outLongs = preLongs + curCount;
1✔
228
    final int outBytes = outLongs << 3;
1✔
229
    final int dstBytes = (int) dstMem.getCapacity();
1✔
230
    if (outBytes > dstBytes) {
1✔
231
      throw new SketchesArgumentException("Insufficient Memory: " + dstBytes
1✔
232
        + ", Need: " + outBytes);
233
    }
234
    final byte famID = (byte) Family.COMPACT.getID();
1✔
235

236
    //Caution: The following loads directly into Memory without creating a heap byte[] first,
237
    // which would act as a pre-clearing, initialization mechanism. So it is important to make sure
238
    // that all fields are initialized, even those that are not used by the CompactSketch.
239
    // Otherwise, uninitialized fields could be filled with off-heap garbage, which could cause
240
    // other problems downstream if those fields are not filtered out first.
241
    // As written below, all fields are initialized avoiding an extra copy.
242

243
    //The first 8 bytes (pre0)
244
    insertPreLongs(dstMem, preLongs); //RF not used = 0
1✔
245
    insertSerVer(dstMem, SER_VER);
1✔
246
    insertFamilyID(dstMem, famID);
1✔
247
    //The following initializes the lgNomLongs and lgArrLongs to 0.
248
    //They are not used in CompactSketches.
249
    dstMem.putShort(LG_NOM_LONGS_BYTE, (short)0);
1✔
250
    insertFlags(dstMem, flags);
1✔
251
    insertSeedHash(dstMem, seedHash);
1✔
252

253
    if ((preLongs == 1) && (curCount == 1)) { //singleItem, theta = 1.0
1✔
254
      dstMem.putLong(8, compactHashArr[0]);
1✔
255
      return dstMem;
1✔
256
    }
257
    if (preLongs > 1) {
1✔
258
      insertCurCount(dstMem, curCount);
1✔
259
      insertP(dstMem, (float) 1.0);
1✔
260
    }
261
    if (preLongs > 2) {
1✔
262
      insertThetaLong(dstMem, thetaLong);
1✔
263
    }
264
    if (curCount > 0) { //theta could be < 1.0.
1✔
265
      dstMem.putLongArray(preLongs << 3, compactHashArr, 0, curCount);
1✔
266
    }
267
    return dstMem; //if prelongs == 3 & curCount == 0, theta could be < 1.0.
1✔
268
  }
269

270
  /**
271
   * Copies then compacts, cleans, and may sort the resulting array.
272
   * The source cache can be a hash table with interstitial zeros or
273
   * "dirty" values, which are hash values greater than theta.
274
   * These can be generated by the Alpha sketch.
275
   * @param srcCache anything
276
   * @param curCount must be correct
277
   * @param thetaLong The correct
278
   * <a href="{@docRoot}/resources/dictionary.html#thetaLong">thetaLong</a>.
279
   * @param dstOrdered true if output array must be sorted
280
   * @return the compacted array.
281
   */
282
  static final long[] compactCache(final long[] srcCache, final int curCount,
283
      final long thetaLong, final boolean dstOrdered) {
284
    if (curCount == 0) {
1✔
285
      return new long[0];
1✔
286
    }
287
    final long[] cacheOut = new long[curCount];
1✔
288
    final int len = srcCache.length;
1✔
289
    int j = 0;
1✔
290
    for (int i = 0; i < len; i++) { //scan the full srcCache
1✔
291
      final long v = srcCache[i];
1✔
292
      if ((v <= 0L) || (v >= thetaLong) ) { continue; } //ignoring zeros or dirty values
1✔
293
      cacheOut[j++] = v;
1✔
294
    }
295
    if (j < curCount) {
1✔
296
      throw new SketchesStateException(
×
297
          "Possible Corruption: curCount parameter is incorrect.");
298
    }
299
    if (dstOrdered && (curCount > 1)) {
1✔
300
      Arrays.sort(cacheOut);
1✔
301
    }
302
    return cacheOut;
1✔
303
  }
304

305
  /*
306
   * The truth table for empty, curCount and theta when compacting is as follows:
307
   * <pre>
308
   * Num Theta CurCount Empty State    Name, Comments
309
   *  0    1.0     0      T     OK     EMPTY: The Normal Empty State
310
   *  1    1.0     0      F   Internal This can occur internally as the result of an intersection of two exact,
311
   *                                   disjoint sets, or AnotB of two exact, identical sets. There is no probability
312
   *                                   distribution, so this is converted internally to EMPTY {1.0, 0, T}.
313
   *                                   This is handled in SetOperation.createCompactSketch().
314
   *  2    1.0    !0      T   Error    Empty=T and curCount !0 should never coexist.
315
   *                                   This is checked in all compacting operations.
316
   *  3    1.0    !0      F     OK     EXACT: This corresponds to a sketch in exact mode
317
   *  4   <1.0     0      T   Internal This can be an initial UpdateSketch state if p < 1.0,
318
   *                                   so change theta to 1.0. Return {Th = 1.0, 0, T}.
319
   *                                   This is handled in UpdateSketch.compact() and toByteArray().
320
   *  5   <1.0     0      F     OK     This can result from set operations
321
   *  6   <1.0    !0      T   Error    Empty=T and curCount !0 should never coexist.
322
   *                                   This is checked in all compacting operations.
323
   *  7   <1.0    !0      F     OK     This corresponds to a sketch in estimation mode
324
   * </pre>
325
   * #4 is handled by <i>correctThetaOnCompat(boolean, int)</i> (below).
326
   * #2 & #6 handled by <i>checkIllegalCurCountAndEmpty(boolean, int)</i>
327
   */
328

329
  /**
330
   * This corrects a temporary anomalous condition where compact() is called on an UpdateSketch
331
   * that was initialized with p < 1.0 and update() was never called.  In this case Theta < 1.0,
332
   * curCount = 0, and empty = true.  The correction is to change Theta to 1.0, which makes the
333
   * returning sketch empty. This should only be used in the compaction or serialization of an
334
   * UpdateSketch.
335
   * @param empty the given empty state
336
   * @param curCount the given curCount
337
   * @param thetaLong the given thetaLong
338
   * @return thetaLong
339
   */
340
  static final long correctThetaOnCompact(final boolean empty, final int curCount,
341
      final long thetaLong) { //handles #4 above
342
    return (empty && (curCount == 0)) ? Long.MAX_VALUE : thetaLong;
1✔
343
  }
344

345
  /**
346
   * This checks for the illegal condition where curCount > 0 and the state of
347
   * empty = true.  This check can be used anywhere a sketch is returned or a sketch is created
348
   * from complete arguments.
349
   * @param empty the given empty state
350
   * @param curCount the given current count
351
   */ //This handles #2 and #6 above
352
  static final void checkIllegalCurCountAndEmpty(final boolean empty, final int curCount) {
353
    if (empty && (curCount != 0)) { //this handles #2 and #6 above
1✔
354
      throw new SketchesStateException("Illegal State: Empty=true and Current Count != 0.");
×
355
    }
356
  }
1✔
357

358
  /**
359
   * This compute number of preamble longs for a compact sketch based on <i>empty</i>,
360
   * <i>curCount</i> and <i>thetaLong</i>.
361
   * This also accommodates for EmptyCompactSketch and SingleItemSketch.
362
   * @param empty The given empty state
363
   * @param curCount The given current count (retained entries)
364
   * @param thetaLong the current thetaLong
365
   * @return the number of preamble longs
366
   */
367
  static final int computeCompactPreLongs(final boolean empty, final int curCount,
368
      final long thetaLong) {
369
    return (thetaLong < Long.MAX_VALUE) ? 3 : empty ? 1 : (curCount > 1) ? 2 : 1;
1✔
370
  }
371

372
  /**
373
   * This checks for the singleItem Compact Sketch.
374
   * @param empty the given empty state
375
   * @param curCount the given curCount
376
   * @param thetaLong the given thetaLong
377
   * @return true if notEmpty, curCount = 1 and theta = 1.0;
378
   */
379
  static final boolean isSingleItem(final boolean empty, final int curCount,
380
      final long thetaLong) {
381
    return !empty && (curCount == 1) && (thetaLong == Long.MAX_VALUE);
1✔
382
  }
383
}
384

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc