• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apache / datasketches-java / #306

30 Apr 2024 10:01PM UTC coverage: 97.645% (-0.5%) from 98.139%
#306

push

web-flow
Merge pull request #555 from apache/fix_pom_xml_header

Fix pom xml header

26865 of 27513 relevant lines covered (97.64%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.33
/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied.  See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19

20
package org.apache.datasketches.tuple;
21

22
import static org.apache.datasketches.thetacommon.HashOperations.count;
23

24
import java.lang.reflect.Array;
25
import java.nio.ByteOrder;
26

27
import org.apache.datasketches.common.ByteArrayUtil;
28
import org.apache.datasketches.common.Family;
29
import org.apache.datasketches.common.SketchesArgumentException;
30
import org.apache.datasketches.memory.Memory;
31

32
/**
33
 * CompactSketches are never created directly. They are created as a result of
34
 * the compact() method of an UpdatableSketch or as a result of the getResult()
35
 * method of a set operation like Union, Intersection or AnotB. CompactSketch
36
 * consists of a compact list (i.e. no intervening spaces) of hash values,
37
 * corresponding list of Summaries, and a value for theta. The lists may or may
38
 * not be ordered. CompactSketch is read-only.
39
 *
40
 * @param <S> type of Summary
41
 */
42
public final class CompactSketch<S extends Summary> extends Sketch<S> {
43
  private static final byte serialVersionWithSummaryClassNameUID = 1;
44
  private static final byte serialVersionUIDLegacy = 2;
45
  private static final byte serialVersionUID = 3;
46
  private static final short defaultSeedHash = (short) 37836; // for compatibility with C++
47
  private final long[] hashArr_;
48
  private S[] summaryArr_;
49

50
  private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
1✔
51

52
  private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED }
1✔
53

54
  /**
55
   * Create a CompactSketch from correct components
56
   * @param hashArr compacted hash array
57
   * @param summaryArr compacted summary array
58
   * @param thetaLong long value of theta
59
   * @param empty empty flag
60
   */
61
  CompactSketch(final long[] hashArr, final S[] summaryArr, final long thetaLong, final boolean empty) {
62
    super(thetaLong, empty, null);
1✔
63
    super.thetaLong_ = thetaLong;
1✔
64
    super.empty_ = empty;
1✔
65
    hashArr_ = hashArr;
1✔
66
    summaryArr_ = summaryArr;
1✔
67
  }
1✔
68

69
  /**
70
   * This is to create an instance of a CompactSketch given a serialized form
71
   *
72
   * @param mem Memory object with serialized CompactSketch
73
   * @param deserializer the SummaryDeserializer
74
   */
75
  CompactSketch(final Memory mem, final SummaryDeserializer<S> deserializer) {
76
    super(Long.MAX_VALUE, true, null);
1✔
77
    int offset = 0;
1✔
78
    final byte preambleLongs = mem.getByte(offset++);
1✔
79
    final byte version = mem.getByte(offset++);
1✔
80
    final byte familyId = mem.getByte(offset++);
1✔
81
    SerializerDeserializer.validateFamily(familyId, preambleLongs);
1✔
82
    if (version > serialVersionUID) {
1✔
83
      throw new SketchesArgumentException(
×
84
          "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + version);
85
    }
86
    SerializerDeserializer
1✔
87
      .validateType(mem.getByte(offset++), SerializerDeserializer.SketchType.CompactSketch);
1✔
88
    if (version <= serialVersionUIDLegacy) { // legacy serial format
1✔
89
      final byte flags = mem.getByte(offset++);
1✔
90
      final boolean isBigEndian = (flags & 1 << FlagsLegacy.IS_BIG_ENDIAN.ordinal()) > 0;
1✔
91
      if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
1✔
92
        throw new SketchesArgumentException("Byte order mismatch");
×
93
      }
94
      empty_ = (flags & 1 << FlagsLegacy.IS_EMPTY.ordinal()) > 0;
1✔
95
      final boolean isThetaIncluded = (flags & 1 << FlagsLegacy.IS_THETA_INCLUDED.ordinal()) > 0;
1✔
96
      if (isThetaIncluded) {
1✔
97
        thetaLong_ = mem.getLong(offset);
1✔
98
        offset += Long.BYTES;
1✔
99
      } else {
100
        thetaLong_ = Long.MAX_VALUE;
×
101
      }
102
      final boolean hasEntries = (flags & 1 << FlagsLegacy.HAS_ENTRIES.ordinal()) > 0;
1✔
103
      if (hasEntries) {
1✔
104
        int classNameLength = 0;
1✔
105
        if (version == serialVersionWithSummaryClassNameUID) {
1✔
106
          classNameLength = mem.getByte(offset++);
1✔
107
        }
108
        final int count = mem.getInt(offset);
1✔
109
        offset += Integer.BYTES;
1✔
110
        if (version == serialVersionWithSummaryClassNameUID) {
1✔
111
          offset += classNameLength;
1✔
112
        }
113
        hashArr_ = new long[count];
1✔
114

115
        for (int i = 0; i < count; i++) {
1✔
116
          hashArr_[i] = mem.getLong(offset);
1✔
117
          offset += Long.BYTES;
1✔
118
        }
119
        for (int i = 0; i < count; i++) {
1✔
120
          offset += readSummary(mem, offset, i, count, deserializer);
1✔
121
        }
122
      } else {
1✔
123
        hashArr_ = new long[0];
×
124
        summaryArr_ = null;
×
125
      }
126
    } else { // current serial format
1✔
127
      offset++; //skip unused byte
1✔
128
      final byte flags = mem.getByte(offset++);
1✔
129
      offset += 2; //skip 2 unused bytes
1✔
130
      empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
1✔
131
      thetaLong_ = Long.MAX_VALUE;
1✔
132
      int count = 0;
1✔
133
      if (!empty_) {
1✔
134
        if (preambleLongs == 1) {
1✔
135
          count = 1;
×
136
        } else {
137
          count = mem.getInt(offset);
1✔
138
          offset += Integer.BYTES;
1✔
139
          offset += 4; // unused
1✔
140
          if (preambleLongs > 2) {
1✔
141
            thetaLong_ = mem.getLong(offset);
1✔
142
            offset += Long.BYTES;
1✔
143
          }
144
        }
145
      }
146
      hashArr_ = new long[count];
1✔
147

148
      for (int i = 0; i < count; i++) {
1✔
149
        hashArr_[i] = mem.getLong(offset);
1✔
150
        offset += Long.BYTES;
1✔
151
        offset += readSummary(mem, offset, i, count, deserializer);
1✔
152
      }
153
    }
154
  }
1✔
155

156
  @SuppressWarnings({"unchecked"})
157
  private int readSummary(final Memory mem, final int offset, final int i, final int count,
158
      final SummaryDeserializer<S> deserializer) {
159
    final Memory memRegion = mem.region(offset, mem.getCapacity() - offset);
1✔
160
    final DeserializeResult<S> result = deserializer.heapifySummary(memRegion);
1✔
161
    final S summary = result.getObject();
1✔
162
    final Class<S> summaryType = (Class<S>) result.getObject().getClass();
1✔
163
    if (summaryArr_ == null) {
1✔
164
      summaryArr_ = (S[]) Array.newInstance(summaryType, count);
1✔
165
    }
166
    summaryArr_[i] = summary;
1✔
167
    return result.getSize();
1✔
168
  }
169

170
  @Override
171
  public CompactSketch<S> compact() {
172
    return this;
×
173
  }
174

175
  long[] getHashArr() {
176
    return hashArr_;
1✔
177
  }
178

179
  S[] getSummaryArr() {
180
    return summaryArr_;
1✔
181
  }
182

183
  @Override
184
  public int getRetainedEntries() {
185
    return hashArr_ == null ? 0 : hashArr_.length;
1✔
186
  }
187

188
  @Override
189
  public int getCountLessThanThetaLong(final long thetaLong) {
190
    return count(hashArr_, thetaLong);
×
191
  }
192

193
  // Layout of first 8 bytes:
194
  // Long || Start Byte Adr:
195
  // Adr:
196
  //      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
197
  //  0   ||    seed hash    |  Flags | unused | SkType | FamID  | SerVer |  Preamble_Longs    |
198
  @Override
199
  public byte[] toByteArray() {
200
  final int count = getRetainedEntries();
1✔
201
    final boolean isSingleItem = count == 1 && !isEstimationMode();
1✔
202
    final int preambleLongs = isEmpty() || isSingleItem ? 1 : isEstimationMode() ? 3 : 2;
1✔
203

204
    int summariesSizeBytes = 0;
1✔
205
    final byte[][] summariesBytes = new byte[count][];
1✔
206
    if (count > 0) {
1✔
207
      for (int i = 0; i < count; i++) {
1✔
208
        summariesBytes[i] = summaryArr_[i].toByteArray();
1✔
209
        summariesSizeBytes += summariesBytes[i].length;
1✔
210
      }
211
    }
212

213
    final int sizeBytes = Long.BYTES * preambleLongs + Long.BYTES * count + summariesSizeBytes;
1✔
214
    final byte[] bytes = new byte[sizeBytes];
1✔
215
    int offset = 0;
1✔
216
    bytes[offset++] = (byte) preambleLongs;
1✔
217
    bytes[offset++] = serialVersionUID;
1✔
218
    bytes[offset++] = (byte) Family.TUPLE.getID();
1✔
219
    bytes[offset++] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal();
1✔
220
    offset++; // unused
1✔
221
    bytes[offset++] = (byte) (
1✔
222
        (1 << Flags.IS_COMPACT.ordinal())
1✔
223
      | (1 << Flags.IS_READ_ONLY.ordinal())
1✔
224
      | (isEmpty() ? 1 << Flags.IS_EMPTY.ordinal() : 0)
1✔
225
    );
226
    ByteArrayUtil.putShortLE(bytes, offset, defaultSeedHash);
1✔
227
    offset += Short.BYTES;
1✔
228
    if (!isEmpty()) {
1✔
229
      if (!isSingleItem) {
1✔
230
        ByteArrayUtil.putIntLE(bytes, offset, count);
1✔
231
        offset += Integer.BYTES;
1✔
232
        offset += 4; // unused
1✔
233
        if (isEstimationMode()) {
1✔
234
          ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
1✔
235
          offset += Long.BYTES;
1✔
236
        }
237
      }
238
    }
239
    for (int i = 0; i < count; i++) {
1✔
240
      ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]);
1✔
241
      offset += Long.BYTES;
1✔
242
      System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
1✔
243
      offset += summariesBytes[i].length;
1✔
244
    }
245
    return bytes;
1✔
246
  }
247

248
  @Override
249
  public TupleSketchIterator<S> iterator() {
250
    return new TupleSketchIterator<>(hashArr_, summaryArr_);
1✔
251
  }
252

253
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc