• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bernardladenthin / BitcoinAddressFinder / #316

25 May 2025 06:36PM UTC coverage: 65.637% (-0.4%) from 66.005%
#316

push

bernardladenthin
Add loopCount support to OpenCL kernel for batched key generation

- Introduced `loopCount` parameter in `CProducerOpenCL` to allow multiple key computations per OpenCL work-item.
- Documented `loopCount` usage and constraints in detail.
- Adjusted `OpenClTask` to divide total work size by `loopCount`, validating input constraints.
- Updated kernel signature `generateKeysKernel_grid` to accept `loopCount` as third argument.
- Refactored kernel logic to loop over `loopCount` iterations:
  - First iteration uses `point_mul_xy` for scalar multiplication.
  - Subsequent iterations use `point_add_xy` to accumulate additional keys.
  - Coordinates are serialized and hashed per loop iteration.
- Added helper `copy_constant_u32_array_private_u32()` to copy constants into local registers.
- Defined precomputed base point offsets (`G_OFFSET_X1`, etc.) using `ONE_COORDINATE_NUM_WORDS` for clarity and maintainability.
- Updated test classes to set `loopCount` derived from `BITS_FOR_BATCH >> 1`.

1 of 16 new or added lines in 2 files covered. (6.25%)

1232 of 1877 relevant lines covered (65.64%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/java/net/ladenthin/bitcoinaddressfinder/OpenClTask.java
1
// @formatter:off
2
/**
3
 * Copyright 2020 Bernard Ladenthin bernard.ladenthin@gmail.com
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 *    http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
// @formatter:on
19
package net.ladenthin.bitcoinaddressfinder;
20

21
import com.google.common.annotations.VisibleForTesting;
22
import java.math.BigInteger;
23
import java.nio.ByteBuffer;
24
import java.nio.ByteOrder;
25
import net.ladenthin.bitcoinaddressfinder.configuration.CProducer;
26
import net.ladenthin.bitcoinaddressfinder.configuration.CProducerOpenCL;
27
import static org.jocl.CL.CL_MEM_READ_ONLY;
28
import static org.jocl.CL.CL_MEM_USE_HOST_PTR;
29
import static org.jocl.CL.CL_MEM_WRITE_ONLY;
30
import static org.jocl.CL.CL_TRUE;
31
import static org.jocl.CL.clCreateBuffer;
32
import static org.jocl.CL.clEnqueueNDRangeKernel;
33
import static org.jocl.CL.clEnqueueReadBuffer;
34
import static org.jocl.CL.clEnqueueWriteBuffer;
35
import static org.jocl.CL.clFinish;
36
import static org.jocl.CL.clReleaseMemObject;
37
import static org.jocl.CL.clSetKernelArg;
38
import org.jocl.Pointer;
39
import org.jocl.Sizeof;
40
import org.jocl.cl_command_queue;
41
import org.jocl.cl_context;
42
import org.jocl.cl_kernel;
43
import org.jocl.cl_mem;
44
import org.slf4j.Logger;
45
import org.slf4j.LoggerFactory;
46

47
public class OpenClTask implements ReleaseCLObject {
48

49
    protected Logger logger = LoggerFactory.getLogger(this.getClass());
×
50
    
51
    private final int PRIVATE_KEY_SOURCE_SIZE_IN_BYTES = PublicKeyBytes.PRIVATE_KEY_MAX_NUM_BYTES;
×
52
    
53
    private final CProducerOpenCL cProducer;
54

55
    private final cl_context context;
56
    
57
    private final SourceArgument privateKeySourceArgument;
58
    
59
    private final BitHelper bitHelper;
60
    private final ByteBufferUtility byteBufferUtility;
61
    private final BigInteger maxPrivateKeyForBatchSize;
62
    
63
    private boolean closed = false;
×
64

65
    public abstract static class CLByteBufferPointerArgument implements ReleaseCLObject {
66
        /**
67
         * Controls how memory is allocated for the OpenCL output buffer.
68
         *
69
         * If set to {@link org.jocl.CL#CL_MEM_USE_HOST_PTR}, the OpenCL buffer is created using a host pointer,
70
         * meaning the host's {@link ByteBuffer} is directly used by the device (zero-copy if supported).
71
         * This may reduce memory copy overhead on some platforms, but:
72
         * <ul>
73
         *     <li>It requires the buffer to remain valid and pinned in memory.</li>
74
         *     <li>On some OpenCL implementations or devices (e.g. discrete GPUs), it may cause slower access due to lack of true zero-copy support.</li>
75
         *     <li>Debugging and compatibility issues can arise if host memory alignment or page-locking requirements aren't met.</li>
76
         * </ul>
77
         *
78
         * If set to {@link org.jocl.CL#CL_MEM_WRITE_ONLY}, the buffer is created with no reference to host memory,
79
         * and OpenCL manages the memory internally. This is typically safer and potentially faster on discrete GPUs,
80
         * although it requires an explicit copy back to the host after kernel execution.
81
         *
82
         * In most cases, {@link org.jocl.CL#CL_MEM_WRITE_ONLY} (i.e. setting this flag to {@code false}) is more robust and portable.
83
         */
84
        protected static final boolean USE_HOST_PTR = false;
85

86
        protected final ByteBuffer byteBuffer;
87
        protected final Pointer hostMemoryPointer;
88
        protected final cl_mem mem;
89
        protected final Pointer clMemPointer;
90
        private boolean closed = false;
×
91

92
        public CLByteBufferPointerArgument(ByteBuffer byteBuffer, Pointer hostMemoryPointer, cl_mem mem, Pointer clMemPointer) {
×
93
            this.byteBuffer = byteBuffer;
×
94
            this.hostMemoryPointer = hostMemoryPointer;
×
95
            this.mem = mem;
×
96
            this.clMemPointer = clMemPointer;
×
97
        }
×
98

99
        public ByteBuffer getByteBuffer() {
100
            return byteBuffer;
×
101
        }
102

103
        /** Used for reading/writing data to the host via clEnqueueRead/WriteBuffer. */
104
        public Pointer getHostMemoryPointer() {
105
            return hostMemoryPointer;
×
106
        }
107

108
        /** Used to pass the buffer to the kernel via clSetKernelArg. */
109
        public Pointer getClMemPointer() {
110
            return clMemPointer;
×
111
        }
112

113
        public cl_mem getMem() {
114
            return mem;
×
115
        }
116

117
        @Override
118
        public boolean isClosed() {
119
            return closed;
×
120
        }
121

122
        @Override
123
        public void close() {
124
            if (!closed) {
×
125
                clReleaseMemObject(mem);
×
126
                closed = true;
×
127
            }
128
        }
×
129
    }
130

131
    public static class DestinationArgument extends CLByteBufferPointerArgument {
132
        
133
        private DestinationArgument(ByteBuffer byteBuffer, Pointer hostMemoryPointer, cl_mem mem, Pointer clMemPointer) {
134
            super(byteBuffer, hostMemoryPointer, mem, clMemPointer);
×
135
        }
×
136

137
        public static DestinationArgument create(cl_context context, long sizeInBytes) {
138
            final ByteBuffer byteBuffer = ByteBuffer.allocateDirect(ByteBufferUtility.ensureByteBufferCapacityFitsInt(sizeInBytes));
×
139
            final Pointer hostMemoryPointer = Pointer.to(byteBuffer);
×
140
            final cl_mem mem;
141

142
            if (USE_HOST_PTR) {
143
                mem = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeInBytes, hostMemoryPointer, null);
144
            } else {
145
                mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeInBytes, null, null);
×
146
            }
147
            final Pointer clMemPointer = Pointer.to(mem);
×
148

149
            return new DestinationArgument(byteBuffer, hostMemoryPointer, mem, clMemPointer);
×
150
        }
151
        
152
    }
153
    
154
    public static class SourceArgument extends CLByteBufferPointerArgument {
155

156
        private SourceArgument(ByteBuffer byteBuffer, Pointer hostMemoryPointer, cl_mem mem, Pointer clMemPointer) {
157
            super(byteBuffer, hostMemoryPointer, mem, clMemPointer);
×
158
        }
×
159

160
        public static SourceArgument create(cl_context context, long sizeInBytes) {
161
            final ByteBuffer byteBuffer = ByteBuffer.allocateDirect(ByteBufferUtility.ensureByteBufferCapacityFitsInt(sizeInBytes));
×
162
            final Pointer hostMemoryPointer = Pointer.to(byteBuffer);
×
163
            final cl_mem mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeInBytes, hostMemoryPointer, null);
×
164
            final Pointer clMemPointer = Pointer.to(mem);
×
165
            return new SourceArgument(byteBuffer, hostMemoryPointer, mem, clMemPointer);
×
166
        }
167
    }
168

169
    // Only available after init
NEW
170
    /**
     * Stores the collaborators, derives the maximum private key allowed for the configured batch size
     * and allocates the private key source buffer in the given context.
     *
     * @param context the OpenCL context used for all buffers of this task
     * @param cProducer the producer configuration ({@code batchSizeInBits} is read here)
     * @param bitHelper helper used for bit/size conversions
     * @param byteBufferUtility helper used for byte array and buffer handling
     */
    public OpenClTask(cl_context context, CProducerOpenCL cProducer, BitHelper bitHelper, ByteBufferUtility byteBufferUtility) {
        // Plain collaborators first.
        this.byteBufferUtility = byteBufferUtility;
        this.bitHelper = bitHelper;
        this.cProducer = cProducer;
        this.context = context;

        // Derived state; the OpenCL buffer is allocated last so nothing is created if the step before fails.
        this.maxPrivateKeyForBatchSize = KeyUtility.getMaxPrivateKeyForBatchSize(cProducer.batchSizeInBits);
        this.privateKeySourceArgument = SourceArgument.create(context, PRIVATE_KEY_SOURCE_SIZE_IN_BYTES);
    }
×
178

179
    public long getDstSizeInBytes() {
180
        return (long) PublicKeyBytes.CHUNK_SIZE_NUM_BYTES * cProducer.getOverallWorkSize(bitHelper);
×
181
    }
182

183
    /**
184
    * Writes the base private key to the source buffer in the format expected by the OpenCL kernel.
185
    * <p>
186
    * The method ensures that the provided private key is valid for the current batch size. If it exceeds
187
    * the allowed range, a {@link PrivateKeyTooLargeException} is thrown.
188
    * <p>
189
    * Internally, the private key is first converted to a byte array in Big-Endian format (as returned
190
    * by {@link BigInteger#toByteArray()}). Because the OpenCL kernel expects the private key as a
191
    * {@code __global const u32 *k} array in <strong>Little-Endian</strong> word order, the byte array
192
    * is then converted from Big-Endian to Little-Endian before being written to the OpenCL input buffer.
193
    * <p>
194
    * This matches the behavior of the OpenCL kernel {@code generateKeysKernel_grid}, which reads the key
195
    * using {@code copy_u32_array(k_littleEndian_local, k, ...)} assuming Little-Endian input and applies
196
    * the work-item ID to the least-significant word.
197
    *
198
    * @param privateKeyBase the base private key used as input to the OpenCL kernel
199
    * @throws PrivateKeyTooLargeException if the key is too large for the current batch size
200
    */
201
    public void setSrcPrivateKeyChunk(BigInteger privateKeyBase) {
202
        if (KeyUtility.isInvalidWithBatchSize(privateKeyBase, maxPrivateKeyForBatchSize)) {
×
203
            throw new PrivateKeyTooLargeException(privateKeyBase, maxPrivateKeyForBatchSize, cProducer.batchSizeInBits);
×
204
        }
205

206
        // BigInteger.toByteArray() always returns a big-endian (MSB-first) representation, 
207
        // meaning the most significant byte (MSB) comes first.
208
        // Therefore, the source format is always Big Endian.
209
        final byte[] byteArray = byteBufferUtility.bigIntegerToBytes(privateKeyBase);
×
210
        EndiannessConverter endiannessConverter = new EndiannessConverter(ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN, byteBufferUtility);
×
211
        endiannessConverter.convertEndian(byteArray);
×
212
        byteBufferUtility.putToByteBuffer(privateKeySourceArgument.getByteBuffer(), byteArray);
×
213
    }
×
214
    
215
    @VisibleForTesting
216
    public SourceArgument getPrivateKeySourceArgument() {
217
        return privateKeySourceArgument;
×
218
    }
219

220
    public ByteBuffer executeKernel(cl_kernel kernel, cl_command_queue commandQueue) {
221
        final long dstSizeInBytes = getDstSizeInBytes();
×
222
        // Allocate a new destination buffer so that cloning after kernel execution is unnecessary
223
        try (final DestinationArgument destinationArgument = DestinationArgument.create(context, dstSizeInBytes) ) {
×
224
            // Set the work-item dimensions
NEW
225
            final long totalResultCount = bitHelper.convertBitsToSize(cProducer.batchSizeInBits);
×
NEW
226
            final int loopCount = cProducer.loopCount;
×
NEW
227
            final long adjustedWorkSize = totalResultCount / loopCount;
×
228
            
229
            // Validate loopCount constraints
NEW
230
            if (loopCount < 1) {
×
NEW
231
                throw new IllegalArgumentException("loopCount must be >= 1.");
×
232
            }
NEW
233
            if (loopCount > totalResultCount) {
×
NEW
234
                throw new IllegalArgumentException("loopCount must not exceed total result count. Given: " + loopCount + ", max: " + totalResultCount);
×
235
            }
NEW
236
            if (totalResultCount % loopCount != 0) {
×
NEW
237
                throw new IllegalArgumentException("batchSizeInBits is not divisible by loopCount; result count would be invalid.");
×
238
            }
239
            
NEW
240
            final long global_work_size[] = new long[]{adjustedWorkSize};
×
241
            final long localWorkSize[] = null; // new long[]{1}; // enabling the system to choose the work-group size.
×
242
            final int workDim = 1;
×
243
            
244
            // Set the arguments for the kernel
NEW
245
            clSetKernelArg(kernel, 0, Sizeof.cl_mem, destinationArgument.getClMemPointer());
×
NEW
246
            clSetKernelArg(kernel, 1, Sizeof.cl_mem, privateKeySourceArgument.getClMemPointer());
×
NEW
247
            clSetKernelArg(kernel, 2, Sizeof.cl_uint, Pointer.to(new int[] { loopCount }));
×
248

249
            {
250
                // write src buffer
251
                clEnqueueWriteBuffer(commandQueue,
×
252
                        privateKeySourceArgument.getMem(),
×
253
                        CL_TRUE,
254
                        0,
255
                        PRIVATE_KEY_SOURCE_SIZE_IN_BYTES,
256
                        privateKeySourceArgument.getHostMemoryPointer(),
×
257
                        0,
258
                        null,
259
                        null
260
                );
261
                clFinish(commandQueue);
×
262
            }
263
            {
264
                // execute the kernel
265
                final long beforeExecute = System.currentTimeMillis();
×
266
                clEnqueueNDRangeKernel(
×
267
                        commandQueue,
268
                        kernel,
269
                        workDim,
270
                        null,
271
                        global_work_size,
272
                        localWorkSize,
273
                        0,
274
                        null,
275
                        null
276
                );
277
                clFinish(commandQueue);
×
278

279
                final long afterExecute = System.currentTimeMillis();
×
280

281
                if (logger.isTraceEnabled()) {
×
282
                    logger.trace("Executed OpenCL kernel in " + (afterExecute - beforeExecute) + "ms");
×
283
                }
284
            }
285
            {
286
                // read the dst buffer
287
                final long beforeRead = System.currentTimeMillis();
×
288

289
                clEnqueueReadBuffer(commandQueue,
×
290
                        destinationArgument.getMem(),
×
291
                        CL_TRUE,
292
                        0,
293
                        dstSizeInBytes,
294
                        destinationArgument.getHostMemoryPointer(),
×
295
                        0,
296
                        null,
297
                        null
298
                );
299
                clFinish(commandQueue);
×
300
                destinationArgument.close();
×
301

302
                final long afterRead = System.currentTimeMillis();
×
303
                if (logger.isTraceEnabled()) {
×
304
                    logger.trace("Read OpenCL data "+((dstSizeInBytes / 1024) / 1024) + "Mb in " + (afterRead - beforeRead) + "ms");
×
305
                }
306
            }
307
            return destinationArgument.getByteBuffer();
×
308
        }
309
    }
310

311
    @Override
312
    public boolean isClosed() {
313
        return closed;
×
314
    }
315
    
316
    @Override
317
    public void close() {
318
        if(!closed) {
×
319
            privateKeySourceArgument.close();
×
NEW
320
            closed = true;
×
321
            // hint: destinationArgument will be released immediately
322
        }
323
    }
×
324

325
    /**
326
     * https://stackoverflow.com/questions/3366925/deep-copy-duplicate-of-javas-bytebuffer/4074089
327
     */
328
    private static ByteBuffer cloneByteBuffer(final ByteBuffer original) {
329
        // Create clone with same capacity as original.
330
        final ByteBuffer clone = (original.isDirect())
×
331
                ? ByteBuffer.allocateDirect(original.capacity())
×
332
                : ByteBuffer.allocate(original.capacity());
×
333

334
        // Create a read-only copy of the original.
335
        // This allows reading from the original without modifying it.
336
        final ByteBuffer readOnlyCopy = original.asReadOnlyBuffer();
×
337

338
        // Flip and read from the original.
339
        readOnlyCopy.flip();
×
340
        clone.put(readOnlyCopy);
×
341

342
        return clone;
×
343
    }
344

345
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc