• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

knowledgepixels / nanopub-query / 24522677438

16 Apr 2026 04:48PM UTC coverage: 64.192% (-3.9%) from 68.052%
24522677438

push

github

web-flow
Merge pull request #66 from knowledgepixels/feature/65-trust-state-repos

feat: mirror registry trust state into the trust repo (#65)

281 of 488 branches covered (57.58%)

Branch coverage included in aggregate %.

800 of 1196 relevant lines covered (66.89%)

9.54 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

30.48
src/main/java/com/knowledgepixels/query/JellyNanopubLoader.java
1
package com.knowledgepixels.query;
2

3
import java.io.IOException;
4
import java.util.concurrent.atomic.AtomicLong;
5

6
import org.apache.http.client.methods.CloseableHttpResponse;
7
import org.apache.http.client.methods.HttpGet;
8
import org.apache.http.client.methods.HttpHead;
9
import org.apache.http.impl.client.CloseableHttpClient;
10
import org.apache.http.impl.client.HttpClientBuilder;
11
import org.apache.http.util.EntityUtils;
12
import org.nanopub.NanopubUtils;
13
import org.nanopub.jelly.NanopubStream;
14
import org.slf4j.Logger;
15
import org.slf4j.LoggerFactory;
16

17
/**
18
 * Loads nanopubs from the attached Nanopub Registry via a restartable Jelly stream.
19
 */
20
public class JellyNanopubLoader {
×
21
    static final String registryUrl;
22
    private static long lastCommittedCounter = -1;
6✔
23
    private static Long lastKnownSetupId = null;
6✔
24
    // Latest registry metadata fields, updated on each metadata fetch and forwarded to clients
25
    static volatile String lastCoverageTypes = null;
6✔
26
    static volatile String lastCoverageAgents = null;
6✔
27
    static volatile String lastTestInstance = null;
6✔
28
    static volatile String lastNanopubCount = null;
6✔
29
    private static final CloseableHttpClient metadataClient;
30
    private static final CloseableHttpClient jellyStreamClient;
31

32
    private static final int MAX_RETRIES_METADATA = 10;
33
    private static final int RETRY_DELAY_METADATA = 3000;
34
    private static final int RETRY_DELAY_JELLY = 5000;
35

36
    private static final Logger log = LoggerFactory.getLogger(JellyNanopubLoader.class);
9✔
37

38
    /**
     * Snapshot of the values the Registry reports in the headers of a HEAD response.
     *
     * @param loadCounter    the Registry's current load counter
     * @param setupId        the Registry's setup ID, or {@code null} if it was absent or unparsable
     * @param coverageTypes  coverage types header value, or {@code null} if absent
     * @param coverageAgents coverage agents header value, or {@code null} if absent
     * @param testInstance   test instance header value, or {@code null} if absent
     * @param nanopubCount   nanopub count header value, or {@code null} if absent
     * @param trustStateHash trust state hash header value, or {@code null} if absent
     */
    record RegistryMetadata(
            long loadCounter,
            Long setupId,
            String coverageTypes,
            String coverageAgents,
            String testInstance,
            String nanopubCount,
            String trustStateHash
    ) {
    }
44

45
    /**
46
     * The interval in milliseconds at which the updates loader should poll for new nanopubs.
47
     */
48
    public static final int UPDATES_POLL_INTERVAL = 2000;
49

50
    /**
     * Kind of loading operation a batch belongs to; decides which status is written
     * when the committed counter is saved.
     */
    enum LoadingType {
        /** Batch is part of the initial (full) load. */
        INITIAL,
        /** Batch is part of an incremental update. */
        UPDATE
    }
54

55
    static {
56
        // Initialize registryUrl
57
        var url = Utils.getEnvString(
12✔
58
                "REGISTRY_FIXED_URL", "https://registry.knowledgepixels.com/"
59
        );
60
        if (!url.endsWith("/")) url += "/";
12!
61
        registryUrl = url;
6✔
62

63
        metadataClient = HttpClientBuilder.create().setDefaultRequestConfig(Utils.getHttpRequestConfig()).build();
15✔
64
        jellyStreamClient = NanopubUtils.getHttpClient();
6✔
65
    }
3✔
66

67
    /**
68
     * Start or continue (after restart) the initial loading procedure. This simply loads all
69
     * nanopubs from the attached Registry.
70
     *
71
     * @param afterCounter which counter to start from (-1 for the beginning)
72
     */
73
    public static void loadInitial(long afterCounter) {
74
        RegistryMetadata metadata = fetchRegistryMetadata();
6✔
75
        updateForwardingMetadata(metadata);
6✔
76
        TrustStateLoader.maybeUpdate(metadata.trustStateHash());
9✔
77
        long targetCounter = metadata.loadCounter();
9✔
78
        log.info("Fetched Registry load counter: {}", targetCounter);
15✔
79
        // Store setupId on initial load
80
        if (metadata.setupId() != null && lastKnownSetupId == null) {
9!
81
            lastKnownSetupId = metadata.setupId();
×
82
            StatusController.get().setRegistrySetupId(metadata.setupId());
×
83
        }
84
        lastCommittedCounter = afterCounter;
6✔
85
        while (lastCommittedCounter < targetCounter) {
12!
86
            try {
87
                loadBatch(lastCommittedCounter, LoadingType.INITIAL);
×
88
                log.info("Initial load: loaded batch up to counter {}", lastCommittedCounter);
×
89
            } catch (Exception e) {
×
90
                log.info("Failed to load batch starting from counter {}", lastCommittedCounter);
×
91
                log.info("Failure reason: ", e);
×
92
                try {
93
                    Thread.sleep(RETRY_DELAY_JELLY);
×
94
                } catch (InterruptedException e2) {
×
95
                    throw new RuntimeException("Interrupted while waiting to retry loading batch.");
×
96
                }
×
97
            }
×
98
        }
99
        log.info("Initial load complete.");
9✔
100
    }
3✔
101

102
    /**
103
     * Check if the Registry has any new nanopubs. If it does, load them.
104
     * This method should be called periodically, and you should wait for it to finish before
105
     * calling it again.
106
     */
107
    public static void loadUpdates() {
108
        try {
109
            final var status = StatusController.get().getState();
×
110
            lastCommittedCounter = status.loadCounter;
×
111
            RegistryMetadata metadata = fetchRegistryMetadata();
×
112
            updateForwardingMetadata(metadata);
×
113
            TrustStateLoader.maybeUpdate(metadata.trustStateHash());
×
114
            long targetCounter = metadata.loadCounter();
×
115
            Long currentSetupId = metadata.setupId();
×
116

117
            // Detect reset via setupId change
118
            if (lastKnownSetupId != null && currentSetupId != null
×
119
                    && !lastKnownSetupId.equals(currentSetupId)) {
×
120
                log.warn("Registry reset detected: setupId {} -> {}", lastKnownSetupId, currentSetupId);
×
121
                performResync(currentSetupId);
×
122
                return;
×
123
            }
124
            // Detect reset via counter decrease (also covers first run after upgrade
125
            // where no setupId was persisted yet but the registry has already been reset)
126
            if (lastCommittedCounter > 0 && targetCounter >= 0
×
127
                    && targetCounter < lastCommittedCounter) {
128
                log.warn("Registry counter decreased {} -> {}, triggering resync",
×
129
                        lastCommittedCounter, targetCounter);
×
130
                performResync(currentSetupId);
×
131
                return;
×
132
            }
133

134
            // Update lastKnownSetupId on first successful poll
135
            if (currentSetupId != null && lastKnownSetupId == null) {
×
136
                if (lastCommittedCounter > 0) {
×
137
                    // Upgrade from a version without setupId tracking. The DB has data but
138
                    // we can't verify it matches the current registry. Force a resync.
139
                    log.warn("No stored setupId but DB has data (counter: {}). "
×
140
                            + "Forcing resync to ensure data consistency.", lastCommittedCounter);
×
141
                    performResync(currentSetupId);
×
142
                    return;
×
143
                }
144
                lastKnownSetupId = currentSetupId;
×
145
                StatusController.get().setRegistrySetupId(currentSetupId);
×
146
            }
147

148
            StatusController.get().setLoadingUpdates(status.loadCounter);
×
149
            if (lastCommittedCounter >= targetCounter) {
×
150
                StatusController.get().setReady();
×
151
                return;
×
152
            }
153
            loadBatch(lastCommittedCounter, LoadingType.UPDATE);
×
154
            log.info("Loaded {} update(s). Counter: {}, target was: {}",
×
155
                    lastCommittedCounter - status.loadCounter, lastCommittedCounter, targetCounter);
×
156
            if (lastCommittedCounter < targetCounter) {
×
157
                log.info("Warning: expected to load nanopubs up to (inclusive) counter " +
×
158
                        targetCounter + " based on the counter reported in Registry's headers, " +
159
                        "but loaded only up to {}.", lastCommittedCounter);
×
160
            }
161
        } catch (Exception e) {
×
162
            log.info("Failed to load updates. Current counter: {}", lastCommittedCounter);
×
163
            log.info("Failure Reason: ", e);
×
164
        } finally {
165
            try {
166
                StatusController.get().setReady();
×
167
            } catch (Exception e) {
×
168
                log.info("Update loader: failed to set status to READY.");
×
169
                log.info("Failure Reason: ", e);
×
170
            }
×
171
        }
172
    }
×
173

174
    /**
175
     * Re-stream all nanopubs from the registry after a reset is detected.
176
     * Existing nanopubs are skipped by NanopubLoader's per-repo dedup.
177
     *
178
     * @param newSetupId the new setup ID from the registry, or null if unknown
179
     */
180
    private static void performResync(Long newSetupId) {
181
        log.warn("Starting resync with registry. New setupId: {}", newSetupId);
×
182
        StatusController.get().setResetting();
×
183
        lastKnownSetupId = newSetupId;
×
184
        if (newSetupId != null) {
×
185
            StatusController.get().setRegistrySetupId(newSetupId);
×
186
        }
187
        StatusController.get().setLoadingInitial(-1);
×
188
        loadInitial(-1);
×
189
        StatusController.get().setReady();
×
190
        log.warn("Resync complete. Counter: {}", lastCommittedCounter);
×
191
    }
×
192

193
    /**
194
     * Load a batch of nanopubs from the Jelly stream.
195
     * <p>
196
     * The method requests the list of all nanopubs from the Registry and reads it for as long
197
     * as it can. If the stream is interrupted, the method will throw an exception, and you
198
     * can resume loading from the last known counter.
199
     *
200
     * @param afterCounter the last known nanopub counter to have been committed in the DB
201
     * @param type         the type of loading operation (initial or update)
202
     */
203
    static void loadBatch(long afterCounter, LoadingType type) {
204
        CloseableHttpResponse response;
205
        try {
206
            var request = new HttpGet(makeStreamFetchUrl(afterCounter));
×
207
            response = jellyStreamClient.execute(request);
×
208
        } catch (IOException e) {
×
209
            throw new RuntimeException("Failed to fetch Jelly stream from the Registry (I/O error).", e);
×
210
        }
×
211

212
        int httpStatus = response.getStatusLine().getStatusCode();
×
213
        if (httpStatus < 200 || httpStatus >= 300) {
×
214
            EntityUtils.consumeQuietly(response.getEntity());
×
215
            throw new RuntimeException("Jelly stream HTTP status is not 2xx: " + httpStatus + ".");
×
216
        }
217

218
        try (
219
                var is = response.getEntity().getContent();
×
220
                var npStream = NanopubStream.fromByteStream(is).getAsNanopubs()
×
221
        ) {
222
            AtomicLong checkpointTime = new AtomicLong(System.currentTimeMillis());
×
223
            AtomicLong checkpointCounter = new AtomicLong(lastCommittedCounter);
×
224
            AtomicLong lastSavedCounter = new AtomicLong(lastCommittedCounter);
×
225
            AtomicLong loaded = new AtomicLong(0L);
×
226

227
            npStream.forEach(m -> {
×
228
                if (!m.isSuccess()) throw new RuntimeException("Failed to load " +
×
229
                        "nanopub from Jelly stream. Last known counter: " + lastCommittedCounter,
230
                        m.getException()
×
231
                );
232
                if (m.getCounter() < lastCommittedCounter) {
×
233
                    throw new RuntimeException("Received a nanopub with a counter lower than " +
×
234
                            "the last known counter. Last known counter: " + lastCommittedCounter +
235
                            ", received counter: " + m.getCounter());
×
236
                }
237
                NanopubLoader.load(m.getNanopub(), m.getCounter());
×
238
                if (m.getCounter() % 10 == 0) {
×
239
                    // Save the committed counter only every 10 nanopubs to reduce DB load
240
                    saveCommittedCounter(type);
×
241
                    lastSavedCounter.set(m.getCounter());
×
242
                }
243
                lastCommittedCounter = m.getCounter();
×
244
                loaded.getAndIncrement();
×
245

246
                if (loaded.get() % 50 == 0) {
×
247
                    long currTime = System.currentTimeMillis();
×
248
                    double speed = 50 / ((currTime - checkpointTime.get()) / 1000.0);
×
249
                    log.info("Loading speed: " + String.format("%.2f", speed) +
×
250
                            " np/s. Counter: " + lastCommittedCounter);
251
                    checkpointTime.set(currTime);
×
252
                    checkpointCounter.set(lastCommittedCounter);
×
253
                }
254
            });
×
255
            // Make sure to save the last committed counter at the end of the batch
256
            if (lastCommittedCounter >= lastSavedCounter.get()) {
×
257
                saveCommittedCounter(type);
×
258
            }
259
        } catch (IOException e) {
×
260
            throw new RuntimeException("I/O error while reading the response Jelly stream.", e);
×
261
        } finally {
262
            try {
263
                response.close();
×
264
            } catch (IOException e) {
×
265
                log.info("Failed to close the Jelly stream response.");
×
266
            }
×
267
        }
268
    }
×
269

270
    /**
271
     * Save the last committed counter to the DB. Do this every N nanopubs to reduce DB load.
272
     * Remember to call this method at the end of the batch as well.
273
     *
274
     * @param type the type of loading operation (initial or update)
275
     */
276
    private static void saveCommittedCounter(LoadingType type) {
277
        try {
278
            if (type == LoadingType.INITIAL) {
×
279
                StatusController.get().setLoadingInitial(lastCommittedCounter);
×
280
            } else {
281
                StatusController.get().setLoadingUpdates(lastCommittedCounter);
×
282
            }
283
        } catch (Exception e) {
×
284
            throw new RuntimeException("Could not update the nanopub counter in DB", e);
×
285
        }
×
286
    }
×
287

288
    /**
289
     * Set the last known setup ID. Called from MainVerticle on startup to restore persisted state.
290
     *
291
     * @param setupId the setup ID to set, or null if not known
292
     */
293
    static void setLastKnownSetupId(Long setupId) {
294
        lastKnownSetupId = setupId;
×
295
    }
×
296

297
    /**
298
     * Update the cached metadata fields used for forwarding to clients.
299
     */
300
    private static void updateForwardingMetadata(RegistryMetadata metadata) {
301
        lastCoverageTypes = metadata.coverageTypes();
9✔
302
        lastCoverageAgents = metadata.coverageAgents();
9✔
303
        lastTestInstance = metadata.testInstance();
9✔
304
        lastNanopubCount = metadata.nanopubCount();
9✔
305
    }
3✔
306

307
    /**
308
     * Run a HEAD request to the Registry to fetch its current metadata (load counter and setup ID).
309
     *
310
     * @return the registry metadata
311
     */
312
    static RegistryMetadata fetchRegistryMetadata() {
313
        int tries = 0;
6✔
314
        RegistryMetadata metadata = null;
6✔
315
        while (metadata == null && tries < MAX_RETRIES_METADATA) {
15!
316
            try {
317
                metadata = fetchRegistryMetadataInner();
4✔
318
            } catch (Exception e) {
1✔
319
                tries++;
1✔
320
                log.info("Failed to fetch registry metadata, try " + tries +
5✔
321
                        ". Retrying in {}ms...", RETRY_DELAY_METADATA);
1✔
322
                log.info("Failure Reason: ", e);
4✔
323
                try {
324
                    Thread.sleep(RETRY_DELAY_METADATA);
2✔
325
                } catch (InterruptedException e2) {
×
326
                    throw new RuntimeException(
×
327
                            "Interrupted while waiting to retry fetching registry metadata.");
328
                }
1✔
329
            }
3✔
330
        }
331
        if (metadata == null) {
6!
332
            throw new RuntimeException("Failed to fetch registry metadata after " +
5✔
333
                    MAX_RETRIES_METADATA + " retries.");
334
        }
335
        return metadata;
4✔
336
    }
337

338
    /**
339
     * Inner logic for fetching the registry metadata via HEAD request.
340
     *
341
     * @return the registry metadata (load counter and setup ID)
342
     * @throws IOException if the HTTP request fails
343
     */
344
    private static RegistryMetadata fetchRegistryMetadataInner() throws IOException {
345
        var request = new HttpHead(registryUrl);
15✔
346
        try (var response = metadataClient.execute(request)) {
8✔
347
            int status = response.getStatusLine().getStatusCode();
8✔
348
            EntityUtils.consumeQuietly(response.getEntity());
6✔
349
            if (status < 200 || status >= 300) {
12!
350
                throw new RuntimeException("Registry metadata HTTP status is not 2xx: " +
×
351
                        status + ".");
352
            }
353

354
            // Check if the registry is ready
355
            var hStatus = response.getHeaders("Nanopub-Registry-Status");
8✔
356
            if (hStatus.length == 0) {
6!
357
                throw new RuntimeException("Registry did not return a Nanopub-Registry-Status header.");
×
358
            }
359
            if (!"ready".equals(hStatus[0].getValue()) && !"updating".equals(hStatus[0].getValue())) {
14!
360
                throw new RuntimeException("Registry is not in ready state.");
×
361
            }
362

363
            // Get the load counter
364
            var hCounter = response.getHeaders("Nanopub-Registry-Load-Counter");
8✔
365
            if (hCounter.length == 0) {
6!
366
                throw new RuntimeException("Registry did not return a Nanopub-Registry-Load-Counter header.");
×
367
            }
368
            long loadCounter = Long.parseLong(hCounter[0].getValue());
12✔
369

370
            // Get the setup ID (optional — older registries may not have it)
371
            Long setupId = null;
4✔
372
            var hSetupId = response.getHeaders("Nanopub-Registry-Setup-Id");
8✔
373
            if (hSetupId.length > 0) {
6!
374
                try {
375
                    setupId = Long.parseLong(hSetupId[0].getValue());
14✔
376
                } catch (NumberFormatException e) {
×
377
                    log.info("Could not parse Nanopub-Registry-Setup-Id header: {}", hSetupId[0].getValue());
×
378
                }
2✔
379
            }
380

381
            // Read metadata headers for forwarding to clients
382
            String coverageTypes = getHeaderValue(response, "Nanopub-Registry-Coverage-Types");
8✔
383
            String coverageAgents = getHeaderValue(response, "Nanopub-Registry-Coverage-Agents");
8✔
384
            String testInstance = getHeaderValue(response, "Nanopub-Registry-Test-Instance");
8✔
385
            String nanopubCount = getHeaderValue(response, "Nanopub-Registry-Nanopub-Count");
8✔
386
            // Optional — older registries (without trust calculation) won't set this header.
387
            String trustStateHash = getHeaderValue(response, "Nanopub-Registry-Trust-State-Hash");
8✔
388

389
            return new RegistryMetadata(loadCounter, setupId, coverageTypes, coverageAgents,
26✔
390
                    testInstance, nanopubCount, trustStateHash);
391
        }
392
    }
393

394
    private static String getHeaderValue(CloseableHttpResponse response, String name) {
395
        var headers = response.getHeaders(name);
8✔
396
        return headers.length > 0 ? headers[0].getValue() : null;
18!
397
    }
398

399
    /**
400
     * Construct the URL for fetching the Jelly stream.
401
     *
402
     * @param afterCounter the last known counter to have been committed in the DB
403
     * @return the URL for fetching the Jelly stream
404
     */
405
    private static String makeStreamFetchUrl(long afterCounter) {
406
        return registryUrl + "nanopubs.jelly?afterCounter=" + afterCounter;
×
407
    }
408
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc