• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

knowledgepixels / nanopub-query / 24979573170

27 Apr 2026 06:16AM UTC coverage: 56.165% (+0.2%) from 55.994%
24979573170

push

github

web-flow
Merge pull request #85 from knowledgepixels/feature/62-phase-3a-metrics

feat: expose spaces build/cycle metrics as Prometheus gauges (#62)

385 of 774 branches covered (49.74%)

Branch coverage included in aggregate %.

1109 of 1886 relevant lines covered (58.8%)

8.96 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.9
src/main/java/com/knowledgepixels/query/MetricsCollector.java
1
package com.knowledgepixels.query;
2

3
import io.micrometer.core.instrument.Gauge;
4
import io.micrometer.core.instrument.MeterRegistry;
5

6
import java.util.Map;
7
import java.util.Set;
8
import java.util.concurrent.ConcurrentHashMap;
9
import java.util.concurrent.atomic.AtomicInteger;
10

11
/**
12
 * Class to collect metrics for performance analysis.
13
 */
14
public final class MetricsCollector {
15

16
    private final AtomicInteger loadCounter = new AtomicInteger(0);
18✔
17
    private final AtomicInteger typeRepositoriesCounter = new AtomicInteger(0);
18✔
18
    private final AtomicInteger pubkeyRepositoriesCounter = new AtomicInteger(0);
18✔
19
    private final AtomicInteger fullRepositoriesCounter = new AtomicInteger(0);
18✔
20

21
    private final Map<StatusController.State, AtomicInteger> statusStates = new ConcurrentHashMap<>();
15✔
22

23
    /**
24
     * Creates new metrics collector object.
25
     *
26
     * @param meterRegistry The registry instance
27
     */
28
    public MetricsCollector(MeterRegistry meterRegistry) {
6✔
29
        // Numeric metrics
30
        Gauge.builder("registry.load.counter", loadCounter, AtomicInteger::get).register(meterRegistry);
24✔
31
        Gauge.builder("registry.type.repositories.counter", typeRepositoriesCounter, AtomicInteger::get).register(meterRegistry);
24✔
32
        Gauge.builder("registry.pubkey.repositories.counter", pubkeyRepositoriesCounter, AtomicInteger::get).register(meterRegistry);
24✔
33
        Gauge.builder("registry.full.repositories.counter", fullRepositoriesCounter, AtomicInteger::get).register(meterRegistry);
24✔
34

35
        // Circuit-breaker observability: expose both the raw counter and a boolean
36
        // "breaker active" flag. The boolean is redundant with counter >= threshold
37
        // but much cleaner to visualise in Grafana (the counter can saturate well
38
        // above the threshold during a sustained outage, which makes a single
39
        // "is the breaker tripped?" alert awkward to express over the raw value).
40
        Gauge.builder("registry.loader.consecutive_batch_failures",
12✔
41
                        () -> (double) JellyNanopubLoader.consecutiveBatchFailures)
×
42
                .description("Consecutive loadUpdates batches that threw an exception before succeeding")
6✔
43
                .register(meterRegistry);
6✔
44
        Gauge.builder("registry.loader.breaker_active",
12✔
45
                        () -> JellyNanopubLoader.consecutiveBatchFailures >= JellyNanopubLoader.BREAKER_THRESHOLD ? 1.0 : 0.0)
×
46
                .description("1 if the loader circuit breaker is tripped (consecutive failures >= threshold), 0 otherwise")
6✔
47
                .register(meterRegistry);
6✔
48
        // Liveness signal that works without log access: seconds since the last
49
        // non-exceptional loadUpdates return. Counts both "loaded a batch" and
50
        // "caught up, nothing to do" as progress. An instance whose value climbs
51
        // unbounded while peers stay low is stuck on something the other
52
        // gauges don't capture.
53
        Gauge.builder("registry.loader.last_successful_batch_age_seconds",
12✔
54
                        () -> {
55
                            long t = JellyNanopubLoader.lastSuccessfulBatchAtMs;
×
56
                            if (t == 0L) return 0.0;    // not started yet
×
57
                            return (System.currentTimeMillis() - t) / 1000.0;
×
58
                        })
59
                .description("Seconds since the last non-exceptional loadUpdates return (idle or loading)")
6✔
60
                .register(meterRegistry);
6✔
61

62
        // Status label metrics
63
        for (final var status : StatusController.State.values()) {
48✔
64
            AtomicInteger stateGauge = new AtomicInteger(0);
15✔
65
            statusStates.put(status, stateGauge);
18✔
66
            Gauge.builder("registry.server.status", stateGauge, AtomicInteger::get)
15✔
67
                    .description("Server status (1 if current)")
9✔
68
                    .tag("status", status.name())
9✔
69
                    .register(meterRegistry);
6✔
70
        }
71

72
        // Spaces / AuthorityResolver gauges. These read volatile fields kept
73
        // by AuthorityResolver — no SPARQL on the scrape path. Each lambda
74
        // re-fetches the singleton to match the lazy-init pattern used by
75
        // the rest of the codebase.
76
        Gauge.builder("registry.spaces.subjects.admin_ris",
12✔
77
                        () -> (double) AuthorityResolver.get().getLastSubjectTotals().adminRIs())
18✔
78
                .description("Distinct admin gen:RoleInstantiation subjects in the current space-state graph (last build/cycle observation)")
6✔
79
                .register(meterRegistry);
6✔
80
        Gauge.builder("registry.spaces.subjects.attachment_ras",
12✔
81
                        () -> (double) AuthorityResolver.get().getLastSubjectTotals().attachmentRAs())
×
82
                .description("Distinct gen:RoleAssignment subjects in the current space-state graph (last build/cycle observation)")
6✔
83
                .register(meterRegistry);
6✔
84
        Gauge.builder("registry.spaces.subjects.non_admin_ris",
12✔
85
                        () -> (double) AuthorityResolver.get().getLastSubjectTotals().nonAdminRIs())
×
86
                .description("Distinct non-admin gen:RoleInstantiation subjects in the current space-state graph (last build/cycle observation)")
6✔
87
                .register(meterRegistry);
6✔
88
        Gauge.builder("registry.spaces.delta.last_inserted_triples",
12✔
89
                        () -> (double) AuthorityResolver.get().getLastInsertedTriplesTotal())
×
90
                .description("Total inserted triples across all five tiers in the most recent full build or incremental cycle")
6✔
91
                .register(meterRegistry);
6✔
92
        Gauge.builder("registry.spaces.rebuild.last_duration_seconds",
12✔
93
                        () -> AuthorityResolver.get().getLastFullBuildDurationMs() / 1000.0)
×
94
                .description("Wall-clock duration of the most recent full space-state build")
6✔
95
                .register(meterRegistry);
6✔
96
        Gauge.builder("registry.spaces.cycle.last_duration_seconds",
12✔
97
                        () -> AuthorityResolver.get().getLastIncrementalCycleDurationMs() / 1000.0)
×
98
                .description("Wall-clock duration of the most recent incremental space-state cycle that did work")
6✔
99
                .register(meterRegistry);
6✔
100
        Gauge.builder("registry.spaces.processed_up_to_lag",
12✔
101
                        () -> (double) AuthorityResolver.get().getLastProcessedUpToLag())
15✔
102
                .description("currentLoadCounter - processedUpTo observed at the start of the most recent incremental cycle (0 after a full build)")
6✔
103
                .register(meterRegistry);
6✔
104
    }
3✔
105

106
    /**
107
     * Updates the metrics based on the current state of the system.
108
     */
109
    public void updateMetrics() {
110
        // Update numeric metrics
111
        loadCounter.set((int) StatusController.get().getState().loadCounter);
21✔
112
        // Request repository names once, to avoid multiple calls
113
        var repoNames = TripleStore.get().getRepositoryNames();
9✔
114
        if (repoNames == null) {
6!
115
            repoNames = Set.of();
×
116
        }
117
        typeRepositoriesCounter.set(
12✔
118
                (int) repoNames
119
                        .stream()
6✔
120
                        .filter(repo -> repo.startsWith("type_"))
15✔
121
                        .count()
6✔
122
        );
123
        pubkeyRepositoriesCounter.set(
12✔
124
                (int) repoNames
125
                        .stream()
6✔
126
                        .filter(repo -> repo.startsWith("pubkey_"))
15✔
127
                        .count()
6✔
128
        );
129
        fullRepositoriesCounter.set(repoNames.size());
15✔
130

131
        // Update status gauge
132
        final var currentStatus = StatusController.get().getState().state;
12✔
133
        for (final var status : StatusController.State.values()) {
48✔
134
            statusStates.get(status).set(status.equals(currentStatus) ? 1 : 0);
33!
135
        }
136
    }
3✔
137
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc