• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SpiNNakerManchester / JavaSpiNNaker / 7172

17 Oct 2025 11:50AM UTC coverage: 36.244% (+0.003%) from 36.241%
7172

push

github

web-flow
Merge pull request #1345 from SpiNNakerManchester/7.4.2

7.4.2

1912 of 5898 branches covered (32.42%)

Branch coverage included in aggregate %.

8962 of 24104 relevant lines covered (37.18%)

0.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.37
/SpiNNaker-allocserv/src/main/java/uk/ac/manchester/spinnaker/alloc/bmp/BMPController.java
1
/*
2
 * Copyright (c) 2021 The University of Manchester
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     https://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
package uk.ac.manchester.spinnaker.alloc.bmp;
17

18
import static java.lang.String.format;
19
import static java.lang.Thread.currentThread;
20
import static java.lang.Thread.sleep;
21
import static java.util.Objects.requireNonNull;
22
import static org.slf4j.LoggerFactory.getLogger;
23
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.GET_SERIAL;
24
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.READ_BL;
25
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.READ_TEMP;
26
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.WRITE_BL;
27
import static uk.ac.manchester.spinnaker.alloc.model.JobState.DESTROYED;
28
import static uk.ac.manchester.spinnaker.alloc.model.JobState.QUEUED;
29

30
import java.io.IOException;
31
import java.lang.Thread.UncaughtExceptionHandler;
32
import java.time.Instant;
33
import java.util.ArrayList;
34
import java.util.Collection;
35
import java.util.HashMap;
36
import java.util.HashSet;
37
import java.util.LinkedList;
38
import java.util.List;
39
import java.util.Map;
40
import java.util.Optional;
41
import java.util.function.Consumer;
42
import java.util.stream.Collectors;
43

44
import jakarta.annotation.PostConstruct;
45

46
import org.slf4j.Logger;
47
import org.springframework.beans.factory.ObjectProvider;
48
import org.springframework.beans.factory.annotation.Autowired;
49
import org.springframework.jmx.export.annotation.ManagedResource;
50
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
51
import org.springframework.stereotype.Service;
52

53
import com.google.errorprone.annotations.RestrictedApi;
54
import com.google.errorprone.annotations.concurrent.GuardedBy;
55

56
import uk.ac.manchester.spinnaker.alloc.ForTestingOnly;
57
import uk.ac.manchester.spinnaker.alloc.ServiceMasterControl;
58
import uk.ac.manchester.spinnaker.alloc.SpallocProperties.AllocatorProperties;
59
import uk.ac.manchester.spinnaker.alloc.SpallocProperties.TxrxProperties;
60
import uk.ac.manchester.spinnaker.alloc.admin.ReportMailSender;
61
import uk.ac.manchester.spinnaker.alloc.allocator.AllocatorTask;
62
import uk.ac.manchester.spinnaker.alloc.allocator.Epochs;
63
import uk.ac.manchester.spinnaker.alloc.allocator.SpallocAPI;
64
import uk.ac.manchester.spinnaker.alloc.db.DatabaseAPI.Connection;
65
import uk.ac.manchester.spinnaker.alloc.db.DatabaseAwareBean;
66
import uk.ac.manchester.spinnaker.alloc.db.Row;
67
import uk.ac.manchester.spinnaker.alloc.model.Direction;
68
import uk.ac.manchester.spinnaker.alloc.model.JobState;
69
import uk.ac.manchester.spinnaker.machine.board.BMPBoard;
70
import uk.ac.manchester.spinnaker.machine.board.BMPCoords;
71
import uk.ac.manchester.spinnaker.machine.board.HasBMPLocation;
72
import uk.ac.manchester.spinnaker.messages.model.ADCInfo;
73
import uk.ac.manchester.spinnaker.messages.model.Blacklist;
74
import uk.ac.manchester.spinnaker.transceiver.ProcessException;
75
import uk.ac.manchester.spinnaker.transceiver.ProcessException.CallerProcessException;
76
import uk.ac.manchester.spinnaker.transceiver.ProcessException.PermanentProcessException;
77
import uk.ac.manchester.spinnaker.transceiver.ProcessException.TransientProcessException;
78
import uk.ac.manchester.spinnaker.transceiver.SpinnmanException;
79
import uk.ac.manchester.spinnaker.utils.UsedInJavadocOnly;
80

81
/**
82
 * Manages the BMPs of machines controlled by Spalloc.
83
 *
84
 * @author Donal Fellows
85
 */
86
@Service("bmpController")
87
@ManagedResource("Spalloc:type=BMPController,name=bmpController")
88
public class BMPController extends DatabaseAwareBean {
2✔
89
        private static final Logger log = getLogger(BMPController.class);
2✔
90

91
        @Autowired
92
        private SpallocAPI spallocCore;
93

94
        @Autowired
95
        private ServiceMasterControl serviceControl;
96

97
        @Autowired
98
        private Epochs epochs;
99

100
        @Autowired
101
        private TxrxProperties props;
102

103
        @Autowired
104
        private PhysicalSerialMapping phySerMap;
105

106
        @Autowired
107
        private AllocatorProperties allocProps;
108

109
        @Autowired
110
        private ReportMailSender emailSender;
111

112
        @Autowired
113
        private AllocatorTask allocator;
114

115
        private Object guard = new Object();
2✔
116

117
        @GuardedBy("guard")
118
        private ThreadPoolTaskScheduler scheduler;
119

120
        @GuardedBy("guard")
2✔
121
        private boolean emergencyStop = false;
122

123
        /**
124
         * Synchronizer for power request access to the database (as otherwise
125
         * deadlocks can occur when multiple transactions try to update the boards
126
         * table).
127
         */
128
        private Object powerDBSync = new Object();
2✔
129

130
        /**
131
         * Map from BMP ID to worker task that handles it.
132
         */
133
        private final Map<Integer, Worker> workers = new HashMap<>();
2✔
134

135
        /**
136
         * Factory for {@linkplain SpiNNakerControl controllers}. Only use via
137
         * {@link #controllerFactory}.
138
         */
139
        @Autowired
140
        private ObjectProvider<SpiNNakerControl> controllerFactoryBean;
141

142
        /**
143
         * Type-safe factory for {@linkplain SpiNNakerControl controllers}.
144
         */
145
        private SpiNNakerControl.Factory controllerFactory;
146

147
        @GuardedBy("this")
148
        private Throwable bmpProcessingException;
149

150
        private boolean useDummyComms = false;
2✔
151

152
        /**
153
         * An {@link UncaughtExceptionHandler}.
154
         *
155
         * @param thread
156
         *            The thread with the problem.
157
         * @param exception
158
         *            The exception that describes the problem.
159
         */
160
        @UsedInJavadocOnly(UncaughtExceptionHandler.class)
161
        private void handleException(Thread thread, Throwable exception) {
162
                log.error("uncaught exception in BMP worker {}", thread, exception);
×
163
        }
×
164

165
        // ----------------------------------------------------------------
166

167
        @PostConstruct
168
        private void init() {
169
                useDummyComms = serviceControl.isUseDummyBMP();
2✔
170
                synchronized (guard) {
2✔
171
                        // Set up scheduler
172
                        scheduler = new ThreadPoolTaskScheduler();
2✔
173
                        scheduler.setThreadGroupName("BMP");
2✔
174

175
                        controllerFactory = controllerFactoryBean::getObject;
2✔
176
                        allocator.setBMPController(this);
2✔
177

178
                        // Set the pool size to match the number of workers
179
                        makeWorkers();
2✔
180
                        if (workers.size() > 1) {
2!
181
                                scheduler.setPoolSize(workers.size());
×
182
                        }
183

184
                        // Launch the scheduler now it is all set up
185
                        scheduler.initialize();
2✔
186

187
                        // And now use the scheduler
188
                        for (var worker : workers.values()) {
2✔
189
                                scheduler.scheduleAtFixedRate(worker,
2✔
190
                                                allocProps.getPeriod());
2✔
191
                        }
2✔
192
                }
2✔
193
        }
2✔
194

195
        private void makeWorkers() {
196
                // Make workers
197
                try (var c = getConnection();
2✔
198
                                var getBmps = c.query(GET_ALL_BMPS);
2✔
199
                                var getBoards = c.query(GET_ALL_BMP_BOARDS)) {
2✔
200
                        var foundBmpIds = c.transaction(false, () -> getBmps.call(row -> {
2✔
201
                                var m = spallocCore.getMachine(row.getString("machine_name"),
2✔
202
                                                true);
203
                                var coords = new BMPCoords(row.getInt("cabinet"),
2✔
204
                                                row.getInt("frame"));
2✔
205
                                var boards = new HashMap<BMPBoard, String>();
2✔
206
                                var bmpId = row.getInt("bmp_id");
2✔
207
                                var worker = workers.get(bmpId);
2✔
208
                                if (worker != null) {
2✔
209
                                        return bmpId;
2✔
210
                                }
211
                                getBoards.call(r -> {
2✔
212
                                        boards.put(new BMPBoard(r.getInt("board_num")),
2✔
213
                                                        r.getString("address"));
2✔
214
                                        return null;
2✔
215
                                }, bmpId);
2✔
216
                                worker = new Worker(m.get(), coords, boards, bmpId);
2✔
217
                                workers.put(row.getInt("bmp_id"), worker);
2✔
218
                                return bmpId;
2✔
219
                        }));
220

221
                        // Remove any workers for BMPs that no longer exist
222
                        var toRemove = new HashSet<>(workers.keySet());
2✔
223
                        toRemove.removeAll(foundBmpIds);
2✔
224
                        for (var bmpId : toRemove) {
2!
225
                                log.info("Removing worker for BMP {}", bmpId);
×
226
                                workers.remove(bmpId).shutdown();
×
227
                        }
×
228
                }
229
        }
2✔
230

231
        /**
232
         * Trigger the execution of the workers for the given BMPs now.
233
         *
234
         * @param bmps
235
         *            A list of BMPs that have changed.
236
         */
237
        public void triggerSearch(Collection<Integer> bmps) {
238
                synchronized (guard) {
2✔
239
                        if (emergencyStop) {
2!
240
                                log.warn("Emergency stop; not triggering workers");
×
241
                                return;
×
242
                        }
243
                        for (var b : bmps) {
2✔
244
                                var worker = workers.get(b);
2✔
245
                                if (worker != null) {
2!
246
                                        scheduler.schedule(() -> worker.run(), Instant.now());
2✔
247
                                } else {
248
                                        log.error("Could not find worker for BMP {}", b);
×
249
                                }
250
                        }
2✔
251
                }
2✔
252
        }
2✔
253

254
        /**
255
         * Stops execution immediately.
256
         */
257
        public void emergencyStop() {
258
                synchronized (guard) {
2✔
259
                        emergencyStop = true;
2✔
260
                        scheduler.shutdown();
2✔
261
                        for (var worker : workers.values()) {
2✔
262
                                try {
263
                                        worker.getControl().powerOff(worker.boards.keySet());
2✔
264
                                } catch (Throwable e) {
×
265
                                        log.warn("Error when stopping", e);
×
266
                                }
2✔
267
                        }
2✔
268
                        execute(conn -> {
2✔
269
                                try (var setAllOff = conn.update(SET_ALL_BOARDS_OFF)) {
2✔
270
                                        setAllOff.call();
2✔
271
                                }
272
                                return null;
2✔
273
                        });
274
                }
2✔
275
        }
2✔
276

277
        /** An action that may throw any of a range of exceptions. */
278
        private interface ThrowingAction {
279
                void act() throws ProcessException, IOException, InterruptedException;
280
        }
281

282
        private abstract class Request {
283

284
                /**
285
                 * The BMP ID that this request is for.
286
                 */
287
                final int bmpId;
288

289
                private int numTries = 0;
2✔
290

291
                Request(int bmpId) {
2✔
292
                        this.bmpId = bmpId;
2✔
293
                }
2✔
294

295
                /**
296
                 * @return Whether this request may be repeated.
297
                 */
298
                boolean isRepeat() {
299
                        return numTries < props.getPowerAttempts();
2!
300
                }
301

302
                /**
303
                 * Basic machinery for handling exceptions that arise while performing a
304
                 * BMP action. Runs on a thread that may touch a BMP directly, but which
305
                 * may not touch the database.
306
                 * <p>
307
                 * Only subclasses should use this!
308
                 *
309
                 * @param body
310
                 *            What to attempt.
311
                 * @param onFailure
312
                 *            What to do on failure.
313
                 * @param onServiceRemove
314
                 *            If the exception looks serious, call this to trigger a
315
                 *            board being taken out of service.
316
                 * @return Whether to stop the retry loop.
317
                 * @throws InterruptedException
318
                 *             If interrupted.
319
                 */
320
                final boolean bmpAction(ThrowingAction body,
321
                                Consumer<Exception> onFailure,
322
                                Consumer<PermanentProcessException> onServiceRemove)
323
                                throws InterruptedException {
324
                        boolean isLastTry = numTries++ >= props.getPowerAttempts();
2!
325
                        Exception exn;
326
                        try {
327
                                body.act();
2✔
328
                                // Exit the retry loop (up the stack); the requests all worked
329
                                return true;
2✔
330
                        } catch (InterruptedException e) {
×
331
                                /*
332
                                 * We were interrupted! This happens when we're shutting down.
333
                                 * Log (because we're in an inconsistent state) and rethrow so
334
                                 * that the outside gets to clean up.
335
                                 */
336
                                log.error("Requests failed on BMP {} because of "
×
337
                                                + "interruption", bmpId, e);
×
338
                                currentThread().interrupt();
×
339
                                throw e;
×
340
                        } catch (TransientProcessException e) {
×
341
                                if (!isLastTry) {
×
342
                                        // Log somewhat gently; we *might* be able to recover...
343
                                        log.warn("Retrying requests on BMP {} after {}: {}",
×
344
                                                        bmpId, props.getProbeInterval(),
×
345
                                                        e.getMessage());
×
346
                                        // Ask for a retry
347
                                        return false;
×
348
                                }
349
                                exn = e;
×
350
                                log.error("Requests failed on BMP {}", bmpId, e);
×
351
                        } catch (PermanentProcessException e) {
×
352
                                log.error("BMP {} on {} is unreachable", e.source, bmpId, e);
×
353
                                onServiceRemove.accept(e);
×
354
                                exn = e;
×
355
                        } catch (CallerProcessException e) {
×
356
                                // This is probably a software bug
357
                                log.error("SW bug talking to BMP {}", bmpId, e);
×
358
                                exn = e;
×
359
                        } catch (ProcessException | IOException | RuntimeException e) {
2✔
360
                                log.error("Requests failed on BMP {}", bmpId, e);
2✔
361
                                exn = e;
2✔
362
                        }
×
363
                        /*
364
                         * Common permanent failure handling case; arrange for taking a
365
                         * board out of service, mark a request as failed, and stop the
366
                         * retry loop.
367
                         */
368
                        onFailure.accept(exn);
2✔
369
                        return true;
2✔
370
                }
371

372
                /**
373
                 * Add a report to the database of a problem with a board.
374
                 *
375
                 * @param sql
376
                 *            How to talk to the DB
377
                 * @param boardId
378
                 *            Which board has the problem
379
                 * @param jobId
380
                 *            What job was associated with the problem (if any)
381
                 * @param msg
382
                 *            Information about what the problem was
383
                 */
384
                final void addBoardReport(Connection c, int boardId, Integer jobId,
385
                                String msg) {
386
                        try (var getUser = c.query(GET_USER_DETAILS_BY_NAME);
×
387
                                        var insertBoardReport = c.update(INSERT_BOARD_REPORT)) {
×
388
                                getUser.call1(row -> row.getInt("user_id"),
×
389
                                                allocProps.getSystemReportUser()).ifPresent(
×
390
                                                                userId -> insertBoardReport.call(
×
391
                                                                                boardId, jobId,        msg, userId));
×
392
                        }
393
                }
×
394

395
                /**
396
                 * Marks a board as actually dead, and requests we send email about it.
397
                 *
398
                 * @param sql
399
                 *            How to talk to the DB
400
                 * @param boardId
401
                 *            Which board has the problem
402
                 * @param msg
403
                 *            Information about what the problem was
404
                 * @return Whether we've successfully done a change.
405
                 */
406
                final void markBoardAsDead(Connection c, int boardId, String msg) {
407
                        try (var setFunctioning = c.update(SET_FUNCTIONING_FIELD);
×
408
                                        var findBoardById = c.query(FIND_BOARD_BY_ID)) {
×
409
                                boolean result = setFunctioning.call(false, boardId) > 0;
×
410
                                if (result) {
×
411
                                        findBoardById.call1(row -> {
×
412
                                                var ser = row.getString("physical_serial_id");
×
413
                                                if (ser == null) {
×
414
                                                        ser = "<UNKNOWN>";
×
415
                                                }
416
                                                var fullMessage = format(
×
417
                                                                "Marked board at %d,%d,%d of %s (serial: %s) "
418
                                                                                + "as dead: %s",
419
                                                                row.getInt("x"), row.getInt("y"),
×
420
                                                                row.getInt("z"), row.getString("machine_name"),
×
421
                                                                ser, msg);
422
                                                emailSender.sendServiceMail(fullMessage);
×
423
                                                return null;
×
424
                                        }, boardId);
×
425
                                }
426
                        }
427
                }
×
428

429
                boolean processRequest(SpiNNakerControl control) {
430
                        while (isRepeat()) {
2!
431
                                try {
432
                                        if (tryProcessRequest(control)) {
2!
433
                                                return true;
2✔
434
                                        }
435
                                        sleep(props.getProbeInterval().toMillis());
×
436
                                } catch (InterruptedException e) {
×
437
                                        // If this happens, just cancel the transaction;
438
                                        // when we come back, all things will be redone.
439
                                        throw new RuntimeException(e);
×
440
                                }
×
441
                        }
442
                        return false;
×
443
                }
444

445
                abstract boolean tryProcessRequest(SpiNNakerControl control)
446
                                throws InterruptedException;
447
        }
448

449
        /**
450
         * Describes a request to modify the power status of a collection of boards.
451
         * The boards must be on a single machine and must all be assigned to a
452
         * single job.
453
         * <p>
454
         * This is the message that is sent from the main thread to the per-BMP
455
         * worker threads.
456
         *
457
         * @author Donal Fellows
458
         */
459
        private final class PowerRequest extends Request {
460
                private final List<BMPBoard> powerOnBoards = new ArrayList<>();
2✔
461

462
                private final List<BMPBoard> powerOffBoards = new ArrayList<>();
2✔
463

464
                private final List<Link> linkRequests = new ArrayList<>();
2✔
465

466
                private final int jobId;
467

468
                private final JobState from;
469

470
                private final JobState to;
471

472
                private final List<Integer> changeIds = new ArrayList<>();
2✔
473

474
                private final Map<Integer, Integer> boardToId = new HashMap<>();
2✔
475

476
                /**
                 * Create a request. Each power change is sorted into the
                 * power-on or power-off list according to its {@code power}
                 * flag, its {@code offLinks} become link requests for that
                 * board, and its change ID and board-number-to-board-ID mapping
                 * are recorded.
                 *
                 * @param bmpId
                 *            The BMP ID that this request is for.
                 * @param jobId
                 *            For what job is this?
                 * @param from
                 *            What state is the job moving from?
                 * @param to
                 *            What state is the job moving to?
                 * @param powerChanges
                 *            The individual per-board changes that make up this
                 *            request.
                 */
                PowerRequest(int bmpId, int jobId, JobState from, JobState to,
                                List<PowerChange> powerChanges) {
                        super(bmpId);
                        this.jobId = jobId;
                        this.from = from;
                        this.to = to;
                        for (var change : powerChanges) {
                                var targetList = change.power ? powerOnBoards : powerOffBoards;
                                targetList.add(new BMPBoard(change.boardNum));
                                for (var link : change.offLinks) {
                                        linkRequests.add(new Link(change.boardNum, link));
                                }
                                changeIds.add(change.changeId);
                                boardToId.put(change.boardNum, change.boardId);
                        }
                }
2✔
526

527
                /**
528
                 * Change the power state of boards in this request.
529
                 *
530
                 * @param controllers
531
                 *            How to actually communicate with the machine
532
                 * @throws ProcessException
533
                 *             If the transceiver chokes
534
                 * @throws InterruptedException
535
                 *             If interrupted
536
                 * @throws IOException
537
                 *             If network I/O fails
538
                 */
539
                void changeBoardPowerState(SpiNNakerControl controller)
540
                                throws ProcessException, InterruptedException, IOException {
541

542
                        // Send any power on commands
543
                        if (!powerOnBoards.isEmpty()) {
2✔
544
                                controller.powerOnAndCheck(powerOnBoards);
2✔
545
                        }
546

547
                        // Process perimeter link requests next
548
                        for (var linkReq : linkRequests) {
2✔
549
                                // Set the link state, as required
550
                                controller.setLinkOff(linkReq);
2✔
551
                        }
2✔
552

553
                        // Finally send any power off commands
554
                        if (!powerOffBoards.isEmpty()) {
2✔
555
                                controller.powerOff(powerOffBoards);
2✔
556
                        }
557
                }
2✔
558

559
                /**
560
                 * Handles the database changes after a set of changes to a BMP complete
561
                 * successfully. We will move the job to the state it supposed to be in.
562
                 *
563
                 * @param sql
564
                 *            How to access the DB
565
                 * @return Whether the state of boards or jobs has changed.
566
                 */
567
                private void done() {
568
                        try (var c = getConnection();
2✔
569
                                        var deallocateBoards = c.update(DEALLOCATE_BMP_BOARDS_JOB);
2✔
570
                                        var deleteChange = c.update(FINISHED_PENDING);
2✔
571
                                        var setBoardPowerOn = c.update(SET_BOARD_POWER_ON);
2✔
572
                                        var setBoardPowerOff = c.update(SET_BOARD_POWER_OFF)) {
2✔
573
                                c.transaction(() -> {
2✔
574
                                        int turnedOn = powerOnBoards.stream().map(this::getBoardId)
2✔
575
                                                        .mapToInt(setBoardPowerOn::call).sum();
2✔
576
                                        int turnedOff =
2✔
577
                                                        powerOffBoards.stream().map(this::getBoardId)
2✔
578
                                                                        .mapToInt(setBoardPowerOff::call).sum();
2✔
579

580
                                        if (to == DESTROYED || to == QUEUED) {
2!
581
                                                /*
582
                                                 * Need to mark the boards as not allocated; can't do
583
                                                 * that until they've been switched off.
584
                                                 */
585
                                                deallocateBoards.call(jobId, bmpId);
2✔
586
                                        }
587
                                        int completed = changeIds.stream().mapToInt(
2✔
588
                                                        deleteChange::call).sum();
2✔
589

590
                                        log.debug("BMP ACTION SUCCEEDED ({}:{}->{}): on:{} off:{} "
2✔
591
                                                        + "completed: {}",
592
                                                        jobId, from, to, turnedOn, turnedOff, completed);
2✔
593
                                });
2✔
594
                        }
595

596
                        // Tell the allocator something has happened
597
                        allocator.updateJob(jobId, from, to);
2✔
598
                }
2✔
599

600
                /**
601
                 * Handles the database changes after a set of changes to a BMP complete
602
                 * with a failure. We will roll back the job state to what it was
603
                 * before.
604
                 *
605
                 * @param sql
606
                 *            How to access the DB
607
                 * @return Whether the state of boards or jobs has changed.
608
                 */
609
                private void failed() {
610
                        try (var c = getConnection();
2✔
611
                                        var deallocateBoards = c.update(DEALLOCATE_BMP_BOARDS_JOB);
2✔
612
                                        var deleteChange = c.update(FINISHED_PENDING);
2✔
613
                                        var errorChange = c.update(ERROR_PENDING);
2✔
614
                                        var setBoardPowerOff = c.update(SET_BOARD_POWER_OFF)) {
2✔
615
                                c.transaction(() -> {
2✔
616
                                        // We should mark the boards as off
617
                                        int turnedOff =
2✔
618
                                                        powerOffBoards.stream().map(this::getBoardId)
2✔
619
                                                                        .mapToInt(setBoardPowerOff::call).sum();
2✔
620

621
                                        // ... even those that we should be powering on ...
622
                                        turnedOff +=
2✔
623
                                                        powerOnBoards.stream().map(this::getBoardId)
2✔
624
                                                                        .mapToInt(setBoardPowerOff::call).sum();
2✔
625

626
                                        // If we are going to queued or destroyed, we can just
627
                                        // ignore the error as we will reallocate anyway
628
                                        int completed = 0;
2✔
629
                                        if (to == DESTROYED || to == QUEUED) {
2!
630
                                                // Need to mark the boards as not allocated; slightly
631
                                                // dodgy since they might still be on, but not a lot
632
                                                // we can do about it!
633
                                                deallocateBoards.call(jobId, bmpId);
2✔
634
                                                completed = changeIds.stream().mapToInt(
2✔
635
                                                                deleteChange::call).sum();
2✔
636
                                        } else {
637

638
                                                // If we are going to READY, we must mark changes as
639
                                                // failed to make sure we don't think we are done!
640
                                                completed = changeIds.stream().mapToInt(
2✔
641
                                                                errorChange::call).sum();
2✔
642
                                        }
643

644
                                        log.debug(
2✔
645
                                                        "BMP ACTION FAILED on {} ({}:{}->{}) off:{} "
646
                                                        + " completed {}",
647
                                                        bmpId, jobId, from, to, turnedOff, completed);
2✔
648
                                });
2✔
649
                        }
650
                        // Tell the allocator something has happened
651
                        allocator.updateJob(jobId, from, to);
2✔
652
                }
2✔
653

654
                /**
655
                 * Process an action to power on or off a set of boards. Runs on a
656
                 * thread that may touch a BMP directly, but which may not touch the
657
                 * database.
658
                 *
659
                 * @param controller
660
                 *            How to actually reach the BMPs.
661
                 * @return Whether this action has "succeeded" and shouldn't be retried.
662
                 * @throws InterruptedException
663
                 *             If interrupted.
664
                 */
665
                @Override
666
                boolean tryProcessRequest(SpiNNakerControl controller)
667
                                throws InterruptedException {
668
                        boolean ok = bmpAction(() -> {
2✔
669
                                changeBoardPowerState(controller);
2✔
670
                                // We want to ensure the lead board is alive
671
                                controller.ping(powerOnBoards);
2✔
672
                                synchronized (powerDBSync) {
2✔
673
                                        done();
2✔
674
                                }
2✔
675
                        }, e -> {
2✔
676
                                synchronized (powerDBSync) {
2✔
677
                                        failed();
2✔
678
                                }
2✔
679
                                synchronized (BMPController.this) {
2✔
680
                                        bmpProcessingException = e;
2✔
681
                                }
2✔
682
                        }, ppe -> {
2✔
683
                                synchronized (powerDBSync) {
×
684
                                        badBoard(ppe);
×
685
                                }
×
686
                        });
×
687
                        return ok;
2✔
688
                }
689

690
                @Override
691
                public String toString() {
692
                        var sb = new StringBuilder("PowerRequest(for=")
×
693
                                        .append(bmpId);
×
694
                        sb.append(";on=").append(powerOnBoards);
×
695
                        sb.append(",off=").append(powerOffBoards);
×
696
                        sb.append(",links=").append(linkRequests);
×
697
                        return sb.append(")").toString();
×
698
                }
699

700
                private static final String REPORT_MSG =
701
                                "board was not reachable when trying to power it: ";
702

703
                /**
704
                 * When a BMP is unroutable, we must tell the alloc engine to pick
705
                 * somewhere else, and we should mark the board as out of service too;
706
                 * it's never going to work so taking it out right away is the only sane
707
                 * plan. We also need to nuke the planned changes. Retrying is bad.
708
                 *
709
                 * @param failure
710
                 *            The failure message.
711
                 * @return Whether the state of boards or jobs has changed.
712
                 */
713
                private void badBoard(ProcessException failure) {
714
                        try (var c = getConnection()) {
×
715
                                c.transaction(() -> {
×
716
                                        getBoardId(failure.source).ifPresent(boardId -> {
×
717
                                                // Mark the board as dead right now
718
                                                markBoardAsDead(c, boardId, REPORT_MSG + failure);
×
719
                                                // Add a report if we can
720
                                                addBoardReport(c, boardId, jobId, REPORT_MSG + failure);
×
721
                                        });
×
722
                                });
×
723
                        }
724
                }
×
725

726
                /**
727
                 * Given a board address, get the ID that it corresponds to. Reverses
728
                 * {@link #idToBoard}.
729
                 *
730
                 * @param addr
731
                 *            The board address.
732
                 * @return The ID, if one can be found.
733
                 */
734
                private Optional<Integer> getBoardId(HasBMPLocation addr) {
735
                        return Optional.ofNullable(boardToId.get(addr.getBoard()));
×
736
                }
737

738
                private Integer getBoardId(BMPBoard board) {
739
                        return boardToId.get(board.board);
2✔
740
                }
741
        }
742

743
        /**
744
         * A request to read or write information on a BMP. Includes blacklists,
745
         * serial numbers, temperature data, etc.
746
         *
747
         * @author Donal Fellows
748
         */
749
        private final class BoardRequest extends Request {
750
                private final NonBootOperation op;
751

752
                private final int opId;
753

754
                private final int boardId;
755

756
                private final BMPCoords bmp;
757

758
                private final BMPBoard board;
759

760
                private final String bmpSerialId;
761

762
                private final Blacklist blacklist;
763

764
                private final int machineId;
765

766
                private BoardRequest(int bmpId, NonBootOperation op, Row row) {
2✔
767
                        super(bmpId);
2✔
768
                        this.op = op;
2✔
769
                        opId = row.getInt("op_id");
2✔
770
                        boardId = row.getInt("board_id");
2✔
771
                        bmp = new BMPCoords(row.getInt("cabinet"), row.getInt("frame"));
2✔
772
                        board = new BMPBoard(row.getInt("board_num"));
2✔
773
                        if (op == WRITE_BL) {
2✔
774
                                blacklist = row.getSerial("data", Blacklist.class);
2✔
775
                        } else {
776
                                blacklist = null;
2✔
777
                        }
778
                        bmpSerialId = row.getString("bmp_serial_id");
2✔
779
                        machineId = row.getInt("machine_id");
2✔
780
                }
2✔
781

782
                /** The serial number actually read from the board. */
783
                private String readSerial;
784

785
                /**
786
                 * Access the DB to store the serial number information that we
787
                 * retrieved. A transaction should already be held.
788
                 *
789
                 * @param c
790
                 *            How to access the DB
791
                 * @return Whether we've changed anything
792
                 */
793
                private void recordSerialIds(Connection c) {
794
                        try (var setBoardSerialIds = c.update(SET_BOARD_SERIAL_IDS)) {
2✔
795
                                setBoardSerialIds.call(boardId, readSerial,
2✔
796
                                                phySerMap.getPhysicalId(readSerial));
2✔
797
                        }
798
                }
2✔
799

800
                /**
801
                 * Access the DB to mark the read request as successful and store the
802
                 * blacklist that was read. A transaction should already be held.
803
                 *
804
                 * @param c
805
                 *            How to access the DB
806
                 * @param readBlacklist
807
                 *            The blacklist that was read
808
                 * @return Whether we've changed anything
809
                 */
810
                private void doneReadBlacklist(Connection c, Blacklist readBlacklist) {
811
                        try (var completed = c.update(COMPLETED_BOARD_INFO_READ)) {
2✔
812
                                log.debug("Completing blacklist read opId {}", opId);
2✔
813
                                completed.call(readBlacklist, opId);
2✔
814
                        }
815
                }
2✔
816

817
                /**
818
                 * Access the DB to mark the write request as successful. A transaction
819
                 * should already be held.
820
                 *
821
                 * @param c
822
                 *            How to access the DB
823
                 * @return Whether we've changed anything
824
                 */
825
                private void doneWriteBlacklist(Connection c) {
826
                        try (var completed = c.update(COMPLETED_BLACKLIST_WRITE)) {
2✔
827
                                completed.call(opId);
2✔
828
                        }
829
                }
2✔
830

831
                /**
832
                 * Access the DB to mark the read request as successful; the actual
833
                 * store of the serial data is elsewhere
834
                 * ({@link #recordSerialIds(Connection)}). A transaction should already
835
                 * be held.
836
                 *
837
                 * @param c
838
                 *            How to access the DB
839
                 * @return Whether we've changed anything
840
                 */
841
                private void doneReadSerial(Connection c) {
842
                        try (var completed = c.update(COMPLETED_GET_SERIAL_REQ)) {
2✔
843
                                completed.call(opId);
2✔
844
                        }
845
                }
2✔
846

847
                /**
848
                 * Access the DB to mark the read request as successful and store the
849
                 * ADC info that was read. A transaction should be held.
850
                 *
851
                 * @param c
852
                 *            The database connection.
853
                 */
854
                private void doneReadTemps(Connection c, ADCInfo adcInfo) {
855
                        try (var completed = c.update(COMPLETED_BOARD_INFO_READ)) {
×
856
                                log.debug("Completing temperature read opId {}", opId);
×
857
                                completed.call(adcInfo, opId);
×
858
                        }
859
                }
×
860

861
                /**
862
                 * Access the DB to mark the request as failed and store the exception.
863
                 *
864
                 * @param exn
865
                 *            The exception that caused the failure.
866
                 * @return Whether we've changed anything
867
                 */
868
                private void failed(Exception exn) {
869
                        try (var c = getConnection();
×
870
                                        var failed = c.update(FAILED_BLACKLIST_OP)) {
×
871
                                c.transaction(() -> failed.call(exn, opId));
×
872
                        }
873
                }
×
874

875
                private static final String REPORT_MSG =
876
                                "board was not reachable when trying to access its blacklist: ";
877

878
                /**
879
                 * Access the DB to mark a board as out of service.
880
                 *
881
                 * @param exn
882
                 *            The exception that caused the failure.
883
                 * @return Whether we've changed anything
884
                 */
885
                void takeOutOfService(Exception exn) {
886
                        try (var c = getConnection()) {
×
887
                                c.transaction(() -> {
×
888
                                        addBoardReport(c, boardId, null, REPORT_MSG + exn);
×
889
                                        markBoardAsDead(c, boardId, REPORT_MSG + exn);
×
890
                                });
×
891
                        }
892
                }
×
893

894
                /**
895
                 * Process an action to work with a blacklist or serial number. Runs on
896
                 * a thread that may touch a BMP directly, but which may not touch the
897
                 * database.
898
                 *
899
                 * @param controller
900
                 *            How to actually reach the BMP.
901
                 * @return Whether this action has "succeeded" and shouldn't be retried.
902
                 * @throws InterruptedException
903
                 *             If interrupted.
904
                 */
905
                @Override
906
                boolean tryProcessRequest(SpiNNakerControl controller)
907
                                throws InterruptedException {
908
                        return bmpAction(() -> {
2✔
909
                                switch (op) {
2!
910
                                case WRITE_BL:
911
                                        writeBlacklist(controller);
2✔
912
                                        break;
2✔
913
                                case READ_BL:
914
                                        readBlacklist(controller);
2✔
915
                                        break;
2✔
916
                                case GET_SERIAL:
917
                                        readSerial(controller);
2✔
918
                                        break;
2✔
919
                                case READ_TEMP:
920
                                        readTemps(controller);
×
921
                                        break;
×
922
                                default:
923
                                        throw new IllegalArgumentException();
×
924
                                }
925
                                epochs.blacklistChanged(boardId);
2✔
926
                                epochs.machineChanged(machineId);
2✔
927
                        }, e -> {
2✔
928
                                failed(e);
×
929
                                epochs.blacklistChanged(boardId);
×
930
                                epochs.machineChanged(machineId);
×
931
                        }, ppe -> {
×
932
                                takeOutOfService(ppe);
×
933
                        });
×
934
                }
935

936
                /**
937
                 * Process an action to read a blacklist.
938
                 *
939
                 * @param controller
940
                 *            How to actually reach the BMP.
941
                 * @throws InterruptedException
942
                 *             If interrupted.
943
                 * @throws IOException
944
                 *             If the network is unhappy.
945
                 * @throws ProcessException
946
                 *             If the BMP rejects a message.
947
                 */
948
                private void readBlacklist(SpiNNakerControl controller)
949
                                throws InterruptedException, ProcessException, IOException {
950
                        readSerial = controller.readSerial(board);
2✔
951
                        if (bmpSerialId != null && !bmpSerialId.equals(readSerial)) {
2!
952
                                /*
953
                                 * Doesn't match; WARN but keep going; hardware may just be
954
                                 * remapped behind our back.
955
                                 */
956
                                log.warn(
×
957
                                                "blacklist read mismatch: expected serial ID '{}' "
958
                                                                + "not equal to actual serial ID '{}'",
959
                                                bmpSerialId, readSerial);
960
                        }
961
                        var readBlacklist = controller.readBlacklist(board);
2✔
962
                        try (var c = getConnection()) {
2✔
963
                                c.transaction(() -> {
2✔
964
                                        recordSerialIds(c);
2✔
965
                                        doneReadBlacklist(c, readBlacklist);
2✔
966
                                });
2✔
967
                        }
968
                }
2✔
969

970
                /**
971
                 * Process an action to write a blacklist.
972
                 *
973
                 * @param controller
974
                 *            How to actually reach the BMP.
975
                 * @throws InterruptedException
976
                 *             If interrupted.
977
                 * @throws IOException
978
                 *             If the network is unhappy.
979
                 * @throws ProcessException
980
                 *             If the BMP rejects a message.
981
                 * @throws IllegalStateException
982
                 *             If the operation is applied to a board other than the one
983
                 *             that it is expected to apply to.
984
                 */
985
                private void writeBlacklist(SpiNNakerControl controller)
986
                                throws InterruptedException, ProcessException, IOException {
987
                        readSerial = controller.readSerial(board);
2✔
988
                        if (bmpSerialId != null && !bmpSerialId.equals(readSerial)) {
2!
989
                                // Doesn't match, so REALLY unsafe to keep going!
990
                                throw new IllegalStateException(format(
×
991
                                                "aborting blacklist write: expected serial ID '%s' "
992
                                                                + "not equal to actual serial ID '%s'",
993
                                                bmpSerialId, readSerial));
994
                        }
995
                        controller.writeBlacklist(board, requireNonNull(blacklist));
2✔
996
                        try (var c = getConnection()) {
2✔
997
                                c.transaction(() -> doneWriteBlacklist(c));
2✔
998
                        }
999
                }
2✔
1000

1001
                /**
1002
                 * Process an action to read the serial number from a BMP.
1003
                 *
1004
                 * @param controller
1005
                 *            How to actually reach the BMP.
1006
                 * @throws InterruptedException
1007
                 *             If interrupted.
1008
                 * @throws IOException
1009
                 *             If the network is unhappy
1010
                 * @throws ProcessException
1011
                 *             If the BMP rejects a message.
1012
                 */
1013
                private void readSerial(SpiNNakerControl controller)
1014
                                throws InterruptedException, ProcessException, IOException {
1015
                        readSerial = controller.readSerial(board);
2✔
1016
                        try (var c = getConnection()) {
2✔
1017
                                c.transaction(() -> {
2✔
1018
                                        recordSerialIds(c);
2✔
1019
                                        doneReadSerial(c);
2✔
1020
                                });
2✔
1021
                        }
1022
                }
2✔
1023

1024
                /**
1025
                 * Process an action to read some temperature data.
1026
                 *
1027
                 * @param controller
1028
                 *            How to actually reach the BMP.
1029
                 * @throws InterruptedException
1030
                 *             If interrupted.
1031
                 * @throws IOException
1032
                 *             If the network is unhappy.
1033
                 * @throws ProcessException
1034
                 *             If the BMP rejects a message.
1035
                 */
1036
                private void readTemps(SpiNNakerControl controller)
1037
                                throws InterruptedException, ProcessException, IOException {
1038
                        var adcInfo = controller.readTemp(board);
×
1039
                        try (var c = getConnection()) {
×
1040
                                c.transaction(() -> doneReadTemps(c, adcInfo));
×
1041
                        }
1042
                }
×
1043

1044
                @Override
1045
                public String toString() {
1046
                        var sb = new StringBuilder("BoardRequest(for ");
×
1047
                        sb.append("bmp=").append(bmp);
×
1048
                        sb.append(",board=").append(boardId);
×
1049
                        sb.append(",op=").append(op);
×
1050
                        return sb.append(")").toString();
×
1051
                }
1052
        }
1053

1054
        private class PowerChange {
1055
                private final Integer changeId;
1056

1057
                private final int jobId;
1058

1059
                private final Integer boardId;
1060

1061
                private final Integer boardNum;
1062

1063
                private final Instant powerOffTime;
1064

1065
                private final boolean power;
1066

1067
                private final JobState from;
1068

1069
                private final JobState to;
1070

1071
                private final List<Direction> offLinks;
1072

1073
                PowerChange(Row row) {
2✔
1074
                        changeId = row.getInteger("change_id");
2✔
1075
                        jobId = row.getInt("job_id");
2✔
1076
                        boardId = row.getInteger("board_id");
2✔
1077
                        boardNum = row.getInteger("board_num");
2✔
1078
                        power = row.getBoolean("power");
2✔
1079
                        from = row.getEnum("from_state", JobState.class);
2✔
1080
                        to = row.getEnum("to_state", JobState.class);
2✔
1081
                        offLinks = List.of(Direction.values()).stream().filter(
2✔
1082
                                        link -> !row.getBoolean(link.columnName)).collect(
2✔
1083
                                                        Collectors.toList());
2✔
1084
                        Instant powerOff = row.getInstant("power_off_timestamp");
2✔
1085
                        if (powerOff == null) {
2✔
1086
                                powerOff = Instant.EPOCH;
2✔
1087
                        }
1088
                        powerOffTime = powerOff;
2✔
1089
                }
2✔
1090

1091
                boolean isSameJob(PowerChange p) {
1092
                        return p.jobId == jobId && p.from == from && p.to == to;
×
1093
                }
1094
        }
1095

1096
        // ----------------------------------------------------------------
1097
        // WORKER IMPLEMENTATION
1098

1099
        /** A worker of a given BMP. */
1100
        private final class Worker implements Runnable {
1101
                /**
                 * What are we controlling? Created on first use by
                 * {@link #getControl()}.
                 */
                private SpiNNakerControl control;

                /** The machine that the BMP belongs to. */
                private final SpallocAPI.Machine machine;

                /** The coordinates of the BMP. */
                private final BMPCoords coords;

                /** The boards of the BMP, as handed to the controller factory. */
                private final Map<BMPBoard, String> boards;

                /** Which boards are we looking at? */
                private final int bmpId;

                /**
                 * Make a worker for a BMP. The controller itself is not created here.
                 *
                 * @param machine
                 *            The machine that the BMP belongs to.
                 * @param coords
                 *            The coordinates of the BMP.
                 * @param boards
                 *            The boards of the BMP.
                 * @param bmpId
                 *            The ID of the BMP.
                 */
                Worker(SpallocAPI.Machine machine, BMPCoords coords,
                                Map<BMPBoard, String> boards, int bmpId) {
                        this.bmpId = bmpId;
                        this.machine = machine;
                        this.coords = coords;
                        this.boards = boards;

                        log.debug("Created worker for boards {}", bmpId);
                }
2✔
1122

1123
                private SpiNNakerControl getControl() {
1124
                        if (control == null) {
2✔
1125
                                if (useDummyComms) {
2✔
1126
                                        control = new SpiNNakerControlDummy();
2✔
1127
                                } else {
1128
                                        try {
1129
                                                control = controllerFactory.create(machine, coords,
2✔
1130
                                                                boards);
1131
                                        } catch (Exception e) {
×
1132
                                                log.error("Could not create control for BMP '{}'",
×
1133
                                                                bmpId, e);
×
1134
                                        }
2✔
1135
                                }
1136
                        }
1137
                        return control;
2✔
1138
                }
1139

1140
                /**
1141
                 * Periodically call to update, or trigger externally.
1142
                 */
1143
                @Override
1144
                public synchronized void run() {
1145
                        log.trace("Searching for changes on BMP {}", bmpId);
2✔
1146

1147
                        try {
1148
                                var changes = getRequestedOperations();
2✔
1149
                                for (var change : changes) {
2✔
1150
                                        change.processRequest(getControl());
2✔
1151
                                }
2✔
1152
                        } catch (Exception e) {
×
1153
                                log.error("unhandled exception for BMP '{}'", bmpId, e);
×
1154
                        }
2✔
1155
                }
2✔
1156

1157
                private boolean waitedLongEnough(PowerChange change) {
1158
                        // Power off can be done any time
1159
                        if (!change.power) {
2✔
1160
                                return true;
2✔
1161
                        }
1162

1163
                        // Power on should wait until a time after last off
1164
                        Instant powerOnTime = change.powerOffTime.plus(
2✔
1165
                                        props.getOffWaitTime());
2✔
1166
                        return powerOnTime.isBefore(Instant.now());
2✔
1167
                }
1168

1169
                /**
1170
                 * Get the things that we want the worker to do. <em>Be very
1171
                 * careful!</em> Because this necessarily involves the database, this
1172
                 * must not touch the BMP handle as those operations take a long time
1173
                 * and we absolutely must not have a transaction open at the same time.
1174
                 *
1175
                 * @return List of operations to perform.
1176
                 */
1177
                private List<Request> getRequestedOperations() {
1178
                        var requests = new ArrayList<Request>();
2✔
1179
                        try (var c = getConnection();
2✔
1180
                                        var getPowerRequests = c.query(GET_CHANGES);
2✔
1181
                                        var getBlacklistReads = c.query(GET_BLACKLIST_READS);
2✔
1182
                                        var getBlacklistWrites = c.query(GET_BLACKLIST_WRITES);
2✔
1183
                                        var getReadSerialInfos = c.query(GET_SERIAL_INFO_REQS);
2✔
1184
                                        var getReadTemps = c.query(GET_TEMP_INFO_REQS)) {
2✔
1185
                                c.transaction(false, () -> {
2✔
1186
                                        // Batch power requests by job
1187
                                        var powerChanges = new LinkedList<>(
2✔
1188
                                                        getPowerRequests.call(PowerChange::new, bmpId));
2✔
1189
                                        while (!powerChanges.isEmpty()) {
2✔
1190
                                                var change = powerChanges.poll();
2✔
1191
                                                var jobChanges = new ArrayList<>(List.of(change));
2✔
1192
                                                var canDoNow = waitedLongEnough(change);
2✔
1193
                                                while (!powerChanges.isEmpty()
2!
1194
                                                                && change.isSameJob(powerChanges.peek())) {
×
1195
                                                        canDoNow &= waitedLongEnough(powerChanges.peek());
×
1196
                                                        jobChanges.add(powerChanges.poll());
×
1197
                                                }
1198
                                                if (!jobChanges.isEmpty() && canDoNow) {
2!
1199
                                                        log.debug("Running job changes {}", jobChanges);
2✔
1200
                                                        requests.add(new PowerRequest(bmpId, change.jobId,
2✔
1201
                                                                        change.from, change.to, jobChanges));
1202
                                                }
1203
                                        }
2✔
1204

1205
                                        // Leave these until quiet
1206
                                        if (requests.isEmpty()) {
2✔
1207
                                                requests.addAll(getBlacklistReads.call(
2✔
1208
                                                                row -> new BoardRequest(bmpId, READ_BL, row),
2✔
1209
                                                                bmpId));
2✔
1210
                                        }
1211
                                        if (requests.isEmpty()) {
2✔
1212
                                                requests.addAll(getBlacklistWrites.call(
2✔
1213
                                                                row -> new BoardRequest(bmpId, WRITE_BL, row),
2✔
1214
                                                                bmpId));
2✔
1215
                                                requests.addAll(getReadSerialInfos.call(
2✔
1216
                                                                row -> new BoardRequest(bmpId, GET_SERIAL, row),
2✔
1217
                                                                bmpId));
2✔
1218
                                                requests.addAll(getReadTemps.call(
2✔
1219
                                                                row -> new BoardRequest(bmpId, READ_TEMP, row),
×
1220
                                                                bmpId));
2✔
1221
                                        }
1222
                                });
2✔
1223
                        } catch (Exception e) {
×
1224
                                log.error("unhandled exception for BMP '{}'", bmpId, e);
×
1225
                        }
2✔
1226
                        return requests;
2✔
1227
                }
1228

1229
                private void shutdown() {
1230
                        log.debug("Shutting down worker for BMP {}", bmpId);
×
1231
                        if (control != null) {
×
1232
                                control.close();
×
1233
                        }
1234
                }
×
1235
        }
1236

1237
        /**
1238
         * The testing interface.
1239
         *
1240
         * @hidden
1241
         */
1242
        @ForTestingOnly
1243
        public interface TestAPI {
1244
                /**
1245
                 * Ensure things are set up after a database change that updates the
1246
                 * BMPs in the system.
1247
                 *
1248
                 * @param useDummyComms Whether to use dummy communications in the test
1249
                 */
1250
                void prepare(boolean useDummyComms);
1251

1252
                /**
1253
                 * Reset the transceivers stored in the workers after installing a new
1254
                 * transceiver.
1255
                 */
1256
                void resetTransceivers();
1257

1258
                /**
1259
                 * The core of the scheduler.
1260
                 *
1261
                 * @param millis
1262
                 *            How many milliseconds to sleep before doing a rerun of the
1263
                 *            scheduler. If zero (or less), only one run will be done.
1264
                 * @param bmps
1265
                 *            The BMPs to be updated.
1266
                 * @throws IOException
1267
                 *             If talking to the network fails
1268
                 * @throws SpinnmanException
1269
                 *             If a BMP sends an error back
1270
                 * @throws InterruptedException
1271
                 *             If the wait for workers to spawn fails.
1272
                 */
1273
                void processRequests(long millis, Collection<Integer> bmps)
1274
                                throws IOException, SpinnmanException, InterruptedException;
1275

1276
                /**
1277
                 * The core of the scheduler. Will process for all known BMPs.
1278
                 *
1279
                 * @param millis
1280
                 *            How many milliseconds to sleep before doing a rerun of the
1281
                 *            scheduler. If zero (or less), only one run will be done.
1282
                 * @throws IOException
1283
                 *             If talking to the network fails
1284
                 * @throws SpinnmanException
1285
                 *             If a BMP sends an error back
1286
                 * @throws InterruptedException
1287
                 *             If the wait for workers to spawn fails.
1288
                 */
1289
                void processRequests(long millis)
1290
                                throws IOException, SpinnmanException, InterruptedException;
1291

1292
                /**
1293
                 * Get the last BMP exception.
1294
                 *
1295
                 * @return The exception.
1296
                 */
1297
                Throwable getBmpException();
1298

1299
                /**
1300
                 * Clear the last BMP exception.
1301
                 */
1302
                void clearBmpException();
1303

1304
                /**
1305
                 * Resume after emergency stop.
1306
                 */
1307
                void emergencyResume();
1308
        }
1309

1310
        /**
1311
         * @return The test interface.
1312
         * @deprecated This interface is just for testing.
1313
         * @hidden
1314
         */
1315
        @ForTestingOnly
1316
        @RestrictedApi(explanation = "just for testing", link = "index.html",
1317
                        allowedOnPath = ".*/src/test/java/.*")
1318
        @Deprecated
1319
        public final TestAPI getTestAPI() {
1320
                ForTestingOnly.Utils.checkForTestClassOnStack();
2✔
1321
                return new TestAPI() {
2✔
1322
                        @Override
1323
                        public void prepare(boolean useDummyCommsParam) {
1324
                                useDummyComms = useDummyCommsParam;
2✔
1325
                        }
2✔
1326

1327
                        @Override
1328
                        public void resetTransceivers() {
1329
                                for (var worker : workers.values()) {
2✔
1330
                                        worker.control = null;
2✔
1331
                                }
2✔
1332
                                // Recreate in case of new data
1333
                                makeWorkers();
2✔
1334
                        }
2✔
1335

1336
                        @Override
1337
                        public void processRequests(long millis, Collection<Integer> bmps)
1338
                                        throws IOException, SpinnmanException,
1339
                                        InterruptedException {
1340
                                /*
1341
                                 * Runs twice because it takes two cycles to fully process a
1342
                                 * request.
1343
                                 */
1344
                                triggerSearch(bmps);
2✔
1345
                                if (millis > 0) {
2!
1346
                                        Thread.sleep(millis);
2✔
1347
                                        triggerSearch(bmps);
2✔
1348
                                }
1349
                        }
2✔
1350

1351
                        @Override
1352
                        public void processRequests(long millis) throws IOException,
1353
                                        SpinnmanException, InterruptedException {
1354
                                processRequests(millis, workers.keySet());
2✔
1355
                        }
2✔
1356

1357
                        @Override
1358
                        public Throwable getBmpException() {
1359
                                synchronized (BMPController.this) {
2✔
1360
                                        return bmpProcessingException;
2✔
1361
                                }
1362
                        }
1363

1364
                        @Override
1365
                        public void clearBmpException() {
1366
                                synchronized (BMPController.this) {
2✔
1367
                                        bmpProcessingException = null;
2✔
1368
                                }
2✔
1369
                        }
2✔
1370

1371
                        @Override
1372
                        public void emergencyResume() {
1373
                                synchronized (guard) {
2✔
1374
                                        emergencyStop = false;
2✔
1375
                                        workers.clear();
2✔
1376
                                }
2✔
1377
                                init();
2✔
1378
                        }
2✔
1379
                };
1380
        }
1381
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc