• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SpiNNakerManchester / JavaSpiNNaker / 16072098972

04 Jul 2025 10:51AM UTC coverage: 37.516% (+0.04%) from 37.475%
16072098972

push

github

rowleya
Update maven version

9059 of 24147 relevant lines covered (37.52%)

1.12 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.11
/SpiNNaker-allocserv/src/main/java/uk/ac/manchester/spinnaker/alloc/bmp/BMPController.java
1
/*
2
 * Copyright (c) 2021 The University of Manchester
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     https://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
package uk.ac.manchester.spinnaker.alloc.bmp;
17

18
import static java.lang.String.format;
19
import static java.lang.Thread.currentThread;
20
import static java.lang.Thread.sleep;
21
import static java.util.Objects.requireNonNull;
22
import static org.slf4j.LoggerFactory.getLogger;
23
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.GET_SERIAL;
24
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.READ_BL;
25
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.READ_TEMP;
26
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.WRITE_BL;
27
import static uk.ac.manchester.spinnaker.alloc.model.JobState.DESTROYED;
28
import static uk.ac.manchester.spinnaker.alloc.model.JobState.QUEUED;
29

30
import java.io.IOException;
31
import java.lang.Thread.UncaughtExceptionHandler;
32
import java.time.Instant;
33
import java.util.ArrayList;
34
import java.util.Collection;
35
import java.util.HashMap;
36
import java.util.LinkedList;
37
import java.util.List;
38
import java.util.Map;
39
import java.util.Optional;
40
import java.util.function.Consumer;
41
import java.util.stream.Collectors;
42

43
import javax.annotation.PostConstruct;
44

45
import org.slf4j.Logger;
46
import org.springframework.beans.factory.ObjectProvider;
47
import org.springframework.beans.factory.annotation.Autowired;
48
import org.springframework.jmx.export.annotation.ManagedResource;
49
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
50
import org.springframework.stereotype.Service;
51

52
import com.google.errorprone.annotations.RestrictedApi;
53
import com.google.errorprone.annotations.concurrent.GuardedBy;
54

55
import uk.ac.manchester.spinnaker.alloc.ForTestingOnly;
56
import uk.ac.manchester.spinnaker.alloc.ServiceMasterControl;
57
import uk.ac.manchester.spinnaker.alloc.SpallocProperties.AllocatorProperties;
58
import uk.ac.manchester.spinnaker.alloc.SpallocProperties.TxrxProperties;
59
import uk.ac.manchester.spinnaker.alloc.admin.ReportMailSender;
60
import uk.ac.manchester.spinnaker.alloc.allocator.AllocatorTask;
61
import uk.ac.manchester.spinnaker.alloc.allocator.Epochs;
62
import uk.ac.manchester.spinnaker.alloc.allocator.SpallocAPI;
63
import uk.ac.manchester.spinnaker.alloc.db.DatabaseAPI.Connection;
64
import uk.ac.manchester.spinnaker.alloc.db.DatabaseAwareBean;
65
import uk.ac.manchester.spinnaker.alloc.db.Row;
66
import uk.ac.manchester.spinnaker.alloc.model.Direction;
67
import uk.ac.manchester.spinnaker.alloc.model.JobState;
68
import uk.ac.manchester.spinnaker.machine.board.BMPBoard;
69
import uk.ac.manchester.spinnaker.machine.board.BMPCoords;
70
import uk.ac.manchester.spinnaker.machine.board.HasBMPLocation;
71
import uk.ac.manchester.spinnaker.messages.model.ADCInfo;
72
import uk.ac.manchester.spinnaker.messages.model.Blacklist;
73
import uk.ac.manchester.spinnaker.transceiver.ProcessException;
74
import uk.ac.manchester.spinnaker.transceiver.ProcessException.CallerProcessException;
75
import uk.ac.manchester.spinnaker.transceiver.ProcessException.PermanentProcessException;
76
import uk.ac.manchester.spinnaker.transceiver.ProcessException.TransientProcessException;
77
import uk.ac.manchester.spinnaker.transceiver.SpinnmanException;
78
import uk.ac.manchester.spinnaker.utils.UsedInJavadocOnly;
79

80
/**
81
 * Manages the BMPs of machines controlled by Spalloc.
82
 *
83
 * @author Donal Fellows
84
 */
85
@Service("bmpController")
86
@ManagedResource("Spalloc:type=BMPController,name=bmpController")
87
public class BMPController extends DatabaseAwareBean {
3✔
88
        private static final Logger log = getLogger(BMPController.class);
3✔
89

90
        @Autowired
91
        private SpallocAPI spallocCore;
92

93
        @Autowired
94
        private ServiceMasterControl serviceControl;
95

96
        @Autowired
97
        private Epochs epochs;
98

99
        @Autowired
100
        private TxrxProperties props;
101

102
        @Autowired
103
        private PhysicalSerialMapping phySerMap;
104

105
        @Autowired
106
        private AllocatorProperties allocProps;
107

108
        @Autowired
109
        private ReportMailSender emailSender;
110

111
        @Autowired
112
        private AllocatorTask allocator;
113

114
        private Object guard = new Object();
3✔
115

116
        @GuardedBy("guard")
117
        private ThreadPoolTaskScheduler scheduler;
118

119
        @GuardedBy("guard")
3✔
120
        private boolean emergencyStop = false;
121

122
        /**
123
         * Synchronizer for power request access to the database (as otherwise
124
         * deadlocks can occur when multiple transactions try to update the boards
125
         * table).
126
         */
127
        private Object powerDBSync = new Object();
3✔
128

129
        /**
130
         * Map from BMP ID to worker task that handles it.
131
         */
132
        private final Map<Integer, Worker> workers = new HashMap<>();
3✔
133

134
        /**
135
         * Factory for {@linkplain SpiNNakerControl controllers}. Only use via
136
         * {@link #controllerFactory}.
137
         */
138
        @Autowired
139
        private ObjectProvider<SpiNNakerControl> controllerFactoryBean;
140

141
        /**
142
         * Type-safe factory for {@linkplain SpiNNakerControl controllers}.
143
         */
144
        private SpiNNakerControl.Factory controllerFactory;
145

146
        @GuardedBy("this")
147
        private Throwable bmpProcessingException;
148

149
        private boolean useDummyComms = false;
3✔
150

151
        /**
152
         * An {@link UncaughtExceptionHandler}.
153
         *
154
         * @param thread
155
         *            The thread with the problem.
156
         * @param exception
157
         *            The exception that describes the problem.
158
         */
159
        @UsedInJavadocOnly(UncaughtExceptionHandler.class)
160
        private void handleException(Thread thread, Throwable exception) {
161
                log.error("uncaught exception in BMP worker {}", thread, exception);
×
162
        }
×
163

164
        // ----------------------------------------------------------------
165

166
        @PostConstruct
167
        private void init() {
168
                useDummyComms = serviceControl.isUseDummyBMP();
3✔
169
                synchronized (guard) {
3✔
170
                        // Set up scheduler
171
                        scheduler = new ThreadPoolTaskScheduler();
3✔
172
                        scheduler.setThreadGroupName("BMP");
3✔
173

174
                        controllerFactory = controllerFactoryBean::getObject;
3✔
175
                        allocator.setBMPController(this);
3✔
176

177
                        // Set the pool size to match the number of workers
178
                        makeWorkers();
3✔
179
                        if (workers.size() > 1) {
3✔
180
                                scheduler.setPoolSize(workers.size());
×
181
                        }
182

183
                        // Launch the scheduler now it is all set up
184
                        scheduler.initialize();
3✔
185

186
                        // And now use the scheduler
187
                        for (var worker : workers.values()) {
3✔
188
                                scheduler.scheduleAtFixedRate(worker,
3✔
189
                                                allocProps.getPeriod());
3✔
190
                        }
3✔
191
                }
3✔
192
        }
3✔
193

194
        private List<Worker> makeWorkers() {
195
                // Make workers
196
                try (var c = getConnection();
3✔
197
                                var getBmps = c.query(GET_ALL_BMPS);
3✔
198
                                var getBoards = c.query(GET_ALL_BMP_BOARDS)) {
3✔
199
                        return c.transaction(false, () -> getBmps.call(row -> {
3✔
200
                                var m = spallocCore.getMachine(row.getString("machine_name"),
3✔
201
                                                true);
202
                                var coords = new BMPCoords(row.getInt("cabinet"),
3✔
203
                                                row.getInt("frame"));
3✔
204
                                var boards = new HashMap<BMPBoard, String>();
3✔
205
                                var bmpId = row.getInt("bmp_id");
3✔
206
                                getBoards.call(r -> {
3✔
207
                                        boards.put(new BMPBoard(r.getInt("board_num")),
3✔
208
                                                        r.getString("address"));
3✔
209
                                        return null;
3✔
210
                                }, bmpId);
3✔
211
                                var worker = new Worker(m.get(), coords, boards, bmpId);
3✔
212
                                workers.put(row.getInt("bmp_id"), worker);
3✔
213
                                return worker;
3✔
214
                        }));
215
                }
216
        }
217

218
        /**
219
         * Trigger the execution of the workers for the given BMPs now.
220
         *
221
         * @param bmps
222
         *            A list of BMPs that have changed.
223
         */
224
        public void triggerSearch(Collection<Integer> bmps) {
225
                synchronized (guard) {
3✔
226
                        if (emergencyStop) {
3✔
227
                                log.warn("Emergency stop; not triggering workers");
×
228
                                return;
×
229
                        }
230
                        for (var b : bmps) {
3✔
231
                                var worker = workers.get(b);
3✔
232
                                if (worker != null) {
3✔
233
                                        scheduler.schedule(() -> worker.run(), Instant.now());
3✔
234
                                } else {
235
                                        log.error("Could not find worker for BMP {}", b);
×
236
                                }
237
                        }
3✔
238
                }
3✔
239
        }
3✔
240

241
        /**
242
         * Stops execution immediately.
243
         */
244
        public void emergencyStop() {
245
                synchronized (guard) {
3✔
246
                        emergencyStop = true;
3✔
247
                        scheduler.shutdown();
3✔
248
                        for (var worker : workers.values()) {
3✔
249
                                try {
250
                                        worker.getControl().powerOff(worker.boards.keySet());
3✔
251
                                } catch (Throwable e) {
×
252
                                        log.warn("Error when stopping", e);
×
253
                                }
3✔
254
                        }
3✔
255
                        execute(conn -> {
3✔
256
                                try (var setAllOff = conn.update(SET_ALL_BOARDS_OFF)) {
3✔
257
                                        setAllOff.call();
3✔
258
                                }
259
                                return null;
3✔
260
                        });
261
                }
3✔
262
        }
3✔
263

264
        /** An action that may throw any of a range of exceptions. */
265
        private interface ThrowingAction {
266
                void act() throws ProcessException, IOException, InterruptedException;
267
        }
268

269
        private abstract class Request {
270
                final int bmpId;
271

272
                private int numTries = 0;
3✔
273

274
                Request(int bmpId) {
3✔
275
                        this.bmpId = bmpId;
3✔
276
                }
3✔
277

278
                /**
279
                 * @return Whether this request may be repeated.
280
                 */
281
                boolean isRepeat() {
282
                        return numTries < props.getPowerAttempts();
3✔
283
                }
284

285
                /**
286
                 * Basic machinery for handling exceptions that arise while performing a
287
                 * BMP action. Runs on a thread that may touch a BMP directly, but which
288
                 * may not touch the database.
289
                 * <p>
290
                 * Only subclasses should use this!
291
                 *
292
                 * @param body
293
                 *            What to attempt.
294
                 * @param onFailure
295
                 *            What to do on failure.
296
                 * @param onServiceRemove
297
                 *            If the exception looks serious, call this to trigger a
298
                 *            board being taken out of service.
299
                 * @return Whether to stop the retry loop.
300
                 * @throws InterruptedException
301
                 *             If interrupted.
302
                 */
303
                final boolean bmpAction(ThrowingAction body,
304
                                Consumer<Exception> onFailure,
305
                                Consumer<PermanentProcessException> onServiceRemove)
306
                                throws InterruptedException {
307
                        boolean isLastTry = numTries++ >= props.getPowerAttempts();
3✔
308
                        Exception exn;
309
                        try {
310
                                body.act();
3✔
311
                                // Exit the retry loop (up the stack); the requests all worked
312
                                return true;
3✔
313
                        } catch (InterruptedException e) {
×
314
                                /*
315
                                 * We were interrupted! This happens when we're shutting down.
316
                                 * Log (because we're in an inconsistent state) and rethrow so
317
                                 * that the outside gets to clean up.
318
                                 */
319
                                log.error("Requests failed on BMP {} because of "
×
320
                                                + "interruption", bmpId, e);
×
321
                                currentThread().interrupt();
×
322
                                throw e;
×
323
                        } catch (TransientProcessException e) {
×
324
                                if (!isLastTry) {
×
325
                                        // Log somewhat gently; we *might* be able to recover...
326
                                        log.warn("Retrying requests on BMP {} after {}: {}",
×
327
                                                        bmpId, props.getProbeInterval(),
×
328
                                                        e.getMessage());
×
329
                                        // Ask for a retry
330
                                        return false;
×
331
                                }
332
                                exn = e;
×
333
                                log.error("Requests failed on BMP {}", bmpId, e);
×
334
                        } catch (PermanentProcessException e) {
×
335
                                log.error("BMP {} on {} is unreachable", e.source, bmpId, e);
×
336
                                onServiceRemove.accept(e);
×
337
                                exn = e;
×
338
                        } catch (CallerProcessException e) {
×
339
                                // This is probably a software bug
340
                                log.error("SW bug talking to BMP {}", bmpId, e);
×
341
                                exn = e;
×
342
                        } catch (ProcessException | IOException | RuntimeException e) {
3✔
343
                                log.error("Requests failed on BMP {}", bmpId, e);
3✔
344
                                exn = e;
3✔
345
                        }
×
346
                        /*
347
                         * Common permanent failure handling case; arrange for taking a
348
                         * board out of service, mark a request as failed, and stop the
349
                         * retry loop.
350
                         */
351
                        onFailure.accept(exn);
3✔
352
                        return true;
3✔
353
                }
354

355
                /**
356
                 * Add a report to the database of a problem with a board.
357
                 *
358
                 * @param sql
359
                 *            How to talk to the DB
360
                 * @param boardId
361
                 *            Which board has the problem
362
                 * @param jobId
363
                 *            What job was associated with the problem (if any)
364
                 * @param msg
365
                 *            Information about what the problem was
366
                 */
367
                final void addBoardReport(Connection c, int boardId, Integer jobId,
368
                                String msg) {
369
                        try (var getUser = c.query(GET_USER_DETAILS_BY_NAME);
×
370
                                        var insertBoardReport = c.update(INSERT_BOARD_REPORT)) {
×
371
                                getUser.call1(row -> row.getInt("user_id"),
×
372
                                                allocProps.getSystemReportUser()).ifPresent(
×
373
                                                                userId -> insertBoardReport.call(
×
374
                                                                                boardId, jobId,        msg, userId));
×
375
                        }
376
                }
×
377

378
                /**
379
                 * Marks a board as actually dead, and requests we send email about it.
380
                 *
381
                 * @param sql
382
                 *            How to talk to the DB
383
                 * @param boardId
384
                 *            Which board has the problem
385
                 * @param msg
386
                 *            Information about what the problem was
387
                 * @return Whether we've successfully done a change.
388
                 */
389
                final void markBoardAsDead(Connection c, int boardId, String msg) {
390
                        try (var setFunctioning = c.update(SET_FUNCTIONING_FIELD);
×
391
                                        var findBoardById = c.query(FIND_BOARD_BY_ID)) {
×
392
                                boolean result = setFunctioning.call(false, boardId) > 0;
×
393
                                if (result) {
×
394
                                        findBoardById.call1(row -> {
×
395
                                                var ser = row.getString("physical_serial_id");
×
396
                                                if (ser == null) {
×
397
                                                        ser = "<UNKNOWN>";
×
398
                                                }
399
                                                var fullMessage = format(
×
400
                                                                "Marked board at %d,%d,%d of %s (serial: %s) "
401
                                                                                + "as dead: %s",
402
                                                                row.getInt("x"), row.getInt("y"),
×
403
                                                                row.getInt("z"), row.getString("machine_name"),
×
404
                                                                ser, msg);
405
                                                emailSender.sendServiceMail(fullMessage);
×
406
                                                return null;
×
407
                                        }, boardId);
×
408
                                }
409
                        }
410
                }
×
411

412
                boolean processRequest(SpiNNakerControl control) {
413
                        while (isRepeat()) {
3✔
414
                                try {
415
                                        if (tryProcessRequest(control)) {
3✔
416
                                                return true;
3✔
417
                                        }
418
                                        sleep(props.getProbeInterval().toMillis());
×
419
                                } catch (InterruptedException e) {
×
420
                                        // If this happens, just cancel the transaction;
421
                                        // when we come back, all things will be redone.
422
                                        throw new RuntimeException(e);
×
423
                                }
×
424
                        }
425
                        return false;
×
426
                }
427

428
                abstract boolean tryProcessRequest(SpiNNakerControl control)
429
                                throws InterruptedException;
430
        }
431

432
        /**
433
         * Describes a request to modify the power status of a collection of boards.
434
         * The boards must be on a single machine and must all be assigned to a
435
         * single job.
436
         * <p>
437
         * This is the message that is sent from the main thread to the per-BMP
438
         * worker threads.
439
         *
440
         * @author Donal Fellows
441
         */
442
        private final class PowerRequest extends Request {
443
                private final List<BMPBoard> powerOnBoards = new ArrayList<>();
3✔
444

445
                private final List<BMPBoard> powerOffBoards = new ArrayList<>();
3✔
446

447
                private final List<Link> linkRequests = new ArrayList<>();
3✔
448

449
                private final int jobId;
450

451
                private final JobState from;
452

453
                private final JobState to;
454

455
                private final List<Integer> changeIds = new ArrayList<>();
3✔
456

457
                private final Map<Integer, Integer> boardToId = new HashMap<>();
3✔
458

459
                /**
                 * Create a request from a list of per-board power changes. Each
                 * change contributes its board to either the power-on or the
                 * power-off list, one link request per link it wants off, its
                 * change ID, and a mapping from its board number to its board ID.
                 *
                 * @param bmpId
                 *            Which BMP is this request for?
                 * @param jobId
                 *            For what job is this?
                 * @param from
                 *            What state is the job moving from?
                 * @param to
                 *            What state is the job moving to?
                 * @param powerChanges
                 *            The individual board changes that make up this request.
                 *            <em>Must not</em> be {@code null}.
                 */
                PowerRequest(int bmpId, int jobId, JobState from, JobState to,
                                List<PowerChange> powerChanges) {
                        super(bmpId);
                        for (var change : powerChanges) {
                                var board = new BMPBoard(change.boardNum);
                                if (change.power) {
                                        powerOnBoards.add(board);
                                } else {
                                        powerOffBoards.add(board);
                                }
                                for (var link : change.offLinks) {
                                        linkRequests.add(new Link(change.boardNum, link));
                                }
                                changeIds.add(change.changeId);
                                boardToId.put(change.boardNum, change.boardId);
                        }
                        this.jobId = jobId;
                        this.from = from;
                        this.to = to;
                }
3✔
509

510
                /**
511
                 * Change the power state of boards in this request.
512
                 *
513
                 * @param controllers
514
                 *            How to actually communicate with the machine
515
                 * @throws ProcessException
516
                 *             If the transceiver chokes
517
                 * @throws InterruptedException
518
                 *             If interrupted
519
                 * @throws IOException
520
                 *             If network I/O fails
521
                 */
522
                void changeBoardPowerState(SpiNNakerControl controller)
523
                                throws ProcessException, InterruptedException, IOException {
524

525
                        // Send any power on commands
526
                        if (!powerOnBoards.isEmpty()) {
3✔
527
                                controller.powerOnAndCheck(powerOnBoards);
3✔
528
                        }
529

530
                        // Process perimeter link requests next
531
                        for (var linkReq : linkRequests) {
3✔
532
                                // Set the link state, as required
533
                                controller.setLinkOff(linkReq);
3✔
534
                        }
3✔
535

536
                        // Finally send any power off commands
537
                        if (!powerOffBoards.isEmpty()) {
3✔
538
                                controller.powerOff(powerOffBoards);
3✔
539
                        }
540
                }
3✔
541

542
                /**
543
                 * Handles the database changes after a set of changes to a BMP complete
544
                 * successfully. We will move the job to the state it supposed to be in.
545
                 *
546
                 * @param sql
547
                 *            How to access the DB
548
                 * @return Whether the state of boards or jobs has changed.
549
                 */
550
                private void done() {
551
                        try (var c = getConnection();
3✔
552
                                        var deallocateBoards = c.update(DEALLOCATE_BMP_BOARDS_JOB);
3✔
553
                                        var deleteChange = c.update(FINISHED_PENDING);
3✔
554
                                        var setBoardPowerOn = c.update(SET_BOARD_POWER_ON);
3✔
555
                                        var setBoardPowerOff = c.update(SET_BOARD_POWER_OFF)) {
3✔
556
                                c.transaction(() -> {
3✔
557
                                        int turnedOn = powerOnBoards.stream().map(this::getBoardId)
3✔
558
                                                        .mapToInt(setBoardPowerOn::call).sum();
3✔
559
                                        int turnedOff =
3✔
560
                                                        powerOffBoards.stream().map(this::getBoardId)
3✔
561
                                                                        .mapToInt(setBoardPowerOff::call).sum();
3✔
562

563
                                        if (to == DESTROYED || to == QUEUED) {
3✔
564
                                                /*
565
                                                 * Need to mark the boards as not allocated; can't do
566
                                                 * that until they've been switched off.
567
                                                 */
568
                                                deallocateBoards.call(jobId, bmpId);
3✔
569
                                        }
570
                                        int completed = changeIds.stream().mapToInt(
3✔
571
                                                        deleteChange::call).sum();
3✔
572

573
                                        log.debug("BMP ACTION SUCCEEDED ({}:{}->{}): on:{} off:{} "
3✔
574
                                                        + "completed: {}",
575
                                                        jobId, from, to, turnedOn, turnedOff, completed);
3✔
576
                                });
3✔
577
                        }
578

579
                        // Tell the allocator something has happened
580
                        allocator.updateJob(jobId, from, to);
3✔
581
                }
3✔
582

583
                /**
584
                 * Handles the database changes after a set of changes to a BMP complete
585
                 * with a failure. We will roll back the job state to what it was
586
                 * before.
587
                 *
588
                 * @param sql
589
                 *            How to access the DB
590
                 * @return Whether the state of boards or jobs has changed.
591
                 */
592
                private void failed() {
593
                        try (var c = getConnection();
3✔
594
                                        var deallocateBoards = c.update(DEALLOCATE_BMP_BOARDS_JOB);
3✔
595
                                        var deleteChange = c.update(FINISHED_PENDING);
3✔
596
                                        var errorChange = c.update(ERROR_PENDING);
3✔
597
                                        var setBoardPowerOff = c.update(SET_BOARD_POWER_OFF)) {
3✔
598
                                c.transaction(() -> {
3✔
599
                                        // We should mark the boards as off
600
                                        int turnedOff =
3✔
601
                                                        powerOffBoards.stream().map(this::getBoardId)
3✔
602
                                                                        .mapToInt(setBoardPowerOff::call).sum();
3✔
603

604
                                        // ... even those that we should be powering on ...
605
                                        turnedOff +=
3✔
606
                                                        powerOnBoards.stream().map(this::getBoardId)
3✔
607
                                                                        .mapToInt(setBoardPowerOff::call).sum();
3✔
608

609
                                        // If we are going to queued or destroyed, we can just
610
                                        // ignore the error as we will reallocate anyway
611
                                        int completed = 0;
3✔
612
                                        if (to == DESTROYED || to == QUEUED) {
3✔
613
                                                // Need to mark the boards as not allocated; slightly
614
                                                // dodgy since they might still be on, but not a lot
615
                                                // we can do about it!
616
                                                deallocateBoards.call(jobId, bmpId);
3✔
617
                                                completed = changeIds.stream().mapToInt(
3✔
618
                                                                deleteChange::call).sum();
3✔
619
                                        } else {
620

621
                                                // If we are going to READY, we must mark changes as
622
                                                // failed to make sure we don't think we are done!
623
                                                completed = changeIds.stream().mapToInt(
3✔
624
                                                                errorChange::call).sum();
3✔
625
                                        }
626

627
                                        log.debug(
3✔
628
                                                        "BMP ACTION FAILED on {} ({}:{}->{}) off:{} "
629
                                                        + " completed {}",
630
                                                        bmpId, jobId, from, to, turnedOff, completed);
3✔
631
                                });
3✔
632
                        }
633
                        // Tell the allocator something has happened
634
                        allocator.updateJob(jobId, from, to);
3✔
635
                }
3✔
636

637
                /**
638
                 * Process an action to power on or off a set of boards. Runs on a
639
                 * thread that may touch a BMP directly, but which may not touch the
640
                 * database.
641
                 *
642
                 * @param controller
643
                 *            How to actually reach the BMPs.
644
                 * @return Whether this action has "succeeded" and shouldn't be retried.
645
                 * @throws InterruptedException
646
                 *             If interrupted.
647
                 */
648
                @Override
649
                boolean tryProcessRequest(SpiNNakerControl controller)
650
                                throws InterruptedException {
651
                        boolean ok = bmpAction(() -> {
3✔
652
                                changeBoardPowerState(controller);
3✔
653
                                // We want to ensure the lead board is alive
654
                                controller.ping(powerOnBoards);
3✔
655
                                synchronized (powerDBSync) {
3✔
656
                                        done();
3✔
657
                                }
3✔
658
                        }, e -> {
3✔
659
                                synchronized (powerDBSync) {
3✔
660
                                        failed();
3✔
661
                                }
3✔
662
                                synchronized (BMPController.this) {
3✔
663
                                        bmpProcessingException = e;
3✔
664
                                }
3✔
665
                        }, ppe -> {
3✔
666
                                synchronized (powerDBSync) {
×
667
                                        badBoard(ppe);
×
668
                                }
×
669
                        });
×
670
                        return ok;
3✔
671
                }
672

673
                @Override
674
                public String toString() {
675
                        var sb = new StringBuilder("PowerRequest(for=")
×
676
                                        .append(bmpId);
×
677
                        sb.append(";on=").append(powerOnBoards);
×
678
                        sb.append(",off=").append(powerOffBoards);
×
679
                        sb.append(",links=").append(linkRequests);
×
680
                        return sb.append(")").toString();
×
681
                }
682

683
                private static final String REPORT_MSG =
684
                                "board was not reachable when trying to power it: ";
685

686
                /**
687
                 * When a BMP is unroutable, we must tell the alloc engine to pick
688
                 * somewhere else, and we should mark the board as out of service too;
689
                 * it's never going to work so taking it out right away is the only sane
690
                 * plan. We also need to nuke the planned changes. Retrying is bad.
691
                 *
692
                 * @param failure
693
                 *            The failure message.
694
                 * @return Whether the state of boards or jobs has changed.
695
                 */
696
                private void badBoard(ProcessException failure) {
697
                        try (var c = getConnection()) {
×
698
                                c.transaction(() -> {
×
699
                                        getBoardId(failure.source).ifPresent(boardId -> {
×
700
                                                // Mark the board as dead right now
701
                                                markBoardAsDead(c, boardId, REPORT_MSG + failure);
×
702
                                                // Add a report if we can
703
                                                addBoardReport(c, boardId, jobId, REPORT_MSG + failure);
×
704
                                        });
×
705
                                });
×
706
                        }
707
                }
×
708

709
                /**
710
                 * Given a board address, get the ID that it corresponds to. Reverses
711
                 * {@link #idToBoard}.
712
                 *
713
                 * @param addr
714
                 *            The board address.
715
                 * @return The ID, if one can be found.
716
                 */
717
                private Optional<Integer> getBoardId(HasBMPLocation addr) {
718
                        return Optional.ofNullable(boardToId.get(addr.getBoard()));
×
719
                }
720

721
                private Integer getBoardId(BMPBoard board) {
722
                        return boardToId.get(board.board);
3✔
723
                }
724
        }
725

726
        /**
727
         * A request to read or write information on a BMP. Includes blacklists,
728
         * serial numbers, temperature data, etc.
729
         *
730
         * @author Donal Fellows
731
         */
732
        private final class BoardRequest extends Request {
733
                private final NonBootOperation op;
734

735
                private final int opId;
736

737
                private final int boardId;
738

739
                private final BMPCoords bmp;
740

741
                private final BMPBoard board;
742

743
                private final String bmpSerialId;
744

745
                private final Blacklist blacklist;
746

747
                private final int machineId;
748

749
                private BoardRequest(int bmpId, NonBootOperation op, Row row) {
3✔
750
                        super(bmpId);
3✔
751
                        this.op = op;
3✔
752
                        opId = row.getInt("op_id");
3✔
753
                        boardId = row.getInt("board_id");
3✔
754
                        bmp = new BMPCoords(row.getInt("cabinet"), row.getInt("frame"));
3✔
755
                        board = new BMPBoard(row.getInt("board_num"));
3✔
756
                        if (op == WRITE_BL) {
3✔
757
                                blacklist = row.getSerial("data", Blacklist.class);
3✔
758
                        } else {
759
                                blacklist = null;
3✔
760
                        }
761
                        bmpSerialId = row.getString("bmp_serial_id");
3✔
762
                        machineId = row.getInt("machine_id");
3✔
763
                }
3✔
764

765
                /** The serial number actually read from the board. */
766
                private String readSerial;
767

768
                /**
769
                 * Access the DB to store the serial number information that we
770
                 * retrieved. A transaction should already be held.
771
                 *
772
                 * @param c
773
                 *            How to access the DB
774
                 * @return Whether we've changed anything
775
                 */
776
                private void recordSerialIds(Connection c) {
777
                        try (var setBoardSerialIds = c.update(SET_BOARD_SERIAL_IDS)) {
3✔
778
                                setBoardSerialIds.call(boardId, readSerial,
3✔
779
                                                phySerMap.getPhysicalId(readSerial));
3✔
780
                        }
781
                }
3✔
782

783
                /**
784
                 * Access the DB to mark the read request as successful and store the
785
                 * blacklist that was read. A transaction should already be held.
786
                 *
787
                 * @param c
788
                 *            How to access the DB
789
                 * @param readBlacklist
790
                 *            The blacklist that was read
791
                 * @return Whether we've changed anything
792
                 */
793
                private void doneReadBlacklist(Connection c, Blacklist readBlacklist) {
794
                        try (var completed = c.update(COMPLETED_BOARD_INFO_READ)) {
3✔
795
                                log.debug("Completing blacklist read opId {}", opId);
3✔
796
                                completed.call(readBlacklist, opId);
3✔
797
                        }
798
                }
3✔
799

800
                /**
801
                 * Access the DB to mark the write request as successful. A transaction
802
                 * should already be held.
803
                 *
804
                 * @param c
805
                 *            How to access the DB
806
                 * @return Whether we've changed anything
807
                 */
808
                private void doneWriteBlacklist(Connection c) {
809
                        try (var completed = c.update(COMPLETED_BLACKLIST_WRITE)) {
3✔
810
                                completed.call(opId);
3✔
811
                        }
812
                }
3✔
813

814
                /**
815
                 * Access the DB to mark the read request as successful; the actual
816
                 * store of the serial data is elsewhere
817
                 * ({@link #recordSerialIds(Connection)}). A transaction should already
818
                 * be held.
819
                 *
820
                 * @param c
821
                 *            How to access the DB
822
                 * @return Whether we've changed anything
823
                 */
824
                private void doneReadSerial(Connection c) {
825
                        try (var completed = c.update(COMPLETED_GET_SERIAL_REQ)) {
3✔
826
                                completed.call(opId);
3✔
827
                        }
828
                }
3✔
829

830
                /**
831
                 * Access the DB to mark the read request as successful and store the
832
                 * ADC info that was read. A transaction should be held.
833
                 *
834
                 * @param c
835
                 *            The database connection.
836
                 */
837
                private void doneReadTemps(Connection c, ADCInfo adcInfo) {
838
                        try (var completed = c.update(COMPLETED_BOARD_INFO_READ)) {
×
839
                                log.debug("Completing temperature read opId {}", opId);
×
840
                                completed.call(adcInfo, opId);
×
841
                        }
842
                }
×
843

844
                /**
845
                 * Access the DB to mark the request as failed and store the exception.
846
                 *
847
                 * @param exn
848
                 *            The exception that caused the failure.
849
                 * @return Whether we've changed anything
850
                 */
851
                private void failed(Exception exn) {
852
                        try (var c = getConnection();
×
853
                                        var failed = c.update(FAILED_BLACKLIST_OP)) {
×
854
                                c.transaction(() -> failed.call(exn, opId));
×
855
                        }
856
                }
×
857

858
                private static final String REPORT_MSG =
859
                                "board was not reachable when trying to access its blacklist: ";
860

861
                /**
862
                 * Access the DB to mark a board as out of service.
863
                 *
864
                 * @param exn
865
                 *            The exception that caused the failure.
866
                 * @return Whether we've changed anything
867
                 */
868
                void takeOutOfService(Exception exn) {
869
                        try (var c = getConnection()) {
×
870
                                c.transaction(() -> {
×
871
                                        addBoardReport(c, boardId, null, REPORT_MSG + exn);
×
872
                                        markBoardAsDead(c, boardId, REPORT_MSG + exn);
×
873
                                });
×
874
                        }
875
                }
×
876

877
                /**
878
                 * Process an action to work with a blacklist or serial number. Runs on
879
                 * a thread that may touch a BMP directly, but which may not touch the
880
                 * database.
881
                 *
882
                 * @param controller
883
                 *            How to actually reach the BMP.
884
                 * @return Whether this action has "succeeded" and shouldn't be retried.
885
                 * @throws InterruptedException
886
                 *             If interrupted.
887
                 */
888
                @Override
889
                boolean tryProcessRequest(SpiNNakerControl controller)
890
                                throws InterruptedException {
891
                        return bmpAction(() -> {
3✔
892
                                switch (op) {
3✔
893
                                case WRITE_BL:
894
                                        writeBlacklist(controller);
3✔
895
                                        break;
3✔
896
                                case READ_BL:
897
                                        readBlacklist(controller);
3✔
898
                                        break;
3✔
899
                                case GET_SERIAL:
900
                                        readSerial(controller);
3✔
901
                                        break;
3✔
902
                                case READ_TEMP:
903
                                        readTemps(controller);
×
904
                                        break;
×
905
                                default:
906
                                        throw new IllegalArgumentException();
×
907
                                }
908
                                epochs.blacklistChanged(boardId);
3✔
909
                                epochs.machineChanged(machineId);
3✔
910
                        }, e -> {
3✔
911
                                failed(e);
×
912
                                epochs.blacklistChanged(boardId);
×
913
                                epochs.machineChanged(machineId);
×
914
                        }, ppe -> {
×
915
                                takeOutOfService(ppe);
×
916
                        });
×
917
                }
918

919
                /**
920
                 * Process an action to read a blacklist.
921
                 *
922
                 * @param controller
923
                 *            How to actually reach the BMP.
924
                 * @throws InterruptedException
925
                 *             If interrupted.
926
                 * @throws IOException
927
                 *             If the network is unhappy.
928
                 * @throws ProcessException
929
                 *             If the BMP rejects a message.
930
                 */
931
                private void readBlacklist(SpiNNakerControl controller)
932
                                throws InterruptedException, ProcessException, IOException {
933
                        readSerial = controller.readSerial(board);
3✔
934
                        if (bmpSerialId != null && !bmpSerialId.equals(readSerial)) {
3✔
935
                                /*
936
                                 * Doesn't match; WARN but keep going; hardware may just be
937
                                 * remapped behind our back.
938
                                 */
939
                                log.warn(
×
940
                                                "blacklist read mismatch: expected serial ID '{}' "
941
                                                                + "not equal to actual serial ID '{}'",
942
                                                bmpSerialId, readSerial);
943
                        }
944
                        var readBlacklist = controller.readBlacklist(board);
3✔
945
                        try (var c = getConnection()) {
3✔
946
                                c.transaction(() -> {
3✔
947
                                        recordSerialIds(c);
3✔
948
                                        doneReadBlacklist(c, readBlacklist);
3✔
949
                                });
3✔
950
                        }
951
                }
3✔
952

953
                /**
954
                 * Process an action to write a blacklist.
955
                 *
956
                 * @param controller
957
                 *            How to actually reach the BMP.
958
                 * @throws InterruptedException
959
                 *             If interrupted.
960
                 * @throws IOException
961
                 *             If the network is unhappy.
962
                 * @throws ProcessException
963
                 *             If the BMP rejects a message.
964
                 * @throws IllegalStateException
965
                 *             If the operation is applied to a board other than the one
966
                 *             that it is expected to apply to.
967
                 */
968
                private void writeBlacklist(SpiNNakerControl controller)
969
                                throws InterruptedException, ProcessException, IOException {
970
                        readSerial = controller.readSerial(board);
3✔
971
                        if (bmpSerialId != null && !bmpSerialId.equals(readSerial)) {
3✔
972
                                // Doesn't match, so REALLY unsafe to keep going!
973
                                throw new IllegalStateException(format(
×
974
                                                "aborting blacklist write: expected serial ID '%s' "
975
                                                                + "not equal to actual serial ID '%s'",
976
                                                bmpSerialId, readSerial));
977
                        }
978
                        controller.writeBlacklist(board, requireNonNull(blacklist));
3✔
979
                        try (var c = getConnection()) {
3✔
980
                                c.transaction(() -> doneWriteBlacklist(c));
3✔
981
                        }
982
                }
3✔
983

984
                /**
985
                 * Process an action to read the serial number from a BMP.
986
                 *
987
                 * @param controller
988
                 *            How to actually reach the BMP.
989
                 * @throws InterruptedException
990
                 *             If interrupted.
991
                 * @throws IOException
992
                 *             If the network is unhappy
993
                 * @throws ProcessException
994
                 *             If the BMP rejects a message.
995
                 */
996
                private void readSerial(SpiNNakerControl controller)
997
                                throws InterruptedException, ProcessException, IOException {
998
                        readSerial = controller.readSerial(board);
3✔
999
                        try (var c = getConnection()) {
3✔
1000
                                c.transaction(() -> {
3✔
1001
                                        recordSerialIds(c);
3✔
1002
                                        doneReadSerial(c);
3✔
1003
                                });
3✔
1004
                        }
1005
                }
3✔
1006

1007
                /**
1008
                 * Process an action to read some temperature data.
1009
                 *
1010
                 * @param controller
1011
                 *            How to actually reach the BMP.
1012
                 * @throws InterruptedException
1013
                 *             If interrupted.
1014
                 * @throws IOException
1015
                 *             If the network is unhappy.
1016
                 * @throws ProcessException
1017
                 *             If the BMP rejects a message.
1018
                 */
1019
                private void readTemps(SpiNNakerControl controller)
1020
                                throws InterruptedException, ProcessException, IOException {
1021
                        var adcInfo = controller.readTemp(board);
×
1022
                        try (var c = getConnection()) {
×
1023
                                c.transaction(() -> doneReadTemps(c, adcInfo));
×
1024
                        }
1025
                }
×
1026

1027
                @Override
1028
                public String toString() {
1029
                        var sb = new StringBuilder("BoardRequest(for ");
×
1030
                        sb.append("bmp=").append(bmp);
×
1031
                        sb.append(",board=").append(boardId);
×
1032
                        sb.append(",op=").append(op);
×
1033
                        return sb.append(")").toString();
×
1034
                }
1035
        }
1036

1037
        private class PowerChange {
1038
                final Integer changeId;
1039

1040
                final int jobId;
1041

1042
                final Integer boardId;
1043

1044
                final Integer boardNum;
1045

1046
                final Instant powerOffTime;
1047

1048
                final boolean power;
1049

1050
                final JobState from;
1051

1052
                final JobState to;
1053

1054
                final List<Direction> offLinks;
1055

1056
                PowerChange(Row row) {
3✔
1057
                        changeId = row.getInteger("change_id");
3✔
1058
                        jobId = row.getInt("job_id");
3✔
1059
                        boardId = row.getInteger("board_id");
3✔
1060
                        boardNum = row.getInteger("board_num");
3✔
1061
                        power = row.getBoolean("power");
3✔
1062
                        from = row.getEnum("from_state", JobState.class);
3✔
1063
                        to = row.getEnum("to_state", JobState.class);
3✔
1064
                        offLinks = List.of(Direction.values()).stream().filter(
3✔
1065
                                        link -> !row.getBoolean(link.columnName)).collect(
3✔
1066
                                                        Collectors.toList());
3✔
1067
                        Instant powerOff = row.getInstant("power_off_timestamp");
3✔
1068
                        if (powerOff == null) {
3✔
1069
                                powerOff = Instant.EPOCH;
3✔
1070
                        }
1071
                        powerOffTime = powerOff;
3✔
1072
                }
3✔
1073

1074
                boolean isSameJob(PowerChange p) {
1075
                        return p.jobId == jobId && p.from == from && p.to == to;
×
1076
                }
1077
        }
1078

1079
        // ----------------------------------------------------------------
1080
        // WORKER IMPLEMENTATION
1081

1082
        /** A worker of a given BMP. */
1083
        private final class Worker implements Runnable {
1084
                /** What are we controlling? */
1085
                private SpiNNakerControl control;
1086

1087
                private final SpallocAPI.Machine machine;
1088

1089
                private final BMPCoords coords;
1090

1091
                private final Map<BMPBoard, String> boards;
1092

1093
                /** Which boards are we looking at? */
1094
                private final int bmpId;
1095

1096
                Worker(SpallocAPI.Machine machine, BMPCoords coords,
1097
                                Map<BMPBoard, String> boards, int bmpId) {
3✔
1098
                        this.machine = machine;
3✔
1099
                        this.coords = coords;
3✔
1100
                        this.boards = boards;
3✔
1101
                        this.bmpId = bmpId;
3✔
1102

1103
                        log.debug("Created worker for boards {}", bmpId);
3✔
1104
                }
3✔
1105

1106
                private SpiNNakerControl getControl() {
1107
                        if (control == null) {
3✔
1108
                                if (useDummyComms) {
3✔
1109
                                        control = new SpiNNakerControlDummy();
3✔
1110
                                } else {
1111
                                        try {
1112
                                                control = controllerFactory.create(machine, coords,
3✔
1113
                                                                boards);
1114
                                        } catch (Exception e) {
×
1115
                                                log.error("Could not create control for BMP '{}'",
×
1116
                                                                bmpId, e);
×
1117
                                        }
3✔
1118
                                }
1119
                        }
1120
                        return control;
3✔
1121
                }
1122

1123
                /**
1124
                 * Periodically call to update, or trigger externally.
1125
                 */
1126
                @Override
1127
                public synchronized void run() {
1128
                        log.trace("Searching for changes on BMP {}", bmpId);
3✔
1129

1130
                        try {
1131
                                var changes = getRequestedOperations();
3✔
1132
                                for (var change : changes) {
3✔
1133
                                        change.processRequest(getControl());
3✔
1134
                                }
3✔
1135
                        } catch (Exception e) {
×
1136
                                log.error("unhandled exception for BMP '{}'", bmpId, e);
×
1137
                        }
3✔
1138
                }
3✔
1139

1140
                private boolean waitedLongEnough(PowerChange change) {
1141
                        // Power off can be done any time
1142
                        if (!change.power) {
3✔
1143
                                return true;
3✔
1144
                        }
1145

1146
                        // Power on should wait until a time after last off
1147
                        Instant powerOnTime = change.powerOffTime.plus(
3✔
1148
                                        props.getOffWaitTime());
3✔
1149
                        return powerOnTime.isBefore(Instant.now());
3✔
1150
                }
1151

1152
                /**
1153
                 * Get the things that we want the worker to do. <em>Be very
1154
                 * careful!</em> Because this necessarily involves the database, this
1155
                 * must not touch the BMP handle as those operations take a long time
1156
                 * and we absolutely must not have a transaction open at the same time.
1157
                 *
1158
                 * @return List of operations to perform.
1159
                 */
1160
                private List<Request> getRequestedOperations() {
1161
                        var requests = new ArrayList<Request>();
3✔
1162
                        try (var c = getConnection();
3✔
1163
                                        var getPowerRequests = c.query(GET_CHANGES);
3✔
1164
                                        var getBlacklistReads = c.query(GET_BLACKLIST_READS);
3✔
1165
                                        var getBlacklistWrites = c.query(GET_BLACKLIST_WRITES);
3✔
1166
                                        var getReadSerialInfos = c.query(GET_SERIAL_INFO_REQS);
3✔
1167
                                        var getReadTemps = c.query(GET_TEMP_INFO_REQS)) {
3✔
1168
                                c.transaction(false, () -> {
3✔
1169
                                        // Batch power requests by job
1170
                                        var powerChanges = new LinkedList<>(
3✔
1171
                                                        getPowerRequests.call(PowerChange::new, bmpId));
3✔
1172
                                        while (!powerChanges.isEmpty()) {
3✔
1173
                                                var change = powerChanges.poll();
3✔
1174
                                                var jobChanges = new ArrayList<>(List.of(change));
3✔
1175
                                                var canDoNow = waitedLongEnough(change);
3✔
1176
                                                while (!powerChanges.isEmpty()
3✔
1177
                                                                && change.isSameJob(powerChanges.peek())) {
×
1178
                                                        canDoNow &= waitedLongEnough(powerChanges.peek());
×
1179
                                                        jobChanges.add(powerChanges.poll());
×
1180
                                                }
1181
                                                if (!jobChanges.isEmpty() && canDoNow) {
3✔
1182
                                                        log.debug("Running job changes {}", jobChanges);
3✔
1183
                                                        requests.add(new PowerRequest(bmpId, change.jobId,
3✔
1184
                                                                        change.from, change.to, jobChanges));
1185
                                                }
1186
                                        }
3✔
1187

1188
                                        // Leave these until quiet
1189
                                        if (requests.isEmpty()) {
3✔
1190
                                                requests.addAll(getBlacklistReads.call(
3✔
1191
                                                                row -> new BoardRequest(bmpId, READ_BL, row),
3✔
1192
                                                                bmpId));
3✔
1193
                                        }
1194
                                        if (requests.isEmpty()) {
3✔
1195
                                                requests.addAll(getBlacklistWrites.call(
3✔
1196
                                                                row -> new BoardRequest(bmpId, WRITE_BL, row),
3✔
1197
                                                                bmpId));
3✔
1198
                                                requests.addAll(getReadSerialInfos.call(
3✔
1199
                                                                row -> new BoardRequest(bmpId, GET_SERIAL, row),
3✔
1200
                                                                bmpId));
3✔
1201
                                                requests.addAll(getReadTemps.call(
3✔
1202
                                                                row -> new BoardRequest(bmpId, READ_TEMP, row),
×
1203
                                                                bmpId));
3✔
1204
                                        }
1205
                                });
3✔
1206
                        } catch (Exception e) {
×
1207
                                log.error("unhandled exception for BMP '{}'", bmpId, e);
×
1208
                        }
3✔
1209
                        return requests;
3✔
1210
                }
1211
        }
1212

1213
        /**
1214
         * The testing interface.
1215
         *
1216
         * @hidden
1217
         */
1218
        @ForTestingOnly
1219
        public interface TestAPI {
1220
                /**
1221
                 * Ensure things are set up after a database change that updates the
1222
                 * BMPs in the system.
1223
                 *
1224
                 * @param useDummyComms Whether to use dummy communications in the test
1225
                 */
1226
                void prepare(boolean useDummyComms);
1227

1228
                /**
1229
                 * Reset the transceivers stored in the workers after installing a new
1230
                 * transceiver.
1231
                 */
1232
                void resetTransceivers();
1233

1234
                /**
1235
                 * The core of the scheduler.
1236
                 *
1237
                 * @param millis
1238
                 *            How many milliseconds to sleep before doing a rerun of the
1239
                 *            scheduler. If zero (or less), only one run will be done.
1240
                 * @param bmps
1241
                 *            The BMPs to be updated.
1242
                 * @throws IOException
1243
                 *             If talking to the network fails
1244
                 * @throws SpinnmanException
1245
                 *             If a BMP sends an error back
1246
                 * @throws InterruptedException
1247
                 *             If the wait for workers to spawn fails.
1248
                 */
1249
                void processRequests(long millis, Collection<Integer> bmps)
1250
                                throws IOException, SpinnmanException, InterruptedException;
1251

1252
                /**
1253
                 * The core of the scheduler. Will process for all known BMPs.
1254
                 *
1255
                 * @param millis
1256
                 *            How many milliseconds to sleep before doing a rerun of the
1257
                 *            scheduler. If zero (or less), only one run will be done.
1258
                 * @throws IOException
1259
                 *             If talking to the network fails
1260
                 * @throws SpinnmanException
1261
                 *             If a BMP sends an error back
1262
                 * @throws InterruptedException
1263
                 *             If the wait for workers to spawn fails.
1264
                 */
1265
                void processRequests(long millis)
1266
                                throws IOException, SpinnmanException, InterruptedException;
1267

1268
                /**
1269
                 * Get the last BMP exception.
1270
                 *
1271
                 * @return The exception.
1272
                 */
1273
                Throwable getBmpException();
1274

1275
                /**
1276
                 * Clear the last BMP exception.
1277
                 */
1278
                void clearBmpException();
1279

1280
                /**
1281
                 * Resume after emergency stop.
1282
                 */
1283
                void emergencyResume();
1284
        }
1285

1286
        /**
1287
         * @return The test interface.
1288
         * @deprecated This interface is just for testing.
1289
         * @hidden
1290
         */
1291
        @ForTestingOnly
1292
        @RestrictedApi(explanation = "just for testing", link = "index.html",
1293
                        allowedOnPath = ".*/src/test/java/.*")
1294
        @Deprecated
1295
        public final TestAPI getTestAPI() {
1296
                ForTestingOnly.Utils.checkForTestClassOnStack();
3✔
1297
                return new TestAPI() {
3✔
1298
                        @Override
1299
                        public void prepare(boolean useDummyCommsParam) {
1300
                                useDummyComms = useDummyCommsParam;
3✔
1301
                                makeWorkers();
3✔
1302
                        }
3✔
1303

1304
                        @Override
1305
                        public void resetTransceivers() {
1306
                                for (var worker : workers.values()) {
3✔
1307
                                        worker.control = null;
3✔
1308
                                }
3✔
1309
                        }
3✔
1310

1311
                        @Override
1312
                        public void processRequests(long millis, Collection<Integer> bmps)
1313
                                        throws IOException, SpinnmanException,
1314
                                        InterruptedException {
1315
                                /*
1316
                                 * Runs twice because it takes two cycles to fully process a
1317
                                 * request.
1318
                                 */
1319
                                triggerSearch(bmps);
3✔
1320
                                if (millis > 0) {
3✔
1321
                                        Thread.sleep(millis);
3✔
1322
                                        triggerSearch(bmps);
3✔
1323
                                }
1324
                        }
3✔
1325

1326
                        @Override
1327
                        public void processRequests(long millis) throws IOException,
1328
                                        SpinnmanException, InterruptedException {
1329
                                processRequests(millis, workers.keySet());
3✔
1330
                        }
3✔
1331

1332
                        @Override
1333
                        public Throwable getBmpException() {
1334
                                synchronized (BMPController.this) {
3✔
1335
                                        return bmpProcessingException;
3✔
1336
                                }
1337
                        }
1338

1339
                        @Override
1340
                        public void clearBmpException() {
1341
                                synchronized (BMPController.this) {
3✔
1342
                                        bmpProcessingException = null;
3✔
1343
                                }
3✔
1344
                        }
3✔
1345

1346
                        @Override
1347
                        public void emergencyResume() {
1348
                                synchronized (guard) {
3✔
1349
                                        emergencyStop = false;
3✔
1350
                                        workers.clear();
3✔
1351
                                }
3✔
1352
                                init();
3✔
1353
                        }
3✔
1354
                };
1355
        }
1356
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc