• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SpiNNakerManchester / JavaSpiNNaker / 15113742418

19 May 2025 01:03PM UTC coverage: 37.528% (-0.8%) from 38.278%
15113742418

Pull #1227

github

rowleya
Merge branch 'master' into emergency_stop
Pull Request #1227: Emergency stop

114 of 152 new or added lines in 10 files covered. (75.0%)

243 existing lines in 6 files now uncovered.

9062 of 24147 relevant lines covered (37.53%)

1.12 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.11
/SpiNNaker-allocserv/src/main/java/uk/ac/manchester/spinnaker/alloc/bmp/BMPController.java
1
/*
2
 * Copyright (c) 2021 The University of Manchester
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     https://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
package uk.ac.manchester.spinnaker.alloc.bmp;
17

18
import static java.lang.String.format;
19
import static java.lang.Thread.currentThread;
20
import static java.lang.Thread.sleep;
21
import static java.util.Objects.requireNonNull;
22
import static org.slf4j.LoggerFactory.getLogger;
23
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.GET_SERIAL;
24
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.READ_BL;
25
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.READ_TEMP;
26
import static uk.ac.manchester.spinnaker.alloc.bmp.NonBootOperation.WRITE_BL;
27
import static uk.ac.manchester.spinnaker.alloc.model.JobState.DESTROYED;
28
import static uk.ac.manchester.spinnaker.alloc.model.JobState.QUEUED;
29

30
import java.io.IOException;
31
import java.lang.Thread.UncaughtExceptionHandler;
32
import java.time.Instant;
33
import java.util.ArrayList;
34
import java.util.Collection;
35
import java.util.HashMap;
36
import java.util.LinkedList;
37
import java.util.List;
38
import java.util.Map;
39
import java.util.Optional;
40
import java.util.function.Consumer;
41
import java.util.stream.Collectors;
42

43
import javax.annotation.PostConstruct;
44

45
import org.slf4j.Logger;
46
import org.springframework.beans.factory.ObjectProvider;
47
import org.springframework.beans.factory.annotation.Autowired;
48
import org.springframework.jmx.export.annotation.ManagedResource;
49
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
50
import org.springframework.stereotype.Service;
51

52
import com.google.errorprone.annotations.RestrictedApi;
53
import com.google.errorprone.annotations.concurrent.GuardedBy;
54

55
import uk.ac.manchester.spinnaker.alloc.ForTestingOnly;
56
import uk.ac.manchester.spinnaker.alloc.ServiceMasterControl;
57
import uk.ac.manchester.spinnaker.alloc.SpallocProperties.AllocatorProperties;
58
import uk.ac.manchester.spinnaker.alloc.SpallocProperties.TxrxProperties;
59
import uk.ac.manchester.spinnaker.alloc.admin.ReportMailSender;
60
import uk.ac.manchester.spinnaker.alloc.allocator.AllocatorTask;
61
import uk.ac.manchester.spinnaker.alloc.allocator.Epochs;
62
import uk.ac.manchester.spinnaker.alloc.allocator.SpallocAPI;
63
import uk.ac.manchester.spinnaker.alloc.db.DatabaseAPI.Connection;
64
import uk.ac.manchester.spinnaker.alloc.db.DatabaseAwareBean;
65
import uk.ac.manchester.spinnaker.alloc.db.Row;
66
import uk.ac.manchester.spinnaker.alloc.model.Direction;
67
import uk.ac.manchester.spinnaker.alloc.model.JobState;
68
import uk.ac.manchester.spinnaker.machine.board.BMPBoard;
69
import uk.ac.manchester.spinnaker.machine.board.BMPCoords;
70
import uk.ac.manchester.spinnaker.machine.board.HasBMPLocation;
71
import uk.ac.manchester.spinnaker.messages.model.ADCInfo;
72
import uk.ac.manchester.spinnaker.messages.model.Blacklist;
73
import uk.ac.manchester.spinnaker.transceiver.ProcessException;
74
import uk.ac.manchester.spinnaker.transceiver.ProcessException.CallerProcessException;
75
import uk.ac.manchester.spinnaker.transceiver.ProcessException.PermanentProcessException;
76
import uk.ac.manchester.spinnaker.transceiver.ProcessException.TransientProcessException;
77
import uk.ac.manchester.spinnaker.transceiver.SpinnmanException;
78
import uk.ac.manchester.spinnaker.utils.UsedInJavadocOnly;
79

80
/**
81
 * Manages the BMPs of machines controlled by Spalloc.
82
 *
83
 * @author Donal Fellows
84
 */
85
@Service("bmpController")
86
@ManagedResource("Spalloc:type=BMPController,name=bmpController")
87
public class BMPController extends DatabaseAwareBean {
3✔
88
        private static final Logger log = getLogger(BMPController.class);
3✔
89

90
        @Autowired
91
        private SpallocAPI spallocCore;
92

93
        @Autowired
94
        private ServiceMasterControl serviceControl;
95

96
        @Autowired
97
        private Epochs epochs;
98

99
        @Autowired
100
        private TxrxProperties props;
101

102
        @Autowired
103
        private PhysicalSerialMapping phySerMap;
104

105
        @Autowired
106
        private AllocatorProperties allocProps;
107

108
        @Autowired
109
        private ReportMailSender emailSender;
110

111
        @Autowired
112
        private AllocatorTask allocator;
113

114
        private Object guard = new Object();
3✔
115

116
        @GuardedBy("guard")
117
        private ThreadPoolTaskScheduler scheduler;
118

119
        @GuardedBy("guard")
3✔
120
        private boolean emergencyStop = false;
121

122
        /**
123
         * Synchronizer for power request access to the database (as otherwise
124
         * deadlocks can occur when multiple transactions try to update the boards
125
         * table).
126
         */
127
        private Object powerDBSync = new Object();
3✔
128

129
        /**
130
         * Map from BMP ID to worker task that handles it.
131
         */
132
        private final Map<Integer, Worker> workers = new HashMap<>();
3✔
133

134
        /**
135
         * Factory for {@linkplain SpiNNakerControl controllers}. Only use via
136
         * {@link #controllerFactory}.
137
         */
138
        @Autowired
139
        private ObjectProvider<SpiNNakerControl> controllerFactoryBean;
140

141
        /**
142
         * Type-safe factory for {@linkplain SpiNNakerControl controllers}.
143
         */
144
        private SpiNNakerControl.Factory controllerFactory;
145

146
        @GuardedBy("this")
147
        private Throwable bmpProcessingException;
148

149
        private boolean useDummyComms = false;
3✔
150

151
        /**
152
         * An {@link UncaughtExceptionHandler}.
153
         *
154
         * @param thread
155
         *            The thread with the problem.
156
         * @param exception
157
         *            The exception that describes the problem.
158
         */
159
        @UsedInJavadocOnly(UncaughtExceptionHandler.class)
160
        private void handleException(Thread thread, Throwable exception) {
161
                log.error("uncaught exception in BMP worker {}", thread, exception);
×
162
        }
×
163

164
        // ----------------------------------------------------------------
165

166
        @PostConstruct
167
        private void init() {
168
                useDummyComms = serviceControl.isUseDummyBMP();
3✔
169
                synchronized (guard) {
3✔
170
                        // Set up scheduler
171
                        scheduler = new ThreadPoolTaskScheduler();
3✔
172
                        scheduler.setThreadGroupName("BMP");
3✔
173

174
                        controllerFactory = controllerFactoryBean::getObject;
3✔
175
                        allocator.setBMPController(this);
3✔
176

177
                        // Set the pool size to match the number of workers
178
                        makeWorkers();
3✔
179
                        if (workers.size() > 1) {
3✔
NEW
180
                                scheduler.setPoolSize(workers.size());
×
181
                        }
182

183
                        // Launch the scheduler now it is all set up
184
                        scheduler.initialize();
3✔
185

186
                        // And now use the scheduler
187
                        for (var worker : workers.values()) {
3✔
188
                                scheduler.scheduleAtFixedRate(worker,
3✔
189
                                                allocProps.getPeriod());
3✔
190
                        }
3✔
191
                }
3✔
192
        }
3✔
193

194
        private List<Worker> makeWorkers() {
195
                // Make workers
196
                try (var c = getConnection();
3✔
197
                                var getBmps = c.query(GET_ALL_BMPS);
3✔
198
                                var getBoards = c.query(GET_ALL_BMP_BOARDS)) {
3✔
199
                        return c.transaction(false, () -> getBmps.call(row -> {
3✔
200
                                var m = spallocCore.getMachine(row.getString("machine_name"),
3✔
201
                                                true);
202
                                var coords = new BMPCoords(row.getInt("cabinet"),
3✔
203
                                                row.getInt("frame"));
3✔
204
                                var boards = new HashMap<BMPBoard, String>();
3✔
205
                                var bmpId = row.getInt("bmp_id");
3✔
206
                                getBoards.call(r -> {
3✔
207
                                        boards.put(new BMPBoard(r.getInt("board_num")),
3✔
208
                                                        r.getString("address"));
3✔
209
                                        return null;
3✔
210
                                }, bmpId);
3✔
211
                                var worker = new Worker(m.get(), coords, boards, bmpId);
3✔
212
                                workers.put(row.getInt("bmp_id"), worker);
3✔
213
                                return worker;
3✔
214
                        }));
215
                }
216
        }
217

218
        /**
219
         * Trigger the execution of the workers for the given BMPs now.
220
         *
221
         * @param bmps
222
         *            A list of BMPs that have changed.
223
         */
224
        public void triggerSearch(Collection<Integer> bmps) {
225
                synchronized (guard) {
3✔
226
                        if (emergencyStop) {
3✔
NEW
227
                                log.warn("Emergency stop; not triggering workers");
×
NEW
228
                                return;
×
229
                        }
230
                        for (var b : bmps) {
3✔
231
                                var worker = workers.get(b);
3✔
232
                                if (worker != null) {
3✔
233
                                        scheduler.schedule(() -> worker.run(), Instant.now());
3✔
234
                                } else {
NEW
235
                                        log.error("Could not find worker for BMP {}", b);
×
236
                                }
237
                        }
3✔
238
                }
3✔
239
        }
3✔
240

241
        /**
242
         * Stops execution immediately.
243
         */
244
        public void emergencyStop() {
245
                synchronized (guard) {
3✔
246
                        emergencyStop = true;
3✔
247
                        scheduler.shutdown();
3✔
248
                        for (var worker : workers.values()) {
3✔
249
                                try {
250
                                        worker.getControl().powerOff(worker.boards.keySet());
3✔
NEW
251
                                } catch (ProcessException | InterruptedException
×
252
                                                | IOException e) {
NEW
253
                                        log.warn("Error when stopping", e);
×
254
                                }
3✔
255
                        }
3✔
256
                        execute(conn -> {
3✔
257
                                try (var setAllOff = conn.update(SET_ALL_BOARDS_OFF)) {
3✔
258
                                        setAllOff.call();
3✔
259
                                }
260
                                return null;
3✔
261
                        });
262
                }
3✔
263
        }
3✔
264

265
        /** An action that may throw any of a range of exceptions. */
266
        private interface ThrowingAction {
267
                void act() throws ProcessException, IOException, InterruptedException;
268
        }
269

270
        private abstract class Request {
271
                final int bmpId;
272

273
                private int numTries = 0;
3✔
274

275
                Request(int bmpId) {
3✔
276
                        this.bmpId = bmpId;
3✔
277
                }
3✔
278

279
                /**
280
                 * @return Whether this request may be repeated.
281
                 */
282
                boolean isRepeat() {
283
                        return numTries < props.getPowerAttempts();
3✔
284
                }
285

286
                /**
287
                 * Basic machinery for handling exceptions that arise while performing a
288
                 * BMP action. Runs on a thread that may touch a BMP directly, but which
289
                 * may not touch the database.
290
                 * <p>
291
                 * Only subclasses should use this!
292
                 *
293
                 * @param body
294
                 *            What to attempt.
295
                 * @param onFailure
296
                 *            What to do on failure.
297
                 * @param onServiceRemove
298
                 *            If the exception looks serious, call this to trigger a
299
                 *            board being taken out of service.
300
                 * @return Whether to stop the retry loop.
301
                 * @throws InterruptedException
302
                 *             If interrupted.
303
                 */
304
                final boolean bmpAction(ThrowingAction body,
305
                                Consumer<Exception> onFailure,
306
                                Consumer<PermanentProcessException> onServiceRemove)
307
                                throws InterruptedException {
308
                        boolean isLastTry = numTries++ >= props.getPowerAttempts();
3✔
309
                        Exception exn;
310
                        try {
311
                                body.act();
3✔
312
                                // Exit the retry loop (up the stack); the requests all worked
313
                                return true;
3✔
314
                        } catch (InterruptedException e) {
×
315
                                /*
316
                                 * We were interrupted! This happens when we're shutting down.
317
                                 * Log (because we're in an inconsistent state) and rethrow so
318
                                 * that the outside gets to clean up.
319
                                 */
320
                                log.error("Requests failed on BMP {} because of "
×
321
                                                + "interruption", bmpId, e);
×
322
                                currentThread().interrupt();
×
323
                                throw e;
×
324
                        } catch (TransientProcessException e) {
×
325
                                if (!isLastTry) {
×
326
                                        // Log somewhat gently; we *might* be able to recover...
327
                                        log.warn("Retrying requests on BMP {} after {}: {}",
×
328
                                                        bmpId, props.getProbeInterval(),
×
329
                                                        e.getMessage());
×
330
                                        // Ask for a retry
331
                                        return false;
×
332
                                }
333
                                exn = e;
×
334
                                log.error("Requests failed on BMP {}", bmpId, e);
×
335
                        } catch (PermanentProcessException e) {
×
336
                                log.error("BMP {} on {} is unreachable", e.source, bmpId, e);
×
337
                                onServiceRemove.accept(e);
×
338
                                exn = e;
×
339
                        } catch (CallerProcessException e) {
×
340
                                // This is probably a software bug
341
                                log.error("SW bug talking to BMP {}", bmpId, e);
×
342
                                exn = e;
×
343
                        } catch (ProcessException | IOException | RuntimeException e) {
3✔
344
                                log.error("Requests failed on BMP {}", bmpId, e);
3✔
345
                                exn = e;
3✔
346
                        }
×
347
                        /*
348
                         * Common permanent failure handling case; arrange for taking a
349
                         * board out of service, mark a request as failed, and stop the
350
                         * retry loop.
351
                         */
352
                        onFailure.accept(exn);
3✔
353
                        return true;
3✔
354
                }
355

356
                /**
357
                 * Add a report to the database of a problem with a board.
358
                 *
359
                 * @param sql
360
                 *            How to talk to the DB
361
                 * @param boardId
362
                 *            Which board has the problem
363
                 * @param jobId
364
                 *            What job was associated with the problem (if any)
365
                 * @param msg
366
                 *            Information about what the problem was
367
                 */
368
                final void addBoardReport(Connection c, int boardId, Integer jobId,
369
                                String msg) {
370
                        try (var getUser = c.query(GET_USER_DETAILS_BY_NAME);
×
371
                                        var insertBoardReport = c.update(INSERT_BOARD_REPORT)) {
×
372
                                getUser.call1(row -> row.getInt("user_id"),
×
373
                                                allocProps.getSystemReportUser()).ifPresent(
×
374
                                                                userId -> insertBoardReport.call(
×
375
                                                                                boardId, jobId,        msg, userId));
×
376
                        }
377
                }
×
378

379
                /**
380
                 * Marks a board as actually dead, and requests we send email about it.
381
                 *
382
                 * @param sql
383
                 *            How to talk to the DB
384
                 * @param boardId
385
                 *            Which board has the problem
386
                 * @param msg
387
                 *            Information about what the problem was
388
                 * @return Whether we've successfully done a change.
389
                 */
390
                final void markBoardAsDead(Connection c, int boardId, String msg) {
391
                        try (var setFunctioning = c.update(SET_FUNCTIONING_FIELD);
×
392
                                        var findBoardById = c.query(FIND_BOARD_BY_ID)) {
×
393
                                boolean result = setFunctioning.call(false, boardId) > 0;
×
394
                                if (result) {
×
395
                                        findBoardById.call1(row -> {
×
396
                                                var ser = row.getString("physical_serial_id");
×
397
                                                if (ser == null) {
×
398
                                                        ser = "<UNKNOWN>";
×
399
                                                }
400
                                                var fullMessage = format(
×
401
                                                                "Marked board at %d,%d,%d of %s (serial: %s) "
402
                                                                                + "as dead: %s",
403
                                                                row.getInt("x"), row.getInt("y"),
×
404
                                                                row.getInt("z"), row.getString("machine_name"),
×
405
                                                                ser, msg);
406
                                                emailSender.sendServiceMail(fullMessage);
×
407
                                                return null;
×
408
                                        }, boardId);
×
409
                                }
410
                        }
411
                }
×
412

413
                boolean processRequest(SpiNNakerControl control) {
414
                        while (isRepeat()) {
3✔
415
                                try {
416
                                        if (tryProcessRequest(control)) {
3✔
417
                                                return true;
3✔
418
                                        }
419
                                        sleep(props.getProbeInterval().toMillis());
×
420
                                } catch (InterruptedException e) {
×
421
                                        // If this happens, just cancel the transaction;
422
                                        // when we come back, all things will be redone.
423
                                        throw new RuntimeException(e);
×
424
                                }
×
425
                        }
426
                        return false;
×
427
                }
428

429
                abstract boolean tryProcessRequest(SpiNNakerControl control)
430
                                throws InterruptedException;
431
        }
432

433
        /**
434
         * Describes a request to modify the power status of a collection of boards.
435
         * The boards must be on a single machine and must all be assigned to a
436
         * single job.
437
         * <p>
438
         * This is the message that is sent from the main thread to the per-BMP
439
         * worker threads.
440
         *
441
         * @author Donal Fellows
442
         */
443
        private final class PowerRequest extends Request {
444
                private final List<BMPBoard> powerOnBoards = new ArrayList<>();
3✔
445

446
                private final List<BMPBoard> powerOffBoards = new ArrayList<>();
3✔
447

448
                private final List<Link> linkRequests = new ArrayList<>();
3✔
449

450
                private final int jobId;
451

452
                private final JobState from;
453

454
                private final JobState to;
455

456
                private final List<Integer> changeIds = new ArrayList<>();
3✔
457

458
                private final Map<Integer, Integer> boardToId = new HashMap<>();
3✔
459

460
                /**
                 * Create a request.
                 *
                 * @param bmpId
                 *            Which BMP is this request for?
                 * @param jobId
                 *            For what job is this?
                 * @param from
                 *            What state is the job moving from?
                 * @param to
                 *            What state is the job moving to?
                 * @param powerChanges
                 *            The individual changes to make. Each one identifies a
                 *            board (by board number and by DB ID), says whether that
                 *            board is to be powered on or off, lists the links of
                 *            the board for which link requests are made, and gives
                 *            the DB ID of the change record itself.
                 */
                PowerRequest(int bmpId, int jobId, JobState from, JobState to,
                                List<PowerChange> powerChanges) {
                        super(bmpId);
                        this.jobId = jobId;
                        this.from = from;
                        this.to = to;
                        for (var change : powerChanges) {
                                // Sort the board into the on-list or the off-list
                                var board = new BMPBoard(change.boardNum);
                                (change.power ? powerOnBoards : powerOffBoards).add(board);

                                for (var link : change.offLinks) {
                                        linkRequests.add(new Link(change.boardNum, link));
                                }
                                changeIds.add(change.changeId);
                                boardToId.put(change.boardNum, change.boardId);
                        }
                }
3✔
510

511
                /**
512
                 * Change the power state of boards in this request.
513
                 *
514
                 * @param controllers
515
                 *            How to actually communicate with the machine
516
                 * @throws ProcessException
517
                 *             If the transceiver chokes
518
                 * @throws InterruptedException
519
                 *             If interrupted
520
                 * @throws IOException
521
                 *             If network I/O fails
522
                 */
523
                void changeBoardPowerState(SpiNNakerControl controller)
524
                                throws ProcessException, InterruptedException, IOException {
525

526
                        // Send any power on commands
527
                        if (!powerOnBoards.isEmpty()) {
3✔
528
                                controller.powerOnAndCheck(powerOnBoards);
3✔
529
                        }
530

531
                        // Process perimeter link requests next
532
                        for (var linkReq : linkRequests) {
3✔
533
                                // Set the link state, as required
534
                                controller.setLinkOff(linkReq);
3✔
535
                        }
3✔
536

537
                        // Finally send any power off commands
538
                        if (!powerOffBoards.isEmpty()) {
3✔
539
                                controller.powerOff(powerOffBoards);
3✔
540
                        }
541
                }
3✔
542

543
                /**
544
                 * Handles the database changes after a set of changes to a BMP complete
545
                 * successfully. We will move the job to the state it supposed to be in.
546
                 *
547
                 * @param sql
548
                 *            How to access the DB
549
                 * @return Whether the state of boards or jobs has changed.
550
                 */
551
                private void done() {
552
                        try (var c = getConnection();
3✔
553
                                        var deallocateBoards = c.update(DEALLOCATE_BMP_BOARDS_JOB);
3✔
554
                                        var deleteChange = c.update(FINISHED_PENDING);
3✔
555
                                        var setBoardPowerOn = c.update(SET_BOARD_POWER_ON);
3✔
556
                                        var setBoardPowerOff = c.update(SET_BOARD_POWER_OFF)) {
3✔
557
                                c.transaction(() -> {
3✔
558
                                        int turnedOn = powerOnBoards.stream().map(this::getBoardId)
3✔
559
                                                        .mapToInt(setBoardPowerOn::call).sum();
3✔
560
                                        int turnedOff =
3✔
561
                                                        powerOffBoards.stream().map(this::getBoardId)
3✔
562
                                                                        .mapToInt(setBoardPowerOff::call).sum();
3✔
563

564
                                        if (to == DESTROYED || to == QUEUED) {
3✔
565
                                                /*
566
                                                 * Need to mark the boards as not allocated; can't do
567
                                                 * that until they've been switched off.
568
                                                 */
569
                                                deallocateBoards.call(jobId, bmpId);
3✔
570
                                        }
571
                                        int completed = changeIds.stream().mapToInt(
3✔
572
                                                        deleteChange::call).sum();
3✔
573

574
                                        log.debug("BMP ACTION SUCCEEDED ({}:{}->{}): on:{} off:{} "
3✔
575
                                                        + "completed: {}",
576
                                                        jobId, from, to, turnedOn, turnedOff, completed);
3✔
577
                                });
3✔
578
                        }
579

580
                        // Tell the allocator something has happened
581
                        allocator.updateJob(jobId, from, to);
3✔
582
                }
3✔
583

584
                /**
585
                 * Handles the database changes after a set of changes to a BMP complete
586
                 * with a failure. We will roll back the job state to what it was
587
                 * before.
588
                 *
589
                 * @param sql
590
                 *            How to access the DB
591
                 * @return Whether the state of boards or jobs has changed.
592
                 */
593
                private void failed() {
594
                        try (var c = getConnection();
3✔
595
                                        var deallocateBoards = c.update(DEALLOCATE_BMP_BOARDS_JOB);
3✔
596
                                        var deleteChange = c.update(FINISHED_PENDING);
3✔
597
                                        var errorChange = c.update(ERROR_PENDING);
3✔
598
                                        var setBoardPowerOff = c.update(SET_BOARD_POWER_OFF)) {
3✔
599
                                c.transaction(() -> {
3✔
600
                                        // We should mark the boards as off
601
                                        int turnedOff =
3✔
602
                                                        powerOffBoards.stream().map(this::getBoardId)
3✔
603
                                                                        .mapToInt(setBoardPowerOff::call).sum();
3✔
604

605
                                        // ... even those that we should be powering on ...
606
                                        turnedOff +=
3✔
607
                                                        powerOnBoards.stream().map(this::getBoardId)
3✔
608
                                                                        .mapToInt(setBoardPowerOff::call).sum();
3✔
609

610
                                        // If we are going to queued or destroyed, we can just
611
                                        // ignore the error as we will reallocate anyway
612
                                        int completed = 0;
3✔
613
                                        if (to == DESTROYED || to == QUEUED) {
3✔
614
                                                // Need to mark the boards as not allocated; slightly
615
                                                // dodgy since they might still be on, but not a lot
616
                                                // we can do about it!
617
                                                deallocateBoards.call(jobId, bmpId);
3✔
618
                                                completed = changeIds.stream().mapToInt(
3✔
619
                                                                deleteChange::call).sum();
3✔
620
                                        } else {
621

622
                                                // If we are going to READY, we must mark changes as
623
                                                // failed to make sure we don't think we are done!
624
                                                completed = changeIds.stream().mapToInt(
3✔
625
                                                                errorChange::call).sum();
3✔
626
                                        }
627

628
                                        log.debug(
3✔
629
                                                        "BMP ACTION FAILED on {} ({}:{}->{}) off:{} "
630
                                                        + " completed {}",
631
                                                        bmpId, jobId, from, to, turnedOff, completed);
3✔
632
                                });
3✔
633
                        }
634
                        // Tell the allocator something has happened
635
                        allocator.updateJob(jobId, from, to);
3✔
636
                }
3✔
637

638
                /**
639
                 * Process an action to power on or off a set of boards. Runs on a
640
                 * thread that may touch a BMP directly, but which may not touch the
641
                 * database.
642
                 *
643
                 * @param controller
644
                 *            How to actually reach the BMPs.
645
                 * @return Whether this action has "succeeded" and shouldn't be retried.
646
                 * @throws InterruptedException
647
                 *             If interrupted.
648
                 */
649
                @Override
650
                boolean tryProcessRequest(SpiNNakerControl controller)
651
                                throws InterruptedException {
652
                        boolean ok = bmpAction(() -> {
3✔
653
                                changeBoardPowerState(controller);
3✔
654
                                // We want to ensure the lead board is alive
655
                                controller.ping(powerOnBoards);
3✔
656
                                synchronized (powerDBSync) {
3✔
657
                                        done();
3✔
658
                                }
3✔
659
                        }, e -> {
3✔
660
                                synchronized (powerDBSync) {
3✔
661
                                        failed();
3✔
662
                                }
3✔
663
                                synchronized (BMPController.this) {
3✔
664
                                        bmpProcessingException = e;
3✔
665
                                }
3✔
666
                        }, ppe -> {
3✔
667
                                synchronized (powerDBSync) {
×
668
                                        badBoard(ppe);
×
669
                                }
×
670
                        });
×
671
                        return ok;
3✔
672
                }
673

674
                @Override
675
                public String toString() {
676
                        var sb = new StringBuilder("PowerRequest(for=")
×
677
                                        .append(bmpId);
×
678
                        sb.append(";on=").append(powerOnBoards);
×
679
                        sb.append(",off=").append(powerOffBoards);
×
680
                        sb.append(",links=").append(linkRequests);
×
681
                        return sb.append(")").toString();
×
682
                }
683

684
                private static final String REPORT_MSG =
685
                                "board was not reachable when trying to power it: ";
686

687
                /**
688
                 * When a BMP is unroutable, we must tell the alloc engine to pick
689
                 * somewhere else, and we should mark the board as out of service too;
690
                 * it's never going to work so taking it out right away is the only sane
691
                 * plan. We also need to nuke the planned changes. Retrying is bad.
692
                 *
693
                 * @param failure
694
                 *            The failure message.
695
                 * @return Whether the state of boards or jobs has changed.
696
                 */
697
                private void badBoard(ProcessException failure) {
698
                        try (var c = getConnection()) {
×
699
                                c.transaction(() -> {
×
700
                                        getBoardId(failure.source).ifPresent(boardId -> {
×
701
                                                // Mark the board as dead right now
702
                                                markBoardAsDead(c, boardId, REPORT_MSG + failure);
×
703
                                                // Add a report if we can
704
                                                addBoardReport(c, boardId, jobId, REPORT_MSG + failure);
×
705
                                        });
×
706
                                });
×
707
                        }
708
                }
×
709

710
                /**
711
                 * Given a board address, get the ID that it corresponds to. Reverses
712
                 * {@link #idToBoard}.
713
                 *
714
                 * @param addr
715
                 *            The board address.
716
                 * @return The ID, if one can be found.
717
                 */
718
                private Optional<Integer> getBoardId(HasBMPLocation addr) {
719
                        return Optional.ofNullable(boardToId.get(addr.getBoard()));
×
720
                }
721

722
                private Integer getBoardId(BMPBoard board) {
723
                        return boardToId.get(board.board);
3✔
724
                }
725
        }
726

727
        /**
728
         * A request to read or write information on a BMP. Includes blacklists,
729
         * serial numbers, temperature data, etc.
730
         *
731
         * @author Donal Fellows
732
         */
733
        private final class BoardRequest extends Request {
734
                private final NonBootOperation op;
735

736
                private final int opId;
737

738
                private final int boardId;
739

740
                private final BMPCoords bmp;
741

742
                private final BMPBoard board;
743

744
                private final String bmpSerialId;
745

746
                private final Blacklist blacklist;
747

748
                private final int machineId;
749

750
                private BoardRequest(int bmpId, NonBootOperation op, Row row) {
3✔
751
                        super(bmpId);
3✔
752
                        this.op = op;
3✔
753
                        opId = row.getInt("op_id");
3✔
754
                        boardId = row.getInt("board_id");
3✔
755
                        bmp = new BMPCoords(row.getInt("cabinet"), row.getInt("frame"));
3✔
756
                        board = new BMPBoard(row.getInt("board_num"));
3✔
757
                        if (op == WRITE_BL) {
3✔
758
                                blacklist = row.getSerial("data", Blacklist.class);
3✔
759
                        } else {
760
                                blacklist = null;
3✔
761
                        }
762
                        bmpSerialId = row.getString("bmp_serial_id");
3✔
763
                        machineId = row.getInt("machine_id");
3✔
764
                }
3✔
765

766
                /** The serial number actually read from the board. */
767
                private String readSerial;
768

769
                /**
770
                 * Access the DB to store the serial number information that we
771
                 * retrieved. A transaction should already be held.
772
                 *
773
                 * @param c
774
                 *            How to access the DB
775
                 * @return Whether we've changed anything
776
                 */
777
                private void recordSerialIds(Connection c) {
778
                        try (var setBoardSerialIds = c.update(SET_BOARD_SERIAL_IDS)) {
3✔
779
                                setBoardSerialIds.call(boardId, readSerial,
3✔
780
                                                phySerMap.getPhysicalId(readSerial));
3✔
781
                        }
782
                }
3✔
783

784
                /**
785
                 * Access the DB to mark the read request as successful and store the
786
                 * blacklist that was read. A transaction should already be held.
787
                 *
788
                 * @param c
789
                 *            How to access the DB
790
                 * @param readBlacklist
791
                 *            The blacklist that was read
792
                 * @return Whether we've changed anything
793
                 */
794
                private void doneReadBlacklist(Connection c, Blacklist readBlacklist) {
795
                        try (var completed = c.update(COMPLETED_BOARD_INFO_READ)) {
3✔
796
                                log.debug("Completing blacklist read opId {}", opId);
3✔
797
                                completed.call(readBlacklist, opId);
3✔
798
                        }
799
                }
3✔
800

801
                /**
802
                 * Access the DB to mark the write request as successful. A transaction
803
                 * should already be held.
804
                 *
805
                 * @param c
806
                 *            How to access the DB
807
                 * @return Whether we've changed anything
808
                 */
809
                private void doneWriteBlacklist(Connection c) {
810
                        try (var completed = c.update(COMPLETED_BLACKLIST_WRITE)) {
3✔
811
                                completed.call(opId);
3✔
812
                        }
813
                }
3✔
814

815
                /**
816
                 * Access the DB to mark the read request as successful; the actual
817
                 * store of the serial data is elsewhere
818
                 * ({@link #recordSerialIds(Connection)}). A transaction should already
819
                 * be held.
820
                 *
821
                 * @param c
822
                 *            How to access the DB
823
                 * @return Whether we've changed anything
824
                 */
825
                private void doneReadSerial(Connection c) {
826
                        try (var completed = c.update(COMPLETED_GET_SERIAL_REQ)) {
3✔
827
                                completed.call(opId);
3✔
828
                        }
829
                }
3✔
830

831
                /**
832
                 * Access the DB to mark the read request as successful and store the
833
                 * ADC info that was read. A transaction should be held.
834
                 *
835
                 * @param c
836
                 *            The database connection.
837
                 */
838
                private void doneReadTemps(Connection c, ADCInfo adcInfo) {
839
                        try (var completed = c.update(COMPLETED_BOARD_INFO_READ)) {
×
840
                                log.debug("Completing temperature read opId {}", opId);
×
841
                                completed.call(adcInfo, opId);
×
842
                        }
843
                }
×
844

845
                /**
846
                 * Access the DB to mark the request as failed and store the exception.
847
                 *
848
                 * @param exn
849
                 *            The exception that caused the failure.
850
                 * @return Whether we've changed anything
851
                 */
852
                private void failed(Exception exn) {
853
                        try (var c = getConnection();
×
854
                                        var failed = c.update(FAILED_BLACKLIST_OP)) {
×
855
                                c.transaction(() -> failed.call(exn, opId));
×
856
                        }
857
                }
×
858

859
                private static final String REPORT_MSG =
860
                                "board was not reachable when trying to access its blacklist: ";
861

862
                /**
863
                 * Access the DB to mark a board as out of service.
864
                 *
865
                 * @param exn
866
                 *            The exception that caused the failure.
867
                 * @return Whether we've changed anything
868
                 */
869
                void takeOutOfService(Exception exn) {
870
                        try (var c = getConnection()) {
×
871
                                c.transaction(() -> {
×
872
                                        addBoardReport(c, boardId, null, REPORT_MSG + exn);
×
873
                                        markBoardAsDead(c, boardId, REPORT_MSG + exn);
×
874
                                });
×
875
                        }
876
                }
×
877

878
                /**
879
                 * Process an action to work with a blacklist or serial number. Runs on
880
                 * a thread that may touch a BMP directly, but which may not touch the
881
                 * database.
882
                 *
883
                 * @param controller
884
                 *            How to actually reach the BMP.
885
                 * @return Whether this action has "succeeded" and shouldn't be retried.
886
                 * @throws InterruptedException
887
                 *             If interrupted.
888
                 */
889
                @Override
890
                boolean tryProcessRequest(SpiNNakerControl controller)
891
                                throws InterruptedException {
892
                        return bmpAction(() -> {
3✔
893
                                switch (op) {
3✔
894
                                case WRITE_BL:
895
                                        writeBlacklist(controller);
3✔
896
                                        break;
3✔
897
                                case READ_BL:
898
                                        readBlacklist(controller);
3✔
899
                                        break;
3✔
900
                                case GET_SERIAL:
901
                                        readSerial(controller);
3✔
902
                                        break;
3✔
903
                                case READ_TEMP:
904
                                        readTemps(controller);
×
905
                                        break;
×
906
                                default:
907
                                        throw new IllegalArgumentException();
×
908
                                }
909
                                epochs.blacklistChanged(boardId);
3✔
910
                                epochs.machineChanged(machineId);
3✔
911
                        }, e -> {
3✔
912
                                failed(e);
×
913
                                epochs.blacklistChanged(boardId);
×
914
                                epochs.machineChanged(machineId);
×
915
                        }, ppe -> {
×
916
                                takeOutOfService(ppe);
×
917
                        });
×
918
                }
919

920
                /**
921
                 * Process an action to read a blacklist.
922
                 *
923
                 * @param controller
924
                 *            How to actually reach the BMP.
925
                 * @throws InterruptedException
926
                 *             If interrupted.
927
                 * @throws IOException
928
                 *             If the network is unhappy.
929
                 * @throws ProcessException
930
                 *             If the BMP rejects a message.
931
                 */
932
                private void readBlacklist(SpiNNakerControl controller)
933
                                throws InterruptedException, ProcessException, IOException {
934
                        readSerial = controller.readSerial(board);
3✔
935
                        if (bmpSerialId != null && !bmpSerialId.equals(readSerial)) {
3✔
936
                                /*
937
                                 * Doesn't match; WARN but keep going; hardware may just be
938
                                 * remapped behind our back.
939
                                 */
940
                                log.warn(
×
941
                                                "blacklist read mismatch: expected serial ID '{}' "
942
                                                                + "not equal to actual serial ID '{}'",
943
                                                bmpSerialId, readSerial);
944
                        }
945
                        var readBlacklist = controller.readBlacklist(board);
3✔
946
                        try (var c = getConnection()) {
3✔
947
                                c.transaction(() -> {
3✔
948
                                        recordSerialIds(c);
3✔
949
                                        doneReadBlacklist(c, readBlacklist);
3✔
950
                                });
3✔
951
                        }
952
                }
3✔
953

954
                /**
955
                 * Process an action to write a blacklist.
956
                 *
957
                 * @param controller
958
                 *            How to actually reach the BMP.
959
                 * @throws InterruptedException
960
                 *             If interrupted.
961
                 * @throws IOException
962
                 *             If the network is unhappy.
963
                 * @throws ProcessException
964
                 *             If the BMP rejects a message.
965
                 * @throws IllegalStateException
966
                 *             If the operation is applied to a board other than the one
967
                 *             that it is expected to apply to.
968
                 */
969
                private void writeBlacklist(SpiNNakerControl controller)
970
                                throws InterruptedException, ProcessException, IOException {
971
                        readSerial = controller.readSerial(board);
3✔
972
                        if (bmpSerialId != null && !bmpSerialId.equals(readSerial)) {
3✔
973
                                // Doesn't match, so REALLY unsafe to keep going!
974
                                throw new IllegalStateException(format(
×
975
                                                "aborting blacklist write: expected serial ID '%s' "
976
                                                                + "not equal to actual serial ID '%s'",
977
                                                bmpSerialId, readSerial));
978
                        }
979
                        controller.writeBlacklist(board, requireNonNull(blacklist));
3✔
980
                        try (var c = getConnection()) {
3✔
981
                                c.transaction(() -> doneWriteBlacklist(c));
3✔
982
                        }
983
                }
3✔
984

985
                /**
986
                 * Process an action to read the serial number from a BMP.
987
                 *
988
                 * @param controller
989
                 *            How to actually reach the BMP.
990
                 * @throws InterruptedException
991
                 *             If interrupted.
992
                 * @throws IOException
993
                 *             If the network is unhappy
994
                 * @throws ProcessException
995
                 *             If the BMP rejects a message.
996
                 */
997
                private void readSerial(SpiNNakerControl controller)
998
                                throws InterruptedException, ProcessException, IOException {
999
                        readSerial = controller.readSerial(board);
3✔
1000
                        try (var c = getConnection()) {
3✔
1001
                                c.transaction(() -> {
3✔
1002
                                        recordSerialIds(c);
3✔
1003
                                        doneReadSerial(c);
3✔
1004
                                });
3✔
1005
                        }
1006
                }
3✔
1007

1008
                /**
1009
                 * Process an action to read some temperature data.
1010
                 *
1011
                 * @param controller
1012
                 *            How to actually reach the BMP.
1013
                 * @throws InterruptedException
1014
                 *             If interrupted.
1015
                 * @throws IOException
1016
                 *             If the network is unhappy.
1017
                 * @throws ProcessException
1018
                 *             If the BMP rejects a message.
1019
                 */
1020
                private void readTemps(SpiNNakerControl controller)
1021
                                throws InterruptedException, ProcessException, IOException {
1022
                        var adcInfo = controller.readTemp(board);
×
1023
                        try (var c = getConnection()) {
×
1024
                                c.transaction(() -> doneReadTemps(c, adcInfo));
×
1025
                        }
1026
                }
×
1027

1028
                @Override
1029
                public String toString() {
1030
                        var sb = new StringBuilder("BoardRequest(for ");
×
1031
                        sb.append("bmp=").append(bmp);
×
1032
                        sb.append(",board=").append(boardId);
×
1033
                        sb.append(",op=").append(op);
×
1034
                        return sb.append(")").toString();
×
1035
                }
1036
        }
1037

1038
        private class PowerChange {
1039
                final Integer changeId;
1040

1041
                final int jobId;
1042

1043
                final Integer boardId;
1044

1045
                final Integer boardNum;
1046

1047
                final Instant powerOffTime;
1048

1049
                final boolean power;
1050

1051
                final JobState from;
1052

1053
                final JobState to;
1054

1055
                final List<Direction> offLinks;
1056

1057
                PowerChange(Row row) {
3✔
1058
                        changeId = row.getInteger("change_id");
3✔
1059
                        jobId = row.getInt("job_id");
3✔
1060
                        boardId = row.getInteger("board_id");
3✔
1061
                        boardNum = row.getInteger("board_num");
3✔
1062
                        power = row.getBoolean("power");
3✔
1063
                        from = row.getEnum("from_state", JobState.class);
3✔
1064
                        to = row.getEnum("to_state", JobState.class);
3✔
1065
                        offLinks = List.of(Direction.values()).stream().filter(
3✔
1066
                                        link -> !row.getBoolean(link.columnName)).collect(
3✔
1067
                                                        Collectors.toList());
3✔
1068
                        Instant powerOff = row.getInstant("power_off_timestamp");
3✔
1069
                        if (powerOff == null) {
3✔
1070
                                powerOff = Instant.EPOCH;
3✔
1071
                        }
1072
                        powerOffTime = powerOff;
3✔
1073
                }
3✔
1074

1075
                boolean isSameJob(PowerChange p) {
1076
                        return p.jobId == jobId && p.from == from && p.to == to;
×
1077
                }
1078
        }
1079

1080
        // ----------------------------------------------------------------
1081
        // WORKER IMPLEMENTATION
1082

1083
        /** A worker of a given BMP. */
1084
        private final class Worker implements Runnable {
1085
                /** What are we controlling? */
1086
                private SpiNNakerControl control;
1087

1088
                private final SpallocAPI.Machine machine;
1089

1090
                private final BMPCoords coords;
1091

1092
                private final Map<BMPBoard, String> boards;
1093

1094
                /** Which boards are we looking at? */
1095
                private final int bmpId;
1096

1097
                Worker(SpallocAPI.Machine machine, BMPCoords coords,
1098
                                Map<BMPBoard, String> boards, int bmpId) {
3✔
1099
                        this.machine = machine;
3✔
1100
                        this.coords = coords;
3✔
1101
                        this.boards = boards;
3✔
1102
                        this.bmpId = bmpId;
3✔
1103

1104
                        log.debug("Created worker for boards {}", bmpId);
3✔
1105
                }
3✔
1106

1107
                private SpiNNakerControl getControl() {
1108
                        if (control == null) {
3✔
1109
                                if (useDummyComms) {
3✔
1110
                                        control = new SpiNNakerControlDummy();
3✔
1111
                                } else {
1112
                                        try {
1113
                                                control = controllerFactory.create(machine, coords,
3✔
1114
                                                                boards);
NEW
1115
                                        } catch (Exception e) {
×
NEW
1116
                                                log.error("Could not create control for BMP '{}'",
×
NEW
1117
                                                                bmpId, e);
×
1118
                                        }
3✔
1119
                                }
1120
                        }
1121
                        return control;
3✔
1122
                }
1123

1124
                /**
1125
                 * Periodically call to update, or trigger externally.
1126
                 */
1127
                @Override
1128
                public synchronized void run() {
1129
                        log.trace("Searching for changes on BMP {}", bmpId);
3✔
1130

1131
                        try {
1132
                                var changes = getRequestedOperations();
3✔
1133
                                for (var change : changes) {
3✔
1134
                                        change.processRequest(getControl());
3✔
1135
                                }
3✔
1136
                        } catch (Exception e) {
×
1137
                                log.error("unhandled exception for BMP '{}'", bmpId, e);
×
1138
                        }
3✔
1139
                }
3✔
1140

1141
                private boolean waitedLongEnough(PowerChange change) {
1142
                        // Power off can be done any time
1143
                        if (!change.power) {
3✔
1144
                                return true;
3✔
1145
                        }
1146

1147
                        // Power on should wait until a time after last off
1148
                        Instant powerOnTime = change.powerOffTime.plus(
3✔
1149
                                        props.getOffWaitTime());
3✔
1150
                        return powerOnTime.isBefore(Instant.now());
3✔
1151
                }
1152

1153
                /**
1154
                 * Get the things that we want the worker to do. <em>Be very
1155
                 * careful!</em> Because this necessarily involves the database, this
1156
                 * must not touch the BMP handle as those operations take a long time
1157
                 * and we absolutely must not have a transaction open at the same time.
1158
                 *
1159
                 * @return List of operations to perform.
1160
                 */
1161
                private List<Request> getRequestedOperations() {
1162
                        var requests = new ArrayList<Request>();
3✔
1163
                        try (var c = getConnection();
3✔
1164
                                        var getPowerRequests = c.query(GET_CHANGES);
3✔
1165
                                        var getBlacklistReads = c.query(GET_BLACKLIST_READS);
3✔
1166
                                        var getBlacklistWrites = c.query(GET_BLACKLIST_WRITES);
3✔
1167
                                        var getReadSerialInfos = c.query(GET_SERIAL_INFO_REQS);
3✔
1168
                                        var getReadTemps = c.query(GET_TEMP_INFO_REQS)) {
3✔
1169
                                c.transaction(false, () -> {
3✔
1170
                                        // Batch power requests by job
1171
                                        var powerChanges = new LinkedList<>(
3✔
1172
                                                        getPowerRequests.call(PowerChange::new, bmpId));
3✔
1173
                                        while (!powerChanges.isEmpty()) {
3✔
1174
                                                var change = powerChanges.poll();
3✔
1175
                                                var jobChanges = new ArrayList<>(List.of(change));
3✔
1176
                                                var canDoNow = waitedLongEnough(change);
3✔
1177
                                                while (!powerChanges.isEmpty()
3✔
1178
                                                                && change.isSameJob(powerChanges.peek())) {
×
1179
                                                        canDoNow &= waitedLongEnough(powerChanges.peek());
×
1180
                                                        jobChanges.add(powerChanges.poll());
×
1181
                                                }
1182
                                                if (!jobChanges.isEmpty() && canDoNow) {
3✔
1183
                                                        log.debug("Running job changes {}", jobChanges);
3✔
1184
                                                        requests.add(new PowerRequest(bmpId, change.jobId,
3✔
1185
                                                                        change.from, change.to, jobChanges));
1186
                                                }
1187
                                        }
3✔
1188

1189
                                        // Leave these until quiet
1190
                                        if (requests.isEmpty()) {
3✔
1191
                                                requests.addAll(getBlacklistReads.call(
3✔
1192
                                                                row -> new BoardRequest(bmpId, READ_BL, row),
3✔
1193
                                                                bmpId));
3✔
1194
                                        }
1195
                                        if (requests.isEmpty()) {
3✔
1196
                                                requests.addAll(getBlacklistWrites.call(
3✔
1197
                                                                row -> new BoardRequest(bmpId, WRITE_BL, row),
3✔
1198
                                                                bmpId));
3✔
1199
                                                requests.addAll(getReadSerialInfos.call(
3✔
1200
                                                                row -> new BoardRequest(bmpId, GET_SERIAL, row),
3✔
1201
                                                                bmpId));
3✔
1202
                                                requests.addAll(getReadTemps.call(
3✔
1203
                                                                row -> new BoardRequest(bmpId, READ_TEMP, row),
×
1204
                                                                bmpId));
3✔
1205
                                        }
1206
                                });
3✔
1207
                        } catch (Exception e) {
×
1208
                                log.error("unhandled exception for BMP '{}'", bmpId, e);
×
1209
                        }
3✔
1210
                        return requests;
3✔
1211
                }
1212
        }
1213

1214
        /**
1215
         * The testing interface.
1216
         *
1217
         * @hidden
1218
         */
1219
        @ForTestingOnly
1220
        public interface TestAPI {
1221
                /**
1222
                 * Ensure things are set up after a database change that updates the
1223
                 * BMPs in the system.
1224
                 *
1225
                 * @param useDummyComms Whether to use dummy communications in the test
1226
                 */
1227
                void prepare(boolean useDummyComms);
1228

1229
                /**
1230
                 * Reset the transceivers stored in the workers after installing a new
1231
                 * transceiver.
1232
                 */
1233
                void resetTransceivers();
1234

1235
                /**
1236
                 * The core of the scheduler.
1237
                 *
1238
                 * @param millis
1239
                 *            How many milliseconds to sleep before doing a rerun of the
1240
                 *            scheduler. If zero (or less), only one run will be done.
1241
                 * @param bmps
1242
                 *            The BMPs to be updated.
1243
                 * @throws IOException
1244
                 *             If talking to the network fails
1245
                 * @throws SpinnmanException
1246
                 *             If a BMP sends an error back
1247
                 * @throws InterruptedException
1248
                 *             If the wait for workers to spawn fails.
1249
                 */
1250
                void processRequests(long millis, Collection<Integer> bmps)
1251
                                throws IOException, SpinnmanException, InterruptedException;
1252

1253
                /**
1254
                 * The core of the scheduler. Will process for all known BMPs.
1255
                 *
1256
                 * @param millis
1257
                 *            How many milliseconds to sleep before doing a rerun of the
1258
                 *            scheduler. If zero (or less), only one run will be done.
1259
                 * @throws IOException
1260
                 *             If talking to the network fails
1261
                 * @throws SpinnmanException
1262
                 *             If a BMP sends an error back
1263
                 * @throws InterruptedException
1264
                 *             If the wait for workers to spawn fails.
1265
                 */
1266
                void processRequests(long millis)
1267
                                throws IOException, SpinnmanException, InterruptedException;
1268

1269
                /**
1270
                 * Get the last BMP exception.
1271
                 *
1272
                 * @return The exception.
1273
                 */
1274
                Throwable getBmpException();
1275

1276
                /**
1277
                 * Clear the last BMP exception.
1278
                 */
1279
                void clearBmpException();
1280

1281
                /**
1282
                 * Resume after emergency stop.
1283
                 */
1284
                void emergencyResume();
1285
        }
1286

1287
        /**
1288
         * @return The test interface.
1289
         * @deprecated This interface is just for testing.
1290
         * @hidden
1291
         */
1292
        @ForTestingOnly
1293
        @RestrictedApi(explanation = "just for testing", link = "index.html",
1294
                        allowedOnPath = ".*/src/test/java/.*")
1295
        @Deprecated
1296
        public final TestAPI getTestAPI() {
1297
                ForTestingOnly.Utils.checkForTestClassOnStack();
3✔
1298
                return new TestAPI() {
3✔
1299
                        @Override
1300
                        public void prepare(boolean useDummyCommsParam) {
1301
                                useDummyComms = useDummyCommsParam;
3✔
1302
                                makeWorkers();
3✔
1303
                        }
3✔
1304

1305
                        @Override
1306
                        public void resetTransceivers() {
1307
                                for (var worker : workers.values()) {
3✔
1308
                                        worker.control = null;
3✔
1309
                                }
3✔
1310
                        }
3✔
1311

1312
                        @Override
1313
                        public void processRequests(long millis, Collection<Integer> bmps)
1314
                                        throws IOException, SpinnmanException,
1315
                                        InterruptedException {
1316
                                /*
1317
                                 * Runs twice because it takes two cycles to fully process a
1318
                                 * request.
1319
                                 */
1320
                                triggerSearch(bmps);
3✔
1321
                                if (millis > 0) {
3✔
1322
                                        Thread.sleep(millis);
3✔
1323
                                        triggerSearch(bmps);
3✔
1324
                                }
1325
                        }
3✔
1326

1327
                        @Override
1328
                        public void processRequests(long millis) throws IOException,
1329
                                        SpinnmanException, InterruptedException {
1330
                                processRequests(millis, workers.keySet());
3✔
1331
                        }
3✔
1332

1333
                        @Override
1334
                        public Throwable getBmpException() {
1335
                                synchronized (BMPController.this) {
3✔
1336
                                        return bmpProcessingException;
3✔
1337
                                }
1338
                        }
1339

1340
                        @Override
1341
                        public void clearBmpException() {
1342
                                synchronized (BMPController.this) {
3✔
1343
                                        bmpProcessingException = null;
3✔
1344
                                }
3✔
1345
                        }
3✔
1346

1347
                        @Override
1348
                        public void emergencyResume() {
1349
                                synchronized (guard) {
3✔
1350
                                        emergencyStop = false;
3✔
1351
                                        workers.clear();
3✔
1352
                                }
3✔
1353
                                init();
3✔
1354
                        }
3✔
1355
                };
1356
        }
1357
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc