• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SpiNNakerManchester / JavaSpiNNaker / 15234311369

22 May 2025 10:25AM UTC coverage: 37.528% (-0.8%) from 38.278%
15234311369

push

github

web-flow
Merge pull request #1227 from SpiNNakerManchester/emergency_stop

Emergency stop

114 of 152 new or added lines in 10 files covered. (75.0%)

243 existing lines in 6 files now uncovered.

9062 of 24147 relevant lines covered (37.53%)

1.12 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.06
/SpiNNaker-allocserv/src/main/java/uk/ac/manchester/spinnaker/alloc/bmp/SpiNNaker1.java
1
/*
2
 * Copyright (c) 2021 The University of Manchester
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     https://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
package uk.ac.manchester.spinnaker.alloc.bmp;
17

18
import static org.slf4j.LoggerFactory.getLogger;
19
import static uk.ac.manchester.spinnaker.messages.model.FPGALinkRegisters.STOP;
20
import static uk.ac.manchester.spinnaker.messages.model.FPGAMainRegisters.FLAG;
21

22
import java.io.IOException;
23
import java.util.ArrayList;
24
import java.util.Collection;
25
import java.util.List;
26
import java.util.Map;
27

28
import javax.annotation.PostConstruct;
29

30
import org.slf4j.Logger;
31
import org.springframework.beans.factory.ObjectProvider;
32
import org.springframework.beans.factory.annotation.Autowired;
33
import org.springframework.stereotype.Component;
34

35
import uk.ac.manchester.spinnaker.alloc.SpallocProperties.TxrxProperties;
36
import uk.ac.manchester.spinnaker.alloc.allocator.SpallocAPI.Machine;
37
import uk.ac.manchester.spinnaker.alloc.bmp.FirmwareLoader.FirmwareLoaderException;
38
import uk.ac.manchester.spinnaker.alloc.model.Prototype;
39
import uk.ac.manchester.spinnaker.machine.board.BMPBoard;
40
import uk.ac.manchester.spinnaker.machine.board.BMPCoords;
41
import uk.ac.manchester.spinnaker.messages.model.ADCInfo;
42
import uk.ac.manchester.spinnaker.messages.model.Blacklist;
43
import uk.ac.manchester.spinnaker.messages.model.FPGA;
44
import uk.ac.manchester.spinnaker.transceiver.BMPTransceiverInterface;
45
import uk.ac.manchester.spinnaker.transceiver.ProcessException;
46
import uk.ac.manchester.spinnaker.transceiver.SpinnmanException;
47
import uk.ac.manchester.spinnaker.utils.UsedInJavadocOnly;
48

49
/**
50
 * Implementation of controller for one BMP of one SpiNNaker 1 based system.
51
 * This is a short-lived component, which is why it isn't merged with the
52
 * longer-lived {@link TransceiverFactory}.
53
 */
54
@Component
55
@Prototype
56
class SpiNNaker1 implements SpiNNakerControl {
57
        // ----------------------------------------------------------------
58
        // CORE BMP ACCESS FUNCTIONS
59

60
        private static final Logger log = getLogger(SpiNNaker1.class);
3✔
61

62
        private static final int FPGA_FLAG_ID_MASK = 0x3;
63

64
        private static final int BMP_VERSION_MIN = 2;
65

66
        /**
67
         * We <em>always</em> pretend to talk to the root BMP of a machine (actually
68
         * the root of a frame), and never directly to any others. The BMPs within a
69
         * frame use a CAN bus and I<sup>2</sup>C to communicate with each other on
70
         * our behalf.
71
         */
72
        private static final BMPCoords ROOT_BMP = new BMPCoords(0, 0);
3✔
73

74
        @Autowired
75
        private TxrxProperties props;
76

77
        @Autowired
78
        private TransceiverFactoryAPI<?> txrxFactory;
79

80
        /**
81
         * Factory for {@link FirmwareLoader}. Do not call directly; use
82
         * {@link #loadFirmware(List)} instead.
83
         */
84
        @Autowired
85
        private ObjectProvider<FirmwareLoader> firmwareLoaderFactory;
86

87
        /** The BMP coordinates to bind into the transceiver. */
88
        private final BMPCoords bmp;
89

90
        private final Machine machine;
91

92
        private final Map<BMPBoard, String> boardAddresses;
93

94
        /**
95
         * The transceiver for talking to the machine.
96
         */
97
        private BMPTransceiverInterface txrx;
98

99
        /**
100
         * Load the FPGA firmware onto a board.
101
         *
102
         * @param boards
103
         *            Which boards are we planning to load the firmware on?
104
         * @throws InterruptedException
105
         *             If interrupted while sleeping
106
         * @throws ProcessException
107
         *             If a BMP rejects a message
108
         * @throws IOException
109
         *             If the network fails or the packaged bitfiles are unreadable
110
         * @throws FirmwareLoaderException
111
         *             If something goes wrong.
112
         */
113
        @UsedInJavadocOnly(FirmwareLoaderException.class)
114
        private void loadFirmware(List<BMPBoard> boards)
115
                        throws ProcessException, InterruptedException, IOException {
UNCOV
116
                int count = 0;
×
UNCOV
117
                for (var board : boards) {
×
UNCOV
118
                        firmwareLoaderFactory.getObject(txrx, board)
×
UNCOV
119
                                        .bitLoad(++count == boards.size());
×
UNCOV
120
                }
×
UNCOV
121
        }
×
122

123
        /**
124
         * The factory. Forces the constructor to conform to the API.
125
         * <p>
126
         * Do not use this directly (unless you're Spring Boot itself).
127
         */
128
        static final Factory FACTORY = SpiNNaker1::new;
3✔
129

130
        /**
         * Make a controller. The transceiver is not set up here; that happens in
         * {@link #initTransceiver()}.
         *
         * @param machine
         *            The machine hosting the boards and FPGAs.
         * @param bmp
         *            Which BMP on the machine are we really talking to.
         * @param boardAddresses
         *            The address of each board, used when pinging the boards.
         */
        SpiNNaker1(Machine machine, BMPCoords bmp,
                        Map<BMPBoard, String> boardAddresses) {
                this.machine = machine;
                this.bmp = bmp;
                this.boardAddresses = boardAddresses;
        }
3✔
142

143
        @PostConstruct
144
        void initTransceiver()
145
                        throws IOException, SpinnmanException, InterruptedException {
146
                txrx = txrxFactory.getTransciever(machine, bmp);
3✔
147
                txrx.bind(ROOT_BMP);
3✔
148
        }
3✔
149

150
        /** Notes that a board probably needs its FPGA definitions reloading. */
151
        private static class FPGAReloadRequired extends Exception {
152
                private static final long serialVersionUID = 1L;
153

154
                final BMPBoard board;
155

UNCOV
156
                FPGAReloadRequired(BMPBoard board) {
×
UNCOV
157
                        this.board = board;
×
UNCOV
158
                }
×
159
        }
160

161
        /**
162
         * Check whether an FPGA has come up in a good state.
163
         *
164
         * @param board
165
         *            Which board is the FPGA on?
166
         * @param fpga
167
         *            Which FPGA (0, 1, or 2) is being tested?
168
         * @return True if the FPGA is in a correct state, false otherwise.
169
         * @throws FPGAReloadRequired
170
         *             If the FPGA is in such a bad state that the FPGA definitions
171
         *             for the board need to be reloaded.
172
         * @throws InterruptedException
173
         *             If the communications were interrupted.
174
         */
175
        private boolean isGoodFPGA(BMPBoard board, FPGA fpga)
176
                        throws FPGAReloadRequired, InterruptedException {
177
                int flag;
178
                try {
179
                        flag = txrx.readFPGARegister(fpga, FLAG, board);
3✔
180
                } catch (ProcessException | IOException ignored) {
×
181
                        // An exception means the FPGA is a problem
182
                        return false;
×
183
                }
3✔
184
                // FPGA ID is bottom two bits of FLAG register
185
                int fpgaId = flag & FPGA_FLAG_ID_MASK;
3✔
186
                boolean ok = fpgaId == fpga.value;
3✔
187
                if (!ok) {
3✔
UNCOV
188
                        log.warn("{} on board {} of {} has incorrect FPGA ID flag {}", fpga,
×
UNCOV
189
                                        board, machine.getName(), fpgaId);
×
UNCOV
190
                        if (fpgaId == FPGA.FPGA_ALL.value) {
×
UNCOV
191
                                throw new FPGAReloadRequired(board);
×
192
                        }
193
                }
194
                return ok;
3✔
195
        }
196

197
        /**
198
         * Is a board new enough to be able to manage FPGAs?
199
         *
200
         * @param board
201
         *            The board number.
202
         * @return True if the board can manage FPGAs.
203
         * @throws ProcessException
204
         *             If a BMP rejects a message.
205
         * @throws IOException
206
         *             If network I/O fails.
207
         * @throws InterruptedException
208
         *             If the communications were interrupted.
209
         */
210
        private boolean canBoardManageFPGAs(BMPBoard board)
211
                        throws ProcessException, IOException, InterruptedException {
212
                var vi = txrx.readBMPVersion(board);
3✔
213
                return vi.versionNumber.majorVersion >= BMP_VERSION_MIN;
3✔
214
        }
215

216
        /**
217
         * {@inheritDoc}
218
         * <p>
219
         * Technically, switching a link off just switches off <em>sending</em> on
220
         * that link. We assume that the other end of the link also behaves.
221
         */
222
        @Override
223
        public void setLinkOff(Link link)
224
                        throws ProcessException, IOException, InterruptedException {
225
                var board = link.getBoard();
3✔
226
                var d = link.getLink();
3✔
227
                // skip FPGA link configuration if old BMP version
228
                if (!canBoardManageFPGAs(board)) {
3✔
229
                        return;
×
230
                }
231
                txrx.writeFPGARegister(d.fpga, d.bank, STOP, 1, board);
3✔
232
        }
3✔
233

234
        /**
235
         * A board is good if all its FPGAs are good.
236
         *
237
         * @param board
238
         *            The board ID
239
         * @return Whether the board's FPGAs all came up correctly.
240
         * @throws FPGAReloadRequired
241
         *             If an FPGA is in such a bad state that the FPGA definitions
242
         *             for the board need to be reloaded.
243
         * @throws InterruptedException
244
         *             If the communications were interrupted.
245
         * @see #isGoodFPGA(Integer, FPGA)
246
         */
247
        private boolean hasGoodFPGAs(BMPBoard board)
248
                        throws FPGAReloadRequired, InterruptedException {
249
                for (var fpga : FPGA.values()) {
3✔
250
                        if (fpga.isSingleFPGA() && !isGoodFPGA(board, fpga)) {
3✔
251
                                return false;
×
252
                        }
253
                }
254
                return true;
3✔
255
        }
256

257
        @Override
258
        public void powerOnAndCheck(List<BMPBoard> boards)
259
                        throws ProcessException, InterruptedException, IOException {
260
                var boardsToPower = boards;
3✔
261
                log.debug("Power on and check boards {} for BMP {}", boards, bmp);
3✔
262
                boolean reloadDone = false; // so we only do firmware loading once
3✔
263
                for (int attempt = 1; attempt <= props.getFpgaAttempts(); attempt++) {
3✔
264
                        if (attempt > 1) {
3✔
UNCOV
265
                                log.warn("rebooting {} boards in allocation to "
×
UNCOV
266
                                                + "get stability", boards.size());
×
267
                        }
268
                        txrx.powerOn(boardsToPower);
3✔
269

270
                        /*
271
                         * Check whether all the FPGAs on each board have come up correctly.
272
                         * If not, we'll need to try booting that board again. The boards
273
                         * that have booted correctly need no further action.
274
                         */
275

276
                        var retryBoards = new ArrayList<BMPBoard>();
3✔
277
                        var reloadBoards = new ArrayList<BMPBoard>();
3✔
278
                        for (var board : boardsToPower) {
3✔
279
                                // Skip board if old BMP version
280
                                if (!canBoardManageFPGAs(board)) {
3✔
281
                                        continue;
×
282
                                }
283
                                try {
284
                                        if (!hasGoodFPGAs(board)) {
3✔
285
                                                retryBoards.add(board);
×
286
                                        }
UNCOV
287
                                } catch (FPGAReloadRequired e) {
×
UNCOV
288
                                        reloadBoards.add(e.board);
×
289
                                }
3✔
290
                        }
3✔
291
                        if (retryBoards.isEmpty() && reloadBoards.isEmpty()) {
3✔
292
                                // Success!
293
                                log.debug("Finished power on and check boards {} for {}",
3✔
294
                                                boards, bmp);
295
                                return;
3✔
296
                        }
297
                        // We don't try reloading the first time
UNCOV
298
                        if (props.isFpgaReload() && attempt > 1
×
UNCOV
299
                                        && attempt < props.getFpgaAttempts()
×
UNCOV
300
                                        && !reloadBoards.isEmpty() && !reloadDone) {
×
UNCOV
301
                                log.warn("reloading FPGA firmware on {} boards",
×
UNCOV
302
                                                retryBoards.size());
×
UNCOV
303
                                loadFirmware(reloadBoards);
×
UNCOV
304
                                reloadDone = true;
×
305
                                // Need a full retry after that!
UNCOV
306
                                boardsToPower = boards;
×
UNCOV
307
                                continue;
×
308
                        }
UNCOV
309
                        retryBoards.addAll(reloadBoards); // Might not be empty
×
UNCOV
310
                        boardsToPower = retryBoards;
×
311
                }
312
                throw new IOException("Could not get correct FPGA ID for "
×
313
                                + boardsToPower.size() + " boards after "
×
314
                                + props.getFpgaAttempts() + " tries");
×
315
        }
316

317
        @Override
318
        public void powerOff(Collection<BMPBoard> boards)
319
                        throws ProcessException, InterruptedException, IOException {
320
                txrx.powerOff(boards);
3✔
321
        }
3✔
322

323
        @Override
324
        public String readSerial(BMPBoard board)
325
                        throws ProcessException, IOException, InterruptedException {
326
                return txrx.readBoardSerialNumber(board);
3✔
327
        }
328

329
        @Override
330
        public Blacklist readBlacklist(BMPBoard board)
331
                        throws ProcessException, IOException, InterruptedException {
332
                return txrx.readBlacklist(board);
3✔
333
        }
334

335
        @Override
336
        public void writeBlacklist(BMPBoard board, Blacklist blacklist)
337
                        throws ProcessException, InterruptedException, IOException {
338
                txrx.writeBlacklist(board, blacklist);
3✔
339
        }
3✔
340

341
        @Override
342
        public ADCInfo readTemp(BMPBoard board)
343
                        throws ProcessException, IOException, InterruptedException {
344
                return txrx.readADCData(board);
×
345
        }
346

347
        @Override
348
        public void ping(List<BMPBoard> boards) {
349
                boards.parallelStream().forEach(id -> {
3✔
350
                        var address = boardAddresses.get(id);
3✔
351
                        if (txrx.pingBoard(address) != 0) {
3✔
352
                                log.warn(
×
353
                                                "ARP fault? Board with address {} might not have "
354
                                                                + "come up correctly", address);
355
                        }
356
                });
3✔
357
        }
3✔
358
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc