• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Alan-Jowett / ebpf-verifier / 27778108035

07 Jun 2026 06:51PM UTC coverage: 86.386% (-2.5%) from 88.93%
27778108035

push

github

elazarg
Release v0.2.5

Bump project version to 0.2.5 and add a CHANGELOG entry covering ELF loader hardening, numeric-domain soundness fixes, and the writable helper output initialization documentation update since v0.2.4. Also updates the using_installed_package example version requirement.

Signed-off-by: Elazar Gershuni <elazarg@gmail.com>

9125 of 10563 relevant lines covered (86.39%)

6334294.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.84
/src/ir/unmarshal.cpp
1
// Copyright (c) Prevail Verifier contributors.
2
// SPDX-License-Identifier: MIT
3
#include <cassert>
4
#include <iostream>
5
#include <string>
6
#include <vector>
7

8
#include "crab_utils/debug.hpp"
9
#include "crab_utils/num_safety.hpp"
10
#include "ir/unmarshal.hpp"
11
#include "spec/vm_isa.hpp"
12

13
using std::string;
14
using std::vector;
15

16
namespace prevail {
17
int opcode_to_width(const uint8_t opcode) {
161,390✔
18
    switch (opcode & INST_SIZE_MASK) {
161,390✔
19
    case INST_SIZE_B: return 1;
7,118✔
20
    case INST_SIZE_H: return 2;
22,584✔
21
    case INST_SIZE_W: return 4;
46,836✔
22
    case INST_SIZE_DW: return 8;
77,734✔
23
    default: CRAB_ERROR("unexpected opcode", opcode);
×
24
    }
25
}
26

27
uint8_t width_to_opcode(const int width) {
134✔
28
    switch (width) {
134✔
29
    case 1: return INST_SIZE_B;
14✔
30
    case 2: return INST_SIZE_H;
14✔
31
    case 4: return INST_SIZE_W;
38✔
32
    case 8: return INST_SIZE_DW;
54✔
33
    default: CRAB_ERROR("unexpected width", width);
×
34
    }
35
}
36

37
/// Report a mismatch between two field values on std::cerr.
///
/// Nothing is printed when `actual` equals `expected`. Otherwise both values are printed in
/// hexadecimal after being converted to int.
///
/// std::hex is a sticky manipulator, so the stream's format flags are saved before printing
/// and restored afterwards; otherwise every later integer written to std::cerr would also
/// come out in hexadecimal.
///
/// @param field    Name of the field being compared, used as the message prefix.
/// @param actual   Value that was observed.
/// @param expected Value that was required.
template <typename T>
void compare(const std::string& field, T actual, T expected) {
    if (actual != expected) {
        const std::ios_base::fmtflags saved_flags = std::cerr.flags();
        std::cerr << field << ": (actual) " << std::hex << static_cast<int>(actual)
                  << " != " << static_cast<int>(expected) << " (expected)\n";
        std::cerr.flags(saved_flags);
    }
}
44

45
/// Build a diagnostic of the form "<msg> op 0x<opcode>", with the opcode in lowercase
/// hexadecimal and no zero padding.
static std::string make_opcode_message(const char* msg, const uint8_t opcode) {
    std::ostringstream text;
    text << msg;
    text << " op 0x";
    // Widen to int so the opcode is formatted as a number rather than as a character.
    text << std::hex << static_cast<int>(opcode);
    return text.str();
}
744✔
50

51
struct InvalidInstruction final : std::invalid_argument {
52
    size_t pc;
53
    explicit InvalidInstruction(const size_t pc, const char* what) : std::invalid_argument{what}, pc{pc} {}
1,116✔
54
    InvalidInstruction(const size_t pc, const std::string& what) : std::invalid_argument{what}, pc{pc} {}
237✔
55
    InvalidInstruction(const size_t pc, const uint8_t opcode)
402✔
56
        : std::invalid_argument{make_opcode_message("bad instruction", opcode)}, pc{pc} {}
603✔
57
};
58

59
static auto getMemIsLoad(const uint8_t opcode) -> bool {
290,696✔
60
    switch (opcode & INST_CLS_MASK) {
290,696✔
61
    case INST_CLS_LD:
71,486✔
62
    case INST_CLS_LDX: return true;
71,486✔
63
    case INST_CLS_ST:
147,724✔
64
    case INST_CLS_STX: return false;
147,724✔
65
    default: CRAB_ERROR("unexpected opcode", opcode);
×
66
    }
67
}
68

69
static auto getMemWidth(const uint8_t opcode) -> int {
292,974✔
70
    switch (opcode & INST_SIZE_MASK) {
292,974✔
71
    case INST_SIZE_B: return 1;
43,041✔
72
    case INST_SIZE_H: return 2;
35,172✔
73
    case INST_SIZE_W: return 4;
81,718✔
74
    case INST_SIZE_DW: return 8;
90,002✔
75
    default: CRAB_ERROR("unexpected opcode", opcode);
×
76
    }
77
}
78

79
/// Build the 64-bit binary instruction that applies `op` to `dst` with the immediate 32.
static Instruction shift32(const Reg dst, const Bin::Op op) {
    Bin shift{
        .op = op,
        .dst = dst,
        .v = Imm{32},
        .is64 = true,
        .lddw = false,
    };
    return shift;
}
82

83
struct Unmarshaller {
84
    vector<vector<string>>& notes;
85
    const ProgramInfo& info;
86
    int subprogram_stack_size = 512;
87
    bool allow_division_by_zero = false;
88
    // ReSharper disable once CppMemberFunctionMayBeConst
89
    /// Append a note to the list belonging to the most recently opened program counter.
    void note(const string& what) {
        auto& current_notes = notes.back();
        current_notes.emplace_back(what);
    }
1,598✔
90
    // ReSharper disable once CppMemberFunctionMayBeConst
91
    /// Open a fresh, empty note list; later calls to note() append to it.
    void note_next_pc() {
        notes.emplace_back();
    }
1,205,198✔
92
    /// Bind the unmarshaller to the caller's note storage and program info, and open the
    /// first note list so that note() always has a list to append to.
    explicit Unmarshaller(vector<vector<string>>& notes, const ProgramInfo& info)
        : notes{notes}, info{info} {
        this->note_next_pc();
    }
5,318✔
95

96
    auto getAluOp(const size_t pc, const EbpfInst inst) -> std::variant<Bin::Op, Un::Op> {
597,982✔
97
        // First handle instructions that support a non-zero offset.
98
        switch (inst.opcode & INST_ALU_OP_MASK) {
597,982✔
99
        case INST_ALU_OP_DIV:
6,492✔
100
            switch (inst.offset) {
6,492✔
101
            case 0: return Bin::Op::UDIV;
6,414✔
102
            case 1: return Bin::Op::SDIV;
70✔
103
            default: throw InvalidInstruction(pc, make_opcode_message("invalid offset for", inst.opcode));
12✔
104
            }
105
        case INST_ALU_OP_MOD:
206✔
106
            switch (inst.offset) {
206✔
107
            case 0: return Bin::Op::UMOD;
114✔
108
            case 1: return Bin::Op::SMOD;
84✔
109
            default: throw InvalidInstruction(pc, make_opcode_message("invalid offset for", inst.opcode));
12✔
110
            }
111
        case INST_ALU_OP_MOV:
293,912✔
112
            if (inst.offset > 0 && !(inst.opcode & INST_SRC_REG)) {
293,912✔
113
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
6✔
114
            }
115
            switch (inst.offset) {
293,908✔
116
            case 0: return Bin::Op::MOV;
293,832✔
117
            case 8: return Bin::Op::MOVSX8;
24✔
118
            case 16: return Bin::Op::MOVSX16;
24✔
119
            case 32: return Bin::Op::MOVSX32;
14✔
120
            default: throw InvalidInstruction(pc, make_opcode_message("invalid offset for", inst.opcode));
21✔
121
            }
122
        default: break;
297,372✔
123
        }
124

125
        // All the rest require a zero offset.
126
        if (inst.offset != 0) {
297,372✔
127
            throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
141✔
128
        }
129

130
        switch (inst.opcode & INST_ALU_OP_MASK) {
297,278✔
131
        case INST_ALU_OP_ADD: return Bin::Op::ADD;
103,638✔
132
        case INST_ALU_OP_SUB: return Bin::Op::SUB;
2,896✔
133
        case INST_ALU_OP_MUL: return Bin::Op::MUL;
918✔
134
        case INST_ALU_OP_OR: return Bin::Op::OR;
52,204✔
135
        case INST_ALU_OP_AND: return Bin::Op::AND;
30,148✔
136
        case INST_ALU_OP_LSH: return Bin::Op::LSH;
51,890✔
137
        case INST_ALU_OP_RSH: return Bin::Op::RSH;
37,142✔
138
        case INST_ALU_OP_NEG:
298✔
139
            // Negation is a unary operation. The SRC bit, src, and imm must be all 0.
140
            if (inst.opcode & INST_SRC_REG) {
298✔
141
                throw InvalidInstruction{pc, inst.opcode};
4✔
142
            }
143
            if (inst.src != 0) {
294✔
144
                throw InvalidInstruction{pc, inst.opcode};
4✔
145
            }
146
            if (inst.imm != 0) {
290✔
147
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
6✔
148
            }
149
            return Un::Op::NEG;
286✔
150
        case INST_ALU_OP_XOR: return Bin::Op::XOR;
3,788✔
151
        case INST_ALU_OP_ARSH:
6,510✔
152
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU) {
6,510✔
153
                note("arsh32 is not allowed");
1,852✔
154
            }
155
            return Bin::Op::ARSH;
6,510✔
156
        case INST_ALU_OP_END:
7,830✔
157
            if (inst.src != 0) {
7,830✔
158
                throw InvalidInstruction{pc, inst.opcode};
18✔
159
            }
160
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64) {
7,812✔
161
                if (inst.opcode & INST_END_BE) {
52✔
162
                    throw InvalidInstruction(pc, inst.opcode);
2✔
163
                }
164
                switch (inst.imm) {
50✔
165
                case 16: return Un::Op::SWAP16;
14✔
166
                case 32: return Un::Op::SWAP32;
14✔
167
                case 64: return Un::Op::SWAP64;
14✔
168
                default: throw InvalidInstruction(pc, "unsupported immediate");
8✔
169
                }
170
            }
171
            switch (inst.imm) {
7,760✔
172
            case 16: return (inst.opcode & INST_END_BE) ? Un::Op::BE16 : Un::Op::LE16;
7,338✔
173
            case 32: return (inst.opcode & INST_END_BE) ? Un::Op::BE32 : Un::Op::LE32;
356✔
174
            case 64: return (inst.opcode & INST_END_BE) ? Un::Op::BE64 : Un::Op::LE64;
73✔
175
            default: throw InvalidInstruction(pc, "unsupported immediate");
16✔
176
            }
177
        case 0xe0: throw InvalidInstruction{pc, inst.opcode};
8✔
178
        case 0xf0: throw InvalidInstruction{pc, inst.opcode};
8✔
179
        default: return {};
×
180
        }
181
    }
182

183
    /// Decode the atomic operation stored in the immediate of an atomic instruction.
    ///
    /// The INST_FETCH bit is masked off before the immediate is interpreted as an Atomic::Op.
    /// XCHG and CMPXCHG are only accepted with the fetch bit set; ADD, OR, AND and XOR are
    /// accepted with or without it.
    ///
    /// @throws InvalidInstruction with "unsupported immediate" for XCHG/CMPXCHG without the
    ///         fetch bit, and for any immediate that names none of the six operations.
    static auto getAtomicOp(const size_t pc, const EbpfInst inst) -> Atomic::Op {
        switch (const auto op = gsl::narrow<Atomic::Op>(inst.imm & ~INST_FETCH)) {
        case Atomic::Op::XCHG:
        case Atomic::Op::CMPXCHG:
            if ((inst.imm & INST_FETCH) == 0) {
                throw InvalidInstruction(pc, "unsupported immediate");
            }
            // Intentional: once the fetch bit is confirmed these share the plain return below.
            [[fallthrough]];
        case Atomic::Op::ADD:
        case Atomic::Op::OR:
        case Atomic::Op::AND:
        case Atomic::Op::XOR: return op;
        }
        throw InvalidInstruction(pc, "unsupported immediate");
    }
197

198
    /// Widen a signed 32-bit immediate to int64_t, then hand it to to_unsigned() to obtain
    /// the 64-bit unsigned result.
    static uint64_t sign_extend(const int32_t imm) {
        return to_unsigned(int64_t{imm});
    }
431,256✔
199

200
    /// Pass the 32-bit immediate through to_unsigned() first, then widen the unsigned
    /// result to 64 bits.
    static uint64_t zero_extend(const int32_t imm) {
        const auto unsigned_imm = to_unsigned(imm);
        return uint64_t{unsigned_imm};
    }
74✔
201

202
    /// Decode the second operand of a binary ALU instruction.
    ///
    /// With the SRC bit set the operand is the src register and imm must be zero; otherwise
    /// it is the sign-extended immediate and src must be zero. Any violation throws
    /// InvalidInstruction.
    static auto getBinValue(const Pc pc, const EbpfInst inst) -> Value {
        const bool src_is_reg = (inst.opcode & INST_SRC_REG) != 0;
        if (!src_is_reg) {
            if (inst.src != 0) {
                throw InvalidInstruction{pc, inst.opcode};
            }
            // Imm is a signed 32-bit number; it is stored sign-extended to 64 bits.
            return Imm{sign_extend(inst.imm)};
        }
        if (inst.imm != 0) {
            throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
        }
        return Reg{inst.src};
    }
215

216
    /// Translate the upper four opcode bits of a jump instruction into a Condition::Op.
    ///
    /// The goto (0x0), call (0x8) and exit (0x9) encodings carry no condition and yield a
    /// value-initialized Op. Encodings 0xe and 0xf throw InvalidInstruction.
    static auto getJmpOp(const size_t pc, const uint8_t opcode) -> Condition::Op {
        using Op = Condition::Op;
        const auto jmp_bits = (opcode >> 4) & 0xF;
        switch (jmp_bits) {
        case 0x1: return Op::EQ;
        case 0x2: return Op::GT;
        case 0x3: return Op::GE;
        case 0x4: return Op::SET;
        case 0x5: return Op::NE;
        case 0x6: return Op::SGT;
        case 0x7: return Op::SGE;
        case 0xa: return Op::LT;
        case 0xb: return Op::LE;
        case 0xc: return Op::SLT;
        case 0xd: return Op::SLE;
        case 0xe:
        case 0xf: throw InvalidInstruction(pc, opcode);
        case 0x0: // goto
        case 0x8: // call
        case 0x9: // exit
        default: return {};
        }
    }
238

239
    auto makeMemOp(const Pc pc, const EbpfInst inst) -> Instruction {
293,112✔
240
        if (inst.dst > R10_STACK_POINTER || inst.src > R10_STACK_POINTER) {
293,112✔
241
            throw InvalidInstruction(pc, "bad register");
138✔
242
        }
243

244
        const int width = getMemWidth(inst.opcode);
292,974✔
245
        const bool isLD = (inst.opcode & INST_CLS_MASK) == INST_CLS_LD;
292,974✔
246
        switch (inst.opcode & INST_MODE_MASK) {
292,974✔
247
        case INST_MODE_IMM: throw InvalidInstruction(pc, inst.opcode);
30✔
248

249
        case INST_MODE_ABS:
210✔
250
            if (!isLD || (width == 8)) {
210✔
251
                throw InvalidInstruction(pc, inst.opcode);
26✔
252
            }
253
            if (inst.dst != 0) {
184✔
254
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
6✔
255
            }
256
            if (inst.src > 0) {
178✔
257
                throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
6✔
258
            }
259
            if (inst.offset != 0) {
172✔
260
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
6✔
261
            }
262
            return Packet{.width = width, .offset = inst.imm, .regoffset = {}};
166✔
263

264
        case INST_MODE_IND:
488✔
265
            if (!isLD || (width == 8)) {
488✔
266
                throw InvalidInstruction(pc, inst.opcode);
26✔
267
            }
268
            if (inst.dst != 0) {
462✔
269
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
6✔
270
            }
271
            if (inst.src > R10_STACK_POINTER) {
456✔
272
                throw InvalidInstruction(pc, "bad register");
273
            }
274
            if (inst.offset != 0) {
456✔
275
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
6✔
276
            }
277
            return Packet{.width = width, .offset = inst.imm, .regoffset = Reg{inst.src}};
450✔
278

279
        case INST_MODE_MEM: {
290,704✔
280
            if (isLD) {
290,704✔
281
                throw InvalidInstruction(pc, inst.opcode);
8✔
282
            }
283
            const bool isLoad = getMemIsLoad(inst.opcode);
290,696✔
284
            if (isLoad && inst.dst == R10_STACK_POINTER) {
290,696✔
285
                throw InvalidInstruction(pc, "cannot modify r10");
2✔
286
            }
287
            const bool isImm = !(inst.opcode & 1);
290,694✔
288
            if (isImm && inst.src != 0) {
290,694✔
289
                throw InvalidInstruction(pc, inst.opcode);
8✔
290
            }
291
            if (!isImm && inst.imm != 0) {
290,649✔
292
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
16✔
293
            }
294

295
            assert(!(isLoad && isImm));
290,670✔
296
            const uint8_t basereg = isLoad ? inst.src : inst.dst;
290,670✔
297

298
            if (basereg == R10_STACK_POINTER &&
371,329✔
299
                (inst.offset + opcode_to_width(inst.opcode) > 0 || inst.offset < -subprogram_stack_size)) {
161,318✔
300
                note("Stack access out of bounds");
16✔
301
            }
302
            auto res = Mem{
290,670✔
303
                .access =
304
                    Deref{
305
                        .width = width,
306
                        .basereg = Reg{basereg},
307
                        .offset = inst.offset,
290,670✔
308
                    },
309
                .value = isLoad  ? Value{Reg{inst.dst}}
71,481✔
310
                         : isImm ? Value{Imm{zero_extend(inst.imm)}}
147,708✔
311
                                 : Value{Reg{inst.src}},
73,817✔
312
                .is_load = isLoad,
313
            };
438,378✔
314
            return res;
290,670✔
315
        }
316
        case INST_MODE_MEMSX: {
50✔
317
            // Sign-extending loads are only valid for LDX B/H/W forms.
318
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_LDX || width == 8) {
50✔
319
                throw InvalidInstruction(pc, inst.opcode);
26✔
320
            }
321
            if (inst.dst == R10_STACK_POINTER) {
24✔
322
                throw InvalidInstruction(pc, "cannot modify r10");
×
323
            }
324
            if (inst.imm != 0) {
24✔
325
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
6✔
326
            }
327
            if (inst.src == R10_STACK_POINTER &&
21✔
328
                (inst.offset + opcode_to_width(inst.opcode) > 0 || inst.offset < -subprogram_stack_size)) {
6✔
329
                note("Stack access out of bounds");
×
330
            }
331
            return Mem{
36✔
332
                .access =
333
                    Deref{
334
                        .width = width,
335
                        .basereg = Reg{inst.src},
18✔
336
                        .offset = inst.offset,
18✔
337
                    },
338
                .value = Value{Reg{inst.dst}},
27✔
339
                .is_load = true,
340
                .is_signed = true,
341
            };
18✔
342
        }
343

344
        case INST_MODE_ATOMIC:
1,428✔
345
            if (((inst.opcode & INST_CLS_MASK) != INST_CLS_STX) ||
1,428✔
346
                ((inst.opcode & INST_SIZE_MASK) != INST_SIZE_W && (inst.opcode & INST_SIZE_MASK) != INST_SIZE_DW)) {
1,404✔
347
                throw InvalidInstruction(pc, inst.opcode);
28✔
348
            }
349
            return Atomic{
2,084✔
350
                .op = getAtomicOp(pc, inst),
1,400✔
351
                .fetch = (inst.imm & INST_FETCH) == INST_FETCH,
1,368✔
352
                .access =
353
                    Deref{
354
                        .width = width,
355
                        .basereg = Reg{inst.dst},
1,368✔
356
                        .offset = inst.offset,
1,368✔
357
                    },
358
                .valreg = Reg{inst.src},
1,368✔
359
            };
1,368✔
360
        default: throw InvalidInstruction(pc, inst.opcode);
64✔
361
        }
362
    }
363

364
    /// Decode an ALU or ALU64 instruction into a Un or Bin instruction.
    ///
    /// r10 is rejected as a destination, and register numbers above r10 are rejected for
    /// both dst and src. The operation comes from getAluOp() and, for binary operations,
    /// the second operand from getBinValue(); both may throw InvalidInstruction.
    auto makeAluOp(const size_t pc, const EbpfInst inst) -> Instruction {
        const bool is64 = (inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64;
        if (inst.dst == R10_STACK_POINTER) {
            throw InvalidInstruction(pc, "invalid target r10");
        }
        if (inst.dst > R10_STACK_POINTER || inst.src > R10_STACK_POINTER) {
            throw InvalidInstruction(pc, "bad register");
        }

        const auto make_unary = [&](const Un::Op op) -> Instruction {
            return Un{.op = op, .dst = Reg{inst.dst}, .is64 = is64};
        };
        const auto make_binary = [&](const Bin::Op op) -> Instruction {
            Bin res{
                .op = op,
                .dst = Reg{inst.dst},
                .v = getBinValue(pc, inst),
                .is64 = is64,
            };
            // NOTE(review): only the unsigned forms are examined here; SDIV/SMOD by a zero
            // immediate produce no note. Confirm that this is intended.
            const bool unsigned_div_or_mod = op == Bin::Op::UDIV || op == Bin::Op::UMOD;
            if (!allow_division_by_zero && unsigned_div_or_mod) {
                const auto pimm = std::get_if<Imm>(&res.v);
                if (pimm && pimm->v == 0) {
                    note("division by zero");
                }
            }
            return res;
        };

        const auto alu_op = getAluOp(pc, inst);
        return std::visit(Overloaded{make_unary, make_binary}, alu_op);
    }
392

393
    [[nodiscard]]
394
    auto makeLddw(const EbpfInst inst, const int32_t next_imm, const vector<EbpfInst>& insts, const Pc pc) const
60,396✔
395
        -> Instruction {
396
        if (pc >= insts.size() - 1) {
60,396✔
397
            throw InvalidInstruction(pc, "incomplete lddw");
20✔
398
        }
399
        const EbpfInst next = insts[pc + 1];
60,376✔
400
        if (next.opcode != 0 || next.dst != 0 || next.src != 0 || next.offset != 0) {
60,376✔
401
            throw InvalidInstruction(pc, "invalid lddw");
16✔
402
        }
403
        if (inst.offset != 0) {
60,360✔
404
            throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
14✔
405
        }
406
        if (inst.dst > R10_STACK_POINTER) {
60,346✔
407
            throw InvalidInstruction(pc, "bad register");
14✔
408
        }
409

410
        switch (inst.src) {
60,332✔
411
        case INST_LD_MODE_IMM:
18,178✔
412
            return Bin{
45,445✔
413
                .op = Bin::Op::MOV,
414
                .dst = Reg{inst.dst},
18,178✔
415
                .v = Imm{merge(inst.imm, next_imm)},
27,267✔
416
                .is64 = true,
417
                .lddw = true,
418
            };
18,178✔
419
        case INST_LD_MODE_MAP_FD: {
37,608✔
420
            // magic number, meaning we're a per-process file descriptor defining the map.
421
            // (for details, look for BPF_PSEUDO_MAP_FD in the kernel)
422
            if (next.imm != 0) {
37,608✔
423
                throw InvalidInstruction(pc, "lddw uses reserved fields");
2✔
424
            }
425
            return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm};
37,606✔
426
        }
427
        case INST_LD_MODE_MAP_VALUE: return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm};
4,498✔
428
        case INST_LD_MODE_VARIABLE_ADDR:
10✔
429
            if (next.imm != 0) {
10✔
430
                throw InvalidInstruction(pc, "lddw uses reserved fields");
2✔
431
            }
432
            return LoadPseudo{.dst = Reg{inst.dst},
8✔
433
                              .addr = PseudoAddress{
434
                                  .kind = PseudoAddress::Kind::VARIABLE_ADDR, .imm = inst.imm, .next_imm = next_imm}};
8✔
435
        case INST_LD_MODE_CODE_ADDR:
22✔
436
            if (next.imm != 0) {
22✔
437
                throw InvalidInstruction(pc, "lddw uses reserved fields");
2✔
438
            }
439
            return LoadPseudo{
20✔
440
                .dst = Reg{inst.dst},
20✔
441
                .addr = PseudoAddress{.kind = PseudoAddress::Kind::CODE_ADDR, .imm = inst.imm, .next_imm = next_imm}};
20✔
442
        case INST_LD_MODE_MAP_BY_IDX:
8✔
443
            if (next.imm != 0) {
8✔
444
                throw InvalidInstruction(pc, "lddw uses reserved fields");
2✔
445
            }
446
            return LoadPseudo{
6✔
447
                .dst = Reg{inst.dst},
6✔
448
                .addr = PseudoAddress{.kind = PseudoAddress::Kind::MAP_BY_IDX, .imm = inst.imm, .next_imm = next_imm}};
6✔
449
        case INST_LD_MODE_MAP_VALUE_BY_IDX:
6✔
450
            // map_value_by_idx carries the value offset in next_imm (same encoding role as map_value),
451
            // so next.imm is not reserved in this mode.
452
            return LoadPseudo{.dst = Reg{inst.dst},
6✔
453
                              .addr = PseudoAddress{.kind = PseudoAddress::Kind::MAP_VALUE_BY_IDX,
454
                                                    .imm = inst.imm,
3✔
455
                                                    .next_imm = next_imm}};
6✔
456
        default: throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
2✔
457
        }
458
    }
459

460
    /// Given a program counter and an offset, get the label of the target instruction.
461
    static Label getJumpTarget(const int32_t offset, const vector<EbpfInst>& insts, const Pc pc) {
127,104✔
462
        const Pc new_pc = pc + 1 + offset;
127,104✔
463
        if (new_pc >= insts.size()) {
127,104✔
464
            throw InvalidInstruction(pc, "jump out of bounds");
138✔
465
        }
466
        if (insts[new_pc].opcode == 0) {
126,966✔
467
            throw InvalidInstruction(pc, "jump to middle of lddw");
×
468
        }
469
        return Label{gsl::narrow<int>(new_pc)};
126,966✔
470
    }
471

472
    /// Decode a local call; the imm field is the relative offset of the callee.
    ///
    /// The SRC bit and the dst field must both be zero; otherwise InvalidInstruction is
    /// thrown. The target is validated by getJumpTarget().
    static auto makeCallLocal(const EbpfInst inst, const vector<EbpfInst>& insts, const Pc pc) -> CallLocal {
        const bool src_is_reg = (inst.opcode & INST_SRC_REG) != 0;
        if (src_is_reg) {
            throw InvalidInstruction(pc, inst.opcode);
        }
        if (inst.dst != 0) {
            throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
        }
        const Label callee = getJumpTarget(inst.imm, insts, pc);
        return CallLocal{.target = callee};
    }
481

482
    /// Decode an indirect call through a register.
    ///
    /// callx puts the register number in the 'dst' field rather than the 'src' field.
    /// Clang prior to v19 put it into the 'imm' field instead; that form is accepted only
    /// when dst is zero and imm names a register in the range r0..r10.
    static auto makeCallx(const EbpfInst inst, const Pc pc) -> Callx {
        if (inst.dst > R10_STACK_POINTER) {
            throw InvalidInstruction(pc, "bad register");
        }
        if (inst.imm == 0) {
            return Callx{inst.dst};
        }

        // Legacy encoding: the register number lives in imm, so dst must be unused.
        if (inst.dst > 0) {
            throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
        }
        const bool imm_is_register = inst.imm >= 0 && inst.imm <= R10_STACK_POINTER;
        if (!imm_is_register) {
            throw InvalidInstruction(pc, "bad register");
        }
        return Callx{gsl::narrow<uint8_t>(inst.imm)};
    }
499

500
    [[nodiscard]]
501
    auto makeJmp(const EbpfInst inst, const vector<EbpfInst>& insts, const Pc pc) const -> Instruction {
194,240✔
502
        switch ((inst.opcode >> 4) & 0xF) {
194,240✔
503
        case INST_CALL:
62,526✔
504
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP) {
62,526✔
505
                throw InvalidInstruction(pc, inst.opcode);
4✔
506
            }
507
            if (inst.src > INST_CALL_BTF_HELPER) {
62,522✔
508
                throw InvalidInstruction(pc, inst.opcode);
2✔
509
            }
510
            if (inst.src == INST_CALL_LOCAL) {
62,520✔
511
                if (inst.offset != 0) {
938✔
512
                    throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
2✔
513
                }
514
                return makeCallLocal(inst, insts, pc);
1,401✔
515
            }
516
            if (inst.opcode & INST_SRC_REG) {
61,582✔
517
                // Register-call opcode form is reserved for callx and must not be used for src-based call modes.
518
                if (inst.src != 0) {
30✔
519
                    throw InvalidInstruction(pc, inst.opcode);
2✔
520
                }
521
                if (inst.offset != 0) {
28✔
522
                    throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
2✔
523
                }
524
                return makeCallx(inst, pc);
26✔
525
            }
526
            if (inst.src == INST_CALL_BTF_HELPER) {
61,552✔
527
                if (inst.dst != 0) {
82✔
528
                    throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
2✔
529
                }
530
                if (inst.offset < 0) {
80✔
531
                    throw InvalidInstruction(pc, make_opcode_message("negative module for", inst.opcode));
2✔
532
                }
533
                return CallBtf{.btf_id = inst.imm, .module = inst.offset};
78✔
534
            }
535
            if (inst.dst != 0) {
61,470✔
536
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
2✔
537
            }
538
            if (inst.offset != 0) {
61,468✔
539
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
2✔
540
            }
541
            // Builtin vs helper is a per-PC distinction (the ELF loader flags
542
            // relocation targets in info.builtin_call_offsets). Resolution
543
            // happens later via call_resolver::resolve(); unmarshal produces
544
            // only the key-shaped Call.
545
            if (info.builtin_call_offsets.contains(pc)) {
61,466✔
546
                return Call{.func = inst.imm, .kind = CallKind::builtin};
2,460✔
547
            }
548
            return Call{.func = inst.imm, .kind = CallKind::helper};
59,006✔
549
        case INST_EXIT:
5,422✔
550
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP || (inst.opcode & INST_SRC_REG)) {
5,422✔
551
                throw InvalidInstruction(pc, inst.opcode);
6✔
552
            }
553
            if (inst.src != 0) {
5,416✔
554
                throw InvalidInstruction(pc, inst.opcode);
2✔
555
            }
556
            if (inst.dst != 0) {
5,414✔
557
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
2✔
558
            }
559
            if (inst.imm != 0) {
5,412✔
560
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
2✔
561
            }
562
            if (inst.offset != 0) {
5,410✔
563
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
2✔
564
            }
565
            return Exit{};
5,408✔
566
        case INST_JA:
22,688✔
567
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP && (inst.opcode & INST_CLS_MASK) != INST_CLS_JMP32) {
22,688✔
568
                throw InvalidInstruction(pc, inst.opcode);
×
569
            }
570
            if (inst.opcode & INST_SRC_REG) {
22,688✔
571
                throw InvalidInstruction(pc, inst.opcode);
4✔
572
            }
573
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_JMP && (inst.imm != 0)) {
22,684✔
574
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
2✔
575
            }
576
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_JMP32 && (inst.offset != 0)) {
22,682✔
577
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
2✔
578
            }
579
            if (inst.dst != 0) {
22,680✔
580
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
4✔
581
            }
582
        default: {
63,140✔
583
            // First validate the opcode, src, and imm.
584
            const auto op = getJmpOp(pc, inst.opcode);
126,280✔
585
            if (!(inst.opcode & INST_SRC_REG) && (inst.src != 0)) {
126,264✔
586
                throw InvalidInstruction(pc, inst.opcode);
48✔
587
            }
588
            if ((inst.opcode & INST_SRC_REG) && (inst.imm != 0)) {
126,216✔
589
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
44✔
590
            }
591

592
            const int32_t offset = (inst.opcode == INST_OP_JA32) ? inst.imm : inst.offset;
126,172✔
593
            const Label target = getJumpTarget(offset, insts, pc);
126,172✔
594
            if (inst.opcode != INST_OP_JA16 && inst.opcode != INST_OP_JA32) {
126,036✔
595
                if (inst.dst > R10_STACK_POINTER) {
103,368✔
596
                    throw InvalidInstruction(pc, "bad register");
88✔
597
                }
598
                if ((inst.opcode & INST_SRC_REG) && inst.src > R10_STACK_POINTER) {
103,280✔
599
                    throw InvalidInstruction(pc, "bad register");
44✔
600
                }
601
            }
602

603
            const auto cond = (inst.opcode == INST_OP_JA16 || inst.opcode == INST_OP_JA32)
114,576✔
604
                                  ? std::optional<Condition>{}
125,904✔
605
                                  : Condition{.op = op,
51,618✔
606
                                              .left = Reg{inst.dst},
103,236✔
607
                                              .right = (inst.opcode & INST_SRC_REG) ? Value{Reg{inst.src}}
103,236✔
608
                                                                                    : Value{Imm{sign_extend(inst.imm)}},
86,430✔
609
                                              .is64 = (inst.opcode & INST_CLS_MASK) == INST_CLS_JMP};
125,904✔
610
            return Jmp{.cond = cond, .target = target};
188,922✔
611
        }
125,970✔
612
        }
613
    }
614

615
    /// Decode a raw instruction stream into a sequence of labeled instructions.
    ///
    /// Each emitted entry is labeled with the index (pc) of the raw slot where it
    /// starts. Two raw slots are collapsed into a single emitted instruction when:
    ///  - the slot is an LDDW immediate load (the following slot supplies the
    ///    second immediate), or
    ///  - a 64-bit `rX <<= 32` is immediately followed by a matching 64-bit
    ///    right shift by 32 of the same register (see the merge comment below).
    ///
    /// Diagnostics ("fallthrough in last instruction", "no exit instruction") are
    /// recorded through note(); they do not abort decoding.
    ///
    /// @param insts   Raw instructions to decode; must not be empty.
    /// @param options Verifier options. `options.runtime` is validated and its
    ///                `subprogram_stack_size` / `allow_division_by_zero` values are
    ///                copied into this object before decoding starts;
    ///                `options.verbosity_opts.print_line_info` controls whether
    ///                line info is attached to the emitted instructions.
    /// @return The decoded instructions in program order.
    /// @throws std::invalid_argument if `insts` is empty.
    /// @note The per-class decoding helpers (makeLddw, makeMemOp, makeAluOp,
    ///       makeJmp) may throw on malformed input; such exceptions propagate.
    vector<LabeledInstruction> unmarshal(vector<EbpfInst> const& insts, const prevail::VerifierOptions& options) {
        options.runtime.validate();
        subprogram_stack_size = options.runtime.subprogram_stack_size;
        allow_division_by_zero = options.runtime.allow_division_by_zero;

        if (insts.empty()) {
            throw std::invalid_argument("Zero length programs are not allowed");
        }

        vector<LabeledInstruction> prog;
        // At most one output entry is produced per raw slot.
        prog.reserve(insts.size());
        bool has_exit = false;

        for (size_t pc = 0; pc < insts.size();) {
            const EbpfInst inst = insts[pc];
            // True when a following raw slot exists; written as `pc + 1 < size`
            // so the test never depends on unsigned `size() - 1` arithmetic.
            const bool has_next = pc + 1 < insts.size();
            Instruction new_ins;
            bool skip_instruction = false; // set when this instruction also consumes slot pc + 1
            bool fallthrough = true;       // cleared for exit and unconditional jumps

            switch (inst.opcode & INST_CLS_MASK) {
            case INST_CLS_LD:
                if (inst.opcode == INST_OP_LDDW_IMM) {
                    // The second immediate lives in the next slot; 0 is passed when there is none.
                    const int32_t next_imm = has_next ? insts[pc + 1].imm : 0;
                    new_ins = makeLddw(inst, next_imm, insts, pc);
                    skip_instruction = true;
                    break;
                }
                // Every other LD-class opcode is decoded as a memory operation.
                [[fallthrough]];
            case INST_CLS_LDX:
            case INST_CLS_ST:
            case INST_CLS_STX: new_ins = makeMemOp(pc, inst); break;

            case INST_CLS_ALU:
            case INST_CLS_ALU64: {
                new_ins = makeAluOp(pc, inst);

                // Merge (rX <<= 32; rX >>>= 32) into wX = rX
                //       (rX <<= 32; rX >>= 32)  into rX s32= rX
                if (!has_next) {
                    break;
                }
                const EbpfInst next = insts[pc + 1];
                const auto dst = Reg{inst.dst};

                if (new_ins != shift32(dst, Bin::Op::LSH)) {
                    break;
                }

                if ((next.opcode & INST_CLS_MASK) != INST_CLS_ALU64) {
                    break;
                }
                const auto next_ins = makeAluOp(pc + 1, next);
                if (next_ins == shift32(dst, Bin::Op::RSH)) {
                    new_ins = Bin{.op = Bin::Op::MOV, .dst = dst, .v = dst, .is64 = false};
                    skip_instruction = true;
                } else if (next_ins == shift32(dst, Bin::Op::ARSH)) {
                    new_ins = Bin{.op = Bin::Op::MOVSX32, .dst = dst, .v = dst, .is64 = true};
                    skip_instruction = true;
                }

                break;
            }

            case INST_CLS_JMP32:
            case INST_CLS_JMP: {
                new_ins = makeJmp(inst, insts, pc);
                if (std::holds_alternative<Exit>(new_ins)) {
                    fallthrough = false;
                    has_exit = true;
                }
                if (const auto pjmp = std::get_if<Jmp>(&new_ins)) {
                    // A jump without a condition never continues to the next instruction.
                    if (!pjmp->cond) {
                        fallthrough = false;
                    }
                }
                break;
            }
            default: CRAB_ERROR("invalid class: ", inst.opcode & INST_CLS_MASK);
            }

            // NOTE(review): when the final raw slot is consumed as the second half
            // of a merged pair, pc is still size - 2 here, so this note is not
            // emitted for that case — confirm that is intended.
            if (!has_next && fallthrough) {
                note("fallthrough in last instruction");
            }

            std::optional<btf_line_info_t> current_line_info = {};

            if (options.verbosity_opts.print_line_info && pc < info.line_info.size()) {
                current_line_info = info.line_info.at(pc);
            }

            prog.emplace_back(Label(gsl::narrow<int>(pc)), new_ins, current_line_info);

            // Advance past every raw slot consumed, calling note_next_pc() once per slot.
            pc++;
            note_next_pc();
            if (skip_instruction) {
                pc++;
                note_next_pc();
            }
        }

        if (!has_exit) {
            note("no exit instruction");
        }
        return prog;
    }
1,488✔
713
};
714

715
std::variant<InstructionSeq, std::string> unmarshal(const RawProgram& raw_prog, vector<vector<string>>& notes,
5,318✔
716
                                                    const prevail::VerifierOptions& options) {
717
    try {
2,659✔
718
        return Unmarshaller{notes, raw_prog.info}.unmarshal(raw_prog.prog, options);
7,977✔
719
    } catch (InvalidInstruction& arg) {
1,488✔
720
        std::ostringstream ss;
1,488✔
721
        ss << arg.pc << ": " << arg.what() << "\n";
1,488✔
722
        return ss.str();
1,488✔
723
    }
1,488✔
724
}
725

726
std::variant<InstructionSeq, std::string> unmarshal(const RawProgram& raw_prog,
5,318✔
727
                                                    const prevail::VerifierOptions& options) {
728
    vector<vector<string>> notes;
5,318✔
729
    return unmarshal(raw_prog, notes, options);
10,636✔
730
}
5,318✔
731

732
int size(const Instruction& inst) {
1,340✔
733
    if (const auto pins = std::get_if<Bin>(&inst)) {
1,340✔
734
        if (pins->lddw) {
556✔
735
            return 2;
2✔
736
        }
737
    }
738
    if (std::holds_alternative<LoadMapFd>(inst)) {
1,336✔
739
        return 2;
24✔
740
    }
741
    if (std::holds_alternative<LoadMapAddress>(inst)) {
1,288✔
742
        return 2;
743
    }
744
    if (std::holds_alternative<LoadPseudo>(inst)) {
1,288✔
745
        return 2;
×
746
    }
747
    return 1;
644✔
748
}
749

750
} // namespace prevail
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc