• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Alan-Jowett / ebpf-verifier / 21993000823

13 Feb 2026 01:02PM UTC coverage: 86.313% (-0.5%) from 86.783%
21993000823

push

github

web-flow
ISA feature support matrix and precise rejection semantics (#999)

* ISA feature support matrix and precise rejection semantics

Signed-off-by: Elazar Gershuni <elazarg@gmail.com>

282 of 380 new or added lines in 14 files covered. (74.21%)

3 existing lines in 3 files now uncovered.

9535 of 11047 relevant lines covered (86.31%)

3060772.25 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.78
/src/ir/unmarshal.cpp
1
// Copyright (c) Prevail Verifier contributors.
2
// SPDX-License-Identifier: MIT
3
#include <cassert>
4
#include <iostream>
5
#include <string>
6
#include <vector>
7

8
#include "crab_utils/debug.hpp"
9
#include "crab_utils/num_safety.hpp"
10
#include "ir/unmarshal.hpp"
11
#include "spec/vm_isa.hpp"
12

13
using std::string;
14
using std::vector;
15

16
namespace prevail {
17
int opcode_to_width(const uint8_t opcode) {
57,536✔
18
    switch (opcode & INST_SIZE_MASK) {
57,536✔
19
    case INST_SIZE_B: return 1;
2,988✔
20
    case INST_SIZE_H: return 2;
8,912✔
21
    case INST_SIZE_W: return 4;
17,234✔
22
    case INST_SIZE_DW: return 8;
25,414✔
23
    default: CRAB_ERROR("unexpected opcode", opcode);
×
24
    }
25
}
26

27
uint8_t width_to_opcode(const int width) {
130✔
28
    switch (width) {
130✔
29
    case 1: return INST_SIZE_B;
14✔
30
    case 2: return INST_SIZE_H;
14✔
31
    case 4: return INST_SIZE_W;
38✔
32
    case 8: return INST_SIZE_DW;
50✔
33
    default: CRAB_ERROR("unexpected width", width);
×
34
    }
35
}
36

37
/// Reports a mismatch between two values of a named field on std::cerr.
///
/// Nothing is printed when `actual` equals `expected`. On mismatch both values are
/// converted to int and printed in hexadecimal.
///
/// @param field    Label printed in front of the mismatch.
/// @param actual   Value that was observed.
/// @param expected Value that was required.
template <typename T>
void compare(const std::string& field, T actual, T expected) {
    if (actual != expected) {
        // std::hex is sticky: remember the stream's formatting so later, unrelated
        // diagnostics written to std::cerr are not silently switched to hexadecimal.
        const std::ios_base::fmtflags saved_flags = std::cerr.flags();
        std::cerr << field << ": (actual) " << std::hex << static_cast<int>(actual)
                  << " != " << static_cast<int>(expected) << " (expected)\n";
        std::cerr.flags(saved_flags);
    }
}
44

45
/// Formats "<msg> op 0x<opcode in lowercase hex>" for use in decoding diagnostics.
static std::string make_opcode_message(const char* msg, const uint8_t opcode) {
    std::ostringstream text;
    text << msg;
    text << " op 0x";
    // Widen to int so the opcode is printed as a number rather than as a character.
    text << std::hex << static_cast<int>(opcode);
    return text.str();
}
50

51
struct InvalidInstruction final : std::invalid_argument {
52
    size_t pc;
53
    explicit InvalidInstruction(const size_t pc, const char* what) : std::invalid_argument{what}, pc{pc} {}
1,104✔
54
    InvalidInstruction(const size_t pc, const std::string& what) : std::invalid_argument{what}, pc{pc} {}
237✔
55
    InvalidInstruction(const size_t pc, const uint8_t opcode)
402✔
56
        : std::invalid_argument{make_opcode_message("bad instruction", opcode)}, pc{pc} {}
603✔
57
};
58

59
static auto getMemIsLoad(const uint8_t opcode) -> bool {
85,666✔
60
    switch (opcode & INST_CLS_MASK) {
85,666✔
61
    case INST_CLS_LD:
18,747✔
62
    case INST_CLS_LDX: return true;
18,747✔
63
    case INST_CLS_ST:
48,172✔
64
    case INST_CLS_STX: return false;
48,172✔
65
    default: CRAB_ERROR("unexpected opcode", opcode);
×
66
    }
67
}
68

69
static auto getMemWidth(const uint8_t opcode) -> int {
87,070✔
70
    switch (opcode & INST_SIZE_MASK) {
87,070✔
71
    case INST_SIZE_B: return 1;
7,474✔
72
    case INST_SIZE_H: return 2;
13,656✔
73
    case INST_SIZE_W: return 4;
29,852✔
74
    case INST_SIZE_DW: return 8;
28,614✔
75
    default: CRAB_ERROR("unexpected opcode", opcode);
×
76
    }
77
}
78

79
/// Builds a 64-bit binary instruction that applies `op` to `dst` with the immediate 32.
static Instruction shift32(const Reg dst, const Bin::Op op) {
    Bin shift{.op = op, .dst = dst, .v = Imm{32}, .is64 = true, .lddw = false};
    return shift;
}
82

83
struct Unmarshaller {
84
    vector<vector<string>>& notes;
85
    const ProgramInfo& info;
86
    // ReSharper disable once CppMemberFunctionMayBeConst
87
    void note(const string& what) { notes.back().emplace_back(what); }
856✔
88
    // ReSharper disable once CppMemberFunctionMayBeConst
89
    void note_next_pc() { notes.emplace_back(); }
388,812✔
90
    /// Binds the unmarshaller to the caller's note storage and program info.
    explicit Unmarshaller(vector<vector<string>>& notes, const ProgramInfo& info) : notes(notes), info(info) {
        // Open the first note list right away so note() always has a list to append to.
        this->note_next_pc();
    }
93

94
    auto getAluOp(const size_t pc, const EbpfInst inst) -> std::variant<Bin::Op, Un::Op> {
188,746✔
95
        // First handle instructions that support a non-zero offset.
96
        switch (inst.opcode & INST_ALU_OP_MASK) {
188,746✔
97
        case INST_ALU_OP_DIV:
2,512✔
98
            switch (inst.offset) {
2,512✔
99
            case 0: return Bin::Op::UDIV;
2,434✔
100
            case 1: return Bin::Op::SDIV;
70✔
101
            default: throw InvalidInstruction(pc, make_opcode_message("invalid offset for", inst.opcode));
12✔
102
            }
103
        case INST_ALU_OP_MOD:
166✔
104
            switch (inst.offset) {
166✔
105
            case 0: return Bin::Op::UMOD;
74✔
106
            case 1: return Bin::Op::SMOD;
84✔
107
            default: throw InvalidInstruction(pc, make_opcode_message("invalid offset for", inst.opcode));
12✔
108
            }
109
        case INST_ALU_OP_MOV:
100,512✔
110
            if (inst.offset > 0 && !(inst.opcode & INST_SRC_REG)) {
100,512✔
111
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
6✔
112
            }
113
            switch (inst.offset) {
100,508✔
114
            case 0: return Bin::Op::MOV;
100,432✔
115
            case 8: return Bin::Op::MOVSX8;
24✔
116
            case 16: return Bin::Op::MOVSX16;
24✔
117
            case 32: return Bin::Op::MOVSX32;
14✔
118
            default: throw InvalidInstruction(pc, make_opcode_message("invalid offset for", inst.opcode));
21✔
119
            }
120
        default: break;
85,556✔
121
        }
122

123
        // All the rest require a zero offset.
124
        if (inst.offset != 0) {
85,556✔
125
            throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
141✔
126
        }
127

128
        switch (inst.opcode & INST_ALU_OP_MASK) {
85,462✔
129
        case INST_ALU_OP_ADD: return Bin::Op::ADD;
34,654✔
130
        case INST_ALU_OP_SUB: return Bin::Op::SUB;
876✔
131
        case INST_ALU_OP_MUL: return Bin::Op::MUL;
226✔
132
        case INST_ALU_OP_OR: return Bin::Op::OR;
10,842✔
133
        case INST_ALU_OP_AND: return Bin::Op::AND;
9,718✔
134
        case INST_ALU_OP_LSH: return Bin::Op::LSH;
12,398✔
135
        case INST_ALU_OP_RSH: return Bin::Op::RSH;
7,684✔
136
        case INST_ALU_OP_NEG:
194✔
137
            // Negation is a unary operation. The SRC bit, src, and imm must be all 0.
138
            if (inst.opcode & INST_SRC_REG) {
194✔
139
                throw InvalidInstruction{pc, inst.opcode};
4✔
140
            }
141
            if (inst.src != 0) {
190✔
142
                throw InvalidInstruction{pc, inst.opcode};
4✔
143
            }
144
            if (inst.imm != 0) {
186✔
145
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
6✔
146
            }
147
            return Un::Op::NEG;
182✔
148
        case INST_ALU_OP_XOR: return Bin::Op::XOR;
1,142✔
149
        case INST_ALU_OP_ARSH:
4,430✔
150
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU) {
4,430✔
151
                note("arsh32 is not allowed");
372✔
152
            }
153
            return Bin::Op::ARSH;
4,430✔
154
        case INST_ALU_OP_END:
3,282✔
155
            if (inst.src != 0) {
3,282✔
156
                throw InvalidInstruction{pc, inst.opcode};
18✔
157
            }
158
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64) {
3,264✔
159
                if (inst.opcode & INST_END_BE) {
52✔
160
                    throw InvalidInstruction(pc, inst.opcode);
2✔
161
                }
162
                switch (inst.imm) {
50✔
163
                case 16: return Un::Op::SWAP16;
14✔
164
                case 32: return Un::Op::SWAP32;
14✔
165
                case 64: return Un::Op::SWAP64;
14✔
166
                default: throw InvalidInstruction(pc, "unsupported immediate");
8✔
167
                }
168
            }
169
            switch (inst.imm) {
3,212✔
170
            case 16: return (inst.opcode & INST_END_BE) ? Un::Op::BE16 : Un::Op::LE16;
2,966✔
171
            case 32: return (inst.opcode & INST_END_BE) ? Un::Op::BE32 : Un::Op::LE32;
194✔
172
            case 64: return (inst.opcode & INST_END_BE) ? Un::Op::BE64 : Un::Op::LE64;
59✔
173
            default: throw InvalidInstruction(pc, "unsupported immediate");
16✔
174
            }
175
        case 0xe0: throw InvalidInstruction{pc, inst.opcode};
8✔
176
        case 0xf0: throw InvalidInstruction{pc, inst.opcode};
8✔
177
        default: return {};
×
178
        }
179
    }
180

181
    /// Decodes the atomic operation stored in `inst.imm` with the INST_FETCH bit masked off.
    ///
    /// @param pc   Program counter recorded in any InvalidInstruction that is thrown.
    /// @param inst Instruction whose immediate selects the operation.
    /// @return The decoded Atomic::Op.
    /// @throws InvalidInstruction ("unsupported immediate") when the immediate is not one of the
    ///         handled operations, or when XCHG/CMPXCHG is encoded without the INST_FETCH bit.
    static auto getAtomicOp(const size_t pc, const EbpfInst inst) -> Atomic::Op {
        switch (const auto op = gsl::narrow<Atomic::Op>(inst.imm & ~INST_FETCH)) {
        case Atomic::Op::XCHG:
        case Atomic::Op::CMPXCHG:
            // These two are only accepted with the fetch bit set.
            if ((inst.imm & INST_FETCH) == 0) {
                throw InvalidInstruction(pc, "unsupported immediate");
            }
            // Intentional: once validated, they are returned like the operations below.
            [[fallthrough]];
        case Atomic::Op::ADD:
        case Atomic::Op::OR:
        case Atomic::Op::AND:
        case Atomic::Op::XOR: return op;
        }
        throw InvalidInstruction(pc, "unsupported immediate");
    }
195

196
    /// Widens `imm` to 64 bits preserving its sign, then reinterprets the result as unsigned.
    static uint64_t sign_extend(const int32_t imm) {
        const int64_t widened{imm};
        return to_unsigned(widened);
    }
197

198
    /// Converts `imm` to unsigned at its own width first, then widens it to 64 bits.
    static uint64_t zero_extend(const int32_t imm) {
        const uint64_t widened{to_unsigned(imm)};
        return widened;
    }
199

200
    /// Decodes the second operand of a binary ALU instruction.
    ///
    /// With the source-register bit set the operand is register `inst.src` and `imm` must be zero;
    /// otherwise the operand is the sign-extended immediate and `src` must be zero.
    ///
    /// @throws InvalidInstruction when the field that must be zero is not.
    static auto getBinValue(const Pc pc, const EbpfInst inst) -> Value {
        const bool from_register = (inst.opcode & INST_SRC_REG) != 0;
        if (!from_register) {
            if (inst.src != 0) {
                throw InvalidInstruction{pc, inst.opcode};
            }
            // Imm is a signed 32-bit number; it is stored sign-extended to 64 bits.
            return Imm{sign_extend(inst.imm)};
        }
        if (inst.imm != 0) {
            throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
        }
        return Reg{inst.src};
    }
213

214
    /// Maps the upper four bits of a jump opcode to its comparison operator.
    ///
    /// Selectors 0x0 (goto), 0x8 (call) and 0x9 (exit) carry no condition and yield a
    /// value-initialized Condition::Op.
    ///
    /// @throws InvalidInstruction for selectors 0xe and 0xf.
    static auto getJmpOp(const size_t pc, const uint8_t opcode) -> Condition::Op {
        using Op = Condition::Op;
        const unsigned selector = (opcode >> 4) & 0xF;
        switch (selector) {
        case 0x1: return Op::EQ;
        case 0x2: return Op::GT;
        case 0x3: return Op::GE;
        case 0x4: return Op::SET;
        case 0x5: return Op::NE;
        case 0x6: return Op::SGT;
        case 0x7: return Op::SGE;
        case 0xa: return Op::LT;
        case 0xb: return Op::LE;
        case 0xc: return Op::SLT;
        case 0xd: return Op::SLE;
        case 0xe:
        case 0xf: throw InvalidInstruction(pc, opcode);
        default: return {}; // 0x0 goto, 0x8 call, 0x9 exit
        }
    }
236

237
    /// Decodes a load/store-class instruction into a Packet, Mem or Atomic instruction.
    ///
    /// @param pc   Program counter recorded in any InvalidInstruction that is thrown.
    /// @param inst Instruction being decoded.
    /// @return Packet for the ABS/IND modes, Mem for MEM/MEMSX, Atomic for the ATOMIC mode.
    /// @throws InvalidInstruction when a register index exceeds R10_STACK_POINTER, when the
    ///         mode/class/size combination is not accepted, when a field that must be zero is not,
    ///         or when a load would write r10.
    ///
    /// An r10-based access outside the stack range is not rejected; it only gets a note attached.
    auto makeMemOp(const Pc pc, const EbpfInst inst) -> Instruction {
        // Checked once here for every mode, so the branches below need not re-validate registers.
        if (inst.dst > R10_STACK_POINTER || inst.src > R10_STACK_POINTER) {
            throw InvalidInstruction(pc, "bad register");
        }

        const int width = getMemWidth(inst.opcode);
        const bool isLD = (inst.opcode & INST_CLS_MASK) == INST_CLS_LD;

        // Shared by MEM and MEMSX: flag an r10-relative access whose byte range ends above
        // offset 0 or starts below -EBPF_TOTAL_STACK_SIZE.
        const auto note_if_outside_stack = [&](const uint8_t basereg) {
            if (basereg == R10_STACK_POINTER && (inst.offset + width > 0 || inst.offset < -EBPF_TOTAL_STACK_SIZE)) {
                note("Stack access out of bounds");
            }
        };

        switch (inst.opcode & INST_MODE_MASK) {
        case INST_MODE_IMM: throw InvalidInstruction(pc, inst.opcode);

        case INST_MODE_ABS:
            // Only LD-class accesses narrower than 8 bytes are accepted in this mode.
            if (!isLD || (width == 8)) {
                throw InvalidInstruction(pc, inst.opcode);
            }
            if (inst.dst != 0) {
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
            }
            if (inst.src > 0) {
                throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
            }
            if (inst.offset != 0) {
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
            }
            return Packet{.width = width, .offset = inst.imm, .regoffset = {}};

        case INST_MODE_IND:
            // Same class/size restriction as ABS, but src supplies a register offset.
            if (!isLD || (width == 8)) {
                throw InvalidInstruction(pc, inst.opcode);
            }
            if (inst.dst != 0) {
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
            }
            if (inst.offset != 0) {
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
            }
            return Packet{.width = width, .offset = inst.imm, .regoffset = Reg{inst.src}};

        case INST_MODE_MEM: {
            if (isLD) {
                throw InvalidInstruction(pc, inst.opcode);
            }
            const bool isLoad = getMemIsLoad(inst.opcode);
            if (isLoad && inst.dst == R10_STACK_POINTER) {
                throw InvalidInstruction(pc, "cannot modify r10");
            }
            // A clear low opcode bit selects the immediate form; that form must not name a
            // source register, and the register form must not carry an immediate.
            const bool isImm = !(inst.opcode & 1);
            if (isImm && inst.src != 0) {
                throw InvalidInstruction(pc, inst.opcode);
            }
            if (!isImm && inst.imm != 0) {
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
            }

            assert(!(isLoad && isImm));
            // Loads address memory through src, stores through dst.
            const uint8_t basereg = isLoad ? inst.src : inst.dst;
            note_if_outside_stack(basereg);

            return Mem{
                .access =
                    Deref{
                        .width = width,
                        .basereg = Reg{basereg},
                        .offset = inst.offset,
                    },
                .value = isLoad  ? Value{Reg{inst.dst}}
                         : isImm ? Value{Imm{zero_extend(inst.imm)}}
                                 : Value{Reg{inst.src}},
                .is_load = isLoad,
            };
        }

        case INST_MODE_MEMSX: {
            // Sign-extending loads are only valid for LDX B/H/W forms.
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_LDX || width == 8) {
                throw InvalidInstruction(pc, inst.opcode);
            }
            if (inst.dst == R10_STACK_POINTER) {
                throw InvalidInstruction(pc, "cannot modify r10");
            }
            if (inst.imm != 0) {
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
            }
            note_if_outside_stack(inst.src);
            return Mem{
                .access =
                    Deref{
                        .width = width,
                        .basereg = Reg{inst.src},
                        .offset = inst.offset,
                    },
                .value = Value{Reg{inst.dst}},
                .is_load = true,
                .is_signed = true,
            };
        }

        case INST_MODE_ATOMIC:
            // Only STX-class W and DW accesses are accepted in this mode.
            if (((inst.opcode & INST_CLS_MASK) != INST_CLS_STX) ||
                ((inst.opcode & INST_SIZE_MASK) != INST_SIZE_W && (inst.opcode & INST_SIZE_MASK) != INST_SIZE_DW)) {
                throw InvalidInstruction(pc, inst.opcode);
            }
            return Atomic{
                .op = getAtomicOp(pc, inst),
                .fetch = (inst.imm & INST_FETCH) == INST_FETCH,
                .access =
                    Deref{
                        .width = width,
                        .basereg = Reg{inst.dst},
                        .offset = inst.offset,
                    },
                .valreg = Reg{inst.src},
            };

        default: throw InvalidInstruction(pc, inst.opcode);
        }
    }
361

362
    /// Decodes an ALU-class instruction into a Un or Bin instruction.
    ///
    /// @param pc   Program counter recorded in any InvalidInstruction that is thrown.
    /// @param inst Instruction being decoded.
    /// @throws InvalidInstruction when dst is r10, when a register index exceeds R10_STACK_POINTER,
    ///         or when getAluOp / getBinValue reject the encoding.
    ///
    /// Unless division by zero is allowed by the thread-local options, UDIV/UMOD by an immediate
    /// zero gets a note attached.
    auto makeAluOp(const size_t pc, const EbpfInst inst) -> Instruction {
        const bool is64 = (inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64;
        if (inst.dst == R10_STACK_POINTER) {
            throw InvalidInstruction(pc, "invalid target r10");
        }
        if (inst.dst > R10_STACK_POINTER || inst.src > R10_STACK_POINTER) {
            throw InvalidInstruction(pc, "bad register");
        }

        const std::variant<Bin::Op, Un::Op> decoded = getAluOp(pc, inst);

        if (const auto* unary = std::get_if<Un::Op>(&decoded)) {
            return Un{.op = *unary, .dst = Reg{inst.dst}, .is64 = is64};
        }

        const Bin::Op op = std::get<Bin::Op>(decoded);
        Bin res{
            .op = op,
            .dst = Reg{inst.dst},
            .v = getBinValue(pc, inst),
            .is64 = is64,
        };
        const bool unsigned_division = op == Bin::Op::UDIV || op == Bin::Op::UMOD;
        if (!thread_local_options.allow_division_by_zero && unsigned_division) {
            const auto pimm = std::get_if<Imm>(&res.v);
            if (pimm && pimm->v == 0) {
                note("division by zero");
            }
        }
        return res;
    }
391

392
    [[nodiscard]]
393
    auto makeLddw(const EbpfInst inst, const int32_t next_imm, const vector<EbpfInst>& insts, const Pc pc) const
22,140✔
394
        -> Instruction {
395
        if (pc >= insts.size() - 1) {
22,140✔
396
            throw InvalidInstruction(pc, "incomplete lddw");
18✔
397
        }
398
        const EbpfInst next = insts[pc + 1];
22,122✔
399
        if (next.opcode != 0 || next.dst != 0 || next.src != 0 || next.offset != 0) {
22,122✔
400
            throw InvalidInstruction(pc, "invalid lddw");
16✔
401
        }
402
        if (inst.offset != 0) {
22,106✔
403
            throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
14✔
404
        }
405
        if (inst.dst > R10_STACK_POINTER) {
22,092✔
406
            throw InvalidInstruction(pc, "bad register");
14✔
407
        }
408

409
        switch (inst.src) {
22,078✔
410
        case INST_LD_MODE_IMM:
7,368✔
411
            return Bin{
18,420✔
412
                .op = Bin::Op::MOV,
413
                .dst = Reg{inst.dst},
7,368✔
414
                .v = Imm{merge(inst.imm, next_imm)},
11,052✔
415
                .is64 = true,
416
                .lddw = true,
417
            };
7,368✔
418
        case INST_LD_MODE_MAP_FD: {
14,254✔
419
            // magic number, meaning we're a per-process file descriptor defining the map.
420
            // (for details, look for BPF_PSEUDO_MAP_FD in the kernel)
421
            if (next.imm != 0) {
14,254✔
422
                throw InvalidInstruction(pc, "lddw uses reserved fields");
2✔
423
            }
424
            return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm};
14,252✔
425
        }
426
        case INST_LD_MODE_MAP_VALUE: return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm};
428✔
427
        case INST_LD_MODE_VARIABLE_ADDR:
8✔
428
            if (next.imm != 0) {
8✔
NEW
429
                throw InvalidInstruction(pc, "lddw uses reserved fields");
×
430
            }
431
            return LoadPseudo{.dst = Reg{inst.dst},
8✔
432
                              .addr = PseudoAddress{
433
                                  .kind = PseudoAddress::Kind::VARIABLE_ADDR, .imm = inst.imm, .next_imm = next_imm}};
8✔
434
        case INST_LD_MODE_CODE_ADDR:
6✔
435
            if (next.imm != 0) {
6✔
NEW
436
                throw InvalidInstruction(pc, "lddw uses reserved fields");
×
437
            }
438
            return LoadPseudo{
6✔
439
                .dst = Reg{inst.dst},
6✔
440
                .addr = PseudoAddress{.kind = PseudoAddress::Kind::CODE_ADDR, .imm = inst.imm, .next_imm = next_imm}};
6✔
441
        case INST_LD_MODE_MAP_BY_IDX:
6✔
442
            if (next.imm != 0) {
6✔
NEW
443
                throw InvalidInstruction(pc, "lddw uses reserved fields");
×
444
            }
445
            return LoadPseudo{
6✔
446
                .dst = Reg{inst.dst},
6✔
447
                .addr = PseudoAddress{.kind = PseudoAddress::Kind::MAP_BY_IDX, .imm = inst.imm, .next_imm = next_imm}};
6✔
448
        case INST_LD_MODE_MAP_VALUE_BY_IDX:
6✔
449
            // map_value_by_idx carries the value offset in next_imm (same encoding role as map_value),
450
            // so next.imm is not reserved in this mode.
451
            return LoadPseudo{.dst = Reg{inst.dst},
6✔
452
                              .addr = PseudoAddress{.kind = PseudoAddress::Kind::MAP_VALUE_BY_IDX,
453
                                                    .imm = inst.imm,
3✔
454
                                                    .next_imm = next_imm}};
6✔
455
        default: throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
2✔
456
        }
457
    }
458

459
    /// Maps a prototype argument type to the ArgSingle kind used for single-register arguments.
    /// The *_OR_NULL variants map to the same kind as their non-null counterparts; any type not
    /// listed yields a value-initialized kind.
    static ArgSingle::Kind toArgSingleKind(const ebpf_argument_type_t t) {
        switch (t) {
        case EBPF_ARGUMENT_TYPE_ANYTHING: return ArgSingle::Kind::ANYTHING;
        case EBPF_ARGUMENT_TYPE_PTR_TO_STACK:
        case EBPF_ARGUMENT_TYPE_PTR_TO_STACK_OR_NULL: return ArgSingle::Kind::PTR_TO_STACK;
        case EBPF_ARGUMENT_TYPE_PTR_TO_MAP: return ArgSingle::Kind::MAP_FD;
        case EBPF_ARGUMENT_TYPE_PTR_TO_MAP_OF_PROGRAMS: return ArgSingle::Kind::MAP_FD_PROGRAMS;
        case EBPF_ARGUMENT_TYPE_PTR_TO_MAP_KEY: return ArgSingle::Kind::PTR_TO_MAP_KEY;
        case EBPF_ARGUMENT_TYPE_PTR_TO_MAP_VALUE: return ArgSingle::Kind::PTR_TO_MAP_VALUE;
        case EBPF_ARGUMENT_TYPE_PTR_TO_CTX:
        case EBPF_ARGUMENT_TYPE_PTR_TO_CTX_OR_NULL: return ArgSingle::Kind::PTR_TO_CTX;
        default: return {};
        }
    }
474

475
    /// Maps a prototype argument type to the ArgPair kind used for pointer-plus-size arguments.
    /// The *_OR_NULL variants map to the same kind as their non-null counterparts; any other
    /// type yields a value-initialized kind.
    static ArgPair::Kind toArgPairKind(const ebpf_argument_type_t t) {
        const bool readable =
            t == EBPF_ARGUMENT_TYPE_PTR_TO_READABLE_MEM_OR_NULL || t == EBPF_ARGUMENT_TYPE_PTR_TO_READABLE_MEM;
        if (readable) {
            return ArgPair::Kind::PTR_TO_READABLE_MEM;
        }
        const bool writable =
            t == EBPF_ARGUMENT_TYPE_PTR_TO_WRITABLE_MEM_OR_NULL || t == EBPF_ARGUMENT_TYPE_PTR_TO_WRITABLE_MEM;
        if (writable) {
            return ArgPair::Kind::PTR_TO_WRITABLE_MEM;
        }
        return {};
    }
485

486
    [[nodiscard]]
487
    auto makeCall(const int32_t imm) const -> Call {
24,182✔
488
        const EbpfHelperPrototype proto = info.platform->get_helper_prototype(imm);
24,182✔
489
        Call res;
24,182✔
490
        res.func = imm;
24,182✔
491
        res.name = proto.name;
24,182✔
492
        auto mark_unsupported = [&](const std::string& why) -> Call {
12,091✔
NEW
493
            res.is_supported = false;
×
NEW
494
            res.unsupported_reason = why;
×
NEW
495
            return res;
×
496
        };
24,182✔
497
        if (proto.return_type == EBPF_RETURN_TYPE_UNSUPPORTED) {
24,182✔
NEW
498
            return mark_unsupported(std::string("helper prototype is unavailable on this platform: ") + proto.name);
×
499
        }
500
        res.reallocate_packet = proto.reallocate_packet;
24,182✔
501
        res.is_map_lookup = proto.return_type == EBPF_RETURN_TYPE_PTR_TO_MAP_VALUE_OR_NULL;
24,182✔
502
        const std::array<ebpf_argument_type_t, 7> args = {
24,182✔
503
            {EBPF_ARGUMENT_TYPE_DONTCARE, proto.argument_type[0], proto.argument_type[1], proto.argument_type[2],
24,182✔
504
             proto.argument_type[3], proto.argument_type[4], EBPF_ARGUMENT_TYPE_DONTCARE}};
24,182✔
505
        for (size_t i = 1; i < args.size() - 1; i++) {
130,770✔
506
            switch (args[i]) {
82,968✔
NEW
507
            case EBPF_ARGUMENT_TYPE_UNSUPPORTED:
×
NEW
508
                return mark_unsupported(std::string("helper argument type is unavailable on this platform: ") +
×
NEW
509
                                        proto.name);
×
510
            case EBPF_ARGUMENT_TYPE_DONTCARE: return res;
19,970✔
511
            case EBPF_ARGUMENT_TYPE_ANYTHING:
57,048✔
512
            case EBPF_ARGUMENT_TYPE_PTR_TO_MAP:
28,524✔
513
            case EBPF_ARGUMENT_TYPE_PTR_TO_MAP_OF_PROGRAMS:
28,524✔
514
            case EBPF_ARGUMENT_TYPE_PTR_TO_MAP_KEY:
28,524✔
515
            case EBPF_ARGUMENT_TYPE_PTR_TO_MAP_VALUE:
28,524✔
516
            case EBPF_ARGUMENT_TYPE_PTR_TO_STACK:
28,524✔
517
            case EBPF_ARGUMENT_TYPE_PTR_TO_CTX:
28,524✔
518
                res.singles.push_back({toArgSingleKind(args[i]), false, Reg{gsl::narrow<uint8_t>(i)}});
57,048✔
519
                break;
57,048✔
520
            case EBPF_ARGUMENT_TYPE_PTR_TO_STACK_OR_NULL:
48✔
521
            case EBPF_ARGUMENT_TYPE_PTR_TO_CTX_OR_NULL:
24✔
522
                res.singles.push_back({toArgSingleKind(args[i]), true, Reg{gsl::narrow<uint8_t>(i)}});
48✔
523
                break;
48✔
524
            case EBPF_ARGUMENT_TYPE_CONST_SIZE: {
×
525
                // Sanity check: This argument should never be seen in isolation.
526
                return mark_unsupported(
527
                    std::string("mismatched EBPF_ARGUMENT_TYPE_PTR_TO* and EBPF_ARGUMENT_TYPE_CONST_SIZE: ") +
×
528
                    proto.name);
×
529
            }
530
            case EBPF_ARGUMENT_TYPE_CONST_SIZE_OR_ZERO: {
×
531
                // Sanity check: This argument should never be seen in isolation.
532
                return mark_unsupported(
533
                    std::string("mismatched EBPF_ARGUMENT_TYPE_PTR_TO* and EBPF_ARGUMENT_TYPE_CONST_SIZE_OR_ZERO: ") +
×
534
                    proto.name);
×
535
            }
536
            case EBPF_ARGUMENT_TYPE_PTR_TO_READABLE_MEM_OR_NULL:
5,902✔
537
            case EBPF_ARGUMENT_TYPE_PTR_TO_READABLE_MEM:
2,951✔
538
            case EBPF_ARGUMENT_TYPE_PTR_TO_WRITABLE_MEM_OR_NULL:
2,951✔
539
            case EBPF_ARGUMENT_TYPE_PTR_TO_WRITABLE_MEM:
2,951✔
540
                // Sanity check: This argument must be followed by EBPF_ARGUMENT_TYPE_CONST_SIZE or
541
                // EBPF_ARGUMENT_TYPE_CONST_SIZE_OR_ZERO.
542
                if (args.size() - i < 2) {
5,902✔
543
                    return mark_unsupported(
544
                        std::string(
545
                            "missing EBPF_ARGUMENT_TYPE_CONST_SIZE or EBPF_ARGUMENT_TYPE_CONST_SIZE_OR_ZERO: ") +
546
                        proto.name);
547
                }
548
                if (args[i + 1] != EBPF_ARGUMENT_TYPE_CONST_SIZE &&
7,212✔
549
                    args[i + 1] != EBPF_ARGUMENT_TYPE_CONST_SIZE_OR_ZERO) {
2,620✔
550
                    return mark_unsupported(
551
                        std::string("Pointer argument not followed by EBPF_ARGUMENT_TYPE_CONST_SIZE or "
×
552
                                    "EBPF_ARGUMENT_TYPE_CONST_SIZE_OR_ZERO: ") +
×
553
                        proto.name);
×
554
                }
555
                const bool can_be_zero = (args[i + 1] == EBPF_ARGUMENT_TYPE_CONST_SIZE_OR_ZERO);
5,902✔
556
                const bool or_null = args[i] == EBPF_ARGUMENT_TYPE_PTR_TO_READABLE_MEM_OR_NULL ||
8,315✔
557
                                     args[i] == EBPF_ARGUMENT_TYPE_PTR_TO_WRITABLE_MEM_OR_NULL;
4,826✔
558
                res.pairs.push_back({toArgPairKind(args[i]), or_null, Reg{gsl::narrow<uint8_t>(i)},
9,606✔
559
                                     Reg{gsl::narrow<uint8_t>(i + 1)}, can_be_zero});
5,902✔
560
                i++;
5,902✔
561
                break;
5,902✔
562
            }
563
        }
564
        return res;
4,212✔
565
    }
24,182✔
566

567
    /// Given a program counter and an offset, get the label of the target instruction.
568
    static Label getJumpTarget(const int32_t offset, const vector<EbpfInst>& insts, const Pc pc) {
39,554✔
569
        const Pc new_pc = pc + 1 + offset;
39,554✔
570
        if (new_pc >= insts.size()) {
39,554✔
571
            throw InvalidInstruction(pc, "jump out of bounds");
138✔
572
        }
573
        if (insts[new_pc].opcode == 0) {
39,416✔
574
            throw InvalidInstruction(pc, "jump to middle of lddw");
×
575
        }
576
        return Label{gsl::narrow<int>(new_pc)};
39,416✔
577
    }
578

579
    /// Decodes a local call whose target is given by `inst.imm` relative to `pc`.
    ///
    /// @throws InvalidInstruction when the source-register opcode bit is set, when dst is
    ///         non-zero, or when the jump target is rejected by getJumpTarget.
    static auto makeCallLocal(const EbpfInst inst, const vector<EbpfInst>& insts, const Pc pc) -> CallLocal {
        const bool has_src_bit = (inst.opcode & INST_SRC_REG) != 0;
        if (has_src_bit) {
            throw InvalidInstruction(pc, inst.opcode);
        }
        if (inst.dst != 0) {
            throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
        }
        const Label target = getJumpTarget(inst.imm, insts, pc);
        return CallLocal{.target = target};
    }
588

589
    /// Decodes a call through a register.
    ///
    /// The register number is normally taken from `dst`. When `imm` is non-zero it is taken from
    /// `imm` instead, provided `dst` is zero and `imm` is a valid register number.
    ///
    /// @throws InvalidInstruction when the register number is out of range, or when both
    ///         `dst` and `imm` are non-zero.
    static auto makeCallx(const EbpfInst inst, const Pc pc) -> Callx {
        // callx puts the register number in the 'dst' field rather than the 'src' field.
        if (inst.dst > R10_STACK_POINTER) {
            throw InvalidInstruction(pc, "bad register");
        }
        if (inst.imm == 0) {
            return Callx{inst.dst};
        }

        // Clang prior to v19 put the register number into the 'imm' field.
        if (inst.dst > 0) {
            throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
        }
        const bool imm_is_register = inst.imm >= 0 && inst.imm <= R10_STACK_POINTER;
        if (!imm_is_register) {
            throw InvalidInstruction(pc, "bad register");
        }
        return Callx{gsl::narrow<uint8_t>(inst.imm)};
    }
606

607
    [[nodiscard]]
608
    auto makeJmp(const EbpfInst inst, const vector<EbpfInst>& insts, const Pc pc) const -> Instruction {
66,080✔
609
        switch ((inst.opcode >> 4) & 0xF) {
66,080✔
610
        case INST_CALL:
23,246✔
611
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP) {
23,246✔
612
                throw InvalidInstruction(pc, inst.opcode);
4✔
613
            }
614
            if (inst.src > INST_CALL_BTF_HELPER) {
23,242✔
615
                throw InvalidInstruction(pc, inst.opcode);
2✔
616
            }
617
            if (inst.offset != 0) {
23,240✔
618
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
8✔
619
            }
620
            if (inst.src == INST_CALL_LOCAL) {
23,232✔
621
                return makeCallLocal(inst, insts, pc);
36✔
622
            }
623
            if (inst.opcode & INST_SRC_REG) {
23,206✔
624
                // Register-call opcode form is reserved for callx and must not be used for src-based call modes.
625
                if (inst.src != 0) {
28✔
626
                    throw InvalidInstruction(pc, inst.opcode);
2✔
627
                }
628
                return makeCallx(inst, pc);
26✔
629
            }
630
            if (inst.src == INST_CALL_BTF_HELPER) {
23,178✔
631
                if (inst.dst != 0) {
10✔
632
                    throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
2✔
633
                }
634
                return CallBtf{.btf_id = inst.imm};
8✔
635
            }
636
            if (inst.dst != 0) {
23,168✔
637
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
2✔
638
            }
639
            if (!info.platform->is_helper_usable(inst.imm)) {
23,166✔
640
                std::string name = std::to_string(inst.imm);
2✔
641
                try {
1✔
642
                    name = info.platform->get_helper_prototype(inst.imm).name;
2✔
643
                } catch (const std::exception&) {
2✔
644
                }
2✔
645
                return Call{.func = inst.imm,
5✔
646
                            .name = std::move(name),
1✔
647
                            .is_supported = false,
648
                            .unsupported_reason = "helper function is unavailable on this platform"};
3✔
649
            }
2✔
650
            return makeCall(inst.imm);
34,746✔
651
        case INST_EXIT:
3,182✔
652
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP || (inst.opcode & INST_SRC_REG)) {
3,182✔
653
                throw InvalidInstruction(pc, inst.opcode);
6✔
654
            }
655
            if (inst.src != 0) {
3,176✔
656
                throw InvalidInstruction(pc, inst.opcode);
2✔
657
            }
658
            if (inst.dst != 0) {
3,174✔
659
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
2✔
660
            }
661
            if (inst.imm != 0) {
3,172✔
662
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
2✔
663
            }
664
            if (inst.offset != 0) {
3,170✔
665
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
2✔
666
            }
667
            return Exit{};
3,168✔
668
        case INST_JA:
6,102✔
669
            if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP && (inst.opcode & INST_CLS_MASK) != INST_CLS_JMP32) {
6,102✔
670
                throw InvalidInstruction(pc, inst.opcode);
×
671
            }
672
            if (inst.opcode & INST_SRC_REG) {
6,102✔
673
                throw InvalidInstruction(pc, inst.opcode);
4✔
674
            }
675
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_JMP && (inst.imm != 0)) {
6,098✔
676
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
2✔
677
            }
678
            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_JMP32 && (inst.offset != 0)) {
6,096✔
679
                throw InvalidInstruction(pc, make_opcode_message("nonzero offset for", inst.opcode));
2✔
680
            }
681
            if (inst.dst != 0) {
6,094✔
682
                throw InvalidInstruction(pc, make_opcode_message("nonzero dst for register", inst.opcode));
4✔
683
            }
684
        default: {
19,820✔
685
            // First validate the opcode, src, and imm.
686
            const auto op = getJmpOp(pc, inst.opcode);
39,640✔
687
            if (!(inst.opcode & INST_SRC_REG) && (inst.src != 0)) {
39,624✔
688
                throw InvalidInstruction(pc, inst.opcode);
48✔
689
            }
690
            if ((inst.opcode & INST_SRC_REG) && (inst.imm != 0)) {
39,576✔
691
                throw InvalidInstruction(pc, make_opcode_message("nonzero imm for", inst.opcode));
44✔
692
            }
693

694
            const int32_t offset = (inst.opcode == INST_OP_JA32) ? inst.imm : inst.offset;
39,532✔
695
            const Label target = getJumpTarget(offset, insts, pc);
39,532✔
696
            if (inst.opcode != INST_OP_JA16 && inst.opcode != INST_OP_JA32) {
39,396✔
697
                if (inst.dst > R10_STACK_POINTER) {
33,314✔
698
                    throw InvalidInstruction(pc, "bad register");
88✔
699
                }
700
                if ((inst.opcode & INST_SRC_REG) && inst.src > R10_STACK_POINTER) {
33,226✔
701
                    throw InvalidInstruction(pc, "bad register");
44✔
702
                }
703
            }
704

705
            const auto cond = (inst.opcode == INST_OP_JA16 || inst.opcode == INST_OP_JA32)
36,229✔
706
                                  ? std::optional<Condition>{}
39,264✔
707
                                  : Condition{.op = op,
16,591✔
708
                                              .left = Reg{inst.dst},
33,182✔
709
                                              .right = (inst.opcode & INST_SRC_REG) ? Value{Reg{inst.src}}
33,182✔
710
                                                                                    : Value{Imm{sign_extend(inst.imm)}},
26,652✔
711
                                              .is64 = (inst.opcode & INST_CLS_MASK) == INST_CLS_JMP};
39,264✔
712
            return Jmp{.cond = cond, .target = target};
58,962✔
713
        }
39,330✔
714
        }
715
    }
716

717
    // Decode a raw instruction array into a sequence of labeled instructions.
    //
    // Each produced entry is labeled with the index (pc) of the raw slot where it
    // starts. Two raw slots are consumed for one output instruction when:
    //   - the instruction is LDDW (its second slot supplies the upper immediate), or
    //   - a 64-bit "shift left by 32" is immediately followed by the matching
    //     right shift on the same register, which is merged into a single move.
    //
    // Notes are recorded (via note / note_next_pc) when the last slot can fall
    // through and when the program contains no exit instruction.
    //
    // Throws std::invalid_argument for an empty program; InvalidInstruction
    // thrown by the per-class decoders propagates to the caller.
    vector<LabeledInstruction> unmarshal(vector<EbpfInst> const& insts,
                                         const prevail::ebpf_verifier_options_t& options) {
        vector<LabeledInstruction> prog;
        int exit_count = 0;
        if (insts.empty()) {
            throw std::invalid_argument("Zero length programs are not allowed");
        }
        // At most one output instruction per raw slot, so this avoids reallocation.
        prog.reserve(insts.size());
        for (size_t pc = 0; pc < insts.size();) {
            const EbpfInst inst = insts[pc];
            Instruction new_ins;
            bool skip_instruction = false; // true when this instruction also consumes slot pc + 1
            bool fallthrough = true;       // false for exit and unconditional jumps
            switch (inst.opcode & INST_CLS_MASK) {
            case INST_CLS_LD:
                if (inst.opcode == INST_OP_LDDW_IMM) {
                    // insts is non-empty here, so size() - 1 cannot wrap around.
                    const int32_t next_imm = pc < insts.size() - 1 ? insts[pc + 1].imm : 0;
                    new_ins = makeLddw(inst, next_imm, insts, pc);
                    skip_instruction = true;
                    break;
                }
                // Every other LD-class opcode is decoded as a memory operation.
                [[fallthrough]];
            case INST_CLS_LDX:
            case INST_CLS_ST:
            case INST_CLS_STX: new_ins = makeMemOp(pc, inst); break;

            case INST_CLS_ALU:
            case INST_CLS_ALU64: {
                new_ins = makeAluOp(pc, inst);

                // Merge (rX <<= 32; rX >>>= 32) into wX = rX
                //       (rX <<= 32; rX >>= 32)  into rX s32= rX
                if (pc >= insts.size() - 1) {
                    break;
                }
                const EbpfInst next = insts[pc + 1];
                auto dst = Reg{inst.dst};

                if (new_ins != shift32(dst, Bin::Op::LSH)) {
                    break;
                }

                if ((next.opcode & INST_CLS_MASK) != INST_CLS_ALU64) {
                    break;
                }
                auto next_ins = makeAluOp(pc + 1, next);
                if (next_ins == shift32(dst, Bin::Op::RSH)) {
                    new_ins = Bin{.op = Bin::Op::MOV, .dst = dst, .v = dst, .is64 = false};
                    skip_instruction = true;
                } else if (next_ins == shift32(dst, Bin::Op::ARSH)) {
                    new_ins = Bin{.op = Bin::Op::MOVSX32, .dst = dst, .v = dst, .is64 = true};
                    skip_instruction = true;
                }

                break;
            }

            case INST_CLS_JMP32:
            case INST_CLS_JMP: {
                new_ins = makeJmp(inst, insts, pc);
                if (std::holds_alternative<Exit>(new_ins)) {
                    fallthrough = false;
                    exit_count++;
                }
                if (const auto pjmp = std::get_if<Jmp>(&new_ins)) {
                    // A Jmp without a condition is unconditional and never falls through.
                    if (!pjmp->cond) {
                        fallthrough = false;
                    }
                }
                break;
            }
            default: CRAB_ERROR("invalid class: ", inst.opcode & INST_CLS_MASK);
            }
            // NOTE(review): this compares against the raw slot index, so it does not fire when the
            // final two slots were consumed as one instruction (skip_instruction) — confirm intended.
            if (pc == insts.size() - 1 && fallthrough) {
                note("fallthrough in last instruction");
            }

            std::optional<btf_line_info_t> current_line_info = {};

            if (options.verbosity_opts.print_line_info && pc < info.line_info.size()) {
                current_line_info = info.line_info.at(pc);
            }

            // Neither local is read again in this iteration, so move instead of copying.
            prog.emplace_back(Label(gsl::narrow<int>(pc)), std::move(new_ins), std::move(current_line_info));

            pc++;
            note_next_pc();
            if (skip_instruction) {
                // Step over the second slot of the two-slot instruction as well.
                pc++;
                note_next_pc();
            }
        }
        if (exit_count == 0) {
            note("no exit instruction");
        }
        return prog;
    }
1,480✔
813
};
814

815
std::variant<InstructionSeq, std::string> unmarshal(const RawProgram& raw_prog, vector<vector<string>>& notes,
3,990✔
816
                                                    const prevail::ebpf_verifier_options_t& options) {
817
    thread_local_program_info = raw_prog.info;
3,990✔
818
    try {
1,995✔
819
        return Unmarshaller{notes, raw_prog.info}.unmarshal(raw_prog.prog, options);
5,985✔
820
    } catch (InvalidInstruction& arg) {
1,480✔
821
        std::ostringstream ss;
1,480✔
822
        ss << arg.pc << ": " << arg.what() << "\n";
1,480✔
823
        return ss.str();
1,480✔
824
    }
1,480✔
825
}
826

827
std::variant<InstructionSeq, std::string> unmarshal(const RawProgram& raw_prog,
3,990✔
828
                                                    const prevail::ebpf_verifier_options_t& options) {
829
    vector<vector<string>> notes;
3,990✔
830
    return unmarshal(raw_prog, notes, options);
7,980✔
831
}
3,990✔
832

833
// Build the Call instruction for helper id `imm` on the given platform.
//
// Uses a throwaway Unmarshaller whose ProgramInfo has only `platform` set;
// any notes it records are discarded.
Call make_call(const int imm, const ebpf_platform_t& platform) {
    vector<vector<string>> scratch_notes;
    const ProgramInfo platform_only_info{.platform = &platform};
    Unmarshaller helper_decoder{scratch_notes, platform_only_info};
    return helper_decoder.makeCall(imm);
}
1,018✔
838
} // namespace prevail
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc