• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Alan-Jowett / ebpf-verifier / 22250793910

21 Feb 2026 05:02AM UTC coverage: 88.009% (+0.007%) from 88.002%
22250793910

Pull #165

github

web-flow
Merge a5ac0b03d into e68a347b2
Pull Request #165: Bump external/libbtf from `ba5ab5e` to `55c22b7`

11743 of 13343 relevant lines covered (88.01%)

3262872.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.64
/src/elf_loader.cpp
1
// Copyright (c) Prevail Verifier contributors.
2
// SPDX-License-Identifier: MIT
3
#include <algorithm>
4
#include <cerrno>
5
#include <cstddef>
6
#include <cstring>
7
#include <functional>
8
#include <iostream>
9
#include <limits>
10
#include <map>
11
#include <optional>
12
#include <set>
13
#include <sstream>
14
#include <string>
15
#include <sys/stat.h>
16
#include <variant>
17
#include <vector>
18

19
#include <elfio/elfio.hpp>
20
#include <libbtf/btf_c_type.h>
21
#include <libbtf/btf_json.h>
22
#include <libbtf/btf_map.h>
23
#include <libbtf/btf_parse.h>
24

25
#include "crab_utils/num_safety.hpp"
26
#include "elf_loader.hpp"
27
#include "platform.hpp"
28

29
/// @brief ELF file parser for BPF programs with support for legacy and BTF-based formats.
30
///
31
/// This file implements a complete BPF ELF loader that handles:
32
/// - Legacy map definitions (struct bpf_elf_map in "maps" sections)
33
/// - BTF-based map definitions (parsed from .BTF section metadata)
34
/// - Global variables in .data/.rodata/.bss sections (as implicit array maps)
35
/// - CO-RE (Compile Once - Run Everywhere) relocations
36
/// - Subprogram linking and function call relocations
37
/// - Mixed-mode files (BTF metadata + legacy map sections)
38
///
39
/// The loader performs ELF parsing, symbol resolution, relocation processing,
40
/// and produces fully-linked RawProgram objects ready for verification.
41

42
namespace prevail {
43

44
namespace {
45

46
/// @brief Validate and return a LDDW instruction pair for relocation.
47
///
48
/// LDDW (Load Double Word) is a two-slot instruction used for 64-bit immediate loads.
49
/// Encoding: first slot has opcode 0x18, second slot has opcode 0x00.
50
///
51
/// @param instructions Instruction vector
52
/// @param location Index of the first instruction
53
/// @param context Description for error messages (e.g., "global variable 'foo'")
54
/// @return Pair of references to the low and high instruction slots
55
/// @throws UnmarshalError if validation fails
56
std::pair<std::reference_wrapper<EbpfInst>, std::reference_wrapper<EbpfInst>>
57
validate_and_get_lddw_pair(std::vector<EbpfInst>& instructions, size_t location, const std::string& context) {
9,920✔
58
    constexpr uint8_t BPF_LDDW = 0x18;
9,920✔
59
    constexpr uint8_t BPF_LDDW_HI = 0x00;
9,920✔
60

61
    if (instructions.size() <= location + 1) {
9,920✔
62
        throw UnmarshalError("Invalid relocation: " + std::string(context) + " reference at instruction boundary");
×
63
    }
64

65
    auto& lo_inst = instructions[location];
9,920✔
66
    auto& hi_inst = instructions[location + 1];
9,920✔
67

68
    if (lo_inst.opcode != BPF_LDDW) {
9,920✔
69
        throw UnmarshalError("Invalid relocation: expected LDDW first slot (opcode 0x18) for " + std::string(context) +
×
70
                             ", found opcode 0x" + std::to_string(static_cast<int>(lo_inst.opcode)));
×
71
    }
72
    if (hi_inst.opcode != BPF_LDDW_HI) {
9,920✔
73
        throw UnmarshalError("Invalid relocation: expected LDDW second slot (opcode 0x00) for " + std::string(context) +
×
74
                             ", found opcode 0x" + std::to_string(static_cast<int>(hi_inst.opcode)));
×
75
    }
76

77
    return {std::ref(lo_inst), std::ref(hi_inst)};
9,920✔
78
}
79

80
template <typename T>
81
    requires std::is_trivially_copyable_v<T>
82
std::vector<T> vector_of(const char* data, ELFIO::Elf_Xword size) {
24,012✔
83
    if (!data || size % sizeof(T) != 0 || size > std::numeric_limits<uint32_t>::max()) {
24,012✔
84
        throw UnmarshalError("Invalid argument to vector_of");
×
85
    }
86
    const size_t n = size / sizeof(T);
24,012✔
87
    std::vector<T> v(n);
24,012✔
88
    std::memcpy(v.data(), data, n * sizeof(T));
24,012✔
89
    return v;
24,012✔
90
}
91

92
template <typename T>
93
    requires std::is_trivially_copyable_v<T>
94
std::vector<T> vector_of(const ELFIO::section& sec) {
610✔
95
    return vector_of<T>(sec.get_data(), sec.get_size());
610✔
96
}
97

98
/// @brief Tell whether a section name denotes a legacy map section.
///
/// Accepts exactly "maps", or "maps/" followed by at least one more character.
/// The bare prefix "maps/" on its own is not accepted.
///
/// @param name Section name to classify
/// @return true if @p name is a legacy map section name
bool is_map_section(const std::string& name) {
    if (name == "maps") {
        return true;
    }
    static constexpr char prefix[] = "maps/";
    constexpr size_t prefix_length = sizeof(prefix) - 1; // exclude the terminating NUL
    // Strictly longer than the prefix: something must follow the slash.
    return name.size() > prefix_length && name.compare(0, prefix_length, prefix) == 0;
}
107,174✔
102

103
/// @brief Tell whether a section name denotes a global-variable section.
///
/// Accepts ".data", ".rodata" and ".bss", either as the whole name or followed
/// by a '.'-separated suffix (e.g. ".rodata.str1.1").
///
/// @param name Section name to classify
/// @return true if @p name is one of the recognised global-variable section names
bool is_global_section(const std::string& name) {
    static constexpr const char* base_names[] = {".data", ".rodata", ".bss"};
    for (const char* base : base_names) {
        const size_t base_length = std::strlen(base);
        if (name.compare(0, base_length, base) != 0) {
            continue; // does not begin with this base name
        }
        // Either the name is exactly the base, or the base is followed by a '.' suffix.
        if (name.size() == base_length || name[base_length] == '.') {
            return true;
        }
    }
    return false;
}
107

108
struct symbol_details_t {
2,710,613✔
109
    std::string name;
110
    ELFIO::Elf64_Addr value{};
111
    ELFIO::Elf_Xword size{};
112
    unsigned char bind{};
113
    unsigned char type{};
114
    ELFIO::Elf_Half section_index{};
115
    unsigned char other{};
116
};
117

118
/// @brief Read the symbol at @p index from the symbol table into a symbol_details_t.
///
/// NOTE(review): the result of `get_symbol` is not inspected here; if the accessor
/// leaves the outputs untouched, the returned struct keeps its value-initialized defaults.
///
/// @param symbols Symbol table accessor
/// @param index Index of the symbol to read
/// @return The symbol's fields
symbol_details_t get_symbol_details(const ELFIO::const_symbol_section_accessor& symbols, const ELFIO::Elf_Xword index) {
    symbol_details_t result;
    auto& [name, value, size, bind, type, section_index, other] = result;
    symbols.get_symbol(index, name, value, size, bind, type, section_index, other);
    return result;
}
×
124

125
struct parse_params_t {
3✔
126
    const std::string& path;
127
    const ebpf_verifier_options_t& options;
128
    const ebpf_platform_t* platform;
129
    const std::string desired_section;
130
};
131

132
std::tuple<std::string, ELFIO::Elf_Xword>
133
get_program_name_and_size(const ELFIO::section& sec, const ELFIO::Elf_Xword start,
23,402✔
134
                          const ELFIO::const_symbol_section_accessor& symbols) {
135
    const ELFIO::Elf_Xword symbol_count = symbols.get_symbols_num();
23,402✔
136
    const ELFIO::Elf_Half section_index = sec.get_index();
23,402✔
137
    std::string program_name = sec.get_name();
23,402✔
138
    ELFIO::Elf_Xword size = sec.get_size() - start;
23,402✔
139
    for (ELFIO::Elf_Xword index = 0; index < symbol_count; index++) {
215,161,368✔
140
        auto symbol_details = get_symbol_details(symbols, index);
215,137,966✔
141
        if (symbol_details.section_index == section_index && !symbol_details.name.empty()) {
215,137,966✔
142
            if (symbol_details.type != ELFIO::STT_FUNC) {
1,001,138✔
143
                continue;
966,330✔
144
            }
145
            const auto relocation_offset = symbol_details.value;
34,808✔
146
            if (relocation_offset == start) {
34,808✔
147
                program_name = symbol_details.name;
107,105,074✔
148
            } else if (relocation_offset > start && relocation_offset < start + size) {
15,552✔
149
                size = relocation_offset - start;
942✔
150
            }
151
        }
152
    }
215,137,966✔
153
    return {program_name, size};
46,804✔
154
}
23,402✔
155

156
/// @brief Build the error text for an unusable relocation value.
///
/// @param reloc_value The offending relocation value; printed in decimal
/// @return Message naming the value and suggesting compilation with -O2
std::string bad_reloc_value(const size_t reloc_value) {
    std::string message{"Bad reloc value ("};
    message += std::to_string(reloc_value);
    message += "). ";
    message += "Make sure to compile with -O2.";
    return message;
}
159

160
struct FunctionRelocation {
54✔
161
    size_t prog_index{};
162
    ELFIO::Elf_Xword source_offset{};
163
    ELFIO::Elf_Xword relocation_entry_index{};
164
    std::string target_function_name;
165
};
166

167
/// @brief Find the program matching both a section name and a function name.
///
/// @param programs Programs to search
/// @param subprogram_section Section whose name the program's `section_name` must equal
/// @param symbol_name Function name the program's `function_name` must equal
/// @return Pointer to the first matching element of @p programs, or nullptr if none matches
RawProgram* find_subprogram(std::vector<RawProgram>& programs, const ELFIO::section& subprogram_section,
                            const std::string& symbol_name) {
    const auto match = std::find_if(programs.begin(), programs.end(), [&](const RawProgram& candidate) {
        return candidate.section_name == subprogram_section.get_name() && candidate.function_name == symbol_name;
    });
    return match == programs.end() ? nullptr : &*match;
}
176

177
using MapOffsets = std::map<std::string, size_t>;
178

179
/// @brief EBPF-Global data extracted from an ELF file during parsing.
180
///
181
/// This structure aggregates all map descriptors and metadata about sections
182
/// containing maps and global variables. It uses a variant to support both
183
/// legacy and BTF-based map resolution strategies.
184
struct ElfGlobalData {
494✔
185
    /// Section indices containing map definitions (e.g., "maps", "maps/xyz")
186
    std::set<ELFIO::Elf_Half> map_section_indices;
187

188
    /// All map descriptors extracted from the file
189
    std::vector<EbpfMapDescriptor> map_descriptors;
190

191
    /// Strategy for resolving map symbols to descriptors:
192
    /// - size_t: Legacy mode - fixed record size, use offset/size arithmetic
193
    /// - MapOffsets: BTF mode - name-based lookup from map name to descriptor index
194
    std::variant<size_t, MapOffsets> map_record_size_or_map_offsets;
195

196
    /// Section indices containing global variables (.data, .rodata, .bss)
197
    std::set<ELFIO::Elf_Half> variable_section_indices;
198
};
199

200
/// @brief Collect all global variable sections from the ELF file.
201
///
202
/// @param reader The ELF file reader
203
/// @return Vector of pointers to global variable sections (can be empty)
204
std::vector<ELFIO::section*> global_sections(const ELFIO::elfio& reader) {
986✔
205
    std::vector<ELFIO::section*> result;
986✔
206
    for (auto& section : reader.sections) {
61,176✔
207
        if (!section || !is_global_section(section->get_name())) {
90,285✔
208
            continue;
59,680✔
209
        }
210

211
        const auto type = section->get_type();
510✔
212

213
        // Global variables in eBPF are stored in special sections:
214
        // - .data, .data.*     -> initialized read-write globals (SHT_PROGBITS)
215
        // - .rodata, .rodata.* -> constants (SHT_PROGBITS)
216
        // - .bss, .bss.*       -> uninitialized globals (SHT_NOBITS, zero-initialized at load)
217
        // .bss sections have type SHT_NOBITS and contain no file data, but still
218
        // have a non-zero size representing the memory allocation needed at runtime.
219
        if (type == ELFIO::SHT_NOBITS || (type == ELFIO::SHT_PROGBITS && section->get_size() != 0)) {
510✔
220
            result.push_back(section.get());
510✔
221
        }
222
    }
223
    return result;
986✔
224
}
×
225

226
constexpr int DEFAULT_MAP_FD = -1;
227

228
/// @brief Add implicit map descriptors for global variable sections.
229
///
230
/// Creates single-entry array maps for .data/.rodata/.bss sections.
231
/// Each section becomes a map where the entire section content is the map value.
232
///
233
/// @param reader ELF file reader
234
/// @param global Global data to populate with map descriptors
235
/// @param map_offsets Map name to descriptor index mapping
236
void add_global_variable_maps(const ELFIO::elfio& reader, ElfGlobalData& global, MapOffsets& map_offsets) {
188✔
237
    for (const auto section : global_sections(reader)) {
256✔
238
        map_offsets[section->get_name()] = global.map_descriptors.size();
68✔
239

240
        global.map_descriptors.push_back(EbpfMapDescriptor{
34✔
241
            .original_fd = gsl::narrow<int>(global.map_descriptors.size() + 1),
68✔
242
            .type = 0,
243
            .key_size = sizeof(uint32_t),
244
            .value_size = gsl::narrow<uint32_t>(section->get_size()),
68✔
245
            .max_entries = 1,
246
            .inner_map_fd = DEFAULT_MAP_FD,
247
        });
248

249
        global.variable_section_indices.insert(section->get_index());
68✔
250
    }
188✔
251
}
188✔
252

253
/// @brief Locate the ".symtab" section and wrap it in a symbol accessor.
///
/// @param reader The loaded ELF file reader
/// @param path ELF file path, used in error messages
/// @return Accessor over the ".symtab" section
/// @throws UnmarshalError if there is no ".symtab" section, or if its entry size does
///         not match the symbol record size for the file's ELF class (32- or 64-bit)
ELFIO::const_symbol_section_accessor read_and_validate_symbol_section(const ELFIO::elfio& reader,
                                                                      const std::string& path) {
    const ELFIO::section* symtab = reader.sections[".symtab"];
    if (symtab == nullptr) {
        throw UnmarshalError("No symbol section found in ELF file " + path);
    }

    const bool is_32bit = reader.get_class() == ELFIO::ELFCLASS32;
    const size_t required_entry_size = is_32bit ? sizeof(ELFIO::Elf32_Sym) : sizeof(ELFIO::Elf64_Sym);
    if (symtab->get_entry_size() != required_entry_size) {
        throw UnmarshalError("Invalid symbol section in ELF file " + path);
    }

    return ELFIO::const_symbol_section_accessor{reader, symtab};
}
266

267
/// @brief Load an ELF image from a stream.
///
/// @param input_stream Stream positioned at the start of the ELF image
/// @param path ELF file path, used in the error message
/// @return The populated ELF reader
/// @throws UnmarshalError if the reader reports that loading failed
ELFIO::elfio load_elf(std::istream& input_stream, const std::string& path) {
    ELFIO::elfio reader;
    const bool loaded = reader.load(input_stream);
    if (!loaded) {
        throw UnmarshalError("Can't process ELF file " + path);
    }
    return reader;
}
×
274

275
/// @brief Print the BTF type data to standard output as pretty-printed JSON.
///
/// Writes a header line naming @p path, then the JSON text.
///
/// @param btf_data Parsed BTF type information
/// @param path ELF file path shown in the header line
void dump_btf_types(const libbtf::btf_type_data& btf_data, const std::string& path) {
    std::cout << "Dumping BTF data for " << path << std::endl;

    std::stringstream json;
    btf_data.to_json(json);
    const auto pretty = libbtf::pretty_print_json(json.str());
    std::cout << pretty << std::endl;
}
×
281

282
/// @brief Attach source line information from BTF to the instructions of each program.
///
/// First, every line record reported by the BTF line-information parser is stored on
/// the instruction it refers to, for each program whose section name matches and whose
/// byte range contains the record's offset. Second, instructions without a record
/// inherit the most recent preceding record of the same program that has a non-zero
/// line number.
///
/// @param raw_programs Programs whose `info.line_info` is updated in place
/// @param btf_section The .BTF section; dereferenced unconditionally, must not be null
/// @param btf_ext The BTF extension section; dereferenced unconditionally, must not be null
/// @throws UnmarshalError if a record resolves to an instruction index outside its program
void update_line_info(std::vector<RawProgram>& raw_programs, const ELFIO::section* btf_section,
                      const ELFIO::section* btf_ext) {
    auto record_line = [&raw_programs](const std::string& section, const uint32_t instruction_offset,
                                       const std::string& file_name, const std::string& source,
                                       const uint32_t line_number, const uint32_t column_number) {
        for (auto& program : raw_programs) {
            if (program.section_name != section) {
                continue;
            }
            // Skip programs whose byte range does not contain this offset.
            if (instruction_offset < program.insn_off ||
                instruction_offset >= program.insn_off + program.prog.size() * sizeof(EbpfInst)) {
                continue;
            }

            const size_t inst_index = (instruction_offset - program.insn_off) / sizeof(EbpfInst);
            if (inst_index >= program.prog.size()) {
                throw UnmarshalError("Invalid BTF data");
            }
            program.info.line_info.insert_or_assign(inst_index,
                                                    btf_line_info_t{file_name, source, line_number, column_number});
        }
    };
    libbtf::btf_parse_line_information(vector_of<std::byte>(*btf_section), vector_of<std::byte>(*btf_ext),
                                       record_line);

    // Propagate the last known non-zero line forward over instructions without a record.
    for (auto& program : raw_programs) {
        auto& line_info = program.info.line_info;
        std::optional<btf_line_info_t> carried;
        for (size_t i = 0; i < program.prog.size(); ++i) {
            const auto entry = line_info.find(i);
            if (entry == line_info.end()) {
                if (carried) {
                    line_info[i] = *carried;
                }
                continue;
            }
            if (entry->second.line_number != 0) {
                carried = entry->second;
            }
        }
    }
}
×
314

315
std::map<int, int> map_typeid_to_fd(const std::vector<EbpfMapDescriptor>& map_descriptors) {
188✔
316
    std::map<int, int> type_id_to_fd_map;
188✔
317
    int pseudo_fd = 1;
188✔
318
    for (const auto& map_descriptor : map_descriptors) {
2,564✔
319
        if (!type_id_to_fd_map.contains(map_descriptor.original_fd)) {
2,376✔
320
            type_id_to_fd_map[map_descriptor.original_fd] = pseudo_fd++;
2,376✔
321
        }
322
    }
323
    return type_id_to_fd_map;
188✔
324
}
×
325

326
/// @brief Build map and global-variable metadata from the .BTF section.
///
/// Steps:
/// 1. Parse the .BTF section (optionally dumping it as JSON).
/// 2. Create one descriptor per BTF-defined map, temporarily storing BTF type IDs in
///    `original_fd` and `inner_map_fd`.
/// 3. Replace those type IDs with pseudo file descriptors.
/// 4. Record the ".maps" section index, if that section exists.
/// 5. Append implicit maps for the global-variable sections.
///
/// @param parse_params Parsing parameters (path and options are used here)
/// @param reader The loaded ELF file reader
/// @return Populated global data, or a default-constructed one if there is no .BTF section
/// @throws UnmarshalError if the BTF data or map metadata cannot be parsed, or if a
///         descriptor refers to a type ID with no assigned pseudo file descriptor
ElfGlobalData parse_btf_section(const parse_params_t& parse_params, const ELFIO::elfio& reader) {
    const auto btf = reader.sections[".BTF"];
    if (!btf) {
        return {};
    }

    std::optional<libbtf::btf_type_data> btf_data;
    try {
        btf_data.emplace(vector_of<std::byte>(*btf));
    } catch (const std::exception& e) {
        throw UnmarshalError(std::string("Unsupported or invalid BTF data: ") + e.what());
    }
    if (parse_params.options.verbosity_opts.dump_btf_types_json) {
        dump_btf_types(*btf_data, parse_params.path);
    }

    ElfGlobalData global;
    MapOffsets map_offsets;
    auto& descriptors = global.map_descriptors;

    // Parse BTF-defined maps from the .maps DATASEC
    try {
        for (const auto& map : parse_btf_map_section(*btf_data)) {
            map_offsets.emplace(map.name, descriptors.size());
            descriptors.push_back(EbpfMapDescriptor{
                .original_fd = gsl::narrow<int>(map.type_id), // Temporary: stores BTF type ID
                .type = map.map_type,
                .key_size = map.key_size,
                .value_size = map.value_size,
                .max_entries = map.max_entries,
                .inner_map_fd = map.inner_map_type_id == 0 ? DEFAULT_MAP_FD : gsl::narrow<int>(map.inner_map_type_id),
            });
        }
    } catch (const std::exception& e) {
        throw UnmarshalError(std::string("Unsupported or invalid BTF map metadata: ") + e.what());
    }

    // Swap the temporary BTF type IDs for pseudo file descriptors.
    const auto fd_by_type_id = map_typeid_to_fd(descriptors);
    for (auto& descriptor : descriptors) {
        const auto outer = fd_by_type_id.find(descriptor.original_fd);
        if (outer == fd_by_type_id.end()) {
            throw UnmarshalError("Unknown map type ID in BTF: " + std::to_string(descriptor.original_fd));
        }
        descriptor.original_fd = outer->second;

        // The sentinel means "no inner map"; it is not a type ID and must be left alone.
        if (descriptor.inner_map_fd == DEFAULT_MAP_FD) {
            continue;
        }
        const auto inner = fd_by_type_id.find(descriptor.inner_map_fd);
        if (inner == fd_by_type_id.end()) {
            throw UnmarshalError("Unknown inner map type ID in BTF: " + std::to_string(descriptor.inner_map_fd));
        }
        descriptor.inner_map_fd = inner->second;
    }

    // Remember the .maps section index if present (used for relocation classification)
    if (const auto maps_section = reader.sections[".maps"]) {
        global.map_section_indices.insert(maps_section->get_index());
    }

    // Create implicit maps for all global variable sections
    add_global_variable_maps(reader, global, map_offsets);

    global.map_record_size_or_map_offsets = std::move(map_offsets);
    return global;
}
192✔
394

395
/// @brief Create implicit map descriptors for global variable sections.
396
///
397
/// In eBPF, global variables are implemented as single-entry array maps:
398
/// - .data section -> read-write array map (initialized globals)
399
/// - .rodata section -> read-only array map (constants)
400
/// - .bss section -> zero-initialized array map (uninitialized globals)
401
///
402
/// Each section becomes a map descriptor with:
403
/// - key_size = 4 (uint32_t index, always 0)
404
/// - value_size = section size (entire section is the map value)
405
/// - max_entries = 1 (single entry containing all variables)
406
///
407
/// Access pattern: `r0 = *(type *)(map_value_ptr + offset_within_section)`
408
///
409
/// @param reader ELF file reader
410
/// @return Global data with map descriptors for each non-empty variable section
411
ElfGlobalData create_global_variable_maps(const ELFIO::elfio& reader) {
72✔
412
    ElfGlobalData global;
72✔
413
    MapOffsets offsets;
72✔
414

415
    // For legacy (non-BTF) files without map sections, create implicit map descriptors
416
    // for global variable sections only
417
    for (const auto section : global_sections(reader)) {
104✔
418
        offsets[section->get_name()] = global.map_descriptors.size();
32✔
419

420
        global.map_descriptors.push_back(EbpfMapDescriptor{
16✔
421
            .original_fd = gsl::narrow<int>(global.map_descriptors.size() + 1),
32✔
422
            .type = 0,
423
            .key_size = sizeof(uint32_t),
424
            .value_size = gsl::narrow<uint32_t>(section->get_size()),
32✔
425
            .max_entries = 1,
426
            .inner_map_fd = DEFAULT_MAP_FD,
427
        });
428

429
        global.variable_section_indices.insert(section->get_index());
32✔
430
    }
72✔
431

432
    global.map_record_size_or_map_offsets = std::move(offsets);
72✔
433
    return global;
108✔
434
}
72✔
435

436
/// @brief Parse legacy map sections with per-section validation.
437
///
438
/// Legacy BPF ELF files define maps using struct bpf_elf_map in "maps" or "maps/*"
439
/// sections. This function:
440
/// 1. Identifies all legacy map sections
441
/// 2. Calculates the per-section record size (section_size / symbol_count)
442
/// 3. Validates symbol offsets are aligned and within bounds
443
/// 4. Builds a name-to-descriptor mapping for relocation resolution
444
///
445
/// Note: Different map sections may have different record sizes, so validation
446
/// must be done per-section, not globally.
447
///
448
/// @param parse_params Parsing parameters including platform callbacks
449
/// @param reader The ELF file reader
450
/// @param symbols Symbol table accessor
451
/// @return Global data structure with map descriptors and metadata
452
ElfGlobalData parse_map_sections(const parse_params_t& parse_params, const ELFIO::elfio& reader,
726✔
453
                                 const ELFIO::const_symbol_section_accessor& symbols) {
454
    ElfGlobalData global;
726✔
455
    std::map<ELFIO::Elf_Half, size_t> section_record_sizes; // Per-section record size
726✔
456
    std::map<ELFIO::Elf_Half, size_t> section_base_index;   // Starting descriptor index per section
726✔
457

458
    // Parse each legacy map section
459
    for (ELFIO::Elf_Half i = 0; i < reader.sections.size(); ++i) {
55,362✔
460
        const auto s = reader.sections[i];
54,636✔
461
        if (!s || !is_map_section(s->get_name())) {
81,954✔
462
            continue;
53,908✔
463
        }
464

465
        // Count map symbols in this section
466
        int map_count = 0;
364✔
467
        for (ELFIO::Elf_Xword index = 0; index < symbols.get_symbols_num(); ++index) {
2,436,912✔
468
            const auto symbol_details = get_symbol_details(symbols, index);
2,436,184✔
469
            if (symbol_details.section_index == i && !symbol_details.name.empty()) {
2,436,184✔
470
                map_count++;
7,420✔
471
            }
472
        }
2,436,184✔
473

474
        // Track this as a map section even if empty
475
        global.map_section_indices.insert(s->get_index());
728✔
476

477
        if (map_count <= 0) {
728✔
478
            continue;
×
479
        }
480

481
        const size_t base_index = global.map_descriptors.size();
728✔
482
        const size_t map_record_size = s->get_size() / map_count;
728✔
483

484
        // Validate section structure
485
        if (s->get_data() == nullptr || map_record_size == 0 || s->get_size() % map_record_size != 0) {
728✔
486
            throw UnmarshalError("Malformed legacy maps section: " + s->get_name());
×
487
        }
488

489
        section_record_sizes[i] = map_record_size;
728✔
490
        section_base_index[i] = base_index;
728✔
491

492
        // Platform-specific parsing of map definitions
493
        parse_params.platform->parse_maps_section(global.map_descriptors, s->get_data(), map_record_size, map_count,
728✔
494
                                                  parse_params.platform, parse_params.options);
728✔
495
    }
496

497
    // Resolve inner map references (platform-specific logic)
498
    parse_params.platform->resolve_inner_map_references(global.map_descriptors);
726✔
499

500
    // Build name-to-index mapping with per-section validation
501
    MapOffsets map_offsets;
726✔
502
    for (ELFIO::Elf_Xword index = 0; index < symbols.get_symbols_num(); ++index) {
2,436,898✔
503
        const auto sym_details = get_symbol_details(symbols, index);
2,436,172✔
504

505
        // Skip symbols not in map sections or without names
506
        if (!global.map_section_indices.contains(sym_details.section_index) || sym_details.name.empty()) {
2,436,172✔
507
            continue;
2,428,752✔
508
        }
509

510
        // Look up the per-section metadata
511
        const auto record_size_it = section_record_sizes.find(sym_details.section_index);
7,420✔
512
        const auto base_index_it = section_base_index.find(sym_details.section_index);
7,420✔
513
        if (record_size_it == section_record_sizes.end() || base_index_it == section_base_index.end()) {
7,420✔
514
            continue; // Section was not parsed (empty)
×
515
        }
516

517
        const auto* section = reader.sections[sym_details.section_index];
7,420✔
518
        const size_t record_size = record_size_it->second;
7,420✔
519
        if (!section) {
7,420✔
520
            continue;
×
521
        }
522

523
        // Validate alignment and bounds before calculating index.
524
        // A malformed ELF could have symbol offsets that don't align to record boundaries
525
        // or that exceed the section size, leading to incorrect descriptor lookups
526
        if (sym_details.value % record_size != 0 || sym_details.value >= section->get_size()) {
7,420✔
527
            throw UnmarshalError("Legacy map symbol '" + sym_details.name + "' has invalid offset: not aligned to " +
×
528
                                 std::to_string(record_size) + "-byte boundary or out of section bounds");
×
529
        }
530

531
        const size_t local_index = sym_details.value / record_size;
7,420✔
532
        const size_t descriptor_index = base_index_it->second + local_index;
7,420✔
533

534
        if (descriptor_index >= global.map_descriptors.size()) {
7,420✔
535
            throw UnmarshalError("Legacy map symbol index out of range for: " + sym_details.name);
×
536
        }
537

538
        map_offsets[sym_details.name] = descriptor_index;
7,420✔
539
    }
2,436,172✔
540

541
    // Add implicit maps for global variable sections
542
    for (const auto section : global_sections(reader)) {
1,136✔
543
        map_offsets[section->get_name()] = global.map_descriptors.size();
410✔
544
        global.map_descriptors.push_back(EbpfMapDescriptor{
205✔
545
            .original_fd = gsl::narrow<int>(global.map_descriptors.size() + 1),
410✔
546
            .type = 0,
547
            .key_size = sizeof(uint32_t),
548
            .value_size = gsl::narrow<uint32_t>(section->get_size()),
410✔
549
            .max_entries = 1,
550
            .inner_map_fd = DEFAULT_MAP_FD,
551
        });
552
        global.variable_section_indices.insert(section->get_index());
410✔
553
    }
726✔
554

555
    global.map_record_size_or_map_offsets = std::move(map_offsets);
726✔
556
    return global;
1,089✔
557
}
726✔
558

559
/// @brief Extract maps and global variable metadata from an ELF file.
560
///
561
/// This function determines the appropriate parsing strategy based on the file's format:
562
/// 1. **Legacy maps** (priority): If a "maps" section exists, use struct bpf_elf_map parsing
563
///    - The .BTF section (if present) contains only type information, not map definitions
564
/// 2. **BTF-only**: If no legacy maps but .BTF exists, parse map definitions from BTF
565
///    - Modern format where maps are defined as BTF VAR types in a DATASEC
566
/// 3. **No maps**: If neither exists, create implicit maps for global variable sections only
567
///
568
/// @param params Parsing parameters including path, options, and platform
569
/// @param reader The loaded ELF file reader
570
/// @param symbols Symbol table accessor for the ELF file
571
/// @return Global data structure containing all extracted metadata
572
ElfGlobalData extract_global_data(const parse_params_t& params, const ELFIO::elfio& reader,
988✔
573
                                  const ELFIO::const_symbol_section_accessor& symbols) {
574
    const bool has_legacy_maps =
494✔
575
        std::ranges::any_of(reader.sections, [](const auto& s) { return is_map_section(s->get_name()); });
53,526✔
576
    // If we have legacy maps section, always use legacy parser regardless of BTF.
577
    // The BTF in these files is just for type info, not map definitions
578
    if (has_legacy_maps) {
988✔
579
        return parse_map_sections(params, reader, symbols);
726✔
580
    }
581

582
    // Only use BTF for maps if there's no legacy maps section
583
    if (reader.sections[".BTF"]) {
262✔
584
        return parse_btf_section(params, reader);
190✔
585
    }
586

587
    // No maps or BTF, but might still have global variables
588
    return create_global_variable_maps(reader);
72✔
589
}
590

591
/// @brief Kinds of CO-RE relocation, stored in bpf_core_relo::kind.
///
/// The numeric values are spelled out explicitly.
/// NOTE(review): they presumably mirror the on-disk encoding of CO-RE relocation
/// records — confirm against the format specification before changing any value.
enum bpf_core_relo_kind {
    BPF_CORE_FIELD_BYTE_OFFSET = 0,
    BPF_CORE_FIELD_BYTE_SIZE = 1,
    BPF_CORE_FIELD_EXISTS = 2,
    BPF_CORE_FIELD_SIGNED = 3,
    BPF_CORE_FIELD_LSHIFT_U64 = 4,
    BPF_CORE_FIELD_RSHIFT_U64 = 5,
    BPF_CORE_TYPE_ID_LOCAL = 6,
    BPF_CORE_TYPE_ID_TARGET = 7,
    BPF_CORE_TYPE_EXISTS = 8,
    BPF_CORE_TYPE_SIZE = 9,
    BPF_CORE_ENUMVAL_EXISTS = 10,
    BPF_CORE_ENUMVAL_VALUE = 11,
    BPF_CORE_TYPE_MATCHES = 12,
};
606

607
struct bpf_core_relo {
608
    uint32_t insn_off;
609
    uint32_t type_id;
610
    uint32_t access_str_off;
611
    bpf_core_relo_kind kind;
612
};
613

614
std::vector<uint32_t> parse_core_access_string(const std::string& s) {
×
615
    std::vector<uint32_t> indices;
×
616
    std::stringstream ss(s);
×
617
    std::string item;
×
618
    while (std::getline(ss, item, ':')) {
×
619
        if (!item.empty()) {
×
620
            try {
621
                indices.push_back(std::stoul(item));
×
622
            } catch (const std::exception&) {
×
623
                throw UnmarshalError("Invalid CO-RE access string: " + s);
×
624
            }
×
625
        }
626
    }
627
    return indices;
×
628
}
×
629

630
class ProgramReader {
631
    const parse_params_t& parse_params;
632
    const ELFIO::elfio& reader;
633
    const ELFIO::const_symbol_section_accessor& symbols;
634
    const ElfGlobalData& global;
635
    std::vector<FunctionRelocation> function_relocations;
636
    std::vector<std::string> unresolved_symbol_errors;
637
    std::set<size_t> builtin_offsets_for_current_program;
638

639
    // loop detection for recursive subprogram resolution
640
    std::map<const RawProgram*, bool> resolved_subprograms;
641
    std::set<const RawProgram*> currently_visiting;
642

643
    /// @brief Apply a single CO-RE relocation to an instruction.
644
    ///
645
    /// CO-RE (Compile Once - Run Everywhere) relocations allow BPF programs to access
646
    /// kernel data structures in a portable way. The loader resolves these relocations
647
    /// by traversing the BTF type graph to calculate field offsets, type sizes, etc.
648
    ///
649
    /// Supported relocation kinds:
650
    /// - FIELD_BYTE_OFFSET: Calculate offset of a struct field (with nested access)
651
    /// - TYPE_ID_LOCAL/TARGET: Replace with type ID
652
    /// - TYPE_SIZE: Replace with sizeof(type)
653
    ///
654
    /// @param prog RawProgram containing the instruction to relocate
655
    /// @param relo CO-RE relocation descriptor (from .BTF.ext section)
656
    /// @param btf_data BTF type information for offset calculations
657
    /// @throws UnmarshalError if relocation is invalid or unsupported
658
    void apply_core_relocation(RawProgram& prog, const bpf_core_relo& relo,
659
                               const libbtf::btf_type_data& btf_data) const;
660
    void process_core_relocations(const libbtf::btf_type_data& btf_data);
661

662
    int32_t compute_lddw_reloc_offset_imm(ELFIO::Elf_Sxword addend, ELFIO::Elf_Word index,
663
                                          std::reference_wrapper<EbpfInst> lo_inst) const;
664

665
  public:
666
    std::vector<RawProgram> raw_programs;
667

668
    ProgramReader(const parse_params_t& p, const ELFIO::elfio& r, const ELFIO::const_symbol_section_accessor& s,
986✔
669
                  const ElfGlobalData& g)
670
        : parse_params{p}, reader{r}, symbols{s}, global{g} {}
986✔
671

672
    std::string append_subprograms(RawProgram& prog);
673
    [[nodiscard]]
674
    int relocate_map(const std::string& name, ELFIO::Elf_Word index) const;
675
    [[nodiscard]]
676
    int relocate_global_variable(const std::string& name) const;
677

678
    /// @brief Attempt to relocate a symbol reference in an instruction.
679
    ///
680
    /// Handles multiple relocation types:
681
    /// - **Function calls**: Queue for later resolution when all programs are loaded
682
    /// - **Map references**: Resolve to map file descriptor
683
    /// - **Global variables**: Resolve to implicit map FD + offset within section
684
    /// - **Config symbols**: Zero out (compile-time configuration)
685
    ///
686
    /// Global variable relocations MUST be applied to LDDW instruction pairs
687
    /// (opcode 0x18 followed by opcode 0x00). This function validates the instruction
688
    /// structure before patching to prevent corruption of non-LDDW instructions.
689
    ///
690
    /// @param symbol_name Name of the symbol (maybe empty for unnamed relocations)
691
    /// @param symbol_section_index Section containing the symbol
692
    /// @param instructions Instruction vector to modify
693
    /// @param location Instruction index to relocate
694
    /// @param index Symbol table index for additional lookup
695
    /// @param addend Additional offset to apply
696
    /// @return true if relocation succeeded or should be skipped, false if unresolved
697
    bool try_reloc(const std::string& symbol_name, ELFIO::Elf_Half symbol_section_index,
698
                   std::vector<EbpfInst>& instructions, size_t location, ELFIO::Elf_Word index,
699
                   ELFIO::Elf_Sxword addend);
700
    void process_relocations(std::vector<EbpfInst>& instructions, const ELFIO::const_relocation_section_accessor& reloc,
701
                             const std::string& section_name, ELFIO::Elf_Xword program_offset, size_t program_size);
702
    [[nodiscard]]
703
    const ELFIO::section* get_relocation_section(const std::string& name) const;
704
    void read_programs();
705
};
706

707
/// @brief Patch the instruction addressed by a CO-RE relocation record.
///
/// FIELD_BYTE_OFFSET walks the record's access string through the BTF type graph.
/// Following the CO-RE access-string encoding, the first component is an array index
/// applied to the root type itself (as in `ptr[N]`); each later component selects a
/// struct member or an array element of the type reached so far. Typedef, const,
/// volatile and restrict wrappers are skipped before every step.
///
/// @param prog Program that contains the instruction; relo.insn_off must lie inside it
/// @param relo Relocation record
/// @param btf_data Parsed BTF type information
/// @throws UnmarshalError if the instruction offset, the access string or the BTF data
///         is invalid, if the resulting offset does not fit the immediate, or if the
///         relocation kind is not supported
void ProgramReader::apply_core_relocation(RawProgram& prog, const bpf_core_relo& relo,
                                          const libbtf::btf_type_data& btf_data) const {
    // Guard the unsigned subtraction below and refuse to patch the middle of an instruction.
    if (relo.insn_off < prog.insn_off) {
        throw UnmarshalError("CO-RE relocation offset out of bounds");
    }
    const size_t byte_delta = relo.insn_off - prog.insn_off;
    if (byte_delta % sizeof(EbpfInst) != 0) {
        throw UnmarshalError("CO-RE relocation offset is not aligned to an instruction boundary");
    }
    const size_t inst_idx = byte_delta / sizeof(EbpfInst);
    if (inst_idx >= prog.prog.size()) {
        throw UnmarshalError("CO-RE relocation offset out of bounds");
    }
    EbpfInst& inst = prog.prog[inst_idx];

    switch (relo.kind) {
    case BPF_CORE_FIELD_BYTE_OFFSET: {
        // The access string lives in the BTF string data. All offsets come from the file,
        // so validate each of them against the section size before dereferencing.
        const auto* btf_section = reader.sections[".BTF"];
        if (!btf_section || !btf_section->get_data() || btf_section->get_size() < sizeof(btf_header_t)) {
            throw UnmarshalError("CO-RE: .BTF section is missing or truncated");
        }
        const char* btf_bytes = btf_section->get_data();
        const uint64_t btf_size = btf_section->get_size();
        btf_header_t hdr{};
        std::memcpy(&hdr, btf_bytes, sizeof(hdr));

        // Three 32-bit values cannot overflow a 64-bit sum.
        const uint64_t str_begin = static_cast<uint64_t>(hdr.hdr_len) + hdr.str_off + relo.access_str_off;
        if (str_begin >= btf_size) {
            throw UnmarshalError("CO-RE: access string offset out of .BTF bounds");
        }
        const void* terminator = std::memchr(btf_bytes + str_begin, '\0', static_cast<size_t>(btf_size - str_begin));
        if (!terminator) {
            throw UnmarshalError("CO-RE: unterminated access string");
        }
        const std::string access_string(btf_bytes + str_begin, static_cast<const char*>(terminator));

        const auto indices = parse_core_access_string(access_string);
        uint32_t current_type_id = relo.type_id;

        // Accumulate in 64 bits and keep the running total within what the 32-bit
        // immediate can express in bytes, so nothing can wrap silently.
        constexpr uint64_t max_offset_bits = static_cast<uint64_t>(std::numeric_limits<int32_t>::max()) * 8 + 7;
        uint64_t final_offset_bits = 0;
        const auto add_bits = [&](const uint64_t bits) {
            if (bits > max_offset_bits - final_offset_bits) {
                throw UnmarshalError("CO-RE: field offset does not fit in a 32-bit immediate");
            }
            final_offset_bits += bits;
        };
        const auto add_elements = [&](const uint64_t count, const uint64_t element_bytes) {
            const uint64_t remaining_bytes = (max_offset_bits - final_offset_bits) / 8;
            if (element_bytes != 0 && count > remaining_bytes / element_bytes) {
                throw UnmarshalError("CO-RE: field offset does not fit in a 32-bit immediate");
            }
            final_offset_bits += count * element_bytes * 8;
        };

        bool at_root = true;
        for (const uint32_t index : indices) {
            // Skip typedef/const/volatile/restrict wrappers; the depth limit guards against cyclic BTF.
            int depth = 0;
            while (true) {
                if (++depth > 255) {
                    throw UnmarshalError("CO-RE type resolution exceeded depth limit (possible corrupt BTF)");
                }
                const auto kind_index = btf_data.get_kind_index(current_type_id);
                if (kind_index == libbtf::BTF_KIND_TYPEDEF) {
                    current_type_id = btf_data.get_kind_type<libbtf::btf_kind_typedef>(current_type_id).type;
                } else if (kind_index == libbtf::BTF_KIND_CONST) {
                    current_type_id = btf_data.get_kind_type<libbtf::btf_kind_const>(current_type_id).type;
                } else if (kind_index == libbtf::BTF_KIND_VOLATILE) {
                    current_type_id = btf_data.get_kind_type<libbtf::btf_kind_volatile>(current_type_id).type;
                } else if (kind_index == libbtf::BTF_KIND_RESTRICT) {
                    current_type_id = btf_data.get_kind_type<libbtf::btf_kind_restrict>(current_type_id).type;
                } else {
                    break;
                }
            }

            if (at_root) {
                // First component: index into an array of the root type; the type itself is unchanged.
                at_root = false;
                if (index != 0) {
                    add_elements(index, static_cast<uint64_t>(btf_data.get_size(current_type_id)));
                }
                continue;
            }

            const auto kind_index = btf_data.get_kind_index(current_type_id);
            if (kind_index == libbtf::BTF_KIND_STRUCT) {
                const auto s = btf_data.get_kind_type<libbtf::btf_kind_struct>(current_type_id);
                if (index >= s.members.size()) {
                    throw UnmarshalError("CO-RE: member index out of bounds");
                }
                add_bits(s.members[index].offset_from_start_in_bits);
                current_type_id = s.members[index].type;
            } else if (kind_index == libbtf::BTF_KIND_ARRAY) {
                const auto a = btf_data.get_kind_type<libbtf::btf_kind_array>(current_type_id);
                add_elements(index, static_cast<uint64_t>(btf_data.get_size(a.element_type)));
                current_type_id = a.element_type;
            } else {
                throw UnmarshalError("CO-RE: indexing into non-aggregate type");
            }
        }
        inst.imm = gsl::narrow<int32_t>(final_offset_bits / 8);
        break;
    }
    case BPF_CORE_TYPE_ID_LOCAL:
    case BPF_CORE_TYPE_ID_TARGET: inst.imm = gsl::narrow<int>(relo.type_id); break;
    case BPF_CORE_TYPE_SIZE: inst.imm = gsl::narrow<int>(btf_data.get_size(relo.type_id)); break;
    default: throw UnmarshalError("Unsupported CO-RE relocation kind: " + std::to_string(relo.kind));
    }
}
×
772

773
/// @brief Apply the CO-RE relocations referenced by the BTF relocation section.
///
/// Looks for ".rel.BTF" (falling back to ".rela.BTF"); without either there is
/// nothing to do. For each entry of the accepted relocation type, the symbol value
/// is used as a byte offset into .BTF.ext at which a bpf_core_relo record is read,
/// and the record is applied to the program whose byte range contains its insn_off.
///
/// @param btf_data Parsed BTF type information passed on to apply_core_relocation
/// @throws UnmarshalError if .BTF.ext is missing, a record lies outside .BTF.ext,
///         or no program contains the record's instruction offset
void ProgramReader::process_core_relocations(const libbtf::btf_type_data& btf_data) {
    const ELFIO::section* relo_sec = reader.sections[".rel.BTF"];
    if (!relo_sec) {
        relo_sec = reader.sections[".rela.BTF"];
    }
    if (!relo_sec) {
        return;
    }

    const ELFIO::section* btf_ext_sec = reader.sections[".BTF.ext"];
    if (!btf_ext_sec) {
        throw UnmarshalError(".BTF.ext section missing for CO-RE relocations");
    }

    const char* btf_ext_data = btf_ext_sec->get_data();
    const ELFIO::Elf_Xword btf_ext_size = btf_ext_sec->get_size();
    const ELFIO::const_relocation_section_accessor relocs(reader, relo_sec);

    // Relocation type accepted as a CO-RE relocation.
    // NOTE(review): the BPF relocation table published with LLVM / the kernel docs lists
    // R_BPF_64_NODYLD32 as 4, not 19 - confirm which value is intended before relying on
    // this path. The value is left unchanged here.
    constexpr unsigned int R_BPF_64_NODYLD32 = 19;

    for (ELFIO::Elf_Xword i = 0; i < relocs.get_entries_num(); i++) {
        ELFIO::Elf64_Addr offset{};
        ELFIO::Elf_Word sym_idx{};
        unsigned type{};
        ELFIO::Elf_Sxword addend{};
        if (!relocs.get_entry(i, offset, sym_idx, type, addend)) {
            continue;
        }
        // Every other relocation type in this section is ignored.
        if (type != R_BPF_64_NODYLD32) {
            continue;
        }

        const auto sym = get_symbol_details(symbols, sym_idx);
        // Written as a subtraction so that a huge sym.value cannot wrap around and pass the check.
        if (!btf_ext_data || btf_ext_size < sizeof(bpf_core_relo) ||
            sym.value > btf_ext_size - sizeof(bpf_core_relo)) {
            throw UnmarshalError("CO-RE relocation offset out of BTF.ext bounds");
        }
        // Copy the record out of the section: the symbol value carries no alignment guarantee.
        bpf_core_relo relo{};
        std::memcpy(&relo, btf_ext_data + sym.value, sizeof(relo));

        // Find the program whose byte range contains the record's instruction offset.
        // NOTE(review): only offsets are compared, not sections; programs in different
        // sections may cover the same offset range - confirm this cannot misattribute.
        bool applied = false;
        for (auto& prog : raw_programs) {
            if (relo.insn_off >= prog.insn_off &&
                relo.insn_off < prog.insn_off + prog.prog.size() * sizeof(EbpfInst)) {
                apply_core_relocation(prog, relo, btf_data);
                applied = true;
                break;
            }
        }

        if (!applied) {
            throw UnmarshalError("Failed to find program for CO-RE relocation at instruction offset " +
                                 std::to_string(relo.insn_off));
        }
    }
}
831

832
/// @brief Recursively append subprograms to a main program.
833
///
834
/// BPF programs can call local functions (subprograms). The linker must:
835
/// 1. Identify all CallLocal instructions in the program
836
/// 2. Find the target subprogram by name
837
/// 3. Append the subprogram's instructions to the caller
838
/// 4. Update the CallLocal immediate with the correct PC-relative offset
839
/// 5. Recursively process any subprograms called by the subprogram
840
///
841
/// Note: Recursive calls are detected and rejected.
842
///
843
/// @param prog RawProgram to process
844
/// @return Empty string on success, error message on failure
845
std::string ProgramReader::append_subprograms(RawProgram& prog) {
23,414✔
846
    if (resolved_subprograms[&prog]) {
23,414✔
847
        return {};
12✔
848
    }
849

850
    if (currently_visiting.contains(&prog)) {
23,402✔
851
        throw UnmarshalError("Mutual recursion in subprogram calls");
×
852
    }
853
    currently_visiting.insert(&prog);
23,402✔
854

855
    std::map<std::string, ELFIO::Elf_Xword> subprogram_offsets;
23,402✔
856
    for (const auto& reloc : function_relocations) {
23,474✔
857
        if (reloc.prog_index >= raw_programs.size() ||
108✔
858
            raw_programs[reloc.prog_index].function_name != prog.function_name) {
72✔
859
            continue;
48✔
860
        }
861
        if (!subprogram_offsets.contains(reloc.target_function_name)) {
24✔
862
            subprogram_offsets[reloc.target_function_name] = prog.prog.size();
12✔
863
            auto sym = get_symbol_details(symbols, reloc.relocation_entry_index);
12✔
864
            if (sym.section_index >= reader.sections.size()) {
12✔
865
                throw UnmarshalError("Invalid section index");
×
866
            }
867
            const auto& sub_sec = *reader.sections[sym.section_index];
12✔
868
            if (const auto sub = find_subprogram(raw_programs, sub_sec, sym.name)) {
12✔
869
                if (sub == &prog) {
12✔
870
                    throw UnmarshalError("Recursive subprogram call");
×
871
                }
872
                const std::string err = append_subprograms(*sub);
12✔
873
                if (!err.empty()) {
12✔
874
                    return err;
×
875
                }
876
                const size_t base = subprogram_offsets[reloc.target_function_name];
12✔
877

878
                // Append subprogram to program
879
                prog.prog.insert(prog.prog.end(), sub->prog.begin(), sub->prog.end());
12✔
880
                if (parse_params.options.verbosity_opts.print_line_info) {
12✔
881
                    for (const auto& [k, info] : sub->info.line_info) {
×
882
                        prog.info.line_info[base + k] = info;
×
883
                    }
884
                }
885
                for (const size_t builtin_offset : sub->info.builtin_call_offsets) {
12✔
886
                    prog.info.builtin_call_offsets.insert(base + builtin_offset);
×
887
                }
888
            } else {
12✔
889
                return "Subprogram not found: " + sym.name;
×
890
            }
891
        }
12✔
892
        // BPF uses signed 32-bit immediates: offset = target - (source + 1)
893
        const auto target_offset = gsl::narrow<int64_t>(subprogram_offsets[reloc.target_function_name]);
24✔
894
        const auto source_offset = gsl::narrow<int64_t>(reloc.source_offset);
24✔
895
        prog.prog[reloc.source_offset].imm = gsl::narrow<int32_t>(target_offset - source_offset - 1);
24✔
896
    }
897
    currently_visiting.erase(&prog);
23,402✔
898
    resolved_subprograms[&prog] = true;
23,402✔
899
    return {};
23,402✔
900
}
23,402✔
901

902
/// @brief Resolve a map reference to the original file descriptor of its descriptor.
///
/// Two lookup schemes exist, selected by what the global data holds:
/// - a record size (legacy maps): the symbol's value is a byte offset into the maps
///   section and is divided by the record size to obtain the descriptor index;
/// - a name-to-index table: the descriptor index is looked up by @p name.
///
/// @param name Map symbol name (used for the name-based lookup and in messages)
/// @param index Symbol table index (used for the legacy lookup)
/// @return original_fd of the selected map descriptor
/// @throws UnmarshalError if the record size is zero, the symbol offset is misaligned,
///         the name is unknown, or the resulting index is out of range
int ProgramReader::relocate_map(const std::string& name, const ELFIO::Elf_Word index) const {
    size_t val{};
    if (const auto* record_size = std::get_if<size_t>(&global.map_record_size_or_map_offsets)) {
        // A zero record size would make the modulo and division below undefined.
        if (*record_size == 0) {
            throw UnmarshalError("Map record size is zero; cannot relocate map symbol " + name);
        }
        // Legacy path: map symbol value is byte offset into maps section
        // Divide by struct size to get descriptor index
        const auto symbol_value = get_symbol_details(symbols, index).value;
        if (symbol_value % *record_size != 0) {
            throw UnmarshalError("Map symbol offset " + std::to_string(symbol_value) +
                                 " is not aligned to record size " + std::to_string(*record_size));
        }

        val = symbol_value / *record_size;
    } else {
        // BTF path: use map name to look up descriptor index
        const auto& offsets = std::get<MapOffsets>(global.map_record_size_or_map_offsets);
        const auto it = offsets.find(name);
        if (it == offsets.end()) {
            throw UnmarshalError("Map descriptor not found: " + name);
        }
        val = it->second;
    }
    if (val >= global.map_descriptors.size()) {
        throw UnmarshalError(bad_reloc_value(val));
    }
    return global.map_descriptors.at(val).original_fd;
}
928

929
int ProgramReader::relocate_global_variable(const std::string& name) const {
9,920✔
930
    const auto* offsets = std::get_if<MapOffsets>(&global.map_record_size_or_map_offsets);
9,920✔
931
    if (!offsets) {
9,920✔
932
        throw UnmarshalError("Invalid map offsets");
×
933
    }
934
    const auto it = offsets->find(name);
9,920✔
935
    if (it == offsets->end()) {
9,920✔
936
        throw UnmarshalError("Map descriptor not found: " + name);
×
937
    }
938
    const size_t val = it->second;
9,920✔
939
    if (val >= global.map_descriptors.size()) {
9,920✔
940
        throw UnmarshalError(bad_reloc_value(val));
×
941
    }
942
    return global.map_descriptors.at(val).original_fd;
14,880✔
943
}
944

945
/// Compute the 32-bit offset to store in the *high* LDDW imm for a global-variable relocation.
946
///
947
/// The encoding rules differ depending on the relocation kind:
948
/// - For relocations against a _section_ symbol (sym.type == STT_SECTION):
949
///   * In RELA ELFs, the relocation addend holds the section-relative offset.
950
///   * In REL ELFs, the addend is zero and the compiler encodes the offset in the
951
///     low LDDW instruction's imm field (`lo_inst_imm`).
952
///   In both cases, we interpret:
953
///       offset = (addend != 0) ? addend : lo_inst_imm
954
///
955
/// - For relocations against a _data_ symbol (e.g., `global_var4`):
956
///   The symbol value is already section-relative, so the offset is:
957
///       offset = sym.value + addend
958
///
959
/// The result is narrowed to int32_t, matching the 32-bit imm field of a BPF instruction.
960
///
961
/// This function is only used for global-variable LDDW relocations.
962
int32_t ProgramReader::compute_lddw_reloc_offset_imm(const ELFIO::Elf_Sxword addend, const ELFIO::Elf_Word index,
9,920✔
963
                                                     const std::reference_wrapper<EbpfInst> lo_inst) const {
964
    const auto& sym = get_symbol_details(symbols, index);
9,920✔
965
    if (sym.type == ELFIO::STT_SECTION) {
9,920✔
966
        return addend != 0 ? gsl::narrow<int32_t>(addend) : lo_inst.get().imm;
8✔
967
    }
968
    return gsl::narrow<int32_t>(sym.value + addend);
9,912✔
969
}
9,920✔
970

971
/// @brief Apply one symbol relocation to `instructions[location]` if it is of a known flavour.
///
/// See the declaration for the list of flavours. Returns false when the relocation
/// is left unresolved so that the caller can report it.
bool ProgramReader::try_reloc(const std::string& symbol_name, const ELFIO::Elf_Half symbol_section_index,
                              std::vector<EbpfInst>& instructions, const size_t location, const ELFIO::Elf_Word index,
                              const ELFIO::Elf_Sxword addend) {
    // Shared by the unnamed and the named global-variable cases. The statement order
    // matters: the offset computation reads the low instruction's imm before it is
    // overwritten with the map file descriptor.
    const auto patch_global_variable = [&](const std::string& description) {
        auto [lo_inst, hi_inst] = validate_and_get_lddw_pair(instructions, location, description);
        hi_inst.get().imm = compute_lddw_reloc_offset_imm(addend, index, lo_inst);
        lo_inst.get().src = INST_LD_MODE_MAP_VALUE;
        lo_inst.get().imm = relocate_global_variable(reader.sections[symbol_section_index]->get_name());
    };

    const bool targets_variable_section = global.variable_section_indices.contains(symbol_section_index);

    // Unnamed relocations refer to a section symbol rather than to a named variable.
    if (symbol_name.empty()) {
        if (!targets_variable_section) {
            // Nothing to patch for an unnamed symbol outside the variable sections.
            return true;
        }
        if (!std::holds_alternative<MapOffsets>(global.map_record_size_or_map_offsets)) {
            // No name-to-index table available; leave it to the caller.
            return false;
        }
        patch_global_variable("global variable");
        return true;
    }

    EbpfInst& target = instructions[location];

    // Local calls: either a builtin (e.g. memset/memcpy arriving as a call against an
    // undefined symbol), rewritten to a static helper call and remembered in
    // builtin_offsets_for_current_program, or a real subprogram call that is queued
    // for linking once all programs are loaded.
    const bool is_local_call = target.opcode == INST_OP_CALL && target.src == INST_CALL_LOCAL;
    if (is_local_call) {
        const bool symbol_undefined = symbol_section_index == ELFIO::SHN_UNDEF;
        if (symbol_undefined && parse_params.platform->resolve_builtin_call) {
            if (const auto builtin_id = parse_params.platform->resolve_builtin_call(symbol_name)) {
                target.src = INST_CALL_STATIC_HELPER;
                target.imm = *builtin_id;
                builtin_offsets_for_current_program.insert(location);
                return true;
            }
        }
        function_relocations.emplace_back(FunctionRelocation{raw_programs.size(), location, index, symbol_name});
        return true;
    }

    // Everything below patches a load; other instruction classes are left unresolved.
    const bool is_load = (target.opcode & INST_CLS_MASK) == INST_CLS_LD;
    if (!is_load) {
        return false;
    }

    // Map reference (legacy or BTF-defined).
    if (global.map_section_indices.contains(symbol_section_index)) {
        target.src = INST_LD_MODE_MAP_FD;
        target.imm = relocate_map(symbol_name, index);
        return true;
    }

    // Named global variable (this includes __config_* symbols that live in a variable section).
    if (targets_variable_section) {
        patch_global_variable("global variable '" + symbol_name + "'");
        return true;
    }

    // Compatibility fallback: a __config_* symbol outside any variable section is zeroed.
    if (symbol_name.starts_with("__config_")) {
        target.imm = 0;
        return true;
    }

    return false;
}
1046

1047
void ProgramReader::process_relocations(std::vector<EbpfInst>& instructions,
22,956✔
1048
                                        const ELFIO::const_relocation_section_accessor& reloc,
1049
                                        const std::string& section_name, const ELFIO::Elf_Xword program_offset,
1050
                                        const size_t program_size) {
1051
    for (ELFIO::Elf_Xword i = 0; i < reloc.get_entries_num(); i++) {
1,200,018✔
1052
        ELFIO::Elf64_Addr o{};
1,177,062✔
1053
        ELFIO::Elf_Word idx{};
1,177,062✔
1054
        unsigned type{};
1,177,062✔
1055
        ELFIO::Elf_Sxword addend{};
1,177,062✔
1056
        if (reloc.get_entry(i, o, idx, type, addend)) {
1,177,062✔
1057
            if (o < program_offset || o >= program_offset + program_size) {
1,177,062✔
1058
                continue;
630,692✔
1059
            }
1060
            o -= program_offset;
546,370✔
1061

1062
            if (o % sizeof(EbpfInst) != 0) {
546,370✔
1063
                throw UnmarshalError("Unaligned relocation offset");
×
1064
            }
1065
            const auto loc = o / sizeof(EbpfInst);
546,370✔
1066
            if (loc >= instructions.size()) {
546,370✔
1067
                throw UnmarshalError("Invalid relocation");
×
1068
            }
1069
            auto sym = get_symbol_details(symbols, idx);
546,370✔
1070

1071
            if (!try_reloc(sym.name, sym.section_index, instructions, loc, idx, addend)) {
546,370✔
1072
                unresolved_symbol_errors.push_back("Unresolved external symbol " + sym.name + " in section " +
×
1073
                                                   section_name + " at location " + std::to_string(loc));
×
1074
            }
1075
        }
546,370✔
1076
    }
1077
}
22,956✔
1078

1079
/// @brief Look up the relocation section that accompanies section @p name.
///
/// ".rel<name>" is preferred; ".rela<name>" is consulted only when the former does
/// not exist. The section found first is returned only if it has data. No lookup is
/// performed for ".BTF".
///
/// @param name Name of the section whose relocations are wanted
/// @return The relocation section, or nullptr if there is none to use
const ELFIO::section* ProgramReader::get_relocation_section(const std::string& name) const {
    if (name == ".BTF") {
        return nullptr;
    }
    for (const char* prefix : {".rel", ".rela"}) {
        if (const auto* candidate = reader.sections[prefix + name]) {
            // The first section that exists decides; one without data yields no result.
            return candidate->get_data() ? candidate : nullptr;
        }
    }
    return nullptr;
}
1092

1093
void ProgramReader::read_programs() {
986✔
1094
    // Clear cycle detection state for this batch
1095
    resolved_subprograms.clear();
986✔
1096

1097
    for (const auto& sec : reader.sections) {
61,176✔
1098
        if (!(sec->get_flags() & ELFIO::SHF_EXECINSTR) || !sec->get_size() || !sec->get_data()) {
60,190✔
1099
            continue;
37,480✔
1100
        }
1101
        const auto& sec_name = sec->get_name();
22,710✔
1102
        const auto prog_type = parse_params.platform->get_program_type(sec_name, parse_params.path);
22,710✔
1103
        for (ELFIO::Elf_Xword offset = 0; offset < sec->get_size();) {
46,112✔
1104
            builtin_offsets_for_current_program.clear();
23,402✔
1105
            auto [name, size] = get_program_name_and_size(*sec, offset, symbols);
23,402✔
1106
            auto instructions = vector_of<EbpfInst>(sec->get_data() + offset, size);
23,402✔
1107
            if (const auto reloc_sec = get_relocation_section(sec_name)) {
23,402✔
1108
                process_relocations(instructions, ELFIO::const_relocation_section_accessor{reader, reloc_sec}, sec_name,
22,956✔
1109
                                    offset, size);
1110
            }
1111
            ProgramInfo program_info{
11,701✔
1112
                .platform = parse_params.platform,
23,402✔
1113
                .map_descriptors = global.map_descriptors,
23,402✔
1114
                .type = prog_type,
1115
                .builtin_call_offsets = std::move(builtin_offsets_for_current_program),
23,402✔
1116
            };
23,402✔
1117
            raw_programs.emplace_back(RawProgram{
58,505✔
1118
                parse_params.path,
23,402✔
1119
                sec_name,
1120
                gsl::narrow<uint32_t>(offset),
23,402✔
1121
                name,
1122
                std::move(instructions),
11,701✔
1123
                std::move(program_info),
11,701✔
1124
            });
1125
            offset += size;
23,402✔
1126
        }
23,402✔
1127
    }
22,710✔
1128

1129
    if (const auto btf_sec = reader.sections[".BTF"]) {
986✔
1130
        try {
210✔
1131
            process_core_relocations({vector_of<std::byte>(*btf_sec)});
420✔
1132
        } catch (const std::exception& e) {
×
1133
            throw UnmarshalError(std::string("Unsupported or invalid CO-RE/BTF relocation data: ") + e.what());
×
1134
        }
×
1135
    }
1136

1137
    if (!unresolved_symbol_errors.empty()) {
986✔
1138
        for (const auto& err : unresolved_symbol_errors) {
×
1139
            std::cerr << err << std::endl;
×
1140
        }
1141
        throw UnmarshalError("Unresolved symbols found.");
×
1142
    }
1143

1144
    if (parse_params.options.verbosity_opts.print_line_info) {
986✔
1145
        if (const auto btf_sec = reader.sections[".BTF"]) {
×
1146
            if (const auto btf_ext = reader.sections[".BTF.ext"]) {
×
1147
                try {
1148
                    update_line_info(raw_programs, btf_sec, btf_ext);
×
1149
                } catch (const std::exception& e) {
×
1150
                    throw UnmarshalError(std::string("Unsupported or invalid BTF line info: ") + e.what());
×
1151
                }
×
1152
            }
1153
        }
1154
    }
1155

1156
    for (auto& prog : raw_programs) {
24,388✔
1157
        const auto err = append_subprograms(prog);
23,402✔
1158
        if (!err.empty() && prog.section_name == parse_params.desired_section) {
23,402✔
1159
            throw UnmarshalError(err);
×
1160
        }
1161
    }
23,402✔
1162

1163
    if (!parse_params.desired_section.empty()) {
986✔
1164
        std::erase_if(raw_programs, [&](const auto& p) { return p.section_name != parse_params.desired_section; });
12,628✔
1165
    }
1166

1167
    if (raw_programs.empty()) {
986✔
1168
        throw UnmarshalError(parse_params.desired_section.empty() ? "No executable sections" : "Section not found");
6✔
1169
    }
1170
}
984✔
1171
} // namespace
1172

1173
int create_map_crab(const EbpfMapType& map_type, const uint32_t key_size, const uint32_t value_size,
7,420✔
1174
                    const uint32_t max_entries, ebpf_verifier_options_t) {
1175
    const EquivalenceKey equiv{map_type.value_type, key_size, value_size, map_type.is_array ? max_entries : 0};
7,420✔
1176
    if (!thread_local_program_info->cache.contains(equiv)) {
7,420✔
1177
        // +1 so 0 is the null FD
1178
        thread_local_program_info->cache[equiv] = gsl::narrow<int>(thread_local_program_info->cache.size()) + 1;
6,480✔
1179
    }
1180
    return thread_local_program_info->cache.at(equiv);
11,130✔
1181
}
1182

1183
/// @brief Look up a map descriptor of the current thread-local program by its original FD.
///
/// @param map_fd The `original_fd` value to search for.
/// @return Pointer to the first descriptor whose `original_fd` equals @p map_fd,
///         or nullptr when no descriptor matches.
EbpfMapDescriptor* find_map_descriptor(const int map_fd) {
    auto& descriptors = thread_local_program_info->map_descriptors;
    const auto hit = std::find_if(descriptors.begin(), descriptors.end(),
                                  [map_fd](const EbpfMapDescriptor& desc) { return desc.original_fd == map_fd; });
    if (hit == descriptors.end()) {
        return nullptr;
    }
    return &*hit;
}
1191

1192
std::vector<RawProgram> read_elf(std::istream& input_stream, const std::string& path,
990✔
1193
                                 const std::string& desired_section, const std::string& desired_program,
1194
                                 const ebpf_verifier_options_t& options, const ebpf_platform_t* platform) {
1195
    try {
495✔
1196
        std::vector<RawProgram> res;
990✔
1197
        parse_params_t params{path, options, platform, desired_section};
990✔
1198
        auto reader = load_elf(input_stream, path);
990✔
1199
        auto symbols = read_and_validate_symbol_section(reader, path);
990✔
1200
        auto global = extract_global_data(params, reader, symbols);
988✔
1201
        ProgramReader program_reader{params, reader, symbols, global};
986✔
1202
        program_reader.read_programs();
986✔
1203

1204
        // Return the desired_program, or raw_programs
1205
        if (desired_program.empty()) {
984✔
1206
            return std::move(program_reader.raw_programs);
984✔
1207
        }
1208
        for (RawProgram& cur : program_reader.raw_programs) {
×
1209
            if (cur.function_name == desired_program) {
×
1210
                res.emplace_back(std::move(cur));
×
1211
                return res;
×
1212
            }
1213
        }
1214
        return std::move(program_reader.raw_programs);
×
1215
    } catch (const UnmarshalError&) {
1,009✔
1216
        throw;
6✔
1217
    } catch (const std::exception& e) {
6✔
1218
        throw UnmarshalError(std::string("Unsupported or invalid ELF/BTF data: ") + e.what());
×
1219
    }
×
1220
}
1221

1222
std::vector<RawProgram> read_elf(const std::string& path, const std::string& desired_section,
992✔
1223
                                 const std::string& desired_program, const ebpf_verifier_options_t& options,
1224
                                 const ebpf_platform_t* platform) {
1225
    if (std::ifstream stream{path, std::ios::in | std::ios::binary}) {
992✔
1226
        return read_elf(stream, path, desired_section, desired_program, options, platform);
1,974✔
1227
    }
992✔
1228
    struct stat st; // NOLINT(*-pro-type-member-init)
1✔
1229
    if (stat(path.c_str(), &st)) {
2✔
1230
        throw UnmarshalError(std::string(strerror(errno)) + " opening " + path);
6✔
1231
    }
1232
    throw UnmarshalError("Can't process ELF file " + path);
×
1233
}
1234

1235
} // namespace prevail
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc