• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nickg / nvc / 4834279765

pending completion
4834279765

push

github

Nick Gasson
Fix corner case releasing signal without driver. Fixes #681

5 of 5 new or added lines in 1 file covered. (100.0%)

40789 of 45107 relevant lines covered (90.43%)

922873.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.63
/src/jit/jit-code.c
1
//
2
//  Copyright (C) 2022-2023  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "cpustate.h"
20
#include "debug.h"
21
#include "hash.h"
22
#include "ident.h"
23
#include "jit/jit-priv.h"
24
#include "option.h"
25
#include "thread.h"
26

27
#include <assert.h>
28
#include <stdlib.h>
29
#include <string.h>
30
#include <stdio.h>
31
#include <unistd.h>
32

33
#if defined __MINGW32__
34
#include <winnt.h>
35
#elif defined __APPLE__
36
#include <mach-o/loader.h>
37
#include <mach-o/reloc.h>
38
#include <mach-o/nlist.h>
39
#include <mach-o/stab.h>
40
#include <mach-o/arm64/reloc.h>
41
#include <mach-o/x86_64/reloc.h>
42
#else
43
#include <elf.h>
44
#endif
45

46
#ifdef HAVE_CAPSTONE
47
#include <capstone.h>
48
#endif
49

50
#ifndef R_AARCH64_MOVW_UABS_G0_NC
51
#define R_AARCH64_MOVW_UABS_G0_NC 264
52
#endif
53

54
#ifndef R_AARCH64_MOVW_UABS_G1_NC
55
#define R_AARCH64_MOVW_UABS_G1_NC 266
56
#endif
57

58
#ifndef R_AARCH64_MOVW_UABS_G2_NC
59
#define R_AARCH64_MOVW_UABS_G2_NC 268
60
#endif
61

62
#ifndef R_AARCH64_MOVW_UABS_G3
63
#define R_AARCH64_MOVW_UABS_G3 269
64
#endif
65

66
#ifndef SHT_X86_64_UNWIND
67
#define SHT_X86_64_UNWIND 0x70000001
68
#endif
69

70
#define CODECACHE_ALIGN   4096
71
#define CODECACHE_SIZE    0x400000
72
#define THREAD_CACHE_SIZE 0x10000
73
#define CODE_BLOB_ALIGN   256
74
#define MIN_BLOB_SIZE     0x4000
75

76
#define __IMM64(x) __IMM32(x), __IMM32((x) >> 32)
77
#define __IMM32(x) __IMM16(x), __IMM16((x) >> 16)
78
#define __IMM16(x) (x) & 0xff, ((x) >> 8) & 0xff
79

80
STATIC_ASSERT(MIN_BLOB_SIZE <= THREAD_CACHE_SIZE);
81
STATIC_ASSERT(MIN_BLOB_SIZE % CODE_BLOB_ALIGN == 0);
82
STATIC_ASSERT(CODECACHE_SIZE % THREAD_CACHE_SIZE == 0);
83

84
typedef struct _code_span {
85
   code_cache_t *owner;
86
   code_span_t  *next;
87
   ident_t       name;
88
   uint8_t      *base;
89
   size_t        size;
90
} code_span_t;
91

92
typedef struct _patch_list {
93
   patch_list_t    *next;
94
   uint8_t         *wptr;
95
   jit_label_t      label;
96
   code_patch_fn_t  fn;
97
} patch_list_t;
98

99
typedef struct _code_cache {
100
   nvc_lock_t   lock;
101
   uint8_t     *mem;
102
   code_span_t *spans;
103
   code_span_t *freelist[MAX_THREADS];
104
   code_span_t *globalfree;
105
   FILE        *perfmap;
106
#ifdef HAVE_CAPSTONE
107
   csh          capstone;
108
#endif
109
#ifdef DEBUG
110
   size_t       used;
111
#endif
112
} code_cache_t;
113

114
static void code_disassemble(code_span_t *span, uintptr_t mark,
115
                             struct cpu_state *cpu);
116

117
// Map an address inside the JIT code cache to a debug frame for the
// stack unwinder.  All spans are scanned; when more than one covers the
// address the last match in list order provides the symbol, exactly as
// before.
static void code_cache_unwinder(uintptr_t addr, debug_frame_t *frame,
                                void *context)
{
   code_cache_t *code = context;

   const uint8_t *pc = (uint8_t *)addr;
   for (code_span_t *it = code->spans; it != NULL; it = it->next) {
      const bool covers = pc >= it->base && pc < it->base + it->size;
      if (covers) {
         frame->kind   = FRAME_VHDL;
         frame->disp   = pc - it->base;
         frame->symbol = istr(it->name);
      }
   }
}
131

132
static void code_fault_handler(int sig, void *addr, struct cpu_state *cpu,
×
133
                               void *context)
134
{
135
   code_cache_t *code = context;
×
136

137
   const uint8_t *pc = (uint8_t *)cpu->pc;
×
138
   if (pc < code->mem || pc > code->mem + CODECACHE_SIZE)
×
139
      return;
140

141
   uintptr_t mark = cpu->pc;
×
142
#ifndef __MINGW32__
143
   if (sig == SIGTRAP)
×
144
      mark--;   // Point to faulting instruction
145
#endif
146

147
   for (code_span_t *span = code->spans; span; span = span->next) {
×
148
      if (pc >= span->base && pc < span->base + span->size && span->name)
×
149
         code_disassemble(span, mark, cpu);
150
   }
151
}
152

153
// Allocate a span record covering [base, base+size) and push it onto
// the head of the cache's span list under the cache lock.  The region
// must lie entirely within the mapped code cache.
static code_span_t *code_span_new(code_cache_t *code, ident_t name,
                                  uint8_t *base, size_t size)
{
   assert(base >= code->mem);
   assert(base + size <= code->mem + CODECACHE_SIZE);

   SCOPED_LOCK(code->lock);

   code_span_t *s = xcalloc(sizeof(code_span_t));
   s->owner = code;
   s->name  = name;
   s->base  = base;
   s->size  = size;
   s->next  = code->spans;

   code->spans = s;
   return s;
}
171

172
// Create the global JIT code cache: maps an executable memory region,
// initialises capstone for disassembly (when available), installs the
// fault handler and unwinder hooks, and seeds the global free list with
// the entire region as one anonymous span.
code_cache_t *code_cache_new(void)
{
   code_cache_t *code = xcalloc(sizeof(code_cache_t));
   code->mem = map_jit_pages(CODECACHE_ALIGN, CODECACHE_SIZE);

#ifdef HAVE_CAPSTONE
#if defined ARCH_X86_64
   if (cs_open(CS_ARCH_X86, CS_MODE_64, &(code->capstone)) != CS_ERR_OK)
      fatal_trace("failed to init capstone for x86_64");
#elif defined ARCH_ARM64
   if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &(code->capstone)) != CS_ERR_OK)
      fatal_trace("failed to init capstone for Arm64");
#else
#error Cannot configure capstone for this architecture
#endif

   // Detail mode is required so code_disassemble can inspect operands
   if (cs_option(code->capstone, CS_OPT_DETAIL, 1) != CS_ERR_OK)
      fatal_trace("failed to set capstone detailed mode");
#endif

   add_fault_handler(code_fault_handler, code);
   debug_add_unwinder(code->mem, CODECACHE_SIZE, code_cache_unwinder, code);

   // Initially the whole cache is a single unnamed free span
   code->globalfree = code_span_new(code, NULL, code->mem, CODECACHE_SIZE);

   return code;
}
199

200
// Tear down the code cache created by code_cache_new: remove the
// unwinder and fault-handler hooks, unmap the executable region and
// release all span bookkeeping records.
void code_cache_free(code_cache_t *code)
{
   debug_remove_unwinder(code->mem);
   remove_fault_handler(code_fault_handler, code);

   nvc_munmap(code->mem, CODECACHE_SIZE);

   // Free every span record; the list links are invalid after free so
   // the successor is saved first
   for (code_span_t *it = code->spans, *tmp; it; it = tmp) {
      tmp = it->next;
      free(it);
   }

#ifdef HAVE_CAPSTONE
   cs_close(&(code->capstone));
#endif

#ifdef DEBUG
   if (!opt_get_int(OPT_UNIT_TEST))
      debugf("JIT code footprint: %zu bytes", code->used);
#endif

   free(code);
}
223

224
#ifdef HAVE_CAPSTONE
225
// Pad stdout with spaces from column col up to column tab, returning
// the new column (unchanged if col is already at or past tab).
static int code_print_spaces(int col, int tab)
{
   while (col < tab) {
      fputc(' ', stdout);
      col++;
   }
   return col;
}
231
#endif
232

233
// Print a disassembly of span to stdout using capstone.  If mark is
// non-zero an arrow is printed at the instruction covering that address
// and, when cpu is non-NULL, the captured register state is dumped
// after it.  Compiles to an empty function without capstone.
static void code_disassemble(code_span_t *span, uintptr_t mark,
                             struct cpu_state *cpu)
{
#ifdef HAVE_CAPSTONE
   SCOPED_LOCK(span->owner->lock);

   printf("--");

   const int namelen = ident_len(span->name);
   for (int i = 0; i < 72 - namelen; i++)
      fputc('-', stdout);

   printf(" %s ----\n", istr(span->name));

   cs_insn *insn = cs_malloc(span->owner->capstone);

   const uint8_t *const eptr = span->base + span->size;
   for (const uint8_t *ptr = span->base; ptr < eptr; ) {
      size_t size = eptr - ptr;
      uint64_t address = (uint64_t)ptr;
      int col = 0;
      if (cs_disasm_iter(span->owner->capstone, &ptr, &size, &address, insn)) {
         // Render the instruction bytes as a hex string
         char hex1[33], *p = hex1;
         for (size_t k = 0; k < insn->size; k++)
            p += checked_sprintf(p, hex1 + sizeof(hex1) - p, "%02x",
                                 insn->bytes[k]);

         col = printf("%-12" PRIx64 " %-16.16s %s %s", insn->address,
                          hex1, insn->mnemonic, insn->op_str);

#ifdef ARCH_X86_64
         // Annotate 64-bit immediate loads with the symbol name, if the
         // immediate resolves to a known symbol
         if (strcmp(insn->mnemonic, "movabs") == 0) {
            const cs_x86_op *src = &(insn->detail->x86.operands[1]);
            if (src->type == X86_OP_IMM) {
               const char *sym = debug_symbol_name((void *)src->imm);
               if (sym != NULL) {
                  col = code_print_spaces(col, 60);
                  col += printf(" ; %s", sym);
               }
            }
         }
#endif

         // Instructions longer than eight bytes wrap the remaining hex
         // onto a continuation line
         if (strlen(hex1) > 16)
            col = printf("\n%15s -%-16s", "", hex1 + 16) - 1;
      }
      else {
         // Capstone could not decode: dump raw data and skip past it
#ifdef ARCH_ARM64
         col = printf("%-12" PRIx64 " %-16.08x %s 0x%08x", (uint64_t)ptr,
                      *(uint32_t *)ptr, ".word", *(uint32_t *)ptr);
         ptr += 4;
#else
         col = printf("%-12" PRIx64 " %-16.02x %s 0x%02x", (uint64_t)ptr,
                      *ptr, ".byte", *ptr);
         ptr++;
#endif
      }

      // Draw the arrow once we have passed (or reached the end before)
      // the marked address
      if (mark != 0 && (ptr >= eptr || address > mark)) {
         col = code_print_spaces(col, 66);
         printf("<=============\n");
         if (cpu != NULL) {
#ifdef ARCH_X86_64
            const char *names[] = {
               "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
               "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"
            };
            for (int i = 0; i < ARRAY_LEN(names); i++)
               printf("\t%s\t%"PRIxPTR"\n", names[i], cpu->regs[i]);
#else
            for (int i = 0; i < 32; i++)
               printf("\tR%d\t%"PRIxPTR"\n", i, cpu->regs[i]);
#endif
         }
         mark = 0;   // Only mark the first matching instruction
      }
      else
         printf("\n");
   }

   cs_free(insn, 1);

   for (int i = 0; i < 80; i++)
      fputc('-', stdout);
   printf("\n");
   fflush(stdout);
#endif
}
321

322
// Record this span in the Linux perf map file /tmp/perf-<pid>.map,
// creating the file lazily on first use.  If the file cannot be created
// the OPT_PERF_MAP option is cleared so later spans do not retry.
static void code_write_perf_map(code_span_t *span)
{
   code_cache_t *owner = span->owner;
   SCOPED_LOCK(owner->lock);

   if (owner->perfmap == NULL) {
      char *fname LOCAL = xasprintf("/tmp/perf-%d.map", getpid());
      owner->perfmap = fopen(fname, "w");
      if (owner->perfmap == NULL) {
         warnf("cannot create %s: %s", fname, last_os_error());
         opt_set_int(OPT_PERF_MAP, 0);
         return;
      }
      debugf("writing perf map to %s", fname);
   }

   fprintf(owner->perfmap, "%p 0x%zx %s\n", span->base, span->size,
           istr(span->name));
   fflush(owner->perfmap);
}
341

342
// Begin a new code blob named name.  Memory is taken from the calling
// thread's private free span; when that is too small a fresh chunk is
// carved from the global free list under the cache lock.  hint is the
// expected code size in bytes, or zero for the default MIN_BLOB_SIZE.
// Returns NULL only when the global cache is exhausted.  On success the
// thread is switched to W^X write mode ready for emission.
code_blob_t *code_blob_new(code_cache_t *code, ident_t name, size_t hint)
{
   code_span_t **freeptr = &(code->freelist[thread_id()]);

   code_span_t *free = relaxed_load(freeptr);
   if (free == NULL) {
      // First allocation on this thread: start with an empty free span
      free = code_span_new(code, NULL, code->mem, 0);
      relaxed_store(freeptr, free);
   }

   const size_t reqsz = hint ?: MIN_BLOB_SIZE;

   if (free->size < reqsz) {
      SCOPED_LOCK(code->lock);

      if (code->globalfree->size == 0)
         return NULL;

#ifdef DEBUG
      if (free->size > 0)
         debugf("thread %d needs new code cache from global free list "
                "(requested %zu bytes, wasted %zu bytes)",
                thread_id(), reqsz, free->size);
#endif

      // Take at least a whole thread-cache chunk to amortise lock
      // acquisitions, but never more than remains globally
      const size_t chunksz = MAX(reqsz, THREAD_CACHE_SIZE);
      const size_t alignedsz = ALIGN_UP(chunksz, CODE_BLOB_ALIGN);
      const size_t take = MIN(code->globalfree->size, alignedsz);

      free->size = take;
      free->base = code->globalfree->base;

      code->globalfree->base += take;
      code->globalfree->size -= take;

      if (code->globalfree->size == 0)
         warnf("global JIT code buffer exhausted");
   }

   assert(reqsz <= free->size);
   assert(((uintptr_t)free->base & (CODE_BLOB_ALIGN - 1)) == 0);

   // The new span initially covers all of the thread's free memory; it
   // is trimmed to the actual emitted size in code_blob_finalise
   code_span_t *span = code_span_new(code, name, free->base, free->size);

   free->base += span->size;
   free->size -= span->size;

   code_blob_t *blob = xcalloc(sizeof(code_blob_t));
   blob->span = span;
   blob->wptr = span->base;

   thread_wx_mode(WX_WRITE);

   return blob;
}
397

398
// Finish emitting code into blob: shrink the span to the bytes actually
// written, return the unused tail to the thread's free span, flush the
// instruction cache, switch back to W^X execute mode and publish the
// entry point through *entry.  On overflow all memory is returned to
// the free span and *entry is left untouched.  Frees blob in all cases
// except the fatal error paths.
void code_blob_finalise(code_blob_t *blob, jit_entry_fn_t *entry)
{
   code_span_t *span = blob->span;
   span->size = blob->wptr - span->base;

   code_span_t *freespan = relaxed_load(&(span->owner->freelist[thread_id()]));
   assert(freespan->size == 0);

   ihash_free(blob->labels);
   blob->labels = NULL;

   if (unlikely(blob->patches != NULL))
      fatal_trace("not all labels in %s were patched", istr(span->name));
   else if (unlikely(blob->overflow)) {
      // Return all the memory
      freespan->size = freespan->base - span->base;
      freespan->base = span->base;
      free(blob);
      return;
   }
   else if (span->size == 0)
      fatal_trace("code span %s is empty", istr(span->name));

   // Hand the space between the end of the emitted code and the next
   // alignment boundary back to the thread's free span
   uint8_t *aligned = ALIGN_UP(blob->wptr, CODE_BLOB_ALIGN);
   freespan->size = freespan->base - aligned;
   freespan->base = aligned;

   if (opt_get_verbose(OPT_ASM_VERBOSE, istr(span->name))) {
      color_printf("\n$bold$$blue$");
      code_disassemble(span, 0, NULL);
      color_printf("$$\n");
   }

   // Ensure the CPU does not execute stale instructions on targets with
   // incoherent instruction caches
   __builtin___clear_cache((char *)span->base, (char *)blob->wptr);

   thread_wx_mode(WX_EXECUTE);

   // Publish the entry point only after the code is fully visible
   store_release(entry, (jit_entry_fn_t)span->base);

   DEBUG_ONLY(relaxed_add(&span->owner->used, span->size));
   free(blob);

   if (opt_get_int(OPT_PERF_MAP))
      code_write_perf_map(span);
}
443

444
// Append len bytes of machine code to the blob.  If the span would be
// filled the blob enters the overflow state: pending patches are freed
// and all further emission becomes a no-op until finalisation.
void code_blob_emit(code_blob_t *blob, const uint8_t *bytes, size_t len)
{
   if (unlikely(blob->overflow))
      return;

   const uint8_t *limit = blob->span->base + blob->span->size;
   if (unlikely(blob->wptr + len >= limit)) {
      warnf("JIT code buffer for %s too small", istr(blob->span->name));
      patch_list_t *it = blob->patches;
      while (it != NULL) {
         patch_list_t *tmp = it->next;
         free(it);
         it = tmp;
      }
      blob->patches = NULL;
      blob->overflow = true;
      return;
   }

   memcpy(blob->wptr, bytes, len);
   blob->wptr += len;
}
462

463
// Pad the blob's write pointer up to the next multiple of align, which
// must be a power of two, using single-byte NOPs on x86-64 and zero
// bytes elsewhere.  Stops early if the blob overflows while padding.
void code_blob_align(code_blob_t *blob, unsigned align)
{
#ifdef ARCH_X86_64
   const uint8_t pad[] = { 0x90 };
#else
   const uint8_t pad[] = { 0x00 };
#endif

   assert(is_power_of_2(align));
   assert(align % ARRAY_LEN(pad) == 0);

   for (;;) {
      const bool aligned = ((uintptr_t)blob->wptr & (align - 1)) == 0;
      if (aligned || blob->overflow)
         break;
      code_blob_emit(blob, pad, ARRAY_LEN(pad));
   }
}
477

478
// Define label at the current write position.  Records it in the label
// hash and immediately resolves any patches that were deferred waiting
// for this label, removing them from the pending list.
void code_blob_mark(code_blob_t *blob, jit_label_t label)
{
   if (unlikely(blob->overflow))
      return;

   if (blob->labels == NULL)
      blob->labels = ihash_new(256);

   ihash_put(blob->labels, label, blob->wptr);

   patch_list_t **p = &(blob->patches);
   while (*p != NULL) {
      patch_list_t *cur = *p;
      if (cur->label == label) {
         (*cur->fn)(blob, label, cur->wptr, blob->wptr);
         *p = cur->next;   // Unlink before freeing
         free(cur);
      }
      else
         p = &(cur->next);
   }
}
498

499
// Request that fn be applied at the current write position once label
// is known.  If the label was already marked the patch runs now,
// otherwise it is queued and resolved later by code_blob_mark.
void code_blob_patch(code_blob_t *blob, jit_label_t label, code_patch_fn_t fn)
{
   if (unlikely(blob->overflow))
      return;

   void *ptr = blob->labels != NULL ? ihash_get(blob->labels, label) : NULL;
   if (ptr != NULL)
      (*fn)(blob, label, blob->wptr, ptr);
   else {
      patch_list_t *pending = xmalloc(sizeof(patch_list_t));
      pending->label = label;
      pending->fn    = fn;
      pending->wptr  = blob->wptr;
      pending->next  = blob->patches;

      blob->patches = pending;
   }
}
516

517
#ifdef ARCH_ARM64
// Emit (or reuse) a veneer performing an absolute 64-bit jump to dest,
// for branch targets out of range of a 26-bit PC-relative branch.
static void *arm64_emit_trampoline(code_blob_t *blob, uintptr_t dest)
{
   const uint8_t veneer[] = {
      0x50, 0x00, 0x00, 0x58,   // LDR X16, [PC+8]
      0x00, 0x02, 0x1f, 0xd6,   // BR X16
      __IMM64(dest)             // Literal jump target
   };

   // Reuse a byte-identical veneer already emitted in this span
   void *prev = memmem(blob->span->base, blob->span->size,
                       veneer, ARRAY_LEN(veneer));
   if (prev != NULL)
      return prev;
   else {
      void *addr = blob->wptr;
      code_blob_emit(blob, veneer, ARRAY_LEN(veneer));
      return addr;
   }
}
#else
#define arm64_emit_trampoline(blob, dest) NULL
#endif
539

540
#if defined __MINGW32__
// Load an AMD64 PE/COFF object file into the blob: copies code and
// initialised-data sections, applies relocations, and registers any
// .pdata unwind tables with the OS.
static void code_load_pe(code_blob_t *blob, const void *data, size_t size)
{
   const IMAGE_FILE_HEADER *imghdr = data;

   if (imghdr->Machine != IMAGE_FILE_MACHINE_AMD64)
      fatal_trace("unknown target machine %x", imghdr->Machine);

   const IMAGE_SYMBOL *symtab = data + imghdr->PointerToSymbolTable;
   // String table follows the symbol table immediately
   const char *strtab = data + imghdr->PointerToSymbolTable
      + imghdr->NumberOfSymbols * sizeof(IMAGE_SYMBOL);

   const IMAGE_SECTION_HEADER *sections =
      data + IMAGE_SIZEOF_FILE_HEADER + imghdr->SizeOfOptionalHeader;

   // Load address of each section within the blob, indexed by section
   void **load_addr LOCAL =
      xmalloc_array(imghdr->NumberOfSections, sizeof(void *));

   // First pass: copy loadable sections into the blob
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
      if ((sections[i].Characteristics & IMAGE_SCN_CNT_CODE)
          || (sections[i].Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)) {
         const int align = sections[i].Characteristics & IMAGE_SCN_ALIGN_MASK;
         code_blob_align(blob, 1 << ((align >> 20) - 1));
         load_addr[i] = blob->wptr;
         code_blob_emit(blob, data + sections[i].PointerToRawData,
                        sections[i].SizeOfRawData);
      }
      else if ((sections[i].Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
               && sections[i].Misc.VirtualSize > 0)
         fatal_trace("non-empty BSS not supported");
   }

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   // Second pass: apply relocations and register unwind data
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
      const IMAGE_RELOCATION *relocs = data + sections[i].PointerToRelocations;
      for (int j = 0; j < sections[i].NumberOfRelocations; j++) {
         const char *name = NULL;
         char tmp[9];

         assert(relocs[j].SymbolTableIndex < imghdr->NumberOfSymbols);
         const IMAGE_SYMBOL *sym = symtab + relocs[j].SymbolTableIndex;

         // Short names are stored inline without NUL termination
         if (sym->N.Name.Short) {
            memcpy(tmp, sym->N.ShortName, 8);
            tmp[8] = '\0';
            name = tmp;
         }
         else
            name = strtab + sym->N.Name.Long;

         void *ptr = NULL;
         if (sym->SectionNumber > 0) {
            assert(sym->SectionNumber - 1 < imghdr->NumberOfSections);
            ptr = load_addr[sym->SectionNumber - 1];
         }
         else if (strcmp(name, "___chkstk_ms") == 0) {
            // Stack-probe helper referenced by compiled prologues
            extern void ___chkstk_ms(void);
            ptr = &___chkstk_ms;
         }
         else
            ptr = ffi_find_symbol(NULL, name);

         if (ptr == NULL)
            fatal_trace("failed to resolve symbol %s", name);

         void *patch = load_addr[i] + relocs[j].VirtualAddress;
         assert((uint8_t *)patch >= blob->span->base);
         assert((uint8_t *)patch < blob->span->base + blob->span->size);

         switch (relocs[j].Type) {
         case IMAGE_REL_AMD64_ADDR64:
            *(uint64_t *)patch += (uint64_t)ptr;
            break;
         case IMAGE_REL_AMD64_ADDR32NB:
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
            break;
         default:
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %d for symbol %s",
                        relocs[j].Type, name);
         }
      }

      if (strncmp((const char *)sections[i].Name, ".pdata",
                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
         assert(sections[i].SizeOfRawData % sizeof(RUNTIME_FUNCTION) == 0);
         const int count = sections[i].SizeOfRawData / sizeof(RUNTIME_FUNCTION);
         const DWORD64 base = (DWORD64)blob->span->base;

         // TODO: we should also call RtlDeleteFunctionTable at some point
         if (!RtlAddFunctionTable(load_addr[i], count, base))
            fatal_trace("RtlAddFunctionTable failed: %s", last_os_error());
      }
   }
}
#elif defined __APPLE__
639
// Load a 64-bit Mach-O object file into the blob: copies the sections
// of the single LC_SEGMENT_64 command, then applies x86-64 or Arm64
// relocations using the LC_SYMTAB symbol table.
static void code_load_macho(code_blob_t *blob, const void *data, size_t size)
{
   const void *rptr = data;

   const struct mach_header_64 *fhdr = rptr;
   rptr += sizeof(struct mach_header_64);

   if (fhdr->magic != MH_MAGIC_64)
      fatal_trace("bad Mach-O magic %x", fhdr->magic);

   const struct segment_command_64 *seg = NULL;
   const struct symtab_command *symtab = NULL;

   // Load address of each section within the blob, indexed by section
   void **load_addr LOCAL = NULL;

   // Walk the load commands, copying sections as they are found
   for (int i = 0; i < fhdr->ncmds; i++) {
      const struct load_command *load = rptr;
      switch (load->cmd) {
      case LC_SEGMENT_64:
         {
            seg = rptr;
            load_addr = xmalloc_array(seg->nsects, sizeof(void *));

            for (int j = 0; j < seg->nsects; j++) {
               const struct section_64 *sec =
                  (void *)seg + sizeof(struct segment_command_64)
                  + j * sizeof(struct section_64);
               code_blob_align(blob, 1 << sec->align);
               load_addr[j] = blob->wptr;
               code_blob_emit(blob, data + sec->offset, sec->size);
            }
         }
         break;
      case LC_SYMTAB:
         symtab = rptr;
         assert(symtab->cmdsize == sizeof(struct symtab_command));
         break;
      case LC_DATA_IN_CODE:
      case LC_LINKER_OPTIMIZATION_HINT:
      case LC_BUILD_VERSION:
      case LC_DYSYMTAB:
         break;   // Not needed for loading
      default:
         warnf("unrecognised load command 0x%0x", load->cmd);
      }

      rptr += load->cmdsize;
   }
   assert(rptr == data + sizeof(struct mach_header_64) + fhdr->sizeofcmds);

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   assert(seg != NULL);
   assert(symtab != NULL);

   // Apply the relocations for each section
   for (int i = 0; i < seg->nsects; i++) {
      const struct section_64 *sec =
         (void *)seg + sizeof(struct segment_command_64)
         + i * sizeof(struct section_64);

      // Carried from a preceding ARM64_RELOC_ADDEND entry to the next
      // relocation
      uint32_t addend = 0;
      for (int j = 0; j < sec->nreloc; j++) {
         const struct relocation_info *rel =
            data + sec->reloff + j * sizeof(struct relocation_info);
         const char *name = NULL;
         void *ptr = NULL;
         if (rel->r_extern) {
            assert(rel->r_symbolnum < symtab->nsyms);
            const struct nlist_64 *nl = data + symtab->symoff
               + rel->r_symbolnum * sizeof(struct nlist_64);
            name = data + symtab->stroff + nl->n_un.n_strx;

            if (nl->n_type & N_EXT) {
               // Mach-O symbol names carry a leading underscore
               if (icmp(blob->span->name, name + 1))
                  ptr = blob->span->base;
               else if ((ptr = ffi_find_symbol(NULL, name + 1)) == NULL)
                  fatal_trace("failed to resolve symbol %s", name + 1);
            }
            else if (nl->n_sect != NO_SECT)
               ptr = blob->span->base + nl->n_value;
         }
         else
            ptr = blob->span->base;

         ptr += addend;
         addend = 0;

         void *patch = load_addr[i] + rel->r_address;
         assert((uint8_t *)patch >= blob->span->base);
         assert((uint8_t *)patch < blob->span->base + blob->span->size);

         switch (rel->r_type) {
#ifdef ARCH_ARM64
         case ARM64_RELOC_UNSIGNED:
            assert(rel->r_length == 3);
            *(void **)patch = ptr;
            break;
         case ARM64_RELOC_SUBTRACTOR:
            break;   // What is this?
         case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
         case ARM64_RELOC_PAGEOFF12:
            // Patch the low 12 bits of the address into the immediate
            // field of the load/add instruction
            switch ((*(uint32_t *)patch >> 23) & 0x7f) {
            case 0b1111010:   // LDR (immediate, SIMD&FP)
            case 0b1110010:   // LDR (immediate)
               assert(*(uint32_t *)patch & (1 << 30));  // Quadword
               assert(((uintptr_t)ptr & 7) == 0);
               *(uint32_t *)patch |= (((uintptr_t)ptr & 0xfff) >> 3) << 10;
               break;
            case 0b0100010:   // ADD (immediate)
               *(uint32_t *)patch |= ((uintptr_t)ptr & 0xfff) << 10;
               break;
            default:
               blob->span->size = blob->wptr - blob->span->base;
               code_disassemble(blob->span, (uintptr_t)patch, NULL);
               fatal_trace("cannot patch instruction");
            }
            break;
         case ARM64_RELOC_GOT_LOAD_PAGE21:
         case ARM64_RELOC_PAGE21:
            {
               // ADRP: patch the 21-bit page delta into imm fields
               const intptr_t dst_page = (intptr_t)ptr & ~UINT64_C(0xfff);
               const intptr_t src_page = (intptr_t)patch & ~UINT64_C(0xfff);
               const intptr_t upper21 = (dst_page - src_page) >> 12;
               *(uint32_t *)patch |= (upper21 & 3) << 29;
               *(uint32_t *)patch |= ((upper21 >> 2) & 0x7ffff) << 5;
            }
            break;
         case ARM64_RELOC_BRANCH26:
            {
               // Branch via a veneer so the target is always in range
               void *veneer = arm64_emit_trampoline(blob, (uintptr_t)ptr);
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case ARM64_RELOC_ADDEND:
            addend = rel->r_symbolnum;   // Applies to the next entry
            break;
#elif defined ARCH_X86_64
         case X86_64_RELOC_UNSIGNED:
            *(uint64_t *)patch += (uint64_t)ptr;
            break;
         case X86_64_RELOC_BRANCH:
            *(uint32_t *)patch += (uint32_t)(ptr - patch - 4);
            break;
#endif
         default:
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %d for symbol %s",
                        rel->r_type, name);
         }
      }
   }
}
#elif !defined __MINGW32__
796
// Load an ELF64 relocatable object file into the blob: copies SHF_ALLOC
// PROGBITS sections then applies the RELA relocations against them,
// resolving symbols through the FFI symbol lookup.
static void code_load_elf(code_blob_t *blob, const void *data, size_t size)
{
   const Elf64_Ehdr *ehdr = data;

   if (ehdr->e_ident[EI_MAG0] != ELFMAG0
       || ehdr->e_ident[EI_MAG1] != ELFMAG1
       || ehdr->e_ident[EI_MAG2] != ELFMAG2
       || ehdr->e_ident[EI_MAG3] != ELFMAG3)
      fatal_trace("bad ELF magic");
   else if (ehdr->e_shentsize != sizeof(Elf64_Shdr))
      fatal_trace("bad section header size %d != %zu", ehdr->e_shentsize,
                  sizeof(Elf64_Shdr));

   const Elf64_Shdr *strtab_hdr =
      data + ehdr->e_shoff + ehdr->e_shstrndx * ehdr->e_shentsize;
   const char *strtab = data + strtab_hdr->sh_offset;

   // Load address of each section within the blob, indexed by section;
   // NULL for sections that were not loaded
   void **load_addr LOCAL = xcalloc_array(ehdr->e_shnum, sizeof(void *));

   // First pass: copy loadable sections into the blob
   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;

      switch (shdr->sh_type) {
      case SHT_PROGBITS:
         if (shdr->sh_flags & SHF_ALLOC) {
            code_blob_align(blob, shdr->sh_addralign);
            load_addr[i] = blob->wptr;
            code_blob_emit(blob, data + shdr->sh_offset, shdr->sh_size);
         }
         break;

      case SHT_RELA:
         // Handled in second pass
         break;

      case SHT_NULL:
      case SHT_STRTAB:
      case SHT_X86_64_UNWIND:
      case SHT_SYMTAB:
         break;

      default:
         warnf("ignoring ELF section %s with type %x", strtab + shdr->sh_name,
               shdr->sh_type);
      }
   }

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   // Second pass: apply every RELA section to the loaded image
   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;
      if (shdr->sh_type != SHT_RELA)
         continue;

      // sh_info is the index of the section the relocations modify
      const Elf64_Shdr *mod =
         data + ehdr->e_shoff + shdr->sh_info * ehdr->e_shentsize;
      if (mod->sh_type != SHT_PROGBITS || !(mod->sh_flags & SHF_ALLOC))
         continue;
      else if (load_addr[shdr->sh_info] == NULL)
         fatal_trace("section %s not loaded", strtab + mod->sh_name);

      // sh_link is the index of the associated symbol table
      const Elf64_Shdr *symtab =
         data + ehdr->e_shoff + shdr->sh_link * ehdr->e_shentsize;
      if (symtab->sh_type != SHT_SYMTAB)
         fatal_trace("section %s is not a symbol table",
                     strtab + symtab->sh_name);

      const Elf64_Rela *endp = data + shdr->sh_offset + shdr->sh_size;
      for (const Elf64_Rela *r = data + shdr->sh_offset; r < endp; r++) {
         const Elf64_Sym *sym = data + symtab->sh_offset
            + ELF64_R_SYM(r->r_info) * symtab->sh_entsize;

         char *ptr = NULL;
         switch (ELF64_ST_TYPE(sym->st_info)) {
         case STT_NOTYPE:
         case STT_FUNC:
            ptr = ffi_find_symbol(NULL, strtab + sym->st_name);
            break;
         case STT_SECTION:
            ptr = load_addr[sym->st_shndx];
            break;
         }

         if (ptr == NULL)
            fatal_trace("cannot resolve symbol %s type %d",
                        strtab + sym->st_name, ELF64_ST_TYPE(sym->st_info));

         ptr += r->r_addend;

         void *patch = load_addr[shdr->sh_info] + r->r_offset;
         assert(r->r_offset < mod->sh_size);

         switch (ELF64_R_TYPE(r->r_info)) {
         case R_X86_64_64:
            *(uint64_t *)patch = (uint64_t)ptr;
            break;
         case R_AARCH64_CALL26:
            {
               // Branch via a veneer so the target is always in range
               void *veneer = arm64_emit_trampoline(blob, (uintptr_t)ptr);
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case R_AARCH64_PREL64:
            *(uint64_t *)patch = ptr - (char *)patch;
            break;
         // MOVZ/MOVK sequence: each relocation patches one 16-bit
         // immediate of the 64-bit absolute address
         case R_AARCH64_MOVW_UABS_G0_NC:
            *(uint32_t *)patch |= ((uintptr_t)ptr & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G1_NC:
            *(uint32_t *)patch |= (((uintptr_t)ptr >> 16) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G2_NC:
            *(uint32_t *)patch |= (((uintptr_t)ptr >> 32) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G3:
            *(uint32_t *)patch |= (((uintptr_t)ptr >> 48) & 0xffff) << 5;
            break;
         default:
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %ld for symbol %s",
                        ELF64_R_TYPE(r->r_info), strtab + sym->st_name);
         }
      }
   }
}
#endif
926

927
// Load a relocatable object file into blob, dispatching on the host
// object format: Mach-O on macOS, PE/COFF on Windows, ELF elsewhere.
void code_load_object(code_blob_t *blob, const void *data, size_t size)
{
#if defined __APPLE__
   code_load_macho(blob, data, size);
#elif defined __MINGW32__
   code_load_pe(blob, data, size);
#else
   code_load_elf(blob, data, size);
#endif
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc