• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nickg / nvc / 24477811592

15 Apr 2026 08:52PM UTC coverage: 92.338% (+0.002%) from 92.336%
24477811592

Pull #1504

github

web-flow
Merge b442faec9 into dbf4b7801
Pull Request #1504: Code changes to allow WASM build

1 of 1 new or added line in 1 file covered. (100.0%)

1 existing line in 1 file now uncovered.

76648 of 83008 relevant lines covered (92.34%)

601138.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.63
/src/jit/jit-code.c
1
//
2
//  Copyright (C) 2022-2024  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "cpustate.h"
20
#include "debug.h"
21
#include "hash.h"
22
#include "ident.h"
23
#include "jit/jit-priv.h"
24
#include "option.h"
25
#include "printf.h"
26
#include "thread.h"
27

28
#include <assert.h>
29
#include <math.h>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <stdio.h>
33
#include <unistd.h>
34
#include <inttypes.h>
35
#include <signal.h>
36

37
#if defined __MINGW32__
38
#include <winnt.h>
39
#elif defined __APPLE__
40
#include <mach-o/loader.h>
41
#include <mach-o/reloc.h>
42
#include <mach-o/nlist.h>
43
#include <mach-o/stab.h>
44
#include <mach-o/arm64/reloc.h>
45
#include <mach-o/x86_64/reloc.h>
46
#else
47
#include <elf.h>
48
#endif
49

50
#ifdef HAVE_CAPSTONE
51
#include <capstone.h>
52
#endif
53

54
#ifndef R_AARCH64_MOVW_UABS_G0_NC
55
#define R_AARCH64_MOVW_UABS_G0_NC 264
56
#endif
57

58
#ifndef R_AARCH64_MOVW_UABS_G1_NC
59
#define R_AARCH64_MOVW_UABS_G1_NC 266
60
#endif
61

62
#ifndef R_AARCH64_MOVW_UABS_G2_NC
63
#define R_AARCH64_MOVW_UABS_G2_NC 268
64
#endif
65

66
#ifndef R_AARCH64_MOVW_UABS_G3
67
#define R_AARCH64_MOVW_UABS_G3 269
68
#endif
69

70
#ifndef SHT_X86_64_UNWIND
71
#define SHT_X86_64_UNWIND 0x70000001
72
#endif
73

74
#ifndef IMAGE_REL_ARM64_BRANCH26
75
#define IMAGE_REL_ARM64_BRANCH26 0x03
76
#endif
77

78
#ifndef IMAGE_REL_ARM64_ADDR32NB
79
#define IMAGE_REL_ARM64_ADDR32NB 0x02
80
#endif
81

82
#ifndef IMAGE_REL_ARM64_PAGEBASE_REL21
83
#define IMAGE_REL_ARM64_PAGEBASE_REL21 0x04
84
#endif
85

86
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12A
87
#define IMAGE_REL_ARM64_PAGEOFFSET_12A 0x06
88
#endif
89

90
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12L
91
#define IMAGE_REL_ARM64_PAGEOFFSET_12L 0x07
92
#endif
93

94
// Sizing parameters for the code cache
#define CODE_PAGE_ALIGN   4096       // Alignment passed to map_jit_pages
#define CODE_PAGE_SIZE    0x400000   // Bytes mapped for each code page
#define THREAD_CACHE_SIZE 0x10000    // Smallest chunk handed to a thread
#define CODE_BLOB_ALIGN   256        // Alignment of every blob base address
#define MIN_BLOB_SIZE     0x4000     // Blob size requested when hint is zero
99

100
// Expand an integer into a comma-separated list of its bytes, least
// significant byte first, for use inside a byte array initialiser
#define __IMM16(x) ((x) & 0xff), (((x) >> 8) & 0xff)
#define __IMM32(x) __IMM16(x), __IMM16((x) >> 16)
#define __IMM64(x) __IMM32(x), __IMM32((x) >> 32)
103

104
STATIC_ASSERT(MIN_BLOB_SIZE <= THREAD_CACHE_SIZE);
105
STATIC_ASSERT(MIN_BLOB_SIZE % CODE_BLOB_ALIGN == 0);
106
STATIC_ASSERT(CODE_PAGE_SIZE % THREAD_CACHE_SIZE == 0);
107

108
typedef struct _code_page code_page_t;
109

110
// One annotation attached to an address within a code span
typedef struct {
   uintptr_t  addr;   // Address the annotation applies to
   char      *text;   // Annotation string printed in the disassembly
} code_comment_t;
114

115
typedef struct {
116
   unsigned        count;
117
   unsigned        max;
118
   code_comment_t *comments;
119
} code_debug_t;
120

121
typedef struct _code_span {
122
   code_cache_t *owner;
123
   code_span_t  *next;
124
   ident_t       name;
125
   uint8_t      *base;
126
   void         *entry;
127
   size_t        size;
128
#ifdef DEBUG
129
   code_debug_t  debug;
130
#endif
131
} code_span_t;
132

133
typedef struct _patch_list {
134
   patch_list_t    *next;
135
   uint8_t         *wptr;
136
   jit_label_t      label;
137
   code_patch_fn_t  fn;
138
} patch_list_t;
139

140
typedef struct _code_page {
141
   code_cache_t *owner;
142
   code_page_t  *next;
143
   uint8_t      *mem;
144
} code_page_t;
145

146
typedef struct _code_cache {
147
   nvc_lock_t   lock;
148
   code_page_t *pages;
149
   code_span_t *spans;
150
   code_span_t *freelist[MAX_THREADS];
151
   code_span_t *globalfree;
152
   shash_t     *symbols;
153
   FILE        *perfmap;
154
#ifdef HAVE_CAPSTONE
155
   csh          capstone;
156
#endif
157
#ifdef DEBUG
158
   size_t       used;
159
#endif
160
} code_cache_t;
161

162
static void code_disassemble(code_span_t *span, uintptr_t mark,
163
                             struct cpu_state *cpu);
164

165
// Unwinder callback registered for each code page.  Every span of the
// cache passed as context is examined and, for each one that contains
// addr, the frame is filled in as a VHDL frame with the span name and
// the displacement from the span base.  The frame is left untouched when
// no span contains addr.
static void code_cache_unwinder(uintptr_t addr, debug_frame_t *frame,
                                void *context)
{
   code_cache_t *cache = context;
   const uint8_t *where = (uint8_t *)addr;

   code_span_t *it = cache->spans;
   while (it != NULL) {
      const uint8_t *start = it->base;
      const uint8_t *limit = it->base + it->size;

      if (where >= start && where < limit) {
         frame->kind   = FRAME_VHDL;
         frame->disp   = where - start;
         frame->symbol = istr(it->name);
      }

      it = it->next;
   }
}
×
179

180
static void code_fault_handler(int sig, void *addr, struct cpu_state *cpu,
×
181
                               void *context)
182
{
183
   code_page_t *page = context;
×
184

185
   const uint8_t *pc = (uint8_t *)cpu->pc;
×
186
   if (pc < page->mem || pc > page->mem + CODE_PAGE_SIZE)
×
187
      return;
188

189
   uintptr_t mark = cpu->pc;
×
190
#ifndef __MINGW32__
191
   if (sig == SIGTRAP)
×
192
      mark--;   // Point to faulting instruction
×
193
#endif
194

195
   for (code_span_t *span = page->owner->spans; span; span = span->next) {
×
196
      if (pc >= span->base && pc < span->base + span->size && span->name)
×
197
         code_disassemble(span, mark, cpu);
×
198
   }
199
}
200

201
#ifdef DEBUG
202
// Return true if the range [base, base + size) lies entirely inside one
// of the pages of the cache.  The cache lock must be held by the caller.
static bool code_cache_contains(code_cache_t *code, uint8_t *base, size_t size)
{
   assert_lock_held(&code->lock);

   code_page_t *page = code->pages;
   while (page != NULL) {
      if (page->mem <= base && base + size <= page->mem + CODE_PAGE_SIZE)
         return true;

      page = page->next;
   }

   return false;
}
213
#endif
214

215
// Allocate a span describing [base, base + size) and push it onto the
// front of the cache's span list.  Takes the cache lock for the duration
// of the call.  The entry point of the new span is its base address.
static code_span_t *code_span_new(code_cache_t *code, ident_t name,
                                  uint8_t *base, size_t size)
{
   SCOPED_LOCK(code->lock);

   assert(code_cache_contains(code, base, size));

   code_span_t *fresh = xcalloc(sizeof(code_span_t));
   fresh->owner = code;
   fresh->name  = name;
   fresh->base  = base;
   fresh->entry = base;
   fresh->size  = size;

   // Link at the head of the list
   fresh->next = code->spans;
   code->spans = fresh;

   return fresh;
}
233

234
// Map a new code page, register the fault handler and unwinder for it,
// and make a single unnamed span covering the whole page the cache's
// global free span.  The cache lock must be held by the caller.
static void code_page_new(code_cache_t *code)
{
   assert_lock_held(&code->lock);

   code_page_t *fresh = xcalloc(sizeof(code_page_t));
   fresh->owner = code;
   fresh->next  = code->pages;
   fresh->mem   = map_jit_pages(CODE_PAGE_ALIGN, CODE_PAGE_SIZE);

   add_fault_handler(code_fault_handler, fresh);
   debug_add_unwinder(fresh->mem, CODE_PAGE_SIZE, code_cache_unwinder, code);

   code->pages = fresh;

   // The whole page starts out as free space
   code_span_t *whole = xcalloc(sizeof(code_span_t));
   whole->owner = code;
   whole->base  = fresh->mem;
   whole->size  = CODE_PAGE_SIZE;
   whole->next  = code->spans;

   code->spans      = whole;
   code->globalfree = whole;
}
5,449✔
256

257
code_cache_t *code_cache_new(void)
5,442✔
258
{
259
   code_cache_t *code = xcalloc(sizeof(code_cache_t));
5,442✔
260

261
   {
262
      SCOPED_LOCK(code->lock);
10,884✔
263
      code_page_new(code);
5,442✔
264
   }
265

266
#ifdef HAVE_CAPSTONE
267
#if defined ARCH_X86_64
268
   if (cs_open(CS_ARCH_X86, CS_MODE_64, &(code->capstone)) != CS_ERR_OK)
269
      fatal_trace("failed to init capstone for x86_64");
270
#elif defined ARCH_ARM64
271
   if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &(code->capstone)) != CS_ERR_OK)
272
      fatal_trace("failed to init capstone for Arm64");
273
#else
274
#error Cannot configure capstone for this architecture
275
#endif
276

277
   if (cs_option(code->capstone, CS_OPT_DETAIL, 1) != CS_ERR_OK)
278
      fatal_trace("failed to set capstone detailed mode");
279
#endif
280

281
   shash_t *s = shash_new(32);
5,442✔
282

283
   extern void __nvc_putpriv(jit_handle_t, void *);
5,442✔
284
   extern void __nvc_sched_waveform(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,442✔
285
   extern void __nvc_sched_process(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,442✔
286
   extern void __nvc_test_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,442✔
287
   extern void __nvc_last_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,442✔
288

289
   shash_put(s, "__nvc_sched_waveform", &__nvc_sched_waveform);
5,442✔
290
   shash_put(s, "__nvc_sched_process", &__nvc_sched_process);
5,442✔
291
   shash_put(s, "__nvc_test_event", &__nvc_test_event);
5,442✔
292
   shash_put(s, "__nvc_last_event", &__nvc_last_event);
5,442✔
293
   shash_put(s, "__nvc_mspace_alloc", &__nvc_mspace_alloc);
5,442✔
294
   shash_put(s, "__nvc_putpriv", &__nvc_putpriv);
5,442✔
295
   shash_put(s, "__nvc_do_exit", &__nvc_do_exit);
5,442✔
296
   shash_put(s, "__nvc_pack", &__nvc_pack);
5,442✔
297
   shash_put(s, "__nvc_unpack", &__nvc_unpack);
5,442✔
298
   shash_put(s, "__nvc_vec4op", &__nvc_vec4op);
5,442✔
299
   shash_put(s, "memmove", &memmove);
5,442✔
300
   shash_put(s, "memcpy", &memcpy);
5,442✔
301
   shash_put(s, "memset", &memset);
5,442✔
302
   shash_put(s, "pow", &pow);
5,442✔
303
   shash_put(s, "ldexp", &ldexp);
5,442✔
304
   shash_put(s, "exp2", &exp2);
5,442✔
305

306
#if defined __APPLE__ && defined ARCH_ARM64
307
   shash_put(s, "bzero", &bzero);
308
#elif defined __APPLE__ && defined ARCH_X86_64
309
   shash_put(s, "__bzero", &bzero);
310
#elif defined __MINGW32__ && defined ARCH_X86_64
311
   extern void ___chkstk_ms(void);
312
   shash_put(s, "___chkstk_ms", &___chkstk_ms);
313
#endif
314

315
   store_release(&code->symbols, s);
5,442✔
316

317
   return code;
5,442✔
318
}
319

320
// Release everything owned by a code cache: unregister and unmap each
// code page, free every span together with its debug annotations, close
// the disassembler and the perf map file if one was opened, then free
// the symbol table and the cache itself.  The cache must not be used
// after this call.
void code_cache_free(code_cache_t *code)
{
   for (code_page_t *it = code->pages, *tmp; it; it = tmp) {
      debug_remove_unwinder(it->mem);
      remove_fault_handler(code_fault_handler, it);

      nvc_munmap(it->mem, CODE_PAGE_SIZE);

      tmp = it->next;
      free(it);
   }

   for (code_span_t *it = code->spans, *tmp; it; it = tmp) {
      tmp = it->next;
#ifdef DEBUG
      // Each annotation string was allocated by xvasprintf or claimed
      // from a text buffer when it was added, so it is freed here along
      // with the array that holds it
      for (unsigned i = 0; i < it->debug.count; i++)
         free(it->debug.comments[i].text);
      free(it->debug.comments);
#endif
      free(it);
   }

#ifdef HAVE_CAPSTONE
   cs_close(&(code->capstone));
#endif

#ifdef DEBUG
   if (code->used > 0)
      debugf("JIT code footprint: %zu bytes", code->used);
#endif

   // The perf map is opened on demand by code_write_perf_map and is not
   // closed anywhere else
   if (code->perfmap != NULL)
      fclose(code->perfmap);

   shash_free(code->symbols);
   free(code);
}
5,433✔
350

351
#ifdef HAVE_CAPSTONE
352
// Write spaces to stdout until column tab is reached, starting from
// column col.  Returns the resulting column, which is col unchanged when
// it is already at or beyond tab.
static int code_print_spaces(int col, int tab)
{
   while (col < tab) {
      fputc(' ', stdout);
      col++;
   }

   return col;
}
358
#endif
359

360
#if defined DEBUG && HAVE_CAPSTONE
361
static int code_comment_compare(const void *a, const void *b)
362
{
363
   const code_comment_t *ca = a;
364
   const code_comment_t *cb = b;
365

366
   if (ca->addr < cb->addr)
367
      return -1;
368
   else if (ca->addr > cb->addr)
369
      return 1;
370
   else
371
      return 0;
372
}
373
#endif
374

375
// Print a listing of the bytes in a span to stdout, framed by two rules
// of dashes with the span name in the first.  With capstone available
// each instruction is printed with its address and hex encoding, bytes
// that fail to decode are shown as raw data, and long runs of zero bytes
// are summarised.  Otherwise the span is printed as a hex dump.
//
//   span - the span to print; its owner's lock is held for the whole call
//   mark - if non-zero, an arrow is printed beside the first line whose
//          following address is past this value
//   cpu  - if not NULL, the registers are printed below the arrow
static void code_disassemble(code_span_t *span, uintptr_t mark,
                             struct cpu_state *cpu)
{
   SCOPED_LOCK(span->owner->lock);

   printf("--");

   const int namelen = ident_len(span->name);
   for (int pad = 72 - namelen; pad > 0; pad--)
      fputc('-', stdout);

   printf(" %s ----\n", istr(span->name));

#ifdef HAVE_CAPSTONE
   cs_insn *insn = cs_malloc(span->owner->capstone);

#ifdef DEBUG
   // Annotations are interleaved with the listing in address order
   qsort(span->debug.comments, span->debug.count, sizeof(code_comment_t),
         code_comment_compare);
   code_comment_t *note = span->debug.comments;
   code_comment_t *const note_end = span->debug.comments + span->debug.count;
#endif

   const uint8_t *const limit = span->base + span->size;
   const uint8_t *cursor = span->base;

   while (cursor < limit) {
      uint64_t address = (uint64_t)cursor;

#ifdef DEBUG
      while (note < note_end && note->addr <= address) {
         printf("%30s;; %s\n", "", note->text);
         note++;
      }
#endif

      // Length of the run of zero bytes starting at the cursor
      int zeros = 0;
      while (cursor + zeros < limit && cursor[zeros] == 0)
         zeros++;

      if (zeros > 8 || zeros == limit - cursor) {
         printf("%30s;; skipping %d zero bytes\n", "", zeros);
         cursor += zeros;
         continue;
      }

      size_t remain = limit - cursor;
      int col = 0;
      if (cs_disasm_iter(span->owner->capstone, &cursor, &remain,
                         &address, insn)) {
         char hex[33], *hp = hex;
         for (size_t k = 0; k < insn->size; k++)
            hp += checked_sprintf(hp, hex + sizeof(hex) - hp, "%02x",
                                  insn->bytes[k]);

         col = printf("%-12" PRIx64 " %-16.16s %s %s", insn->address,
                      hex, insn->mnemonic, insn->op_str);

#ifdef ARCH_X86_64
         // Show the symbol name for a 64-bit immediate load if known
         if (strcmp(insn->mnemonic, "movabs") == 0) {
            const cs_x86_op *src = &(insn->detail->x86.operands[1]);
            const char *sym = NULL;
            if (src->type == X86_OP_IMM)
               sym = debug_symbol_name((void *)src->imm);

            if (sym != NULL) {
               col = code_print_spaces(col, 60);
               col += printf(" ; %s", sym);
            }
         }
#endif

         // Encodings longer than eight bytes continue on a second line
         if (strlen(hex) > 16)
            col = printf("\n%15s -%-16s", "", hex + 16) - 1;
      }
      else {
#ifdef ARCH_ARM64
         col = printf("%-12" PRIx64 " %-16.08x %s 0x%08x", (uint64_t)cursor,
                      *(uint32_t *)cursor, ".word", *(uint32_t *)cursor);
         cursor += 4;
#else
         col = printf("%-12" PRIx64 " %-16.02x %s 0x%02x", (uint64_t)cursor,
                      *cursor, ".byte", *cursor);
         cursor++;
#endif
      }

      const bool at_mark =
         mark != 0 && (cursor >= limit || address > mark);

      if (!at_mark) {
         printf("\n");
         continue;
      }

      col = code_print_spaces(col, 66);
      printf("<=============\n");

      if (cpu != NULL) {
#ifdef ARCH_X86_64
         const char *names[] = {
            "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
            "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"
         };
         for (int i = 0; i < ARRAY_LEN(names); i++)
            printf("\t%s\t%"PRIxPTR"\n", names[i], cpu->regs[i]);
#else
         for (int i = 0; i < 32; i++)
            printf("\tR%d\t%"PRIxPTR"\n", i, cpu->regs[i]);
#endif
      }

      mark = 0;   // Only the first matching line is marked
   }

   cs_free(insn, 1);
#else
   jit_hexdump(span->base, span->size, 16, (void *)mark, "");
#endif

   for (int n = 80; n > 0; n--)
      fputc('-', stdout);
   printf("\n");
   fflush(stdout);
}
×
487

488
// Append one line describing the span (base address, size and name) to
// the perf map file /tmp/perf-<pid>.map, creating the file on first
// use.  If the file cannot be created a warning is issued and the
// OPT_PERF_MAP option is cleared.  The owner's lock is held throughout.
static void code_write_perf_map(code_span_t *span)
{
   SCOPED_LOCK(span->owner->lock);

   code_cache_t *cache = span->owner;

   if (cache->perfmap == NULL) {
      char *fname LOCAL = xasprintf("/tmp/perf-%d.map", getpid());

      cache->perfmap = fopen(fname, "w");
      if (cache->perfmap == NULL) {
         warnf("cannot create %s: %s", fname, last_os_error());
         opt_set_int(OPT_PERF_MAP, 0);
         return;
      }

      debugf("writing perf map to %s", fname);
   }

   fprintf(cache->perfmap, "%p 0x%zx %s\n", span->base, span->size,
           istr(span->name));
   fflush(cache->perfmap);
}
507

508
// Begin emitting a new blob of code called name.
//
// Space comes from the calling thread's free span.  When that is smaller
// than the request (hint, or MIN_BLOB_SIZE if hint is zero) the free
// span is refilled from the global free span under the cache lock,
// mapping a new code page first if the global span is too small.  The
// new blob provisionally claims all of the thread's free space; the
// unused part is handed back by code_blob_finalise.
//
// Returns a heap-allocated blob whose write pointer is at the span base.
code_blob_t *code_blob_new(code_cache_t *code, ident_t name, size_t hint)
{
   code_span_t **slot = &(code->freelist[thread_id()]);

   code_span_t *avail = relaxed_load(slot);
   if (avail == NULL) {
      // First blob on this thread: start with an empty free span
      avail = code_span_new(code, NULL, code->pages->mem, 0);
      relaxed_store(slot, avail);
   }

   const size_t reqsz = hint != 0 ? hint : MIN_BLOB_SIZE;

   if (avail->size < reqsz) {
      SCOPED_LOCK(code->lock);

#ifdef DEBUG
      if (avail->size > 0)
         debugf("thread %d needs new code cache from global free list "
                "(requested %zu bytes, wasted %zu bytes)",
                thread_id(), reqsz, avail->size);
#endif

      const size_t chunksz = MAX(reqsz, THREAD_CACHE_SIZE);
      const size_t alignedsz = ALIGN_UP(chunksz, CODE_BLOB_ALIGN);

      if (alignedsz > code->globalfree->size) {
         DEBUG_ONLY(debugf("requesting new %d byte code page", CODE_PAGE_SIZE));
         code_page_new(code);
         assert(code->globalfree->size == CODE_PAGE_SIZE);
      }

      // Carve the chunk off the front of the global free span
      code_span_t *pool = code->globalfree;
      const size_t take = MIN(pool->size, alignedsz);

      avail->base = pool->base;
      avail->size = take;

      pool->base += take;
      pool->size -= take;
   }

   assert(reqsz <= avail->size);
   assert(((uintptr_t)avail->base & (CODE_BLOB_ALIGN - 1)) == 0);

   code_span_t *span = code_span_new(code, name, avail->base, avail->size);

   avail->base += span->size;
   avail->size -= span->size;

   code_blob_t *blob = xcalloc(sizeof(code_blob_t));
   blob->span = span;
   blob->wptr = span->base;

   thread_wx_mode(WX_WRITE);

   return blob;
}
564

565
// Finish emitting a blob and free it.
//
// The span is shrunk to the bytes actually written and the remainder,
// starting at the next CODE_BLOB_ALIGN boundary, is returned to the
// calling thread's free span.  The entry point is then published through
// entry with store_release.  If the blob overflowed, all of its memory
// is returned to the free span and entry is not written.  It is a fatal
// error for the blob to have unpatched labels or to be empty.
void code_blob_finalise(code_blob_t *blob, jit_entry_fn_t *entry)
{
   code_span_t *span = blob->span;
   code_cache_t *cache = span->owner;

   span->size = blob->wptr - span->base;

   code_span_t *rest = relaxed_load(&(cache->freelist[thread_id()]));
   assert(rest->size == 0);

   ihash_free(blob->labels);
   blob->labels = NULL;

   if (unlikely(blob->patches != NULL))
      fatal_trace("not all labels in %s were patched", istr(span->name));
   else if (unlikely(blob->overflow)) {
      // Return all the memory
      rest->size = rest->base - span->base;
      rest->base = span->base;
      free(blob);
      return;
   }
   else if (span->size == 0)
      fatal_trace("code span %s is empty", istr(span->name));

   // Free space resumes at the next aligned address after the code
   uint8_t *resume = ALIGN_UP(blob->wptr, CODE_BLOB_ALIGN);
   rest->size = rest->base - resume;
   rest->base = resume;

   if (opt_get_verbose(OPT_ASM_VERBOSE, istr(span->name))) {
      nvc_printf("\n$bold$$blue$");
      code_disassemble(span, 0, NULL);
      nvc_printf("$$\n");
   }

   __builtin___clear_cache((char *)span->base, (char *)blob->wptr);

   thread_wx_mode(WX_EXECUTE);

   store_release(entry, (jit_entry_fn_t)span->entry);

   DEBUG_ONLY(relaxed_add(&cache->used, span->size));
   free(blob);

   if (opt_get_int(OPT_PERF_MAP))
      code_write_perf_map(span);
}
610

611
__attribute__((cold, noinline))
612
static void code_blob_overflow(code_blob_t *blob)
1✔
613
{
614
   warnf("JIT code buffer for %s too small", istr(blob->span->name));
1✔
615
   for (patch_list_t *it = blob->patches, *tmp; it; it = tmp) {
1✔
616
      tmp = it->next;
×
617
      free(it);
×
618
   }
619
   blob->patches = NULL;
1✔
620
   blob->overflow = true;
1✔
621
}
1✔
622

623
// Append len bytes to the blob at its write pointer.  Nothing is written
// if the blob has already overflowed, or if the bytes would run past the
// end of the span, in which case the blob is flagged as overflowed.
void code_blob_emit(code_blob_t *blob, const uint8_t *bytes, size_t len)
{
   if (unlikely(blob->overflow))
      return;

   const uint8_t *limit = blob->span->base + blob->span->size;
   if (unlikely(blob->wptr + len > limit)) {
      code_blob_overflow(blob);
      return;
   }

   memcpy(blob->wptr, bytes, len);
   blob->wptr += len;
}
635

636
// Emit padding bytes until the write pointer is a multiple of align,
// which must be a power of two.  The padding byte is 0x90 on x86-64 and
// zero elsewhere.  Stops early if the blob overflows.
void code_blob_align(code_blob_t *blob, unsigned align)
{
#ifdef ARCH_X86_64
   const uint8_t pad[] = { 0x90 };
#else
   const uint8_t pad[] = { 0x00 };
#endif

   assert(is_power_of_2(align));
   assert(align % ARRAY_LEN(pad) == 0);

   const uintptr_t mask = align - 1;

   for (;;) {
      if (((uintptr_t)blob->wptr & mask) == 0)
         break;
      else if (blob->overflow)
         break;

      code_blob_emit(blob, pad, ARRAY_LEN(pad));
   }
}
15,377✔
650

651
// Bind label to the current write pointer and resolve every pending
// patch that was waiting for it, calling each patch function with the
// location recorded for the patch and the label address.  Does nothing
// if the blob has overflowed.
void code_blob_mark(code_blob_t *blob, jit_label_t label)
{
   if (unlikely(blob->overflow))
      return;

   if (blob->labels == NULL)
      blob->labels = ihash_new(256);

   ihash_put(blob->labels, label, blob->wptr);

   patch_list_t **link = &(blob->patches);
   while (*link != NULL) {
      patch_list_t *node = *link;

      if (node->label != label) {
         link = &(node->next);
         continue;
      }

      // Apply the patch then unlink it from the pending list
      patch_list_t *following = node->next;
      (*node->fn)(blob, label, node->wptr, blob->wptr);
      free(node);
      *link = following;
   }
}
671

672
// Request that fn be called to patch the code at the current write
// pointer with the address of label.  If the label is already bound the
// function is called straight away, otherwise the request is queued
// until code_blob_mark binds the label.  Does nothing if the blob has
// overflowed.
void code_blob_patch(code_blob_t *blob, jit_label_t label, code_patch_fn_t fn)
{
   if (unlikely(blob->overflow))
      return;

   void *target = NULL;
   if (blob->labels != NULL)
      target = ihash_get(blob->labels, label);

   if (target != NULL) {
      (*fn)(blob, label, blob->wptr, target);
      return;
   }

   patch_list_t *pending = xmalloc(sizeof(patch_list_t));
   pending->label = label;
   pending->fn    = fn;
   pending->wptr  = blob->wptr;
   pending->next  = blob->patches;

   blob->patches = pending;
}
689

690
#ifdef DEBUG
691
// Append a textual form of a JIT operand to the text buffer.  Kinds that
// are not handled explicitly are printed as "???".
static void code_blob_print_value(text_buf_t *tb, jit_value_t value)
{
   switch (value.kind) {
   case JIT_VALUE_REG:
      tb_printf(tb, "R%d", value.reg);
      return;

   case JIT_VALUE_INT64:
      // Values below 4096 are shown in decimal, larger ones in hex
      if (value.int64 >= 4096)
         tb_printf(tb, "#0x%"PRIx64, value.int64);
      else
         tb_printf(tb, "#%"PRIi64, value.int64);
      return;

   case JIT_VALUE_DOUBLE:
      tb_printf(tb, "%%%g", value.dval);
      return;

   case JIT_ADDR_CPOOL:
      tb_printf(tb, "[CP+%"PRIi64"]", value.int64);
      return;

   case JIT_ADDR_REG:
      tb_printf(tb, "[R%d", value.reg);
      if (value.disp != 0)
         tb_printf(tb, "+%d", value.disp);
      tb_cat(tb, "]");
      return;

   case JIT_ADDR_ABS:
      tb_printf(tb, "[#%016"PRIx64"]", value.int64);
      return;

   case JIT_VALUE_LABEL:
      tb_printf(tb, "%d", value.label);
      return;

   case JIT_VALUE_HANDLE:
      tb_printf(tb, "<%d>", value.handle);
      return;

   case JIT_VALUE_EXIT:
      tb_printf(tb, "%s", jit_exit_name(value.exit));
      return;

   case JIT_VALUE_LOC:
      tb_printf(tb, "<%s:%d>", loc_file_str(&value.loc), value.loc.first_line);
      return;

   case JIT_VALUE_LOCUS:
      tb_printf(tb, "%p", value.locus);
      return;

   case JIT_VALUE_VPOS:
      tb_printf(tb, "%u:%u", value.vpos.block, value.vpos.op);
      return;

   default:
      tb_cat(tb, "???");
      return;
   }
}
392✔
740

741
// Record an annotation for addr in the blob's span.  The array of
// annotations is grown when full, to 128 entries the first time and
// doubling after that.  The text pointer is stored as given, not copied.
static void code_blob_add_comment(code_blob_t *blob, uintptr_t addr, char *text)
{
   code_debug_t *dbg = &(blob->span->debug);

   if (dbg->count == dbg->max) {
      dbg->max = MAX(128, dbg->max * 2);
      dbg->comments = xrealloc_array(dbg->comments, dbg->max,
                                     sizeof(code_comment_t));
   }

   code_comment_t *slot = &(dbg->comments[dbg->count]);
   slot->addr = addr;
   slot->text = text;

   dbg->count++;
}
79,832✔
755

756
// Record an annotation at the current write pointer containing a
// textual form of the IR instruction: the opcode with its condition code
// and operand size, padding, then the result register and the
// arguments separated by commas
void code_blob_print_ir(code_blob_t *blob, jit_ir_t *ir)
{
   LOCAL_TEXT_BUF tb = tb_new();

   const bool has_result = ir->result != JIT_REG_INVALID;
   const bool has_arg1 = ir->arg1.kind != JIT_VALUE_INVALID;
   const bool has_arg2 = ir->arg2.kind != JIT_VALUE_INVALID;

   tb_printf(tb, "%s%s", jit_op_name(ir->op), jit_cc_name(ir->cc));

   if (ir->size != JIT_SZ_UNSPEC)
      tb_printf(tb, ".%d", 1 << (3 + ir->size));

   // Pad the mnemonic column with spaces
   tb_printf(tb, "%*.s", (int)MAX(0, 10 - tb_len(tb)), "");

   if (has_result)
      tb_printf(tb, "R%d", ir->result);

   if (has_arg1) {
      if (has_result)
         tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg1);
   }

   if (has_arg2) {
      tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg2);
   }

   code_blob_add_comment(blob, (uintptr_t)blob->wptr, tb_claim(tb));
}
348✔
782

783
// Record a printf-style formatted annotation at the blob's current
// write pointer
void code_blob_printf(code_blob_t *blob, const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   char *formatted = xvasprintf(fmt, args);
   va_end(args);

   code_blob_add_comment(blob, (uintptr_t)blob->wptr, formatted);
}
15,377✔
793

794
__attribute__((format(printf, 3, 4)))
795
static void debug_reloc(code_blob_t *blob, void *patch, const char *fmt, ...)
64,107✔
796
{
797
   va_list ap;
64,107✔
798
   va_start(ap, fmt);
64,107✔
799

800
   char *text = xvasprintf(fmt, ap);
64,107✔
801
   code_blob_add_comment(blob, (uintptr_t)patch, text);
64,107✔
802

803
   va_end(ap);
64,107✔
804
}
64,107✔
805
#else
806
#define debug_reloc(...)
807
#endif   // DEBUG
808

809
#ifdef ARCH_ARM64
810
// Insert the low twelve bits of ptr into the immediate field of the
// instruction at patch.  For the two LDR forms the offset is scaled by
// eight and the instruction must be the 64-bit variant; for ADD the
// offset is used unscaled.  Any other instruction is a fatal error,
// reported after disassembling what has been emitted so far.
static void arm64_patch_page_offset21(code_blob_t *blob, uint32_t *patch,
                                      void *ptr)
{
   const uintptr_t lo12 = (uintptr_t)ptr & 0xfff;
   const uint32_t opcode = (*patch >> 23) & 0x7f;

   switch (opcode) {
   case 0b1111010:   // LDR (immediate, SIMD&FP)
   case 0b1110010:   // LDR (immediate)
      assert(*patch & (1 << 30));  // Quadword
      assert(((uintptr_t)ptr & 7) == 0);
      *patch |= (lo12 >> 3) << 10;
      break;

   case 0b0100010:   // ADD (immediate)
      *patch |= lo12 << 10;
      break;

   default:
      // Trim the span to what has been written so it can be printed
      blob->span->size = blob->wptr - blob->span->base;
      code_disassemble(blob->span, (uintptr_t)patch, NULL);
      fatal_trace("cannot patch instruction");
   }
}
829

830
// Patch the 21-bit page-relative immediate of the instruction at patch
// so that it refers to the 4096 byte page containing ptr.  The page
// delta is split into the two-bit field at bit 29 and the 19-bit field
// at bit 5, replacing whatever those fields held before.
static void arm64_patch_page_base_rel21(uint32_t *patch, void *ptr)
{
   const intptr_t dst_page = (intptr_t)ptr & ~UINT64_C(0xfff);
   const intptr_t src_page = (intptr_t)patch & ~UINT64_C(0xfff);
   const intptr_t delta = (dst_page - src_page) >> 12;

   // Must fit in a signed 21-bit field
   assert(delta >= -(1 << 20) && delta < (1 << 20));

   const uint32_t low2   = (uint32_t)(delta & 3) << 29;
   const uint32_t high19 = (uint32_t)((delta >> 2) & 0x7ffff) << 5;
   const uint32_t fields = (UINT32_C(0x3) << 29) | (UINT32_C(0x7ffff) << 5);

   *patch = (*patch & ~fields) | low2 | high19;
}
840
#endif
841

842
// Return the address of a veneer inside the blob that transfers control
// to dest.  The bytes between blob->veneers and the write pointer are
// searched for an identical veneer first so that each destination is
// only emitted once; otherwise a new veneer is appended at the write
// pointer.  On wasm32 dest is returned unchanged.
static void *code_emit_trampoline(code_blob_t *blob, void *dest)
{
#if defined ARCH_WASM32
   // wasm32 does not execute bytes from writable code buffers.
   // Calls are represented by function/table indices instead of absolute jumps.
   return dest;
#else
#if defined ARCH_X86_64
   const uint8_t veneer[] = {
      0x48, 0xb8, __IMM64((uintptr_t)dest),  // MOVABS RAX, dest
      0xff, 0xe0                             // JMP RAX
   };
#elif defined ARCH_ARM64
   const uint8_t veneer[] = {
      0x50, 0x00, 0x00, 0x58,   // LDR X16, [PC+8]
      0x00, 0x02, 0x1f, 0xd6,   // BR X16
      __IMM64((uintptr_t)dest)
   };
#else
   should_not_reach_here();
#endif

   void *const existing = memmem(blob->veneers, blob->wptr - blob->veneers,
                                 veneer, ARRAY_LEN(veneer));
   if (existing != NULL)
      return existing;

   DEBUG_ONLY(code_blob_printf(blob, "Trampoline for %p", dest));

   void *const addr = blob->wptr;
   code_blob_emit(blob, veneer, ARRAY_LEN(veneer));
   return addr;
#endif
}
877

878
#if !defined __MINGW32__ && !defined __APPLE__
879
// Return the address of an eight byte slot inside the blob that holds
// the value of dest.  The bytes between blob->veneers and the write
// pointer are searched for an existing slot with the same contents
// first; otherwise a new slot is appended at the write pointer.  On
// wasm32 dest is returned unchanged.
static void *code_emit_got(code_blob_t *blob, void *dest)
{
#if defined ARCH_WASM32
   return dest;
#else
   const uint8_t data[] = { __IMM64((uintptr_t)dest) };

   // The region to search runs from the veneer base up to the write
   // pointer, so its length is wptr minus veneers
   void *prev = memmem(blob->veneers, blob->wptr - blob->veneers,
                       data, ARRAY_LEN(data));
   if (prev != NULL)
      return prev;
   else {
      DEBUG_ONLY(code_blob_printf(blob, "GOT entry for %p", dest));

      void *addr = blob->wptr;
      code_blob_emit(blob, data, ARRAY_LEN(data));
      return addr;
   }
#endif
}
899
#endif
900

901
#if defined __MINGW32__
902
static void code_load_pe(code_blob_t *blob, const void *data, size_t size)
903
{
904
   const IMAGE_FILE_HEADER *imghdr = data;
905

906
   switch (imghdr->Machine) {
907
   case IMAGE_FILE_MACHINE_AMD64:
908
   case IMAGE_FILE_MACHINE_ARM64:
909
      break;
910
   default:
911
      fatal_trace("unknown target machine %x", imghdr->Machine);
912
   }
913

914
   const IMAGE_SYMBOL *symtab = data + imghdr->PointerToSymbolTable;
915
   const char *strtab = data + imghdr->PointerToSymbolTable
916
      + imghdr->NumberOfSymbols * sizeof(IMAGE_SYMBOL);
917

918
   const IMAGE_SECTION_HEADER *sections =
919
      data + IMAGE_SIZEOF_FILE_HEADER + imghdr->SizeOfOptionalHeader;
920

921
   void **load_addr LOCAL =
922
      xmalloc_array(imghdr->NumberOfSections, sizeof(void *));
923

924
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
925
      if ((sections[i].Characteristics & IMAGE_SCN_CNT_CODE)
926
          || (sections[i].Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)) {
927
         const int align = sections[i].Characteristics & IMAGE_SCN_ALIGN_MASK;
928
         code_blob_align(blob, 1 << ((align >> 20) - 1));
929
         load_addr[i] = blob->wptr;
930
         code_blob_emit(blob, data + sections[i].PointerToRawData,
931
                        sections[i].SizeOfRawData);
932
      }
933
      else if ((sections[i].Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
934
               && sections[i].Misc.VirtualSize > 0)
935
         fatal_trace("non-empty BSS not supported");
936
   }
937

938
   if (blob->overflow)
939
      return;   // Relocations might point outside of code span
940

941
   blob->veneers = blob->wptr;
942

943
   shash_t *external = load_acquire(&blob->span->owner->symbols);
944

945
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
946
      const IMAGE_RELOCATION *relocs = data + sections[i].PointerToRelocations;
947
      for (int j = 0; j < sections[i].NumberOfRelocations; j++) {
948
         const char *name = NULL;
949
         char tmp[9];
950

951
         assert(relocs[j].SymbolTableIndex < imghdr->NumberOfSymbols);
952
         const IMAGE_SYMBOL *sym = symtab + relocs[j].SymbolTableIndex;
953

954
         if (sym->N.Name.Short) {
955
            memcpy(tmp, sym->N.ShortName, 8);
956
            tmp[8] = '\0';
957
            name = tmp;
958
         }
959
         else
960
            name = strtab + sym->N.Name.Long;
961

962
         void *ptr = NULL;
963
         if (sym->SectionNumber > 0) {
964
            assert(sym->SectionNumber - 1 < imghdr->NumberOfSections);
965
            ptr = load_addr[sym->SectionNumber - 1] + sym->Value;
966
         }
967
         else
968
            ptr = shash_get(external, name);
969

970
         if (ptr == NULL && icmp(blob->span->name, name))
971
            ptr = blob->span->base;
972

973
         if (ptr == NULL)
974
            fatal_trace("failed to resolve symbol %s", name);
975

976
         void *patch = load_addr[i] + relocs[j].VirtualAddress;
977
         assert((uint8_t *)patch >= blob->span->base);
978
         assert((uint8_t *)patch < blob->span->base + blob->span->size);
979

980
         switch (relocs[j].Type) {
981
#if defined ARCH_X86_64
982
         case IMAGE_REL_AMD64_ADDR64:
983
            *(uint64_t *)patch += (uint64_t)ptr;
984
            break;
985
         case IMAGE_REL_AMD64_ADDR32NB:
986
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
987
            break;
988
#elif defined ARCH_ARM64
989
         case IMAGE_REL_ARM64_BRANCH26:
990
            {
991
               void *veneer = code_emit_trampoline(blob, ptr);
992
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
993
               *(uint32_t *)patch &= ~0x3ffffff;
994
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
995
            }
996
            break;
997
         case IMAGE_REL_ARM64_ADDR32NB:
998
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
999
            break;
1000
         case IMAGE_REL_ARM64_PAGEBASE_REL21:
1001
            arm64_patch_page_base_rel21(patch, ptr);
1002
            break;
1003
         case IMAGE_REL_ARM64_PAGEOFFSET_12A:
1004
         case IMAGE_REL_ARM64_PAGEOFFSET_12L:
1005
            arm64_patch_page_offset21(blob, patch, ptr);
1006
            break;
1007
#endif
1008
         default:
1009
            blob->span->size = blob->wptr - blob->span->base;
1010
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
1011
            fatal_trace("cannot handle relocation type %d for symbol %s",
1012
                        relocs[j].Type, name);
1013
         }
1014
      }
1015

1016
      if (strncmp((const char *)sections[i].Name, ".pdata",
1017
                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
1018
         assert(sections[i].SizeOfRawData % sizeof(RUNTIME_FUNCTION) == 0);
1019
         const int count = sections[i].SizeOfRawData / sizeof(RUNTIME_FUNCTION);
1020
         const DWORD64 base = (DWORD64)blob->span->base;
1021

1022
         // TODO: we should also call RtlDeleteFunctionTable at some point
1023
         if (!RtlAddFunctionTable(load_addr[i], count, base))
1024
            fatal_trace("RtlAddFunctionTable failed: %s", last_os_error());
1025
      }
1026
   }
1027

1028
   for (int i = 0; i < imghdr->NumberOfSymbols; i++) {
1029
      const IMAGE_SYMBOL *sym = &(symtab[i]);
1030

1031
      if (sym->SectionNumber == 0 || sym->N.Name.Short)
1032
         continue;
1033
      else if ((sym->Type >> 4) != IMAGE_SYM_DTYPE_FUNCTION)
1034
         continue;
1035
      else if (icmp(blob->span->name, strtab + sym->N.Name.Long)) {
1036
         blob->span->entry = load_addr[sym->SectionNumber - 1] + sym->Value;
1037
         break;
1038
      }
1039
   }
1040
}
1041
#elif defined __APPLE__
1042
// Load a 64-bit Mach-O object image held in DATA into BLOB.
//
// The load commands are walked once to copy every section of the object's
// segment into the blob at the alignment the section requests.  The
// relocations of each section are then applied in place and finally the
// span's entry point is set to the section symbol whose name (without the
// leading underscore) matches the span's name.
//
// If the blob overflowed while the sections were being copied the function
// returns before any relocation is applied.  SIZE is not consulted.
static void code_load_macho(code_blob_t *blob, const void *data, size_t size)
{
   const void *rptr = data;

   const struct mach_header_64 *fhdr = rptr;
   rptr += sizeof(struct mach_header_64);

   if (fhdr->magic != MH_MAGIC_64)
      fatal_trace("bad Mach-O magic %x", fhdr->magic);

   const struct segment_command_64 *seg = NULL;
   const struct symtab_command *symtab = NULL;

   // Address inside the blob at which each section was placed, indexed by
   // zero-based section number
   void **load_addr LOCAL = NULL;

   for (int i = 0; i < fhdr->ncmds; i++) {
      const struct load_command *load = rptr;
      switch (load->cmd) {
      case LC_SEGMENT_64:
         {
            // The section table below is sized for a single segment: a
            // second one would leak the first table and make the section
            // numbers stored in symbols index the wrong array
            if (seg != NULL)
               fatal_trace("multiple segments in Mach-O object");

            seg = rptr;
            load_addr = xmalloc_array(seg->nsects, sizeof(void *));

            for (int j = 0; j < seg->nsects; j++) {
               const struct section_64 *sec =
                  (void *)seg + sizeof(struct segment_command_64)
                  + j * sizeof(struct section_64);
               code_blob_align(blob, 1 << sec->align);
               load_addr[j] = blob->wptr;
               DEBUG_ONLY(code_blob_printf(blob, "%s", sec->sectname));
               code_blob_emit(blob, data + sec->offset, sec->size);
            }
         }
         break;
      case LC_SYMTAB:
         symtab = rptr;
         assert(symtab->cmdsize == sizeof(struct symtab_command));
         break;
      case LC_DATA_IN_CODE:
      case LC_LINKER_OPTIMIZATION_HINT:
      case LC_BUILD_VERSION:
      case LC_DYSYMTAB:
         break;
      default:
         warnf("unrecognised load command 0x%0x", load->cmd);
      }

      rptr += load->cmdsize;
   }
   assert(rptr == data + sizeof(struct mach_header_64) + fhdr->sizeofcmds);

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   // Anything emitted from here on is placed after the loaded sections
   blob->veneers = blob->wptr;

   assert(seg != NULL);
   assert(symtab != NULL);

   shash_t *external = load_acquire(&blob->span->owner->symbols);

   for (int i = 0; i < seg->nsects; i++) {
      const struct section_64 *sec =
         (void *)seg + sizeof(struct segment_command_64)
         + i * sizeof(struct section_64);

      // Addend carried over from a preceding ARM64_RELOC_ADDEND entry and
      // consumed by the relocation that follows it
      int32_t addend = 0;
      for (int j = 0; j < sec->nreloc; j++) {
         const struct relocation_info *rel =
            data + sec->reloff + j * sizeof(struct relocation_info);

         // Relocations that do not reference a symbol table entry have no
         // name: use a placeholder so that a null pointer is never passed
         // to a "%s" conversion in the diagnostics below
         const char *name = "<local>";
         void *ptr = NULL;
         if (rel->r_extern) {
            assert(rel->r_symbolnum < symtab->nsyms);
            const struct nlist_64 *nl = data + symtab->symoff
               + rel->r_symbolnum * sizeof(struct nlist_64);
            name = data + symtab->stroff + nl->n_un.n_strx;

            if (nl->n_type & N_EXT) {
               // The first character of the name is skipped before it is
               // compared or looked up
               if (icmp(blob->span->name, name + 1))
                  ptr = blob->span->base;
               else if ((ptr = shash_get(external, name + 1)) == NULL)
                  fatal_trace("failed to resolve symbol %s", name + 1);
            }
            else if (nl->n_sect != NO_SECT)
               ptr = blob->span->base + nl->n_value;
         }
         else
            ptr = blob->span->base;

         ptr += addend;
         addend = 0;

         void *patch = load_addr[i] + rel->r_address;
         assert((uint8_t *)patch >= blob->span->base);
         assert((uint8_t *)patch < blob->span->base + blob->span->size);

         switch (rel->r_type) {
#ifdef ARCH_ARM64
         case ARM64_RELOC_UNSIGNED:
            assert(rel->r_length == 3);
            *(void **)patch = ptr;
            break;
         case ARM64_RELOC_SUBTRACTOR:
            break;   // What is this?
         case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
         case ARM64_RELOC_PAGEOFF12:
            arm64_patch_page_offset21(blob, patch, ptr);
            break;
         case ARM64_RELOC_GOT_LOAD_PAGE21:
         case ARM64_RELOC_PAGE21:
            arm64_patch_page_base_rel21(patch, ptr);
            break;
         case ARM64_RELOC_BRANCH26:
            {
               // Branch via a trampoline emitted into the blob and encode
               // the word offset to it in the low 26 bits
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
               debug_reloc(blob, patch, "ARM64_RELOC_BRANCH26 %s PC%+"PRIiPTR,
                           name, pcrel);
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case ARM64_RELOC_ADDEND:
            // The addend is a signed 24-bit quantity stored in the
            // r_symbolnum field so it must be sign extended before use
            addend = rel->r_symbolnum;
            if (addend & 0x800000)
               addend -= 0x1000000;
            break;
#elif defined ARCH_X86_64
         case X86_64_RELOC_UNSIGNED:
            *(uint64_t *)patch += (uint64_t)ptr;
            break;
         case X86_64_RELOC_BRANCH:
            *(uint32_t *)patch += (uint32_t)(ptr - patch - 4);
            break;
#endif
         default:
            // Shrink the span to what was written so the disassembly dumped
            // before aborting covers only the loaded code
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %d for symbol %s",
                        rel->r_type, name);
         }
      }
   }

   // Locate the entry point: the first section symbol named after the span
   for (int i = 0; i < symtab->nsyms; i++) {
      const struct nlist_64 *sym =
         data + symtab->symoff + i * sizeof(struct nlist_64);

      if (sym->n_sect == NO_SECT || (sym->n_type & N_TYPE) != N_SECT)
         continue;

      const char *name = data + symtab->stroff + sym->n_un.n_strx;
      if (name[0] == '_' && icmp(blob->span->name, name + 1)) {
         // Section numbers are one-based and must refer to a loaded section
         assert(sym->n_sect <= seg->nsects);
         blob->span->entry = load_addr[sym->n_sect - 1] + sym->n_value;
         break;
      }
   }
}
1199
#elif !defined __MINGW32__
1200
// Load a 64-bit ELF relocatable object image held in DATA into BLOB.
//
// A first pass over the section headers copies every allocatable
// SHT_PROGBITS section into the blob and scans the symbol table for a
// function symbol matching the span's name, which becomes the span's entry
// point.  A second pass applies the SHT_RELA relocations that target the
// sections loaded by the first pass.
//
// If the blob overflowed while the sections were being copied the function
// returns before any relocation is applied.  SIZE is not consulted.
static void code_load_elf(code_blob_t *blob, const void *data, size_t size)
{
   const Elf64_Ehdr *ehdr = data;

   if (ehdr->e_ident[EI_MAG0] != ELFMAG0
       || ehdr->e_ident[EI_MAG1] != ELFMAG1
       || ehdr->e_ident[EI_MAG2] != ELFMAG2
       || ehdr->e_ident[EI_MAG3] != ELFMAG3)
      fatal_trace("bad ELF magic");
   else if (ehdr->e_shentsize != sizeof(Elf64_Shdr))
      fatal_trace("bad section header size %d != %zu", ehdr->e_shentsize,
                  sizeof(Elf64_Shdr));

   // NOTE(review): section names and symbol names are both looked up in
   // the section header string table, which presumably relies on the
   // object using one merged string table -- confirm against the producer
   const Elf64_Shdr *strtab_hdr =
      data + ehdr->e_shoff + ehdr->e_shstrndx * ehdr->e_shentsize;
   const char *strtab = data + strtab_hdr->sh_offset;

   // Address inside the blob at which each section was placed, or NULL for
   // sections that were not loaded
   void **load_addr LOCAL = xcalloc_array(ehdr->e_shnum, sizeof(void *));

   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;

      switch (shdr->sh_type) {
      case SHT_PROGBITS:
         if (shdr->sh_flags & SHF_ALLOC) {
            code_blob_align(blob, shdr->sh_addralign);
            load_addr[i] = blob->wptr;
            DEBUG_ONLY(code_blob_printf(blob, "%s", strtab + shdr->sh_name));
            code_blob_emit(blob, data + shdr->sh_offset, shdr->sh_size);
         }
         break;

      case SHT_RELA:
         // Handled in second pass
         break;

      case SHT_NULL:
      case SHT_STRTAB:
      case SHT_X86_64_UNWIND:
         break;

      case SHT_SYMTAB:
         {
            if (shdr->sh_entsize == 0)
               fatal_trace("symbol table has zero entry size");

            const size_t nsyms = shdr->sh_size / shdr->sh_entsize;
            for (size_t j = 0; j < nsyms; j++) {
               const Elf64_Sym *sym =
                  data + shdr->sh_offset + j * shdr->sh_entsize;

               if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
                  continue;
               else if (!icmp(blob->span->name, strtab + sym->st_name))
                  continue;
               else if (sym->st_shndx >= ehdr->e_shnum
                        || load_addr[sym->st_shndx] == NULL)
                  // Reserved section indices lie outside the table and
                  // must not be used to index it
                  fatal_trace("missing section %d for symbol %s",
                              sym->st_shndx, strtab + sym->st_name);
               else {
                  blob->span->entry =
                     load_addr[sym->st_shndx] + sym->st_value;
                  break;
               }
            }
         }
         break;

      default:
         warnf("ignoring ELF section %s with type %x", strtab + shdr->sh_name,
               shdr->sh_type);
      }
   }

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   // Anything emitted from here on is placed after the loaded sections
   blob->veneers = blob->wptr;

   shash_t *external = load_acquire(&blob->span->owner->symbols);

   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;
      if (shdr->sh_type != SHT_RELA)
         continue;
      else if (shdr->sh_info >= ehdr->e_shnum)
         fatal_trace("relocation section %s has invalid target section %u",
                     strtab + shdr->sh_name, shdr->sh_info);

      // Section that the relocations in this table modify
      const Elf64_Shdr *mod =
         data + ehdr->e_shoff + shdr->sh_info * ehdr->e_shentsize;
      if (mod->sh_type != SHT_PROGBITS || !(mod->sh_flags & SHF_ALLOC))
         continue;
      else if (load_addr[shdr->sh_info] == NULL)
         fatal_trace("section %s not loaded", strtab + mod->sh_name);

      if (shdr->sh_link >= ehdr->e_shnum)
         fatal_trace("relocation section %s has invalid symbol table %u",
                     strtab + shdr->sh_name, shdr->sh_link);

      const Elf64_Shdr *symtab =
         data + ehdr->e_shoff + shdr->sh_link * ehdr->e_shentsize;
      if (symtab->sh_type != SHT_SYMTAB)
         fatal_trace("section %s is not a symbol table",
                     strtab + symtab->sh_name);

      const Elf64_Rela *endp = data + shdr->sh_offset + shdr->sh_size;
      for (const Elf64_Rela *r = data + shdr->sh_offset; r < endp; r++) {
         const Elf64_Sym *sym = data + symtab->sh_offset
            + ELF64_R_SYM(r->r_info) * symtab->sh_entsize;

         // Resolve the symbol to an address; it is left NULL, and reported
         // below, when it is undefined externally or refers to a section
         // that is out of range or was not loaded
         void *ptr = NULL;
         switch (ELF64_ST_TYPE(sym->st_info)) {
         case STT_NOTYPE:
         case STT_FUNC:
            if (sym->st_shndx == SHN_UNDEF)
               ptr = shash_get(external, strtab + sym->st_name);
            else if (sym->st_shndx < ehdr->e_shnum
                     && load_addr[sym->st_shndx] != NULL)
               ptr = load_addr[sym->st_shndx] + sym->st_value;
            break;
         case STT_SECTION:
            if (sym->st_shndx < ehdr->e_shnum)
               ptr = load_addr[sym->st_shndx];
            break;
         default:
            fatal_trace("cannot handle ELF symbol type %d",
                        ELF64_ST_TYPE(sym->st_info));
         }

         if (ptr == NULL)
            fatal_trace("cannot resolve symbol %s type %d",
                        strtab + sym->st_name, ELF64_ST_TYPE(sym->st_info));

         void *patch = load_addr[shdr->sh_info] + r->r_offset;
         assert(r->r_offset < mod->sh_size);

         switch (ELF64_R_TYPE(r->r_info)) {
         case R_X86_64_64:
            debug_reloc(blob, patch, "R_X86_64_64 %s", strtab + sym->st_name);
            *(uint64_t *)patch = (uint64_t)ptr + r->r_addend;
            break;
         case R_X86_64_PC32:
            {
               const ptrdiff_t pcrel = ptr + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_PC32 %s PC%+"PRIiPTR,
                           strtab + sym->st_name, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_X86_64_GOTPCREL:
            {
               // Reference the symbol through a GOT slot emitted into the
               // blob rather than directly
               void *got = code_emit_got(blob, ptr);
               const ptrdiff_t pcrel = got + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_GOTPCREL %s PC%+"PRIiPTR,
                           strtab + sym->st_name, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_X86_64_PLT32:
            {
               // Call through a trampoline emitted into the blob
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = veneer + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_PLT32 %s PC%+"PRIiPTR,
                           strtab + sym->st_name, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_AARCH64_CALL26:
            {
               // Encode the word offset to the trampoline in the low 26
               // bits of the instruction
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = (veneer + r->r_addend - patch) >> 2;
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case R_AARCH64_PREL64:
            *(uint64_t *)patch = ptr + r->r_addend - patch;
            break;
         // Each MOVW relocation ORs one 16-bit slice of the absolute
         // address into the instruction starting at bit 5
         case R_AARCH64_MOVW_UABS_G0_NC:
            *(uint32_t *)patch |=
               (((uintptr_t)ptr + r->r_addend) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G1_NC:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 16) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G2_NC:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 32) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G3:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 48) & 0xffff) << 5;
            break;
         default:
            // Shrink the span to what was written so the disassembly dumped
            // before aborting covers only the loaded code
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %ld for symbol %s",
                        (long)ELF64_R_TYPE(r->r_info), strtab + sym->st_name);
         }
      }
   }
}
1390
#endif
1391

1392
// Load the native object file image in DATA into BLOB.  The arguments are
// forwarded unchanged to the loader for the object format selected at
// compile time: PE when __MINGW32__ is defined, Mach-O when __APPLE__ is
// defined, and ELF otherwise.
void code_load_object(code_blob_t *blob, const void *data, size_t size)
{
#if defined __MINGW32__
   code_load_pe(blob, data, size);
#elif defined __APPLE__
   code_load_macho(blob, data, size);
#else
   code_load_elf(blob, data, size);
#endif
}
14,530✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc