• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nickg / nvc / 27057863722

05 Jun 2026 08:01AM UTC coverage: 92.284% (-0.004%) from 92.288%
27057863722

push

github

nickg
Improve reporting for system call errors on Windows

2 of 9 new or added lines in 6 files covered. (22.22%)

2 existing lines in 1 file now uncovered.

79017 of 85624 relevant lines covered (92.28%)

641708.54 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.63
/src/jit/jit-code.c
1
//
2
//  Copyright (C) 2022-2024  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "cpustate.h"
20
#include "debug.h"
21
#include "hash.h"
22
#include "ident.h"
23
#include "jit/jit-priv.h"
24
#include "option.h"
25
#include "printf.h"
26
#include "thread.h"
27

28
#include <assert.h>
29
#include <errno.h>
30
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
33
#include <stdio.h>
34
#include <unistd.h>
35
#include <inttypes.h>
36
#include <signal.h>
37

38
#if defined __MINGW32__
39
#include <winnt.h>
40
#elif defined __APPLE__
41
#include <mach-o/loader.h>
42
#include <mach-o/reloc.h>
43
#include <mach-o/nlist.h>
44
#include <mach-o/stab.h>
45
#include <mach-o/arm64/reloc.h>
46
#include <mach-o/x86_64/reloc.h>
47
#else
48
#include <elf.h>
49
#endif
50

51
#ifdef HAVE_CAPSTONE
52
#include <capstone.h>
53
#endif
54

55
#ifndef R_AARCH64_MOVW_UABS_G0_NC
56
#define R_AARCH64_MOVW_UABS_G0_NC 264
57
#endif
58

59
#ifndef R_AARCH64_MOVW_UABS_G1_NC
60
#define R_AARCH64_MOVW_UABS_G1_NC 266
61
#endif
62

63
#ifndef R_AARCH64_MOVW_UABS_G2_NC
64
#define R_AARCH64_MOVW_UABS_G2_NC 268
65
#endif
66

67
#ifndef R_AARCH64_MOVW_UABS_G3
68
#define R_AARCH64_MOVW_UABS_G3 269
69
#endif
70

71
#ifndef SHT_X86_64_UNWIND
72
#define SHT_X86_64_UNWIND 0x70000001
73
#endif
74

75
#ifndef IMAGE_REL_ARM64_BRANCH26
76
#define IMAGE_REL_ARM64_BRANCH26 0x03
77
#endif
78

79
#ifndef IMAGE_REL_ARM64_ADDR32NB
80
#define IMAGE_REL_ARM64_ADDR32NB 0x02
81
#endif
82

83
#ifndef IMAGE_REL_ARM64_PAGEBASE_REL21
84
#define IMAGE_REL_ARM64_PAGEBASE_REL21 0x04
85
#endif
86

87
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12A
88
#define IMAGE_REL_ARM64_PAGEOFFSET_12A 0x06
89
#endif
90

91
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12L
92
#define IMAGE_REL_ARM64_PAGEOFFSET_12L 0x07
93
#endif
94

95
#define CODE_PAGE_ALIGN   4096
96
#define CODE_PAGE_SIZE    0x400000
97
#define THREAD_CACHE_SIZE 0x10000
98
#define CODE_BLOB_ALIGN   256
99
#define MIN_BLOB_SIZE     0x4000
100

101
#define __IMM64(x) __IMM32(x), __IMM32((x) >> 32)
102
#define __IMM32(x) __IMM16(x), __IMM16((x) >> 16)
103
#define __IMM16(x) (x) & 0xff, ((x) >> 8) & 0xff
104

105
STATIC_ASSERT(MIN_BLOB_SIZE <= THREAD_CACHE_SIZE);
106
STATIC_ASSERT(MIN_BLOB_SIZE % CODE_BLOB_ALIGN == 0);
107
STATIC_ASSERT(CODE_PAGE_SIZE % THREAD_CACHE_SIZE == 0);
108

109
typedef struct _code_page code_page_t;
110

111
// A single annotation attached to an address inside generated code
typedef struct {
112
   uintptr_t  addr;   // Address the annotation applies to
113
   char      *text;   // Annotation string, printed by the disassembler
114
} code_comment_t;
115

116
// Growable array of annotations belonging to one code span
typedef struct {
117
   unsigned        count;      // Number of entries currently in use
118
   unsigned        max;        // Number of entries allocated
119
   code_comment_t *comments;   // Storage, resized when count reaches max
120
} code_debug_t;
121

122
// A contiguous range of bytes inside a code page.  Spans are chained
// together on the owning cache's list and describe either generated
// code or space that has not been handed out yet.
typedef struct _code_span {
123
   code_cache_t *owner;   // Cache this span belongs to
124
   code_span_t  *next;    // Next span in the owner's list
125
   ident_t       name;    // Name of the generated code, NULL for free space
126
   uint8_t      *base;    // First byte of the range
127
   void         *entry;   // Entry point, initially the same as base
128
   size_t        size;    // Length of the range in bytes
129
#ifdef DEBUG
130
   code_debug_t  debug;   // Annotations shown when disassembling
131
#endif
132
} code_span_t;
133

134
// A patch request for a label that has not been marked yet
typedef struct _patch_list {
135
   patch_list_t    *next;    // Next pending patch
136
   uint8_t         *wptr;    // Write pointer at the time of the request
137
   jit_label_t      label;   // Label the patch is waiting for
138
   code_patch_fn_t  fn;      // Callback invoked once the label is marked
139
} patch_list_t;
140

141
// One CODE_PAGE_SIZE mapping of memory for generated code
typedef struct _code_page {
142
   code_cache_t *owner;   // Cache this page belongs to
143
   code_page_t  *next;    // Next page in the owner's list
144
   uint8_t      *mem;     // Start of the mapped memory
145
} code_page_t;
146

147
// Top-level state for all generated code
typedef struct _code_cache {
148
   nvc_lock_t   lock;                    // Guards the page and span lists
149
   code_page_t *pages;                   // Mapped pages, most recent first
150
   code_span_t *spans;                   // All spans, most recent first
151
   code_span_t *freelist[MAX_THREADS];   // Free span per thread, by thread_id()
152
   code_span_t *globalfree;              // Unallocated rest of the newest page
153
   shash_t     *symbols;                 // Symbol name to address table
154
   FILE        *perfmap;                 // Perf map file, opened on first use
155
#ifdef HAVE_CAPSTONE
156
   csh          capstone;                // Disassembler handle
157
#endif
158
#ifdef DEBUG
159
   size_t       used;                    // Total bytes of finalised code
160
#endif
161
} code_cache_t;
162

163
static void code_disassemble(code_span_t *span, uintptr_t mark,
164
                             struct cpu_state *cpu);
165

166
// Unwinder callback: fill in FRAME with the name of the span containing
// ADDR and the displacement of ADDR from the start of that span.
// CONTEXT is the code cache that was registered with the unwinder.
static void code_cache_unwinder(uintptr_t addr, debug_frame_t *frame,
                                void *context)
{
   code_cache_t *cache = context;
   const uint8_t *const target = (uint8_t *)addr;

   code_span_t *it = cache->spans;
   while (it != NULL) {
      // No early exit: when more than one span covers the address the
      // one furthest down the list determines the result
      if (target >= it->base && target < it->base + it->size) {
         frame->kind   = FRAME_VHDL;
         frame->disp   = target - it->base;
         frame->symbol = istr(it->name);
      }

      it = it->next;
   }
}
×
180

181
static void code_fault_handler(int sig, void *addr, struct cpu_state *cpu,
×
182
                               void *context)
183
{
184
   code_page_t *page = context;
×
185

186
   const uint8_t *pc = (uint8_t *)cpu->pc;
×
187
   if (pc < page->mem || pc > page->mem + CODE_PAGE_SIZE)
×
188
      return;
189

190
   uintptr_t mark = cpu->pc;
×
191
#ifndef __MINGW32__
192
   if (sig == SIGTRAP)
×
193
      mark--;   // Point to faulting instruction
×
194
#endif
195

196
   for (code_span_t *span = page->owner->spans; span; span = span->next) {
×
197
      if (pc >= span->base && pc < span->base + span->size && span->name)
×
198
         code_disassemble(span, mark, cpu);
×
199
   }
200
}
201

202
#ifdef DEBUG
203
// Return true if the SIZE bytes starting at BASE lie entirely within
// one of the pages owned by CODE.  The cache lock must be held.
static bool code_cache_contains(code_cache_t *code, uint8_t *base, size_t size)
{
   assert_lock_held(&code->lock);

   code_page_t *page = code->pages;
   while (page != NULL) {
      if (page->mem <= base && base + size <= page->mem + CODE_PAGE_SIZE)
         return true;

      page = page->next;
   }

   return false;
}
214
#endif
215

216
// Allocate a span named NAME covering SIZE bytes from BASE and push it
// onto the head of the cache's span list.  Takes the cache lock.
static code_span_t *code_span_new(code_cache_t *code, ident_t name,
                                  uint8_t *base, size_t size)
{
   SCOPED_LOCK(code->lock);

   assert(code_cache_contains(code, base, size));

   code_span_t *fresh = xcalloc(sizeof(code_span_t));
   fresh->owner = code;
   fresh->name  = name;
   fresh->base  = base;
   fresh->entry = base;   // Entry point starts out at the first byte
   fresh->size  = size;

   fresh->next = code->spans;
   code->spans = fresh;

   return fresh;
}
234

235
// Map a new code page, register its fault handler and unwinder, and
// make a span covering the whole page the cache's global free span.
// The cache lock must be held.
static void code_page_new(code_cache_t *code)
{
   assert_lock_held(&code->lock);

   code_page_t *fresh = xcalloc(sizeof(code_page_t));
   fresh->owner = code;
   fresh->next  = code->pages;
   fresh->mem   = map_jit_pages(CODE_PAGE_ALIGN, CODE_PAGE_SIZE);

   add_fault_handler(code_fault_handler, fresh);
   debug_add_unwinder(fresh->mem, CODE_PAGE_SIZE, code_cache_unwinder, code);

   code->pages = fresh;

   // Unnamed span describing the untouched page
   code_span_t *whole = xcalloc(sizeof(code_span_t));
   whole->owner = code;
   whole->base  = fresh->mem;
   whole->size  = CODE_PAGE_SIZE;
   whole->next  = code->spans;

   code->spans      = whole;
   code->globalfree = whole;
}
5,906✔
257

258
code_cache_t *code_cache_new(void)
5,899✔
259
{
260
   code_cache_t *code = xcalloc(sizeof(code_cache_t));
5,899✔
261

262
   {
263
      SCOPED_LOCK(code->lock);
11,798✔
264
      code_page_new(code);
5,899✔
265
   }
266

267
#ifdef HAVE_CAPSTONE
268
#if defined ARCH_X86_64
269
   if (cs_open(CS_ARCH_X86, CS_MODE_64, &(code->capstone)) != CS_ERR_OK)
270
      fatal_trace("failed to init capstone for x86_64");
271
#elif defined ARCH_ARM64
272
   if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &(code->capstone)) != CS_ERR_OK)
273
      fatal_trace("failed to init capstone for Arm64");
274
#else
275
#error Cannot configure capstone for this architecture
276
#endif
277

278
   if (cs_option(code->capstone, CS_OPT_DETAIL, 1) != CS_ERR_OK)
279
      fatal_trace("failed to set capstone detailed mode");
280
#endif
281

282
   shash_t *s = shash_new(32);
5,899✔
283

284
   extern void __nvc_putpriv(jit_handle_t, void *);
5,899✔
285
   extern void __nvc_sched_waveform(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,899✔
286
   extern void __nvc_sched_process(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,899✔
287
   extern void __nvc_test_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,899✔
288
   extern void __nvc_last_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);
5,899✔
289

290
   shash_put(s, "__nvc_sched_waveform", &__nvc_sched_waveform);
5,899✔
291
   shash_put(s, "__nvc_sched_process", &__nvc_sched_process);
5,899✔
292
   shash_put(s, "__nvc_test_event", &__nvc_test_event);
5,899✔
293
   shash_put(s, "__nvc_last_event", &__nvc_last_event);
5,899✔
294
   shash_put(s, "__nvc_mspace_alloc", &__nvc_mspace_alloc);
5,899✔
295
   shash_put(s, "__nvc_putpriv", &__nvc_putpriv);
5,899✔
296
   shash_put(s, "__nvc_do_exit", &__nvc_do_exit);
5,899✔
297
   shash_put(s, "__nvc_pack", &__nvc_pack);
5,899✔
298
   shash_put(s, "__nvc_unpack", &__nvc_unpack);
5,899✔
299
   shash_put(s, "__nvc_vec4op", &__nvc_vec4op);
5,899✔
300
   shash_put(s, "memmove", &memmove);
5,899✔
301
   shash_put(s, "memcpy", &memcpy);
5,899✔
302
   shash_put(s, "memset", &memset);
5,899✔
303
   shash_put(s, "pow", &pow);
5,899✔
304
   shash_put(s, "ldexp", &ldexp);
5,899✔
305
   shash_put(s, "exp2", &exp2);
5,899✔
306

307
#if defined __APPLE__ && defined ARCH_ARM64
308
   shash_put(s, "bzero", &bzero);
309
#elif defined __APPLE__ && defined ARCH_X86_64
310
   shash_put(s, "__bzero", &bzero);
311
#elif defined __MINGW32__ && defined ARCH_X86_64
312
   extern void ___chkstk_ms(void);
313
   shash_put(s, "___chkstk_ms", &___chkstk_ms);
314
#endif
315

316
   store_release(&code->symbols, s);
5,899✔
317

318
   return code;
5,899✔
319
}
320

321
// Release everything owned by CODE: unregister and unmap each page,
// free all spans, shut down the disassembler, close the perf map file
// if one was opened, and finally free the symbol table and the cache.
void code_cache_free(code_cache_t *code)
{
   for (code_page_t *it = code->pages, *tmp; it; it = tmp) {
      debug_remove_unwinder(it->mem);
      remove_fault_handler(code_fault_handler, it);

      nvc_munmap(it->mem, CODE_PAGE_SIZE);

      tmp = it->next;
      free(it);
   }

   for (code_span_t *it = code->spans, *tmp; it; it = tmp) {
      tmp = it->next;
      // NOTE(review): only the annotation array is released here, the
      // strings it points to are not freed in this function -- confirm
      // whether they are owned elsewhere
      DEBUG_ONLY(free(it->debug.comments));
      free(it);
   }

#ifdef HAVE_CAPSTONE
   cs_close(&(code->capstone));
#endif

#ifdef DEBUG
   if (code->used > 0)
      debugf("JIT code footprint: %zu bytes", code->used);
#endif

   // The perf map is opened lazily by code_write_perf_map and nothing
   // else holds the stream, so it must be closed before the cache goes
   if (code->perfmap != NULL)
      fclose(code->perfmap);

   shash_free(code->symbols);
   free(code);
}
5,890✔
351

352
#ifdef HAVE_CAPSTONE
353
// Write spaces to stdout until column COL reaches TAB.  Returns the
// resulting column, which is COL unchanged when it is already at or
// beyond TAB.
static int code_print_spaces(int col, int tab)
{
   while (col < tab) {
      fputc(' ', stdout);
      col++;
   }

   return col;
}
359
#endif
360

361
#if defined DEBUG && HAVE_CAPSTONE
362
static int code_comment_compare(const void *a, const void *b)
363
{
364
   const code_comment_t *ca = a;
365
   const code_comment_t *cb = b;
366

367
   if (ca->addr < cb->addr)
368
      return -1;
369
   else if (ca->addr > cb->addr)
370
      return 1;
371
   else
372
      return 0;
373
}
374
#endif
375

376
// Print the contents of SPAN to stdout between a dashed header that
// carries the span name and an 80 column dashed footer.  With capstone
// the bytes are disassembled one instruction per line, otherwise they
// are shown as a hex dump.
//
//   span  Span to print, its owner's lock is held for the duration
//   mark  Address to highlight with an arrow, or zero for none
//   cpu   If not NULL the registers are listed after the marked line
static void code_disassemble(code_span_t *span, uintptr_t mark,
×
377
                             struct cpu_state *cpu)
378
{
379
   SCOPED_LOCK(span->owner->lock);
×
380

381
   printf("--");
×
382

383
   // Pad with dashes so that the span name ends up right aligned
   const int namelen = ident_len(span->name);
×
384
   for (int i = 0; i < 72 - namelen; i++)
×
385
      fputc('-', stdout);
×
386

387
   printf(" %s ----\n", istr(span->name));
×
388

389
#ifdef HAVE_CAPSTONE
390
   cs_insn *insn = cs_malloc(span->owner->capstone);
391

392
#ifdef DEBUG
393
   // Sort the annotations by address so they can be interleaved with
   // the instructions in a single forward pass
   qsort(span->debug.comments, span->debug.count, sizeof(code_comment_t),
394
         code_comment_compare);
395
   code_comment_t *comment = span->debug.comments;
396
#endif
397

398
   const uint8_t *const eptr = span->base + span->size;
399
   for (const uint8_t *ptr = span->base; ptr < eptr; ) {
400
      uint64_t address = (uint64_t)ptr;
401

402
#ifdef DEBUG
403
      // Emit every annotation at or before the current address
      for (; comment < span->debug.comments + span->debug.count
404
              && comment->addr <= address; comment++)
405
         printf("%30s;; %s\n", "", comment->text);
406
#endif
407

408
      // Count the run of zero bytes starting at the current position
      int zeros = 0;
409
      for (const uint8_t *zp = ptr; zp < eptr && *zp == 0; zp++, zeros++);
410

411
      // Summarise runs longer than eight bytes and runs that extend to
      // the end of the span instead of decoding them
      if (zeros > 8 || zeros == eptr - ptr) {
412
         printf("%30s;; skipping %d zero bytes\n", "", zeros);
413
         ptr += zeros;
414
         continue;
415
      }
416

417
      size_t size = eptr - ptr;
418
      int col = 0;
419
      if (cs_disasm_iter(span->owner->capstone, &ptr, &size, &address, insn)) {
420
         // Hex encoding of the instruction bytes
         char hex1[33], *p = hex1;
421
         for (size_t k = 0; k < insn->size; k++)
422
            p += checked_sprintf(p, hex1 + sizeof(hex1) - p, "%02x",
423
                                 insn->bytes[k]);
424

425
         col = printf("%-12" PRIx64 " %-16.16s %s %s", insn->address,
426
                          hex1, insn->mnemonic, insn->op_str);
427

428
#ifdef ARCH_X86_64
429
         // Show the symbol name for an immediate that resolves to one
         if (strcmp(insn->mnemonic, "movabs") == 0) {
430
            const cs_x86_op *src = &(insn->detail->x86.operands[1]);
431
            if (src->type == X86_OP_IMM) {
432
               const char *sym = debug_symbol_name((void *)src->imm);
433
               if (sym != NULL) {
434
                  col = code_print_spaces(col, 60);
435
                  col += printf(" ; %s", sym);
436
               }
437
            }
438
         }
439
#endif
440

441
         // Only 16 hex digits fit in the first line, continue the rest
         // on a second line
         if (strlen(hex1) > 16)
442
            col = printf("\n%15s -%-16s", "", hex1 + 16) - 1;
443
      }
444
      else {
445
         // Could not decode an instruction: print the raw data and step
         // over it
#ifdef ARCH_ARM64
446
         col = printf("%-12" PRIx64 " %-16.08x %s 0x%08x", (uint64_t)ptr,
447
                      *(uint32_t *)ptr, ".word", *(uint32_t *)ptr);
448
         ptr += 4;
449
#else
450
         col = printf("%-12" PRIx64 " %-16.02x %s 0x%02x", (uint64_t)ptr,
451
                      *ptr, ".byte", *ptr);
452
         ptr++;
453
#endif
454
      }
455

456
      // Draw the arrow on the first line whose following address is
      // beyond the mark, or on the last line if none was
      if (mark != 0 && (ptr >= eptr || address > mark)) {
457
         col = code_print_spaces(col, 66);
458
         printf("<=============\n");
459
         if (cpu != NULL) {
460
#ifdef ARCH_X86_64
461
            const char *names[] = {
462
               "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
463
               "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"
464
            };
465
            for (int i = 0; i < ARRAY_LEN(names); i++)
466
               printf("\t%s\t%"PRIxPTR"\n", names[i], cpu->regs[i]);
467
#else
468
            for (int i = 0; i < 32; i++)
469
               printf("\tR%d\t%"PRIxPTR"\n", i, cpu->regs[i]);
470
#endif
471
         }
472
         // Clear the mark so the arrow is drawn only once
         mark = 0;
473
      }
474
      else
475
         printf("\n");
476
   }
477

478
   cs_free(insn, 1);
479
#else
480
   jit_hexdump(span->base, span->size, 16, (void *)mark, "");
×
481
#endif
482

483
   for (int i = 0; i < 80; i++)
×
484
      fputc('-', stdout);
×
485
   printf("\n");
×
486
   fflush(stdout);
×
487
}
×
488

489
// Append a line describing SPAN to /tmp/perf-<pid>.map, creating the
// file on first use.  If the file cannot be created a warning is issued
// and the OPT_PERF_MAP option is cleared.
static void code_write_perf_map(code_span_t *span)
{
   code_cache_t *owner = span->owner;

   SCOPED_LOCK(owner->lock);

   if (owner->perfmap == NULL) {
      char *fname LOCAL = xasprintf("/tmp/perf-%d.map", getpid());

      owner->perfmap = fopen(fname, "w");
      if (owner->perfmap == NULL) {
         warnf("cannot create %s: %s", fname, strerror(errno));
         opt_set_int(OPT_PERF_MAP, 0);
         return;
      }

      debugf("writing perf map to %s", fname);
   }

   // One line per span: start address, size in hex, name
   fprintf(owner->perfmap, "%p 0x%zx %s\n", span->base, span->size,
           istr(span->name));
   fflush(owner->perfmap);
}
508

509
// Start generating code for NAME.  At least HINT bytes are reserved, or
// MIN_BLOB_SIZE when HINT is zero.  Space comes from the calling
// thread's free span, which is refilled from the global free span, and
// if necessary a new page, when it is too small.  The thread is left
// in WX_WRITE mode.
code_blob_t *code_blob_new(code_cache_t *code, ident_t name, size_t hint)
{
   code_span_t **slot = &(code->freelist[thread_id()]);

   // Create this thread's free span, initially empty, on first use
   code_span_t *local = relaxed_load(slot);
   if (local == NULL) {
      local = code_span_new(code, NULL, code->pages->mem, 0);
      relaxed_store(slot, local);
   }

   const size_t reqsz = hint != 0 ? hint : MIN_BLOB_SIZE;

   if (local->size < reqsz) {
      SCOPED_LOCK(code->lock);

#ifdef DEBUG
      if (local->size > 0)
         debugf("thread %d needs new code cache from global free list "
                "(requested %zu bytes, wasted %zu bytes)",
                thread_id(), reqsz, local->size);
#endif

      const size_t chunksz = MAX(reqsz, THREAD_CACHE_SIZE);
      const size_t alignedsz = ALIGN_UP(chunksz, CODE_BLOB_ALIGN);

      if (code->globalfree->size < alignedsz) {
         DEBUG_ONLY(debugf("requesting new %d byte code page", CODE_PAGE_SIZE));
         code_page_new(code);
         assert(code->globalfree->size == CODE_PAGE_SIZE);
      }

      // Read the global free span only now as code_page_new replaces it
      code_span_t *global = code->globalfree;
      const size_t take = MIN(global->size, alignedsz);

      local->size = take;
      local->base = global->base;

      global->base += take;
      global->size -= take;
   }

   assert(reqsz <= local->size);
   assert(((uintptr_t)local->base & (CODE_BLOB_ALIGN - 1)) == 0);

   // The new span takes everything the thread has; whatever is unused
   // is handed back when the blob is finalised
   code_span_t *span = code_span_new(code, name, local->base, local->size);

   local->base += span->size;
   local->size -= span->size;

   code_blob_t *blob = xcalloc(sizeof(code_blob_t));
   blob->span = span;
   blob->wptr = span->base;

   thread_wx_mode(WX_WRITE);

   return blob;
}
565

566
// Finish generating code into BLOB and free it.  The span is trimmed to
// the bytes actually written, the remainder is returned to the thread's
// free span, and the entry point is published through ENTRY.  If the
// blob overflowed all of its memory is returned and ENTRY is left
// untouched.
void code_blob_finalise(code_blob_t *blob, jit_entry_fn_t *entry)
{
   code_span_t *span = blob->span;
   code_cache_t *owner = span->owner;

   span->size = blob->wptr - span->base;

   code_span_t *tcache = relaxed_load(&(owner->freelist[thread_id()]));
   assert(tcache->size == 0);

   ihash_free(blob->labels);
   blob->labels = NULL;

   if (unlikely(blob->patches != NULL))
      fatal_trace("not all labels in %s were patched", istr(span->name));
   else if (unlikely(blob->overflow)) {
      // Return all the memory
      tcache->size = tcache->base - span->base;
      tcache->base = span->base;
      free(blob);
      return;
   }
   else if (span->size == 0)
      fatal_trace("code span %s is empty", istr(span->name));

   uint8_t *const code_end = blob->wptr;

   // The free space resumes at the next aligned address after the code
   uint8_t *next_free = ALIGN_UP(code_end, CODE_BLOB_ALIGN);
   tcache->size = tcache->base - next_free;
   tcache->base = next_free;

   if (opt_get_verbose(OPT_ASM_VERBOSE, istr(span->name))) {
      nvc_printf("\n$bold$$blue$");
      code_disassemble(span, 0, NULL);
      nvc_printf("$$\n");
   }

   __builtin___clear_cache((char *)span->base, (char *)code_end);

   thread_wx_mode(WX_EXECUTE);

   store_release(entry, (jit_entry_fn_t)span->entry);

   DEBUG_ONLY(relaxed_add(&owner->used, span->size));
   free(blob);

   if (opt_get_int(OPT_PERF_MAP))
      code_write_perf_map(span);
}
611

612
__attribute__((cold, noinline))
613
static void code_blob_overflow(code_blob_t *blob)
1✔
614
{
615
   warnf("JIT code buffer for %s too small", istr(blob->span->name));
1✔
616
   for (patch_list_t *it = blob->patches, *tmp; it; it = tmp) {
1✔
617
      tmp = it->next;
×
618
      free(it);
×
619
   }
620
   blob->patches = NULL;
1✔
621
   blob->overflow = true;
1✔
622
}
1✔
623

624
// Append LEN bytes from BYTES at the blob's write pointer.  Nothing is
// written once the blob has overflowed, and a write that would pass the
// end of the span marks the blob as overflowed instead.
void code_blob_emit(code_blob_t *blob, const uint8_t *bytes, size_t len)
{
   if (unlikely(blob->overflow))
      return;

   const uint8_t *const limit = blob->span->base + blob->span->size;
   if (unlikely(blob->wptr + len > limit)) {
      code_blob_overflow(blob);
      return;
   }

   memcpy(blob->wptr, bytes, len);
   blob->wptr += len;
}
636

637
// Emit padding until the write pointer is a multiple of ALIGN, which
// must be a power of two.  Stops early if the blob overflows.
void code_blob_align(code_blob_t *blob, unsigned align)
{
#ifdef ARCH_X86_64
   const uint8_t pad[] = { 0x90 };
#else
   const uint8_t pad[] = { 0x00 };
#endif

   assert(is_power_of_2(align));
   assert(align % ARRAY_LEN(pad) == 0);

   for (;;) {
      const uintptr_t misalign = (uintptr_t)blob->wptr & (align - 1);
      if (misalign == 0 || blob->overflow)
         break;

      code_blob_emit(blob, pad, ARRAY_LEN(pad));
   }
}
16,864✔
651

652
// Bind LABEL to the current write pointer and resolve every pending
// patch that was waiting for it.  Does nothing after an overflow.
void code_blob_mark(code_blob_t *blob, jit_label_t label)
{
   if (unlikely(blob->overflow))
      return;

   // The label table is created on first use
   if (blob->labels == NULL)
      blob->labels = ihash_new(256);

   ihash_put(blob->labels, label, blob->wptr);

   patch_list_t **link = &(blob->patches);
   while (*link != NULL) {
      patch_list_t *node = *link;

      if (node->label != label) {
         link = &(node->next);
         continue;
      }

      // Apply the patch, then unlink the node leaving LINK in place so
      // that the successor is examined next
      patch_list_t *following = node->next;
      (*node->fn)(blob, label, node->wptr, blob->wptr);
      free(node);
      *link = following;
   }
}
672

673
// Request that FN be called to patch the code at the current write
// pointer with the address of LABEL.  The call happens immediately if
// the label is already marked, otherwise the request is queued until
// code_blob_mark sees the label.  Does nothing after an overflow.
void code_blob_patch(code_blob_t *blob, jit_label_t label, code_patch_fn_t fn)
{
   if (unlikely(blob->overflow))
      return;

   void *target = NULL;
   if (blob->labels != NULL)
      target = ihash_get(blob->labels, label);

   if (target != NULL) {
      (*fn)(blob, label, blob->wptr, target);
      return;
   }

   // Not marked yet: push a request onto the pending list
   patch_list_t *pending = xmalloc(sizeof(patch_list_t));
   pending->label = label;
   pending->fn    = fn;
   pending->wptr  = blob->wptr;
   pending->next  = blob->patches;

   blob->patches = pending;
}
690

691
#ifdef DEBUG
692
// Append a textual form of VALUE to TB, chosen by the kind of value.
// Unrecognised kinds are printed as "???".
static void code_blob_print_value(text_buf_t *tb, jit_value_t value)
{
   switch (value.kind) {
   case JIT_VALUE_REG:
      tb_printf(tb, "R%d", value.reg);
      return;

   case JIT_VALUE_INT64:
      // Values from 4096 upwards are shown in hex
      if (value.int64 >= 4096)
         tb_printf(tb, "#0x%"PRIx64, value.int64);
      else
         tb_printf(tb, "#%"PRIi64, value.int64);
      return;

   case JIT_VALUE_DOUBLE:
      tb_printf(tb, "%%%g", value.dval);
      return;

   case JIT_ADDR_CPOOL:
      tb_printf(tb, "[CP+%"PRIi64"]", value.int64);
      return;

   case JIT_ADDR_REG:
      tb_printf(tb, "[R%d", value.reg);
      if (value.disp != 0)
         tb_printf(tb, "+%d", value.disp);
      tb_cat(tb, "]");
      return;

   case JIT_ADDR_ABS:
      tb_printf(tb, "[#%016"PRIx64"]", value.int64);
      return;

   case JIT_VALUE_LABEL:
      tb_printf(tb, "%d", value.label);
      return;

   case JIT_VALUE_HANDLE:
      tb_printf(tb, "<%d>", value.handle);
      return;

   case JIT_VALUE_EXIT:
      tb_printf(tb, "%s", jit_exit_name(value.exit));
      return;

   case JIT_VALUE_LOC:
      tb_printf(tb, "<%s:%d>", loc_file_str(&value.loc), value.loc.first_line);
      return;

   case JIT_VALUE_LOCUS:
      tb_printf(tb, "%p", value.locus);
      return;

   case JIT_VALUE_VPOS:
      tb_printf(tb, "%u:%u", value.vpos.block, value.vpos.op);
      return;

   default:
      tb_cat(tb, "???");
      return;
   }
}
459✔
741

742
// Record TEXT as an annotation for address ADDR in the blob's span.
// The array grows by doubling, starting at 128 entries.  The pointer
// TEXT is stored as given, not copied.
static void code_blob_add_comment(code_blob_t *blob, uintptr_t addr, char *text)
{
   code_debug_t *dbg = &(blob->span->debug);

   const bool full = (dbg->count == dbg->max);
   if (full) {
      dbg->max = MAX(128, dbg->max * 2);
      dbg->comments = xrealloc_array(dbg->comments, dbg->max,
                                     sizeof(code_comment_t));
   }

   code_comment_t *slot = &(dbg->comments[dbg->count]);
   slot->addr = addr;
   slot->text = text;

   dbg->count++;
}
94,532✔
756

757
// Format the IR instruction IR as text (mnemonic with condition code,
// optional operand size, result register and up to two arguments) and
// attach it as an annotation at the current write pointer.
void code_blob_print_ir(code_blob_t *blob, jit_ir_t *ir)
{
   LOCAL_TEXT_BUF tb = tb_new();
   tb_printf(tb, "%s%s", jit_op_name(ir->op), jit_cc_name(ir->cc));

   if (ir->size != JIT_SZ_UNSPEC)
      tb_printf(tb, ".%d", 1 << (3 + ir->size));

   // Pad the mnemonic so the operands line up
   tb_printf(tb, "%*.s", (int)MAX(0, 10 - tb_len(tb)), "");

   const bool has_result = (ir->result != JIT_REG_INVALID);
   if (has_result)
      tb_printf(tb, "R%d", ir->result);

   if (ir->arg1.kind != JIT_VALUE_INVALID) {
      if (has_result)
         tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg1);
   }

   if (ir->arg2.kind != JIT_VALUE_INVALID) {
      tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg2);
   }

   char *text = tb_claim(tb);
   code_blob_add_comment(blob, (uintptr_t)blob->wptr, text);
}
413✔
783

784
// Attach a printf-style formatted annotation at the blob's current
// write pointer
void code_blob_printf(code_blob_t *blob, const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   char *formatted = xvasprintf(fmt, args);
   va_end(args);

   code_blob_add_comment(blob, (uintptr_t)blob->wptr, formatted);
}
16,864✔
794

795
__attribute__((format(printf, 3, 4)))
796
static void debug_reloc(code_blob_t *blob, void *patch, const char *fmt, ...)
77,255✔
797
{
798
   va_list ap;
77,255✔
799
   va_start(ap, fmt);
77,255✔
800

801
   char *text = xvasprintf(fmt, ap);
77,255✔
802
   code_blob_add_comment(blob, (uintptr_t)patch, text);
77,255✔
803

804
   va_end(ap);
77,255✔
805
}
77,255✔
806
#else
807
#define debug_reloc(...)
808
#endif   // DEBUG
809

810
#ifdef ARCH_ARM64
811
// Insert the low 12 bits of PTR into the immediate field of the
// instruction at PATCH.  Quadword LDR and ADD (immediate) are handled,
// anything else is reported as a fatal error after disassembling the
// code written so far.
static void arm64_patch_page_offset21(code_blob_t *blob, uint32_t *patch,
                                      void *ptr)
{
   const uintptr_t page_off = (uintptr_t)ptr & 0xfff;
   const uint32_t opcode = (*patch >> 23) & 0x7f;

   switch (opcode) {
   case 0x7a:   // LDR (immediate, SIMD&FP)
   case 0x72:   // LDR (immediate)
      assert(*patch & (1 << 30));  // Quadword
      assert(((uintptr_t)ptr & 7) == 0);
      // The immediate is stored in units of eight bytes
      *patch |= (page_off >> 3) << 10;
      break;

   case 0x22:   // ADD (immediate)
      *patch |= page_off << 10;
      break;

   default:
      blob->span->size = blob->wptr - blob->span->base;
      code_disassemble(blob->span, (uintptr_t)patch, NULL);
      fatal_trace("cannot patch instruction");
   }
}
830

831
// Rewrite the 21-bit immediate of the instruction at PATCH with the
// number of 4 KiB pages between the page containing PATCH and the page
// containing PTR.  The low two bits of the distance go to bits 29-30
// and the remaining nineteen to bits 5-23.
static void arm64_patch_page_base_rel21(uint32_t *patch, void *ptr)
{
   const intptr_t target_page = (intptr_t)ptr & ~UINT64_C(0xfff);
   const intptr_t insn_page = (intptr_t)patch & ~UINT64_C(0xfff);

   const intptr_t delta = (target_page - insn_page) >> 12;
   assert(delta >= -(1 << 20) && delta < (1 << 20));

   const uint32_t low_bits = (delta & 3) << 29;
   const uint32_t high_bits = ((delta >> 2) & 0x7ffff) << 5;

   uint32_t insn = *patch;
   insn &= ~((0x3 << 29) | (0x7ffff << 5));   // Clear the old immediate
   insn |= low_bits;
   insn |= high_bits;
   *patch = insn;
}
841
#endif
842

843
// Return the address of a short code sequence inside the blob that
// transfers control to DEST through a register.  An identical sequence
// already present between blob->veneers and the write pointer is
// reused, otherwise a new one is emitted at the write pointer.
static void *code_emit_trampoline(code_blob_t *blob, void *dest)
×
844
{
845
#if defined ARCH_X86_64
846
   const uint8_t veneer[] = {
×
847
      0x48, 0xb8, __IMM64((uintptr_t)dest),  // MOVABS RAX, dest
×
848
      0xff, 0xe0                             // JMP RAX
849
   };
850
#elif defined ARCH_ARM64
851
   const uint8_t veneer[] = {
852
      0x50, 0x00, 0x00, 0x58,   // LDR X16, [PC+8]
853
      0x00, 0x02, 0x1f, 0xd6,   // BR X16
854
      __IMM64((uintptr_t)dest)
855
   };
856
#else
857
   should_not_reach_here();
858
#endif
859

860
   // Look for a previously emitted copy of exactly these bytes
   void *prev = memmem(blob->veneers, blob->wptr - blob->veneers,
×
861
                       veneer, ARRAY_LEN(veneer));
862
   if (prev != NULL)
×
863
      return prev;
864
   else {
865
      DEBUG_ONLY(code_blob_printf(blob, "Trampoline for %p", dest));
×
866

867
      void *addr = blob->wptr;
×
868
      code_blob_emit(blob, veneer, ARRAY_LEN(veneer));
×
869
      return addr;
×
870
   }
871
}
872

873
#if !defined __MINGW32__ && !defined __APPLE__
874
// Return the address of an eight byte slot inside the blob holding the
// address DEST.  A slot with the same contents already present between
// blob->veneers and the write pointer is reused, otherwise a new one is
// emitted at the write pointer.
static void *code_emit_got(code_blob_t *blob, void *dest)
{
   const uint8_t data[] = { __IMM64((uintptr_t)dest) };

   // The region to search is [veneers, wptr), so its length must be
   // wptr - veneers.  The operands were previously reversed, which
   // produced a negative difference that became an enormous size_t and
   // let memmem read far beyond the end of the blob.
   void *prev = memmem(blob->veneers, blob->wptr - blob->veneers,
                       data, ARRAY_LEN(data));
   if (prev != NULL)
      return prev;
   else {
      DEBUG_ONLY(code_blob_printf(blob, "GOT entry for %p", dest));

      void *addr = blob->wptr;
      code_blob_emit(blob, data, ARRAY_LEN(data));
      return addr;
   }
}
890
#endif
891

892
#if defined __MINGW32__
893
static void code_load_pe(code_blob_t *blob, const void *data, size_t size)
894
{
895
   const IMAGE_FILE_HEADER *imghdr = data;
896

897
   switch (imghdr->Machine) {
898
   case IMAGE_FILE_MACHINE_AMD64:
899
   case IMAGE_FILE_MACHINE_ARM64:
900
      break;
901
   default:
902
      fatal_trace("unknown target machine %x", imghdr->Machine);
903
   }
904

905
   const IMAGE_SYMBOL *symtab = data + imghdr->PointerToSymbolTable;
906
   const char *strtab = data + imghdr->PointerToSymbolTable
907
      + imghdr->NumberOfSymbols * sizeof(IMAGE_SYMBOL);
908

909
   const IMAGE_SECTION_HEADER *sections =
910
      data + IMAGE_SIZEOF_FILE_HEADER + imghdr->SizeOfOptionalHeader;
911

912
   void **load_addr LOCAL =
913
      xmalloc_array(imghdr->NumberOfSections, sizeof(void *));
914

915
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
916
      if ((sections[i].Characteristics & IMAGE_SCN_CNT_CODE)
917
          || (sections[i].Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)) {
918
         const int align = sections[i].Characteristics & IMAGE_SCN_ALIGN_MASK;
919
         code_blob_align(blob, 1 << ((align >> 20) - 1));
920
         load_addr[i] = blob->wptr;
921
         code_blob_emit(blob, data + sections[i].PointerToRawData,
922
                        sections[i].SizeOfRawData);
923
      }
924
      else if ((sections[i].Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
925
               && sections[i].Misc.VirtualSize > 0)
926
         fatal_trace("non-empty BSS not supported");
927
   }
928

929
   if (blob->overflow)
930
      return;   // Relocations might point outside of code span
931

932
   blob->veneers = blob->wptr;
933

934
   shash_t *external = load_acquire(&blob->span->owner->symbols);
935

936
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
937
      const IMAGE_RELOCATION *relocs = data + sections[i].PointerToRelocations;
938
      for (int j = 0; j < sections[i].NumberOfRelocations; j++) {
939
         const char *name = NULL;
940
         char tmp[9];
941

942
         assert(relocs[j].SymbolTableIndex < imghdr->NumberOfSymbols);
943
         const IMAGE_SYMBOL *sym = symtab + relocs[j].SymbolTableIndex;
944

945
         if (sym->N.Name.Short) {
946
            memcpy(tmp, sym->N.ShortName, 8);
947
            tmp[8] = '\0';
948
            name = tmp;
949
         }
950
         else
951
            name = strtab + sym->N.Name.Long;
952

953
         void *ptr = NULL;
954
         if (sym->SectionNumber > 0) {
955
            assert(sym->SectionNumber - 1 < imghdr->NumberOfSections);
956
            ptr = load_addr[sym->SectionNumber - 1] + sym->Value;
957
         }
958
         else
959
            ptr = shash_get(external, name);
960

961
         if (ptr == NULL && icmp(blob->span->name, name))
962
            ptr = blob->span->base;
963

964
         if (ptr == NULL)
965
            fatal_trace("failed to resolve symbol %s", name);
966

967
         void *patch = load_addr[i] + relocs[j].VirtualAddress;
968
         assert((uint8_t *)patch >= blob->span->base);
969
         assert((uint8_t *)patch < blob->span->base + blob->span->size);
970

971
         switch (relocs[j].Type) {
972
#if defined ARCH_X86_64
973
         case IMAGE_REL_AMD64_ADDR64:
974
            *(uint64_t *)patch += (uint64_t)ptr;
975
            break;
976
         case IMAGE_REL_AMD64_ADDR32NB:
977
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
978
            break;
979
#elif defined ARCH_ARM64
980
         case IMAGE_REL_ARM64_BRANCH26:
981
            {
982
               void *veneer = code_emit_trampoline(blob, ptr);
983
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
984
               *(uint32_t *)patch &= ~0x3ffffff;
985
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
986
            }
987
            break;
988
         case IMAGE_REL_ARM64_ADDR32NB:
989
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
990
            break;
991
         case IMAGE_REL_ARM64_PAGEBASE_REL21:
992
            arm64_patch_page_base_rel21(patch, ptr);
993
            break;
994
         case IMAGE_REL_ARM64_PAGEOFFSET_12A:
995
         case IMAGE_REL_ARM64_PAGEOFFSET_12L:
996
            arm64_patch_page_offset21(blob, patch, ptr);
997
            break;
998
#endif
999
         default:
1000
            blob->span->size = blob->wptr - blob->span->base;
1001
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
1002
            fatal_trace("cannot handle relocation type %d for symbol %s",
1003
                        relocs[j].Type, name);
1004
         }
1005
      }
1006

1007
      if (strncmp((const char *)sections[i].Name, ".pdata",
1008
                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
1009
         assert(sections[i].SizeOfRawData % sizeof(RUNTIME_FUNCTION) == 0);
1010
         const int count = sections[i].SizeOfRawData / sizeof(RUNTIME_FUNCTION);
1011
         const DWORD64 base = (DWORD64)blob->span->base;
1012

1013
         // TODO: we should also call RtlDeleteFunctionTable at some point
1014
         if (!RtlAddFunctionTable(load_addr[i], count, base))
1015
            fatal_win32("RtlAddFunctionTable");
1016
      }
1017
   }
1018

1019
   for (int i = 0; i < imghdr->NumberOfSymbols; i++) {
1020
      const IMAGE_SYMBOL *sym = &(symtab[i]);
1021

1022
      if (sym->SectionNumber == 0 || sym->N.Name.Short)
1023
         continue;
1024
      else if ((sym->Type >> 4) != IMAGE_SYM_DTYPE_FUNCTION)
1025
         continue;
1026
      else if (icmp(blob->span->name, strtab + sym->N.Name.Long)) {
1027
         blob->span->entry = load_addr[sym->SectionNumber - 1] + sym->Value;
1028
         break;
1029
      }
1030
   }
1031
}
1032
#elif defined __APPLE__
1033
// Load a 64-bit Mach-O object held in memory at DATA into BLOB.
//
// The load commands are walked once: every section of an LC_SEGMENT_64
// command is aligned and copied into the blob and its load address
// recorded, and the LC_SYMTAB command is remembered.  The relocations of
// each section are then applied and finally the symbol table is searched
// for the symbol named after the code span (with a leading underscore) to
// set the span's entry point.  Nothing is patched if the blob overflowed
// while the sections were being copied.  SIZE is currently unused.
static void code_load_macho(code_blob_t *blob, const void *data, size_t size)
{
   const void *rptr = data;

   const struct mach_header_64 *fhdr = rptr;
   rptr += sizeof(struct mach_header_64);

   if (fhdr->magic != MH_MAGIC_64)
      fatal_trace("bad Mach-O magic %x", fhdr->magic);

   const struct segment_command_64 *seg = NULL;
   const struct section_64 *sects = NULL;
   const struct symtab_command *symtab = NULL;

   void **load_addr LOCAL = NULL;

   for (uint32_t i = 0; i < fhdr->ncmds; i++) {
      const struct load_command *load = rptr;
      switch (load->cmd) {
      case LC_SEGMENT_64:
         {
            seg = rptr;

            // The section headers immediately follow the segment command
            sects = rptr + sizeof(struct segment_command_64);

            // Do not leak the table built for an earlier segment command
            void **fresh = xmalloc_array(seg->nsects, sizeof(void *));
            free(load_addr);
            load_addr = fresh;

            for (uint32_t j = 0; j < seg->nsects; j++) {
               const struct section_64 *sec = &(sects[j]);
               code_blob_align(blob, 1 << sec->align);
               load_addr[j] = blob->wptr;
               DEBUG_ONLY(code_blob_printf(blob, "%s", sec->sectname));
               code_blob_emit(blob, data + sec->offset, sec->size);
            }
         }
         break;
      case LC_SYMTAB:
         symtab = rptr;
         assert(symtab->cmdsize == sizeof(struct symtab_command));
         break;
      case LC_DATA_IN_CODE:
      case LC_LINKER_OPTIMIZATION_HINT:
      case LC_BUILD_VERSION:
      case LC_DYSYMTAB:
         break;
      default:
         warnf("unrecognised load command 0x%0x", load->cmd);
      }

      rptr += load->cmdsize;
   }
   assert(rptr == data + sizeof(struct mach_header_64) + fhdr->sizeofcmds);

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   blob->veneers = blob->wptr;

   assert(seg != NULL);
   assert(symtab != NULL);

   shash_t *external = load_acquire(&blob->span->owner->symbols);

   for (uint32_t i = 0; i < seg->nsects; i++) {
      const struct section_64 *sec = &(sects[i]);
      const struct relocation_info *relocs = data + sec->reloff;

      // Addend supplied by an ARM64_RELOC_ADDEND entry for the relocation
      // that follows it
      int32_t addend = 0;

      for (uint32_t j = 0; j < sec->nreloc; j++) {
         const struct relocation_info *rel = &(relocs[j]);

         // Only used in diagnostics: relocations which do not reference
         // the symbol table have no name so use a placeholder rather than
         // passing NULL to a %s conversion
         const char *name = "(local)";

         void *ptr = NULL;
         if (rel->r_extern) {
            assert(rel->r_symbolnum < symtab->nsyms);
            const struct nlist_64 *nl = data + symtab->symoff
               + rel->r_symbolnum * sizeof(struct nlist_64);
            name = data + symtab->stroff + nl->n_un.n_strx;

            if (nl->n_type & N_EXT) {
               // Skip the leading underscore of the Mach-O symbol name
               if (icmp(blob->span->name, name + 1))
                  ptr = blob->span->base;
               else if ((ptr = shash_get(external, name + 1)) == NULL)
                  fatal_trace("failed to resolve symbol %s", name + 1);
            }
            else if (nl->n_sect != NO_SECT)
               ptr = blob->span->base + nl->n_value;
         }
         else
            ptr = blob->span->base;

         ptr += addend;
         addend = 0;

         void *patch = load_addr[i] + rel->r_address;
         assert((uint8_t *)patch >= blob->span->base);
         assert((uint8_t *)patch < blob->span->base + blob->span->size);

         switch (rel->r_type) {
#ifdef ARCH_ARM64
         case ARM64_RELOC_UNSIGNED:
            assert(rel->r_length == 3);
            *(void **)patch = ptr;
            break;
         case ARM64_RELOC_SUBTRACTOR:
            // NOTE(review): Apple's reloc.h says this must be followed by
            // an ARM64_RELOC_UNSIGNED entry; it is currently ignored here
            break;
         case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
         case ARM64_RELOC_PAGEOFF12:
            arm64_patch_page_offset21(blob, patch, ptr);
            break;
         case ARM64_RELOC_GOT_LOAD_PAGE21:
         case ARM64_RELOC_PAGE21:
            arm64_patch_page_base_rel21(patch, ptr);
            break;
         case ARM64_RELOC_BRANCH26:
            {
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
               debug_reloc(blob, patch, "ARM64_RELOC_BRANCH26 %s PC%+"PRIiPTR,
                           name, pcrel);
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case ARM64_RELOC_ADDEND:
            // The r_symbolnum bit-field holds a signed 24-bit addend so it
            // must be sign extended before being added to a pointer
            addend = rel->r_symbolnum;
            if (addend & 0x800000)
               addend -= 0x1000000;
            break;
#elif defined ARCH_X86_64
         case X86_64_RELOC_UNSIGNED:
            *(uint64_t *)patch += (uint64_t)ptr;
            break;
         case X86_64_RELOC_BRANCH:
            *(uint32_t *)patch += (uint32_t)(ptr - patch - 4);
            break;
#endif
         default:
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %d for symbol %s",
                        rel->r_type, name);
         }
      }
   }

   // Find the symbol defining the entry point of this code span
   for (uint32_t i = 0; i < symtab->nsyms; i++) {
      const struct nlist_64 *sym =
         data + symtab->symoff + i * sizeof(struct nlist_64);

      if (sym->n_sect == NO_SECT || (sym->n_type & N_TYPE) != N_SECT)
         continue;

      const char *name = data + symtab->stroff + sym->n_un.n_strx;
      if (name[0] == '_' && icmp(blob->span->name, name + 1)) {
         // Section numbers are one-based
         assert(sym->n_sect <= seg->nsects);
         blob->span->entry = load_addr[sym->n_sect - 1] + sym->n_value;
         break;
      }
   }
}
1190
#elif !defined __MINGW32__
1191
// Return the string table which names the entries of the symbol table
// section SYMTAB.  The ELF specification stores the section index of that
// table in the sh_link field of the symbol table header.  FALLBACK is
// returned if the link is out of range or is not a string table.
static const char *elf_symbol_names(const void *data, const Elf64_Ehdr *ehdr,
                                    const Elf64_Shdr *symtab,
                                    const char *fallback)
{
   if (symtab->sh_link < ehdr->e_shnum) {
      const Elf64_Shdr *link =
         data + ehdr->e_shoff + symtab->sh_link * ehdr->e_shentsize;
      if (link->sh_type == SHT_STRTAB)
         return data + link->sh_offset;
   }

   return fallback;
}

// Load a 64-bit ELF object held in memory at DATA into BLOB.
//
// The first pass over the section headers copies every allocated
// SHT_PROGBITS section into the blob, recording its load address, and
// searches each symbol table for the function named after the code span
// to set the span's entry point.  The second pass applies the SHT_RELA
// relocations which target a loaded section.  Nothing is patched if the
// blob overflowed while the sections were being copied.  Malformed input
// and unsupported symbol or relocation types are reported with
// fatal_trace.  SIZE is currently unused.
static void code_load_elf(code_blob_t *blob, const void *data, size_t size)
{
   const Elf64_Ehdr *ehdr = data;

   if (ehdr->e_ident[EI_MAG0] != ELFMAG0
       || ehdr->e_ident[EI_MAG1] != ELFMAG1
       || ehdr->e_ident[EI_MAG2] != ELFMAG2
       || ehdr->e_ident[EI_MAG3] != ELFMAG3)
      fatal_trace("bad ELF magic");
   else if (ehdr->e_shentsize != sizeof(Elf64_Shdr))
      fatal_trace("bad section header size %d != %zu", ehdr->e_shentsize,
                  sizeof(Elf64_Shdr));
   else if (ehdr->e_shstrndx >= ehdr->e_shnum)
      fatal_trace("bad section name string table index %d",
                  ehdr->e_shstrndx);

   // The header size was checked above so the table can be indexed as an
   // ordinary array
   const Elf64_Shdr *sections = data + ehdr->e_shoff;

   // String table holding the section names
   const char *shstrtab = data + sections[ehdr->e_shstrndx].sh_offset;

   // Indexed by section number; stays NULL for sections not copied
   void **load_addr LOCAL = xcalloc_array(ehdr->e_shnum, sizeof(void *));

   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = &(sections[i]);

      switch (shdr->sh_type) {
      case SHT_PROGBITS:
         if (shdr->sh_flags & SHF_ALLOC) {
            code_blob_align(blob, shdr->sh_addralign);
            load_addr[i] = blob->wptr;
            DEBUG_ONLY(code_blob_printf(blob, "%s",
                                        shstrtab + shdr->sh_name));
            code_blob_emit(blob, data + shdr->sh_offset, shdr->sh_size);
         }
         break;

      case SHT_RELA:
         // Handled in second pass
         break;

      case SHT_NULL:
      case SHT_STRTAB:
      case SHT_X86_64_UNWIND:
         break;

      case SHT_SYMTAB:
         {
            // Also guards the division below against a zero entry size
            if (shdr->sh_entsize != sizeof(Elf64_Sym))
               fatal_trace("bad symbol table entry size %d",
                           (int)shdr->sh_entsize);

            const char *names = elf_symbol_names(data, ehdr, shdr, shstrtab);
            const Elf64_Sym *syms = data + shdr->sh_offset;
            const size_t nsyms = shdr->sh_size / sizeof(Elf64_Sym);

            for (size_t j = 0; j < nsyms; j++) {
               const Elf64_Sym *sym = &(syms[j]);

               if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
                  continue;
               else if (!icmp(blob->span->name, names + sym->st_name))
                  continue;
               else if (sym->st_shndx >= ehdr->e_shnum
                        || load_addr[sym->st_shndx] == NULL)
                  // Reserved indexes such as SHN_ABS are rejected here too
                  fatal_trace("missing section %d for symbol %s",
                              sym->st_shndx, names + sym->st_name);
               else {
                  blob->span->entry =
                     load_addr[sym->st_shndx] + sym->st_value;
                  break;
               }
            }
         }
         break;

      default:
         warnf("ignoring ELF section %s with type %x",
               shstrtab + shdr->sh_name, shdr->sh_type);
      }
   }

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   blob->veneers = blob->wptr;

   shash_t *external = load_acquire(&blob->span->owner->symbols);

   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = &(sections[i]);
      if (shdr->sh_type != SHT_RELA)
         continue;
      else if (shdr->sh_info >= ehdr->e_shnum)
         fatal_trace("bad target section index in relocation section %s",
                     shstrtab + shdr->sh_name);

      // The section these relocations modify
      const Elf64_Shdr *mod = &(sections[shdr->sh_info]);
      if (mod->sh_type != SHT_PROGBITS || !(mod->sh_flags & SHF_ALLOC))
         continue;
      else if (load_addr[shdr->sh_info] == NULL)
         fatal_trace("section %s not loaded", shstrtab + mod->sh_name);
      else if (shdr->sh_link >= ehdr->e_shnum)
         fatal_trace("bad symbol table index in relocation section %s",
                     shstrtab + shdr->sh_name);

      const Elf64_Shdr *symtab = &(sections[shdr->sh_link]);
      if (symtab->sh_type != SHT_SYMTAB)
         fatal_trace("section %s is not a symbol table",
                     shstrtab + symtab->sh_name);

      // The entry size of every symbol table was validated in the first
      // pass
      const char *names = elf_symbol_names(data, ehdr, symtab, shstrtab);
      const Elf64_Sym *syms = data + symtab->sh_offset;

      const Elf64_Rela *endp = data + shdr->sh_offset + shdr->sh_size;
      for (const Elf64_Rela *r = data + shdr->sh_offset; r < endp; r++) {
         assert(ELF64_R_SYM(r->r_info)
                < symtab->sh_size / sizeof(Elf64_Sym));
         const Elf64_Sym *sym = &(syms[ELF64_R_SYM(r->r_info)]);
         const char *symname = names + sym->st_name;

         // Left as NULL if the symbol cannot be resolved
         void *ptr = NULL;
         switch (ELF64_ST_TYPE(sym->st_info)) {
         case STT_NOTYPE:
         case STT_FUNC:
            if (sym->st_shndx == SHN_UNDEF)
               ptr = shash_get(external, symname);
            else if (sym->st_shndx < ehdr->e_shnum
                     && load_addr[sym->st_shndx] != NULL)
               ptr = load_addr[sym->st_shndx] + sym->st_value;
            break;
         case STT_SECTION:
            if (sym->st_shndx < ehdr->e_shnum)
               ptr = load_addr[sym->st_shndx];
            break;
         default:
            fatal_trace("cannot handle ELF symbol type %d",
                        ELF64_ST_TYPE(sym->st_info));
         }

         if (ptr == NULL)
            fatal_trace("cannot resolve symbol %s type %d",
                        symname, ELF64_ST_TYPE(sym->st_info));

         assert(r->r_offset < mod->sh_size);
         void *patch = load_addr[shdr->sh_info] + r->r_offset;

         switch (ELF64_R_TYPE(r->r_info)) {
         case R_X86_64_64:
            debug_reloc(blob, patch, "R_X86_64_64 %s", symname);
            *(uint64_t *)patch = (uint64_t)ptr + r->r_addend;
            break;
         case R_X86_64_PC32:
            {
               const ptrdiff_t pcrel = ptr + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_PC32 %s PC%+"PRIiPTR,
                           symname, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_X86_64_GOTPCREL:
            {
               void *got = code_emit_got(blob, ptr);
               const ptrdiff_t pcrel = got + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_GOTPCREL %s PC%+"PRIiPTR,
                           symname, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_X86_64_PLT32:
            {
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = veneer + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_PLT32 %s PC%+"PRIiPTR,
                           symname, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_AARCH64_CALL26:
            {
               // The low 26 bits of the instruction hold the word offset
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = (veneer + r->r_addend - patch) >> 2;
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case R_AARCH64_PREL64:
            *(uint64_t *)patch = ptr + r->r_addend - patch;
            break;
         case R_AARCH64_MOVW_UABS_G0_NC:
            // Each MOVW relocation ORs one 16-bit slice of the address
            // into the instruction at bit 5
            *(uint32_t *)patch |=
               (((uintptr_t)ptr + r->r_addend) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G1_NC:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 16) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G2_NC:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 32) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G3:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 48) & 0xffff) << 5;
            break;
         default:
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            // Cast so the argument matches the %ld conversion
            fatal_trace("cannot handle relocation type %ld for symbol %s",
                        (long)ELF64_R_TYPE(r->r_info), symname);
         }
      }
   }
}
1381
#endif
1382

1383
// Load the object file image of SIZE bytes at DATA into BLOB using the
// loader for the object format of the platform this file was built for:
// Mach-O when __APPLE__ is defined, PE when __MINGW32__ is defined and
// ELF otherwise.
void code_load_object(code_blob_t *blob, const void *data, size_t size)
{
#ifdef __APPLE__
   code_load_macho(blob, data, size);
#elif defined(__MINGW32__)
   code_load_pe(blob, data, size);
#else
   code_load_elf(blob, data, size);
#endif
}
15,878✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc