nickg / nvc / 20414172721

21 Dec 2025 06:41PM UTC coverage: 92.602% (+0.006%) from 92.596%
Build triggered by a push (github), committer: nickg
Commit: Move all printf-related functions to a new file

357 of 413 new or added lines in 12 files covered (86.44%).
3 existing lines in 3 files are now uncovered.
75646 of 81689 relevant lines covered (92.6%).
463080.1 hits per line.

Source file: /src/jit/jit-code.c (67.4% covered)
1
//
2
//  Copyright (C) 2022-2024  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "cpustate.h"
20
#include "debug.h"
21
#include "hash.h"
22
#include "ident.h"
23
#include "jit/jit-priv.h"
24
#include "option.h"
25
#include "printf.h"
26
#include "thread.h"
27

28
#include <assert.h>
29
#include <math.h>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <stdio.h>
33
#include <unistd.h>
34
#include <inttypes.h>
35

36
#if defined __MINGW32__
37
#include <winnt.h>
38
#elif defined __APPLE__
39
#include <mach-o/loader.h>
40
#include <mach-o/reloc.h>
41
#include <mach-o/nlist.h>
42
#include <mach-o/stab.h>
43
#include <mach-o/arm64/reloc.h>
44
#include <mach-o/x86_64/reloc.h>
45
#else
46
#include <elf.h>
47
#endif
48

49
#ifdef HAVE_CAPSTONE
50
#include <capstone.h>
51
#endif
52

53
#ifndef R_AARCH64_MOVW_UABS_G0_NC
54
#define R_AARCH64_MOVW_UABS_G0_NC 264
55
#endif
56

57
#ifndef R_AARCH64_MOVW_UABS_G1_NC
58
#define R_AARCH64_MOVW_UABS_G1_NC 266
59
#endif
60

61
#ifndef R_AARCH64_MOVW_UABS_G2_NC
62
#define R_AARCH64_MOVW_UABS_G2_NC 268
63
#endif
64

65
#ifndef R_AARCH64_MOVW_UABS_G3
66
#define R_AARCH64_MOVW_UABS_G3 269
67
#endif
68

69
#ifndef SHT_X86_64_UNWIND
70
#define SHT_X86_64_UNWIND 0x70000001
71
#endif
72

73
#ifndef IMAGE_REL_ARM64_BRANCH26
74
#define IMAGE_REL_ARM64_BRANCH26 0x03
75
#endif
76

77
#ifndef IMAGE_REL_ARM64_ADDR32NB
78
#define IMAGE_REL_ARM64_ADDR32NB 0x02
79
#endif
80

81
#ifndef IMAGE_REL_ARM64_PAGEBASE_REL21
82
#define IMAGE_REL_ARM64_PAGEBASE_REL21 0x04
83
#endif
84

85
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12A
86
#define IMAGE_REL_ARM64_PAGEOFFSET_12A 0x06
87
#endif
88

89
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12L
90
#define IMAGE_REL_ARM64_PAGEOFFSET_12L 0x07
91
#endif
92

93
#define CODE_PAGE_ALIGN   4096
94
#define CODE_PAGE_SIZE    0x400000
95
#define THREAD_CACHE_SIZE 0x10000
96
#define CODE_BLOB_ALIGN   256
97
#define MIN_BLOB_SIZE     0x4000
98

99
#define __IMM64(x) __IMM32(x), __IMM32((x) >> 32)
100
#define __IMM32(x) __IMM16(x), __IMM16((x) >> 16)
101
#define __IMM16(x) (x) & 0xff, ((x) >> 8) & 0xff
102

103
STATIC_ASSERT(MIN_BLOB_SIZE <= THREAD_CACHE_SIZE);
104
STATIC_ASSERT(MIN_BLOB_SIZE % CODE_BLOB_ALIGN == 0);
105
STATIC_ASSERT(CODE_PAGE_SIZE % THREAD_CACHE_SIZE == 0);
106

107
typedef struct _code_page code_page_t;
108

109
typedef struct {
110
   uintptr_t  addr;
111
   char      *text;
112
} code_comment_t;
113

114
typedef struct {
115
   unsigned        count;
116
   unsigned        max;
117
   code_comment_t *comments;
118
} code_debug_t;
119

120
typedef struct _code_span {
121
   code_cache_t *owner;
122
   code_span_t  *next;
123
   ident_t       name;
124
   uint8_t      *base;
125
   void         *entry;
126
   size_t        size;
127
#ifdef DEBUG
128
   code_debug_t  debug;
129
#endif
130
} code_span_t;
131

132
typedef struct _patch_list {
133
   patch_list_t    *next;
134
   uint8_t         *wptr;
135
   jit_label_t      label;
136
   code_patch_fn_t  fn;
137
} patch_list_t;
138

139
typedef struct _code_page {
140
   code_cache_t *owner;
141
   code_page_t  *next;
142
   uint8_t      *mem;
143
} code_page_t;
144

145
typedef struct _code_cache {
146
   nvc_lock_t   lock;
147
   code_page_t *pages;
148
   code_span_t *spans;
149
   code_span_t *freelist[MAX_THREADS];
150
   code_span_t *globalfree;
151
   shash_t     *symbols;
152
   FILE        *perfmap;
153
#ifdef HAVE_CAPSTONE
154
   csh          capstone;
155
#endif
156
#ifdef DEBUG
157
   size_t       used;
158
#endif
159
} code_cache_t;
160

161
static void code_disassemble(code_span_t *span, uintptr_t mark,
162
                             struct cpu_state *cpu);
163

164
static void code_cache_unwinder(uintptr_t addr, debug_frame_t *frame,
×
165
                                void *context)
166
{
167
   code_cache_t *code = context;
×
168

169
   const uint8_t *pc = (uint8_t *)addr;
×
170
   for (code_span_t *span = code->spans; span; span = span->next) {
×
171
      if (pc >= span->base && pc < span->base + span->size) {
×
172
         frame->kind = FRAME_VHDL;
×
173
         frame->disp = pc - span->base;
×
174
         frame->symbol = istr(span->name);
×
175
      }
176
   }
177
}
×
178

179
static void code_fault_handler(int sig, void *addr, struct cpu_state *cpu,
×
180
                               void *context)
181
{
182
   code_page_t *page = context;
×
183

184
   const uint8_t *pc = (uint8_t *)cpu->pc;
×
185
   if (pc < page->mem || pc > page->mem + CODE_PAGE_SIZE)
×
186
      return;
187

188
   uintptr_t mark = cpu->pc;
×
189
#ifndef __MINGW32__
190
   if (sig == SIGTRAP)
×
191
      mark--;   // Point to faulting instruction
×
192
#endif
193

194
   for (code_span_t *span = page->owner->spans; span; span = span->next) {
×
195
      if (pc >= span->base && pc < span->base + span->size && span->name)
×
196
         code_disassemble(span, mark, cpu);
×
197
   }
198
}
199

200
#ifdef DEBUG
201
static bool code_cache_contains(code_cache_t *code, uint8_t *base, size_t size)
15,259✔
202
{
203
   assert_lock_held(&code->lock);
15,259✔
204

205
   for (code_page_t *p = code->pages; p; p = p->next) {
15,259✔
206
      if (base >= p->mem && base + size <= p->mem + CODE_PAGE_SIZE)
15,259✔
207
         return true;
208
   }
209

210
   return false;
211
}
212
#endif
213

214
static code_span_t *code_span_new(code_cache_t *code, ident_t name,
15,259✔
215
                                  uint8_t *base, size_t size)
216
{
217
   SCOPED_LOCK(code->lock);
15,259✔
218

219
   assert(code_cache_contains(code, base, size));
15,259✔
220

221
   code_span_t *span = xcalloc(sizeof(code_span_t));
15,259✔
222
   span->name  = name;
15,259✔
223
   span->next  = code->spans;
15,259✔
224
   span->base  = base;
15,259✔
225
   span->entry = base;
15,259✔
226
   span->size  = size;
15,259✔
227
   span->owner = code;
15,259✔
228

229
   code->spans = span;
15,259✔
230
   return span;
15,259✔
231
}
232

233
static void code_page_new(code_cache_t *code)
2,798✔
234
{
235
   assert_lock_held(&code->lock);
2,798✔
236

237
   code_page_t *page = xcalloc(sizeof(code_page_t));
2,798✔
238
   page->owner = code;
2,798✔
239
   page->next  = code->pages;
2,798✔
240
   page->mem   = map_jit_pages(CODE_PAGE_ALIGN, CODE_PAGE_SIZE);
2,798✔
241

242
   add_fault_handler(code_fault_handler, page);
2,798✔
243
   debug_add_unwinder(page->mem, CODE_PAGE_SIZE, code_cache_unwinder, code);
2,798✔
244

245
   code->pages = page;
2,798✔
246

247
   code_span_t *span = xcalloc(sizeof(code_span_t));
2,798✔
248
   span->next  = code->spans;
2,798✔
249
   span->base  = page->mem;
2,798✔
250
   span->size  = CODE_PAGE_SIZE;
2,798✔
251
   span->owner = code;
2,798✔
252

253
   code->globalfree = code->spans = span;
2,798✔
254
}
2,798✔
255

256
code_cache_t *code_cache_new(void)
2,792✔
257
{
258
   code_cache_t *code = xcalloc(sizeof(code_cache_t));
2,792✔
259

260
   {
261
      SCOPED_LOCK(code->lock);
5,584✔
262
      code_page_new(code);
2,792✔
263
   }
264

265
#ifdef HAVE_CAPSTONE
266
#if defined ARCH_X86_64
267
   if (cs_open(CS_ARCH_X86, CS_MODE_64, &(code->capstone)) != CS_ERR_OK)
268
      fatal_trace("failed to init capstone for x86_64");
269
#elif defined ARCH_ARM64
270
   if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &(code->capstone)) != CS_ERR_OK)
271
      fatal_trace("failed to init capstone for Arm64");
272
#else
273
#error Cannot configure capstone for this architecture
274
#endif
275

276
   if (cs_option(code->capstone, CS_OPT_DETAIL, 1) != CS_ERR_OK)
277
      fatal_trace("failed to set capstone detailed mode");
278
#endif
279

280
   shash_t *s = shash_new(32);
2,792✔
281

282
   extern void __nvc_putpriv(jit_handle_t, void *);
2,792✔
283
   extern void __nvc_sched_waveform(jit_anchor_t *, jit_scalar_t *, tlab_t *);
2,792✔
284
   extern void __nvc_sched_process(jit_anchor_t *, jit_scalar_t *, tlab_t *);
2,792✔
285
   extern void __nvc_test_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);
2,792✔
286
   extern void __nvc_last_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);
2,792✔
287

288
   shash_put(s, "__nvc_sched_waveform", &__nvc_sched_waveform);
2,792✔
289
   shash_put(s, "__nvc_sched_process", &__nvc_sched_process);
2,792✔
290
   shash_put(s, "__nvc_test_event", &__nvc_test_event);
2,792✔
291
   shash_put(s, "__nvc_last_event", &__nvc_last_event);
2,792✔
292
   shash_put(s, "__nvc_mspace_alloc", &__nvc_mspace_alloc);
2,792✔
293
   shash_put(s, "__nvc_putpriv", &__nvc_putpriv);
2,792✔
294
   shash_put(s, "__nvc_do_exit", &__nvc_do_exit);
2,792✔
295
   shash_put(s, "__nvc_pack", &__nvc_pack);
2,792✔
296
   shash_put(s, "__nvc_unpack", &__nvc_unpack);
2,792✔
297
   shash_put(s, "__nvc_vec4op", &__nvc_vec4op);
2,792✔
298
   shash_put(s, "memmove", &memmove);
2,792✔
299
   shash_put(s, "memcpy", &memcpy);
2,792✔
300
   shash_put(s, "memset", &memset);
2,792✔
301
   shash_put(s, "pow", &pow);
2,792✔
302
   shash_put(s, "ldexp", &ldexp);
2,792✔
303
   shash_put(s, "exp2", &exp2);
2,792✔
304

305
#if defined __APPLE__ && defined ARCH_ARM64
306
   shash_put(s, "bzero", &bzero);
307
#elif defined __APPLE__ && defined ARCH_X86_64
308
   shash_put(s, "__bzero", &bzero);
309
#elif defined __MINGW32__ && defined ARCH_X86_64
310
   extern void ___chkstk_ms(void);
311
   shash_put(s, "___chkstk_ms", &___chkstk_ms);
312
#endif
313

314
   store_release(&code->symbols, s);
2,792✔
315

316
   return code;
2,792✔
317
}
318

319
void code_cache_free(code_cache_t *code)
2,786✔
320
{
321
   for (code_page_t *it = code->pages, *tmp; it; it = tmp) {
5,578✔
322
      debug_remove_unwinder(it->mem);
2,792✔
323
      remove_fault_handler(code_fault_handler, it);
2,792✔
324

325
      nvc_munmap(it->mem, CODE_PAGE_SIZE);
2,792✔
326

327
      tmp = it->next;
2,792✔
328
      free(it);
2,792✔
329
   }
330

331
   for (code_span_t *it = code->spans, *tmp; it; it = tmp) {
20,829✔
332
      tmp = it->next;
18,043✔
333
      DEBUG_ONLY(free(it->debug.comments));
18,043✔
334
      free(it);
18,043✔
335
   }
336

337
#ifdef HAVE_CAPSTONE
338
   cs_close(&(code->capstone));
339
#endif
340

341
#ifdef DEBUG
342
   if (code->used > 0)
2,786✔
343
      debugf("JIT code footprint: %zu bytes", code->used);
1,418✔
344
#endif
345

346
   shash_free(code->symbols);
2,786✔
347
   free(code);
2,786✔
348
}
2,786✔
349

350
#ifdef HAVE_CAPSTONE
351
static int code_print_spaces(int col, int tab)
352
{
353
   for (; col < tab; col++)
354
      fputc(' ', stdout);
355
   return col;
356
}
357
#endif
358

359
#if defined DEBUG && HAVE_CAPSTONE
360
static int code_comment_compare(const void *a, const void *b)
361
{
362
   const code_comment_t *ca = a;
363
   const code_comment_t *cb = b;
364

365
   if (ca->addr < cb->addr)
366
      return -1;
367
   else if (ca->addr > cb->addr)
368
      return 1;
369
   else
370
      return 0;
371
}
372
#endif
373

374
static void code_disassemble(code_span_t *span, uintptr_t mark,
×
375
                             struct cpu_state *cpu)
376
{
377
   SCOPED_LOCK(span->owner->lock);
×
378

379
   printf("--");
×
380

381
   const int namelen = ident_len(span->name);
×
382
   for (int i = 0; i < 72 - namelen; i++)
×
383
      fputc('-', stdout);
×
384

385
   printf(" %s ----\n", istr(span->name));
×
386

387
#ifdef HAVE_CAPSTONE
388
   cs_insn *insn = cs_malloc(span->owner->capstone);
389

390
#ifdef DEBUG
391
   qsort(span->debug.comments, span->debug.count, sizeof(code_comment_t),
392
         code_comment_compare);
393
   code_comment_t *comment = span->debug.comments;
394
#endif
395

396
   const uint8_t *const eptr = span->base + span->size;
397
   for (const uint8_t *ptr = span->base; ptr < eptr; ) {
398
      uint64_t address = (uint64_t)ptr;
399

400
#ifdef DEBUG
401
      for (; comment < span->debug.comments + span->debug.count
402
              && comment->addr <= address; comment++)
403
         printf("%30s;; %s\n", "", comment->text);
404
#endif
405

406
      int zeros = 0;
407
      for (const uint8_t *zp = ptr; zp < eptr && *zp == 0; zp++, zeros++);
408

409
      if (zeros > 8 || zeros == eptr - ptr) {
410
         printf("%30s;; skipping %d zero bytes\n", "", zeros);
411
         ptr += zeros;
412
         continue;
413
      }
414

415
      size_t size = eptr - ptr;
416
      int col = 0;
417
      if (cs_disasm_iter(span->owner->capstone, &ptr, &size, &address, insn)) {
418
         char hex1[33], *p = hex1;
419
         for (size_t k = 0; k < insn->size; k++)
420
            p += checked_sprintf(p, hex1 + sizeof(hex1) - p, "%02x",
421
                                 insn->bytes[k]);
422

423
         col = printf("%-12" PRIx64 " %-16.16s %s %s", insn->address,
424
                          hex1, insn->mnemonic, insn->op_str);
425

426
#ifdef ARCH_X86_64
427
         if (strcmp(insn->mnemonic, "movabs") == 0) {
428
            const cs_x86_op *src = &(insn->detail->x86.operands[1]);
429
            if (src->type == X86_OP_IMM) {
430
               const char *sym = debug_symbol_name((void *)src->imm);
431
               if (sym != NULL) {
432
                  col = code_print_spaces(col, 60);
433
                  col += printf(" ; %s", sym);
434
               }
435
            }
436
         }
437
#endif
438

439
         if (strlen(hex1) > 16)
440
            col = printf("\n%15s -%-16s", "", hex1 + 16) - 1;
441
      }
442
      else {
443
#ifdef ARCH_ARM64
444
         col = printf("%-12" PRIx64 " %-16.08x %s 0x%08x", (uint64_t)ptr,
445
                      *(uint32_t *)ptr, ".word", *(uint32_t *)ptr);
446
         ptr += 4;
447
#else
448
         col = printf("%-12" PRIx64 " %-16.02x %s 0x%02x", (uint64_t)ptr,
449
                      *ptr, ".byte", *ptr);
450
         ptr++;
451
#endif
452
      }
453

454
      if (mark != 0 && (ptr >= eptr || address > mark)) {
455
         col = code_print_spaces(col, 66);
456
         printf("<=============\n");
457
         if (cpu != NULL) {
458
#ifdef ARCH_X86_64
459
            const char *names[] = {
460
               "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
461
               "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"
462
            };
463
            for (int i = 0; i < ARRAY_LEN(names); i++)
464
               printf("\t%s\t%"PRIxPTR"\n", names[i], cpu->regs[i]);
465
#else
466
            for (int i = 0; i < 32; i++)
467
               printf("\tR%d\t%"PRIxPTR"\n", i, cpu->regs[i]);
468
#endif
469
         }
470
         mark = 0;
471
      }
472
      else
473
         printf("\n");
474
   }
475

476
   cs_free(insn, 1);
477
#else
478
   jit_hexdump(span->base, span->size, 16, (void *)mark, "");
×
479
#endif
480

481
   for (int i = 0; i < 80; i++)
×
482
      fputc('-', stdout);
×
483
   printf("\n");
×
484
   fflush(stdout);
×
485
}
×
486

487
static void code_write_perf_map(code_span_t *span)
×
488
{
489
   SCOPED_LOCK(span->owner->lock);
×
490

491
   if (span->owner->perfmap == NULL) {
×
492
      char *fname LOCAL = xasprintf("/tmp/perf-%d.map", getpid());
×
493
      if ((span->owner->perfmap = fopen(fname, "w")) == NULL) {
×
494
         warnf("cannot create %s: %s", fname, last_os_error());
×
495
         opt_set_int(OPT_PERF_MAP, 0);
×
496
         return;
×
497
      }
498
      else
499
         debugf("writing perf map to %s", fname);
×
500
   }
501

502
   fprintf(span->owner->perfmap, "%p 0x%zx %s\n", span->base, span->size,
×
503
           istr(span->name));
504
   fflush(span->owner->perfmap);
×
505
}
506

507
code_blob_t *code_blob_new(code_cache_t *code, ident_t name, size_t hint)
13,805✔
508
{
509
   code_span_t **freeptr = &(code->freelist[thread_id()]);
13,805✔
510

511
   code_span_t *free = relaxed_load(freeptr);
13,805✔
512
   if (free == NULL) {
13,805✔
513
      free = code_span_new(code, NULL, code->pages->mem, 0);
1,454✔
514
      relaxed_store(freeptr, free);
1,454✔
515
   }
516

517
   const size_t reqsz = hint ?: MIN_BLOB_SIZE;
13,805✔
518

519
   if (free->size < reqsz) {
13,805✔
520
      SCOPED_LOCK(code->lock);
1,532✔
521

522
#ifdef DEBUG
523
      if (free->size > 0)
1,532✔
524
         debugf("thread %d needs new code cache from global free list "
33✔
525
                "(requested %zu bytes, wasted %zu bytes)",
526
                thread_id(), reqsz, free->size);
527
#endif
528

529
      const size_t chunksz = MAX(reqsz, THREAD_CACHE_SIZE);
1,532✔
530
      const size_t alignedsz = ALIGN_UP(chunksz, CODE_BLOB_ALIGN);
1,532✔
531

532
      if (alignedsz > code->globalfree->size) {
1,532✔
533
         DEBUG_ONLY(debugf("requesting new %d byte code page", CODE_PAGE_SIZE));
6✔
534
         code_page_new(code);
6✔
535
         assert(code->globalfree->size == CODE_PAGE_SIZE);
6✔
536
      }
537

538
      const size_t take = MIN(code->globalfree->size, alignedsz);
1,532✔
539

540
      free->size = take;
1,532✔
541
      free->base = code->globalfree->base;
1,532✔
542

543
      code->globalfree->base += take;
1,532✔
544
      code->globalfree->size -= take;
1,532✔
545
   }
546

547
   assert(reqsz <= free->size);
13,805✔
548
   assert(((uintptr_t)free->base & (CODE_BLOB_ALIGN - 1)) == 0);
13,805✔
549

550
   code_span_t *span = code_span_new(code, name, free->base, free->size);
13,805✔
551

552
   free->base += span->size;
13,805✔
553
   free->size -= span->size;
13,805✔
554

555
   code_blob_t *blob = xcalloc(sizeof(code_blob_t));
13,805✔
556
   blob->span = span;
13,805✔
557
   blob->wptr = span->base;
13,805✔
558

559
   thread_wx_mode(WX_WRITE);
13,805✔
560

561
   return blob;
13,805✔
562
}
563

564
void code_blob_finalise(code_blob_t *blob, jit_entry_fn_t *entry)
13,805✔
565
{
566
   code_span_t *span = blob->span;
13,805✔
567
   span->size = blob->wptr - span->base;
13,805✔
568

569
   code_span_t *freespan = relaxed_load(&(span->owner->freelist[thread_id()]));
13,805✔
570
   assert(freespan->size == 0);
13,805✔
571

572
   ihash_free(blob->labels);
13,805✔
573
   blob->labels = NULL;
13,805✔
574

575
   if (unlikely(blob->patches != NULL))
13,805✔
576
      fatal_trace("not all labels in %s were patched", istr(span->name));
577
   else if (unlikely(blob->overflow)) {
13,805✔
578
      // Return all the memory
579
      freespan->size = freespan->base - span->base;
1✔
580
      freespan->base = span->base;
1✔
581
      free(blob);
1✔
582
      return;
1✔
583
   }
584
   else if (span->size == 0)
13,804✔
585
      fatal_trace("code span %s is empty", istr(span->name));
586

587
   uint8_t *aligned = ALIGN_UP(blob->wptr, CODE_BLOB_ALIGN);
13,804✔
588
   freespan->size = freespan->base - aligned;
13,804✔
589
   freespan->base = aligned;
13,804✔
590

591
   if (opt_get_verbose(OPT_ASM_VERBOSE, istr(span->name))) {
13,804✔
NEW
592
      nvc_printf("\n$bold$$blue$");
×
593
      code_disassemble(span, 0, NULL);
×
NEW
594
      nvc_printf("$$\n");
×
595
   }
596

597
   __builtin___clear_cache((char *)span->base, (char *)blob->wptr);
13,804✔
598

599
   thread_wx_mode(WX_EXECUTE);
13,804✔
600

601
   store_release(entry, (jit_entry_fn_t)span->entry);
13,804✔
602

603
   DEBUG_ONLY(relaxed_add(&span->owner->used, span->size));
13,804✔
604
   free(blob);
13,804✔
605

606
   if (opt_get_int(OPT_PERF_MAP))
13,804✔
607
      code_write_perf_map(span);
×
608
}
609

610
__attribute__((cold, noinline))
611
static void code_blob_overflow(code_blob_t *blob)
1✔
612
{
613
   warnf("JIT code buffer for %s too small", istr(blob->span->name));
1✔
614
   for (patch_list_t *it = blob->patches, *tmp; it; it = tmp) {
1✔
615
      tmp = it->next;
×
616
      free(it);
×
617
   }
618
   blob->patches = NULL;
1✔
619
   blob->overflow = true;
1✔
620
}
1✔
621

622
void code_blob_emit(code_blob_t *blob, const uint8_t *bytes, size_t len)
24,423✔
623
{
624
   if (unlikely(blob->overflow))
24,423✔
625
      return;
626
   else if (unlikely(blob->wptr + len > blob->span->base + blob->span->size)) {
24,423✔
627
      code_blob_overflow(blob);
1✔
628
      return;
1✔
629
   }
630

631
   memcpy(blob->wptr, bytes, len);
24,422✔
632
   blob->wptr += len;
24,422✔
633
}
634

635
void code_blob_align(code_blob_t *blob, unsigned align)
14,356✔
636
{
637
#ifdef ARCH_X86_64
638
   const uint8_t pad[] = { 0x90 };
14,356✔
639
#else
640
   const uint8_t pad[] = { 0x00 };
641
#endif
642

643
   assert(is_power_of_2(align));
14,356✔
644
   assert(align % ARRAY_LEN(pad) == 0);
645

646
   while (((uintptr_t)blob->wptr & (align - 1)) && !blob->overflow)
19,207✔
647
      code_blob_emit(blob, pad, ARRAY_LEN(pad));
4,851✔
648
}
14,356✔
649

650
void code_blob_mark(code_blob_t *blob, jit_label_t label)
71✔
651
{
652
   if (unlikely(blob->overflow))
71✔
653
      return;
654
   else if (blob->labels == NULL)
71✔
655
      blob->labels = ihash_new(256);
66✔
656

657
   ihash_put(blob->labels, label, blob->wptr);
71✔
658

659
   for (patch_list_t **p = &(blob->patches); *p; ) {
88✔
660
      if ((*p)->label == label) {
17✔
661
         patch_list_t *next = (*p)->next;
7✔
662
         (*(*p)->fn)(blob, label, (*p)->wptr, blob->wptr);
7✔
663
         free(*p);
7✔
664
         *p = next;
7✔
665
      }
666
      else
667
         p = &((*p)->next);
10✔
668
   }
669
}
670

671
void code_blob_patch(code_blob_t *blob, jit_label_t label, code_patch_fn_t fn)
8✔
672
{
673
   void *ptr = NULL;
8✔
674
   if (unlikely(blob->overflow))
8✔
675
      return;
676
   else if (blob->labels != NULL && (ptr = ihash_get(blob->labels, label)))
8✔
677
      (*fn)(blob, label, blob->wptr, ptr);
1✔
678
   else {
679
      patch_list_t *new = xmalloc(sizeof(patch_list_t));
7✔
680
      new->next  = blob->patches;
7✔
681
      new->fn    = fn;
7✔
682
      new->label = label;
7✔
683
      new->wptr  = blob->wptr;
7✔
684

685
      blob->patches = new;
7✔
686
   }
687
}
688

689
#ifdef DEBUG
690
static void code_blob_print_value(text_buf_t *tb, jit_value_t value)
392✔
691
{
692
   switch (value.kind) {
392✔
693
   case JIT_VALUE_REG:
162✔
694
      tb_printf(tb, "R%d", value.reg);
162✔
695
      break;
162✔
696
   case JIT_VALUE_INT64:
203✔
697
      if (value.int64 < 4096)
203✔
698
         tb_printf(tb, "#%"PRIi64, value.int64);
199✔
699
      else
700
         tb_printf(tb, "#0x%"PRIx64, value.int64);
4✔
701
      break;
702
   case JIT_VALUE_DOUBLE:
1✔
703
      tb_printf(tb, "%%%g", value.dval);
1✔
704
      break;
1✔
705
   case JIT_ADDR_CPOOL:
×
706
      tb_printf(tb, "[CP+%"PRIi64"]", value.int64);
×
707
      break;
×
708
   case JIT_ADDR_REG:
19✔
709
      tb_printf(tb, "[R%d", value.reg);
19✔
710
      if (value.disp != 0)
19✔
711
         tb_printf(tb, "+%d", value.disp);
1✔
712
      tb_cat(tb, "]");
19✔
713
      break;
19✔
714
   case JIT_ADDR_ABS:
×
715
      tb_printf(tb, "[#%016"PRIx64"]", value.int64);
×
716
      break;
×
717
   case JIT_ADDR_COVER:
×
718
      tb_printf(tb, "@%"PRIi64, value.int64);
×
719
      break;
×
720
   case JIT_VALUE_LABEL:
5✔
721
      tb_printf(tb, "%d", value.label);
5✔
722
      break;
5✔
723
   case JIT_VALUE_HANDLE:
2✔
724
      tb_printf(tb, "<%d>", value.handle);
2✔
725
      break;
2✔
726
   case JIT_VALUE_EXIT:
×
727
      tb_printf(tb, "%s", jit_exit_name(value.exit));
×
728
      break;
×
729
   case JIT_VALUE_LOC:
×
730
      tb_printf(tb, "<%s:%d>", loc_file_str(&value.loc), value.loc.first_line);
×
731
      break;
×
732
   case JIT_VALUE_LOCUS:
×
733
      tb_printf(tb, "%p", value.locus);
×
734
      break;
×
735
   case JIT_VALUE_VPOS:
×
736
      tb_printf(tb, "%u:%u", value.vpos.block, value.vpos.op);
×
737
      break;
×
738
   default:
×
739
      tb_cat(tb, "???");
×
740
   }
741
}
392✔
742

743
static void code_blob_add_comment(code_blob_t *blob, uintptr_t addr, char *text)
78,186✔
744
{
745
   code_debug_t *dbg = &(blob->span->debug);
78,186✔
746

747
   if (dbg->count == dbg->max) {
78,186✔
748
      dbg->max = MAX(128, dbg->max * 2);
13,627✔
749
      dbg->comments = xrealloc_array(dbg->comments, dbg->max,
13,627✔
750
                                     sizeof(code_comment_t));
751
   }
752

753
   dbg->comments[dbg->count].addr = addr;
78,186✔
754
   dbg->comments[dbg->count].text = text;
78,186✔
755
   dbg->count++;
78,186✔
756
}
78,186✔
757

758
void code_blob_print_ir(code_blob_t *blob, jit_ir_t *ir)
348✔
759
{
760
   LOCAL_TEXT_BUF tb = tb_new();
696✔
761
   tb_printf(tb, "%s%s", jit_op_name(ir->op), jit_cc_name(ir->cc));
348✔
762

763
   if (ir->size != JIT_SZ_UNSPEC)
348✔
764
      tb_printf(tb, ".%d", 1 << (3 + ir->size));
36✔
765

766
   tb_printf(tb, "%*.s", (int)MAX(0, 10 - tb_len(tb)), "");
348✔
767

768
   if (ir->result != JIT_REG_INVALID)
348✔
769
      tb_printf(tb, "R%d", ir->result);
203✔
770

771
   if (ir->arg1.kind != JIT_VALUE_INVALID) {
348✔
772
      if (ir->result != JIT_REG_INVALID)
263✔
773
         tb_cat(tb, ", ");
187✔
774
      code_blob_print_value(tb, ir->arg1);
263✔
775
   }
776

777
   if (ir->arg2.kind != JIT_VALUE_INVALID) {
348✔
778
      tb_cat(tb, ", ");
129✔
779
      code_blob_print_value(tb, ir->arg2);
129✔
780
   }
781

782
   code_blob_add_comment(blob, (uintptr_t)blob->wptr, tb_claim(tb));
348✔
783
}
348✔
784

785
void code_blob_printf(code_blob_t *blob, const char *fmt, ...)
14,356✔
786
{
787
   va_list ap;
14,356✔
788
   va_start(ap, fmt);
14,356✔
789

790
   char *text = xvasprintf(fmt, ap);
14,356✔
791
   code_blob_add_comment(blob, (uintptr_t)blob->wptr, text);
14,356✔
792

793
   va_end(ap);
14,356✔
794
}
14,356✔
795

796
__attribute__((format(printf, 3, 4)))
797
static void debug_reloc(code_blob_t *blob, void *patch, const char *fmt, ...)
63,482✔
798
{
799
   va_list ap;
63,482✔
800
   va_start(ap, fmt);
63,482✔
801

802
   char *text = xvasprintf(fmt, ap);
63,482✔
803
   code_blob_add_comment(blob, (uintptr_t)patch, text);
63,482✔
804

805
   va_end(ap);
63,482✔
806
}
63,482✔
807
#else
808
#define debug_reloc(...)
809
#endif   // DEBUG
810

811
#ifdef ARCH_ARM64
812
static void arm64_patch_page_offset21(code_blob_t *blob, uint32_t *patch,
813
                                      void *ptr)
814
{
815
   switch ((*patch >> 23) & 0x7f) {
816
   case 0b1111010:   // LDR (immediate, SIMD&FP)
817
   case 0b1110010:   // LDR (immediate)
818
      assert(*patch & (1 << 30));  // Quadword
819
      assert(((uintptr_t)ptr & 7) == 0);
820
      *patch |= (((uintptr_t)ptr & 0xfff) >> 3) << 10;
821
      break;
822
   case 0b0100010:   // ADD (immediate)
823
      *patch |= ((uintptr_t)ptr & 0xfff) << 10;
824
      break;
825
   default:
826
      blob->span->size = blob->wptr - blob->span->base;
827
      code_disassemble(blob->span, (uintptr_t)patch, NULL);
828
      fatal_trace("cannot patch instruction");
829
   }
830
}
831

832
static void arm64_patch_page_base_rel21(uint32_t *patch, void *ptr)
833
{
834
   const intptr_t dst_page = (intptr_t)ptr & ~UINT64_C(0xfff);
835
   const intptr_t src_page = (intptr_t)patch & ~UINT64_C(0xfff);
836
   const intptr_t upper21 = (dst_page - src_page) >> 12;
837
   assert(upper21 >= -(1 << 20) && upper21 < (1 << 20));
838
   *patch &= ~((0x3 << 29) | (0x7ffff << 5));
839
   *patch |= (upper21 & 3) << 29;
840
   *patch |= ((upper21 >> 2) & 0x7ffff) << 5;
841
}
842
#endif
843

844
static void *code_emit_trampoline(code_blob_t *blob, void *dest)
×
845
{
846
#if defined ARCH_X86_64
847
   const uint8_t veneer[] = {
×
848
      0x48, 0xb8, __IMM64((uintptr_t)dest),  // MOVABS RAX, dest
×
849
      0xff, 0xe0                             // CALL RAX
850
   };
851
#elif defined ARCH_ARM64
852
   const uint8_t veneer[] = {
853
      0x50, 0x00, 0x00, 0x58,   // LDR X16, [PC+8]
854
      0x00, 0x02, 0x1f, 0xd6,   // BR X16
855
      __IMM64((uintptr_t)dest)
856
   };
857
#else
858
   should_not_reach_here();
859
#endif
860

861
   void *prev = memmem(blob->veneers, blob->wptr - blob->veneers,
×
862
                       veneer, ARRAY_LEN(veneer));
863
   if (prev != NULL)
×
864
      return prev;
865
   else {
866
      DEBUG_ONLY(code_blob_printf(blob, "Trampoline for %p", dest));
×
867

868
      void *addr = blob->wptr;
×
869
      code_blob_emit(blob, veneer, ARRAY_LEN(veneer));
×
870
      return addr;
×
871
   }
872
}
873

874
#if !defined __MINGW32__ && !defined __APPLE__
875
static void *code_emit_got(code_blob_t *blob, void *dest)
×
876
{
877
   const uint8_t data[] = { __IMM64((uintptr_t)dest) };
×
878

879
   void *prev = memmem(blob->veneers, blob->veneers - blob->wptr,
×
880
                       data, ARRAY_LEN(data));
881
   if (prev != NULL)
×
882
      return prev;
883
   else {
884
      DEBUG_ONLY(code_blob_printf(blob, "GOT entry for %p", dest));
×
885

886
      void *addr = blob->wptr;
×
887
      code_blob_emit(blob, data, ARRAY_LEN(data));
×
888
      return addr;
×
889
   }
890
}
891
#endif
892

893
#if defined __MINGW32__
894
static void code_load_pe(code_blob_t *blob, const void *data, size_t size)
895
{
896
   const IMAGE_FILE_HEADER *imghdr = data;
897

898
   switch (imghdr->Machine) {
899
   case IMAGE_FILE_MACHINE_AMD64:
900
   case IMAGE_FILE_MACHINE_ARM64:
901
      break;
902
   default:
903
      fatal_trace("unknown target machine %x", imghdr->Machine);
904
   }
905

906
   const IMAGE_SYMBOL *symtab = data + imghdr->PointerToSymbolTable;
907
   const char *strtab = data + imghdr->PointerToSymbolTable
908
      + imghdr->NumberOfSymbols * sizeof(IMAGE_SYMBOL);
909

910
   const IMAGE_SECTION_HEADER *sections =
911
      data + IMAGE_SIZEOF_FILE_HEADER + imghdr->SizeOfOptionalHeader;
912

913
   void **load_addr LOCAL =
914
      xmalloc_array(imghdr->NumberOfSections, sizeof(void *));
915

916
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
917
      if ((sections[i].Characteristics & IMAGE_SCN_CNT_CODE)
918
          || (sections[i].Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)) {
919
         const int align = sections[i].Characteristics & IMAGE_SCN_ALIGN_MASK;
920
         code_blob_align(blob, 1 << ((align >> 20) - 1));
921
         load_addr[i] = blob->wptr;
922
         code_blob_emit(blob, data + sections[i].PointerToRawData,
923
                        sections[i].SizeOfRawData);
924
      }
925
      else if ((sections[i].Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
926
               && sections[i].Misc.VirtualSize > 0)
927
         fatal_trace("non-empty BSS not supported");
928
   }
929

930
   if (blob->overflow)
931
      return;   // Relocations might point outside of code span
932

933
   blob->veneers = blob->wptr;
934

935
   shash_t *external = load_acquire(&blob->span->owner->symbols);
936

937
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
938
      const IMAGE_RELOCATION *relocs = data + sections[i].PointerToRelocations;
939
      for (int j = 0; j < sections[i].NumberOfRelocations; j++) {
940
         const char *name = NULL;
941
         char tmp[9];
942

943
         assert(relocs[j].SymbolTableIndex < imghdr->NumberOfSymbols);
944
         const IMAGE_SYMBOL *sym = symtab + relocs[j].SymbolTableIndex;
945

946
         if (sym->N.Name.Short) {
947
            memcpy(tmp, sym->N.ShortName, 8);
948
            tmp[8] = '\0';
949
            name = tmp;
950
         }
951
         else
952
            name = strtab + sym->N.Name.Long;
953

954
         void *ptr = NULL;
955
         if (sym->SectionNumber > 0) {
956
            assert(sym->SectionNumber - 1 < imghdr->NumberOfSections);
957
            ptr = load_addr[sym->SectionNumber - 1] + sym->Value;
958
         }
959
         else
960
            ptr = shash_get(external, name);
961

962
         if (ptr == NULL && icmp(blob->span->name, name))
963
            ptr = blob->span->base;
964

965
         if (ptr == NULL)
966
            fatal_trace("failed to resolve symbol %s", name);
967

968
         void *patch = load_addr[i] + relocs[j].VirtualAddress;
969
         assert((uint8_t *)patch >= blob->span->base);
970
         assert((uint8_t *)patch < blob->span->base + blob->span->size);
971

972
         switch (relocs[j].Type) {
973
#if defined ARCH_X86_64
974
         case IMAGE_REL_AMD64_ADDR64:
975
            *(uint64_t *)patch += (uint64_t)ptr;
976
            break;
977
         case IMAGE_REL_AMD64_ADDR32NB:
978
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
979
            break;
980
#elif defined ARCH_ARM64
981
         case IMAGE_REL_ARM64_BRANCH26:
982
            {
983
               void *veneer = code_emit_trampoline(blob, ptr);
984
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
985
               *(uint32_t *)patch &= ~0x3ffffff;
986
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
987
            }
988
            break;
989
         case IMAGE_REL_ARM64_ADDR32NB:
990
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
991
            break;
992
         case IMAGE_REL_ARM64_PAGEBASE_REL21:
993
            arm64_patch_page_base_rel21(patch, ptr);
994
            break;
995
         case IMAGE_REL_ARM64_PAGEOFFSET_12A:
996
         case IMAGE_REL_ARM64_PAGEOFFSET_12L:
997
            arm64_patch_page_offset21(blob, patch, ptr);
998
            break;
999
#endif
1000
         default:
1001
            blob->span->size = blob->wptr - blob->span->base;
1002
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
1003
            fatal_trace("cannot handle relocation type %d for symbol %s",
1004
                        relocs[j].Type, name);
1005
         }
1006
      }
1007

1008
      if (strncmp((const char *)sections[i].Name, ".pdata",
1009
                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
1010
         assert(sections[i].SizeOfRawData % sizeof(RUNTIME_FUNCTION) == 0);
1011
         const int count = sections[i].SizeOfRawData / sizeof(RUNTIME_FUNCTION);
1012
         const DWORD64 base = (DWORD64)blob->span->base;
1013

1014
         // TODO: we should also call RtlDeleteFunctionTable at some point
1015
         if (!RtlAddFunctionTable(load_addr[i], count, base))
1016
            fatal_trace("RtlAddFunctionTable failed: %s", last_os_error());
1017
      }
1018
   }
1019

1020
   for (int i = 0; i < imghdr->NumberOfSymbols; i++) {
1021
      const IMAGE_SYMBOL *sym = &(symtab[i]);
1022

1023
      if (sym->SectionNumber == 0 || sym->N.Name.Short)
1024
         continue;
1025
      else if ((sym->Type >> 4) != IMAGE_SYM_DTYPE_FUNCTION)
1026
         continue;
1027
      else if (icmp(blob->span->name, strtab + sym->N.Name.Long)) {
1028
         blob->span->entry = load_addr[sym->SectionNumber - 1] + sym->Value;
1029
         break;
1030
      }
1031
   }
1032
}
1033
#elif defined __APPLE__
1034
static void code_load_macho(code_blob_t *blob, const void *data, size_t size)
1035
{
1036
   const void *rptr = data;
1037

1038
   const struct mach_header_64 *fhdr = rptr;
1039
   rptr += sizeof(struct mach_header_64);
1040

1041
   if (fhdr->magic != MH_MAGIC_64)
1042
      fatal_trace("bad Mach-O magic %x", fhdr->magic);
1043

1044
   const struct segment_command_64 *seg = NULL;
1045
   const struct symtab_command *symtab = NULL;
1046

1047
   void **load_addr LOCAL = NULL;
1048

1049
   for (int i = 0; i < fhdr->ncmds; i++) {
1050
      const struct load_command *load = rptr;
1051
      switch (load->cmd) {
1052
      case LC_SEGMENT_64:
1053
         {
1054
            seg = rptr;
1055
            load_addr = xmalloc_array(seg->nsects, sizeof(void *));
1056

1057
            for (int j = 0; j < seg->nsects; j++) {
1058
               const struct section_64 *sec =
1059
                  (void *)seg + sizeof(struct segment_command_64)
1060
                  + j * sizeof(struct section_64);
1061
               code_blob_align(blob, 1 << sec->align);
1062
               load_addr[j] = blob->wptr;
1063
               DEBUG_ONLY(code_blob_printf(blob, "%s", sec->sectname));
1064
               code_blob_emit(blob, data + sec->offset, sec->size);
1065
            }
1066
         }
1067
         break;
1068
      case LC_SYMTAB:
1069
         symtab = rptr;
1070
         assert(symtab->cmdsize == sizeof(struct symtab_command));
1071
         break;
1072
      case LC_DATA_IN_CODE:
1073
      case LC_LINKER_OPTIMIZATION_HINT:
1074
      case LC_BUILD_VERSION:
1075
      case LC_DYSYMTAB:
1076
         break;
1077
      default:
1078
         warnf("unrecognised load command 0x%0x", load->cmd);
1079
      }
1080

1081
      rptr += load->cmdsize;
1082
   }
1083
   assert(rptr == data + sizeof(struct mach_header_64) + fhdr->sizeofcmds);
1084

1085
   if (blob->overflow)
1086
      return;   // Relocations might point outside of code span
1087

1088
   blob->veneers = blob->wptr;
1089

1090
   assert(seg != NULL);
1091
   assert(symtab != NULL);
1092

1093
   shash_t *external = load_acquire(&blob->span->owner->symbols);
1094

1095
   for (int i = 0; i < seg->nsects; i++) {
1096
      const struct section_64 *sec =
1097
         (void *)seg + sizeof(struct segment_command_64)
1098
         + i * sizeof(struct section_64);
1099

1100
      uint32_t addend = 0;
1101
      for (int j = 0; j < sec->nreloc; j++) {
1102
         const struct relocation_info *rel =
1103
            data + sec->reloff + j * sizeof(struct relocation_info);
1104
         const char *name = NULL;
1105
         void *ptr = NULL;
1106
         if (rel->r_extern) {
1107
            assert(rel->r_symbolnum < symtab->nsyms);
1108
            const struct nlist_64 *nl = data + symtab->symoff
1109
               + rel->r_symbolnum * sizeof(struct nlist_64);
1110
            name = data + symtab->stroff + nl->n_un.n_strx;
1111

1112
            if (nl->n_type & N_EXT) {
1113
               if (icmp(blob->span->name, name + 1))
1114
                  ptr = blob->span->base;
1115
               else if ((ptr = shash_get(external, name + 1)) == NULL)
1116
                  fatal_trace("failed to resolve symbol %s", name + 1);
1117
            }
1118
            else if (nl->n_sect != NO_SECT)
1119
               ptr = blob->span->base + nl->n_value;
1120
         }
1121
         else
1122
            ptr = blob->span->base;
1123

1124
         ptr += addend;
1125
         addend = 0;
1126

1127
         void *patch = load_addr[i] + rel->r_address;
1128
         assert((uint8_t *)patch >= blob->span->base);
1129
         assert((uint8_t *)patch < blob->span->base + blob->span->size);
1130

1131
         switch (rel->r_type) {
1132
#ifdef ARCH_ARM64
1133
         case ARM64_RELOC_UNSIGNED:
1134
            assert(rel->r_length == 3);
1135
            *(void **)patch = ptr;
1136
            break;
1137
         case ARM64_RELOC_SUBTRACTOR:
1138
            break;   // What is this?
1139
         case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
1140
         case ARM64_RELOC_PAGEOFF12:
1141
            arm64_patch_page_offset21(blob, patch, ptr);
1142
            break;
1143
         case ARM64_RELOC_GOT_LOAD_PAGE21:
1144
         case ARM64_RELOC_PAGE21:
1145
            arm64_patch_page_base_rel21(patch, ptr);
1146
            break;
1147
         case ARM64_RELOC_BRANCH26:
1148
            {
1149
               void *veneer = code_emit_trampoline(blob, ptr);
1150
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
1151
               debug_reloc(blob, patch, "ARM64_RELOC_BRANCH26 %s PC%+"PRIiPTR,
1152
                           name, pcrel);
1153
               *(uint32_t *)patch &= ~0x3ffffff;
1154
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
1155
            }
1156
            break;
1157
         case ARM64_RELOC_ADDEND:
1158
            addend = rel->r_symbolnum;
1159
            break;
1160
#elif defined ARCH_X86_64
1161
         case X86_64_RELOC_UNSIGNED:
1162
            *(uint64_t *)patch += (uint64_t)ptr;
1163
            break;
1164
         case X86_64_RELOC_BRANCH:
1165
            *(uint32_t *)patch += (uint32_t)(ptr - patch - 4);
1166
            break;
1167
#endif
1168
         default:
1169
            blob->span->size = blob->wptr - blob->span->base;
1170
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
1171
            fatal_trace("cannot handle relocation type %d for symbol %s",
1172
                        rel->r_type, name);
1173
         }
1174
      }
1175
   }
1176

1177
   for (int i = 0; i < symtab->nsyms; i++) {
1178
      const struct nlist_64 *sym =
1179
         data + symtab->symoff + i * sizeof(struct nlist_64);
1180

1181
      if (sym->n_sect == NO_SECT || (sym->n_type & N_TYPE) != N_SECT)
1182
         continue;
1183

1184
      const char *name = data + symtab->stroff + sym->n_un.n_strx;
1185
      if (name[0] == '_' && icmp(blob->span->name, name + 1)) {
1186
         blob->span->entry = load_addr[sym->n_sect - 1] + sym->n_value;
1187
         break;
1188
      }
1189
   }
1190
}
1191
#elif !defined __MINGW32__
1192
static void code_load_elf(code_blob_t *blob, const void *data, size_t size)
13,550✔
1193
{
1194
   const Elf64_Ehdr *ehdr = data;
13,550✔
1195

1196
   if (ehdr->e_ident[EI_MAG0] != ELFMAG0
13,550✔
1197
       || ehdr->e_ident[EI_MAG1] != ELFMAG1
1198
       || ehdr->e_ident[EI_MAG2] != ELFMAG2
1199
       || ehdr->e_ident[EI_MAG3] != ELFMAG3)
13,550✔
1200
      fatal_trace("bad ELF magic");
1201
   else if (ehdr->e_shentsize != sizeof(Elf64_Shdr))
13,550✔
1202
      fatal_trace("bad section header size %d != %zu", ehdr->e_shentsize,
1203
                  sizeof(Elf64_Shdr));
1204

1205
   const Elf64_Shdr *strtab_hdr =
13,550✔
1206
      data + ehdr->e_shoff + ehdr->e_shstrndx * ehdr->e_shentsize;
13,550✔
1207
   const char *strtab = data + strtab_hdr->sh_offset;
13,550✔
1208

1209
   void **load_addr LOCAL = xcalloc_array(ehdr->e_shnum, sizeof(void *));
27,100✔
1210

1211
   for (int i = 0; i < ehdr->e_shnum; i++) {
121,799✔
1212
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;
108,249✔
1213

1214
      switch (shdr->sh_type) {
108,249✔
1215
      case SHT_PROGBITS:
27,906✔
1216
         if (shdr->sh_flags & SHF_ALLOC) {
27,906✔
1217
            code_blob_align(blob, shdr->sh_addralign);
14,356✔
1218
            load_addr[i] = blob->wptr;
14,356✔
1219
            DEBUG_ONLY(code_blob_printf(blob, "%s", strtab + shdr->sh_name));
14,356✔
1220
            code_blob_emit(blob, data + shdr->sh_offset, shdr->sh_size);
14,356✔
1221
         }
1222
         break;
1223

1224
      case SHT_RELA:
1225
         // Handled in second pass
1226
         break;
1227

1228
      case SHT_NULL:
1229
      case SHT_STRTAB:
1230
      case SHT_X86_64_UNWIND:
1231
         break;
1232

1233
      case SHT_SYMTAB:
1234
         for (int i = 0; i < shdr->sh_size / shdr->sh_entsize; i++) {
55,039✔
1235
            const Elf64_Sym *sym =
55,039✔
1236
               data + shdr->sh_offset + i * shdr->sh_entsize;
55,039✔
1237

1238
            if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
55,039✔
1239
               continue;
41,456✔
1240
            else if (!icmp(blob->span->name, strtab + sym->st_name))
13,583✔
1241
               continue;
33✔
1242
            else if (load_addr[sym->st_shndx] == NULL)
13,550✔
1243
               fatal_trace("missing section %d for symbol %s", sym->st_shndx,
1244
                           strtab + sym->st_name);
×
1245
            else {
1246
               blob->span->entry = load_addr[sym->st_shndx] + sym->st_value;
13,550✔
1247
               break;
13,550✔
1248
            }
1249
         }
1250
         break;
1251

1252
      default:
×
1253
         warnf("ignoring ELF section %s with type %x", strtab + shdr->sh_name,
×
1254
               shdr->sh_type);
1255
      }
1256
   }
1257

1258
   if (blob->overflow)
13,550✔
1259
      return;   // Relocations might point outside of code span
×
1260

1261
   blob->veneers = blob->wptr;
13,550✔
1262

1263
   shash_t *external = load_acquire(&blob->span->owner->symbols);
13,550✔
1264

1265
   for (int i = 0; i < ehdr->e_shnum; i++) {
121,799✔
1266
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;
108,249✔
1267
      if (shdr->sh_type != SHT_RELA)
108,249✔
1268
         continue;
82,106✔
1269

1270
      const Elf64_Shdr *mod =
26,143✔
1271
         data + ehdr->e_shoff + shdr->sh_info * ehdr->e_shentsize;
26,143✔
1272
      if (mod->sh_type != SHT_PROGBITS || !(mod->sh_flags & SHF_ALLOC))
26,143✔
1273
         continue;
13,550✔
1274
      else if (load_addr[shdr->sh_info] == NULL)
12,593✔
1275
         fatal_trace("section %s not loaded", strtab + mod->sh_name);
1276

1277
      const Elf64_Shdr *symtab =
12,593✔
1278
         data + ehdr->e_shoff + shdr->sh_link * ehdr->e_shentsize;
12,593✔
1279
      if (symtab->sh_type != SHT_SYMTAB)
12,593✔
1280
         fatal_trace("section %s is not a symbol table",
1281
                     strtab + symtab->sh_name);
×
1282

1283
      const Elf64_Rela *endp = data + shdr->sh_offset + shdr->sh_size;
12,593✔
1284
      for (const Elf64_Rela *r = data + shdr->sh_offset; r < endp; r++) {
76,075✔
1285
         const Elf64_Sym *sym = data + symtab->sh_offset
63,482✔
1286
            + ELF64_R_SYM(r->r_info) * symtab->sh_entsize;
63,482✔
1287

1288
         void *ptr = NULL;
63,482✔
1289
         switch (ELF64_ST_TYPE(sym->st_info)) {
63,482✔
1290
         case STT_NOTYPE:
48,849✔
1291
         case STT_FUNC:
1292
            if (sym->st_shndx == 0)
48,849✔
1293
               ptr = shash_get(external, strtab + sym->st_name);
48,531✔
1294
            else
1295
               ptr = load_addr[sym->st_shndx] + sym->st_value;
318✔
1296
            break;
1297
         case STT_SECTION:
14,633✔
1298
            ptr = load_addr[sym->st_shndx];
14,633✔
1299
            break;
14,633✔
1300
         default:
×
1301
            fatal_trace("cannot handle ELF symbol type %d",
1302
                        ELF64_ST_TYPE(sym->st_info));
1303
         }
1304

1305
         if (ptr == NULL)
63,482✔
1306
            fatal_trace("cannot resolve symbol %s type %d",
1307
                        strtab + sym->st_name, ELF64_ST_TYPE(sym->st_info));
×
1308

1309
         void *patch = load_addr[shdr->sh_info] + r->r_offset;
63,482✔
1310
         assert(r->r_offset < mod->sh_size);
63,482✔
1311

1312
         switch (ELF64_R_TYPE(r->r_info)) {
63,482✔
1313
         case R_X86_64_64:
63,482✔
1314
            debug_reloc(blob, patch, "R_X86_64_64 %s", strtab + sym->st_name);
63,482✔
1315
            *(uint64_t *)patch = (uint64_t)ptr + r->r_addend;
63,482✔
1316
            break;
63,482✔
1317
         case R_X86_64_PC32:
×
1318
            {
1319
               const ptrdiff_t pcrel = ptr + r->r_addend - patch;
×
1320
               debug_reloc(blob, patch, "R_X86_64_PC32 %s PC%+"PRIiPTR,
×
1321
                           strtab + sym->st_name, pcrel);
×
1322
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
×
1323
               *(uint32_t *)patch = pcrel;
×
1324
            }
1325
            break;
×
1326
         case R_X86_64_GOTPCREL:
×
1327
            {
1328
               void *got = code_emit_got(blob, ptr);
×
1329
               const ptrdiff_t pcrel = got + r->r_addend - patch;
×
1330
               debug_reloc(blob, patch, "R_X86_64_GOTPCREL %s PC%+"PRIiPTR,
×
1331
                           strtab + sym->st_name, pcrel);
×
1332
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
×
1333
               *(uint32_t *)patch = pcrel;
×
1334
            }
1335
            break;
×
1336
         case R_X86_64_PLT32:
×
1337
            {
1338
               void *veneer = code_emit_trampoline(blob, ptr);
×
1339
               const ptrdiff_t pcrel = veneer + r->r_addend - patch;
×
1340
               debug_reloc(blob, patch, "R_X86_64_PLT32 %s PC%+"PRIiPTR,
×
1341
                           strtab + sym->st_name, pcrel);
×
1342
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
×
1343
               *(uint32_t *)patch = pcrel;
×
1344
            }
1345
            break;
×
1346
         case R_AARCH64_CALL26:
×
1347
            {
1348
               void *veneer = code_emit_trampoline(blob, ptr);
×
1349
               const ptrdiff_t pcrel = (veneer + r->r_addend - patch) >> 2;
×
1350
               *(uint32_t *)patch &= ~0x3ffffff;
×
1351
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
×
1352
            }
1353
            break;
×
1354
         case R_AARCH64_PREL64:
×
1355
            *(uint64_t *)patch = ptr + r->r_addend - patch;
×
1356
            break;
×
1357
         case R_AARCH64_MOVW_UABS_G0_NC:
×
1358
            *(uint32_t *)patch |=
×
1359
               (((uintptr_t)ptr + r->r_addend) & 0xffff) << 5;
×
1360
            break;
×
1361
         case R_AARCH64_MOVW_UABS_G1_NC:
×
1362
            *(uint32_t *)patch |=
×
1363
               ((((uintptr_t)ptr + r->r_addend) >> 16) & 0xffff) << 5;
×
1364
            break;
×
1365
         case R_AARCH64_MOVW_UABS_G2_NC:
×
1366
            *(uint32_t *)patch |=
×
1367
               ((((uintptr_t)ptr + r->r_addend) >> 32) & 0xffff) << 5;
×
1368
            break;
×
1369
         case R_AARCH64_MOVW_UABS_G3:
×
1370
            *(uint32_t *)patch |=
×
1371
               ((((uintptr_t)ptr + r->r_addend) >> 48) & 0xffff) << 5;
×
1372
            break;
×
1373
         default:
×
1374
            blob->span->size = blob->wptr - blob->span->base;
×
1375
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
×
1376
            fatal_trace("cannot handle relocation type %ld for symbol %s",
1377
                        ELF64_R_TYPE(r->r_info), strtab + sym->st_name);
×
1378
         }
1379
      }
1380
   }
1381
}
1382
#endif
1383

1384
void code_load_object(code_blob_t *blob, const void *data, size_t size)
13,550✔
1385
{
1386
#if defined __APPLE__
1387
   code_load_macho(blob, data, size);
1388
#elif defined __MINGW32__
1389
   code_load_pe(blob, data, size);
1390
#else
1391
   code_load_elf(blob, data, size);
13,550✔
1392
#endif
1393
}
13,550✔