• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nickg / nvc / 15225267182

24 May 2025 08:30AM UTC coverage: 92.268% (-0.03%) from 92.297%
15225267182

push

github

nickg
Switch back to large code model on X86_64

1 of 1 new or added line in 1 file covered. (100.0%)

33 existing lines in 1 file now uncovered.

69383 of 75197 relevant lines covered (92.27%)

517508.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.82
/src/jit/jit-code.c
1
//
2
//  Copyright (C) 2022-2024  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "cpustate.h"
20
#include "debug.h"
21
#include "hash.h"
22
#include "ident.h"
23
#include "jit/jit-priv.h"
24
#include "option.h"
25
#include "thread.h"
26

27
#include <assert.h>
28
#include <math.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <stdio.h>
32
#include <unistd.h>
33
#include <inttypes.h>
34

35
#if defined __MINGW32__
36
#include <winnt.h>
37
#elif defined __APPLE__
38
#include <mach-o/loader.h>
39
#include <mach-o/reloc.h>
40
#include <mach-o/nlist.h>
41
#include <mach-o/stab.h>
42
#include <mach-o/arm64/reloc.h>
43
#include <mach-o/x86_64/reloc.h>
44
#else
45
#include <elf.h>
46
#endif
47

48
#ifdef HAVE_CAPSTONE
49
#include <capstone.h>
50
#endif
51

52
#ifndef R_AARCH64_MOVW_UABS_G0_NC
53
#define R_AARCH64_MOVW_UABS_G0_NC 264
54
#endif
55

56
#ifndef R_AARCH64_MOVW_UABS_G1_NC
57
#define R_AARCH64_MOVW_UABS_G1_NC 266
58
#endif
59

60
#ifndef R_AARCH64_MOVW_UABS_G2_NC
61
#define R_AARCH64_MOVW_UABS_G2_NC 268
62
#endif
63

64
#ifndef R_AARCH64_MOVW_UABS_G3
65
#define R_AARCH64_MOVW_UABS_G3 269
66
#endif
67

68
#ifndef SHT_X86_64_UNWIND
69
#define SHT_X86_64_UNWIND 0x70000001
70
#endif
71

72
#ifndef IMAGE_REL_ARM64_BRANCH26
73
#define IMAGE_REL_ARM64_BRANCH26 0x03
74
#endif
75

76
#ifndef IMAGE_REL_ARM64_ADDR32NB
77
#define IMAGE_REL_ARM64_ADDR32NB 0x02
78
#endif
79

80
#ifndef IMAGE_REL_ARM64_PAGEBASE_REL21
81
#define IMAGE_REL_ARM64_PAGEBASE_REL21 0x04
82
#endif
83

84
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12A
85
#define IMAGE_REL_ARM64_PAGEOFFSET_12A 0x06
86
#endif
87

88
#ifndef IMAGE_REL_ARM64_PAGEOFFSET_12L
89
#define IMAGE_REL_ARM64_PAGEOFFSET_12L 0x07
90
#endif
91

92
#define CODE_PAGE_ALIGN   4096
93
#define CODE_PAGE_SIZE    0x400000
94
#define THREAD_CACHE_SIZE 0x10000
95
#define CODE_BLOB_ALIGN   256
96
#define MIN_BLOB_SIZE     0x4000
97

98
#define __IMM64(x) __IMM32(x), __IMM32((x) >> 32)
99
#define __IMM32(x) __IMM16(x), __IMM16((x) >> 16)
100
#define __IMM16(x) (x) & 0xff, ((x) >> 8) & 0xff
101

102
STATIC_ASSERT(MIN_BLOB_SIZE <= THREAD_CACHE_SIZE);
103
STATIC_ASSERT(MIN_BLOB_SIZE % CODE_BLOB_ALIGN == 0);
104
STATIC_ASSERT(CODE_PAGE_SIZE % THREAD_CACHE_SIZE == 0);
105

106
typedef struct _code_page code_page_t;
107

108
typedef struct {
109
   uintptr_t  addr;
110
   char      *text;
111
} code_comment_t;
112

113
typedef struct {
114
   unsigned        count;
115
   unsigned        max;
116
   code_comment_t *comments;
117
} code_debug_t;
118

119
typedef struct _code_span {
120
   code_cache_t *owner;
121
   code_span_t  *next;
122
   ident_t       name;
123
   uint8_t      *base;
124
   void         *entry;
125
   size_t        size;
126
#ifdef DEBUG
127
   code_debug_t  debug;
128
#endif
129
} code_span_t;
130

131
typedef struct _patch_list {
132
   patch_list_t    *next;
133
   uint8_t         *wptr;
134
   jit_label_t      label;
135
   code_patch_fn_t  fn;
136
} patch_list_t;
137

138
typedef struct _code_page {
139
   code_cache_t *owner;
140
   code_page_t  *next;
141
   uint8_t      *mem;
142
} code_page_t;
143

144
typedef struct _code_cache {
145
   nvc_lock_t   lock;
146
   code_page_t *pages;
147
   code_span_t *spans;
148
   code_span_t *freelist[MAX_THREADS];
149
   code_span_t *globalfree;
150
   shash_t     *symbols;
151
   FILE        *perfmap;
152
#ifdef HAVE_CAPSTONE
153
   csh          capstone;
154
#endif
155
#ifdef DEBUG
156
   size_t       used;
157
#endif
158
} code_cache_t;
159

160
static void code_disassemble(code_span_t *span, uintptr_t mark,
161
                             struct cpu_state *cpu);
162

163
// Debug unwinder callback: resolve an address inside the JIT code area to
// a VHDL frame, filling in the symbol name and displacement from the
// owning span's base.
static void code_cache_unwinder(uintptr_t addr, debug_frame_t *frame,
                                void *context)
{
   code_cache_t *code = context;
   const uint8_t *pc = (uint8_t *)addr;

   for (code_span_t *it = code->spans; it != NULL; it = it->next) {
      const bool inside = pc >= it->base && pc < it->base + it->size;
      if (inside) {
         frame->kind   = FRAME_VHDL;
         frame->disp   = pc - it->base;
         frame->symbol = istr(it->name);
      }
   }
}
177

178
static void code_fault_handler(int sig, void *addr, struct cpu_state *cpu,
×
179
                               void *context)
180
{
181
   code_page_t *page = context;
×
182

183
   const uint8_t *pc = (uint8_t *)cpu->pc;
×
184
   if (pc < page->mem || pc > page->mem + CODE_PAGE_SIZE)
×
185
      return;
186

187
   uintptr_t mark = cpu->pc;
×
188
#ifndef __MINGW32__
189
   if (sig == SIGTRAP)
×
190
      mark--;   // Point to faulting instruction
×
191
#endif
192

193
   for (code_span_t *span = page->owner->spans; span; span = span->next) {
×
194
      if (pc >= span->base && pc < span->base + span->size && span->name)
×
195
         code_disassemble(span, mark, cpu);
×
196
   }
197
}
198

199
#ifdef DEBUG
200
// (DEBUG only) Returns true when [base, base+size) lies wholly within one
// of the cache's mapped pages.  Caller must hold the cache lock.
static bool code_cache_contains(code_cache_t *code, uint8_t *base, size_t size)
{
   assert_lock_held(&code->lock);

   bool found = false;
   for (code_page_t *p = code->pages; p != NULL && !found; p = p->next)
      found = base >= p->mem && base + size <= p->mem + CODE_PAGE_SIZE;

   return found;
}
211
#endif
212

213
// Allocate a span record covering [base, base+size) and push it onto the
// cache's span list.  The range must already lie inside a mapped page.
static code_span_t *code_span_new(code_cache_t *code, ident_t name,
                                  uint8_t *base, size_t size)
{
   SCOPED_LOCK(code->lock);

   assert(code_cache_contains(code, base, size));

   code_span_t *s = xcalloc(sizeof(code_span_t));
   s->owner = code;
   s->name  = name;
   s->base  = base;
   s->entry = base;   // Entry point defaults to the start of the span
   s->size  = size;
   s->next  = code->spans;

   code->spans = s;
   return s;
}
231

232
// Map a fresh executable page, register fault/unwind handlers for it, and
// make the whole page the new global free span.  Caller must hold the
// cache lock.
static void code_page_new(code_cache_t *code)
{
   assert_lock_held(&code->lock);

   code_page_t *page = xcalloc(sizeof(code_page_t));
   page->owner = code;
   page->next  = code->pages;
   page->mem   = map_jit_pages(CODE_PAGE_ALIGN, CODE_PAGE_SIZE);

   add_fault_handler(code_fault_handler, page);
   debug_add_unwinder(page->mem, CODE_PAGE_SIZE, code_cache_unwinder, code);

   code->pages = page;

   // Anonymous span covering the entire page; this becomes the free list
   code_span_t *whole = xcalloc(sizeof(code_span_t));
   whole->owner = code;
   whole->next  = code->spans;
   whole->base  = page->mem;
   whole->size  = CODE_PAGE_SIZE;

   code->globalfree = code->spans = whole;
}
254

255
// Create a new JIT code cache: maps an initial executable page, opens a
// capstone handle for disassembly (when built with HAVE_CAPSTONE), and
// seeds the symbol table used to resolve external references when
// loading JIT-compiled object code.  Caller owns the result and must
// release it with code_cache_free.
code_cache_t *code_cache_new(void)
{
   code_cache_t *code = xcalloc(sizeof(code_cache_t));

   {
      SCOPED_LOCK(code->lock);
      code_page_new(code);
   }

#ifdef HAVE_CAPSTONE
#if defined ARCH_X86_64
   if (cs_open(CS_ARCH_X86, CS_MODE_64, &(code->capstone)) != CS_ERR_OK)
      fatal_trace("failed to init capstone for x86_64");
#elif defined ARCH_ARM64
   if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &(code->capstone)) != CS_ERR_OK)
      fatal_trace("failed to init capstone for Arm64");
#else
#error Cannot configure capstone for this architecture
#endif

   // Detail mode is needed to inspect operands (e.g. movabs immediates)
   if (cs_option(code->capstone, CS_OPT_DETAIL, 1) != CS_ERR_OK)
      fatal_trace("failed to set capstone detailed mode");
#endif

   // Table of runtime entry points that generated code may reference by
   // name during relocation
   shash_t *s = shash_new(32);

   extern void __nvc_putpriv(jit_handle_t, void *);
   extern void __nvc_sched_waveform(jit_anchor_t *, jit_scalar_t *, tlab_t *);
   extern void __nvc_sched_process(jit_anchor_t *, jit_scalar_t *, tlab_t *);
   extern void __nvc_test_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);
   extern void __nvc_last_event(jit_anchor_t *, jit_scalar_t *, tlab_t *);

   shash_put(s, "__nvc_sched_waveform", &__nvc_sched_waveform);
   shash_put(s, "__nvc_sched_process", &__nvc_sched_process);
   shash_put(s, "__nvc_test_event", &__nvc_test_event);
   shash_put(s, "__nvc_last_event", &__nvc_last_event);
   shash_put(s, "__nvc_mspace_alloc", &__nvc_mspace_alloc);
   shash_put(s, "__nvc_putpriv", &__nvc_putpriv);
   shash_put(s, "__nvc_do_exit", &__nvc_do_exit);
   shash_put(s, "memmove", &memmove);
   shash_put(s, "memcpy", &memcpy);
   shash_put(s, "memset", &memset);
   shash_put(s, "pow", &pow);

   // Platform-specific helpers the compiler may emit calls to
#if defined __APPLE__ && defined ARCH_ARM64
   shash_put(s, "bzero", &bzero);
#elif defined __APPLE__ && defined ARCH_X86_64
   shash_put(s, "__bzero", &bzero);
#elif defined __MINGW32__ && defined ARCH_X86_64
   extern void ___chkstk_ms(void);
   shash_put(s, "___chkstk_ms", &___chkstk_ms);
#endif

   // Published with release semantics as other threads read it with
   // load_acquire during relocation
   store_release(&code->symbols, s);

   return code;
}
312

313
// Release all pages, spans, and auxiliary state owned by the cache.
// Fault handlers and unwinders are deregistered before pages are unmapped.
void code_cache_free(code_cache_t *code)
{
   code_page_t *next_page;
   for (code_page_t *p = code->pages; p; p = next_page) {
      next_page = p->next;
      debug_remove_unwinder(p->mem);
      remove_fault_handler(code_fault_handler, p);
      nvc_munmap(p->mem, CODE_PAGE_SIZE);
      free(p);
   }

   code_span_t *next_span;
   for (code_span_t *s = code->spans; s; s = next_span) {
      next_span = s->next;
      DEBUG_ONLY(free(s->debug.comments));
      free(s);
   }

#ifdef HAVE_CAPSTONE
   cs_close(&(code->capstone));
#endif

#ifdef DEBUG
   if (code->used > 0)
      debugf("JIT code footprint: %zu bytes", code->used);
#endif

   shash_free(code->symbols);
   free(code);
}
343

344
#ifdef HAVE_CAPSTONE
345
// Pad stdout with spaces until the cursor reaches column `tab`; returns
// the resulting column (unchanged if already at or past `tab`).
static int code_print_spaces(int col, int tab)
{
   while (col < tab) {
      fputc(' ', stdout);
      col++;
   }
   return col;
}
351
#endif
352

353
#ifdef DEBUG
354
static int code_comment_compare(const void *a, const void *b)
355
{
356
   const code_comment_t *ca = a;
357
   const code_comment_t *cb = b;
358

359
   if (ca->addr < cb->addr)
360
      return -1;
361
   else if (ca->addr > cb->addr)
362
      return 1;
363
   else
364
      return 0;
365
}
366
#endif
367

368
// Dump a disassembly listing of SPAN to stdout.  If MARK is non-zero an
// arrow is drawn at that address and, when CPU is also given, the saved
// register state is printed there (used from the fault handler).  Falls
// back to a plain hex dump when capstone is not available.
static void code_disassemble(code_span_t *span, uintptr_t mark,
                             struct cpu_state *cpu)
{
   SCOPED_LOCK(span->owner->lock);

   printf("--");

   const int namelen = ident_len(span->name);
   for (int i = 0; i < 72 - namelen; i++)
      fputc('-', stdout);

   printf(" %s ----\n", istr(span->name));

#ifdef HAVE_CAPSTONE
   cs_insn *insn = cs_malloc(span->owner->capstone);

#ifdef DEBUG
   // Sort the comments by address so they can be merged into the listing
   // in a single forward pass
   qsort(span->debug.comments, span->debug.count, sizeof(code_comment_t),
         code_comment_compare);
   code_comment_t *comment = span->debug.comments;
#endif

   const uint8_t *const eptr = span->base + span->size;
   for (const uint8_t *ptr = span->base; ptr < eptr; ) {
      uint64_t address = (uint64_t)ptr;

#ifdef DEBUG
      // Emit every comment attached at or before the current address
      for (; comment < span->debug.comments + span->debug.count
              && comment->addr <= address; comment++)
         printf("%30s;; %s\n", "", comment->text);
#endif

      // Collapse long runs of zero padding instead of disassembling them
      int zeros = 0;
      for (const uint8_t *zp = ptr; zp < eptr && *zp == 0; zp++, zeros++);

      if (zeros > 8 || zeros == eptr - ptr) {
         printf("%30s;; skipping %d zero bytes\n", "", zeros);
         ptr += zeros;
         continue;
      }

      size_t size = eptr - ptr;
      int col = 0;
      if (cs_disasm_iter(span->owner->capstone, &ptr, &size, &address, insn)) {
         // Render the raw encoding as hex alongside the mnemonic
         char hex1[33], *p = hex1;
         for (size_t k = 0; k < insn->size; k++)
            p += checked_sprintf(p, hex1 + sizeof(hex1) - p, "%02x",
                                 insn->bytes[k]);

         col = printf("%-12" PRIx64 " %-16.16s %s %s", insn->address,
                          hex1, insn->mnemonic, insn->op_str);

#ifdef ARCH_X86_64
         // Annotate 64-bit immediate loads with a symbol name when the
         // immediate resolves to a known symbol
         if (strcmp(insn->mnemonic, "movabs") == 0) {
            const cs_x86_op *src = &(insn->detail->x86.operands[1]);
            if (src->type == X86_OP_IMM) {
               const char *sym = debug_symbol_name((void *)src->imm);
               if (sym != NULL) {
                  col = code_print_spaces(col, 60);
                  col += printf(" ; %s", sym);
               }
            }
         }
#endif

         // Encodings longer than 16 hex digits wrap onto a second line
         if (strlen(hex1) > 16)
            col = printf("\n%15s -%-16s", "", hex1 + 16) - 1;
      }
      else {
         // Capstone could not decode here: print raw data and step over it
#ifdef ARCH_ARM64
         col = printf("%-12" PRIx64 " %-16.08x %s 0x%08x", (uint64_t)ptr,
                      *(uint32_t *)ptr, ".word", *(uint32_t *)ptr);
         ptr += 4;
#else
         col = printf("%-12" PRIx64 " %-16.02x %s 0x%02x", (uint64_t)ptr,
                      *ptr, ".byte", *ptr);
         ptr++;
#endif
      }

      if (mark != 0 && (ptr >= eptr || address > mark)) {
         // Draw an arrow at the marked (e.g. faulting) instruction
         col = code_print_spaces(col, 66);
         printf("<=============\n");
         if (cpu != NULL) {
#ifdef ARCH_X86_64
            const char *names[] = {
               "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
               "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"
            };
            for (int i = 0; i < ARRAY_LEN(names); i++)
               printf("\t%s\t%"PRIxPTR"\n", names[i], cpu->regs[i]);
#else
            for (int i = 0; i < 32; i++)
               printf("\tR%d\t%"PRIxPTR"\n", i, cpu->regs[i]);
#endif
         }
         mark = 0;   // Only mark the first matching instruction
      }
      else
         printf("\n");
   }

   cs_free(insn, 1);
#else
   jit_hexdump(span->base, span->size, 16, (void *)mark, "");
#endif

   for (int i = 0; i < 80; i++)
      fputc('-', stdout);
   printf("\n");
   fflush(stdout);
}
480

481
// Append this span to the Linux perf /tmp/perf-<pid>.map file, creating
// the file lazily on first use.  Disables OPT_PERF_MAP when the file
// cannot be opened so we only warn once.
static void code_write_perf_map(code_span_t *span)
{
   code_cache_t *owner = span->owner;
   SCOPED_LOCK(owner->lock);

   if (owner->perfmap == NULL) {
      char *fname LOCAL = xasprintf("/tmp/perf-%d.map", getpid());
      owner->perfmap = fopen(fname, "w");
      if (owner->perfmap == NULL) {
         warnf("cannot create %s: %s", fname, last_os_error());
         opt_set_int(OPT_PERF_MAP, 0);
         return;
      }
      debugf("writing perf map to %s", fname);
   }

   fprintf(owner->perfmap, "%p 0x%zx %s\n", span->base, span->size,
           istr(span->name));
   fflush(owner->perfmap);
}
500

501
// Begin a new code blob for NAME with an optional size HINT (zero selects
// MIN_BLOB_SIZE).  Memory comes from a per-thread free span which is
// refilled from the global free list — itself refilled by mapping new
// pages — when it cannot satisfy the request.  Leaves the current thread
// in write (W^X) mode; code_blob_finalise switches back to execute.
code_blob_t *code_blob_new(code_cache_t *code, ident_t name, size_t hint)
{
   code_span_t **freeptr = &(code->freelist[thread_id()]);

   code_span_t *free = relaxed_load(freeptr);
   if (free == NULL) {
      // First allocation on this thread: create an empty placeholder span
      free = code_span_new(code, NULL, code->pages->mem, 0);
      relaxed_store(freeptr, free);
   }

   const size_t reqsz = hint ?: MIN_BLOB_SIZE;

   if (free->size < reqsz) {
      SCOPED_LOCK(code->lock);

#ifdef DEBUG
      if (free->size > 0)
         debugf("thread %d needs new code cache from global free list "
                "(requested %zu bytes, wasted %zu bytes)",
                thread_id(), reqsz, free->size);
#endif

      const size_t chunksz = MAX(reqsz, THREAD_CACHE_SIZE);
      const size_t alignedsz = ALIGN_UP(chunksz, CODE_BLOB_ALIGN);

      if (alignedsz > code->globalfree->size) {
         DEBUG_ONLY(debugf("requesting new %d byte code page", CODE_PAGE_SIZE));
         code_page_new(code);
         assert(code->globalfree->size == CODE_PAGE_SIZE);
      }

      // Carve the thread's cache from the front of the global free span
      const size_t take = MIN(code->globalfree->size, alignedsz);

      free->size = take;
      free->base = code->globalfree->base;

      code->globalfree->base += take;
      code->globalfree->size -= take;
   }

   assert(reqsz <= free->size);
   assert(((uintptr_t)free->base & (CODE_BLOB_ALIGN - 1)) == 0);

   // The span initially covers the whole thread cache; the unused tail is
   // handed back in code_blob_finalise
   code_span_t *span = code_span_new(code, name, free->base, free->size);

   free->base += span->size;
   free->size -= span->size;

   code_blob_t *blob = xcalloc(sizeof(code_blob_t));
   blob->span = span;
   blob->wptr = span->base;

   thread_wx_mode(WX_WRITE);

   return blob;
}
557

558
// Finish writing a blob: trim the span to the bytes actually emitted,
// return the unused aligned tail to this thread's free span, flush the
// instruction cache, switch the thread back to execute mode, and publish
// the entry point through ENTRY.  On overflow all memory is returned to
// the free span and ENTRY is left untouched.
void code_blob_finalise(code_blob_t *blob, jit_entry_fn_t *entry)
{
   code_span_t *span = blob->span;
   span->size = blob->wptr - span->base;

   code_span_t *freespan = relaxed_load(&(span->owner->freelist[thread_id()]));
   assert(freespan->size == 0);

   ihash_free(blob->labels);
   blob->labels = NULL;

   if (unlikely(blob->patches != NULL))
      fatal_trace("not all labels in %s were patched", istr(span->name));
   else if (unlikely(blob->overflow)) {
      // Return all the memory
      freespan->size = freespan->base - span->base;
      freespan->base = span->base;
      free(blob);
      return;
   }
   else if (span->size == 0)
      fatal_trace("code span %s is empty", istr(span->name));

   // Hand the unused, aligned tail back to the per-thread free span
   uint8_t *aligned = ALIGN_UP(blob->wptr, CODE_BLOB_ALIGN);
   freespan->size = freespan->base - aligned;
   freespan->base = aligned;

   if (opt_get_verbose(OPT_ASM_VERBOSE, istr(span->name))) {
      color_printf("\n$bold$$blue$");
      code_disassemble(span, 0, NULL);
      color_printf("$$\n");
   }

   // Ensure the instruction cache observes the freshly written code
   // before anything can jump to it
   __builtin___clear_cache((char *)span->base, (char *)blob->wptr);

   thread_wx_mode(WX_EXECUTE);

   // Release ordering pairs with readers doing load_acquire on the entry
   store_release(entry, (jit_entry_fn_t)span->entry);

   DEBUG_ONLY(relaxed_add(&span->owner->used, span->size));
   free(blob);

   if (opt_get_int(OPT_PERF_MAP))
      code_write_perf_map(span);
}
603

604
__attribute__((cold, noinline))
605
static void code_blob_overflow(code_blob_t *blob)
1✔
606
{
607
   warnf("JIT code buffer for %s too small", istr(blob->span->name));
1✔
608
   for (patch_list_t *it = blob->patches, *tmp; it; it = tmp) {
1✔
609
      tmp = it->next;
×
610
      free(it);
×
611
   }
612
   blob->patches = NULL;
1✔
613
   blob->overflow = true;
1✔
614
}
1✔
615

616
// Copy `len` bytes of machine code into the blob and advance the write
// pointer.  Writes are silently dropped once the blob has overflowed.
void code_blob_emit(code_blob_t *blob, const uint8_t *bytes, size_t len)
{
   if (unlikely(blob->overflow))
      return;

   uint8_t *const limit = blob->span->base + blob->span->size;
   if (unlikely(blob->wptr + len > limit)) {
      code_blob_overflow(blob);
      return;
   }

   memcpy(blob->wptr, bytes, len);
   blob->wptr += len;
}
628

629
// Pad with single-byte filler (NOP on x86-64, zero elsewhere) until the
// write pointer is aligned to `align`, which must be a power of two.
void code_blob_align(code_blob_t *blob, unsigned align)
{
#ifdef ARCH_X86_64
   const uint8_t pad[] = { 0x90 };
#else
   const uint8_t pad[] = { 0x00 };
#endif

   assert(is_power_of_2(align));
   assert(align % ARRAY_LEN(pad) == 0);

   const uintptr_t mask = align - 1;
   while (!blob->overflow && ((uintptr_t)blob->wptr & mask) != 0)
      code_blob_emit(blob, pad, ARRAY_LEN(pad));
}
643

644
// Record the current write position for `label` and resolve any patches
// that were queued before the label's position was known.
void code_blob_mark(code_blob_t *blob, jit_label_t label)
{
   if (unlikely(blob->overflow))
      return;

   if (blob->labels == NULL)
      blob->labels = ihash_new(256);

   ihash_put(blob->labels, label, blob->wptr);

   // Walk the pending list, applying and unlinking entries for this label
   // while leaving all others in place
   patch_list_t **pp = &(blob->patches);
   while (*pp != NULL) {
      patch_list_t *cur = *pp;
      if (cur->label == label) {
         (*cur->fn)(blob, label, cur->wptr, blob->wptr);
         *pp = cur->next;
         free(cur);
      }
      else
         pp = &(cur->next);
   }
}
664

665
// Apply `fn` to fix up a reference to `label`: immediately if the label
// has already been marked, otherwise queued until code_blob_mark sees it.
void code_blob_patch(code_blob_t *blob, jit_label_t label, code_patch_fn_t fn)
{
   if (unlikely(blob->overflow))
      return;

   void *mark = blob->labels != NULL ? ihash_get(blob->labels, label) : NULL;
   if (mark != NULL)
      (*fn)(blob, label, blob->wptr, mark);
   else {
      // Label not yet seen: defer the patch until it is marked
      patch_list_t *pending = xmalloc(sizeof(patch_list_t));
      pending->label = label;
      pending->fn    = fn;
      pending->wptr  = blob->wptr;
      pending->next  = blob->patches;

      blob->patches = pending;
   }
}
682

683
#ifdef DEBUG
684
// (DEBUG only) Append a human-readable rendering of a single JIT IR
// operand to the text buffer, in the same notation used by the IR dumper.
static void code_blob_print_value(text_buf_t *tb, jit_value_t value)
{
   switch (value.kind) {
   case JIT_VALUE_REG:
      tb_printf(tb, "R%d", value.reg);
      break;
   case JIT_VALUE_INT64:
      // Small constants in decimal, large ones in hex for readability
      if (value.int64 < 4096)
         tb_printf(tb, "#%"PRIi64, value.int64);
      else
         tb_printf(tb, "#0x%"PRIx64, value.int64);
      break;
   case JIT_VALUE_DOUBLE:
      tb_printf(tb, "%%%g", value.dval);
      break;
   case JIT_ADDR_CPOOL:
      // Offset into the constant pool
      tb_printf(tb, "[CP+%"PRIi64"]", value.int64);
      break;
   case JIT_ADDR_REG:
      // Register-indirect address with optional displacement
      tb_printf(tb, "[R%d", value.reg);
      if (value.disp != 0)
         tb_printf(tb, "+%d", value.disp);
      tb_cat(tb, "]");
      break;
   case JIT_ADDR_ABS:
      tb_printf(tb, "[#%016"PRIx64"]", value.int64);
      break;
   case JIT_ADDR_COVER:
      tb_printf(tb, "@%"PRIi64, value.int64);
      break;
   case JIT_VALUE_LABEL:
      tb_printf(tb, "%d", value.label);
      break;
   case JIT_VALUE_HANDLE:
      tb_printf(tb, "<%d>", value.handle);
      break;
   case JIT_VALUE_EXIT:
      tb_printf(tb, "%s", jit_exit_name(value.exit));
      break;
   case JIT_VALUE_LOC:
      tb_printf(tb, "<%s:%d>", loc_file_str(&value.loc), value.loc.first_line);
      break;
   case JIT_VALUE_LOCUS:
      tb_printf(tb, "%p", value.locus);
      break;
   case JIT_VALUE_VPOS:
      tb_printf(tb, "%u:%u", value.vpos.block, value.vpos.op);
      break;
   default:
      tb_cat(tb, "???");
   }
}
736

737
// (DEBUG only) Attach a free-form comment to ADDR for later display by
// the disassembler.  Takes ownership of the heap-allocated TEXT.
static void code_blob_add_comment(code_blob_t *blob, uintptr_t addr, char *text)
{
   code_debug_t *dbg = &(blob->span->debug);

   if (dbg->count == dbg->max) {
      // Grow the array geometrically, starting at 128 entries
      dbg->max = MAX(128, dbg->max * 2);
      dbg->comments = xrealloc_array(dbg->comments, dbg->max,
                                     sizeof(code_comment_t));
   }

   code_comment_t *slot = &(dbg->comments[dbg->count++]);
   slot->addr = addr;
   slot->text = text;
}
751

752
// (DEBUG only) Render a JIT IR instruction as assembly-style text and
// attach it as a comment at the current write position so it appears
// interleaved in later disassembly output.
void code_blob_print_ir(code_blob_t *blob, jit_ir_t *ir)
{
   LOCAL_TEXT_BUF tb = tb_new();
   tb_printf(tb, "%s%s", jit_op_name(ir->op), jit_cc_name(ir->cc));

   // Size suffix: JIT_SZ_8 -> ".8" through JIT_SZ_64 -> ".64"
   if (ir->size != JIT_SZ_UNSPEC)
      tb_printf(tb, ".%d", 1 << (3 + ir->size));

   // Pad the mnemonic column to a fixed width
   tb_printf(tb, "%*.s", (int)MAX(0, 10 - tb_len(tb)), "");

   if (ir->result != JIT_REG_INVALID)
      tb_printf(tb, "R%d", ir->result);

   if (ir->arg1.kind != JIT_VALUE_INVALID) {
      if (ir->result != JIT_REG_INVALID)
         tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg1);
   }

   if (ir->arg2.kind != JIT_VALUE_INVALID) {
      tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg2);
   }

   // tb_claim transfers ownership of the string to the comment list
   code_blob_add_comment(blob, (uintptr_t)blob->wptr, tb_claim(tb));
}
778

779
// (DEBUG only) Format a comment and attach it at the current write
// position in the blob.
void code_blob_printf(code_blob_t *blob, const char *fmt, ...)
{
   va_list ap;
   va_start(ap, fmt);

   code_blob_add_comment(blob, (uintptr_t)blob->wptr, xvasprintf(fmt, ap));

   va_end(ap);
}
789

790
__attribute__((format(printf, 3, 4)))
791
static void debug_reloc(code_blob_t *blob, void *patch, const char *fmt, ...)
40,122✔
792
{
793
   va_list ap;
40,122✔
794
   va_start(ap, fmt);
40,122✔
795

796
   char *text = xvasprintf(fmt, ap);
40,122✔
797
   code_blob_add_comment(blob, (uintptr_t)patch, text);
40,122✔
798

799
   va_end(ap);
40,122✔
800
}
40,122✔
801
#else
802
#define debug_reloc(...)
803
#endif   // DEBUG
804

805
#ifdef ARCH_ARM64
806
// Patch the 12-bit page-offset immediate of an LDR or ADD instruction
// (PAGEOFFSET_12A/12L style relocation) with the low bits of PTR.
// Aborts with a disassembly dump for any other instruction encoding.
static void arm64_patch_page_offset21(code_blob_t *blob, uint32_t *patch,
                                      void *ptr)
{
   switch ((*patch >> 23) & 0x7f) {
   case 0b1111010:   // LDR (immediate, SIMD&FP)
   case 0b1110010:   // LDR (immediate)
      assert(*patch & (1 << 30));  // Quadword
      assert(((uintptr_t)ptr & 7) == 0);
      // LDR imm12 is scaled by the access size: bits [11:3] of the
      // address go into the immediate field at bit 10
      *patch |= (((uintptr_t)ptr & 0xfff) >> 3) << 10;
      break;
   case 0b0100010:   // ADD (immediate)
      // ADD takes the unscaled low 12 bits at bit 10
      *patch |= ((uintptr_t)ptr & 0xfff) << 10;
      break;
   default:
      blob->span->size = blob->wptr - blob->span->base;
      code_disassemble(blob->span, (uintptr_t)patch, NULL);
      fatal_trace("cannot patch instruction");
   }
}
825

826
// Patch an ADRP-style instruction with the 21-bit delta, in 4K pages,
// between the instruction's page and PTR's page (PAGEBASE_REL21 style
// relocation).
static void arm64_patch_page_base_rel21(uint32_t *patch, void *ptr)
{
   const intptr_t dst_page = (intptr_t)ptr & ~UINT64_C(0xfff);
   const intptr_t src_page = (intptr_t)patch & ~UINT64_C(0xfff);
   const intptr_t upper21 = (dst_page - src_page) >> 12;
   assert((upper21 & ~UINT64_C(0x1fffff)) == 0);   // Must fit in 21 bits
   // ADRP encodes immlo in bits [30:29] and immhi in bits [23:5]
   *(uint32_t *)patch |= (upper21 & 3) << 29;
   *(uint32_t *)patch |= ((upper21 >> 2) & 0x7ffff) << 5;
}
835
#endif
836

UNCOV
837
// Emit (or reuse) a small veneer that jumps to an arbitrary 64-bit
// address, used for call/branch targets out of direct range.  Identical
// veneers already emitted in this blob's veneer area are deduplicated by
// a byte-wise search.  Returns the address of the veneer.
static void *code_emit_trampoline(code_blob_t *blob, void *dest)
{
#if defined ARCH_X86_64
   const uint8_t veneer[] = {
      0x48, 0xb8, __IMM64((uintptr_t)dest),  // MOVABS RAX, dest
      0xff, 0xe0                             // CALL RAX
   };
#elif defined ARCH_ARM64
   const uint8_t veneer[] = {
      0x50, 0x00, 0x00, 0x58,   // LDR X16, [PC+8]
      0x00, 0x02, 0x1f, 0xd6,   // BR X16
      __IMM64((uintptr_t)dest)
   };
#else
   should_not_reach_here();
#endif

   // Reuse an existing identical veneer if one was already emitted
   void *prev = memmem(blob->veneers, blob->wptr - blob->veneers,
                       veneer, ARRAY_LEN(veneer));
   if (prev != NULL)
      return prev;
   else {
      DEBUG_ONLY(code_blob_printf(blob, "Trampoline for %p", dest));

      void *addr = blob->wptr;
      code_blob_emit(blob, veneer, ARRAY_LEN(veneer));
      return addr;
   }
}
866

867
#if defined ARCH_X86_64
UNCOV
868
// (x86-64 only) Emit (or reuse) an eight-byte GOT-style entry holding the
// absolute address DEST and return its location.  Entries already emitted
// in this blob's veneer area are deduplicated by a byte-wise search, the
// same scheme used by code_emit_trampoline.
static void *code_emit_got(code_blob_t *blob, void *dest)
{
   const uint8_t data[] = { __IMM64((uintptr_t)dest) };

   // Search length is wptr - veneers: the operands were previously
   // reversed, yielding a negative difference that converted to a huge
   // size_t and made memmem read far out of bounds
   void *prev = memmem(blob->veneers, blob->wptr - blob->veneers,
                       data, ARRAY_LEN(data));
   if (prev != NULL)
      return prev;
   else {
      DEBUG_ONLY(code_blob_printf(blob, "GOT entry for %p", dest));

      void *addr = blob->wptr;
      code_blob_emit(blob, data, ARRAY_LEN(data));
      return addr;
   }
}
884
#endif
885

886
#if defined __MINGW32__
887
static void code_load_pe(code_blob_t *blob, const void *data, size_t size)
888
{
889
   const IMAGE_FILE_HEADER *imghdr = data;
890

891
   switch (imghdr->Machine) {
892
   case IMAGE_FILE_MACHINE_AMD64:
893
   case IMAGE_FILE_MACHINE_ARM64:
894
      break;
895
   default:
896
      fatal_trace("unknown target machine %x", imghdr->Machine);
897
   }
898

899
   const IMAGE_SYMBOL *symtab = data + imghdr->PointerToSymbolTable;
900
   const char *strtab = data + imghdr->PointerToSymbolTable
901
      + imghdr->NumberOfSymbols * sizeof(IMAGE_SYMBOL);
902

903
   const IMAGE_SECTION_HEADER *sections =
904
      data + IMAGE_SIZEOF_FILE_HEADER + imghdr->SizeOfOptionalHeader;
905

906
   void **load_addr LOCAL =
907
      xmalloc_array(imghdr->NumberOfSections, sizeof(void *));
908

909
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
910
      if ((sections[i].Characteristics & IMAGE_SCN_CNT_CODE)
911
          || (sections[i].Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)) {
912
         const int align = sections[i].Characteristics & IMAGE_SCN_ALIGN_MASK;
913
         code_blob_align(blob, 1 << ((align >> 20) - 1));
914
         load_addr[i] = blob->wptr;
915
         code_blob_emit(blob, data + sections[i].PointerToRawData,
916
                        sections[i].SizeOfRawData);
917
      }
918
      else if ((sections[i].Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
919
               && sections[i].Misc.VirtualSize > 0)
920
         fatal_trace("non-empty BSS not supported");
921
   }
922

923
   if (blob->overflow)
924
      return;   // Relocations might point outside of code span
925

926
   blob->veneers = blob->wptr;
927

928
   shash_t *external = load_acquire(&blob->span->owner->symbols);
929

930
   for (int i = 0; i < imghdr->NumberOfSections; i++) {
931
      const IMAGE_RELOCATION *relocs = data + sections[i].PointerToRelocations;
932
      for (int j = 0; j < sections[i].NumberOfRelocations; j++) {
933
         const char *name = NULL;
934
         char tmp[9];
935

936
         assert(relocs[j].SymbolTableIndex < imghdr->NumberOfSymbols);
937
         const IMAGE_SYMBOL *sym = symtab + relocs[j].SymbolTableIndex;
938

939
         if (sym->N.Name.Short) {
940
            memcpy(tmp, sym->N.ShortName, 8);
941
            tmp[8] = '\0';
942
            name = tmp;
943
         }
944
         else
945
            name = strtab + sym->N.Name.Long;
946

947
         void *ptr = NULL;
948
         if (sym->SectionNumber > 0) {
949
            assert(sym->SectionNumber - 1 < imghdr->NumberOfSections);
950
            ptr = load_addr[sym->SectionNumber - 1] + sym->Value;
951
         }
952
         else
953
            ptr = shash_get(external, name);
954

955
         if (ptr == NULL && icmp(blob->span->name, name))
956
            ptr = blob->span->base;
957

958
         if (ptr == NULL)
959
            fatal_trace("failed to resolve symbol %s", name);
960

961
         void *patch = load_addr[i] + relocs[j].VirtualAddress;
962
         assert((uint8_t *)patch >= blob->span->base);
963
         assert((uint8_t *)patch < blob->span->base + blob->span->size);
964

965
         switch (relocs[j].Type) {
966
#if defined ARCH_X86_64
967
         case IMAGE_REL_AMD64_ADDR64:
968
            *(uint64_t *)patch += (uint64_t)ptr;
969
            break;
970
         case IMAGE_REL_AMD64_ADDR32NB:
971
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
972
            break;
973
#elif defined ARCH_ARM64
974
         case IMAGE_REL_ARM64_BRANCH26:
975
            {
976
               void *veneer = code_emit_trampoline(blob, ptr);
977
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
978
               *(uint32_t *)patch &= ~0x3ffffff;
979
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
980
            }
981
            break;
982
         case IMAGE_REL_ARM64_ADDR32NB:
983
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
984
            break;
985
         case IMAGE_REL_ARM64_PAGEBASE_REL21:
986
            arm64_patch_page_base_rel21(patch, ptr);
987
            break;
988
         case IMAGE_REL_ARM64_PAGEOFFSET_12A:
989
         case IMAGE_REL_ARM64_PAGEOFFSET_12L:
990
            arm64_patch_page_offset21(blob, patch, ptr);
991
            break;
992
#endif
993
         default:
994
            blob->span->size = blob->wptr - blob->span->base;
995
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
996
            fatal_trace("cannot handle relocation type %d for symbol %s",
997
                        relocs[j].Type, name);
998
         }
999
      }
1000

1001
      if (strncmp((const char *)sections[i].Name, ".pdata",
1002
                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
1003
         assert(sections[i].SizeOfRawData % sizeof(RUNTIME_FUNCTION) == 0);
1004
         const int count = sections[i].SizeOfRawData / sizeof(RUNTIME_FUNCTION);
1005
         const DWORD64 base = (DWORD64)blob->span->base;
1006

1007
         // TODO: we should also call RtlDeleteFunctionTable at some point
1008
         if (!RtlAddFunctionTable(load_addr[i], count, base))
1009
            fatal_trace("RtlAddFunctionTable failed: %s", last_os_error());
1010
      }
1011
   }
1012

1013
   for (int i = 0; i < imghdr->NumberOfSymbols; i++) {
1014
      const IMAGE_SYMBOL *sym = &(symtab[i]);
1015

1016
      if (sym->SectionNumber == 0 || sym->N.Name.Short)
1017
         continue;
1018
      else if ((sym->Type >> 4) != IMAGE_SYM_DTYPE_FUNCTION)
1019
         continue;
1020
      else if (icmp(blob->span->name, strtab + sym->N.Name.Long)) {
1021
         blob->span->entry = load_addr[sym->SectionNumber - 1] + sym->Value;
1022
         break;
1023
      }
1024
   }
1025
}
1026
#elif defined __APPLE__
1027
// Load a relocatable Mach-O object produced by the JIT back end into the
// executable code span owned by blob.  Copies the section contents into the
// span, applies the relocations recorded in the object, and records the
// entry point for the span's own symbol.  The size argument is currently
// unused; bounds inside the image are trusted (asserted, not validated).
static void code_load_macho(code_blob_t *blob, const void *data, size_t size)
{
   const void *rptr = data;

   const struct mach_header_64 *fhdr = rptr;
   rptr += sizeof(struct mach_header_64);

   if (fhdr->magic != MH_MAGIC_64)
      fatal_trace("bad Mach-O magic %x", fhdr->magic);

   const struct segment_command_64 *seg = NULL;
   const struct symtab_command *symtab = NULL;

   // Address each section was copied to in the code span, indexed by the
   // section's position within the (single) LC_SEGMENT_64 command
   void **load_addr LOCAL = NULL;

   // First pass over the load commands: copy section contents into the
   // blob and remember where the symbol table lives
   for (int i = 0; i < fhdr->ncmds; i++) {
      const struct load_command *load = rptr;
      switch (load->cmd) {
      case LC_SEGMENT_64:
         {
            // NOTE(review): assumes at most one LC_SEGMENT_64 command;
            // a second would leak the previous load_addr array — confirm
            // against the object emitter
            seg = rptr;
            load_addr = xmalloc_array(seg->nsects, sizeof(void *));

            for (int j = 0; j < seg->nsects; j++) {
               // Section headers follow the segment command contiguously
               const struct section_64 *sec =
                  (void *)seg + sizeof(struct segment_command_64)
                  + j * sizeof(struct section_64);
               code_blob_align(blob, 1 << sec->align);   // align is log2
               load_addr[j] = blob->wptr;
               DEBUG_ONLY(code_blob_printf(blob, "%s", sec->sectname));
               code_blob_emit(blob, data + sec->offset, sec->size);
            }
         }
         break;
      case LC_SYMTAB:
         symtab = rptr;
         assert(symtab->cmdsize == sizeof(struct symtab_command));
         break;
      case LC_DATA_IN_CODE:
      case LC_LINKER_OPTIMIZATION_HINT:
      case LC_BUILD_VERSION:
      case LC_DYSYMTAB:
         break;   // Not needed for loading
      default:
         warnf("unrecognised load command 0x%0x", load->cmd);
      }

      rptr += load->cmdsize;
   }
   assert(rptr == data + sizeof(struct mach_header_64) + fhdr->sizeofcmds);

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   // Branch veneers and similar are appended after the loaded sections
   blob->veneers = blob->wptr;

   assert(seg != NULL);
   assert(symtab != NULL);

   // Symbols defined outside this object (runtime support routines, etc.)
   shash_t *external = load_acquire(&blob->span->owner->symbols);

   // Second pass: apply each section's relocations in place
   for (int i = 0; i < seg->nsects; i++) {
      const struct section_64 *sec =
         (void *)seg + sizeof(struct segment_command_64)
         + i * sizeof(struct section_64);

      // ARM64_RELOC_ADDEND entries carry an addend that applies to the
      // immediately following relocation
      uint32_t addend = 0;
      for (int j = 0; j < sec->nreloc; j++) {
         const struct relocation_info *rel =
            data + sec->reloff + j * sizeof(struct relocation_info);
         const char *name = NULL;
         void *ptr = NULL;
         if (rel->r_extern) {
            // r_symbolnum indexes the symbol table
            assert(rel->r_symbolnum < symtab->nsyms);
            const struct nlist_64 *nl = data + symtab->symoff
               + rel->r_symbolnum * sizeof(struct nlist_64);
            name = data + symtab->stroff + nl->n_un.n_strx;

            if (nl->n_type & N_EXT) {
               // name + 1 skips the leading '_' Mach-O prepends to C
               // symbol names
               if (icmp(blob->span->name, name + 1))
                  ptr = blob->span->base;   // Self-reference
               else if ((ptr = shash_get(external, name + 1)) == NULL)
                  fatal_trace("failed to resolve symbol %s", name + 1);
            }
            else if (nl->n_sect != NO_SECT)
               ptr = blob->span->base + nl->n_value;
         }
         else
            ptr = blob->span->base;   // Section-relative relocation

         ptr += addend;   // Apply any ARM64_RELOC_ADDEND from last entry
         addend = 0;

         void *patch = load_addr[i] + rel->r_address;
         assert((uint8_t *)patch >= blob->span->base);
         assert((uint8_t *)patch < blob->span->base + blob->span->size);

         switch (rel->r_type) {
#ifdef ARCH_ARM64
         case ARM64_RELOC_UNSIGNED:
            assert(rel->r_length == 3);   // Only 8-byte absolute supported
            *(void **)patch = ptr;
            break;
         case ARM64_RELOC_SUBTRACTOR:
            break;   // What is this?
         case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
         case ARM64_RELOC_PAGEOFF12:
            arm64_patch_page_offset21(blob, patch, ptr);
            break;
         case ARM64_RELOC_GOT_LOAD_PAGE21:
         case ARM64_RELOC_PAGE21:
            arm64_patch_page_base_rel21(patch, ptr);
            break;
         case ARM64_RELOC_BRANCH26:
            {
               // Branch through a veneer so the ±128MB BL range always
               // reaches the target
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
               debug_reloc(blob, patch, "ARM64_RELOC_BRANCH26 %s PC%+"PRIiPTR,
                           name, pcrel);
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case ARM64_RELOC_ADDEND:
            addend = rel->r_symbolnum;   // Consumed by the next relocation
            break;
#elif defined ARCH_X86_64
         case X86_64_RELOC_UNSIGNED:
            *(uint64_t *)patch += (uint64_t)ptr;
            break;
         case X86_64_RELOC_BRANCH:
            // PC-relative displacement measured from the end of the
            // 4-byte immediate
            *(uint32_t *)patch += (uint32_t)(ptr - patch - 4);
            break;
#endif
         default:
            // Fix up the span size so the disassembly dump is usable
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %d for symbol %s",
                        rel->r_type, name);
         }
      }
   }

   // Finally locate the entry point: the defined symbol whose name (minus
   // the '_' prefix) matches this code span
   for (int i = 0; i < symtab->nsyms; i++) {
      const struct nlist_64 *sym =
         data + symtab->symoff + i * sizeof(struct nlist_64);

      if (sym->n_sect == NO_SECT || (sym->n_type & N_TYPE) != N_SECT)
         continue;

      const char *name = data + symtab->stroff + sym->n_un.n_strx;
      if (name[0] == '_' && icmp(blob->span->name, name + 1)) {
         blob->span->entry = load_addr[sym->n_sect - 1] + sym->n_value;
         break;
      }
   }
}
1184
#elif !defined __MINGW32__
1185
// Load a relocatable ELF object produced by the JIT back end into the
// executable code span owned by blob.  First pass copies SHF_ALLOC
// PROGBITS sections into the span and finds the entry symbol; second pass
// applies the SHT_RELA relocations.  The size argument is currently
// unused; offsets inside the image are trusted (asserted, not validated).
static void code_load_elf(code_blob_t *blob, const void *data, size_t size)
{
   const Elf64_Ehdr *ehdr = data;

   if (ehdr->e_ident[EI_MAG0] != ELFMAG0
       || ehdr->e_ident[EI_MAG1] != ELFMAG1
       || ehdr->e_ident[EI_MAG2] != ELFMAG2
       || ehdr->e_ident[EI_MAG3] != ELFMAG3)
      fatal_trace("bad ELF magic");
   else if (ehdr->e_shentsize != sizeof(Elf64_Shdr))
      fatal_trace("bad section header size %d != %zu", ehdr->e_shentsize,
                  sizeof(Elf64_Shdr));

   // NOTE(review): the e_shstrndx string table is used below for both
   // section names and symbol names — assumes the emitter writes a single
   // combined string table; TODO confirm against the object writer
   const Elf64_Shdr *strtab_hdr =
      data + ehdr->e_shoff + ehdr->e_shstrndx * ehdr->e_shentsize;
   const char *strtab = data + strtab_hdr->sh_offset;

   // Address each section was copied to in the code span, indexed by
   // section number; NULL for sections that were not loaded
   void **load_addr LOCAL = xcalloc_array(ehdr->e_shnum, sizeof(void *));

   // First pass: copy loadable section contents and locate the entry point
   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;

      switch (shdr->sh_type) {
      case SHT_PROGBITS:
         if (shdr->sh_flags & SHF_ALLOC) {
            code_blob_align(blob, shdr->sh_addralign);
            load_addr[i] = blob->wptr;
            DEBUG_ONLY(code_blob_printf(blob, "%s", strtab + shdr->sh_name));
            code_blob_emit(blob, data + shdr->sh_offset, shdr->sh_size);
         }
         break;

      case SHT_RELA:
         // Handled in second pass
         break;

      case SHT_NULL:
      case SHT_STRTAB:
      case SHT_X86_64_UNWIND:
         break;   // Nothing to load

      case SHT_SYMTAB:
         // Scan for the STT_FUNC symbol whose name matches this span:
         // that is the entry point.  Relies on the defining PROGBITS
         // section appearing before the symbol table.
         for (int i = 0; i < shdr->sh_size / shdr->sh_entsize; i++) {
            const Elf64_Sym *sym =
               data + shdr->sh_offset + i * shdr->sh_entsize;

            if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
               continue;
            else if (!icmp(blob->span->name, strtab + sym->st_name))
               continue;
            else if (load_addr[sym->st_shndx] == NULL)
               fatal_trace("missing section %d for symbol %s", sym->st_shndx,
                           strtab + sym->st_name);
            else {
               blob->span->entry = load_addr[sym->st_shndx] + sym->st_value;
               break;
            }
         }
         break;

      default:
         warnf("ignoring ELF section %s with type %x", strtab + shdr->sh_name,
               shdr->sh_type);
      }
   }

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   // Branch veneers and GOT entries are appended after the loaded sections
   blob->veneers = blob->wptr;

   // Symbols defined outside this object (runtime support routines, etc.)
   shash_t *external = load_acquire(&blob->span->owner->symbols);

   // Second pass: apply every RELA section that targets a loaded section
   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;
      if (shdr->sh_type != SHT_RELA)
         continue;

      // sh_info gives the section the relocations modify
      const Elf64_Shdr *mod =
         data + ehdr->e_shoff + shdr->sh_info * ehdr->e_shentsize;
      if (mod->sh_type != SHT_PROGBITS || !(mod->sh_flags & SHF_ALLOC))
         continue;   // e.g. relocations for debug or unwind sections
      else if (load_addr[shdr->sh_info] == NULL)
         fatal_trace("section %s not loaded", strtab + mod->sh_name);

      // sh_link gives the associated symbol table
      const Elf64_Shdr *symtab =
         data + ehdr->e_shoff + shdr->sh_link * ehdr->e_shentsize;
      if (symtab->sh_type != SHT_SYMTAB)
         fatal_trace("section %s is not a symbol table",
                     strtab + symtab->sh_name);

      const Elf64_Rela *endp = data + shdr->sh_offset + shdr->sh_size;
      for (const Elf64_Rela *r = data + shdr->sh_offset; r < endp; r++) {
         const Elf64_Sym *sym = data + symtab->sh_offset
            + ELF64_R_SYM(r->r_info) * symtab->sh_entsize;

         // Resolve the relocation's target address
         void *ptr = NULL;
         switch (ELF64_ST_TYPE(sym->st_info)) {
         case STT_NOTYPE:
         case STT_FUNC:
            if (sym->st_shndx == 0)   // SHN_UNDEF: look up externally
               ptr = shash_get(external, strtab + sym->st_name);
            else
               ptr = load_addr[sym->st_shndx] + sym->st_value;
            break;
         case STT_SECTION:
            ptr = load_addr[sym->st_shndx];
            break;
         default:
            fatal_trace("cannot handle ELF symbol type %d",
                        ELF64_ST_TYPE(sym->st_info));
         }

         if (ptr == NULL)
            fatal_trace("cannot resolve symbol %s type %d",
                        strtab + sym->st_name, ELF64_ST_TYPE(sym->st_info));

         void *patch = load_addr[shdr->sh_info] + r->r_offset;
         assert(r->r_offset < mod->sh_size);

         switch (ELF64_R_TYPE(r->r_info)) {
         case R_X86_64_64:
            // 64-bit absolute: S + A
            debug_reloc(blob, patch, "R_X86_64_64 %s", strtab + sym->st_name);
            *(uint64_t *)patch = (uint64_t)ptr + r->r_addend;
            break;
         case R_X86_64_PC32:
            {
               // 32-bit PC-relative: S + A - P
               const ptrdiff_t pcrel = ptr + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_PC32 %s PC%+"PRIiPTR,
                           strtab + sym->st_name, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_X86_64_GOTPCREL:
            {
               // Point the access at a locally emitted GOT slot holding ptr
               void *got = code_emit_got(blob, ptr);
               const ptrdiff_t pcrel = got + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_GOTPCREL %s PC%+"PRIiPTR,
                           strtab + sym->st_name, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_X86_64_PLT32:
            {
               // Branch through a veneer so the ±2GB rel32 range always
               // reaches the target
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = veneer + r->r_addend - patch;
               debug_reloc(blob, patch, "R_X86_64_PLT32 %s PC%+"PRIiPTR,
                           strtab + sym->st_name, pcrel);
               assert(pcrel >= INT32_MIN && pcrel <= INT32_MAX);
               *(uint32_t *)patch = pcrel;
            }
            break;
         case R_AARCH64_CALL26:
            {
               // BL instruction: 26-bit signed word offset via a veneer
               void *veneer = code_emit_trampoline(blob, ptr);
               const ptrdiff_t pcrel = (veneer + r->r_addend - patch) >> 2;
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case R_AARCH64_PREL64:
            *(uint64_t *)patch = ptr + r->r_addend - patch;
            break;
         // MOVZ/MOVK sequence: each relocation inserts one 16-bit slice
         // of the absolute address into the instruction's imm16 field
         // (bits 5..20)
         case R_AARCH64_MOVW_UABS_G0_NC:
            *(uint32_t *)patch |=
               (((uintptr_t)ptr + r->r_addend) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G1_NC:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 16) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G2_NC:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 32) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G3:
            *(uint32_t *)patch |=
               ((((uintptr_t)ptr + r->r_addend) >> 48) & 0xffff) << 5;
            break;
         default:
            // Fix up the span size so the disassembly dump is usable
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %ld for symbol %s",
                        ELF64_R_TYPE(r->r_info), strtab + sym->st_name);
         }
      }
   }
}
}
1375
#endif
1376

1377
// Load a relocatable object file emitted by the JIT back end into the
// executable code span owned by blob, dispatching on the host platform's
// native object format: PE/COFF on Windows (MinGW), Mach-O on macOS, and
// ELF everywhere else.
void code_load_object(code_blob_t *blob, const void *data, size_t size)
{
#if defined __MINGW32__
   code_load_pe(blob, data, size);
#elif defined __APPLE__
   code_load_macho(blob, data, size);
#else
   code_load_elf(blob, data, size);
#endif
}
8,379✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc