• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nickg / nvc / 6474253934

10 Oct 2023 07:38PM UTC coverage: 91.136% (-0.02%) from 91.159%
6474253934

push

github

nickg
Verilog continuous assignment and binary "&"

41 of 41 new or added lines in 2 files covered. (100.0%)

48876 of 53630 relevant lines covered (91.14%)

590714.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.97
/src/jit/jit-code.c
1
//
2
//  Copyright (C) 2022-2023  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "cpustate.h"
20
#include "debug.h"
21
#include "hash.h"
22
#include "ident.h"
23
#include "jit/jit-priv.h"
24
#include "option.h"
25
#include "thread.h"
26

27
#include <assert.h>
28
#include <stdlib.h>
29
#include <string.h>
30
#include <stdio.h>
31
#include <unistd.h>
32
#include <inttypes.h>
33

34
#if defined __MINGW32__
35
#include <winnt.h>
36
#elif defined __APPLE__
37
#include <mach-o/loader.h>
38
#include <mach-o/reloc.h>
39
#include <mach-o/nlist.h>
40
#include <mach-o/stab.h>
41
#include <mach-o/arm64/reloc.h>
42
#include <mach-o/x86_64/reloc.h>
43
#else
44
#include <elf.h>
45
#endif
46

47
#ifdef HAVE_CAPSTONE
48
#include <capstone.h>
49
#endif
50

51
// Fallback definitions for constants that the system <elf.h> may not
// provide

#if !defined R_AARCH64_MOVW_UABS_G0_NC
#define R_AARCH64_MOVW_UABS_G0_NC 264
#endif

#if !defined R_AARCH64_MOVW_UABS_G1_NC
#define R_AARCH64_MOVW_UABS_G1_NC 266
#endif

#if !defined R_AARCH64_MOVW_UABS_G2_NC
#define R_AARCH64_MOVW_UABS_G2_NC 268
#endif

#if !defined R_AARCH64_MOVW_UABS_G3
#define R_AARCH64_MOVW_UABS_G3 269
#endif

#if !defined SHT_X86_64_UNWIND
#define SHT_X86_64_UNWIND 0x70000001
#endif
70

71
// Sizing parameters for the code cache
#define CODE_PAGE_ALIGN   4096       // Alignment passed to map_jit_pages
#define CODE_PAGE_SIZE    0x400000   // Bytes mapped for each code page
#define THREAD_CACHE_SIZE 0x10000    // Minimum chunk a thread takes globally
#define CODE_BLOB_ALIGN   256        // Alignment of the start of each blob
#define MIN_BLOB_SIZE     0x4000     // Blob size requested when hint is zero

// Expand an integer into a comma-separated list of its bytes, least
// significant byte first, for use inside a byte array initialiser.  Each
// element is fully parenthesised so it remains a single expression
// regardless of the surrounding context.
#define __IMM64(x) __IMM32(x), __IMM32((x) >> 32)
#define __IMM32(x) __IMM16(x), __IMM16((x) >> 16)
#define __IMM16(x) ((x) & 0xff), (((x) >> 8) & 0xff)
80

81
STATIC_ASSERT(MIN_BLOB_SIZE <= THREAD_CACHE_SIZE);
82
STATIC_ASSERT(MIN_BLOB_SIZE % CODE_BLOB_ALIGN == 0);
83
STATIC_ASSERT(CODE_PAGE_SIZE % THREAD_CACHE_SIZE == 0);
84

85
typedef struct _code_page code_page_t;

// A textual annotation attached to an address in the generated code
typedef struct {
   uintptr_t  addr;   // Blob write pointer when the comment was added
   char      *text;   // Text printed ahead of the instruction at addr
} code_comment_t;

// Growable array of annotations belonging to one code span
typedef struct {
   unsigned        count;      // Number of entries in use
   unsigned        max;        // Number of entries allocated
   code_comment_t *comments;   // Array storage, NULL until first use
} code_debug_t;
97

98
typedef struct _code_span {
99
   code_cache_t *owner;
100
   code_span_t  *next;
101
   ident_t       name;
102
   uint8_t      *base;
103
   size_t        size;
104
#ifdef DEBUG
105
   code_debug_t  debug;
106
#endif
107
} code_span_t;
108

109
typedef struct _patch_list {
110
   patch_list_t    *next;
111
   uint8_t         *wptr;
112
   jit_label_t      label;
113
   code_patch_fn_t  fn;
114
} patch_list_t;
115

116
typedef struct _code_page {
117
   code_cache_t *owner;
118
   code_page_t  *next;
119
   uint8_t      *mem;
120
} code_page_t;
121

122
typedef struct _code_cache {
123
   nvc_lock_t   lock;
124
   code_page_t *pages;
125
   code_span_t *spans;
126
   code_span_t *freelist[MAX_THREADS];
127
   code_span_t *globalfree;
128
   FILE        *perfmap;
129
#ifdef HAVE_CAPSTONE
130
   csh          capstone;
131
#endif
132
#ifdef DEBUG
133
   size_t       used;
134
#endif
135
} code_cache_t;
136

137
static void code_disassemble(code_span_t *span, uintptr_t mark,
138
                             struct cpu_state *cpu);
139

140
// Unwinder callback registered for each code page: fills in the frame
// description when the address falls inside one of the cache's spans.
// The context argument is the owning code_cache_t.
static void code_cache_unwinder(uintptr_t addr, debug_frame_t *frame,
                                void *context)
{
   code_cache_t *cache = context;
   const uint8_t *pc = (uint8_t *)addr;

   for (code_span_t *it = cache->spans; it != NULL; it = it->next) {
      if (pc < it->base || pc >= it->base + it->size)
         continue;   // Address is outside this span

      frame->kind = FRAME_VHDL;
      frame->disp = pc - it->base;
      frame->symbol = istr(it->name);
   }
}
×
154

155
static void code_fault_handler(int sig, void *addr, struct cpu_state *cpu,
×
156
                               void *context)
157
{
158
   code_page_t *page = context;
×
159

160
   const uint8_t *pc = (uint8_t *)cpu->pc;
×
161
   if (pc < page->mem || pc > page->mem + CODE_PAGE_SIZE)
×
162
      return;
163

164
   uintptr_t mark = cpu->pc;
×
165
#ifndef __MINGW32__
166
   if (sig == SIGTRAP)
×
167
      mark--;   // Point to faulting instruction
168
#endif
169

170
   for (code_span_t *span = page->owner->spans; span; span = span->next) {
×
171
      if (pc >= span->base && pc < span->base + span->size && span->name)
×
172
         code_disassemble(span, mark, cpu);
173
   }
174
}
175

176
#ifdef DEBUG
177
// Returns true if the range [base, base + size) lies entirely within one
// of the cache's pages.  The cache lock must be held by the caller.
static bool code_cache_contains(code_cache_t *code, uint8_t *base, size_t size)
{
   assert_lock_held(&code->lock);

   code_page_t *page = code->pages;
   while (page != NULL) {
      const bool starts_inside = base >= page->mem;
      if (starts_inside && base + size <= page->mem + CODE_PAGE_SIZE)
         return true;
      page = page->next;
   }

   return false;
}
188
#endif
189

190
// Allocate a span describing [base, base + size) and push it onto the
// front of the cache's span list.  Takes the cache lock internally.
static code_span_t *code_span_new(code_cache_t *code, ident_t name,
                                  uint8_t *base, size_t size)
{
   SCOPED_LOCK(code->lock);

   assert(code_cache_contains(code, base, size));

   code_span_t *node = xcalloc(sizeof(code_span_t));
   node->owner = code;
   node->name  = name;
   node->base  = base;
   node->size  = size;

   // Link at the head of the list
   node->next  = code->spans;
   code->spans = node;

   return node;
}
207

208
// Map a fresh code page, register its fault handler and unwinder, and
// make a span covering the whole page the new global free span.  The
// cache lock must be held by the caller.
static void code_page_new(code_cache_t *code)
{
   assert_lock_held(&code->lock);

   code_page_t *fresh = xcalloc(sizeof(code_page_t));
   fresh->owner = code;
   fresh->next  = code->pages;
   fresh->mem   = map_jit_pages(CODE_PAGE_ALIGN, CODE_PAGE_SIZE);

   add_fault_handler(code_fault_handler, fresh);
   debug_add_unwinder(fresh->mem, CODE_PAGE_SIZE, code_cache_unwinder, code);

   code->pages = fresh;

   // The entire page starts out as unnamed free space
   code_span_t *whole = xcalloc(sizeof(code_span_t));
   whole->owner = code;
   whole->base  = fresh->mem;
   whole->size  = CODE_PAGE_SIZE;
   whole->next  = code->spans;

   code->spans = whole;
   code->globalfree = whole;
}
2,828✔
230

231
code_cache_t *code_cache_new(void)
2,827✔
232
{
233
   code_cache_t *code = xcalloc(sizeof(code_cache_t));
2,827✔
234

235
   {
236
      SCOPED_LOCK(code->lock);
5,654✔
237
      code_page_new(code);
2,827✔
238
   }
239

240
#ifdef HAVE_CAPSTONE
241
#if defined ARCH_X86_64
242
   if (cs_open(CS_ARCH_X86, CS_MODE_64, &(code->capstone)) != CS_ERR_OK)
243
      fatal_trace("failed to init capstone for x86_64");
244
#elif defined ARCH_ARM64
245
   if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &(code->capstone)) != CS_ERR_OK)
246
      fatal_trace("failed to init capstone for Arm64");
247
#else
248
#error Cannot configure capstone for this architecture
249
#endif
250

251
   if (cs_option(code->capstone, CS_OPT_DETAIL, 1) != CS_ERR_OK)
252
      fatal_trace("failed to set capstone detailed mode");
253
#endif
254

255
   return code;
2,827✔
256
}
257

258
// Release every resource owned by the cache: the mapped pages together
// with their fault handlers and unwinders, all span descriptors, the
// disassembler handle, the perf map file, and the cache itself.
void code_cache_free(code_cache_t *code)
{
   for (code_page_t *it = code->pages, *tmp; it; it = tmp) {
      debug_remove_unwinder(it->mem);
      remove_fault_handler(code_fault_handler, it);

      nvc_munmap(it->mem, CODE_PAGE_SIZE);

      tmp = it->next;
      free(it);
   }

   for (code_span_t *it = code->spans, *tmp; it; it = tmp) {
      tmp = it->next;

#ifdef DEBUG
      // The comment strings were handed over to the span when they were
      // added so they must be released along with the array
      for (unsigned i = 0; i < it->debug.count; i++)
         free(it->debug.comments[i].text);
      free(it->debug.comments);
#endif

      free(it);
   }

#ifdef HAVE_CAPSTONE
   cs_close(&(code->capstone));
#endif

   // The perf map is opened lazily and may never have been created
   if (code->perfmap != NULL)
      fclose(code->perfmap);

#ifdef DEBUG
   debugf("JIT code footprint: %zu bytes", code->used);
#endif

   free(code);
}
2,825✔
285

286
#ifdef HAVE_CAPSTONE
287
// Write spaces to stdout until column "tab" is reached and return the
// resulting column.  Nothing is written if "col" is already at or past
// "tab", in which case "col" is returned unchanged.
static int code_print_spaces(int col, int tab)
{
   int pos = col;
   while (pos < tab) {
      fputc(' ', stdout);
      pos++;
   }
   return pos;
}
293
#endif
294

295
// Print a disassembly of the span to stdout, interleaved with any debug
// comments.  If "mark" is non-zero an arrow is printed against the first
// instruction that follows the marked address, and when "cpu" is not
// NULL the saved register values are dumped after it.  Does nothing
// unless built with Capstone.
static void code_disassemble(code_span_t *span, uintptr_t mark,
                             struct cpu_state *cpu)
{
#ifdef HAVE_CAPSTONE
   SCOPED_LOCK(span->owner->lock);

   // Banner line ending with the span name
   printf("--");

   const int namelen = ident_len(span->name);
   for (int remain = 72 - namelen; remain > 0; remain--)
      fputc('-', stdout);

   printf(" %s ----\n", istr(span->name));

   cs_insn *insn = cs_malloc(span->owner->capstone);

#ifdef DEBUG
   code_comment_t *comment = span->debug.comments;
#endif

   const uint8_t *const eptr = span->base + span->size;
   const uint8_t *ptr = span->base;
   while (ptr < eptr) {
      uint64_t address = (uint64_t)ptr;

#ifdef DEBUG
      // Emit all comments recorded at or before this address
      while (comment < span->debug.comments + span->debug.count
             && comment->addr <= address) {
         printf("%30s;; %s\n", "", comment->text);
         comment++;
      }
#endif

      size_t size = eptr - ptr;
      int col = 0;
      const bool decoded =
         cs_disasm_iter(span->owner->capstone, &ptr, &size, &address, insn);
      if (decoded) {
         // Hex dump of the instruction bytes
         char hex1[33], *p = hex1;
         for (size_t k = 0; k < insn->size; k++)
            p += checked_sprintf(p, hex1 + sizeof(hex1) - p, "%02x",
                                 insn->bytes[k]);

         col = printf("%-12" PRIx64 " %-16.16s %s %s", insn->address,
                      hex1, insn->mnemonic, insn->op_str);

#ifdef ARCH_X86_64
         // Annotate 64-bit immediate loads with a symbol name if known
         if (strcmp(insn->mnemonic, "movabs") == 0) {
            const cs_x86_op *src = &(insn->detail->x86.operands[1]);
            if (src->type == X86_OP_IMM) {
               const char *sym = debug_symbol_name((void *)src->imm);
               if (sym != NULL) {
                  col = code_print_spaces(col, 60);
                  col += printf(" ; %s", sym);
               }
            }
         }
#endif

         // Overflow the remaining hex digits onto a second line
         if (strlen(hex1) > 16)
            col = printf("\n%15s -%-16s", "", hex1 + 16) - 1;
      }
      else {
         // Could not decode: dump the raw data and step over it
#ifdef ARCH_ARM64
         col = printf("%-12" PRIx64 " %-16.08x %s 0x%08x", (uint64_t)ptr,
                      *(uint32_t *)ptr, ".word", *(uint32_t *)ptr);
         ptr += 4;
#else
         col = printf("%-12" PRIx64 " %-16.02x %s 0x%02x", (uint64_t)ptr,
                      *ptr, ".byte", *ptr);
         ptr++;
#endif
      }

      const bool at_mark = mark != 0 && (ptr >= eptr || address > mark);
      if (!at_mark) {
         printf("\n");
         continue;
      }

      col = code_print_spaces(col, 66);
      printf("<=============\n");

      if (cpu != NULL) {
#ifdef ARCH_X86_64
         static const char *const names[] = {
            "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
            "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"
         };
         for (int i = 0; i < ARRAY_LEN(names); i++)
            printf("\t%s\t%"PRIxPTR"\n", names[i], cpu->regs[i]);
#else
         for (int i = 0; i < 32; i++)
            printf("\tR%d\t%"PRIxPTR"\n", i, cpu->regs[i]);
#endif
      }

      mark = 0;   // Only mark one instruction
   }

   cs_free(insn, 1);

   for (int i = 0; i < 80; i++)
      fputc('-', stdout);
   printf("\n");
   fflush(stdout);
#endif
}
394

395
// Append an entry for the span to /tmp/perf-<pid>.map, creating the file
// on first use.  If the file cannot be created a warning is printed and
// the OPT_PERF_MAP option is cleared.
static void code_write_perf_map(code_span_t *span)
{
   code_cache_t *const owner = span->owner;

   SCOPED_LOCK(owner->lock);

   if (owner->perfmap == NULL) {
      char *fname LOCAL = xasprintf("/tmp/perf-%d.map", getpid());

      owner->perfmap = fopen(fname, "w");
      if (owner->perfmap == NULL) {
         warnf("cannot create %s: %s", fname, last_os_error());
         opt_set_int(OPT_PERF_MAP, 0);
         return;
      }

      debugf("writing perf map to %s", fname);
   }

   fprintf(owner->perfmap, "%p 0x%zx %s\n", span->base, span->size,
           istr(span->name));
   fflush(owner->perfmap);
}
414

415
// Begin generating code for "name".  Space is taken from the calling
// thread's free span, which is refilled from the global free span when
// it is smaller than the request ("hint" bytes, or MIN_BLOB_SIZE when
// hint is zero).  Returns NULL if the global free span is empty.  On
// success the thread is switched to WX_WRITE mode and the returned blob
// initially spans all of the thread's free space.
code_blob_t *code_blob_new(code_cache_t *code, ident_t name, size_t hint)
{
   code_span_t **slot = &(code->freelist[thread_id()]);

   code_span_t *local = relaxed_load(slot);
   if (local == NULL) {
      // First request from this thread: start with an empty free span
      local = code_span_new(code, NULL, code->pages->mem, 0);
      relaxed_store(slot, local);
   }

   const size_t reqsz = hint != 0 ? hint : MIN_BLOB_SIZE;

   if (local->size < reqsz) {
      SCOPED_LOCK(code->lock);

      code_span_t *const pool = code->globalfree;
      if (pool->size == 0)
         return NULL;

#ifdef DEBUG
      if (local->size > 0)
         debugf("thread %d needs new code cache from global free list "
                "(requested %zu bytes, wasted %zu bytes)",
                thread_id(), reqsz, local->size);
#endif

      const size_t chunksz = MAX(reqsz, THREAD_CACHE_SIZE);
      const size_t alignedsz = ALIGN_UP(chunksz, CODE_BLOB_ALIGN);
      const size_t take = MIN(pool->size, alignedsz);

      // Move the front of the global free span to this thread
      local->base = pool->base;
      local->size = take;

      pool->base += take;
      pool->size -= take;

      if (pool->size == 0) {
         DEBUG_ONLY(debugf("requesting new %d byte code page", CODE_PAGE_SIZE));
         code_page_new(code);
         assert(code->globalfree->size == CODE_PAGE_SIZE);
      }
   }

   assert(reqsz <= local->size);
   assert(((uintptr_t)local->base & (CODE_BLOB_ALIGN - 1)) == 0);

   // The new span claims everything the thread has; the unused tail is
   // handed back in code_blob_finalise
   code_span_t *span = code_span_new(code, name, local->base, local->size);

   local->base += span->size;
   local->size -= span->size;

   code_blob_t *blob = xcalloc(sizeof(code_blob_t));
   blob->span = span;
   blob->wptr = span->base;

   thread_wx_mode(WX_WRITE);

   return blob;
}
473

474
// Finish generating code for a blob and free it.  The span is trimmed to
// the bytes actually written and the remainder, from the next
// CODE_BLOB_ALIGN boundary, is returned to the thread's free span.  The
// entry point is then published through "entry" with release semantics.
// If the blob overflowed all of its memory is returned instead and
// "entry" is left untouched.
void code_blob_finalise(code_blob_t *blob, jit_entry_fn_t *entry)
{
   code_span_t *const span = blob->span;
   code_cache_t *const owner = span->owner;

   span->size = blob->wptr - span->base;

   code_span_t *freespan = relaxed_load(&(owner->freelist[thread_id()]));
   assert(freespan->size == 0);

   ihash_free(blob->labels);
   blob->labels = NULL;

   if (unlikely(blob->patches != NULL))
      fatal_trace("not all labels in %s were patched", istr(span->name));
   else if (unlikely(blob->overflow)) {
      // Return all the memory
      freespan->size = freespan->base - span->base;
      freespan->base = span->base;
      free(blob);
      return;
   }
   else if (span->size == 0)
      fatal_trace("code span %s is empty", istr(span->name));

   // Give back the unused tail starting at the next aligned address
   uint8_t *const tail = ALIGN_UP(blob->wptr, CODE_BLOB_ALIGN);
   freespan->size = freespan->base - tail;
   freespan->base = tail;

   const bool verbose = opt_get_verbose(OPT_ASM_VERBOSE, istr(span->name));
   if (verbose) {
      color_printf("\n$bold$$blue$");
      code_disassemble(span, 0, NULL);
      color_printf("$$\n");
   }

   __builtin___clear_cache((char *)span->base, (char *)blob->wptr);

   thread_wx_mode(WX_EXECUTE);

   store_release(entry, (jit_entry_fn_t)span->base);

   DEBUG_ONLY(relaxed_add(&span->owner->used, span->size));
   free(blob);

   if (opt_get_int(OPT_PERF_MAP))
      code_write_perf_map(span);
}
519

520
// Append "len" bytes to the blob.  If they do not fit in the blob's span
// a warning is printed, all pending patches are discarded, and the blob
// is flagged as overflowed; every later call is then a no-op.
void code_blob_emit(code_blob_t *blob, const uint8_t *bytes, size_t len)
{
   if (unlikely(blob->overflow))
      return;

   // Compare against the remaining capacity rather than computing
   // wptr + len, which could point beyond the span or wrap around
   const uint8_t *const limit = blob->span->base + blob->span->size;
   if (unlikely(blob->wptr > limit || len > (size_t)(limit - blob->wptr))) {
      warnf("JIT code buffer for %s too small", istr(blob->span->name));
      for (patch_list_t *it = blob->patches, *tmp; it; it = tmp) {
         tmp = it->next;
         free(it);
      }
      blob->patches = NULL;
      blob->overflow = true;
      return;
   }

   // Skip the copy entirely for an empty write so that a NULL source
   // pointer is never passed to memcpy
   if (len > 0) {
      memcpy(blob->wptr, bytes, len);
      blob->wptr += len;
   }
}
538

539
// Emit padding bytes until the blob's write pointer is a multiple of
// "align", which must be a power of two.  The pad byte is 0x90 on x86-64
// and zero elsewhere.  Stops early if the blob overflows.
void code_blob_align(code_blob_t *blob, unsigned align)
{
#ifdef ARCH_X86_64
   const uint8_t pad[] = { 0x90 };
#else
   const uint8_t pad[] = { 0x00 };
#endif

   assert(is_power_of_2(align));
   assert(align % ARRAY_LEN(pad) == 0);

   for (;;) {
      const uintptr_t misalign = (uintptr_t)blob->wptr & (align - 1);
      if (misalign == 0 || blob->overflow)
         break;

      code_blob_emit(blob, pad, ARRAY_LEN(pad));
   }
}
6,131✔
553

554
// Bind "label" to the current write pointer and run, then discard, every
// pending patch that was waiting on it.  No-op once the blob has
// overflowed.
void code_blob_mark(code_blob_t *blob, jit_label_t label)
{
   if (unlikely(blob->overflow))
      return;

   if (blob->labels == NULL)
      blob->labels = ihash_new(256);

   ihash_put(blob->labels, label, blob->wptr);

   patch_list_t **link = &(blob->patches);
   while (*link != NULL) {
      if ((*link)->label != label) {
         // Waiting on a different label: leave it in the list
         link = &((*link)->next);
         continue;
      }

      patch_list_t *const rest = (*link)->next;
      (*(*link)->fn)(blob, label, (*link)->wptr, blob->wptr);
      free(*link);
      *link = rest;
   }
}
574

575
// Request that "fn" be called with the current write pointer and the
// address of "label".  The call happens immediately if the label has
// already been marked, otherwise it is queued until code_blob_mark is
// called for that label.  No-op once the blob has overflowed.
void code_blob_patch(code_blob_t *blob, jit_label_t label, code_patch_fn_t fn)
{
   if (unlikely(blob->overflow))
      return;

   void *target = NULL;
   if (blob->labels != NULL)
      target = ihash_get(blob->labels, label);

   if (target != NULL) {
      (*fn)(blob, label, blob->wptr, target);
      return;
   }

   // Label not seen yet: remember where the patch is needed
   patch_list_t *pending = xmalloc(sizeof(patch_list_t));
   pending->wptr  = blob->wptr;
   pending->label = label;
   pending->fn    = fn;
   pending->next  = blob->patches;

   blob->patches = pending;
}
592

593
#ifdef DEBUG
594
// Append a textual rendering of a JIT operand to the text buffer.  The
// notation depends on the operand kind; unrecognised kinds print "???".
static void code_blob_print_value(text_buf_t *tb, jit_value_t value)
{
   switch (value.kind) {
   case JIT_VALUE_REG:
      tb_printf(tb, "R%d", value.reg);
      return;

   case JIT_VALUE_INT64:
      // Values of 4096 and above are shown in hexadecimal
      if (value.int64 >= 4096)
         tb_printf(tb, "#0x%"PRIx64, value.int64);
      else
         tb_printf(tb, "#%"PRIi64, value.int64);
      return;

   case JIT_VALUE_DOUBLE:
      tb_printf(tb, "%%%g", value.dval);
      return;

   case JIT_ADDR_CPOOL:
      tb_printf(tb, "[CP+%"PRIi64"]", value.int64);
      return;

   case JIT_ADDR_REG:
      tb_printf(tb, "[R%d", value.reg);
      if (value.disp != 0)
         tb_printf(tb, "+%d", value.disp);
      tb_cat(tb, "]");
      return;

   case JIT_ADDR_ABS:
      tb_printf(tb, "[#%016"PRIx64"]", value.int64);
      return;

   case JIT_ADDR_COVER:
      tb_printf(tb, "@%"PRIi64, value.int64);
      return;

   case JIT_VALUE_LABEL:
      tb_printf(tb, "%d", value.label);
      return;

   case JIT_VALUE_HANDLE:
      tb_printf(tb, "<%d>", value.handle);
      return;

   case JIT_VALUE_EXIT:
      tb_printf(tb, "%s", jit_exit_name(value.exit));
      return;

   case JIT_VALUE_LOC:
      tb_printf(tb, "<%s:%d>", loc_file_str(&value.loc), value.loc.first_line);
      return;

   case JIT_VALUE_FOREIGN:
      tb_printf(tb, "$%s", istr(ffi_get_sym(value.foreign)));
      return;

   case JIT_VALUE_LOCUS:
      tb_printf(tb, "%s%+d", istr(value.ident), value.disp);
      return;

   case JIT_VALUE_VPOS:
      tb_printf(tb, "%u:%u", value.vpos.block, value.vpos.op);
      return;

   default:
      tb_cat(tb, "???");
      return;
   }
}
332✔
649

650
// Record "text" as a comment at the blob's current write pointer,
// growing the span's comment array when it is full.  The pointer is
// stored as-is, not copied.
static void code_blob_add_comment(code_blob_t *blob, char *text)
{
   code_debug_t *dbg = &(blob->span->debug);

   const bool full = dbg->count == dbg->max;
   if (full) {
      // Double the capacity, starting from 128 entries
      dbg->max = MAX(128, dbg->max * 2);
      dbg->comments = xrealloc_array(dbg->comments, dbg->max,
                                     sizeof(code_comment_t));
   }

   code_comment_t *slot = &(dbg->comments[dbg->count]);
   slot->addr = (uintptr_t)blob->wptr;
   slot->text = text;

   dbg->count++;
}
664

665
// Format a JIT IR instruction as text and attach it to the blob as a
// comment at the current write pointer.  The mnemonic, including any
// condition code and size suffix, is padded to ten columns and followed
// by the result register and up to two operands.
void code_blob_print_ir(code_blob_t *blob, jit_ir_t *ir)
{
   LOCAL_TEXT_BUF tb = tb_new();
   tb_printf(tb, "%s%s", jit_op_name(ir->op), jit_cc_name(ir->cc));

   if (ir->size != JIT_SZ_UNSPEC)
      tb_printf(tb, ".%d", 1 << (3 + ir->size));

   // Compute the padding in signed arithmetic.  NOTE(review): if tb_len
   // returns an unsigned type then "10 - tb_len(tb)" wraps around for
   // mnemonics longer than ten characters and yields a negative field
   // width, which printf treats as left-justified padding - confirm
   // against the declaration of tb_len.
   const int used = (int)tb_len(tb);
   const int pad = used < 10 ? 10 - used : 0;
   tb_printf(tb, "%*.s", pad, "");

   if (ir->result != JIT_REG_INVALID)
      tb_printf(tb, "R%d", ir->result);

   if (ir->arg1.kind != JIT_VALUE_INVALID) {
      if (ir->result != JIT_REG_INVALID)
         tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg1);
   }

   if (ir->arg2.kind != JIT_VALUE_INVALID) {
      tb_cat(tb, ", ");
      code_blob_print_value(tb, ir->arg2);
   }

   // The claimed string is now owned by the span's comment list
   code_blob_add_comment(blob, tb_claim(tb));
}
291✔
691

692
// Format a message printf-style and attach it to the blob as a comment
// at the current write pointer.  Growing the comment array is left
// entirely to code_blob_add_comment rather than being repeated here.
void code_blob_printf(code_blob_t *blob, const char *fmt, ...)
{
   va_list ap;
   va_start(ap, fmt);
   char *text = xvasprintf(fmt, ap);
   va_end(ap);

   // The formatted string is now owned by the span's comment list
   code_blob_add_comment(blob, text);
}
×
710
#endif   // DEBUG
711

712
#ifdef ARCH_ARM64
713
// Return the address of a veneer within the blob's span that loads
// "dest" into X16 and branches to it.  An identical byte sequence
// already present in the span is reused, otherwise a new veneer is
// emitted at the current write pointer.
static void *arm64_emit_trampoline(code_blob_t *blob, uintptr_t dest)
{
   const uint8_t veneer[] = {
      0x50, 0x00, 0x00, 0x58,   // LDR X16, [PC+8]
      0x00, 0x02, 0x1f, 0xd6,   // BR X16
      __IMM64(dest)
   };

   void *existing = memmem(blob->span->base, blob->span->size,
                           veneer, ARRAY_LEN(veneer));
   if (existing != NULL)
      return existing;

   void *const fresh = blob->wptr;
   code_blob_emit(blob, veneer, ARRAY_LEN(veneer));
   return fresh;
}
731
#else
732
#define arm64_emit_trampoline(blob, dest) NULL
733
#endif
734

735
#if defined __MINGW32__
736
// Load an x86-64 COFF object held in memory at "data" into the blob.
// Code and initialised data sections are copied into the blob at their
// required alignment, relocations are then applied in place, and any
// .pdata section is registered with RtlAddFunctionTable.  Non-empty BSS
// sections and relocation types other than ADDR64 and ADDR32NB are
// fatal errors.  Returns early, without relocating, if the blob
// overflowed while copying sections.
static void code_load_pe(code_blob_t *blob, const void *data, size_t size)
{
   const IMAGE_FILE_HEADER *imghdr = data;

   if (imghdr->Machine != IMAGE_FILE_MACHINE_AMD64)
      fatal_trace("unknown target machine %x", imghdr->Machine);

   // The string table immediately follows the symbol table
   const IMAGE_SYMBOL *symtab = data + imghdr->PointerToSymbolTable;
   const char *strtab = data + imghdr->PointerToSymbolTable
      + imghdr->NumberOfSymbols * sizeof(IMAGE_SYMBOL);

   const IMAGE_SECTION_HEADER *sections =
      data + IMAGE_SIZEOF_FILE_HEADER + imghdr->SizeOfOptionalHeader;

   // Address each section was copied to.  NOTE(review): entries for
   // sections that are not copied below are left uninitialised.
   void **load_addr LOCAL =
      xmalloc_array(imghdr->NumberOfSections, sizeof(void *));

   for (int i = 0; i < imghdr->NumberOfSections; i++) {
      if ((sections[i].Characteristics & IMAGE_SCN_CNT_CODE)
          || (sections[i].Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)) {
         // The alignment is encoded as log2(bytes) + 1 in bits 20-23.
         // A value of zero means no alignment flag was given: shifting
         // by (0 - 1) would be undefined so fall back to 16 bytes,
         // which winnt.h describes as the default alignment.
         const unsigned field =
            (sections[i].Characteristics & IMAGE_SCN_ALIGN_MASK) >> 20;
         const unsigned align = field == 0 ? 16 : 1u << (field - 1);
         code_blob_align(blob, align);
         load_addr[i] = blob->wptr;
         code_blob_emit(blob, data + sections[i].PointerToRawData,
                        sections[i].SizeOfRawData);
      }
      else if ((sections[i].Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
               && sections[i].Misc.VirtualSize > 0)
         fatal_trace("non-empty BSS not supported");
   }

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   for (int i = 0; i < imghdr->NumberOfSections; i++) {
      const IMAGE_RELOCATION *relocs = data + sections[i].PointerToRelocations;
      for (int j = 0; j < sections[i].NumberOfRelocations; j++) {
         const char *name = NULL;
         char tmp[9];

         assert(relocs[j].SymbolTableIndex < imghdr->NumberOfSymbols);
         const IMAGE_SYMBOL *sym = symtab + relocs[j].SymbolTableIndex;

         // Short names are stored inline and may lack a terminator;
         // long names are an offset into the string table
         if (sym->N.Name.Short) {
            memcpy(tmp, sym->N.ShortName, 8);
            tmp[8] = '\0';
            name = tmp;
         }
         else
            name = strtab + sym->N.Name.Long;

         // Resolve the symbol to a loaded section or an external address
         void *ptr = NULL;
         if (sym->SectionNumber > 0) {
            assert(sym->SectionNumber - 1 < imghdr->NumberOfSections);
            ptr = load_addr[sym->SectionNumber - 1];
         }
         else if (strcmp(name, "___chkstk_ms") == 0) {
            extern void ___chkstk_ms(void);
            ptr = &___chkstk_ms;
         }
         else
            ptr = ffi_find_symbol(NULL, name);

         if (ptr == NULL)
            fatal_trace("failed to resolve symbol %s", name);

         void *patch = load_addr[i] + relocs[j].VirtualAddress;
         assert((uint8_t *)patch >= blob->span->base);
         assert((uint8_t *)patch < blob->span->base + blob->span->size);

         switch (relocs[j].Type) {
         case IMAGE_REL_AMD64_ADDR64:
            *(uint64_t *)patch += (uint64_t)ptr;
            break;
         case IMAGE_REL_AMD64_ADDR32NB:
            // Offset relative to the start of the span
            *(uint32_t *)patch += (uint32_t)(ptr - (void *)blob->span->base);
            break;
         default:
            // Trim the span so only the bytes written are disassembled
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %d for symbol %s",
                        relocs[j].Type, name);
         }
      }

      if (strncmp((const char *)sections[i].Name, ".pdata",
                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
         assert(sections[i].SizeOfRawData % sizeof(RUNTIME_FUNCTION) == 0);
         const int count = sections[i].SizeOfRawData / sizeof(RUNTIME_FUNCTION);
         const DWORD64 base = (DWORD64)blob->span->base;

         // TODO: we should also call RtlDeleteFunctionTable at some point
         if (!RtlAddFunctionTable(load_addr[i], count, base))
            fatal_trace("RtlAddFunctionTable failed: %s", last_os_error());
      }
   }
}
833
#elif defined __APPLE__
834
static void code_load_macho(code_blob_t *blob, const void *data, size_t size)
835
{
836
   const void *rptr = data;
837

838
   const struct mach_header_64 *fhdr = rptr;
839
   rptr += sizeof(struct mach_header_64);
840

841
   if (fhdr->magic != MH_MAGIC_64)
842
      fatal_trace("bad Mach-O magic %x", fhdr->magic);
843

844
   const struct segment_command_64 *seg = NULL;
845
   const struct symtab_command *symtab = NULL;
846

847
   void **load_addr LOCAL = NULL;
848

849
   for (int i = 0; i < fhdr->ncmds; i++) {
850
      const struct load_command *load = rptr;
851
      switch (load->cmd) {
852
      case LC_SEGMENT_64:
853
         {
854
            seg = rptr;
855
            load_addr = xmalloc_array(seg->nsects, sizeof(void *));
856

857
            for (int j = 0; j < seg->nsects; j++) {
858
               const struct section_64 *sec =
859
                  (void *)seg + sizeof(struct segment_command_64)
860
                  + j * sizeof(struct section_64);
861
               code_blob_align(blob, 1 << sec->align);
862
               load_addr[j] = blob->wptr;
863
               code_blob_emit(blob, data + sec->offset, sec->size);
864
            }
865
         }
866
         break;
867
      case LC_SYMTAB:
868
         symtab = rptr;
869
         assert(symtab->cmdsize == sizeof(struct symtab_command));
870
         break;
871
      case LC_DATA_IN_CODE:
872
      case LC_LINKER_OPTIMIZATION_HINT:
873
      case LC_BUILD_VERSION:
874
      case LC_DYSYMTAB:
875
         break;
876
      default:
877
         warnf("unrecognised load command 0x%0x", load->cmd);
878
      }
879

880
      rptr += load->cmdsize;
881
   }
882
   assert(rptr == data + sizeof(struct mach_header_64) + fhdr->sizeofcmds);
883

884
   if (blob->overflow)
885
      return;   // Relocations might point outside of code span
886

887
   assert(seg != NULL);
888
   assert(symtab != NULL);
889

890
   for (int i = 0; i < seg->nsects; i++) {
891
      const struct section_64 *sec =
892
         (void *)seg + sizeof(struct segment_command_64)
893
         + i * sizeof(struct section_64);
894

895
      uint32_t addend = 0;
896
      for (int j = 0; j < sec->nreloc; j++) {
897
         const struct relocation_info *rel =
898
            data + sec->reloff + j * sizeof(struct relocation_info);
899
         const char *name = NULL;
900
         void *ptr = NULL;
901
         if (rel->r_extern) {
902
            assert(rel->r_symbolnum < symtab->nsyms);
903
            const struct nlist_64 *nl = data + symtab->symoff
904
               + rel->r_symbolnum * sizeof(struct nlist_64);
905
            name = data + symtab->stroff + nl->n_un.n_strx;
906

907
            if (nl->n_type & N_EXT) {
908
               if (icmp(blob->span->name, name + 1))
909
                  ptr = blob->span->base;
910
               else if ((ptr = ffi_find_symbol(NULL, name + 1)) == NULL)
911
                  fatal_trace("failed to resolve symbol %s", name + 1);
912
            }
913
            else if (nl->n_sect != NO_SECT)
914
               ptr = blob->span->base + nl->n_value;
915
         }
916
         else
917
            ptr = blob->span->base;
918

919
         ptr += addend;
920
         addend = 0;
921

922
         void *patch = load_addr[i] + rel->r_address;
923
         assert((uint8_t *)patch >= blob->span->base);
924
         assert((uint8_t *)patch < blob->span->base + blob->span->size);
925

926
         switch (rel->r_type) {
927
#ifdef ARCH_ARM64
928
         case ARM64_RELOC_UNSIGNED:
929
            assert(rel->r_length == 3);
930
            *(void **)patch = ptr;
931
            break;
932
         case ARM64_RELOC_SUBTRACTOR:
933
            break;   // What is this?
934
         case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
935
         case ARM64_RELOC_PAGEOFF12:
936
            switch ((*(uint32_t *)patch >> 23) & 0x7f) {
937
            case 0b1111010:   // LDR (immediate, SIMD&FP)
938
            case 0b1110010:   // LDR (immediate)
939
               assert(*(uint32_t *)patch & (1 << 30));  // Quadword
940
               assert(((uintptr_t)ptr & 7) == 0);
941
               *(uint32_t *)patch |= (((uintptr_t)ptr & 0xfff) >> 3) << 10;
942
               break;
943
            case 0b0100010:   // ADD (immediate)
944
               *(uint32_t *)patch |= ((uintptr_t)ptr & 0xfff) << 10;
945
               break;
946
            default:
947
               blob->span->size = blob->wptr - blob->span->base;
948
               code_disassemble(blob->span, (uintptr_t)patch, NULL);
949
               fatal_trace("cannot patch instruction");
950
            }
951
            break;
952
         case ARM64_RELOC_GOT_LOAD_PAGE21:
953
         case ARM64_RELOC_PAGE21:
954
            {
955
               const intptr_t dst_page = (intptr_t)ptr & ~UINT64_C(0xfff);
956
               const intptr_t src_page = (intptr_t)patch & ~UINT64_C(0xfff);
957
               const intptr_t upper21 = (dst_page - src_page) >> 12;
958
               *(uint32_t *)patch |= (upper21 & 3) << 29;
959
               *(uint32_t *)patch |= ((upper21 >> 2) & 0x7ffff) << 5;
960
            }
961
            break;
962
         case ARM64_RELOC_BRANCH26:
963
            {
964
               void *veneer = arm64_emit_trampoline(blob, (uintptr_t)ptr);
965
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
966
               *(uint32_t *)patch &= ~0x3ffffff;
967
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
968
            }
969
            break;
970
         case ARM64_RELOC_ADDEND:
971
            addend = rel->r_symbolnum;
972
            break;
973
#elif defined ARCH_X86_64
974
         case X86_64_RELOC_UNSIGNED:
975
            *(uint64_t *)patch += (uint64_t)ptr;
976
            break;
977
         case X86_64_RELOC_BRANCH:
978
            *(uint32_t *)patch += (uint32_t)(ptr - patch - 4);
979
            break;
980
#endif
981
         default:
982
            blob->span->size = blob->wptr - blob->span->base;
983
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
984
            fatal_trace("cannot handle relocation type %d for symbol %s",
985
                        rel->r_type, name);
986
         }
987
      }
988
   }
989
}
990
#elif !defined __MINGW32__
991
// Load a relocatable ELF64 object image held in memory at DATA into BLOB.
//
// The first pass copies every allocatable SHT_PROGBITS section into the
// blob at the alignment the section asks for and records where each one
// landed.  The second pass applies every SHT_RELA section whose target is
// one of those loaded sections.  Symbols of type STT_NOTYPE or STT_FUNC
// are resolved with ffi_find_symbol and STT_SECTION symbols resolve to the
// load address of the section they name.
//
// Malformed input and unsupported relocation types are reported through
// fatal_trace.  SIZE is currently unused.
static void code_load_elf(code_blob_t *blob, const void *data, size_t size)
{
   const Elf64_Ehdr *ehdr = data;

   if (ehdr->e_ident[EI_MAG0] != ELFMAG0
       || ehdr->e_ident[EI_MAG1] != ELFMAG1
       || ehdr->e_ident[EI_MAG2] != ELFMAG2
       || ehdr->e_ident[EI_MAG3] != ELFMAG3)
      fatal_trace("bad ELF magic");
   else if (ehdr->e_shentsize != sizeof(Elf64_Shdr))
      fatal_trace("bad section header size %d != %zu", ehdr->e_shentsize,
                  sizeof(Elf64_Shdr));

   // Section names are stored in the table selected by e_shstrndx
   const Elf64_Shdr *shstrtab_hdr =
      data + ehdr->e_shoff + ehdr->e_shstrndx * ehdr->e_shentsize;
   const char *shstrtab = data + shstrtab_hdr->sh_offset;

   // Indexed by section number; stays NULL for sections that are not copied
   void **load_addr LOCAL = xcalloc_array(ehdr->e_shnum, sizeof(void *));

   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;

      switch (shdr->sh_type) {
      case SHT_PROGBITS:
         if (shdr->sh_flags & SHF_ALLOC) {
            code_blob_align(blob, shdr->sh_addralign);
            load_addr[i] = blob->wptr;
            code_blob_emit(blob, data + shdr->sh_offset, shdr->sh_size);
         }
         break;

      case SHT_RELA:
         // Handled in second pass
         break;

      case SHT_NULL:
      case SHT_STRTAB:
      case SHT_X86_64_UNWIND:
      case SHT_SYMTAB:
         break;

      default:
         warnf("ignoring ELF section %s with type %x",
               shstrtab + shdr->sh_name, shdr->sh_type);
      }
   }

   if (blob->overflow)
      return;   // Relocations might point outside of code span

   for (int i = 0; i < ehdr->e_shnum; i++) {
      const Elf64_Shdr *shdr = data + ehdr->e_shoff + i * ehdr->e_shentsize;
      if (shdr->sh_type != SHT_RELA)
         continue;

      // Both links are used as section indexes below so reject anything
      // that would read past the section header table or load_addr
      if (shdr->sh_info >= ehdr->e_shnum)
         fatal_trace("section %s has invalid target section index %u",
                     shstrtab + shdr->sh_name, shdr->sh_info);
      else if (shdr->sh_link >= ehdr->e_shnum)
         fatal_trace("section %s has invalid symbol table index %u",
                     shstrtab + shdr->sh_name, shdr->sh_link);

      // sh_info of a relocation section is the section being patched
      const Elf64_Shdr *mod =
         data + ehdr->e_shoff + shdr->sh_info * ehdr->e_shentsize;
      if (mod->sh_type != SHT_PROGBITS || !(mod->sh_flags & SHF_ALLOC))
         continue;
      else if (load_addr[shdr->sh_info] == NULL)
         fatal_trace("section %s not loaded", shstrtab + mod->sh_name);

      // sh_link of a relocation section is its symbol table
      const Elf64_Shdr *symtab =
         data + ehdr->e_shoff + shdr->sh_link * ehdr->e_shentsize;
      if (symtab->sh_type != SHT_SYMTAB)
         fatal_trace("section %s is not a symbol table",
                     shstrtab + symtab->sh_name);
      else if (symtab->sh_link >= ehdr->e_shnum)
         fatal_trace("section %s has invalid string table index %u",
                     shstrtab + symtab->sh_name, symtab->sh_link);

      // Symbol names live in the string table linked from the symbol
      // table, which need not be the same table that holds section names
      const Elf64_Shdr *symstr_hdr =
         data + ehdr->e_shoff + symtab->sh_link * ehdr->e_shentsize;
      const char *symstr = data + symstr_hdr->sh_offset;

      const Elf64_Rela *endp = data + shdr->sh_offset + shdr->sh_size;
      for (const Elf64_Rela *r = data + shdr->sh_offset; r < endp; r++) {
         const Elf64_Sym *sym = data + symtab->sh_offset
            + ELF64_R_SYM(r->r_info) * symtab->sh_entsize;
         const char *sym_name = symstr + sym->st_name;

         char *ptr = NULL;
         switch (ELF64_ST_TYPE(sym->st_info)) {
         case STT_NOTYPE:
         case STT_FUNC:
            ptr = ffi_find_symbol(NULL, sym_name);
            break;
         case STT_SECTION:
            // Reserved indexes such as SHN_ABS are outside load_addr and
            // fall through to the unresolved symbol error below
            if (sym->st_shndx < ehdr->e_shnum)
               ptr = load_addr[sym->st_shndx];
            break;
         }

         if (ptr == NULL)
            fatal_trace("cannot resolve symbol %s type %d",
                        sym_name, ELF64_ST_TYPE(sym->st_info));

         ptr += r->r_addend;

         void *patch = load_addr[shdr->sh_info] + r->r_offset;
         assert(r->r_offset < mod->sh_size);

         switch (ELF64_R_TYPE(r->r_info)) {
         case R_X86_64_64:
            *(uint64_t *)patch = (uint64_t)ptr;
            break;
         case R_AARCH64_CALL26:
            {
               // Branch to a trampoline emitted into the blob and store
               // the word offset to it in the low 26 bits
               void *veneer = arm64_emit_trampoline(blob, (uintptr_t)ptr);
               const ptrdiff_t pcrel = (veneer - patch) >> 2;
               *(uint32_t *)patch &= ~0x3ffffff;
               *(uint32_t *)patch |= pcrel & 0x3ffffff;
            }
            break;
         case R_AARCH64_PREL64:
            *(uint64_t *)patch = ptr - (char *)patch;
            break;
         // Each MOVW_UABS_Gn relocation ORs one 16-bit slice of the
         // target address into bits [20:5] of the instruction word
         case R_AARCH64_MOVW_UABS_G0_NC:
            *(uint32_t *)patch |= ((uintptr_t)ptr & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G1_NC:
            *(uint32_t *)patch |= (((uintptr_t)ptr >> 16) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G2_NC:
            *(uint32_t *)patch |= (((uintptr_t)ptr >> 32) & 0xffff) << 5;
            break;
         case R_AARCH64_MOVW_UABS_G3:
            *(uint32_t *)patch |= (((uintptr_t)ptr >> 48) & 0xffff) << 5;
            break;
         default:
            blob->span->size = blob->wptr - blob->span->base;
            code_disassemble(blob->span, (uintptr_t)patch, NULL);
            fatal_trace("cannot handle relocation type %" PRIu64
                        " for symbol %s",
                        (uint64_t)ELF64_R_TYPE(r->r_info), sym_name);
         }
      }
   }
}
1120
#endif
1121

1122
// Load the in-memory object file DATA of SIZE bytes into BLOB using the
// loader for whichever object format this build targets: Mach-O when
// __APPLE__ is defined, PE when __MINGW32__ is defined, and ELF otherwise.
void code_load_object(code_blob_t *blob, const void *data, size_t size)
{
#ifdef __APPLE__
   code_load_macho(blob, data, size);
#elif defined(__MINGW32__)
   code_load_pe(blob, data, size);
#else
   code_load_elf(blob, data, size);
#endif
}
5,687✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc