tarantool / luajit / 6145668764

11 Sep 2023 11:17AM UTC coverage: 88.224% (-0.04%) from 88.259%

Event: push (GitHub)
Committer: fckxorg
Fix frame for more types of on-trace error messages.

Thanks to Maxim Kokryashkin.

(cherry-picked from commit d5bbf9cdb)

This patch fixes the same issue with the frame as the previous
one, but now for the table overflow error in the `err_msgv`
function. The test for the problem uses a table of GC
finalizers, although they are not required to reproduce the
issue; they are only used to make the test as simple as possible.

Resolves tarantool/tarantool#562
Part of tarantool/tarantool#8825

5339 of 5971 branches covered (89.42%)

Branch coverage included in aggregate %.

3 of 3 new or added lines in 1 file covered. (100.0%)

20479 of 23293 relevant lines covered (87.92%)

2738247.52 hits per line

Source File

/src/lj_asm.c: 95.92%
1
/*
2
** IR assembler (SSA IR -> machine code).
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_asm_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10

11
#if LJ_HASJIT
12

13
#include "lj_gc.h"
14
#include "lj_str.h"
15
#include "lj_tab.h"
16
#include "lj_frame.h"
17
#if LJ_HASFFI
18
#include "lj_ctype.h"
19
#endif
20
#include "lj_ir.h"
21
#include "lj_jit.h"
22
#include "lj_ircall.h"
23
#include "lj_iropt.h"
24
#include "lj_mcode.h"
25
#include "lj_iropt.h"
26
#include "lj_trace.h"
27
#include "lj_snap.h"
28
#include "lj_asm.h"
29
#include "lj_dispatch.h"
30
#include "lj_vm.h"
31
#include "lj_target.h"
32

33
#ifdef LUA_USE_ASSERT
34
#include <stdio.h>
35
#endif
36

37
/* -- Assembler state and common macros ----------------------------------- */
38

39
/* Assembler state. */
40
typedef struct ASMState {
41
  RegCost cost[RID_MAX];  /* Reference and blended allocation cost for regs. */
42

43
  MCode *mcp;                /* Current MCode pointer (grows down). */
44
  MCode *mclim;                /* Lower limit for MCode memory + red zone. */
45
#ifdef LUA_USE_ASSERT
46
  MCode *mcp_prev;        /* Red zone overflow check. */
47
#endif
48

49
  IRIns *ir;                /* Copy of pointer to IR instructions/constants. */
50
  jit_State *J;                /* JIT compiler state. */
51

52
#if LJ_TARGET_X86ORX64
53
  x86ModRM mrm;                /* Fused x86 address operand. */
54
#endif
55

56
  RegSet freeset;        /* Set of free registers. */
57
  RegSet modset;        /* Set of registers modified inside the loop. */
58
  RegSet weakset;        /* Set of weakly referenced registers. */
59
  RegSet phiset;        /* Set of PHI registers. */
60

61
  uint32_t flags;        /* Copy of JIT compiler flags. */
62
  int loopinv;                /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */
63

64
  int32_t evenspill;        /* Next even spill slot. */
65
  int32_t oddspill;        /* Next odd spill slot (or 0). */
66

67
  IRRef curins;                /* Reference of current instruction. */
68
  IRRef stopins;        /* Stop assembly before hitting this instruction. */
69
  IRRef orignins;        /* Original T->nins. */
70

71
  IRRef snapref;        /* Current snapshot is active after this reference. */
72
  IRRef snaprename;        /* Rename highwater mark for snapshot check. */
73
  SnapNo snapno;        /* Current snapshot number. */
74
  SnapNo loopsnapno;        /* Loop snapshot number. */
75
  BloomFilter snapfilt1, snapfilt2;        /* Filled with snapshot refs. */
76
  int snapalloc;        /* Current snapshot needs allocation. */
77

78
  IRRef fuseref;        /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
79
  IRRef sectref;        /* Section base reference (loopref or 0). */
80
  IRRef loopref;        /* Reference of LOOP instruction (or 0). */
81

82
  BCReg topslot;        /* Number of slots for stack check (unless 0). */
83
  int32_t gcsteps;        /* Accumulated number of GC steps (per section). */
84

85
  GCtrace *T;                /* Trace to assemble. */
86
  GCtrace *parent;        /* Parent trace (or NULL). */
87

88
  MCode *mcbot;                /* Bottom of reserved MCode. */
89
  MCode *mctop;                /* Top of generated MCode. */
90
  MCode *mctoporig;        /* Original top of generated MCode. */
91
  MCode *mcloop;        /* Pointer to loop MCode (or NULL). */
92
  MCode *invmcp;        /* Points to invertible loop branch (or NULL). */
93
  MCode *flagmcp;        /* Pending opportunity to merge flag setting ins. */
94
  MCode *realign;        /* Realign loop if not NULL. */
95

96
#ifdef RID_NUM_KREF
97
  intptr_t krefk[RID_NUM_KREF];
98
#endif
99
  IRRef1 phireg[RID_MAX];  /* PHI register references. */
100
  uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
101
} ASMState;
102

103
#ifdef LUA_USE_ASSERT
104
#define lj_assertA(c, ...)        lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
105
#else
106
#define lj_assertA(c, ...)        ((void)as)
107
#endif
108

109
#define IR(ref)                        (&as->ir[(ref)])
110

111
#define ASMREF_TMP1                REF_TRUE        /* Temp. register. */
112
#define ASMREF_TMP2                REF_FALSE        /* Temp. register. */
113
#define ASMREF_L                REF_NIL                /* Stores register for L. */
114

115
/* Check for variant to invariant references. */
116
#define iscrossref(as, ref)        ((ref) < as->sectref)
117

118
/* Inhibit memory op fusion from variant to invariant references. */
119
#define FUSE_DISABLED                (~(IRRef)0)
120
#define mayfuse(as, ref)        ((ref) > as->fuseref)
121
#define neverfuse(as)                (as->fuseref == FUSE_DISABLED)
122
#define canfuse(as, ir)                (!neverfuse(as) && !irt_isphi((ir)->t))
123
#define opisfusableload(o) \
124
  ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
125
   (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD)
126

127
/* Sparse limit checks using a red zone before the actual limit. */
128
#define MCLIM_REDZONE        64
129

130
static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
7✔
131
{
132
  lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
7✔
133
}
134

135
static LJ_AINLINE void checkmclim(ASMState *as)
85,500✔
136
{
137
#ifdef LUA_USE_ASSERT
138
  if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
139
    IRIns *ir = IR(as->curins+1);
140
    lj_assertA(0, "red zone overflow: %p IR %04d  %02d %04d %04d\n", as->mcp,
141
      as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
142
  }
143
#endif
144
  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
1,043✔
145
#ifdef LUA_USE_ASSERT
146
  as->mcp_prev = as->mcp;
147
#endif
148
}
149

150
#ifdef RID_NUM_KREF
151
#define ra_iskref(ref)                ((ref) < RID_NUM_KREF)
152
#define ra_krefreg(ref)                ((Reg)(RID_MIN_KREF + (Reg)(ref)))
153
#define ra_krefk(as, ref)        (as->krefk[(ref)])
154

155
static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
156
{
157
  IRRef ref = (IRRef)(r - RID_MIN_KREF);
158
  as->krefk[ref] = k;
159
  as->cost[r] = REGCOST(ref, ref);
160
}
161

162
#else
163
#define ra_iskref(ref)                0
164
#define ra_krefreg(ref)                RID_MIN_GPR
165
#define ra_krefk(as, ref)        0
166
#endif
167

168
/* Arch-specific field offsets. */
169
static const uint8_t field_ofs[IRFL__MAX+1] = {
170
#define FLOFS(name, ofs)        (uint8_t)(ofs),
171
IRFLDEF(FLOFS)
172
#undef FLOFS
173
  0
174
};
175

176
/* -- Target-specific instruction emitter --------------------------------- */
177

178
#if LJ_TARGET_X86ORX64
179
#include "lj_emit_x86.h"
180
#elif LJ_TARGET_ARM
181
#include "lj_emit_arm.h"
182
#elif LJ_TARGET_ARM64
183
#include "lj_emit_arm64.h"
184
#elif LJ_TARGET_PPC
185
#include "lj_emit_ppc.h"
186
#elif LJ_TARGET_MIPS
187
#include "lj_emit_mips.h"
188
#else
189
#error "Missing instruction emitter for target CPU"
190
#endif
191

192
/* Generic load/store of register from/to stack slot. */
193
#define emit_spload(as, ir, r, ofs) \
194
  emit_loadofs(as, ir, (r), RID_SP, (ofs))
195
#define emit_spstore(as, ir, r, ofs) \
196
  emit_storeofs(as, ir, (r), RID_SP, (ofs))
197

198
/* -- Register allocator debugging ---------------------------------------- */
199

200
/* #define LUAJIT_DEBUG_RA */
201

202
#ifdef LUAJIT_DEBUG_RA
203

204
#include <stdio.h>
205
#include <stdarg.h>
206

207
#define RIDNAME(name)        #name,
208
static const char *const ra_regname[] = {
209
  GPRDEF(RIDNAME)
210
  FPRDEF(RIDNAME)
211
  VRIDDEF(RIDNAME)
212
  NULL
213
};
214
#undef RIDNAME
215

216
static char ra_dbg_buf[65536];
217
static char *ra_dbg_p;
218
static char *ra_dbg_merge;
219
static MCode *ra_dbg_mcp;
220

221
static void ra_dstart(void)
222
{
223
  ra_dbg_p = ra_dbg_buf;
224
  ra_dbg_merge = NULL;
225
  ra_dbg_mcp = NULL;
226
}
227

228
static void ra_dflush(void)
229
{
230
  fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
231
  ra_dstart();
232
}
233

234
static void ra_dprintf(ASMState *as, const char *fmt, ...)
235
{
236
  char *p;
237
  va_list argp;
238
  va_start(argp, fmt);
239
  p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
240
  ra_dbg_mcp = NULL;
241
  p += sprintf(p, "%08x  \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
242
  for (;;) {
243
    const char *e = strchr(fmt, '$');
244
    if (e == NULL) break;
245
    memcpy(p, fmt, (size_t)(e-fmt));
246
    p += e-fmt;
247
    if (e[1] == 'r') {
248
      Reg r = va_arg(argp, Reg) & RID_MASK;
249
      if (r <= RID_MAX) {
250
        const char *q;
251
        for (q = ra_regname[r]; *q; q++)
252
          *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
253
      } else {
254
        *p++ = '?';
255
        lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
256
      }
257
    } else if (e[1] == 'f' || e[1] == 'i') {
258
      IRRef ref;
259
      if (e[1] == 'f')
260
        ref = va_arg(argp, IRRef);
261
      else
262
        ref = va_arg(argp, IRIns *) - as->ir;
263
      if (ref >= REF_BIAS)
264
        p += sprintf(p, "%04d", ref - REF_BIAS);
265
      else
266
        p += sprintf(p, "K%03d", REF_BIAS - ref);
267
    } else if (e[1] == 's') {
268
      uint32_t slot = va_arg(argp, uint32_t);
269
      p += sprintf(p, "[sp+0x%x]", sps_scale(slot));
270
    } else if (e[1] == 'x') {
271
      p += sprintf(p, "%08x", va_arg(argp, int32_t));
272
    } else {
273
      lj_assertA(0, "bad debug format code");
274
    }
275
    fmt = e+2;
276
  }
277
  va_end(argp);
278
  while (*fmt)
279
    *p++ = *fmt++;
280
  *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
281
  if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
282
    fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
283
    p = ra_dbg_buf;
284
  }
285
  ra_dbg_p = p;
286
}
287

288
#define RA_DBG_START()        ra_dstart()
289
#define RA_DBG_FLUSH()        ra_dflush()
290
#define RA_DBG_REF() \
291
  do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
292
       ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
293
#define RA_DBGX(x)        ra_dprintf x
294

295
#else
296
#define RA_DBG_START()        ((void)0)
297
#define RA_DBG_FLUSH()        ((void)0)
298
#define RA_DBG_REF()        ((void)0)
299
#define RA_DBGX(x)        ((void)0)
300
#endif
301

302
/* -- Register allocator -------------------------------------------------- */
303

304
#define ra_free(as, r)                rset_set(as->freeset, (r))
305
#define ra_modified(as, r)        rset_set(as->modset, (r))
306
#define ra_weak(as, r)                rset_set(as->weakset, (r))
307
#define ra_noweak(as, r)        rset_clear(as->weakset, (r))
308

309
#define ra_used(ir)                (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
310

311
/* Setup register allocator. */
312
static void ra_setup(ASMState *as)
3,430✔
313
{
314
  Reg r;
3,430✔
315
  /* Initially all regs (except the stack pointer) are free for use. */
316
  as->freeset = RSET_INIT;
3,430✔
317
  as->modset = RSET_EMPTY;
3,430✔
318
  as->weakset = RSET_EMPTY;
3,430✔
319
  as->phiset = RSET_EMPTY;
3,430✔
320
  memset(as->phireg, 0, sizeof(as->phireg));
3,430✔
321
  for (r = RID_MIN_GPR; r < RID_MAX; r++)
113,190✔
322
    as->cost[r] = REGCOST(~0u, 0u);
109,760✔
323
}
3,430✔
324

325
/* Rematerialize constants. */
326
static Reg ra_rematk(ASMState *as, IRRef ref)
6,631✔
327
{
328
  IRIns *ir;
6,631✔
329
  Reg r;
6,631✔
330
  if (ra_iskref(ref)) {
6,631✔
331
    r = ra_krefreg(ref);
332
    lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
333
    ra_free(as, r);
334
    ra_modified(as, r);
335
#if LJ_64
336
    emit_loadu64(as, r, ra_krefk(as, ref));
337
#else
338
    emit_loadi(as, r, ra_krefk(as, ref));
339
#endif
340
    return r;
341
  }
342
  ir = IR(ref);
6,631✔
343
  r = ir->r;
6,631✔
344
  lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
6,631✔
345
  lj_assertA(!ra_hasspill(ir->s),
6,631✔
346
             "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
347
  ra_free(as, r);
6,631✔
348
  ra_modified(as, r);
6,631✔
349
  ir->r = RID_INIT;  /* Do not keep any hint. */
6,631✔
350
  RA_DBGX((as, "remat     $i $r", ir, r));
6,631✔
351
#if !LJ_SOFTFP32
352
  if (ir->o == IR_KNUM) {
6,631✔
353
    emit_loadk64(as, r, ir);
2,457✔
354
  } else
355
#endif
356
  if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
4,174✔
357
    ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
1,177✔
358
    emit_getgl(as, r, jit_base);
1,177✔
359
  } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
2,997✔
360
    /* REF_NIL stores ASMREF_L register. */
361
    lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
864✔
362
    emit_getgl(as, r, cur_L);
864✔
363
#if LJ_64
364
  } else if (ir->o == IR_KINT64) {
2,133✔
365
    emit_loadu64(as, r, ir_kint64(ir)->u64);
34✔
366
#if LJ_GC64
367
  } else if (ir->o == IR_KGC) {
2,099✔
368
    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
2,024✔
369
  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
75✔
370
    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
23✔
371
#endif
372
#endif
373
  } else {
374
    lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
52✔
375
               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
376
               "rematk of bad IR op %d", ir->o);
377
    emit_loadi(as, r, ir->i);
52✔
378
  }
379
  return r;
6,631✔
380
}
381

382
/* Force a spill. Allocate a new spill slot if needed. */
383
static int32_t ra_spill(ASMState *as, IRIns *ir)
384
{
385
  int32_t slot = ir->s;
386
  lj_assertA(ir >= as->ir + REF_TRUE,
387
             "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
388
  if (!ra_hasspill(slot)) {
389
    if (irt_is64(ir->t)) {
390
      slot = as->evenspill;
391
      as->evenspill += 2;
392
    } else if (as->oddspill) {
393
      slot = as->oddspill;
394
      as->oddspill = 0;
395
    } else {
396
      slot = as->evenspill;
397
      as->oddspill = slot+1;
398
      as->evenspill += 2;
399
    }
400
    if (as->evenspill > 256)
401
      lj_trace_err(as->J, LJ_TRERR_SPILLOV);
402
    ir->s = (uint8_t)slot;
403
  }
404
  return sps_scale(slot);
405
}
406

407
/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
408
static Reg ra_releasetmp(ASMState *as, IRRef ref)
2,165✔
409
{
410
  IRIns *ir = IR(ref);
2,165✔
411
  Reg r = ir->r;
2,165✔
412
  lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
2,165✔
413
  lj_assertA(!ra_hasspill(ir->s),
2,165✔
414
             "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
415
  ra_free(as, r);
2,165✔
416
  ra_modified(as, r);
2,165✔
417
  ir->r = RID_INIT;
2,165✔
418
  return r;
818✔
419
}
420

421
/* Restore a register (marked as free). Rematerialize or force a spill. */
422
static Reg ra_restore(ASMState *as, IRRef ref)
5,623✔
423
{
424
  if (emit_canremat(ref)) {
5,623✔
425
    return ra_rematk(as, ref);
2,935✔
426
  } else {
427
    IRIns *ir = IR(ref);
2,688✔
428
    int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
2,688✔
429
    Reg r = ir->r;
2,688✔
430
    lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
2,688✔
431
    ra_sethint(ir->r, r);  /* Keep hint. */
2,688✔
432
    ra_free(as, r);
2,688✔
433
    if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
2,688✔
434
      ra_modified(as, r);
2,287✔
435
      RA_DBGX((as, "restore   $i $r", ir, r));
2,287✔
436
      emit_spload(as, ir, r, ofs);
2,287✔
437
    }
438
    return r;
2,688✔
439
  }
440
}
441

442
/* Save a register to a spill slot. */
443
static void ra_save(ASMState *as, IRIns *ir, Reg r)
2,433✔
444
{
445
  RA_DBGX((as, "save      $i $r", ir, r));
2,433✔
446
  emit_spstore(as, ir, r, sps_scale(ir->s));
2,433✔
447
}
2,227✔
448

449
#define MINCOST(name) \
450
  if (rset_test(RSET_ALL, RID_##name) && \
451
      LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \
452
    cost = as->cost[RID_##name];
453

454
/* Evict the register with the lowest cost, forcing a restore. */
455
static Reg ra_evict(ASMState *as, RegSet allow)
120✔
456
{
457
  IRRef ref;
120✔
458
  RegCost cost = ~(RegCost)0;
120✔
459
  lj_assertA(allow != RSET_EMPTY, "evict from empty set");
120✔
460
  if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
120✔
461
    GPRDEF(MINCOST)
16✔
462
  } else {
463
    FPRDEF(MINCOST)
104✔
464
  }
465
  ref = regcost_ref(cost);
120✔
466
  lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
120✔
467
             "evict of out-of-range IR %04d", ref - REF_BIAS);
468
  /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
469
  if (!irref_isk(ref) && (as->weakset & allow)) {
120✔
470
    IRIns *ir = IR(ref);
11✔
471
    if (!rset_test(as->weakset, ir->r))
11✔
472
      ref = regcost_ref(as->cost[rset_pickbot((as->weakset & allow))]);
7✔
473
  }
474
  return ra_restore(as, ref);
120✔
475
}
476

477
/* Pick any register (marked as free). Evict on-demand. */
478
static Reg ra_pick(ASMState *as, RegSet allow)
7,334✔
479
{
480
  RegSet pick = as->freeset & allow;
7,334✔
481
  if (!pick)
7,334✔
482
    return ra_evict(as, allow);
8✔
483
  else
484
    return rset_picktop(pick);
7,326✔
485
}
486

487
/* Get a scratch register (marked as free). */
488
static Reg ra_scratch(ASMState *as, RegSet allow)
7,284✔
489
{
490
  Reg r = ra_pick(as, allow);
7,284✔
491
  ra_modified(as, r);
7,284✔
492
  RA_DBGX((as, "scratch        $r", r));
7,284✔
493
  return r;
7,284✔
494
}
495

496
/* Evict all registers from a set (if not free). */
497
static void ra_evictset(ASMState *as, RegSet drop)
4,528✔
498
{
499
  RegSet work;
4,528✔
500
  as->modset |= drop;
4,528✔
501
#if !LJ_SOFTFP
502
  work = (drop & ~as->freeset) & RSET_FPR;
4,528✔
503
  while (work) {
4,528✔
504
    Reg r = rset_pickbot(work);
1,405✔
505
    ra_restore(as, regcost_ref(as->cost[r]));
1,405✔
506
    rset_clear(work, r);
1,405✔
507
    checkmclim(as);
5,933✔
508
  }
509
#endif
510
  work = (drop & ~as->freeset);
4,528✔
511
  while (work) {
4,528✔
512
    Reg r = rset_pickbot(work);
3,857✔
513
    ra_restore(as, regcost_ref(as->cost[r]));
3,857✔
514
    rset_clear(work, r);
3,857✔
515
    checkmclim(as);
8,385✔
516
  }
517
}
4,528✔
518

519
/* Evict (rematerialize) all registers allocated to constants. */
520
static void ra_evictk(ASMState *as)
2,320✔
521
{
522
  RegSet work;
2,320✔
523
#if !LJ_SOFTFP
524
  work = ~as->freeset & RSET_FPR;
2,320✔
525
  while (work) {
4,436✔
526
    Reg r = rset_pickbot(work);
2,116✔
527
    IRRef ref = regcost_ref(as->cost[r]);
2,116✔
528
    if (emit_canremat(ref) && irref_isk(ref)) {
2,116✔
529
      ra_rematk(as, ref);
1,767✔
530
      checkmclim(as);
1,767✔
531
    }
532
    rset_clear(work, r);
2,116✔
533
  }
534
#endif
535
  work = ~as->freeset & RSET_GPR;
2,320✔
536
  while (work) {
6,834✔
537
    Reg r = rset_pickbot(work);
4,514✔
538
    IRRef ref = regcost_ref(as->cost[r]);
4,514✔
539
    if (emit_canremat(ref) && irref_isk(ref)) {
4,514✔
540
      ra_rematk(as, ref);
1,723✔
541
      checkmclim(as);
1,723✔
542
    }
543
    rset_clear(work, r);
4,514✔
544
  }
545
}
2,320✔
546

547
#ifdef RID_NUM_KREF
548
/* Allocate a register for a constant. */
549
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
550
{
551
  /* First try to find a register which already holds the same constant. */
552
  RegSet pick, work = ~as->freeset & RSET_GPR;
553
  Reg r;
554
  while (work) {
555
    IRRef ref;
556
    r = rset_pickbot(work);
557
    ref = regcost_ref(as->cost[r]);
558
#if LJ_64
559
    if (ref < ASMREF_L) {
560
      if (ra_iskref(ref)) {
561
        if (k == ra_krefk(as, ref))
562
          return r;
563
      } else {
564
        IRIns *ir = IR(ref);
565
        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
566
#if LJ_GC64
567
            (ir->o == IR_KINT && k == ir->i) ||
568
            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
569
            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
570
             k == (intptr_t)ir_kptr(ir))
571
#else
572
            (ir->o != IR_KINT64 && k == ir->i)
573
#endif
574
           )
575
          return r;
576
      }
577
    }
578
#else
579
    if (ref < ASMREF_L &&
580
        k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
581
      return r;
582
#endif
583
    rset_clear(work, r);
584
  }
585
  pick = as->freeset & allow;
586
  if (pick) {
587
    /* Constants should preferably get unmodified registers. */
588
    if ((pick & ~as->modset))
589
      pick &= ~as->modset;
590
    r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
591
  } else {
592
    r = ra_evict(as, allow);
593
  }
594
  RA_DBGX((as, "allock    $x $r", k, r));
595
  ra_setkref(as, r, k);
596
  rset_clear(as->freeset, r);
597
  ra_noweak(as, r);
598
  return r;
599
}
600

601
/* Allocate a specific register for a constant. */
602
static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
603
{
604
  Reg kr = ra_allock(as, k, RID2RSET(r));
605
  if (kr != r) {
606
    IRIns irdummy;
607
    irdummy.t.irt = IRT_INT;
608
    ra_scratch(as, RID2RSET(r));
609
    emit_movrr(as, &irdummy, r, kr);
610
  }
611
}
612
#else
613
#define ra_allockreg(as, k, r)                emit_loadi(as, (r), (k))
614
#endif
615

616
/* Allocate a register for ref from the allowed set of registers.
617
** Note: this function assumes the ref does NOT have a register yet!
618
** Picks an optimal register, sets the cost and marks the register as non-free.
619
*/
620
static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
52,194✔
621
{
622
  IRIns *ir = IR(ref);
52,194✔
623
  RegSet pick = as->freeset & allow;
52,194✔
624
  Reg r;
52,194✔
625
  lj_assertA(ra_noreg(ir->r),
52,194✔
626
             "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
627
  if (pick) {
52,194✔
628
    /* First check register hint from propagation or PHI. */
629
    if (ra_hashint(ir->r)) {
52,082✔
630
      r = ra_gethint(ir->r);
20,960✔
631
      if (rset_test(pick, r))  /* Use hint register if possible. */
20,960✔
632
        goto found;
16,449✔
633
      /* Rematerialization is cheaper than missing a hint. */
634
      if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) {
4,511✔
635
        ra_rematk(as, regcost_ref(as->cost[r]));
206✔
636
        goto found;
206✔
637
      }
638
      RA_DBGX((as, "hintmiss  $f $r", ref, r));
35,427✔
639
    }
640
    /* Invariants should preferably get unmodified registers. */
641
    if (ref < as->loopref && !irt_isphi(ir->t)) {
35,427✔
642
      if ((pick & ~as->modset))
15,212✔
643
        pick &= ~as->modset;
8,278✔
644
      r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
15,212✔
645
    } else {
646
      /* We've got plenty of regs, so get callee-save regs if possible. */
647
      if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH))
20,215✔
648
        pick &= ~RSET_SCRATCH;
10,764✔
649
      r = rset_picktop(pick);
20,215✔
650
    }
651
  } else {
652
    r = ra_evict(as, allow);
112✔
653
  }
654
found:
52,194✔
655
  RA_DBGX((as, "alloc     $f $r", ref, r));
52,194✔
656
  ir->r = (uint8_t)r;
52,194✔
657
  rset_clear(as->freeset, r);
52,194✔
658
  ra_noweak(as, r);
52,194✔
659
  as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t));
52,194✔
660
  return r;
52,194✔
661
}
662

663
/* Allocate a register on-demand. */
664
static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
47,461✔
665
{
666
  Reg r = IR(ref)->r;
47,461✔
667
  /* Note: allow is ignored if the register is already allocated. */
668
  if (ra_noreg(r)) r = ra_allocref(as, ref, allow);
47,461✔
669
  ra_noweak(as, r);
47,461✔
670
  return r;
47,461✔
671
}
672

673
/* Add a register rename to the IR. */
674
static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
675
{
676
  IRRef ren;
677
  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
678
  ren = tref_ref(lj_ir_emit(as->J));
679
  as->J->cur.ir[ren].r = (uint8_t)down;
680
  as->J->cur.ir[ren].s = SPS_NONE;
681
}
682

683
/* Rename register allocation and emit move. */
684
static void ra_rename(ASMState *as, Reg down, Reg up)
707✔
685
{
686
  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
707✔
687
  IRIns *ir = IR(ref);
707✔
688
  ir->r = (uint8_t)up;
707✔
689
  as->cost[down] = 0;
707✔
690
  lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
707✔
691
             "rename between GPR/FPR %d and %d", down, up);
692
  lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
707✔
693
  lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
707✔
694
  ra_free(as, down);  /* 'down' is free ... */
707✔
695
  ra_modified(as, down);
707✔
696
  rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
707✔
697
  ra_noweak(as, up);
707✔
698
  RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
707✔
699
  emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
707✔
700
  if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
707✔
701
    /*
702
    ** The rename is effective at the subsequent (already emitted) exit
703
    ** branch. This is for the current snapshot (as->snapno). Except if we
704
    ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
705
    ** then it belongs to the next snapshot.
706
    ** See also the discussion at asm_snap_checkrename().
707
    */
708
    ra_addrename(as, down, ref, as->snapno + as->snapalloc);
682✔
709
  }
710
}
707✔
711

712
/* Pick a destination register (marked as free).
713
** Caveat: allow is ignored if there's already a destination register.
714
** Use ra_destreg() to get a specific register.
715
*/
716
static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow)
35,159✔
717
{
718
  Reg dest = ir->r;
35,159✔
719
  if (ra_hasreg(dest)) {
35,159✔
720
    ra_free(as, dest);
33,457✔
721
    ra_modified(as, dest);
33,457✔
722
  } else {
723
    if (ra_hashint(dest) && rset_test((as->freeset&allow), ra_gethint(dest))) {
1,702✔
724
      dest = ra_gethint(dest);
555✔
725
      ra_modified(as, dest);
555✔
726
      RA_DBGX((as, "dest           $r", dest));
555✔
727
    } else {
728
      dest = ra_scratch(as, allow);
1,147✔
729
    }
730
    ir->r = dest;
1,702✔
731
  }
732
  if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest);
35,159✔
733
  return dest;
35,159✔
734
}
735

736
/* Force a specific destination register (marked as free). */
737
static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
3,626✔
738
{
739
  Reg dest = ra_dest(as, ir, RID2RSET(r));
3,626✔
740
  if (dest != r) {
3,626✔
741
    lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
2,110✔
742
    ra_modified(as, r);
2,110✔
743
    emit_movrr(as, ir, dest, r);
2,110✔
744
  }
745
}
3,626✔
746

747
#if LJ_TARGET_X86ORX64
748
/* Propagate dest register to left reference. Emit moves as needed.
749
** This is a required fixup step for all 2-operand machine instructions.
750
*/
751
static void ra_left(ASMState *as, Reg dest, IRRef lref)
11,917✔
752
{
753
  IRIns *ir = IR(lref);
11,917✔
754
  Reg left = ir->r;
11,917✔
755
  if (ra_noreg(left)) {
11,917✔
756
    if (irref_isk(lref)) {
10,881✔
757
      if (ir->o == IR_KNUM) {
873✔
758
        /* FP remat needs a load except for +0. Still better than eviction. */
759
        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
45✔
760
          emit_loadk64(as, dest, ir);
37✔
761
          return;
37✔
762
        }
763
#if LJ_64
764
      } else if (ir->o == IR_KINT64) {
828✔
765
        emit_loadk64(as, dest, ir);
5✔
766
        return;
5✔
767
#if LJ_GC64
768
      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
823✔
769
        emit_loadk64(as, dest, ir);
748✔
770
        return;
748✔
771
#endif
772
#endif
773
      } else if (ir->o != IR_KPRI) {
75✔
774
        lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
74✔
775
                   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
776
                   "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
777
        emit_loadi(as, dest, ir->i);
74✔
778
        return;
74✔
779
      }
780
    }
781
    if (!ra_hashint(left) && !iscrossref(as, lref))
10,017✔
782
      ra_sethint(ir->r, dest);  /* Propagate register hint. */
6,841✔
783
    left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR);
15,441✔
784
  }
785
  ra_noweak(as, left);
11,053✔
786
  /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */
787
  if (dest != left) {
11,053✔
788
    /* Use register renaming if dest is the PHI reg. */
789
    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
1,057✔
790
      ra_modified(as, left);
125✔
791
      ra_rename(as, left, dest);
125✔
792
    } else {
793
      emit_movrr(as, ir, dest, left);
932✔
794
    }
795
  }
796
}
797
#else
798
/* Similar to ra_left, except we override any hints. */
799
static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
800
{
801
  IRIns *ir = IR(lref);
802
  Reg left = ir->r;
803
  if (ra_noreg(left)) {
804
    ra_sethint(ir->r, dest);  /* Propagate register hint. */
805
    left = ra_allocref(as, lref,
806
                       (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR);
807
  }
808
  ra_noweak(as, left);
809
  if (dest != left) {
810
    /* Use register renaming if dest is the PHI reg. */
811
    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
812
      ra_modified(as, left);
813
      ra_rename(as, left, dest);
814
    } else {
815
      emit_movrr(as, ir, dest, left);
816
    }
817
  }
818
}
819
#endif
820

821
#if !LJ_64
822
/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
823
static void ra_destpair(ASMState *as, IRIns *ir)
824
{
825
  Reg destlo = ir->r, desthi = (ir+1)->r;
826
  /* First spill unrelated refs blocking the destination registers. */
827
  if (!rset_test(as->freeset, RID_RETLO) &&
828
      destlo != RID_RETLO && desthi != RID_RETLO)
829
    ra_restore(as, regcost_ref(as->cost[RID_RETLO]));
830
  if (!rset_test(as->freeset, RID_RETHI) &&
831
      destlo != RID_RETHI && desthi != RID_RETHI)
832
    ra_restore(as, regcost_ref(as->cost[RID_RETHI]));
833
  /* Next free the destination registers (if any). */
834
  if (ra_hasreg(destlo)) {
835
    ra_free(as, destlo);
836
    ra_modified(as, destlo);
837
  } else {
838
    destlo = RID_RETLO;
839
  }
840
  if (ra_hasreg(desthi)) {
841
    ra_free(as, desthi);
842
    ra_modified(as, desthi);
843
  } else {
844
    desthi = RID_RETHI;
845
  }
846
  /* Check for conflicts and shuffle the registers as needed. */
847
  if (destlo == RID_RETHI) {
848
    if (desthi == RID_RETLO) {
849
#if LJ_TARGET_X86
850
      *--as->mcp = XI_XCHGa + RID_RETHI;
851
#else
852
      emit_movrr(as, ir, RID_RETHI, RID_TMP);
853
      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
854
      emit_movrr(as, ir, RID_TMP, RID_RETLO);
855
#endif
856
    } else {
857
      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
858
      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
859
    }
860
  } else if (desthi == RID_RETLO) {
861
    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
862
    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
863
  } else {
864
    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
865
    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
866
  }
867
  /* Restore spill slots (if any). */
868
  if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
869
  if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
870
}
871
#endif
872

873
/* -- Snapshot handling ---------------------------------------------------- */
874

875
/* Can we rematerialize a KNUM instead of forcing a spill? */
876
static int asm_snap_canremat(ASMState *as)
877
{
878
  Reg r;
879
  for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++)
×
880
    if (irref_isk(regcost_ref(as->cost[r])))
×
881
      return 1;
882
  return 0;
883
}
884

885
/* Check whether a sunk store corresponds to an allocation. */
886
static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
887
{
888
  if (irs->s == 255) {
889
    if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
890
        irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
891
      IRIns *irk = IR(irs->op1);
892
      if (irk->o == IR_AREF || irk->o == IR_HREFK)
893
        irk = IR(irk->op1);
894
      return (IR(irk->op1) == ira);
895
    }
896
    return 0;
897
  } else {
898
    return (ira + irs->s == irs);  /* Quick check. */
899
  }
900
}
901

902
/* Allocate register or spill slot for a ref that escapes to a snapshot. */
903
static void asm_snap_alloc1(ASMState *as, IRRef ref)
21,544✔
904
{
905
  IRIns *ir = IR(ref);
21,757✔
906
  if (!irref_isk(ref) && ir->r != RID_SUNK) {
21,757✔
907
    bloomset(as->snapfilt1, ref);
21,510✔
908
    bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
21,510✔
909
    if (ra_used(ir)) return;
21,510✔
910
    if (ir->r == RID_SINK) {
4,405✔
911
      ir->r = RID_SUNK;
296✔
912
#if LJ_HASFFI
913
      if (ir->o == IR_CNEWI) {  /* Allocate CNEWI value. */
296✔
914
        asm_snap_alloc1(as, ir->op2);
177✔
915
        if (LJ_32 && (ir+1)->o == IR_HIOP)
916
          asm_snap_alloc1(as, (ir+1)->op2);
917
      } else
918
#endif
919
      {  /* Allocate stored values for TNEW, TDUP and CNEW. */
920
        IRIns *irs;
119✔
921
        lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
119✔
922
                   "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
923
        for (irs = IR(as->snapref-1); irs > ir; irs--)
1,398✔
924
          if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
1,279✔
925
            lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
240✔
926
                       irs->o == IR_FSTORE || irs->o == IR_XSTORE,
927
                       "sunk store IR %04d has bad op %d",
928
                       (int)(irs - as->ir) - REF_BIAS, irs->o);
929
            asm_snap_alloc1(as, irs->op2);
240✔
930
            if (LJ_32 && (irs+1)->o == IR_HIOP)
240✔
931
              asm_snap_alloc1(as, (irs+1)->op2);
932
          }
933
      }
934
    } else {
935
      RegSet allow;
4,109✔
936
      if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
4,109✔
937
        IRIns *irc;
54✔
938
        for (irc = IR(as->curins); irc > ir; irc--)
468✔
939
          if ((irc->op1 == ref || irc->op2 == ref) &&
432✔
940
              !(irc->r == RID_SINK || irc->r == RID_SUNK))
54✔
941
            goto nosink;  /* Don't sink conversion if result is used. */
18✔
942
        asm_snap_alloc1(as, ir->op1);
36✔
943
        return;
36✔
944
      }
945
    nosink:
4,055✔
946
      allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
4,073✔
947
      if ((as->freeset & allow) ||
4,073✔
948
               (allow == RSET_FPR && asm_snap_canremat(as))) {
×
949
        /* Get a weak register if we have a free one or can rematerialize. */
950
        Reg r = ra_allocref(as, ref, allow);  /* Allocate a register. */
3,969✔
951
        if (!irt_isphi(ir->t))
3,969✔
952
          ra_weak(as, r);  /* But mark it as weakly referenced. */
3,351✔
953
        checkmclim(as);
3,969✔
954
        RA_DBGX((as, "snapreg   $f $r", ref, ir->r));
955
      } else {
956
        ra_spill(as, ir);  /* Otherwise force a spill slot. */
104✔
957
        RA_DBGX((as, "snapspill $f $s", ref, ir->s));
21,544✔
958
      }
959
    }
960
  }
961
}
962

963
/* Allocate refs escaping to a snapshot. */
964
static void asm_snap_alloc(ASMState *as, int snapno)
13,418✔
965
{
966
  SnapShot *snap = &as->T->snap[snapno];
13,418✔
967
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
13,418✔
968
  MSize n, nent = snap->nent;
13,418✔
969
  as->snapfilt1 = as->snapfilt2 = 0;
13,418✔
970
  for (n = 0; n < nent; n++) {
51,237✔
971
    SnapEntry sn = map[n];
37,819✔
972
    IRRef ref = snap_ref(sn);
37,819✔
973
    if (!irref_isk(ref)) {
37,819✔
974
      asm_snap_alloc1(as, ref);
21,304✔
975
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
21,304✔
976
        lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
977
                   "snap %d[%d] points to bad SOFTFP IR %04d",
978
                   snapno, n, ref - REF_BIAS);
979
        asm_snap_alloc1(as, ref+1);
980
      }
981
    }
982
  }
983
}
13,418✔
984

985
/* All guards for a snapshot use the same exitno. This is currently the
986
** same as the snapshot number. Since the exact origin of the exit cannot
987
** be determined, all guards for the same snapshot must exit with the same
988
** RegSP mapping.
989
** A renamed ref which has been used in a prior guard for the same snapshot
990
** would cause an inconsistency. The easy way out is to force a spill slot.
991
*/
992
static int asm_snap_checkrename(ASMState *as, IRRef ren)
36✔
993
{
994
  if (bloomtest(as->snapfilt1, ren) &&
36✔
995
      bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) {
36✔
996
    IRIns *ir = IR(ren);
36✔
997
    ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
36✔
998
    RA_DBGX((as, "snaprensp $f $s", ren, ir->s));
36✔
999
    return 1;  /* Found. */
36✔
1000
  }
1001
  return 0;  /* Not found. */
1002
}
1003

1004
/* Prepare snapshot for next guard or throwing instruction. */
1005
static void asm_snap_prep(ASMState *as)
33,569✔
1006
{
1007
  if (as->snapalloc) {
33,569✔
1008
    /* Alloc on first invocation for each snapshot. */
1009
    as->snapalloc = 0;
13,379✔
1010
    asm_snap_alloc(as, as->snapno);
13,379✔
1011
    as->snaprename = as->T->nins;
13,379✔
1012
  } else {
1013
    /* Check any renames above the highwater mark. */
1014
    for (; as->snaprename < as->T->nins; as->snaprename++) {
20,226✔
1015
      IRIns *ir = &as->T->ir[as->snaprename];
36✔
1016
      if (asm_snap_checkrename(as, ir->op1))
36✔
1017
        ir->op2 = REF_BIAS-1;  /* Kill rename. */
36✔
1018
    }
1019
  }
1020
}
33,569✔
1021

1022
/* Move to previous snapshot when we cross the current snapshot ref. */
1023
static void asm_snap_prev(ASMState *as)
82,398✔
1024
{
1025
  if (as->curins < as->snapref) {
82,398✔
1026
    ptrdiff_t ofs = as->mctoporig - as->mcp;
14,221✔
1027
    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
14,221✔
1028
    do {
15,244✔
1029
      if (as->snapno == 0) return;
15,244✔
1030
      as->snapno--;
14,635✔
1031
      as->snapref = as->T->snap[as->snapno].ref;
14,635✔
1032
      as->T->snap[as->snapno].mcofs = ofs;  /* Remember mcode offset. */
14,635✔
1033
    } while (as->curins < as->snapref);  /* May have no ins inbetween. */
14,635✔
1034
    as->snapalloc = 1;
13,612✔
1035
  }
1036
}
1037

1038
/* Fixup snapshot mcode offsets. */
1039
static void asm_snap_fixup_mcofs(ASMState *as)
2,265✔
1040
{
1041
  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
2,265✔
1042
  SnapShot *snap = as->T->snap;
2,265✔
1043
  SnapNo i;
2,265✔
1044
  for (i = as->T->nsnap-1; i > 0; i--) {
12,012✔
1045
    /* Compute offset from mcode start and store in correct snapshot. */
1046
    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
9,747✔
1047
  }
1048
  snap[0].mcofs = 0;
2,265✔
1049
}
2,265✔
1050

1051
/* -- Miscellaneous helpers ----------------------------------------------- */
1052

1053
/* Calculate stack adjustment. */
1054
static int32_t asm_stack_adjust(ASMState *as)
2,320✔
1055
{
1056
  if (as->evenspill <= SPS_FIXED)
2,320✔
1057
    return 0;
1058
  return sps_scale(sps_align(as->evenspill));
565✔
1059
}
1060

1061
/* Must match with hash*() in lj_tab.c. */
1062
static uint32_t ir_khash(ASMState *as, IRIns *ir)
1063
{
1064
  uint32_t lo, hi;
1065
  UNUSED(as);
1066
  if (irt_isstr(ir->t)) {
1067
    return ir_kstr(ir)->hash;
1068
  } else if (irt_isnum(ir->t)) {
1069
    lo = ir_knum(ir)->u32.lo;
1070
    hi = ir_knum(ir)->u32.hi << 1;
1071
  } else if (irt_ispri(ir->t)) {
1072
    lj_assertA(!irt_isnil(ir->t), "hash of nil key");
1073
    return irt_type(ir->t)-IRT_FALSE;
1074
  } else {
1075
    lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
1076
    lo = u32ptr(ir_kgc(ir));
1077
#if LJ_GC64
1078
    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
1079
#else
1080
    hi = lo + HASH_BIAS;
1081
#endif
1082
  }
1083
  return hashrot(lo, hi);
1084
}
1085

1086
/* -- Allocations --------------------------------------------------------- */
1087

1088
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args);
1089
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci);
1090

1091
static void asm_snew(ASMState *as, IRIns *ir)
105✔
1092
{
1093
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
105✔
1094
  IRRef args[3];
105✔
1095
  asm_snap_prep(as);
105✔
1096
  args[0] = ASMREF_L;  /* lua_State *L    */
105✔
1097
  args[1] = ir->op1;   /* const char *str */
105✔
1098
  args[2] = ir->op2;   /* size_t len      */
105✔
1099
  as->gcsteps++;
105✔
1100
  asm_setupresult(as, ir, ci);  /* GCstr * */
105✔
1101
  asm_gencall(as, ci, args);
105✔
1102
}
105✔
1103

1104
static void asm_tnew(ASMState *as, IRIns *ir)
179✔
1105
{
1106
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
179✔
1107
  IRRef args[2];
179✔
1108
  asm_snap_prep(as);
179✔
1109
  args[0] = ASMREF_L;     /* lua_State *L    */
179✔
1110
  args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
179✔
1111
  as->gcsteps++;
179✔
1112
  asm_setupresult(as, ir, ci);  /* GCtab * */
179✔
1113
  asm_gencall(as, ci, args);
179✔
1114
  ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1));
179✔
1115
}
179✔
1116

1117
static void asm_tdup(ASMState *as, IRIns *ir)
47✔
1118
{
1119
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
47✔
1120
  IRRef args[2];
47✔
1121
  asm_snap_prep(as);
47✔
1122
  args[0] = ASMREF_L;  /* lua_State *L    */
47✔
1123
  args[1] = ir->op1;   /* const GCtab *kt */
47✔
1124
  as->gcsteps++;
47✔
1125
  asm_setupresult(as, ir, ci);  /* GCtab * */
47✔
1126
  asm_gencall(as, ci, args);
47✔
1127
}
47✔
1128

1129
static void asm_gc_check(ASMState *as);
1130

1131
/* Explicit GC step. */
1132
static void asm_gcstep(ASMState *as, IRIns *ir)
25✔
1133
{
1134
  IRIns *ira;
25✔
1135
  for (ira = IR(as->stopins+1); ira < ir; ira++)
105✔
1136
    if ((ira->o == IR_TNEW || ira->o == IR_TDUP ||
80✔
1137
         (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI))) &&
29✔
1138
        ra_used(ira))
29✔
1139
      as->gcsteps++;
4✔
1140
  if (as->gcsteps)
25✔
1141
    asm_gc_check(as);
19✔
1142
  as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
25✔
1143
}
25✔
1144

1145
/* -- Buffer operations --------------------------------------------------- */
1146

1147
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1148

1149
static void asm_bufhdr(ASMState *as, IRIns *ir)
751✔
1150
{
1151
  Reg sb = ra_dest(as, ir, RSET_GPR);
751✔
1152
  if ((ir->op2 & IRBUFHDR_APPEND)) {
751✔
1153
    /* Rematerialize const buffer pointer instead of likely spill. */
1154
    IRIns *irp = IR(ir->op1);
47✔
1155
    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
47✔
1156
          (irp == ir-2 && !ra_used(ir-1)))) {
47✔
1157
      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
150✔
1158
        irp = IR(irp->op1);
106✔
1159
      if (irref_isk(irp->op1)) {
44✔
1160
        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
44✔
1161
        ir = irp;
44✔
1162
      }
1163
    }
1164
  } else {
1165
    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
704✔
1166
    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
1167
    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
704✔
1168
    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
704✔
1169
  }
1170
#if LJ_TARGET_X86ORX64
1171
  ra_left(as, sb, ir->op1);
751✔
1172
#else
1173
  ra_leftov(as, sb, ir->op1);
1174
#endif
1175
}
751✔
1176

1177
static void asm_bufput(ASMState *as, IRIns *ir)
1,330✔
1178
{
1179
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1,330✔
1180
  IRRef args[3];
1,330✔
1181
  IRIns *irs;
1,330✔
1182
  int kchar = -129;
1,330✔
1183
  args[0] = ir->op1;  /* SBuf * */
1,330✔
1184
  args[1] = ir->op2;  /* GCstr * */
1,330✔
1185
  irs = IR(ir->op2);
1,330✔
1186
  lj_assertA(irt_isstr(irs->t),
1,330✔
1187
             "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
1188
  if (irs->o == IR_KGC) {
1,330✔
1189
    GCstr *s = ir_kstr(irs);
502✔
1190
    if (s->len == 1) {  /* Optimize put of single-char string constant. */
502✔
1191
      kchar = (int8_t)strdata(s)[0];  /* Signed! */
152✔
1192
      args[1] = ASMREF_TMP1;  /* int, truncated to char */
152✔
1193
      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
152✔
1194
    }
1195
  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
828✔
1196
    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
696✔
1197
      if (irs->op2 == IRTOSTR_NUM) {
240✔
1198
        args[1] = ASMREF_TMP1;  /* TValue * */
12✔
1199
        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
12✔
1200
      } else {
1201
        lj_assertA(irt_isinteger(IR(irs->op1)->t),
228✔
1202
                   "TOSTR of non-numeric IR %04d", irs->op1);
1203
        args[1] = irs->op1;  /* int */
228✔
1204
        if (irs->op2 == IRTOSTR_INT)
228✔
1205
          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1206
        else
1207
          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
164✔
1208
      }
1209
    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
456✔
1210
      args[1] = irs->op1;  /* const void * */
4✔
1211
      args[2] = irs->op2;  /* MSize */
4✔
1212
      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
4✔
1213
    }
1214
  }
1215
  asm_setupresult(as, ir, ci);  /* SBuf * */
1,330✔
1216
  asm_gencall(as, ci, args);
1,330✔
1217
  if (args[1] == ASMREF_TMP1) {
1,329✔
1218
    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
164✔
1219
    if (kchar == -129)
164✔
1220
      asm_tvptr(as, tmp, irs->op1);
12✔
1221
    else
1222
      ra_allockreg(as, kchar, tmp);
152✔
1223
  }
1224
}
1,329✔
1225

1226
static void asm_bufstr(ASMState *as, IRIns *ir)
738✔
1227
{
1228
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
738✔
1229
  IRRef args[1];
738✔
1230
  args[0] = ir->op1;  /* SBuf *sb */
738✔
1231
  as->gcsteps++;
738✔
1232
  asm_setupresult(as, ir, ci);  /* GCstr * */
738✔
1233
  asm_gencall(as, ci, args);
738✔
1234
}
738✔
1235

1236
/* -- Type conversions ---------------------------------------------------- */
1237

1238
static void asm_tostr(ASMState *as, IRIns *ir)
42✔
1239
{
1240
  const CCallInfo *ci;
42✔
1241
  IRRef args[2];
42✔
1242
  asm_snap_prep(as);
42✔
1243
  args[0] = ASMREF_L;
42✔
1244
  as->gcsteps++;
42✔
1245
  if (ir->op2 == IRTOSTR_NUM) {
42✔
1246
    args[1] = ASMREF_TMP1;  /* cTValue * */
23✔
1247
    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
23✔
1248
  } else {
1249
    args[1] = ir->op1;  /* int32_t k */
19✔
1250
    if (ir->op2 == IRTOSTR_INT)
19✔
1251
      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1252
    else
1253
      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
8✔
1254
  }
1255
  asm_setupresult(as, ir, ci);  /* GCstr * */
42✔
1256
  asm_gencall(as, ci, args);
42✔
1257
  if (ir->op2 == IRTOSTR_NUM)
42✔
1258
    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
23✔
1259
}
42✔
1260

1261
#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1262
static void asm_conv64(ASMState *as, IRIns *ir)
1263
{
1264
  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1265
  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1266
  IRCallID id;
1267
  IRRef args[2];
1268
  lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
1269
             "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
1270
  args[LJ_BE] = (ir-1)->op1;
1271
  args[LJ_LE] = ir->op1;
1272
  if (st == IRT_NUM || st == IRT_FLOAT) {
1273
    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1274
    ir--;
1275
  } else {
1276
    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1277
  }
1278
  {
1279
#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1280
    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1281
    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
1282
#else
1283
    const CCallInfo *ci = &lj_ir_callinfo[id];
1284
#endif
1285
    asm_setupresult(as, ir, ci);
1286
    asm_gencall(as, ci, args);
1287
  }
1288
}
1289
#endif
1290

1291
/* -- Memory references --------------------------------------------------- */
1292

1293
static void asm_newref(ASMState *as, IRIns *ir)
354✔
1294
{
1295
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
354✔
1296
  IRRef args[3];
354✔
1297
  if (ir->r == RID_SINK)
354✔
1298
    return;
29✔
1299
  asm_snap_prep(as);
325✔
1300
  args[0] = ASMREF_L;     /* lua_State *L */
325✔
1301
  args[1] = ir->op1;      /* GCtab *t     */
325✔
1302
  args[2] = ASMREF_TMP1;  /* cTValue *key */
325✔
1303
  asm_setupresult(as, ir, ci);  /* TValue * */
325✔
1304
  asm_gencall(as, ci, args);
325✔
1305
  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
325✔
1306
}
1307

1308
static void asm_lref(ASMState *as, IRIns *ir)
2✔
1309
{
1310
  Reg r = ra_dest(as, ir, RSET_GPR);
2✔
1311
#if LJ_TARGET_X86ORX64
1312
  ra_left(as, r, ASMREF_L);
2✔
1313
#else
1314
  ra_leftov(as, r, ASMREF_L);
1315
#endif
1316
}
2✔
1317

1318
/* -- Calls --------------------------------------------------------------- */
1319

1320
/* Collect arguments from CALL* and CARG instructions. */
1321
static void asm_collectargs(ASMState *as, IRIns *ir,
1322
                            const CCallInfo *ci, IRRef *args)
1323
{
1324
  uint32_t n = CCI_XNARGS(ci);
1325
  /* Account for split args. */
1326
  lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
1327
  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1328
  while (n-- > 1) {
1329
    ir = IR(ir->op1);
1330
    lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
1331
    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1332
  }
1333
  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1334
  lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
1335
}
1336

1337
/* Reconstruct CCallInfo flags for CALLX*. */
1338
static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1339
{
1340
  uint32_t nargs = 0;
1341
  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
1342
    IRIns *ira = IR(ir->op1);
1343
    nargs++;
1344
    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1345
  }
1346
#if LJ_HASFFI
1347
  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
1348
    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1349
    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1350
    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1351
#if LJ_TARGET_X86
1352
    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1353
#endif
1354
  }
1355
#endif
1356
  return (nargs | (ir->t.irt << CCI_OTSHIFT));
1357
}
1358

1359
static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
300✔
1360
{
1361
  const CCallInfo *ci = &lj_ir_callinfo[id];
300✔
1362
  IRRef args[2];
300✔
1363
  args[0] = ir->op1;
300✔
1364
  args[1] = ir->op2;
300✔
1365
  asm_setupresult(as, ir, ci);
300✔
1366
  asm_gencall(as, ci, args);
300✔
1367
}
300✔
1368

1369
static void asm_call(ASMState *as, IRIns *ir)
463✔
1370
{
1371
  IRRef args[CCI_NARGS_MAX];
463✔
1372
  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
463✔
1373
  asm_collectargs(as, ir, ci, args);
463✔
1374
  asm_setupresult(as, ir, ci);
463✔
1375
  asm_gencall(as, ci, args);
463✔
1376
}
463✔
1377

1378
/* -- PHI and loop handling ----------------------------------------------- */
1379

1380
/* Break a PHI cycle by renaming to a free register (evict if needed). */
1381
static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby,
92✔
1382
                          RegSet allow)
1383
{
1384
  RegSet candidates = blocked & allow;
92✔
1385
  if (candidates) {  /* If this register file has candidates. */
92✔
1386
    /* Note: the set for ra_pick cannot be empty, since each register file
1387
    ** has some registers never allocated to PHIs.
1388
    */
1389
    Reg down, up = ra_pick(as, ~blocked & allow);  /* Get a free register. */
50✔
1390
    if (candidates & ~blockedby)  /* Optimize shifts, else it's a cycle. */
50✔
1391
      candidates = candidates & ~blockedby;
×
1392
    down = rset_picktop(candidates);  /* Pick candidate PHI register. */
50✔
1393
    ra_rename(as, down, up);  /* And rename it to the free register. */
50✔
1394
  }
1395
}
92✔
1396

1397
/* PHI register shuffling.
**
** The allocator tries hard to preserve PHI register assignments across
** the loop body. Most of the time this loop does nothing, since there
** are no register mismatches.
**
** If a register mismatch is detected and ...
** - the register is currently free: rename it.
** - the register is blocked by an invariant: restore/remat and rename it.
** - Otherwise the register is used by another PHI, so mark it as blocked.
**
** The renames are order-sensitive, so just retry the loop if a register
** is marked as blocked, but has been freed in the meantime. A cycle is
** detected if all of the blocked registers are allocated. To break the
** cycle rename one of them to a free register and retry.
**
** Note that PHI spill slots are kept in sync and don't need to be shuffled.
*/
static void asm_phi_shuffle(ASMState *as)
{
  RegSet work;

  /* Find and resolve PHI register mismatches. */
  for (;;) {
    RegSet blocked = RSET_EMPTY;
    RegSet blockedby = RSET_EMPTY;
    RegSet phiset = as->phiset;
    while (phiset) {  /* Check all left PHI operand registers. */
      Reg r = rset_pickbot(phiset);
      IRIns *irl = IR(as->phireg[r]);
      Reg left = irl->r;
      if (r != left) {  /* Mismatch? */
        if (!rset_test(as->freeset, r)) {  /* PHI register blocked? */
          IRRef ref = regcost_ref(as->cost[r]);
          /* Blocked by other PHI (w/reg)? */
          if (!ra_iskref(ref) && irt_ismarked(IR(ref)->t)) {
            rset_set(blocked, r);
            if (ra_hasreg(left))
              rset_set(blockedby, left);
            left = RID_NONE;
          } else {  /* Otherwise grab register from invariant. */
            ra_restore(as, ref);
            checkmclim(as);
          }
        }
        if (ra_hasreg(left)) {
          ra_rename(as, left, r);
          checkmclim(as);
        }
      }
      rset_clear(phiset, r);
    }
    if (!blocked) break;  /* Finished. */
    if (!(as->freeset & blocked)) {  /* Break cycles if none are free. */
      asm_phi_break(as, blocked, blockedby, RSET_GPR);
      if (!LJ_SOFTFP) asm_phi_break(as, blocked, blockedby, RSET_FPR);
      checkmclim(as);
    }  /* Else retry some more renames. */
  }

  /* Restore/remat invariants whose registers are modified inside the loop. */
#if !LJ_SOFTFP
  work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR;
  while (work) {
    Reg r = rset_pickbot(work);
    ra_restore(as, regcost_ref(as->cost[r]));
    rset_clear(work, r);
    checkmclim(as);
  }
#endif
  work = as->modset & ~(as->freeset | as->phiset);
  while (work) {
    Reg r = rset_pickbot(work);
    ra_restore(as, regcost_ref(as->cost[r]));
    rset_clear(work, r);
    checkmclim(as);
  }

  /* Allocate and save all unsaved PHI regs and clear marks. */
  work = as->phiset;
  while (work) {
    Reg r = rset_picktop(work);
    IRRef lref = as->phireg[r];
    IRIns *ir = IR(lref);
    if (ra_hasspill(ir->s)) {  /* Left PHI gained a spill slot? */
      irt_clearmark(ir->t);  /* Handled here, so clear marker now. */
      ra_alloc1(as, lref, RID2RSET(r));
      ra_save(as, ir, r);  /* Save to spill slot inside the loop. */
      checkmclim(as);
    }
    rset_clear(work, r);
  }
}

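/* Illustrative sketch (hypothetical, not part of LuaJIT): the rset_pickbot()
** and rset_picktop() loops above are essentially bit scans over a register
** bitmask. The standalone snippet below shows the same iteration pattern
** with plain GCC/Clang builtins; it is kept under #if 0 so it cannot affect
** the build.
*/
#if 0
#include <stdint.h>
#include <stdio.h>

static int sketch_pickbot(uint32_t rs) { return __builtin_ctz(rs); }      /* Lowest set bit. */
static int sketch_picktop(uint32_t rs) { return 31 - __builtin_clz(rs); } /* Highest set bit. */

int main(void)
{
  uint32_t work = (1u << 3) | (1u << 7) | (1u << 12);  /* Pretend {r3, r7, r12} are live. */
  while (work) {                        /* Same shape as the while (work) loops above. */
    int r = sketch_pickbot(work);
    printf("visit r%d\n", r);
    work &= ~(1u << r);                 /* rset_clear(work, r) */
  }
  printf("picktop = r%d\n", sketch_picktop((1u << 3) | (1u << 7) | (1u << 12)));
  return 0;
}
#endif
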
/* Copy unsynced left/right PHI spill slots. Rarely needed. */
static void asm_phi_copyspill(ASMState *as)
{
  int need = 0;
  IRIns *ir;
  for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--)
    if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s))
      need |= irt_isfp(ir->t) ? 2 : 1;  /* Unsynced spill slot? */
  if ((need & 1)) {  /* Copy integer spill slots. */
#if !LJ_TARGET_X86ORX64
    Reg r = RID_TMP;
#else
    Reg r = RID_RET;
    if ((as->freeset & RSET_GPR))
      r = rset_pickbot((as->freeset & RSET_GPR));
    else
      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
#endif
    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
      if (ra_hasspill(ir->s)) {
        IRIns *irl = IR(ir->op1);
        if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
          emit_spstore(as, irl, r, sps_scale(irl->s));
          emit_spload(as, ir, r, sps_scale(ir->s));
          checkmclim(as);
        }
      }
    }
#if LJ_TARGET_X86ORX64
    if (!rset_test(as->freeset, r))
      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
#endif
  }
#if !LJ_SOFTFP
  if ((need & 2)) {  /* Copy FP spill slots. */
#if LJ_TARGET_X86
    Reg r = RID_XMM0;
#else
    Reg r = RID_FPRET;
#endif
    if ((as->freeset & RSET_FPR))
      r = rset_pickbot((as->freeset & RSET_FPR));
    if (!rset_test(as->freeset, r))
      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
      if (ra_hasspill(ir->s)) {
        IRIns *irl = IR(ir->op1);
        if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
          emit_spstore(as, irl, r, sps_scale(irl->s));
          emit_spload(as, ir, r, sps_scale(ir->s));
          checkmclim(as);
        }
      }
    }
    if (!rset_test(as->freeset, r))
      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
  }
#endif
}

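/* Note (illustrative summary, not from the original source): spill slots
** live in memory and most targets cannot copy memory to memory directly,
** so each unsynced spill-slot copy above goes through a scratch register r.
** A free GPR (or FPR for the float case) is preferred; if none is free, the
** current contents of r are parked in SPOFS_TMP around the copies. Remember
** that machine code is emitted backwards, so the emit_spload()/emit_spstore()
** calls appear in reverse execution order.
*/
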
/* Emit renames for left PHIs which are only spilled outside the loop. */
static void asm_phi_fixup(ASMState *as)
{
  RegSet work = as->phiset;
  while (work) {
    Reg r = rset_picktop(work);
    IRRef lref = as->phireg[r];
    IRIns *ir = IR(lref);
    if (irt_ismarked(ir->t)) {
      irt_clearmark(ir->t);
      /* Left PHI gained a spill slot before the loop? */
      if (ra_hasspill(ir->s)) {
        ra_addrename(as, r, lref, as->loopsnapno);
      }
    }
    rset_clear(work, r);
  }
}

/* Setup right PHI reference. */
static void asm_phi(ASMState *as, IRIns *ir)
{
  RegSet allow = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) &
                 ~as->phiset;
  RegSet afree = (as->freeset & allow);
  IRIns *irl = IR(ir->op1);
  IRIns *irr = IR(ir->op2);
  if (ir->r == RID_SINK)  /* Sink PHI. */
    return;
  /* Spill slot shuffling is not implemented yet (but rarely needed). */
  if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
    lj_trace_err(as->J, LJ_TRERR_NYIPHI);
  /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */
  if ((afree & (afree-1))) {  /* Two or more free registers? */
    Reg r;
    if (ra_noreg(irr->r)) {  /* Get a register for the right PHI. */
      r = ra_allocref(as, ir->op2, allow);
    } else {  /* Duplicate right PHI, need a copy (rare). */
      r = ra_scratch(as, allow);
      emit_movrr(as, irr, r, irr->r);
    }
    ir->r = (uint8_t)r;
    rset_set(as->phiset, r);
    as->phireg[r] = (IRRef1)ir->op1;
    irt_setmark(irl->t);  /* Marks left PHIs _with_ register. */
    if (ra_noreg(irl->r))
      ra_sethint(irl->r, r); /* Set register hint for left PHI. */
  } else {  /* Otherwise allocate a spill slot. */
    /* This is overly restrictive, but it triggers only on synthetic code. */
    if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
      lj_trace_err(as->J, LJ_TRERR_NYIPHI);
    ra_spill(as, ir);
    irr->s = ir->s;  /* Set right PHI spill slot. Sync left slot later. */
  }
}

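/* Side note (illustrative, not from the original source): the
** afree & (afree-1) test in asm_phi() above is the usual "clear lowest set
** bit" trick. The result is non-zero exactly when at least two bits are set,
** i.e. when at least two registers in afree are free: afree = 0x14 gives
** 0x14 & 0x13 = 0x10 (two or more free), while afree = 0x04 gives 0 (only
** one free), which forces the spill-slot fallback.
*/
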
static void asm_loop_fixup(ASMState *as);

/* Middle part of a loop. */
static void asm_loop(ASMState *as)
{
  MCode *mcspill;
  /* LOOP is a guard, so the snapno is up to date. */
  as->loopsnapno = as->snapno;
  if (as->gcsteps)
    asm_gc_check(as);
  /* LOOP marks the transition from the variant to the invariant part. */
  as->flagmcp = as->invmcp = NULL;
  as->sectref = 0;
  if (!neverfuse(as)) as->fuseref = 0;
  asm_phi_shuffle(as);
  mcspill = as->mcp;
  asm_phi_copyspill(as);
  asm_loop_fixup(as);
  as->mcloop = as->mcp;
  RA_DBGX((as, "===== LOOP ====="));
  if (!as->realign) RA_DBG_FLUSH();
  if (as->mcp != mcspill)
    emit_jmp(as, mcspill);
}

/* -- Target-specific assembler ------------------------------------------- */

#if LJ_TARGET_X86ORX64
#include "lj_asm_x86.h"
#elif LJ_TARGET_ARM
#include "lj_asm_arm.h"
#elif LJ_TARGET_ARM64
#include "lj_asm_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_asm_mips.h"
#else
#error "Missing assembler for target CPU"
#endif

/* -- Common instruction helpers ------------------------------------------ */

#if !LJ_SOFTFP32
#if !LJ_TARGET_X86ORX64
#define asm_ldexp(as, ir)        asm_callid(as, ir, IRCALL_ldexp)
#endif

static void asm_pow(ASMState *as, IRIns *ir)
{
#if LJ_64 && LJ_HASFFI
  if (!irt_isnum(ir->t))
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                          IRCALL_lj_carith_powu64);
  else
#endif
  asm_callid(as, ir, IRCALL_pow);
}

static void asm_div(ASMState *as, IRIns *ir)
{
#if LJ_64 && LJ_HASFFI
  if (!irt_isnum(ir->t))
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                          IRCALL_lj_carith_divu64);
  else
#endif
    asm_fpdiv(as, ir);
}
#endif

static void asm_mod(ASMState *as, IRIns *ir)
{
#if LJ_64 && LJ_HASFFI
  if (!irt_isint(ir->t))
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                          IRCALL_lj_carith_modu64);
  else
#endif
    asm_callid(as, ir, IRCALL_lj_vm_modi);
}

static void asm_fuseequal(ASMState *as, IRIns *ir)
{
  /* Fuse HREF + EQ/NE. */
  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
    as->curins--;
    asm_href(as, ir-1, (IROp)ir->o);
  } else {
    asm_equal(as, ir);
  }
}

/* -- Instruction dispatch ------------------------------------------------ */

/* Assemble a single instruction. */
static void asm_ir(ASMState *as, IRIns *ir)
{
  switch ((IROp)ir->o) {
  /* Miscellaneous ops. */
  case IR_LOOP: asm_loop(as); break;
  case IR_NOP: case IR_XBAR:
    lj_assertA(!ra_used(ir),
               "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
    break;
  case IR_USE:
    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
  case IR_PHI: asm_phi(as, ir); break;
  case IR_HIOP: asm_hiop(as, ir); break;
  case IR_GCSTEP: asm_gcstep(as, ir); break;
  case IR_PROF: asm_prof(as, ir); break;

  /* Guarded assertions. */
  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
  case IR_ABC:
    asm_comp(as, ir);
    break;
  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;

  case IR_RETF: asm_retf(as, ir); break;

  /* Bit ops. */
  case IR_BNOT: asm_bnot(as, ir); break;
  case IR_BSWAP: asm_bswap(as, ir); break;
  case IR_BAND: asm_band(as, ir); break;
  case IR_BOR: asm_bor(as, ir); break;
  case IR_BXOR: asm_bxor(as, ir); break;
  case IR_BSHL: asm_bshl(as, ir); break;
  case IR_BSHR: asm_bshr(as, ir); break;
  case IR_BSAR: asm_bsar(as, ir); break;
  case IR_BROL: asm_brol(as, ir); break;
  case IR_BROR: asm_bror(as, ir); break;

  /* Arithmetic ops. */
  case IR_ADD: asm_add(as, ir); break;
  case IR_SUB: asm_sub(as, ir); break;
  case IR_MUL: asm_mul(as, ir); break;
  case IR_MOD: asm_mod(as, ir); break;
  case IR_NEG: asm_neg(as, ir); break;
#if LJ_SOFTFP32
  case IR_DIV: case IR_POW: case IR_ABS:
  case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
    /* Unused for LJ_SOFTFP32. */
    lj_assertA(0, "IR %04d with unused op %d",
                  (int)(ir - as->ir) - REF_BIAS, ir->o);
    break;
#else
  case IR_DIV: asm_div(as, ir); break;
  case IR_POW: asm_pow(as, ir); break;
  case IR_ABS: asm_abs(as, ir); break;
  case IR_LDEXP: asm_ldexp(as, ir); break;
  case IR_FPMATH: asm_fpmath(as, ir); break;
  case IR_TOBIT: asm_tobit(as, ir); break;
#endif
  case IR_MIN: asm_min(as, ir); break;
  case IR_MAX: asm_max(as, ir); break;

  /* Overflow-checking arithmetic ops. */
  case IR_ADDOV: asm_addov(as, ir); break;
  case IR_SUBOV: asm_subov(as, ir); break;
  case IR_MULOV: asm_mulov(as, ir); break;

  /* Memory references. */
  case IR_AREF: asm_aref(as, ir); break;
  case IR_HREF: asm_href(as, ir, 0); break;
  case IR_HREFK: asm_hrefk(as, ir); break;
  case IR_NEWREF: asm_newref(as, ir); break;
  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
  case IR_FREF: asm_fref(as, ir); break;
  case IR_STRREF: asm_strref(as, ir); break;
  case IR_LREF: asm_lref(as, ir); break;

  /* Loads and stores. */
  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
    asm_ahuvload(as, ir);
    break;
  case IR_FLOAD: asm_fload(as, ir); break;
  case IR_XLOAD: asm_xload(as, ir); break;
  case IR_SLOAD: asm_sload(as, ir); break;

  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
  case IR_FSTORE: asm_fstore(as, ir); break;
  case IR_XSTORE: asm_xstore(as, ir); break;

  /* Allocations. */
  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
  case IR_TNEW: asm_tnew(as, ir); break;
  case IR_TDUP: asm_tdup(as, ir); break;
  case IR_CNEW: case IR_CNEWI:
#if LJ_HASFFI
    asm_cnew(as, ir);
#else
    lj_assertA(0, "IR %04d with unused op %d",
                  (int)(ir - as->ir) - REF_BIAS, ir->o);
#endif
    break;

  /* Buffer operations. */
  case IR_BUFHDR: asm_bufhdr(as, ir); break;
  case IR_BUFPUT: asm_bufput(as, ir); break;
  case IR_BUFSTR: asm_bufstr(as, ir); break;

  /* Write barriers. */
  case IR_TBAR: asm_tbar(as, ir); break;
  case IR_OBAR: asm_obar(as, ir); break;

  /* Type conversions. */
  case IR_CONV: asm_conv(as, ir); break;
  case IR_TOSTR: asm_tostr(as, ir); break;
  case IR_STRTO: asm_strto(as, ir); break;

  /* Calls. */
  case IR_CALLA:
    as->gcsteps++;
    /* fallthrough */
  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
  case IR_CALLXS: asm_callx(as, ir); break;
  case IR_CARG: break;

  default:
    setintV(&as->J->errinfo, ir->o);
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
    break;
  }
}

/* -- Head of trace ------------------------------------------------------- */

/* Head of a root trace. */
static void asm_head_root(ASMState *as)
{
  int32_t spadj;
  asm_head_root_base(as);
  emit_setvmstate(as, (int32_t)as->T->traceno);
  spadj = asm_stack_adjust(as);
  as->T->spadjust = (uint16_t)spadj;
  emit_spsub(as, spadj);
  /* Root traces assume a checked stack for the starting proto. */
  as->T->topslot = gcref(as->T->startpt)->pt.framesize;
}

/* Head of a side trace.
**
** The current simplistic algorithm requires that all slots inherited
** from the parent are live in a register between pass 2 and pass 3. This
** avoids the complexity of stack slot shuffling. But of course this may
** overflow the register set in some cases and cause the dreaded error:
** "NYI: register coalescing too complex". A refined algorithm is needed.
*/
static void asm_head_side(ASMState *as)
{
  IRRef1 sloadins[RID_MAX];
  RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
  RegSet live = RSET_EMPTY;  /* Live parent registers. */
  IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
  int32_t spadj, spdelta;
  int pass2 = 0;
  int pass3 = 0;
  IRRef i;

  if (as->snapno && as->topslot > as->parent->topslot) {
    /* Force snap #0 alloc to prevent register overwrite in stack check. */
    asm_snap_alloc(as, 0);
  }
  allow = asm_head_side_base(as, irp, allow);

  /* Scan all parent SLOADs and collect register dependencies. */
  for (i = as->stopins; i > REF_BASE; i--) {
    IRIns *ir = IR(i);
    RegSP rs;
    lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
               (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
               "IR %04d has bad parent op %d",
               (int)(ir - as->ir) - REF_BIAS, ir->o);
    rs = as->parentmap[i - REF_FIRST];
    if (ra_hasreg(ir->r)) {
      rset_clear(allow, ir->r);
      if (ra_hasspill(ir->s)) {
        ra_save(as, ir, ir->r);
        checkmclim(as);
      }
    } else if (ra_hasspill(ir->s)) {
      irt_setmark(ir->t);
      pass2 = 1;
    }
    if (ir->r == rs) {  /* Coalesce matching registers right now. */
      ra_free(as, ir->r);
    } else if (ra_hasspill(regsp_spill(rs))) {
      if (ra_hasreg(ir->r))
        pass3 = 1;
    } else if (ra_used(ir)) {
      sloadins[rs] = (IRRef1)i;
      rset_set(live, rs);  /* Block live parent register. */
    }
  }

  /* Calculate stack frame adjustment. */
  spadj = asm_stack_adjust(as);
  spdelta = spadj - (int32_t)as->parent->spadjust;
  if (spdelta < 0) {  /* Don't shrink the stack frame. */
    spadj = (int32_t)as->parent->spadjust;
    spdelta = 0;
  }
  as->T->spadjust = (uint16_t)spadj;

  /* Reload spilled target registers. */
  if (pass2) {
    for (i = as->stopins; i > REF_BASE; i--) {
      IRIns *ir = IR(i);
      if (irt_ismarked(ir->t)) {
        RegSet mask;
        Reg r;
        RegSP rs;
        irt_clearmark(ir->t);
        rs = as->parentmap[i - REF_FIRST];
        if (!ra_hasspill(regsp_spill(rs)))
          ra_sethint(ir->r, rs);  /* Hint may be gone, set it again. */
        else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
          continue;  /* Same spill slot, do nothing. */
        mask = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
        if (mask == RSET_EMPTY)
          lj_trace_err(as->J, LJ_TRERR_NYICOAL);
        r = ra_allocref(as, i, mask);
        ra_save(as, ir, r);
        rset_clear(allow, r);
        if (r == rs) {  /* Coalesce matching registers right now. */
          ra_free(as, r);
          rset_clear(live, r);
        } else if (ra_hasspill(regsp_spill(rs))) {
          pass3 = 1;
        }
        checkmclim(as);
      }
    }
  }

  /* Store trace number and adjust stack frame relative to the parent. */
  emit_setvmstate(as, (int32_t)as->T->traceno);
  emit_spsub(as, spdelta);

#if !LJ_TARGET_X86ORX64
  /* Restore BASE register from parent spill slot. */
  if (ra_hasspill(irp->s))
    emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s));
#endif

  /* Restore target registers from parent spill slots. */
  if (pass3) {
    RegSet work = ~as->freeset & RSET_ALL;
    while (work) {
      Reg r = rset_pickbot(work);
      IRRef ref = regcost_ref(as->cost[r]);
      RegSP rs = as->parentmap[ref - REF_FIRST];
      rset_clear(work, r);
      if (ra_hasspill(regsp_spill(rs))) {
        int32_t ofs = sps_scale(regsp_spill(rs));
        ra_free(as, r);
        emit_spload(as, IR(ref), r, ofs);
        checkmclim(as);
      }
    }
  }

  /* Shuffle registers to match up target regs with parent regs. */
  for (;;) {
    RegSet work;

    /* Repeatedly coalesce free live registers by moving to their target. */
    while ((work = as->freeset & live) != RSET_EMPTY) {
      Reg rp = rset_pickbot(work);
      IRIns *ir = IR(sloadins[rp]);
      rset_clear(live, rp);
      rset_clear(allow, rp);
      ra_free(as, ir->r);
      emit_movrr(as, ir, ir->r, rp);
      checkmclim(as);
    }

    /* We're done if no live registers remain. */
    if (live == RSET_EMPTY)
      break;

    /* Break cycles by renaming one target to a temp. register. */
    if (live & RSET_GPR) {
      RegSet tmpset = as->freeset & ~live & allow & RSET_GPR;
      if (tmpset == RSET_EMPTY)
        lj_trace_err(as->J, LJ_TRERR_NYICOAL);
      ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset));
    }
    if (!LJ_SOFTFP && (live & RSET_FPR)) {
      RegSet tmpset = as->freeset & ~live & allow & RSET_FPR;
      if (tmpset == RSET_EMPTY)
        lj_trace_err(as->J, LJ_TRERR_NYICOAL);
      ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset));
    }
    checkmclim(as);
    /* Continue with coalescing to fix up the broken cycle(s). */
  }

  /* Inherit top stack slot already checked by parent trace. */
  as->T->topslot = as->parent->topslot;
  if (as->topslot > as->T->topslot) {  /* Need to check for higher slot? */
#ifdef EXITSTATE_CHECKEXIT
    /* Highest exit + 1 indicates stack check. */
    ExitNo exitno = as->T->nsnap;
#else
    /* Reuse the parent exit in the context of the parent trace. */
    ExitNo exitno = as->J->exitno;
#endif
    as->T->topslot = (uint8_t)as->topslot;  /* Remember for child traces. */
    asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno);
  }
}

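/* Worked example (hypothetical register numbers, not from the original
** source): suppose the parent exits with slot #1 in r3 and slot #2 in r4,
** while this side trace allocated the matching SLOADs to r4 and r3. Neither
** move can be emitted first without clobbering the other, so the shuffle
** loop above renames one of the targets to a free temporary register, after
** which the remaining moves coalesce one by one on the next iteration.
*/
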
/* -- Tail of trace ------------------------------------------------------- */

/* Get base slot for a snapshot. */
static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  MSize n;
  for (n = snap->nent; n > 0; n--) {
    SnapEntry sn = map[n-1];
    if ((sn & SNAP_FRAME)) {
      *gotframe = 1;
      return snap_slot(sn) - LJ_FR2;
    }
  }
  return 0;
}

/* Link to another trace. */
static void asm_tail_link(ASMState *as)
{
  SnapNo snapno = as->T->nsnap-1;  /* Last snapshot. */
  SnapShot *snap = &as->T->snap[snapno];
  int gotframe = 0;
  BCReg baseslot = asm_baseslot(as, snap, &gotframe);

  as->topslot = snap->topslot;
  checkmclim(as);
  ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));

  if (as->T->link == 0) {
    /* Setup fixed registers for exit to interpreter. */
    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
    int32_t mres;
    if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
      BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
      if (bc_isret(bc_op(*retpc)))
        pc = retpc;
    }
#if LJ_GC64
    emit_loadu64(as, RID_LPC, u64ptr(pc));
#else
    ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
    ra_allockreg(as, i32ptr(pc), RID_LPC);
#endif
    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
    switch (bc_op(*pc)) {
    case BC_CALLM: case BC_CALLMT:
      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
    case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
    case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
    default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
    }
    ra_allockreg(as, mres, RID_RET);  /* Return MULTRES or 0. */
  } else if (baseslot) {
    /* Save modified BASE for linking to trace with higher start frame. */
    emit_setgl(as, RID_BASE, jit_base);
  }
  emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);

  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
    IR(as->J->ktrace)->o = IR_KGC;
  }

  /* Sync the interpreter state with the on-trace state. */
  asm_stack_restore(as, snap);

  /* Root traces that add frames need to check the stack at the end. */
  if (!as->parent && gotframe)
    asm_stack_check(as, as->topslot, NULL, as->freeset & RSET_GPR, snapno);
}

/* -- Trace setup --------------------------------------------------------- */

/* Clear reg/sp for all instructions and add register hints. */
static void asm_setup_regsp(ASMState *as)
{
  GCtrace *T = as->T;
  int sink = T->sinktags;
  IRRef nins = T->nins;
  IRIns *ir, *lastir;
  int inloop;
#if LJ_TARGET_ARM
  uint32_t rload = 0xa6402a64;
#endif

  ra_setup(as);

  /* Clear reg/sp for constants. */
  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
    ir->prev = REGSP_INIT;
    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
#if LJ_GC64
      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
#else
      /* Make life easier for backends by putting address of constant in i. */
      ir->i = (int32_t)(intptr_t)(ir+1);
#endif
      ir++;
    }
  }

  /* REF_BASE is used for implicit references to the BASE register. */
  lastir->prev = REGSP_HINT(RID_BASE);

  as->snaprename = nins;
  as->snapref = nins;
  as->snapno = T->nsnap;
  as->snapalloc = 0;

  as->stopins = REF_BASE;
  as->orignins = nins;
  as->curins = nins;

  /* Setup register hints for parent link instructions. */
  ir = IR(REF_FIRST);
  if (as->parent) {
    uint16_t *p;
    lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
    if (lastir - ir > LJ_MAX_JSLOTS)
      lj_trace_err(as->J, LJ_TRERR_NYICOAL);
    as->stopins = (IRRef)((lastir-1) - as->ir);
    for (p = as->parentmap; ir < lastir; ir++) {
      RegSP rs = ir->prev;
      *p++ = (uint16_t)rs;  /* Copy original parent RegSP to parentmap. */
      if (!ra_hasspill(regsp_spill(rs)))
        ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
      else
        ir->prev = REGSP_INIT;
    }
  }

  inloop = 0;
  as->evenspill = SPS_FIRST;
  for (lastir = IR(nins); ir < lastir; ir++) {
    if (sink) {
      if (ir->r == RID_SINK)
        continue;
      if (ir->r == RID_SUNK) {  /* Revert after ASM restart. */
        ir->r = RID_SINK;
        continue;
      }
    }
    switch (ir->o) {
    case IR_LOOP:
      inloop = 1;
      break;
#if LJ_TARGET_ARM
    case IR_SLOAD:
      if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP))
        break;
      /* fallthrough */
    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      if (!LJ_SOFTFP && irt_isnum(ir->t)) break;
      ir->prev = (uint16_t)REGSP_HINT((rload & 15));
      rload = lj_ror(rload, 4);
      continue;
#endif
    case IR_CALLXS: {
      CCallInfo ci;
      ci.flags = asm_callx_flags(as, ir);
      ir->prev = asm_setup_call_slots(as, ir, &ci);
      if (inloop)
        as->modset |= RSET_SCRATCH;
      continue;
      }
    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
      ir->prev = asm_setup_call_slots(as, ir, ci);
      if (inloop)
        as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
                      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
      continue;
      }
#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
    case IR_HIOP:
      switch ((ir-1)->o) {
#if LJ_SOFTFP && LJ_TARGET_ARM
      case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
        if (ra_hashint((ir-1)->r)) {
          ir->prev = (ir-1)->prev + 1;
          continue;
        }
        break;
#endif
#if !LJ_SOFTFP && LJ_NEED_FP64
      case IR_CONV:
        if (irt_isfp((ir-1)->t)) {
          ir->prev = REGSP_HINT(RID_FPRET);
          continue;
        }
#endif
      /* fallthrough */
      case IR_CALLN: case IR_CALLXS:
#if LJ_SOFTFP
      case IR_MIN: case IR_MAX:
#endif
        (ir-1)->prev = REGSP_HINT(RID_RETLO);
        ir->prev = REGSP_HINT(RID_RETHI);
        continue;
      default:
        break;
      }
      break;
#endif
#if LJ_SOFTFP
    case IR_MIN: case IR_MAX:
      if ((ir+1)->o != IR_HIOP) break;
#endif
    /* fallthrough */
    /* C calls evict all scratch regs and return results in RID_RET. */
    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
      if (REGARG_NUMGPR < 3 && as->evenspill < 3)
        as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
#if LJ_TARGET_X86 && LJ_HASFFI
      if (0) {
    case IR_CNEW:
        if (ir->op2 != REF_NIL && as->evenspill < 4)
          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
      }
      /* fallthrough */
#else
      /* fallthrough */
    case IR_CNEW:
#endif
      /* fallthrough */
    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
    case IR_BUFSTR:
      ir->prev = REGSP_HINT(RID_RET);
      if (inloop)
        as->modset = RSET_SCRATCH;
      continue;
    case IR_STRTO: case IR_OBAR:
      if (inloop)
        as->modset = RSET_SCRATCH;
      break;
#if !LJ_SOFTFP
#if !LJ_TARGET_X86ORX64
    case IR_LDEXP:
#endif
#endif
      /* fallthrough */
    case IR_POW:
      if (!LJ_SOFTFP && irt_isnum(ir->t)) {
        if (inloop)
          as->modset |= RSET_SCRATCH;
#if LJ_TARGET_X86
        if (irt_isnum(IR(ir->op2)->t)) {
          if (as->evenspill < 4)  /* Leave room to call pow(). */
            as->evenspill = 4;
        }
        break;
#else
        ir->prev = REGSP_HINT(RID_FPRET);
        continue;
#endif
      }
      /* fallthrough */ /* for integer POW */
    case IR_DIV: case IR_MOD:
      if (!irt_isnum(ir->t)) {
        ir->prev = REGSP_HINT(RID_RET);
        if (inloop)
          as->modset |= (RSET_SCRATCH & RSET_GPR);
        continue;
      }
      break;
    case IR_FPMATH:
#if LJ_TARGET_X86ORX64
      if (ir->op2 <= IRFPM_TRUNC) {
        if (!(as->flags & JIT_F_SSE4_1)) {
          ir->prev = REGSP_HINT(RID_XMM0);
          if (inloop)
            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
          continue;
        }
        break;
      }
#endif
      if (inloop)
        as->modset |= RSET_SCRATCH;
#if LJ_TARGET_X86
      break;
#else
      ir->prev = REGSP_HINT(RID_FPRET);
      continue;
#endif
#if LJ_TARGET_X86ORX64
    /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
    case IR_BSHL: case IR_BSHR: case IR_BSAR:
      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
        break;
      /* fallthrough */
    case IR_BROL: case IR_BROR:
      if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
        IR(ir->op2)->r = REGSP_HINT(RID_ECX);
        if (inloop)
          rset_set(as->modset, RID_ECX);
      }
      break;
#endif
    /* Do not propagate hints across type conversions or loads. */
    case IR_TOBIT:
    case IR_XLOAD:
#if !LJ_TARGET_ARM
    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
#endif
      break;
    case IR_CONV:
      if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM ||
          (ir->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
        break;
      /* fallthrough */
    default:
      /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
      if (irref_isk(ir->op2) && !irref_isk(ir->op1) &&
          ra_hashint(regsp_reg(IR(ir->op1)->prev))) {
        ir->prev = IR(ir->op1)->prev;
        continue;
      }
      break;
    }
    ir->prev = REGSP_INIT;
  }
  if ((as->evenspill & 1))
    as->oddspill = as->evenspill++;
  else
    as->oddspill = 0;
}

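/* Example (hypothetical IR, not from the original source): for a loop
** counter like "i = i + 1" the ADD has a constant op2, so the default case
** in asm_setup_regsp() above copies op1's register hint into the ADD. The
** result then prefers the same register as the value it was computed from,
** which tends to keep loop-carried values in place and avoids extra moves.
*/
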
/* -- Assembler core ------------------------------------------------------ */

/* Assemble a trace. */
void lj_asm_trace(jit_State *J, GCtrace *T)
{
  ASMState as_;
  ASMState *as = &as_;

  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
  {
    IRRef nins = T->nins;
    IRIns *ir = &T->ir[nins-1];
    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
      T->nins = nins;
    }
  }

  /* Ensure an initialized instruction beyond the last one for HIOP checks. */
  /* This also allows one RENAME to be added without reallocating curfinal. */
  as->orignins = lj_ir_nextins(J);
  J->cur.ir[as->orignins].o = IR_NOP;

  /* Setup initial state. Copy some fields to reduce indirections. */
  as->J = J;
  as->T = T;
  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
  as->flags = J->flags;
  as->loopref = J->loopref;
  as->realign = NULL;
  as->loopinv = 0;
  as->parent = J->parent ? traceref(J, J->parent) : NULL;

  /* Reserve MCode memory. */
  as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
  as->mcp = as->mctop;
  as->mclim = as->mcbot + MCLIM_REDZONE;
  asm_setup_target(as);

  /*
  ** This is a loop, because the MCode may have to be (re-)assembled
  ** multiple times:
  **
  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
  **    backend wants the MCode to be aligned differently.
  **
  **    This is currently only the case on x86/x64, where small loops get
  **    an aligned loop body plus a short branch. Not much effort is wasted,
  **    because the abort happens very quickly and only once.
  **
  ** 2. The IR is immovable, since the MCode embeds pointers to various
  **    constants inside the IR. But RENAMEs may need to be added to the IR
  **    during assembly, which might grow and reallocate the IR. We check
  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
  **    copy (in J->curfinal.ir) and try again.
  **
  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
  **    always have one spare slot in the IR (see above), which means we
  **    have to redo the assembly for only ~2% of all traces.
  **
  **    Very, very rarely, this needs to be done repeatedly, since the
  **    location of constants inside the IR (actually, reachability from
  **    a global pointer) may affect register allocation and thus the
  **    number of RENAMEs.
  */
  for (;;) {
    as->mcp = as->mctop;
#ifdef LUA_USE_ASSERT
    as->mcp_prev = as->mcp;
#endif
    as->ir = J->curfinal->ir;  /* Use the copied IR. */
    as->curins = J->cur.nins = as->orignins;

    RA_DBG_START();
    RA_DBGX((as, "===== STOP ====="));

    /* General trace setup. Emit tail of trace. */
    asm_tail_prep(as);
    as->mcloop = NULL;
    as->flagmcp = NULL;
    as->topslot = 0;
    as->gcsteps = 0;
    as->sectref = as->loopref;
    as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
    asm_setup_regsp(as);
    if (!as->loopref)
      asm_tail_link(as);

    /* Assemble a trace in linear backwards order. */
    for (as->curins--; as->curins > as->stopins; as->curins--) {
      IRIns *ir = IR(as->curins);
      /* 64 bit types handled by SPLIT for 32 bit archs. */
      lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
                 "IR %04d has unsplit 64 bit type",
                 (int)(ir - as->ir) - REF_BIAS);
      asm_snap_prev(as);
      if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
        continue;  /* Dead-code elimination can be soooo easy. */
      if (irt_isguard(ir->t))
        asm_snap_prep(as);
      RA_DBG_REF();
      checkmclim(as);
      asm_ir(as, ir);
    }

    if (as->realign && J->curfinal->nins >= T->nins)
      continue;  /* Retry in case only the MCode needs to be realigned. */

    /* Emit head of trace. */
    RA_DBG_REF();
    checkmclim(as);
    if (as->gcsteps > 0) {
      as->curins = as->T->snap[0].ref;
      asm_snap_prep(as);  /* The GC check is a guard. */
      asm_gc_check(as);
      as->curins = as->stopins;
    }
    ra_evictk(as);
    if (as->parent)
      asm_head_side(as);
    else
      asm_head_root(as);
    asm_phi_fixup(as);

    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
      lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
      T->nins = J->curfinal->nins;
      /* Fill mcofs of any unprocessed snapshots. */
      as->curins = REF_FIRST;
      asm_snap_prev(as);
      break;  /* Done. */
    }

    /* Otherwise try again with a bigger IR. */
    lj_trace_free(J2G(J), J->curfinal);
    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
    J->curfinal = lj_trace_alloc(J->L, T);
    as->realign = NULL;
  }

  RA_DBGX((as, "===== START ===="));
  RA_DBG_FLUSH();
  if (as->freeset != RSET_ALL)
    lj_trace_err(as->J, LJ_TRERR_BADRA);  /* Ouch! Should never happen. */

  /* Set trace entry point before fixing up tail to allow link to self. */
  T->mcode = as->mcp;
  T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
  if (!as->loopref)
    asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
  T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
  asm_snap_fixup_mcofs(as);
#if LJ_TARGET_MCODE_FIXUP
  asm_mcode_fixup(T->mcode, T->szmcode);
#endif
  lj_mcode_sync(T->mcode, as->mctoporig);
}

#undef IR

#endif