
tarantool / luajit / 6789043982

07 Nov 2023 06:53PM UTC coverage: 88.253% (+0.01%) from 88.243%
Build: 6789043982 (push, via GitHub)

Committer: igormunkin
Fix base register coalescing in side trace.

Thanks to Sergey Kaplun, NiLuJe and Peter Cawley.

(cherry-picked from commit aa2db7ebd)

The previous patch fixed only part of the register coalescing problem. For
example, the parent base register may still be used inside the parent or
child register sets when it shouldn't be. This leads to incorrect register
allocations, which can result in crashes or undefined behaviour. This patch
fixes the issue by excluding the parent base register from both register
sets.

The test case for this patch doesn't fail before the commit, since it
requires a specific register allocation that is hard to construct and very
fragile. Because this repository is not perfectly in sync with upstream, the
test also passes before the patch; it should become a valid reproducer once
future patches are backported.

Sergey Kaplun:
* added the description and the test for the problem

Resolves tarantool/tarantool#8767
Part of tarantool/tarantool#9145
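
For illustration only, a minimal sketch of what excluding the parent base
register from an allocation set can look like, written against the RegSet/Reg
conventions visible in lj_asm.c below; the helper name and its parameters are
hypothetical and this is not the actual patch.

/* Hypothetical sketch, not the actual patch: remove the parent trace's base
** register from a register set before a side trace allocates from it.
*/
static RegSet sketch_exclude_parent_base(RegSet allow, Reg parentbase)
{
  if (ra_hasreg(parentbase))
    rset_clear(allow, parentbase);  /* Drop the parent base reg from the set. */
  return allow;
}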

5344 of 5974 branches covered (0.0%)

Branch coverage included in aggregate %.

5 of 5 new or added lines in 1 file covered. (100.0%)

13 existing lines in 1 file now uncovered.

20493 of 23302 relevant lines covered (87.95%)

2744052.66 hits per line

Source File

/src/lj_asm.c (96.47% covered)
1
/*
2
** IR assembler (SSA IR -> machine code).
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_asm_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10

11
#if LJ_HASJIT
12

13
#include "lj_gc.h"
14
#include "lj_str.h"
15
#include "lj_tab.h"
16
#include "lj_frame.h"
17
#if LJ_HASFFI
18
#include "lj_ctype.h"
19
#endif
20
#include "lj_ir.h"
21
#include "lj_jit.h"
22
#include "lj_ircall.h"
23
#include "lj_iropt.h"
24
#include "lj_mcode.h"
25
#include "lj_iropt.h"
26
#include "lj_trace.h"
27
#include "lj_snap.h"
28
#include "lj_asm.h"
29
#include "lj_dispatch.h"
30
#include "lj_vm.h"
31
#include "lj_target.h"
32

33
#ifdef LUA_USE_ASSERT
34
#include <stdio.h>
35
#endif
36

37
/* -- Assembler state and common macros ----------------------------------- */
38

39
/* Assembler state. */
40
typedef struct ASMState {
41
  RegCost cost[RID_MAX];  /* Reference and blended allocation cost for regs. */
42

43
  MCode *mcp;                /* Current MCode pointer (grows down). */
44
  MCode *mclim;                /* Lower limit for MCode memory + red zone. */
45
#ifdef LUA_USE_ASSERT
46
  MCode *mcp_prev;        /* Red zone overflow check. */
47
#endif
48

49
  IRIns *ir;                /* Copy of pointer to IR instructions/constants. */
50
  jit_State *J;                /* JIT compiler state. */
51

52
#if LJ_TARGET_X86ORX64
53
  x86ModRM mrm;                /* Fused x86 address operand. */
54
#endif
55

56
  RegSet freeset;        /* Set of free registers. */
57
  RegSet modset;        /* Set of registers modified inside the loop. */
58
  RegSet weakset;        /* Set of weakly referenced registers. */
59
  RegSet phiset;        /* Set of PHI registers. */
60

61
  uint32_t flags;        /* Copy of JIT compiler flags. */
62
  int loopinv;                /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */
63

64
  int32_t evenspill;        /* Next even spill slot. */
65
  int32_t oddspill;        /* Next odd spill slot (or 0). */
66

67
  IRRef curins;                /* Reference of current instruction. */
68
  IRRef stopins;        /* Stop assembly before hitting this instruction. */
69
  IRRef orignins;        /* Original T->nins. */
70

71
  IRRef snapref;        /* Current snapshot is active after this reference. */
72
  IRRef snaprename;        /* Rename highwater mark for snapshot check. */
73
  SnapNo snapno;        /* Current snapshot number. */
74
  SnapNo loopsnapno;        /* Loop snapshot number. */
75
  BloomFilter snapfilt1, snapfilt2;        /* Filled with snapshot refs. */
76
  int snapalloc;        /* Current snapshot needs allocation. */
77

78
  IRRef fuseref;        /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
79
  IRRef sectref;        /* Section base reference (loopref or 0). */
80
  IRRef loopref;        /* Reference of LOOP instruction (or 0). */
81

82
  BCReg topslot;        /* Number of slots for stack check (unless 0). */
83
  int32_t gcsteps;        /* Accumulated number of GC steps (per section). */
84

85
  GCtrace *T;                /* Trace to assemble. */
86
  GCtrace *parent;        /* Parent trace (or NULL). */
87

88
  MCode *mcbot;                /* Bottom of reserved MCode. */
89
  MCode *mctop;                /* Top of generated MCode. */
90
  MCode *mctoporig;        /* Original top of generated MCode. */
91
  MCode *mcloop;        /* Pointer to loop MCode (or NULL). */
92
  MCode *invmcp;        /* Points to invertible loop branch (or NULL). */
93
  MCode *flagmcp;        /* Pending opportunity to merge flag setting ins. */
94
  MCode *realign;        /* Realign loop if not NULL. */
95

96
#ifdef RID_NUM_KREF
97
  intptr_t krefk[RID_NUM_KREF];
98
#endif
99
  IRRef1 phireg[RID_MAX];  /* PHI register references. */
100
  uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
101
} ASMState;
102

103
#ifdef LUA_USE_ASSERT
104
#define lj_assertA(c, ...)        lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
105
#else
106
#define lj_assertA(c, ...)        ((void)as)
107
#endif
108

109
#define IR(ref)                        (&as->ir[(ref)])
110

111
#define ASMREF_TMP1                REF_TRUE        /* Temp. register. */
112
#define ASMREF_TMP2                REF_FALSE        /* Temp. register. */
113
#define ASMREF_L                REF_NIL                /* Stores register for L. */
114

115
/* Check for variant to invariant references. */
116
#define iscrossref(as, ref)        ((ref) < as->sectref)
117

118
/* Inhibit memory op fusion from variant to invariant references. */
119
#define FUSE_DISABLED                (~(IRRef)0)
120
#define mayfuse(as, ref)        ((ref) > as->fuseref)
121
#define neverfuse(as)                (as->fuseref == FUSE_DISABLED)
122
#define canfuse(as, ir)                (!neverfuse(as) && !irt_isphi((ir)->t))
123
#define opisfusableload(o) \
124
  ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
125
   (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD)
126

127
/* Sparse limit checks using a red zone before the actual limit. */
128
#define MCLIM_REDZONE        64
129

130
static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
44✔
131
{
132
  lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
44✔
133
}
134

135
static LJ_AINLINE void checkmclim(ASMState *as)
633,845✔
136
{
137
#ifdef LUA_USE_ASSERT
138
  if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
139
    IRIns *ir = IR(as->curins+1);
140
    lj_assertA(0, "red zone overflow: %p IR %04d  %02d %04d %04d\n", as->mcp,
141
      as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
142
  }
143
#endif
144
  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
1,912✔
145
#ifdef LUA_USE_ASSERT
146
  as->mcp_prev = as->mcp;
147
#endif
148
}
149

150
#ifdef RID_NUM_KREF
151
#define ra_iskref(ref)                ((ref) < RID_NUM_KREF)
152
#define ra_krefreg(ref)                ((Reg)(RID_MIN_KREF + (Reg)(ref)))
153
#define ra_krefk(as, ref)        (as->krefk[(ref)])
154

155
static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
156
{
157
  IRRef ref = (IRRef)(r - RID_MIN_KREF);
158
  as->krefk[ref] = k;
159
  as->cost[r] = REGCOST(ref, ref);
160
}
161

162
#else
163
#define ra_iskref(ref)                0
164
#define ra_krefreg(ref)                RID_MIN_GPR
165
#define ra_krefk(as, ref)        0
166
#endif
167

168
/* Arch-specific field offsets. */
169
static const uint8_t field_ofs[IRFL__MAX+1] = {
170
#define FLOFS(name, ofs)        (uint8_t)(ofs),
171
IRFLDEF(FLOFS)
172
#undef FLOFS
173
  0
174
};
175

176
/* -- Target-specific instruction emitter --------------------------------- */
177

178
#if LJ_TARGET_X86ORX64
179
#include "lj_emit_x86.h"
180
#elif LJ_TARGET_ARM
181
#include "lj_emit_arm.h"
182
#elif LJ_TARGET_ARM64
183
#include "lj_emit_arm64.h"
184
#elif LJ_TARGET_PPC
185
#include "lj_emit_ppc.h"
186
#elif LJ_TARGET_MIPS
187
#include "lj_emit_mips.h"
188
#else
189
#error "Missing instruction emitter for target CPU"
190
#endif
191

192
/* Generic load/store of register from/to stack slot. */
193
#define emit_spload(as, ir, r, ofs) \
194
  emit_loadofs(as, ir, (r), RID_SP, (ofs))
195
#define emit_spstore(as, ir, r, ofs) \
196
  emit_storeofs(as, ir, (r), RID_SP, (ofs))
197

198
/* -- Register allocator debugging ---------------------------------------- */
199

200
/* #define LUAJIT_DEBUG_RA */
201

202
#ifdef LUAJIT_DEBUG_RA
203

204
#include <stdio.h>
205
#include <stdarg.h>
206

207
#define RIDNAME(name)        #name,
208
static const char *const ra_regname[] = {
209
  GPRDEF(RIDNAME)
210
  FPRDEF(RIDNAME)
211
  VRIDDEF(RIDNAME)
212
  NULL
213
};
214
#undef RIDNAME
215

216
static char ra_dbg_buf[65536];
217
static char *ra_dbg_p;
218
static char *ra_dbg_merge;
219
static MCode *ra_dbg_mcp;
220

221
static void ra_dstart(void)
222
{
223
  ra_dbg_p = ra_dbg_buf;
224
  ra_dbg_merge = NULL;
225
  ra_dbg_mcp = NULL;
226
}
227

228
static void ra_dflush(void)
229
{
230
  fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
231
  ra_dstart();
232
}
233

234
static void ra_dprintf(ASMState *as, const char *fmt, ...)
235
{
236
  char *p;
237
  va_list argp;
238
  va_start(argp, fmt);
239
  p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
240
  ra_dbg_mcp = NULL;
241
  p += sprintf(p, "%08x  \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
242
  for (;;) {
243
    const char *e = strchr(fmt, '$');
244
    if (e == NULL) break;
245
    memcpy(p, fmt, (size_t)(e-fmt));
246
    p += e-fmt;
247
    if (e[1] == 'r') {
248
      Reg r = va_arg(argp, Reg) & RID_MASK;
249
      if (r <= RID_MAX) {
250
        const char *q;
251
        for (q = ra_regname[r]; *q; q++)
252
          *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
253
      } else {
254
        *p++ = '?';
255
        lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
256
      }
257
    } else if (e[1] == 'f' || e[1] == 'i') {
258
      IRRef ref;
259
      if (e[1] == 'f')
260
        ref = va_arg(argp, IRRef);
261
      else
262
        ref = va_arg(argp, IRIns *) - as->ir;
263
      if (ref >= REF_BIAS)
264
        p += sprintf(p, "%04d", ref - REF_BIAS);
265
      else
266
        p += sprintf(p, "K%03d", REF_BIAS - ref);
267
    } else if (e[1] == 's') {
268
      uint32_t slot = va_arg(argp, uint32_t);
269
      p += sprintf(p, "[sp+0x%x]", sps_scale(slot));
270
    } else if (e[1] == 'x') {
271
      p += sprintf(p, "%08x", va_arg(argp, int32_t));
272
    } else {
273
      lj_assertA(0, "bad debug format code");
274
    }
275
    fmt = e+2;
276
  }
277
  va_end(argp);
278
  while (*fmt)
279
    *p++ = *fmt++;
280
  *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
281
  if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
282
    fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
283
    p = ra_dbg_buf;
284
  }
285
  ra_dbg_p = p;
286
}
287

288
#define RA_DBG_START()        ra_dstart()
289
#define RA_DBG_FLUSH()        ra_dflush()
290
#define RA_DBG_REF() \
291
  do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
292
       ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
293
#define RA_DBGX(x)        ra_dprintf x
294

295
#else
296
#define RA_DBG_START()        ((void)0)
297
#define RA_DBG_FLUSH()        ((void)0)
298
#define RA_DBG_REF()        ((void)0)
299
#define RA_DBGX(x)        ((void)0)
300
#endif
301

302
/* -- Register allocator -------------------------------------------------- */
303

304
#define ra_free(as, r)                rset_set(as->freeset, (r))
305
#define ra_modified(as, r)        rset_set(as->modset, (r))
306
#define ra_weak(as, r)                rset_set(as->weakset, (r))
307
#define ra_noweak(as, r)        rset_clear(as->weakset, (r))
308

309
#define ra_used(ir)                (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
310

311
/* Setup register allocator. */
312
static void ra_setup(ASMState *as)
5,008✔
313
{
314
  Reg r;
5,008✔
315
  /* Initially all regs (except the stack pointer) are free for use. */
316
  as->freeset = RSET_INIT;
5,008✔
317
  as->modset = RSET_EMPTY;
5,008✔
318
  as->weakset = RSET_EMPTY;
5,008✔
319
  as->phiset = RSET_EMPTY;
5,008✔
320
  memset(as->phireg, 0, sizeof(as->phireg));
5,008✔
321
  for (r = RID_MIN_GPR; r < RID_MAX; r++)
165,264✔
322
    as->cost[r] = REGCOST(~0u, 0u);
160,256✔
323
}
5,008✔
324

325
/* Rematerialize constants. */
326
static Reg ra_rematk(ASMState *as, IRRef ref)
13,666✔
327
{
328
  IRIns *ir;
13,666✔
329
  Reg r;
13,666✔
330
  if (ra_iskref(ref)) {
13,666✔
331
    r = ra_krefreg(ref);
332
    lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
333
    ra_free(as, r);
334
    ra_modified(as, r);
335
#if LJ_64
336
    emit_loadu64(as, r, ra_krefk(as, ref));
337
#else
338
    emit_loadi(as, r, ra_krefk(as, ref));
339
#endif
340
    return r;
341
  }
342
  ir = IR(ref);
13,666✔
343
  r = ir->r;
13,666✔
344
  lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
13,666✔
345
  lj_assertA(!ra_hasspill(ir->s),
13,666✔
346
             "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
347
  ra_free(as, r);
13,666✔
348
  ra_modified(as, r);
13,666✔
349
  ir->r = RID_INIT;  /* Do not keep any hint. */
13,666✔
350
  RA_DBGX((as, "remat     $i $r", ir, r));
13,666✔
351
#if !LJ_SOFTFP32
352
  if (ir->o == IR_KNUM) {
13,666✔
353
    emit_loadk64(as, r, ir);
4,859✔
354
  } else
355
#endif
356
  if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
8,807✔
357
    ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
1,990✔
358
    emit_getgl(as, r, jit_base);
1,990✔
359
  } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
6,817✔
360
    /* REF_NIL stores ASMREF_L register. */
361
    lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
3,043✔
362
    emit_getgl(as, r, cur_L);
3,043✔
363
#if LJ_64
364
  } else if (ir->o == IR_KINT64) {
3,774✔
365
    emit_loadu64(as, r, ir_kint64(ir)->u64);
34✔
366
#if LJ_GC64
367
  } else if (ir->o == IR_KGC) {
3,740✔
368
    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
3,601✔
369
  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
139✔
370
    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
23✔
371
#endif
372
#endif
373
  } else {
374
    lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
116✔
375
               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
376
               "rematk of bad IR op %d", ir->o);
377
    emit_loadi(as, r, ir->i);
116✔
378
  }
379
  return r;
13,666✔
380
}
381

382
/* Force a spill. Allocate a new spill slot if needed. */
383
static int32_t ra_spill(ASMState *as, IRIns *ir)
384
{
385
  int32_t slot = ir->s;
386
  lj_assertA(ir >= as->ir + REF_TRUE,
387
             "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
388
  if (!ra_hasspill(slot)) {
389
    if (irt_is64(ir->t)) {
390
      slot = as->evenspill;
391
      as->evenspill += 2;
392
    } else if (as->oddspill) {
393
      slot = as->oddspill;
394
      as->oddspill = 0;
395
    } else {
396
      slot = as->evenspill;
397
      as->oddspill = slot+1;
398
      as->evenspill += 2;
399
    }
400
    if (as->evenspill > 256)
401
      lj_trace_err(as->J, LJ_TRERR_SPILLOV);
402
    ir->s = (uint8_t)slot;
403
  }
404
  return sps_scale(slot);
405
}
406

407
/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
408
static Reg ra_releasetmp(ASMState *as, IRRef ref)
4,536✔
409
{
410
  IRIns *ir = IR(ref);
4,536✔
411
  Reg r = ir->r;
4,536✔
412
  lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
4,536✔
413
  lj_assertA(!ra_hasspill(ir->s),
4,536✔
414
             "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
415
  ra_free(as, r);
4,536✔
416
  ra_modified(as, r);
4,536✔
417
  ir->r = RID_INIT;
4,536✔
418
  return r;
2,374✔
419
}
420

421
/* Restore a register (marked as free). Rematerialize or force a spill. */
422
static Reg ra_restore(ASMState *as, IRRef ref)
11,971✔
423
{
424
  if (emit_canremat(ref)) {
11,971✔
425
    return ra_rematk(as, ref);
6,155✔
426
  } else {
427
    IRIns *ir = IR(ref);
5,816✔
428
    int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
5,816✔
429
    Reg r = ir->r;
5,816✔
430
    lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
5,816✔
431
    ra_sethint(ir->r, r);  /* Keep hint. */
5,816✔
432
    ra_free(as, r);
5,816✔
433
    if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
5,816✔
434
      ra_modified(as, r);
5,398✔
435
      RA_DBGX((as, "restore   $i $r", ir, r));
5,398✔
436
      emit_spload(as, ir, r, ofs);
5,398✔
437
    }
438
    return r;
5,816✔
439
  }
440
}
441

442
/* Save a register to a spill slot. */
443
static void ra_save(ASMState *as, IRIns *ir, Reg r)
4,367✔
444
{
445
  RA_DBGX((as, "save      $i $r", ir, r));
4,367✔
446
  emit_spstore(as, ir, r, sps_scale(ir->s));
4,367✔
447
}
3,430✔
448

449
#define MINCOST(name) \
450
  if (rset_test(RSET_ALL, RID_##name) && \
451
      LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \
452
    cost = as->cost[RID_##name];
453

454
/* Evict the register with the lowest cost, forcing a restore. */
455
static Reg ra_evict(ASMState *as, RegSet allow)
2,147✔
456
{
457
  IRRef ref;
2,147✔
458
  RegCost cost = ~(RegCost)0;
2,147✔
459
  lj_assertA(allow != RSET_EMPTY, "evict from empty set");
2,147✔
460
  if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
2,147✔
461
    GPRDEF(MINCOST)
1,719✔
462
  } else {
463
    FPRDEF(MINCOST)
428✔
464
  }
465
  ref = regcost_ref(cost);
2,147✔
466
  lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
2,147✔
467
             "evict of out-of-range IR %04d", ref - REF_BIAS);
468
  /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
469
  if (!irref_isk(ref) && (as->weakset & allow)) {
2,147✔
470
    IRIns *ir = IR(ref);
18✔
471
    if (!rset_test(as->weakset, ir->r))
18✔
472
      ref = regcost_ref(as->cost[rset_pickbot((as->weakset & allow))]);
12✔
473
  }
474
  return ra_restore(as, ref);
2,147✔
475
}
476

477
/* Pick any register (marked as free). Evict on-demand. */
478
static Reg ra_pick(ASMState *as, RegSet allow)
121,881✔
479
{
480
  RegSet pick = as->freeset & allow;
121,881✔
481
  if (!pick)
121,881✔
482
    return ra_evict(as, allow);
20✔
483
  else
484
    return rset_picktop(pick);
121,861✔
485
}
486

487
/* Get a scratch register (marked as free). */
488
static Reg ra_scratch(ASMState *as, RegSet allow)
121,831✔
489
{
490
  Reg r = ra_pick(as, allow);
121,831✔
491
  ra_modified(as, r);
121,831✔
492
  RA_DBGX((as, "scratch        $r", r));
121,831✔
493
  return r;
121,831✔
494
}
495

496
/* Evict all registers from a set (if not free). */
497
static void ra_evictset(ASMState *as, RegSet drop)
6,895✔
498
{
499
  RegSet work;
6,895✔
500
  as->modset |= drop;
6,895✔
501
#if !LJ_SOFTFP
502
  work = (drop & ~as->freeset) & RSET_FPR;
6,895✔
503
  while (work) {
6,895✔
504
    Reg r = rset_pickbot(work);
2,791✔
505
    ra_restore(as, regcost_ref(as->cost[r]));
2,791✔
506
    rset_clear(work, r);
2,791✔
507
    checkmclim(as);
9,686✔
508
  }
509
#endif
510
  work = (drop & ~as->freeset);
6,895✔
511
  while (work) {
6,895✔
512
    Reg r = rset_pickbot(work);
6,791✔
513
    ra_restore(as, regcost_ref(as->cost[r]));
6,791✔
514
    rset_clear(work, r);
6,791✔
515
    checkmclim(as);
13,686✔
516
  }
517
}
6,895✔
518

519
/* Evict (rematerialize) all registers allocated to constants. */
520
static void ra_evictk(ASMState *as)
3,860✔
521
{
522
  RegSet work;
3,860✔
523
#if !LJ_SOFTFP
524
  work = ~as->freeset & RSET_FPR;
3,860✔
525
  while (work) {
7,455✔
526
    Reg r = rset_pickbot(work);
3,595✔
527
    IRRef ref = regcost_ref(as->cost[r]);
3,595✔
528
    if (emit_canremat(ref) && irref_isk(ref)) {
3,595✔
529
      ra_rematk(as, ref);
3,242✔
530
      checkmclim(as);
3,242✔
531
    }
532
    rset_clear(work, r);
3,595✔
533
  }
534
#endif
535
  work = ~as->freeset & RSET_GPR;
3,860✔
536
  while (work) {
12,558✔
537
    Reg r = rset_pickbot(work);
8,698✔
538
    IRRef ref = regcost_ref(as->cost[r]);
8,698✔
539
    if (emit_canremat(ref) && irref_isk(ref)) {
8,698✔
540
      ra_rematk(as, ref);
4,054✔
541
      checkmclim(as);
4,054✔
542
    }
543
    rset_clear(work, r);
8,698✔
544
  }
545
}
3,860✔
546

547
#ifdef RID_NUM_KREF
548
/* Allocate a register for a constant. */
549
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
550
{
551
  /* First try to find a register which already holds the same constant. */
552
  RegSet pick, work = ~as->freeset & RSET_GPR;
553
  Reg r;
554
  while (work) {
555
    IRRef ref;
556
    r = rset_pickbot(work);
557
    ref = regcost_ref(as->cost[r]);
558
#if LJ_64
559
    if (ref < ASMREF_L) {
560
      if (ra_iskref(ref)) {
561
        if (k == ra_krefk(as, ref))
562
          return r;
563
      } else {
564
        IRIns *ir = IR(ref);
565
        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
566
#if LJ_GC64
567
            (ir->o == IR_KINT && k == ir->i) ||
568
            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
569
            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
570
             k == (intptr_t)ir_kptr(ir))
571
#else
572
            (ir->o != IR_KINT64 && k == ir->i)
573
#endif
574
           )
575
          return r;
576
      }
577
    }
578
#else
579
    if (ref < ASMREF_L &&
580
        k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
581
      return r;
582
#endif
583
    rset_clear(work, r);
584
  }
585
  pick = as->freeset & allow;
586
  if (pick) {
587
    /* Constants should preferably get unmodified registers. */
588
    if ((pick & ~as->modset))
589
      pick &= ~as->modset;
590
    r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
591
  } else {
592
    r = ra_evict(as, allow);
593
  }
594
  RA_DBGX((as, "allock    $x $r", k, r));
595
  ra_setkref(as, r, k);
596
  rset_clear(as->freeset, r);
597
  ra_noweak(as, r);
598
  return r;
599
}
600

601
/* Allocate a specific register for a constant. */
602
static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
603
{
604
  Reg kr = ra_allock(as, k, RID2RSET(r));
605
  if (kr != r) {
606
    IRIns irdummy;
607
    irdummy.t.irt = IRT_INT;
608
    ra_scratch(as, RID2RSET(r));
609
    emit_movrr(as, &irdummy, r, kr);
610
  }
611
}
612
#else
613
#define ra_allockreg(as, k, r)                emit_loadi(as, (r), (k))
614
#endif
615

616
/* Allocate a register for ref from the allowed set of registers.
617
** Note: this function assumes the ref does NOT have a register yet!
618
** Picks an optimal register, sets the cost and marks the register as non-free.
619
*/
620
static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
409,498✔
621
{
622
  IRIns *ir = IR(ref);
409,498✔
623
  RegSet pick = as->freeset & allow;
409,498✔
624
  Reg r;
409,498✔
625
  lj_assertA(ra_noreg(ir->r),
409,498✔
626
             "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
627
  if (pick) {
409,498✔
628
    /* First check register hint from propagation or PHI. */
629
    if (ra_hashint(ir->r)) {
407,371✔
630
      r = ra_gethint(ir->r);
63,862✔
631
      if (rset_test(pick, r))  /* Use hint register if possible. */
63,862✔
632
        goto found;
57,780✔
633
      /* Rematerialization is cheaper than missing a hint. */
634
      if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) {
6,082✔
635
        ra_rematk(as, regcost_ref(as->cost[r]));
215✔
636
        goto found;
215✔
637
      }
638
      RA_DBGX((as, "hintmiss  $f $r", ref, r));
349,376✔
639
    }
640
    /* Invariants should preferably get unmodified registers. */
641
    if (ref < as->loopref && !irt_isphi(ir->t)) {
349,376✔
642
      if ((pick & ~as->modset))
15,346✔
643
        pick &= ~as->modset;
8,280✔
644
      r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
15,346✔
645
    } else {
646
      /* We've got plenty of regs, so get callee-save regs if possible. */
647
      if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH))
334,030✔
648
        pick &= ~RSET_SCRATCH;
277,009✔
649
      r = rset_picktop(pick);
334,030✔
650
    }
651
  } else {
652
    r = ra_evict(as, allow);
2,127✔
653
  }
654
found:
409,498✔
655
  RA_DBGX((as, "alloc     $f $r", ref, r));
409,498✔
656
  ir->r = (uint8_t)r;
409,498✔
657
  rset_clear(as->freeset, r);
409,498✔
658
  ra_noweak(as, r);
409,498✔
659
  as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t));
409,498✔
660
  return r;
409,498✔
661
}
662

663
/* Allocate a register on-demand. */
664
static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
474,749✔
665
{
666
  Reg r = IR(ref)->r;
474,749✔
667
  /* Note: allow is ignored if the register is already allocated. */
668
  if (ra_noreg(r)) r = ra_allocref(as, ref, allow);
474,749✔
669
  ra_noweak(as, r);
474,749✔
670
  return r;
474,749✔
671
}
672

673
/* Add a register rename to the IR. */
674
static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
675
{
676
  IRRef ren;
677
  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
678
  ren = tref_ref(lj_ir_emit(as->J));
679
  as->J->cur.ir[ren].r = (uint8_t)down;
680
  as->J->cur.ir[ren].s = SPS_NONE;
681
}
682

683
/* Rename register allocation and emit move. */
684
static void ra_rename(ASMState *as, Reg down, Reg up)
710✔
685
{
686
  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
710✔
687
  IRIns *ir = IR(ref);
710✔
688
  ir->r = (uint8_t)up;
710✔
689
  as->cost[down] = 0;
710✔
690
  lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
710✔
691
             "rename between GPR/FPR %d and %d", down, up);
692
  lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
710✔
693
  lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
710✔
694
  ra_free(as, down);  /* 'down' is free ... */
710✔
695
  ra_modified(as, down);
710✔
696
  rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
710✔
697
  ra_noweak(as, up);
710✔
698
  RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
710✔
699
  emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
710✔
700
  if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
710✔
701
    /*
702
    ** The rename is effective at the subsequent (already emitted) exit
703
    ** branch. This is for the current snapshot (as->snapno). Except if we
704
    ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
705
    ** then it belongs to the next snapshot.
706
    ** See also the discussion at asm_snap_checkrename().
707
    */
708
    ra_addrename(as, down, ref, as->snapno + as->snapalloc);
685✔
709
  }
710
}
710✔
711

712
/* Pick a destination register (marked as free).
713
** Caveat: allow is ignored if there's already a destination register.
714
** Use ra_destreg() to get a specific register.
715
*/
716
static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow)
378,990✔
717
{
718
  Reg dest = ir->r;
378,990✔
719
  if (ra_hasreg(dest)) {
378,990✔
720
    ra_free(as, dest);
375,389✔
721
    ra_modified(as, dest);
375,389✔
722
  } else {
723
    if (ra_hashint(dest) && rset_test((as->freeset&allow), ra_gethint(dest))) {
3,601✔
724
      dest = ra_gethint(dest);
756✔
725
      ra_modified(as, dest);
756✔
726
      RA_DBGX((as, "dest           $r", dest));
756✔
727
    } else {
728
      dest = ra_scratch(as, allow);
2,845✔
729
    }
730
    ir->r = dest;
3,601✔
731
  }
732
  if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest);
378,990✔
733
  return dest;
378,990✔
734
}
735

736
/* Force a specific destination register (marked as free). */
737
static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
5,873✔
738
{
739
  Reg dest = ra_dest(as, ir, RID2RSET(r));
5,873✔
740
  if (dest != r) {
5,873✔
741
    lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
2,553✔
742
    ra_modified(as, r);
2,553✔
743
    emit_movrr(as, ir, dest, r);
2,553✔
744
  }
745
}
5,873✔
746

747
#if LJ_TARGET_X86ORX64
748
/* Propagate dest register to left reference. Emit moves as needed.
749
** This is a required fixup step for all 2-operand machine instructions.
750
*/
751
static void ra_left(ASMState *as, Reg dest, IRRef lref)
49,813✔
752
{
753
  IRIns *ir = IR(lref);
49,813✔
754
  Reg left = ir->r;
49,813✔
755
  if (ra_noreg(left)) {
49,813✔
756
    if (irref_isk(lref)) {
48,743✔
757
      if (ir->o == IR_KNUM) {
895✔
758
        /* FP remat needs a load except for +0. Still better than eviction. */
759
        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
50✔
760
          emit_loadk64(as, dest, ir);
42✔
761
          return;
42✔
762
        }
763
#if LJ_64
764
      } else if (ir->o == IR_KINT64) {
845✔
765
        emit_loadk64(as, dest, ir);
5✔
766
        return;
5✔
767
#if LJ_GC64
768
      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
840✔
769
        emit_loadk64(as, dest, ir);
768✔
770
        return;
768✔
771
#endif
772
#endif
773
      } else if (ir->o != IR_KPRI) {
72✔
774
        lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
71✔
775
                   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
776
                   "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
777
        emit_loadi(as, dest, ir->i);
71✔
778
        return;
71✔
779
      }
780
    }
781
    if (!ra_hashint(left) && !iscrossref(as, lref))
47,857✔
782
      ra_sethint(ir->r, dest);  /* Propagate register hint. */
44,676✔
783
    left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR);
91,109✔
784
  }
785
  ra_noweak(as, left);
48,927✔
786
  /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */
787
  if (dest != left) {
48,927✔
788
    /* Use register renaming if dest is the PHI reg. */
789
    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
1,093✔
790
      ra_modified(as, left);
123✔
791
      ra_rename(as, left, dest);
123✔
792
    } else {
793
      emit_movrr(as, ir, dest, left);
970✔
794
    }
795
  }
796
}
797
#else
798
/* Similar to ra_left, except we override any hints. */
799
static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
800
{
801
  IRIns *ir = IR(lref);
802
  Reg left = ir->r;
803
  if (ra_noreg(left)) {
804
    ra_sethint(ir->r, dest);  /* Propagate register hint. */
805
    left = ra_allocref(as, lref,
806
                       (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR);
807
  }
808
  ra_noweak(as, left);
809
  if (dest != left) {
810
    /* Use register renaming if dest is the PHI reg. */
811
    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
812
      ra_modified(as, left);
813
      ra_rename(as, left, dest);
814
    } else {
815
      emit_movrr(as, ir, dest, left);
816
    }
817
  }
818
}
819
#endif
820

821
#if !LJ_64
822
/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
823
static void ra_destpair(ASMState *as, IRIns *ir)
824
{
825
  Reg destlo = ir->r, desthi = (ir+1)->r;
826
  /* First spill unrelated refs blocking the destination registers. */
827
  if (!rset_test(as->freeset, RID_RETLO) &&
828
      destlo != RID_RETLO && desthi != RID_RETLO)
829
    ra_restore(as, regcost_ref(as->cost[RID_RETLO]));
830
  if (!rset_test(as->freeset, RID_RETHI) &&
831
      destlo != RID_RETHI && desthi != RID_RETHI)
832
    ra_restore(as, regcost_ref(as->cost[RID_RETHI]));
833
  /* Next free the destination registers (if any). */
834
  if (ra_hasreg(destlo)) {
835
    ra_free(as, destlo);
836
    ra_modified(as, destlo);
837
  } else {
838
    destlo = RID_RETLO;
839
  }
840
  if (ra_hasreg(desthi)) {
841
    ra_free(as, desthi);
842
    ra_modified(as, desthi);
843
  } else {
844
    desthi = RID_RETHI;
845
  }
846
  /* Check for conflicts and shuffle the registers as needed. */
847
  if (destlo == RID_RETHI) {
848
    if (desthi == RID_RETLO) {
849
#if LJ_TARGET_X86
850
      *--as->mcp = XI_XCHGa + RID_RETHI;
851
#else
852
      emit_movrr(as, ir, RID_RETHI, RID_TMP);
853
      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
854
      emit_movrr(as, ir, RID_TMP, RID_RETLO);
855
#endif
856
    } else {
857
      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
858
      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
859
    }
860
  } else if (desthi == RID_RETLO) {
861
    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
862
    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
863
  } else {
864
    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
865
    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
866
  }
867
  /* Restore spill slots (if any). */
868
  if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
869
  if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
870
}
871
#endif
872

873
/* -- Snapshot handling ---------------------------------------------------- */
874

875
/* Can we rematerialize a KNUM instead of forcing a spill? */
876
static int asm_snap_canremat(ASMState *as)
877
{
878
  Reg r;
879
  for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++)
340✔
880
    if (irref_isk(regcost_ref(as->cost[r])))
320✔
881
      return 1;
882
  return 0;
883
}
884

885
/* Check whether a sunk store corresponds to an allocation. */
886
static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
887
{
888
  if (irs->s == 255) {
889
    if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
890
        irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
891
      IRIns *irk = IR(irs->op1);
892
      if (irk->o == IR_AREF || irk->o == IR_HREFK)
893
        irk = IR(irk->op1);
894
      return (IR(irk->op1) == ira);
895
    }
896
    return 0;
897
  } else {
898
    return (ira + irs->s == irs);  /* Quick check. */
899
  }
900
}
901

902
/* Allocate register or spill slot for a ref that escapes to a snapshot. */
903
static void asm_snap_alloc1(ASMState *as, IRRef ref)
61,766✔
904
{
905
  IRIns *ir = IR(ref);
62,017✔
906
  if (!irref_isk(ref) && ir->r != RID_SUNK) {
62,017✔
907
    bloomset(as->snapfilt1, ref);
61,650✔
908
    bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
61,650✔
909
    if (ra_used(ir)) return;
61,650✔
910
    if (ir->r == RID_SINK) {
4,832✔
911
      ir->r = RID_SUNK;
337✔
912
#if LJ_HASFFI
913
      if (ir->o == IR_CNEWI) {  /* Allocate CNEWI value. */
337✔
914
        asm_snap_alloc1(as, ir->op2);
215✔
915
        if (LJ_32 && (ir+1)->o == IR_HIOP)
916
          asm_snap_alloc1(as, (ir+1)->op2);
917
      } else
918
#endif
919
      {  /* Allocate stored values for TNEW, TDUP and CNEW. */
920
        IRIns *irs;
122✔
921
        lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
122✔
922
                   "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
923
        for (irs = IR(as->snapref-1); irs > ir; irs--)
1,428✔
924
          if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
1,306✔
925
            lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
246✔
926
                       irs->o == IR_FSTORE || irs->o == IR_XSTORE,
927
                       "sunk store IR %04d has bad op %d",
928
                       (int)(irs - as->ir) - REF_BIAS, irs->o);
929
            asm_snap_alloc1(as, irs->op2);
246✔
930
            if (LJ_32 && (irs+1)->o == IR_HIOP)
246✔
931
              asm_snap_alloc1(as, (irs+1)->op2);
932
          }
933
      }
934
    } else {
935
      RegSet allow;
4,495✔
936
      if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
4,495✔
937
        IRIns *irc;
54✔
938
        for (irc = IR(as->curins); irc > ir; irc--)
468✔
939
          if ((irc->op1 == ref || irc->op2 == ref) &&
432✔
940
              !(irc->r == RID_SINK || irc->r == RID_SUNK))
54✔
941
            goto nosink;  /* Don't sink conversion if result is used. */
18✔
942
        asm_snap_alloc1(as, ir->op1);
36✔
943
        return;
36✔
944
      }
945
    nosink:
4,441✔
946
      allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
4,459✔
947
      if ((as->freeset & allow) ||
4,459✔
948
               (allow == RSET_FPR && asm_snap_canremat(as))) {
20✔
949
        /* Get a weak register if we have a free one or can rematerialize. */
950
        Reg r = ra_allocref(as, ref, allow);  /* Allocate a register. */
4,322✔
951
        if (!irt_isphi(ir->t))
4,322✔
952
          ra_weak(as, r);  /* But mark it as weakly referenced. */
3,701✔
953
        checkmclim(as);
4,322✔
954
        RA_DBGX((as, "snapreg   $f $r", ref, ir->r));
955
      } else {
956
        ra_spill(as, ir);  /* Otherwise force a spill slot. */
137✔
957
        RA_DBGX((as, "snapspill $f $s", ref, ir->s));
61,766✔
958
      }
959
    }
960
  }
961
}
962

963
/* Allocate refs escaping to a snapshot. */
964
static void asm_snap_alloc(ASMState *as, int snapno)
53,982✔
965
{
966
  SnapShot *snap = &as->T->snap[snapno];
53,982✔
967
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
53,982✔
968
  MSize n, nent = snap->nent;
53,982✔
969
  as->snapfilt1 = as->snapfilt2 = 0;
53,982✔
970
  for (n = 0; n < nent; n++) {
134,326✔
971
    SnapEntry sn = map[n];
80,344✔
972
    IRRef ref = snap_ref(sn);
80,344✔
973
    if (!irref_isk(ref)) {
80,344✔
974
      asm_snap_alloc1(as, ref);
61,520✔
975
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
61,520✔
976
        lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
977
                   "snap %d[%d] points to bad SOFTFP IR %04d",
978
                   snapno, n, ref - REF_BIAS);
979
        asm_snap_alloc1(as, ref+1);
980
      }
981
    }
982
  }
983
}
53,982✔
984

985
/* All guards for a snapshot use the same exitno. This is currently the
986
** same as the snapshot number. Since the exact origin of the exit cannot
987
** be determined, all guards for the same snapshot must exit with the same
988
** RegSP mapping.
989
** A renamed ref which has been used in a prior guard for the same snapshot
990
** would cause an inconsistency. The easy way out is to force a spill slot.
991
*/
992
static int asm_snap_checkrename(ASMState *as, IRRef ren)
34✔
993
{
994
  if (bloomtest(as->snapfilt1, ren) &&
34✔
995
      bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) {
34✔
996
    IRIns *ir = IR(ren);
34✔
997
    ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
34✔
998
    RA_DBGX((as, "snaprensp $f $s", ren, ir->s));
34✔
999
    return 1;  /* Found. */
34✔
1000
  }
1001
  return 0;  /* Not found. */
1002
}
1003

1004
/* Prepare snapshot for next guard or throwing instruction. */
1005
static void asm_snap_prep(ASMState *as)
266,367✔
1006
{
1007
  if (as->snapalloc) {
266,367✔
1008
    /* Alloc on first invocation for each snapshot. */
1009
    as->snapalloc = 0;
53,943✔
1010
    asm_snap_alloc(as, as->snapno);
53,943✔
1011
    as->snaprename = as->T->nins;
53,943✔
1012
  } else {
1013
    /* Check any renames above the highwater mark. */
1014
    for (; as->snaprename < as->T->nins; as->snaprename++) {
212,458✔
1015
      IRIns *ir = &as->T->ir[as->snaprename];
34✔
1016
      if (asm_snap_checkrename(as, ir->op1))
34✔
1017
        ir->op2 = REF_BIAS-1;  /* Kill rename. */
34✔
1018
    }
1019
  }
1020
}
266,367✔
1021

1022
/* Move to previous snapshot when we cross the current snapshot ref. */
1023
static void asm_snap_prev(ASMState *as)
619,637✔
1024
{
1025
  if (as->curins < as->snapref) {
619,637✔
1026
    ptrdiff_t ofs = as->mctoporig - as->mcp;
56,287✔
1027
    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
56,287✔
1028
    do {
58,886✔
1029
      if (as->snapno == 0) return;
58,886✔
1030
      as->snapno--;
56,793✔
1031
      as->snapref = as->T->snap[as->snapno].ref;
56,793✔
1032
      as->T->snap[as->snapno].mcofs = ofs;  /* Remember mcode offset. */
56,793✔
1033
    } while (as->curins < as->snapref);  /* May have no ins inbetween. */
56,793✔
1034
    as->snapalloc = 1;
54,194✔
1035
  }
1036
}
1037

1038
/* Fixup snapshot mcode offsets. */
1039
static void asm_snap_fixup_mcofs(ASMState *as)
3,729✔
1040
{
1041
  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
3,729✔
1042
  SnapShot *snap = as->T->snap;
3,729✔
1043
  SnapNo i;
3,729✔
1044
  for (i = as->T->nsnap-1; i > 0; i--) {
53,247✔
1045
    /* Compute offset from mcode start and store in correct snapshot. */
1046
    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
49,518✔
1047
  }
1048
  snap[0].mcofs = 0;
3,729✔
1049
}
3,729✔
1050

1051
/* -- Miscellaneous helpers ----------------------------------------------- */
1052

1053
/* Calculate stack adjustment. */
1054
static int32_t asm_stack_adjust(ASMState *as)
3,860✔
1055
{
1056
  if (as->evenspill <= SPS_FIXED)
3,860✔
1057
    return 0;
1058
  return sps_scale(sps_align(as->evenspill));
658✔
1059
}
1060

1061
/* Must match with hash*() in lj_tab.c. */
1062
static uint32_t ir_khash(ASMState *as, IRIns *ir)
1063
{
1064
  uint32_t lo, hi;
1065
  UNUSED(as);
1066
  if (irt_isstr(ir->t)) {
1067
    return ir_kstr(ir)->hash;
1068
  } else if (irt_isnum(ir->t)) {
1069
    lo = ir_knum(ir)->u32.lo;
1070
    hi = ir_knum(ir)->u32.hi << 1;
1071
  } else if (irt_ispri(ir->t)) {
1072
    lj_assertA(!irt_isnil(ir->t), "hash of nil key");
1073
    return irt_type(ir->t)-IRT_FALSE;
1074
  } else {
1075
    lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
1076
    lo = u32ptr(ir_kgc(ir));
1077
#if LJ_GC64
1078
    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
1079
#else
1080
    hi = lo + HASH_BIAS;
1081
#endif
1082
  }
1083
  return hashrot(lo, hi);
1084
}
1085

1086
/* -- Allocations --------------------------------------------------------- */
1087

1088
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args);
1089
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci);
1090

1091
static void asm_snew(ASMState *as, IRIns *ir)
105✔
1092
{
1093
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
105✔
1094
  IRRef args[3];
105✔
1095
  asm_snap_prep(as);
105✔
1096
  args[0] = ASMREF_L;  /* lua_State *L    */
105✔
1097
  args[1] = ir->op1;   /* const char *str */
105✔
1098
  args[2] = ir->op2;   /* size_t len      */
105✔
1099
  as->gcsteps++;
105✔
1100
  asm_setupresult(as, ir, ci);  /* GCstr * */
105✔
1101
  asm_gencall(as, ci, args);
105✔
1102
}
105✔
1103

1104
static void asm_tnew(ASMState *as, IRIns *ir)
184✔
1105
{
1106
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
184✔
1107
  IRRef args[2];
184✔
1108
  asm_snap_prep(as);
184✔
1109
  args[0] = ASMREF_L;     /* lua_State *L    */
184✔
1110
  args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
184✔
1111
  as->gcsteps++;
184✔
1112
  asm_setupresult(as, ir, ci);  /* GCtab * */
184✔
1113
  asm_gencall(as, ci, args);
184✔
1114
  ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1));
184✔
1115
}
184✔
1116

1117
static void asm_tdup(ASMState *as, IRIns *ir)
53✔
1118
{
1119
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
53✔
1120
  IRRef args[2];
53✔
1121
  asm_snap_prep(as);
53✔
1122
  args[0] = ASMREF_L;  /* lua_State *L    */
53✔
1123
  args[1] = ir->op1;   /* const GCtab *kt */
53✔
1124
  as->gcsteps++;
53✔
1125
  asm_setupresult(as, ir, ci);  /* GCtab * */
53✔
1126
  asm_gencall(as, ci, args);
53✔
1127
}
53✔
1128

1129
static void asm_gc_check(ASMState *as);
1130

1131
/* Explicit GC step. */
1132
static void asm_gcstep(ASMState *as, IRIns *ir)
102✔
1133
{
1134
  IRIns *ira;
102✔
1135
  for (ira = IR(as->stopins+1); ira < ir; ira++)
1,645✔
1136
    if ((ira->o == IR_TNEW || ira->o == IR_TDUP ||
1,543✔
1137
         (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI))) &&
1,492✔
1138
        ra_used(ira))
1,492✔
1139
      as->gcsteps++;
1,467✔
1140
  if (as->gcsteps)
102✔
1141
    asm_gc_check(as);
96✔
1142
  as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
102✔
1143
}
102✔
1144

1145
/* -- Buffer operations --------------------------------------------------- */
1146

1147
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1148

1149
static void asm_bufhdr(ASMState *as, IRIns *ir)
771✔
1150
{
1151
  Reg sb = ra_dest(as, ir, RSET_GPR);
771✔
1152
  if ((ir->op2 & IRBUFHDR_APPEND)) {
771✔
1153
    /* Rematerialize const buffer pointer instead of likely spill. */
1154
    IRIns *irp = IR(ir->op1);
47✔
1155
    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
47✔
1156
          (irp == ir-2 && !ra_used(ir-1)))) {
47✔
1157
      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
150✔
1158
        irp = IR(irp->op1);
106✔
1159
      if (irref_isk(irp->op1)) {
44✔
1160
        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
44✔
1161
        ir = irp;
44✔
1162
      }
1163
    }
1164
  } else {
1165
    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
724✔
1166
    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
1167
    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
724✔
1168
    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
724✔
1169
  }
1170
#if LJ_TARGET_X86ORX64
1171
  ra_left(as, sb, ir->op1);
771✔
1172
#else
1173
  ra_leftov(as, sb, ir->op1);
1174
#endif
1175
}
771✔
1176

1177
static void asm_bufput(ASMState *as, IRIns *ir)
1,364✔
1178
{
1179
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1,364✔
1180
  IRRef args[3];
1,364✔
1181
  IRIns *irs;
1,364✔
1182
  int kchar = -129;
1,364✔
1183
  args[0] = ir->op1;  /* SBuf * */
1,364✔
1184
  args[1] = ir->op2;  /* GCstr * */
1,364✔
1185
  irs = IR(ir->op2);
1,364✔
1186
  lj_assertA(irt_isstr(irs->t),
1,364✔
1187
             "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
1188
  if (irs->o == IR_KGC) {
1,364✔
1189
    GCstr *s = ir_kstr(irs);
529✔
1190
    if (s->len == 1) {  /* Optimize put of single-char string constant. */
529✔
1191
      kchar = (int8_t)strdata(s)[0];  /* Signed! */
162✔
1192
      args[1] = ASMREF_TMP1;  /* int, truncated to char */
162✔
1193
      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
162✔
1194
    }
1195
  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
835✔
1196
    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
701✔
1197
      if (irs->op2 == IRTOSTR_NUM) {
242✔
1198
        args[1] = ASMREF_TMP1;  /* TValue * */
12✔
1199
        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
12✔
1200
      } else {
1201
        lj_assertA(irt_isinteger(IR(irs->op1)->t),
230✔
1202
                   "TOSTR of non-numeric IR %04d", irs->op1);
1203
        args[1] = irs->op1;  /* int */
230✔
1204
        if (irs->op2 == IRTOSTR_INT)
230✔
1205
          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1206
        else
1207
          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
166✔
1208
      }
1209
    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
459✔
1210
      args[1] = irs->op1;  /* const void * */
4✔
1211
      args[2] = irs->op2;  /* MSize */
4✔
1212
      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
4✔
1213
    }
1214
  }
1215
  asm_setupresult(as, ir, ci);  /* SBuf * */
1,364✔
1216
  asm_gencall(as, ci, args);
1,364✔
1217
  if (args[1] == ASMREF_TMP1) {
1,364✔
1218
    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
174✔
1219
    if (kchar == -129)
174✔
1220
      asm_tvptr(as, tmp, irs->op1);
12✔
1221
    else
1222
      ra_allockreg(as, kchar, tmp);
162✔
1223
  }
1224
}
1,364✔
1225

1226
static void asm_bufstr(ASMState *as, IRIns *ir)
757✔
1227
{
1228
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
757✔
1229
  IRRef args[1];
757✔
1230
  args[0] = ir->op1;  /* SBuf *sb */
757✔
1231
  as->gcsteps++;
757✔
1232
  asm_setupresult(as, ir, ci);  /* GCstr * */
757✔
1233
  asm_gencall(as, ci, args);
757✔
1234
}
757✔
1235

1236
/* -- Type conversions ---------------------------------------------------- */
1237

1238
static void asm_tostr(ASMState *as, IRIns *ir)
42✔
1239
{
1240
  const CCallInfo *ci;
42✔
1241
  IRRef args[2];
42✔
1242
  asm_snap_prep(as);
42✔
1243
  args[0] = ASMREF_L;
42✔
1244
  as->gcsteps++;
42✔
1245
  if (ir->op2 == IRTOSTR_NUM) {
42✔
1246
    args[1] = ASMREF_TMP1;  /* cTValue * */
23✔
1247
    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
23✔
1248
  } else {
1249
    args[1] = ir->op1;  /* int32_t k */
19✔
1250
    if (ir->op2 == IRTOSTR_INT)
19✔
1251
      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1252
    else
1253
      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
8✔
1254
  }
1255
  asm_setupresult(as, ir, ci);  /* GCstr * */
42✔
1256
  asm_gencall(as, ci, args);
42✔
1257
  if (ir->op2 == IRTOSTR_NUM)
42✔
1258
    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
23✔
1259
}
42✔
1260

1261
#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1262
static void asm_conv64(ASMState *as, IRIns *ir)
1263
{
1264
  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1265
  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1266
  IRCallID id;
1267
  IRRef args[2];
1268
  lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
1269
             "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
1270
  args[LJ_BE] = (ir-1)->op1;
1271
  args[LJ_LE] = ir->op1;
1272
  if (st == IRT_NUM || st == IRT_FLOAT) {
1273
    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1274
    ir--;
1275
  } else {
1276
    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1277
  }
1278
  {
1279
#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1280
    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1281
    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
1282
#else
1283
    const CCallInfo *ci = &lj_ir_callinfo[id];
1284
#endif
1285
    asm_setupresult(as, ir, ci);
1286
    asm_gencall(as, ci, args);
1287
  }
1288
}
1289
#endif
1290

1291
/* -- Memory references --------------------------------------------------- */
1292

1293
static void asm_newref(ASMState *as, IRIns *ir)
1,064✔
1294
{
1295
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1,064✔
1296
  IRRef args[3];
1,064✔
1297
  if (ir->r == RID_SINK)
1,064✔
1298
    return;
33✔
1299
  asm_snap_prep(as);
1,031✔
1300
  args[0] = ASMREF_L;     /* lua_State *L */
1,031✔
1301
  args[1] = ir->op1;      /* GCtab *t     */
1,031✔
1302
  args[2] = ASMREF_TMP1;  /* cTValue *key */
1,031✔
1303
  asm_setupresult(as, ir, ci);  /* TValue * */
1,031✔
1304
  asm_gencall(as, ci, args);
1,031✔
1305
  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1,031✔
1306
}
1307

1308
static void asm_lref(ASMState *as, IRIns *ir)
2✔
1309
{
1310
  Reg r = ra_dest(as, ir, RSET_GPR);
2✔
1311
#if LJ_TARGET_X86ORX64
1312
  ra_left(as, r, ASMREF_L);
2✔
1313
#else
1314
  ra_leftov(as, r, ASMREF_L);
1315
#endif
1316
}
2✔
1317

1318
/* -- Calls --------------------------------------------------------------- */
1319

1320
/* Collect arguments from CALL* and CARG instructions. */
1321
static void asm_collectargs(ASMState *as, IRIns *ir,
1322
                            const CCallInfo *ci, IRRef *args)
1323
{
1324
  uint32_t n = CCI_XNARGS(ci);
1325
  /* Account for split args. */
1326
  lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
1327
  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1328
  while (n-- > 1) {
1329
    ir = IR(ir->op1);
1330
    lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
1331
    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1332
  }
1333
  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1334
  lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
1335
}
1336

1337
/* Reconstruct CCallInfo flags for CALLX*. */
1338
static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1339
{
1340
  uint32_t nargs = 0;
1341
  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
1342
    IRIns *ira = IR(ir->op1);
1343
    nargs++;
1344
    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1345
  }
1346
#if LJ_HASFFI
1347
  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
1348
    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1349
    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1350
    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1351
#if LJ_TARGET_X86
1352
    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1353
#endif
1354
  }
1355
#endif
1356
  return (nargs | (ir->t.irt << CCI_OTSHIFT));
1357
}
1358

1359
static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
301✔
1360
{
1361
  const CCallInfo *ci = &lj_ir_callinfo[id];
301✔
1362
  IRRef args[2];
301✔
1363
  args[0] = ir->op1;
301✔
1364
  args[1] = ir->op2;
301✔
1365
  asm_setupresult(as, ir, ci);
301✔
1366
  asm_gencall(as, ci, args);
301✔
1367
}
301✔
1368

1369
static void asm_call(ASMState *as, IRIns *ir)
503✔
1370
{
1371
  IRRef args[CCI_NARGS_MAX];
503✔
1372
  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
503✔
1373
  asm_collectargs(as, ir, ci, args);
503✔
1374
  asm_setupresult(as, ir, ci);
503✔
1375
  asm_gencall(as, ci, args);
503✔
1376
}
503✔
1377

1378
/* -- PHI and loop handling ----------------------------------------------- */
1379

1380
/* Break a PHI cycle by renaming to a free register (evict if needed). */
1381
static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby,
92✔
1382
                          RegSet allow)
1383
{
1384
  RegSet candidates = blocked & allow;
92✔
1385
  if (candidates) {  /* If this register file has candidates. */
92✔
1386
    /* Note: the set for ra_pick cannot be empty, since each register file
1387
    ** has some registers never allocated to PHIs.
1388
    */
1389
    Reg down, up = ra_pick(as, ~blocked & allow);  /* Get a free register. */
50✔
1390
    if (candidates & ~blockedby)  /* Optimize shifts, else it's a cycle. */
50✔
1391
      candidates = candidates & ~blockedby;
×
1392
    down = rset_picktop(candidates);  /* Pick candidate PHI register. */
50✔
1393
    ra_rename(as, down, up);  /* And rename it to the free register. */
50✔
1394
  }
1395
}
92✔
1396

1397
/* PHI register shuffling.
1398
**
1399
** The allocator tries hard to preserve PHI register assignments across
1400
** the loop body. Most of the time this loop does nothing, since there
1401
** are no register mismatches.
1402
**
1403
** If a register mismatch is detected and ...
1404
** - the register is currently free: rename it.
1405
** - the register is blocked by an invariant: restore/remat and rename it.
1406
** - Otherwise the register is used by another PHI, so mark it as blocked.
1407
**
1408
** The renames are order-sensitive, so just retry the loop if a register
1409
** is marked as blocked, but has been freed in the meantime. A cycle is
1410
** detected if all of the blocked registers are allocated. To break the
1411
** cycle rename one of them to a free register and retry.
1412
**
1413
** Note that PHI spill slots are kept in sync and don't need to be shuffled.
1414
*/
1415
static void asm_phi_shuffle(ASMState *as)
2,417✔
1416
{
1417
  RegSet work;
2,566✔
1418

1419
  /* Find and resolve PHI register mismatches. */
1420
  for (;;) {
2,566✔
1421
    RegSet blocked = RSET_EMPTY;
2,566✔
1422
    RegSet blockedby = RSET_EMPTY;
2,566✔
1423
    RegSet phiset = as->phiset;
2,566✔
1424
    while (phiset) {  /* Check all left PHI operand registers. */
7,427✔
1425
      Reg r = rset_pickbot(phiset);
4,861✔
1426
      IRIns *irl = IR(as->phireg[r]);
4,861✔
1427
      Reg left = irl->r;
4,861✔
1428
      if (r != left) {  /* Mismatch? */
4,861✔
1429
        if (!rset_test(as->freeset, r)) {  /* PHI register blocked? */
1,301✔
1430
          IRRef ref = regcost_ref(as->cost[r]);
646✔
1431
          /* Blocked by other PHI (w/reg)? */
1432
          if (!ra_iskref(ref) && irt_ismarked(IR(ref)->t)) {
646✔
1433
            rset_set(blocked, r);
642✔
1434
            if (ra_hasreg(left))
642✔
1435
              rset_set(blockedby, left);
642✔
1436
            left = RID_NONE;
1437
          } else {  /* Otherwise grab register from invariant. */
1438
            ra_restore(as, ref);
4✔
1439
            checkmclim(as);
4✔
1440
          }
1441
        }
1442
        if (ra_hasreg(left)) {
1,301✔
1443
          ra_rename(as, left, r);
530✔
1444
          checkmclim(as);
530✔
1445
        }
1446
      }
1447
      rset_clear(phiset, r);
4,861✔
1448
    }
1449
    if (!blocked) break;  /* Finished. */
2,566✔
1450
    if (!(as->freeset & blocked)) {  /* Break cycles if none are free. */
149✔
1451
      asm_phi_break(as, blocked, blockedby, RSET_GPR);
46✔
1452
      if (!LJ_SOFTFP) asm_phi_break(as, blocked, blockedby, RSET_FPR);
46✔
1453
      checkmclim(as);
2,612✔
1454
    }  /* Else retry some more renames. */
1455
  }
1456

1457
  /* Restore/remat invariants whose registers are modified inside the loop. */
1458
#if !LJ_SOFTFP
1459
  work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR;
2,417✔
1460
  while (work) {
2,417✔
1461
    Reg r = rset_pickbot(work);
5✔
1462
    ra_restore(as, regcost_ref(as->cost[r]));
5✔
1463
    rset_clear(work, r);
5✔
1464
    checkmclim(as);
2,422✔
1465
  }
1466
#endif
1467
  work = as->modset & ~(as->freeset | as->phiset);
2,417✔
1468
  while (work) {
2,417✔
1469
    Reg r = rset_pickbot(work);
233✔
1470
    ra_restore(as, regcost_ref(as->cost[r]));
233✔
1471
    rset_clear(work, r);
233✔
1472
    checkmclim(as);
2,650✔
1473
  }
1474

1475
  /* Allocate and save all unsaved PHI regs and clear marks. */
1476
  work = as->phiset;
2,417✔
1477
  while (work) {
6,134✔
1478
    Reg r = rset_picktop(work);
3,717✔
1479
    IRRef lref = as->phireg[r];
3,717✔
1480
    IRIns *ir = IR(lref);
3,717✔
1481
    if (ra_hasspill(ir->s)) {  /* Left PHI gained a spill slot? */
3,717✔
1482
      irt_clearmark(ir->t);  /* Handled here, so clear marker now. */
174✔
1483
      ra_alloc1(as, lref, RID2RSET(r));
174✔
1484
      ra_save(as, ir, r);  /* Save to spill slot inside the loop. */
174✔
1485
      checkmclim(as);
174✔
1486
    }
1487
    rset_clear(work, r);
3,717✔
1488
  }
1489
}
2,417✔
1490
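Editor's aside: the shuffling strategy described above (retry plain renames, then break a pure cycle through a spare register) can be reproduced outside the allocator. The sketch below is illustrative only and is not part of lj_asm.c; shuffle(), the val/src arrays and the spare index are invented names. It resolves a register permutation with moves, parking one value in a spare register whenever no move can proceed, which is the same idea asm_phi_shuffle() and asm_phi_break() apply to PHI registers.

/* Editorial sketch (not LuaJIT code): resolve a register permutation with
** plain moves, breaking cycles through one spare register.
** val[r] = value currently held in register r.
** src[r] = register whose value must end up in r, or -1 for "nothing to do".
** spare  = index of an extra register; val[] must have spare+1 entries.
*/
static void shuffle(int *val, int *src, int n, int spare)
{
  for (;;) {
    int progress = 0, pending = 0, r, t;
    for (r = 0; r < n; r++) {
      if (src[r] < 0 || src[r] == r) { src[r] = -1; continue; }
      pending = 1;
      for (t = 0; t < n; t++)        /* Is r's current value still needed? */
        if (src[t] == r) break;
      if (t == n) {                  /* No: safe to overwrite r. */
        val[r] = val[src[r]];        /* "mov r, src[r]" */
        src[r] = -1;
        progress = 1;
      }
    }
    if (!pending) break;             /* All registers match their targets. */
    if (!progress) {                 /* Pure cycle: park one value. */
      for (r = 0; r < n; r++)
        if (src[r] >= 0) break;
      val[spare] = val[r];           /* "mov spare, r" */
      for (t = 0; t < n; t++)
        if (src[t] == r) src[t] = spare;  /* Its readers now use the spare. */
    }
  }
}

#ifdef SHUFFLE_DEMO
#include <stdio.h>
int main(void)
{
  int val[4] = { 10, 20, 30, -1 };   /* Registers r0..r2 plus a spare r3. */
  int src[3] = { 1, 2, 0 };          /* A three-register rotation (cycle). */
  shuffle(val, src, 3, 3);
  printf("%d %d %d\n", val[0], val[1], val[2]);  /* Prints: 20 30 10 */
  return 0;
}
#endif

The allocator version works on register bit sets and renames IR references instead of moving values directly, but the cycle handling follows the same pattern.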

1491
/* Copy unsynced left/right PHI spill slots. Rarely needed. */
1492
static void asm_phi_copyspill(ASMState *as)
2,417✔
1493
{
1494
  int need = 0;
2,417✔
1495
  IRIns *ir;
2,417✔
1496
  for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--)
6,204✔
1497
    if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s))
3,787✔
1498
      need |= irt_isfp(ir->t) ? 2 : 1;  /* Unsynced spill slot? */
6✔
1499
  if ((need & 1)) {  /* Copy integer spill slots. */
2,417✔
1500
#if !LJ_TARGET_X86ORX64
1501
    Reg r = RID_TMP;
1502
#else
1503
    Reg r = RID_RET;
×
1504
    if ((as->freeset & RSET_GPR))
×
1505
      r = rset_pickbot((as->freeset & RSET_GPR));
×
1506
    else
1507
      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1508
#endif
1509
    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
×
1510
      if (ra_hasspill(ir->s)) {
×
1511
        IRIns *irl = IR(ir->op1);
×
1512
        if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
×
1513
          emit_spstore(as, irl, r, sps_scale(irl->s));
×
1514
          emit_spload(as, ir, r, sps_scale(ir->s));
×
1515
          checkmclim(as);
×
1516
        }
1517
      }
1518
    }
1519
#if LJ_TARGET_X86ORX64
1520
    if (!rset_test(as->freeset, r))
×
1521
      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1522
#endif
1523
  }
1524
#if !LJ_SOFTFP
1525
  if ((need & 2)) {  /* Copy FP spill slots. */
2,417✔
1526
#if LJ_TARGET_X86
1527
    Reg r = RID_XMM0;
1528
#else
1529
    Reg r = RID_FPRET;
2✔
1530
#endif
1531
    if ((as->freeset & RSET_FPR))
2✔
1532
      r = rset_pickbot((as->freeset & RSET_FPR));
2✔
1533
    if (!rset_test(as->freeset, r))
2✔
1534
      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1535
    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
39✔
1536
      if (ra_hasspill(ir->s)) {
37✔
1537
        IRIns *irl = IR(ir->op1);
6✔
1538
        if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
6✔
1539
          emit_spstore(as, irl, r, sps_scale(irl->s));
6✔
1540
          emit_spload(as, ir, r, sps_scale(ir->s));
6✔
1541
          checkmclim(as);
37✔
1542
        }
1543
      }
1544
    }
1545
    if (!rset_test(as->freeset, r))
2✔
1546
      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1547
  }
1548
#endif
1549
}
2,417✔
1550

1551
/* Emit renames for left PHIs which are only spilled outside the loop. */
1552
static void asm_phi_fixup(ASMState *as)
3,783✔
1553
{
1554
  RegSet work = as->phiset;
3,783✔
1555
  while (work) {
5,850✔
1556
    Reg r = rset_picktop(work);
2,067✔
1557
    IRRef lref = as->phireg[r];
2,067✔
1558
    IRIns *ir = IR(lref);
2,067✔
1559
    if (irt_ismarked(ir->t)) {
2,067✔
1560
      irt_clearmark(ir->t);
1,939✔
1561
      /* Left PHI gained a spill slot before the loop? */
1562
      if (ra_hasspill(ir->s)) {
1,939✔
1563
        ra_addrename(as, r, lref, as->loopsnapno);
22✔
1564
      }
1565
    }
1566
    rset_clear(work, r);
2,067✔
1567
  }
1568
}
3,783✔
1569

1570
/* Setup right PHI reference. */
1571
static void asm_phi(ASMState *as, IRIns *ir)
3,887✔
1572
{
1573
  RegSet allow = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) &
3,887✔
1574
                 ~as->phiset;
3,887✔
1575
  RegSet afree = (as->freeset & allow);
3,887✔
1576
  IRIns *irl = IR(ir->op1);
3,887✔
1577
  IRIns *irr = IR(ir->op2);
3,887✔
1578
  if (ir->r == RID_SINK)  /* Sink PHI. */
3,887✔
1579
    return;
1580
  /* Spill slot shuffling is not implemented yet (but rarely needed). */
1581
  if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
3,823✔
1582
    lj_trace_err(as->J, LJ_TRERR_NYIPHI);
×
1583
  /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */
1584
  if ((afree & (afree-1))) {  /* Two or more free registers? */
3,823✔
1585
    Reg r;
3,809✔
1586
    if (ra_noreg(irr->r)) {  /* Get a register for the right PHI. */
3,809✔
1587
      r = ra_allocref(as, ir->op2, allow);
3,807✔
1588
    } else {  /* Duplicate right PHI, need a copy (rare). */
1589
      r = ra_scratch(as, allow);
2✔
1590
      emit_movrr(as, irr, r, irr->r);
2✔
1591
    }
1592
    ir->r = (uint8_t)r;
3,809✔
1593
    rset_set(as->phiset, r);
3,809✔
1594
    as->phireg[r] = (IRRef1)ir->op1;
3,809✔
1595
    irt_setmark(irl->t);  /* Marks left PHIs _with_ register. */
3,809✔
1596
    if (ra_noreg(irl->r))
3,809✔
1597
      ra_sethint(irl->r, r); /* Set register hint for left PHI. */
3,591✔
1598
  } else {  /* Otherwise allocate a spill slot. */
1599
    /* This is overly restrictive, but it triggers only on synthetic code. */
1600
    if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
14✔
1601
      lj_trace_err(as->J, LJ_TRERR_NYIPHI);
5✔
1602
    ra_spill(as, ir);
9✔
1603
    irr->s = ir->s;  /* Set right PHI spill slot. Sync left slot later. */
9✔
1604
  }
1605
}
1606
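Editor's aside on the (afree & (afree-1)) test in asm_phi() above: clearing the lowest set bit of a word leaves a non-zero result exactly when at least two bits were set, so the condition reads as "two or more free registers". A minimal, standalone check with invented values (not LuaJIT code):

#include <assert.h>

int main(void)
{
  unsigned afree1 = 0x08u;                 /* Exactly one free register. */
  unsigned afree2 = 0x0Au;                 /* Two free registers. */
  assert((afree1 & (afree1 - 1u)) == 0u);  /* Not enough: take the spill path. */
  assert((afree2 & (afree2 - 1u)) != 0u);  /* Enough: allocate the right PHI. */
  return 0;
}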

1607
static void asm_loop_fixup(ASMState *as);
1608

1609
/* Middle part of a loop. */
1610
static void asm_loop(ASMState *as)
2,417✔
1611
{
1612
  MCode *mcspill;
2,417✔
1613
  /* LOOP is a guard, so the snapno is up to date. */
1614
  as->loopsnapno = as->snapno;
2,417✔
1615
  if (as->gcsteps)
2,417✔
1616
    asm_gc_check(as);
245✔
1617
  /* LOOP marks the transition from the variant to the invariant part. */
1618
  as->flagmcp = as->invmcp = NULL;
2,417✔
1619
  as->sectref = 0;
2,417✔
1620
  if (!neverfuse(as)) as->fuseref = 0;
2,417✔
1621
  asm_phi_shuffle(as);
2,417✔
1622
  mcspill = as->mcp;
2,417✔
1623
  asm_phi_copyspill(as);
2,417✔
1624
  asm_loop_fixup(as);
2,417✔
1625
  as->mcloop = as->mcp;
2,417✔
1626
  RA_DBGX((as, "===== LOOP ====="));
2,417✔
1627
  if (!as->realign) RA_DBG_FLUSH();
2,417✔
1628
  if (as->mcp != mcspill)
2,417✔
1629
    emit_jmp(as, mcspill);
2✔
1630
}
2,417✔
1631

1632
/* -- Target-specific assembler ------------------------------------------- */
1633

1634
#if LJ_TARGET_X86ORX64
1635
#include "lj_asm_x86.h"
1636
#elif LJ_TARGET_ARM
1637
#include "lj_asm_arm.h"
1638
#elif LJ_TARGET_ARM64
1639
#include "lj_asm_arm64.h"
1640
#elif LJ_TARGET_PPC
1641
#include "lj_asm_ppc.h"
1642
#elif LJ_TARGET_MIPS
1643
#include "lj_asm_mips.h"
1644
#else
1645
#error "Missing assembler for target CPU"
1646
#endif
1647

1648
/* -- Common instruction helpers ------------------------------------------ */
1649

1650
#if !LJ_SOFTFP32
1651
#if !LJ_TARGET_X86ORX64
1652
#define asm_ldexp(as, ir)        asm_callid(as, ir, IRCALL_ldexp)
1653
#endif
1654

1655
static void asm_pow(ASMState *as, IRIns *ir)
234✔
1656
{
1657
#if LJ_64 && LJ_HASFFI
1658
  if (!irt_isnum(ir->t))
234✔
1659
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
×
1660
                                          IRCALL_lj_carith_powu64);
1661
  else
1662
#endif
1663
  asm_callid(as, ir, IRCALL_pow);
234✔
1664
}
234✔
1665

1666
static void asm_div(ASMState *as, IRIns *ir)
137✔
1667
{
1668
#if LJ_64 && LJ_HASFFI
1669
  if (!irt_isnum(ir->t))
137✔
1670
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
×
1671
                                          IRCALL_lj_carith_divu64);
1672
  else
1673
#endif
1674
    asm_fpdiv(as, ir);
137✔
1675
}
137✔
1676
#endif
1677

1678
static void asm_mod(ASMState *as, IRIns *ir)
67✔
1679
{
1680
#if LJ_64 && LJ_HASFFI
1681
  if (!irt_isint(ir->t))
67✔
1682
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
×
1683
                                          IRCALL_lj_carith_modu64);
1684
  else
1685
#endif
1686
    asm_callid(as, ir, IRCALL_lj_vm_modi);
67✔
1687
}
67✔
1688

1689
static void asm_fuseequal(ASMState *as, IRIns *ir)
122,303✔
1690
{
1691
  /* Fuse HREF + EQ/NE. */
1692
  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
122,303✔
1693
    as->curins--;
1,070✔
1694
    asm_href(as, ir-1, (IROp)ir->o);
1,070✔
1695
  } else {
1696
    asm_equal(as, ir);
121,233✔
1697
  }
1698
}
122,303✔
1699

1700
/* -- Instruction dispatch ------------------------------------------------ */
1701

1702
/* Assemble a single instruction. */
1703
static void asm_ir(ASMState *as, IRIns *ir)
602,270✔
1704
{
1705
  switch ((IROp)ir->o) {
602,270✔
1706
  /* Miscellaneous ops. */
1707
  case IR_LOOP: asm_loop(as); break;
2,417✔
1708
  case IR_NOP: case IR_XBAR:
1709
    lj_assertA(!ra_used(ir),
1710
               "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
1711
    break;
1712
  case IR_USE:
5✔
1713
    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
10✔
1714
  case IR_PHI: asm_phi(as, ir); break;
3,887✔
1715
  case IR_HIOP: asm_hiop(as, ir); break;
1716
  case IR_GCSTEP: asm_gcstep(as, ir); break;
102✔
1717
  case IR_PROF: asm_prof(as, ir); break;
1✔
1718

1719
  /* Guarded assertions. */
1720
  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
8,607✔
1721
  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1722
  case IR_ABC:
1723
    asm_comp(as, ir);
8,607✔
1724
    break;
8,607✔
1725
  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
122,303✔
1726

1727
  case IR_RETF: asm_retf(as, ir); break;
1,057✔
1728

1729
  /* Bit ops. */
1730
  case IR_BNOT: asm_bnot(as, ir); break;
3✔
1731
  case IR_BSWAP: asm_bswap(as, ir); break;
9✔
1732
  case IR_BAND: asm_band(as, ir); break;
248✔
1733
  case IR_BOR: asm_bor(as, ir); break;
87✔
1734
  case IR_BXOR: asm_bxor(as, ir); break;
35✔
1735
  case IR_BSHL: asm_bshl(as, ir); break;
84✔
1736
  case IR_BSHR: asm_bshr(as, ir); break;
31✔
1737
  case IR_BSAR: asm_bsar(as, ir); break;
12✔
1738
  case IR_BROL: asm_brol(as, ir); break;
23✔
1739
  case IR_BROR: asm_bror(as, ir); break;
3✔
1740

1741
  /* Arithmetic ops. */
1742
  case IR_ADD: asm_add(as, ir); break;
28,733✔
1743
  case IR_SUB: asm_sub(as, ir); break;
731✔
1744
  case IR_MUL: asm_mul(as, ir); break;
18,380✔
1745
  case IR_MOD: asm_mod(as, ir); break;
67✔
1746
  case IR_NEG: asm_neg(as, ir); break;
29✔
1747
#if LJ_SOFTFP32
1748
  case IR_DIV: case IR_POW: case IR_ABS:
1749
  case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
1750
    /* Unused for LJ_SOFTFP32. */
1751
    lj_assertA(0, "IR %04d with unused op %d",
1752
                  (int)(ir - as->ir) - REF_BIAS, ir->o);
1753
    break;
1754
#else
1755
  case IR_DIV: asm_div(as, ir); break;
137✔
1756
  case IR_POW: asm_pow(as, ir); break;
234✔
1757
  case IR_ABS: asm_abs(as, ir); break;
4✔
1758
  case IR_LDEXP: asm_ldexp(as, ir); break;
270✔
1759
  case IR_FPMATH: asm_fpmath(as, ir); break;
158✔
1760
  case IR_TOBIT: asm_tobit(as, ir); break;
65✔
1761
#endif
1762
  case IR_MIN: asm_min(as, ir); break;
17✔
1763
  case IR_MAX: asm_max(as, ir); break;
494✔
1764

1765
  /* Overflow-checking arithmetic ops. */
1766
  case IR_ADDOV: asm_addov(as, ir); break;
61✔
1767
  case IR_SUBOV: asm_subov(as, ir); break;
64✔
1768
  case IR_MULOV: asm_mulov(as, ir); break;
2✔
1769

1770
  /* Memory references. */
1771
  case IR_AREF: asm_aref(as, ir); break;
35✔
1772
  case IR_HREF: asm_href(as, ir, 0); break;
38✔
1773
  case IR_HREFK: asm_hrefk(as, ir); break;
78,066✔
1774
  case IR_NEWREF: asm_newref(as, ir); break;
1,064✔
1775
  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
422✔
1776
  case IR_FREF: asm_fref(as, ir); break;
×
1777
  case IR_STRREF: asm_strref(as, ir); break;
316✔
1778
  case IR_LREF: asm_lref(as, ir); break;
2✔
1779

1780
  /* Loads and stores. */
1781
  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
41,917✔
1782
    asm_ahuvload(as, ir);
41,917✔
1783
    break;
41,917✔
1784
  case IR_FLOAD: asm_fload(as, ir); break;
191,368✔
1785
  case IR_XLOAD: asm_xload(as, ir); break;
443✔
1786
  case IR_SLOAD: asm_sload(as, ir); break;
12,693✔
1787

1788
  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
40,912✔
1789
  case IR_FSTORE: asm_fstore(as, ir); break;
75✔
1790
  case IR_XSTORE: asm_xstore(as, ir); break;
614✔
1791

1792
  /* Allocations. */
1793
  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
105✔
1794
  case IR_TNEW: asm_tnew(as, ir); break;
184✔
1795
  case IR_TDUP: asm_tdup(as, ir); break;
53✔
1796
  case IR_CNEW: case IR_CNEWI:
1,624✔
1797
#if LJ_HASFFI
1798
    asm_cnew(as, ir);
1,624✔
1799
#else
1800
    lj_assertA(0, "IR %04d with unused op %d",
1801
                  (int)(ir - as->ir) - REF_BIAS, ir->o);
1802
#endif
1803
    break;
1,624✔
1804

1805
  /* Buffer operations. */
1806
  case IR_BUFHDR: asm_bufhdr(as, ir); break;
771✔
1807
  case IR_BUFPUT: asm_bufput(as, ir); break;
1,364✔
1808
  case IR_BUFSTR: asm_bufstr(as, ir); break;
757✔
1809

1810
  /* Write barriers. */
1811
  case IR_TBAR: asm_tbar(as, ir); break;
38,327✔
1812
  case IR_OBAR: asm_obar(as, ir); break;
4✔
1813

1814
  /* Type conversions. */
1815
  case IR_CONV: asm_conv(as, ir); break;
1,976✔
1816
  case IR_TOSTR: asm_tostr(as, ir); break;
42✔
1817
  case IR_STRTO: asm_strto(as, ir); break;
158✔
1818

1819
  /* Calls. */
1820
  case IR_CALLA:
3✔
1821
    as->gcsteps++;
3✔
1822
    /* fallthrough */
1823
  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
503✔
1824
  case IR_CALLXS: asm_callx(as, ir); break;
19✔
1825
  case IR_CARG: break;
1826

1827
  default:
×
1828
    setintV(&as->J->errinfo, ir->o);
×
1829
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
×
1830
    break;
602,265✔
1831
  }
1832
}
602,265✔
1833

1834
/* -- Head of trace ------------------------------------------------------- */
1835

1836
/* Head of a root trace. */
1837
static void asm_head_root(ASMState *as)
2,147✔
1838
{
1839
  int32_t spadj;
2,147✔
1840
  asm_head_root_base(as);
2,147✔
1841
  emit_setvmstate(as, (int32_t)as->T->traceno);
2,147✔
1842
  spadj = asm_stack_adjust(as);
2,147✔
1843
  as->T->spadjust = (uint16_t)spadj;
2,147✔
1844
  emit_spsub(as, spadj);
2,147✔
1845
  /* Root traces assume a checked stack for the starting proto. */
1846
  as->T->topslot = gcref(as->T->startpt)->pt.framesize;
2,147✔
1847
}
2,147✔
1848

1849
/* Head of a side trace.
1850
**
1851
** The current simplistic algorithm requires that all slots inherited
1852
** from the parent are live in a register between pass 2 and pass 3. This
1853
** avoids the complexity of stack slot shuffling. But of course this may
1854
** overflow the register set in some cases and cause the dreaded error:
1855
** "NYI: register coalescing too complex". A refined algorithm is needed.
1856
*/
1857
static void asm_head_side(ASMState *as)
1,713✔
1858
{
1859
  IRRef1 sloadins[RID_MAX];
1,713✔
1860
  RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
1,713✔
1861
  RegSet live = RSET_EMPTY;  /* Live parent registers. */
1,713✔
1862
  RegSet pallow = RSET_GPR;  /* Registers needed by the parent stack check. */
1,713✔
1863
  Reg pbase;
1,713✔
1864
  IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
1,713✔
1865
  int32_t spadj, spdelta;
1,713✔
1866
  int pass2 = 0;
1,713✔
1867
  int pass3 = 0;
1,713✔
1868
  IRRef i;
1,713✔
1869

1870
  if (as->snapno && as->topslot > as->parent->topslot) {
1,713✔
1871
    /* Force snap #0 alloc to prevent register overwrite in stack check. */
1872
    asm_snap_alloc(as, 0);
39✔
1873
  }
1874
  pbase = asm_head_side_base(as, irp);
1,713✔
1875
  if (pbase != RID_NONE) {
1,713✔
1876
    rset_clear(allow, pbase);
1,034✔
1877
    rset_clear(pallow, pbase);
1,034✔
1878
  }
1879

1880
  /* Scan all parent SLOADs and collect register dependencies. */
1881
  for (i = as->stopins; i > REF_BASE; i--) {
5,370✔
1882
    IRIns *ir = IR(i);
3,657✔
1883
    RegSP rs;
3,657✔
1884
    lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
3,657✔
1885
               (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
1886
               "IR %04d has bad parent op %d",
1887
               (int)(ir - as->ir) - REF_BIAS, ir->o);
1888
    rs = as->parentmap[i - REF_FIRST];
3,657✔
1889
    if (ra_hasreg(ir->r)) {
3,657✔
1890
      rset_clear(allow, ir->r);
1,642✔
1891
      if (ra_hasspill(ir->s)) {
1,642✔
1892
        ra_save(as, ir, ir->r);
31✔
1893
        checkmclim(as);
31✔
1894
      }
1895
    } else if (ra_hasspill(ir->s)) {
2,015✔
1896
      irt_setmark(ir->t);
1,905✔
1897
      pass2 = 1;
1,905✔
1898
    }
1899
    if (ir->r == rs) {  /* Coalesce matching registers right now. */
3,657✔
1900
      ra_free(as, ir->r);
1,024✔
1901
    } else if (ra_hasspill(regsp_spill(rs))) {
2,633✔
1902
      if (ra_hasreg(ir->r))
1,698✔
1903
        pass3 = 1;
454✔
1904
    } else if (ra_used(ir)) {
935✔
1905
      sloadins[rs] = (IRRef1)i;
913✔
1906
      rset_set(live, rs);  /* Block live parent register. */
913✔
1907
    }
1908
    if (!ra_hasspill(regsp_spill(rs))) rset_clear(pallow, regsp_reg(rs));
3,657✔
1909
  }
1910

1911
  /* Calculate stack frame adjustment. */
1912
  spadj = asm_stack_adjust(as);
1,713✔
1913
  spdelta = spadj - (int32_t)as->parent->spadjust;
1,713✔
1914
  if (spdelta < 0) {  /* Don't shrink the stack frame. */
1,713✔
1915
    spadj = (int32_t)as->parent->spadjust;
328✔
1916
    spdelta = 0;
328✔
1917
  }
1918
  as->T->spadjust = (uint16_t)spadj;
1,713✔
1919

1920
  /* Reload spilled target registers. */
1921
  if (pass2) {
1,713✔
1922
    for (i = as->stopins; i > REF_BASE; i--) {
1,252✔
1923
      IRIns *ir = IR(i);
1,202✔
1924
      if (irt_ismarked(ir->t)) {
1,202✔
1925
        RegSet mask;
827✔
1926
        Reg r;
827✔
1927
        RegSP rs;
827✔
1928
        irt_clearmark(ir->t);
827✔
1929
        rs = as->parentmap[i - REF_FIRST];
827✔
1930
        if (!ra_hasspill(regsp_spill(rs)))
827✔
1931
          ra_sethint(ir->r, rs);  /* Hint may be gone, set it again. */
639✔
1932
        else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
188✔
1933
          continue;  /* Same spill slot, do nothing. */
18✔
1934
        mask = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
809✔
1935
        if (mask == RSET_EMPTY)
809✔
1936
          lj_trace_err(as->J, LJ_TRERR_NYICOAL);
77✔
1937
        r = ra_allocref(as, i, mask);
732✔
1938
        ra_save(as, ir, r);
732✔
1939
        rset_clear(allow, r);
732✔
1940
        if (r == rs) {  /* Coalesce matching registers right now. */
732✔
1941
          ra_free(as, r);
329✔
1942
          rset_clear(live, r);
329✔
1943
        } else if (ra_hasspill(regsp_spill(rs))) {
403✔
1944
          pass3 = 1;
170✔
1945
        }
1946
        checkmclim(as);
1,125✔
1947
      }
1948
    }
1949
  }
1950

1951
  /* Store trace number and adjust stack frame relative to the parent. */
1952
  emit_setvmstate(as, (int32_t)as->T->traceno);
1,636✔
1953
  emit_spsub(as, spdelta);
1,636✔
1954

1955
#if !LJ_TARGET_X86ORX64
1956
  /* Restore BASE register from parent spill slot. */
1957
  if (ra_hasspill(irp->s))
1958
    emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s));
1959
#endif
1960

1961
  /* Restore target registers from parent spill slots. */
1962
  if (pass3) {
1,636✔
1963
    RegSet work = ~as->freeset & RSET_ALL;
56✔
1964
    while (work) {
56✔
1965
      Reg r = rset_pickbot(work);
107✔
1966
      IRRef ref = regcost_ref(as->cost[r]);
107✔
1967
      RegSP rs = as->parentmap[ref - REF_FIRST];
107✔
1968
      rset_clear(work, r);
107✔
1969
      if (ra_hasspill(regsp_spill(rs))) {
107✔
1970
        int32_t ofs = sps_scale(regsp_spill(rs));
85✔
1971
        ra_free(as, r);
85✔
1972
        emit_spload(as, IR(ref), r, ofs);
85✔
1973
        checkmclim(as);
248✔
1974
      }
1975
    }
1976
  }
1977

1978
  /* Shuffle registers to match up target regs with parent regs. */
1979
  for (;;) {
1980
    RegSet work;
1981

1982
    /* Repeatedly coalesce free live registers by moving to their target. */
1983
    while ((work = as->freeset & live) != RSET_EMPTY) {
1,809✔
1984
      Reg rp = rset_pickbot(work);
166✔
1985
      IRIns *ir = IR(sloadins[rp]);
166✔
1986
      rset_clear(live, rp);
166✔
1987
      rset_clear(allow, rp);
166✔
1988
      ra_free(as, ir->r);
166✔
1989
      emit_movrr(as, ir, ir->r, rp);
166✔
1990
      checkmclim(as);
166✔
1991
    }
1992

1993
    /* We're done if no live registers remain. */
1994
    if (live == RSET_EMPTY)
1,643✔
1995
      break;
1996

1997
    /* Break cycles by renaming one target to a temp. register. */
1998
    if (live & RSET_GPR) {
7✔
1999
      RegSet tmpset = as->freeset & ~live & allow & RSET_GPR;
6✔
2000
      if (tmpset == RSET_EMPTY)
6✔
UNCOV
2001
        lj_trace_err(as->J, LJ_TRERR_NYICOAL);
×
2002
      ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset));
6✔
2003
    }
2004
    if (!LJ_SOFTFP && (live & RSET_FPR)) {
7✔
2005
      RegSet tmpset = as->freeset & ~live & allow & RSET_FPR;
1✔
2006
      if (tmpset == RSET_EMPTY)
1✔
UNCOV
2007
        lj_trace_err(as->J, LJ_TRERR_NYICOAL);
×
2008
      ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset));
1✔
2009
    }
2010
    checkmclim(as);
1,816✔
2011
    /* Continue with coalescing to fix up the broken cycle(s). */
2012
  }
2013

2014
  /* Inherit top stack slot already checked by parent trace. */
2015
  as->T->topslot = as->parent->topslot;
1,636✔
2016
  if (as->topslot > as->T->topslot) {  /* Need to check for higher slot? */
1,636✔
2017
#ifdef EXITSTATE_CHECKEXIT
2018
    /* Highest exit + 1 indicates stack check. */
2019
    ExitNo exitno = as->T->nsnap;
2020
#else
2021
    /* Reuse the parent exit in the context of the parent trace. */
2022
    ExitNo exitno = as->J->exitno;
113✔
2023
#endif
2024
    as->T->topslot = (uint8_t)as->topslot;  /* Remember for child traces. */
113✔
2025
    asm_stack_check(as, as->topslot, irp, pallow, exitno);
113✔
2026
  }
2027
}
1,636✔
2028
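Editor's aside: the allow, live and pallow sets manipulated above are plain bitmasks with one bit per register id. The helpers below are an editorial approximation of the macros declared in lj_target.h (the real definitions are target-specific), included only so the set algebra in asm_head_side() is easy to follow: for example, as->freeset & live selects parent registers that are still live and whose target register is already free, which is exactly the work set the coalescing loop drains.

/* Editorial approximation of the RegSet helpers; see lj_target.h for the
** real, target-specific definitions.
*/
typedef unsigned int RegSet;               /* One bit per register id. */

#define RID2RSET(r)       ((RegSet)1u << (r))
#define rset_test(rs, r)  (((rs) >> (r)) & 1u)
#define rset_set(rs, r)   ((rs) |= RID2RSET(r))
#define rset_clear(rs, r) ((rs) &= ~RID2RSET(r))

/* Lowest/highest set bit, i.e. the register picked by rset_pickbot() and
** rset_picktop() in the loops above (rs must be non-empty).
*/
static int rset_pickbot_(RegSet rs) { int r = 0; while (!(rs & 1u)) { rs >>= 1; r++; } return r; }
static int rset_picktop_(RegSet rs) { int r = 0; while (rs >>= 1) r++; return r; }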

2029
/* -- Tail of trace ------------------------------------------------------- */
2030

2031
/* Get base slot for a snapshot. */
2032
static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
2,584✔
2033
{
2034
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
2,584✔
2035
  MSize n;
2,584✔
2036
  for (n = snap->nent; n > 0; n--) {
8,927✔
2037
    SnapEntry sn = map[n-1];
7,646✔
2038
    if ((sn & SNAP_FRAME)) {
7,646✔
2039
      *gotframe = 1;
1,303✔
2040
      return snap_slot(sn) - LJ_FR2;
1,303✔
2041
    }
2042
  }
2043
  return 0;
2044
}
2045

2046
/* Link to another trace. */
2047
static void asm_tail_link(ASMState *as)
2,584✔
2048
{
2049
  SnapNo snapno = as->T->nsnap-1;  /* Last snapshot. */
2,584✔
2050
  SnapShot *snap = &as->T->snap[snapno];
2,584✔
2051
  int gotframe = 0;
2,584✔
2052
  BCReg baseslot = asm_baseslot(as, snap, &gotframe);
2,584✔
2053

2054
  as->topslot = snap->topslot;
2,584✔
2055
  checkmclim(as);
2,584✔
2056
  ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
2,584✔
2057

2058
  if (as->T->link == 0) {
2,584✔
2059
    /* Setup fixed registers for exit to interpreter. */
2060
    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
436✔
2061
    int32_t mres;
436✔
2062
    if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
436✔
2063
      BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
2✔
2064
      if (bc_isret(bc_op(*retpc)))
2✔
2065
        pc = retpc;
2✔
2066
    }
2067
#if LJ_GC64
2068
    emit_loadu64(as, RID_LPC, u64ptr(pc));
436✔
2069
#else
2070
    ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
2071
    ra_allockreg(as, i32ptr(pc), RID_LPC);
2072
#endif
2073
    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
436✔
2074
    switch (bc_op(*pc)) {
436✔
UNCOV
2075
    case BC_CALLM: case BC_CALLMT:
×
UNCOV
2076
      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
×
UNCOV
2077
    case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
×
UNCOV
2078
    case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
×
2079
    default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
436✔
2080
    }
2081
    ra_allockreg(as, mres, RID_RET);  /* Return MULTRES or 0. */
436✔
2082
  } else if (baseslot) {
2,148✔
2083
    /* Save modified BASE for linking to trace with higher start frame. */
2084
    emit_setgl(as, RID_BASE, jit_base);
833✔
2085
  }
2086
  emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
2,584✔
2087

2088
  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
2,584✔
2089
    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
117✔
2090
    IR(as->J->ktrace)->o = IR_KGC;
117✔
2091
  }
2092

2093
  /* Sync the interpreter state with the on-trace state. */
2094
  asm_stack_restore(as, snap);
2,584✔
2095

2096
  /* Root traces that add frames need to check the stack at the end. */
2097
  if (!as->parent && gotframe)
2,583✔
2098
    asm_stack_check(as, as->topslot, NULL, as->freeset & RSET_GPR, snapno);
114✔
2099
}
2,583✔
2100

2101
/* -- Trace setup --------------------------------------------------------- */
2102

2103
/* Clear reg/sp for all instructions and add register hints. */
2104
static void asm_setup_regsp(ASMState *as)
5,008✔
2105
{
2106
  GCtrace *T = as->T;
5,008✔
2107
  int sink = T->sinktags;
5,008✔
2108
  IRRef nins = T->nins;
5,008✔
2109
  IRIns *ir, *lastir;
5,008✔
2110
  int inloop;
5,008✔
2111
#if LJ_TARGET_ARM
2112
  uint32_t rload = 0xa6402a64;
2113
#endif
2114

2115
  ra_setup(as);
5,008✔
2116

2117
  /* Clear reg/sp for constants. */
2118
  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
112,613✔
2119
    ir->prev = REGSP_INIT;
107,605✔
2120
    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
107,605✔
2121
#if LJ_GC64
2122
      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
2123
      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
52,308✔
2124
#else
2125
      /* Make life easier for backends by putting address of constant in i. */
2126
      ir->i = (int32_t)(intptr_t)(ir+1);
2127
#endif
2128
      ir++;
52,308✔
2129
    }
2130
  }
2131

2132
  /* REF_BASE is used for implicit references to the BASE register. */
2133
  lastir->prev = REGSP_HINT(RID_BASE);
5,008✔
2134

2135
  as->snaprename = nins;
5,008✔
2136
  as->snapref = nins;
5,008✔
2137
  as->snapno = T->nsnap;
5,008✔
2138
  as->snapalloc = 0;
5,008✔
2139

2140
  as->stopins = REF_BASE;
5,008✔
2141
  as->orignins = nins;
5,008✔
2142
  as->curins = nins;
5,008✔
2143

2144
  /* Setup register hints for parent link instructions. */
2145
  ir = IR(REF_FIRST);
5,008✔
2146
  if (as->parent) {
5,008✔
2147
    uint16_t *p;
1,844✔
2148
    lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
1,844✔
2149
    if (lastir - ir > LJ_MAX_JSLOTS)
1,844✔
UNCOV
2150
      lj_trace_err(as->J, LJ_TRERR_NYICOAL);
×
2151
    as->stopins = (IRRef)((lastir-1) - as->ir);
1,844✔
2152
    for (p = as->parentmap; ir < lastir; ir++) {
5,509✔
2153
      RegSP rs = ir->prev;
3,665✔
2154
      *p++ = (uint16_t)rs;  /* Copy original parent RegSP to parentmap. */
3,665✔
2155
      if (!ra_hasspill(regsp_spill(rs)))
3,665✔
2156
        ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
1,966✔
2157
      else
2158
        ir->prev = REGSP_INIT;
1,699✔
2159
    }
2160
  }
2161

2162
  inloop = 0;
5,008✔
2163
  as->evenspill = SPS_FIRST;
5,008✔
2164
  for (lastir = IR(nins); ir < lastir; ir++) {
659,678✔
2165
    if (sink) {
654,670✔
2166
      if (ir->r == RID_SINK)
8,845✔
2167
        continue;
1,013✔
2168
      if (ir->r == RID_SUNK) {  /* Revert after ASM restart. */
7,832✔
2169
        ir->r = RID_SINK;
103✔
2170
        continue;
103✔
2171
      }
2172
    }
2173
    switch (ir->o) {
653,554✔
2174
    case IR_LOOP:
2,424✔
2175
      inloop = 1;
2,424✔
2176
      break;
2,424✔
2177
#if LJ_TARGET_ARM
2178
    case IR_SLOAD:
2179
      if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP))
2180
        break;
2181
      /* fallthrough */
2182
    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2183
      if (!LJ_SOFTFP && irt_isnum(ir->t)) break;
2184
      ir->prev = (uint16_t)REGSP_HINT((rload & 15));
2185
      rload = lj_ror(rload, 4);
2186
      continue;
2187
#endif
2188
    case IR_CALLXS: {
22✔
2189
      CCallInfo ci;
22✔
2190
      ci.flags = asm_callx_flags(as, ir);
22✔
2191
      ir->prev = asm_setup_call_slots(as, ir, &ci);
22✔
2192
      if (inloop)
22✔
2193
        as->modset |= RSET_SCRATCH;
7✔
2194
      continue;
22✔
2195
      }
2196
    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
624✔
2197
      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
624✔
2198
      ir->prev = asm_setup_call_slots(as, ir, ci);
624✔
2199
      if (inloop)
624✔
2200
        as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
223✔
2201
                      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
223✔
2202
      continue;
624✔
2203
      }
2204
#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
2205
    case IR_HIOP:
2206
      switch ((ir-1)->o) {
2207
#if LJ_SOFTFP && LJ_TARGET_ARM
2208
      case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2209
        if (ra_hashint((ir-1)->r)) {
2210
          ir->prev = (ir-1)->prev + 1;
2211
          continue;
2212
        }
2213
        break;
2214
#endif
2215
#if !LJ_SOFTFP && LJ_NEED_FP64
2216
      case IR_CONV:
2217
        if (irt_isfp((ir-1)->t)) {
2218
          ir->prev = REGSP_HINT(RID_FPRET);
2219
          continue;
2220
        }
2221
#endif
2222
      /* fallthrough */
2223
      case IR_CALLN: case IR_CALLXS:
2224
#if LJ_SOFTFP
2225
      case IR_MIN: case IR_MAX:
2226
#endif
2227
        (ir-1)->prev = REGSP_HINT(RID_RETLO);
2228
        ir->prev = REGSP_HINT(RID_RETHI);
2229
        continue;
2230
      default:
2231
        break;
2232
      }
2233
      break;
2234
#endif
2235
#if LJ_SOFTFP
2236
    case IR_MIN: case IR_MAX:
2237
      if ((ir+1)->o != IR_HIOP) break;
2238
#endif
2239
    /* fallthrough */
2240
    /* C calls evict all scratch regs and return results in RID_RET. */
2241
    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
2242
      if (REGARG_NUMGPR < 3 && as->evenspill < 3)
2243
        as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
2244
#if LJ_TARGET_X86 && LJ_HASFFI
2245
      if (0) {
2246
    case IR_CNEW:
2247
        if (ir->op2 != REF_NIL && as->evenspill < 4)
2248
          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
2249
      }
2250
      /* fallthrough */
2251
#else
2252
      /* fallthrough */
2253
    case IR_CNEW:
2254
#endif
2255
      /* fallthrough */
2256
    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2257
    case IR_BUFSTR:
2258
      ir->prev = REGSP_HINT(RID_RET);
5,926✔
2259
      if (inloop)
5,926✔
2260
        as->modset = RSET_SCRATCH;
1,343✔
2261
      continue;
5,926✔
2262
    case IR_STRTO: case IR_OBAR:
173✔
2263
      if (inloop)
173✔
2264
        as->modset = RSET_SCRATCH;
74✔
2265
      break;
2266
#if !LJ_SOFTFP
2267
#if !LJ_TARGET_X86ORX64
2268
    case IR_LDEXP:
2269
#endif
2270
#endif
2271
      /* fallthrough */
2272
    case IR_POW:
463✔
2273
      if (!LJ_SOFTFP && irt_isnum(ir->t)) {
463✔
2274
        if (inloop)
463✔
2275
          as->modset |= RSET_SCRATCH;
2✔
2276
#if LJ_TARGET_X86
2277
        if (irt_isnum(IR(ir->op2)->t)) {
2278
          if (as->evenspill < 4)  /* Leave room to call pow(). */
2279
            as->evenspill = 4;
2280
        }
2281
        break;
2282
#else
2283
        ir->prev = REGSP_HINT(RID_FPRET);
463✔
2284
        continue;
463✔
2285
#endif
2286
      }
2287
      /* fallthrough */ /* for integer POW */
2288
    case IR_DIV: case IR_MOD:
2289
      if (!irt_isnum(ir->t)) {
219✔
2290
        ir->prev = REGSP_HINT(RID_RET);
71✔
2291
        if (inloop)
71✔
2292
          as->modset |= (RSET_SCRATCH & RSET_GPR);
35✔
2293
        continue;
71✔
2294
      }
2295
      break;
2296
    case IR_FPMATH:
173✔
2297
#if LJ_TARGET_X86ORX64
2298
      if (ir->op2 <= IRFPM_TRUNC) {
173✔
2299
        if (!(as->flags & JIT_F_SSE4_1)) {
169✔
UNCOV
2300
          ir->prev = REGSP_HINT(RID_XMM0);
×
UNCOV
2301
          if (inloop)
×
UNCOV
2302
            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
×
UNCOV
2303
          continue;
×
2304
        }
2305
        break;
2306
      }
2307
#endif
2308
      if (inloop)
4✔
2309
        as->modset |= RSET_SCRATCH;
2✔
2310
#if LJ_TARGET_X86
2311
      break;
2312
#else
2313
      ir->prev = REGSP_HINT(RID_FPRET);
4✔
2314
      continue;
4✔
2315
#endif
2316
#if LJ_TARGET_X86ORX64
2317
    /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
2318
    case IR_BSHL: case IR_BSHR: case IR_BSAR:
409✔
2319
      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
409✔
2320
        break;
2321
      /* fallthrough */
2322
    case IR_BROL: case IR_BROR:
2323
      if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
34✔
2324
        IR(ir->op2)->r = REGSP_HINT(RID_ECX);
10✔
2325
        if (inloop)
10✔
2326
          rset_set(as->modset, RID_ECX);
3✔
2327
      }
2328
      break;
2329
#endif
2330
    /* Do not propagate hints across type conversions or loads. */
2331
    case IR_TOBIT:
2332
    case IR_XLOAD:
2333
#if !LJ_TARGET_ARM
2334
    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2335
#endif
2336
      break;
2337
    case IR_CONV:
2,459✔
2338
      if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM ||
2,459✔
2339
          (ir->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
2340
        break;
2341
      /* fallthrough */
2342
    default:
2343
      /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
2344
      if (irref_isk(ir->op2) && !irref_isk(ir->op1) &&
595,754✔
2345
          ra_hashint(regsp_reg(IR(ir->op1)->prev))) {
477,467✔
2346
        ir->prev = IR(ir->op1)->prev;
4,570✔
2347
        continue;
4,570✔
2348
      }
2349
      break;
2350
    }
2351
    ir->prev = REGSP_INIT;
641,874✔
2352
  }
2353
  if ((as->evenspill & 1))
5,008✔
UNCOV
2354
    as->oddspill = as->evenspill++;
×
2355
  else
2356
    as->oddspill = 0;
5,008✔
2357
}
5,008✔
2358

2359
/* -- Assembler core ------------------------------------------------------ */
2360

2361
/* Assemble a trace. */
2362
void lj_asm_trace(jit_State *J, GCtrace *T)
3,855✔
2363
{
2364
  ASMState as_;
3,855✔
2365
  ASMState *as = &as_;
3,855✔
2366

2367
  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
2368
  {
2369
    IRRef nins = T->nins;
3,855✔
2370
    IRIns *ir = &T->ir[nins-1];
3,855✔
2371
    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
3,855✔
2372
      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
124✔
2373
      T->nins = nins;
124✔
2374
    }
2375
  }
2376

2377
  /* Ensure an initialized instruction beyond the last one for HIOP checks. */
2378
  /* This also allows one RENAME to be added without reallocating curfinal. */
2379
  as->orignins = lj_ir_nextins(J);
3,855✔
2380
  J->cur.ir[as->orignins].o = IR_NOP;
3,855✔
2381

2382
  /* Setup initial state. Copy some fields to reduce indirections. */
2383
  as->J = J;
3,855✔
2384
  as->T = T;
3,855✔
2385
  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
3,855✔
2386
  as->flags = J->flags;
3,855✔
2387
  as->loopref = J->loopref;
3,855✔
2388
  as->realign = NULL;
3,855✔
2389
  as->loopinv = 0;
3,855✔
2390
  as->parent = J->parent ? traceref(J, J->parent) : NULL;
3,855✔
2391

2392
  /* Reserve MCode memory. */
2393
  as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
3,855✔
2394
  as->mcp = as->mctop;
3,855✔
2395
  as->mclim = as->mcbot + MCLIM_REDZONE;
3,855✔
2396
  asm_setup_target(as);
3,855✔
2397

2398
  /*
2399
  ** This is a loop, because the MCode may have to be (re-)assembled
2400
  ** multiple times:
2401
  **
2402
  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
2403
  **    backend wants the MCode to be aligned differently.
2404
  **
2405
  **    This is currently only the case on x86/x64, where small loops get
2406
  **    an aligned loop body plus a short branch. Not much effort is wasted,
2407
  **    because the abort happens very quickly and only once.
2408
  **
2409
  ** 2. The IR is immovable, since the MCode embeds pointers to various
2410
  **    constants inside the IR. But RENAMEs may need to be added to the IR
2411
  **    during assembly, which might grow and reallocate the IR. We check
2412
  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
2413
  **    copy (in J->curfinal.ir) and try again.
2414
  **
2415
  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
2416
  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
2417
  **    always have one spare slot in the IR (see above), which means we
2418
  **    have to redo the assembly for only ~2% of all traces.
2419
  **
2420
  **    Very, very rarely, this needs to be done repeatedly, since the
2421
  **    location of constants inside the IR (actually, reachability from
2422
  **    a global pointer) may affect register allocation and thus the
2423
  **    number of RENAMEs.
2424
  */
2425
  for (;;) {
5,008✔
2426
    as->mcp = as->mctop;
5,008✔
2427
#ifdef LUA_USE_ASSERT
2428
    as->mcp_prev = as->mcp;
2429
#endif
2430
    as->ir = J->curfinal->ir;  /* Use the copied IR. */
5,008✔
2431
    as->curins = J->cur.nins = as->orignins;
5,008✔
2432

2433
    RA_DBG_START();
5,008✔
2434
    RA_DBGX((as, "===== STOP ====="));
5,008✔
2435

2436
    /* General trace setup. Emit tail of trace. */
2437
    asm_tail_prep(as);
5,008✔
2438
    as->mcloop = NULL;
5,008✔
2439
    as->flagmcp = NULL;
5,008✔
2440
    as->topslot = 0;
5,008✔
2441
    as->gcsteps = 0;
5,008✔
2442
    as->sectref = as->loopref;
5,008✔
2443
    as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
5,008✔
2444
    asm_setup_regsp(as);
5,008✔
2445
    if (!as->loopref)
5,008✔
2446
      asm_tail_link(as);
2,584✔
2447

2448
    /* Assemble a trace in linear backwards order. */
2449
    for (as->curins--; as->curins > as->stopins; as->curins--) {
620,867✔
2450
      IRIns *ir = IR(as->curins);
615,908✔
2451
      /* 64 bit types handled by SPLIT for 32 bit archs. */
2452
      lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
615,908✔
2453
                 "IR %04d has unsplit 64 bit type",
2454
                 (int)(ir - as->ir) - REF_BIAS);
2455
      asm_snap_prev(as);
615,908✔
2456
      if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
615,908✔
2457
        continue;  /* Dead-code elimination can be soooo easy. */
13,595✔
2458
      if (irt_isguard(ir->t))
602,313✔
2459
        asm_snap_prep(as);
264,543✔
2460
      RA_DBG_REF();
602,313✔
2461
      checkmclim(as);
602,313✔
2462
      asm_ir(as, ir);
602,270✔
2463
    }
2464

2465
    if (as->realign && J->curfinal->nins >= T->nins)
4,959✔
2466
      continue;  /* Retry in case only the MCode needs to be realigned. */
1,099✔
2467

2468
    /* Emit head of trace. */
2469
    RA_DBG_REF();
3,860✔
2470
    checkmclim(as);
3,860✔
2471
    if (as->gcsteps > 0) {
3,860✔
2472
      as->curins = as->T->snap[0].ref;
409✔
2473
      asm_snap_prep(as);  /* The GC check is a guard. */
409✔
2474
      asm_gc_check(as);
409✔
2475
      as->curins = as->stopins;
409✔
2476
    }
2477
    ra_evictk(as);
3,860✔
2478
    if (as->parent)
3,860✔
2479
      asm_head_side(as);
1,713✔
2480
    else
2481
      asm_head_root(as);
2,147✔
2482
    asm_phi_fixup(as);
3,783✔
2483

2484
    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
3,783✔
2485
      lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
3,729✔
2486
      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
3,729✔
2487
             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
3,729✔
2488
      T->nins = J->curfinal->nins;
3,729✔
2489
      /* Fill mcofs of any unprocessed snapshots. */
2490
      as->curins = REF_FIRST;
3,729✔
2491
      asm_snap_prev(as);
3,729✔
2492
      break;  /* Done. */
3,729✔
2493
    }
2494

2495
    /* Otherwise try again with a bigger IR. */
2496
    lj_trace_free(J2G(J), J->curfinal);
54✔
2497
    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
54✔
2498
    J->curfinal = lj_trace_alloc(J->L, T);
54✔
2499
    as->realign = NULL;
54✔
2500
  }
2501

2502
  RA_DBGX((as, "===== START ===="));
3,729✔
2503
  RA_DBG_FLUSH();
3,729✔
2504
  if (as->freeset != RSET_ALL)
3,729✔
UNCOV
2505
    lj_trace_err(as->J, LJ_TRERR_BADRA);  /* Ouch! Should never happen. */
×
2506

2507
  /* Set trace entry point before fixing up tail to allow link to self. */
2508
  T->mcode = as->mcp;
3,729✔
2509
  T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
3,729✔
2510
  if (!as->loopref)
3,729✔
2511
    asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
2,466✔
2512
  T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
3,729✔
2513
  asm_snap_fixup_mcofs(as);
3,729✔
2514
#if LJ_TARGET_MCODE_FIXUP
2515
  asm_mcode_fixup(T->mcode, T->szmcode);
2516
#endif
2517
  lj_mcode_sync(T->mcode, as->mctoporig);
3,729✔
2518
}
3,729✔
2519
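Editor's aside: the retry structure documented in the long comment inside lj_asm_trace() boils down to "assemble into a copy sized for the current IR plus one spare slot, and redo the whole pass only if the pass needed more room than that". The toy program below models just that control flow; BASE_NINS, RENAMES and toy_pass() are invented for the illustration and are not LuaJIT APIs.

#include <stdio.h>

enum { BASE_NINS = 8, RENAMES = 2 };   /* Invented sizes for the sketch. */

/* Pretend assembly pass: reports how many IR slots it ended up needing. */
static unsigned toy_pass(unsigned room)
{
  (void)room;                          /* A real pass would fill the copy. */
  return BASE_NINS + RENAMES;          /* Two RENAMEs were appended. */
}

int main(void)
{
  unsigned ncopy = BASE_NINS + 1;      /* Copy of the IR plus one spare slot. */
  unsigned passes = 0;
  for (;;) {
    unsigned wanted = toy_pass(ncopy);
    passes++;
    if (wanted <= ncopy) break;        /* The copy was big enough: done. */
    ncopy = wanted;                    /* IR outgrew the copy: resize, retry. */
  }
  printf("assembled in %u pass(es), copy holds %u slots\n", passes, ncopy);
  return 0;
}

With zero or one RENAME, the overwhelmingly common case according to the comment, the loop above exits after a single pass.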

2520
#undef IR
2521

2522
#endif