tarantool / luajit / 6821269217 (push, via GitHub)

10 Nov 2023 05:48AM UTC coverage: 88.456% (-0.004%) from 88.46%

Commit by igormunkin:
Fix FOLD rule for x-0.

Reported by XmiliaH.

(cherry-picked from commit 7b994e0ee)

Fold optimization x - (-0) ==> x is INVALID for x = -0 in FP arithmetic:
the folded result is -0, while the correct result is +0. This patch allows
only the x - (+0) ==> x optimization.

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#9145
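
To illustrate the issue described above, here is a minimal, standalone C check
(an illustrative sketch, not part of the patch or of the LuaJIT sources):

#include <math.h>
#include <stdio.h>

int main(void)
{
  double x = -0.0;
  double correct = x - (-0.0);  /* IEEE 754: (-0) - (-0) == (-0) + (+0) == +0. */
  double folded  = x;           /* What the invalid fold x - (-0) ==> x yields: -0. */
  /* -0.0 and +0.0 compare equal, so use signbit() to tell them apart. */
  printf("correct: %c0  folded: %c0\n",
         signbit(correct) ? '-' : '+', signbit(folded) ? '-' : '+');
  return 0;
}

Compiled with any C99 compiler, this should print "correct: +0  folded: -0",
which is exactly the discrepancy the patch guards against.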

5359 of 5975 branches covered (89.69%)

Branch coverage included in aggregate %.

1 of 1 new or added line in 1 file covered. (100.0%)

3 existing lines in 3 files now uncovered.

20540 of 23304 relevant lines covered (88.14%)

2753453.46 hits per line

Source File: /src/lj_asm.c (96.47% covered)
1
/*
2
** IR assembler (SSA IR -> machine code).
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_asm_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10

11
#if LJ_HASJIT
12

13
#include "lj_gc.h"
14
#include "lj_str.h"
15
#include "lj_tab.h"
16
#include "lj_frame.h"
17
#if LJ_HASFFI
18
#include "lj_ctype.h"
19
#endif
20
#include "lj_ir.h"
21
#include "lj_jit.h"
22
#include "lj_ircall.h"
23
#include "lj_iropt.h"
24
#include "lj_mcode.h"
25
#include "lj_iropt.h"
26
#include "lj_trace.h"
27
#include "lj_snap.h"
28
#include "lj_asm.h"
29
#include "lj_dispatch.h"
30
#include "lj_vm.h"
31
#include "lj_target.h"
32

33
#ifdef LUA_USE_ASSERT
34
#include <stdio.h>
35
#endif
36

37
/* -- Assembler state and common macros ----------------------------------- */
38

39
/* Assembler state. */
40
typedef struct ASMState {
41
  RegCost cost[RID_MAX];  /* Reference and blended allocation cost for regs. */
42

43
  MCode *mcp;                /* Current MCode pointer (grows down). */
44
  MCode *mclim;                /* Lower limit for MCode memory + red zone. */
45
#ifdef LUA_USE_ASSERT
46
  MCode *mcp_prev;        /* Red zone overflow check. */
47
#endif
48

49
  IRIns *ir;                /* Copy of pointer to IR instructions/constants. */
50
  jit_State *J;                /* JIT compiler state. */
51

52
#if LJ_TARGET_X86ORX64
53
  x86ModRM mrm;                /* Fused x86 address operand. */
54
#endif
55

56
  RegSet freeset;        /* Set of free registers. */
57
  RegSet modset;        /* Set of registers modified inside the loop. */
58
  RegSet weakset;        /* Set of weakly referenced registers. */
59
  RegSet phiset;        /* Set of PHI registers. */
60

61
  uint32_t flags;        /* Copy of JIT compiler flags. */
62
  int loopinv;                /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */
63

64
  int32_t evenspill;        /* Next even spill slot. */
65
  int32_t oddspill;        /* Next odd spill slot (or 0). */
66

67
  IRRef curins;                /* Reference of current instruction. */
68
  IRRef stopins;        /* Stop assembly before hitting this instruction. */
69
  IRRef orignins;        /* Original T->nins. */
70

71
  IRRef snapref;        /* Current snapshot is active after this reference. */
72
  IRRef snaprename;        /* Rename highwater mark for snapshot check. */
73
  SnapNo snapno;        /* Current snapshot number. */
74
  SnapNo loopsnapno;        /* Loop snapshot number. */
75
  BloomFilter snapfilt1, snapfilt2;        /* Filled with snapshot refs. */
76
  int snapalloc;        /* Current snapshot needs allocation. */
77

78
  IRRef fuseref;        /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
79
  IRRef sectref;        /* Section base reference (loopref or 0). */
80
  IRRef loopref;        /* Reference of LOOP instruction (or 0). */
81

82
  BCReg topslot;        /* Number of slots for stack check (unless 0). */
83
  int32_t gcsteps;        /* Accumulated number of GC steps (per section). */
84

85
  GCtrace *T;                /* Trace to assemble. */
86
  GCtrace *parent;        /* Parent trace (or NULL). */
87

88
  MCode *mcbot;                /* Bottom of reserved MCode. */
89
  MCode *mctop;                /* Top of generated MCode. */
90
  MCode *mctoporig;        /* Original top of generated MCode. */
91
  MCode *mcloop;        /* Pointer to loop MCode (or NULL). */
92
  MCode *invmcp;        /* Points to invertible loop branch (or NULL). */
93
  MCode *flagmcp;        /* Pending opportunity to merge flag setting ins. */
94
  MCode *realign;        /* Realign loop if not NULL. */
95

96
#ifdef RID_NUM_KREF
97
  intptr_t krefk[RID_NUM_KREF];
98
#endif
99
  IRRef1 phireg[RID_MAX];  /* PHI register references. */
100
  uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
101
} ASMState;
102

103
#ifdef LUA_USE_ASSERT
104
#define lj_assertA(c, ...)        lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
105
#else
106
#define lj_assertA(c, ...)        ((void)as)
107
#endif
108

109
#define IR(ref)                        (&as->ir[(ref)])
110

111
#define ASMREF_TMP1                REF_TRUE        /* Temp. register. */
112
#define ASMREF_TMP2                REF_FALSE        /* Temp. register. */
113
#define ASMREF_L                REF_NIL                /* Stores register for L. */
114

115
/* Check for variant to invariant references. */
116
#define iscrossref(as, ref)        ((ref) < as->sectref)
117

118
/* Inhibit memory op fusion from variant to invariant references. */
119
#define FUSE_DISABLED                (~(IRRef)0)
120
#define mayfuse(as, ref)        ((ref) > as->fuseref)
121
#define neverfuse(as)                (as->fuseref == FUSE_DISABLED)
122
#define canfuse(as, ir)                (!neverfuse(as) && !irt_isphi((ir)->t))
123
#define opisfusableload(o) \
124
  ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
125
   (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD)
126

127
/* Sparse limit checks using a red zone before the actual limit. */
128
#define MCLIM_REDZONE        64
129

130
static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
44✔
131
{
132
  lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
44✔
133
}
134

135
static LJ_AINLINE void checkmclim(ASMState *as)
633,909✔
136
{
137
#ifdef LUA_USE_ASSERT
138
  if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
139
    IRIns *ir = IR(as->curins+1);
140
    lj_assertA(0, "red zone overflow: %p IR %04d  %02d %04d %04d\n", as->mcp,
141
      as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
142
  }
143
#endif
144
  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
1,916✔
145
#ifdef LUA_USE_ASSERT
146
  as->mcp_prev = as->mcp;
147
#endif
148
}
149

150
#ifdef RID_NUM_KREF
151
#define ra_iskref(ref)                ((ref) < RID_NUM_KREF)
152
#define ra_krefreg(ref)                ((Reg)(RID_MIN_KREF + (Reg)(ref)))
153
#define ra_krefk(as, ref)        (as->krefk[(ref)])
154

155
static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
156
{
157
  IRRef ref = (IRRef)(r - RID_MIN_KREF);
158
  as->krefk[ref] = k;
159
  as->cost[r] = REGCOST(ref, ref);
160
}
161

162
#else
163
#define ra_iskref(ref)                0
164
#define ra_krefreg(ref)                RID_MIN_GPR
165
#define ra_krefk(as, ref)        0
166
#endif
167

168
/* Arch-specific field offsets. */
169
static const uint8_t field_ofs[IRFL__MAX+1] = {
170
#define FLOFS(name, ofs)        (uint8_t)(ofs),
171
IRFLDEF(FLOFS)
172
#undef FLOFS
173
  0
174
};
175

176
/* -- Target-specific instruction emitter --------------------------------- */
177

178
#if LJ_TARGET_X86ORX64
179
#include "lj_emit_x86.h"
180
#elif LJ_TARGET_ARM
181
#include "lj_emit_arm.h"
182
#elif LJ_TARGET_ARM64
183
#include "lj_emit_arm64.h"
184
#elif LJ_TARGET_PPC
185
#include "lj_emit_ppc.h"
186
#elif LJ_TARGET_MIPS
187
#include "lj_emit_mips.h"
188
#else
189
#error "Missing instruction emitter for target CPU"
190
#endif
191

192
/* Generic load/store of register from/to stack slot. */
193
#define emit_spload(as, ir, r, ofs) \
194
  emit_loadofs(as, ir, (r), RID_SP, (ofs))
195
#define emit_spstore(as, ir, r, ofs) \
196
  emit_storeofs(as, ir, (r), RID_SP, (ofs))
197

198
/* -- Register allocator debugging ---------------------------------------- */
199

200
/* #define LUAJIT_DEBUG_RA */
201

202
#ifdef LUAJIT_DEBUG_RA
203

204
#include <stdio.h>
205
#include <stdarg.h>
206

207
#define RIDNAME(name)        #name,
208
static const char *const ra_regname[] = {
209
  GPRDEF(RIDNAME)
210
  FPRDEF(RIDNAME)
211
  VRIDDEF(RIDNAME)
212
  NULL
213
};
214
#undef RIDNAME
215

216
static char ra_dbg_buf[65536];
217
static char *ra_dbg_p;
218
static char *ra_dbg_merge;
219
static MCode *ra_dbg_mcp;
220

221
static void ra_dstart(void)
222
{
223
  ra_dbg_p = ra_dbg_buf;
224
  ra_dbg_merge = NULL;
225
  ra_dbg_mcp = NULL;
226
}
227

228
static void ra_dflush(void)
229
{
230
  fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
231
  ra_dstart();
232
}
233

234
static void ra_dprintf(ASMState *as, const char *fmt, ...)
235
{
236
  char *p;
237
  va_list argp;
238
  va_start(argp, fmt);
239
  p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
240
  ra_dbg_mcp = NULL;
241
  p += sprintf(p, "%08x  \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
242
  for (;;) {
243
    const char *e = strchr(fmt, '$');
244
    if (e == NULL) break;
245
    memcpy(p, fmt, (size_t)(e-fmt));
246
    p += e-fmt;
247
    if (e[1] == 'r') {
248
      Reg r = va_arg(argp, Reg) & RID_MASK;
249
      if (r <= RID_MAX) {
250
        const char *q;
251
        for (q = ra_regname[r]; *q; q++)
252
          *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
253
      } else {
254
        *p++ = '?';
255
        lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
256
      }
257
    } else if (e[1] == 'f' || e[1] == 'i') {
258
      IRRef ref;
259
      if (e[1] == 'f')
260
        ref = va_arg(argp, IRRef);
261
      else
262
        ref = va_arg(argp, IRIns *) - as->ir;
263
      if (ref >= REF_BIAS)
264
        p += sprintf(p, "%04d", ref - REF_BIAS);
265
      else
266
        p += sprintf(p, "K%03d", REF_BIAS - ref);
267
    } else if (e[1] == 's') {
268
      uint32_t slot = va_arg(argp, uint32_t);
269
      p += sprintf(p, "[sp+0x%x]", sps_scale(slot));
270
    } else if (e[1] == 'x') {
271
      p += sprintf(p, "%08x", va_arg(argp, int32_t));
272
    } else {
273
      lj_assertA(0, "bad debug format code");
274
    }
275
    fmt = e+2;
276
  }
277
  va_end(argp);
278
  while (*fmt)
279
    *p++ = *fmt++;
280
  *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
281
  if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
282
    fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
283
    p = ra_dbg_buf;
284
  }
285
  ra_dbg_p = p;
286
}
287

288
#define RA_DBG_START()        ra_dstart()
289
#define RA_DBG_FLUSH()        ra_dflush()
290
#define RA_DBG_REF() \
291
  do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
292
       ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
293
#define RA_DBGX(x)        ra_dprintf x
294

295
#else
296
#define RA_DBG_START()        ((void)0)
297
#define RA_DBG_FLUSH()        ((void)0)
298
#define RA_DBG_REF()        ((void)0)
299
#define RA_DBGX(x)        ((void)0)
300
#endif
301

302
/* -- Register allocator -------------------------------------------------- */
303

304
#define ra_free(as, r)                rset_set(as->freeset, (r))
305
#define ra_modified(as, r)        rset_set(as->modset, (r))
306
#define ra_weak(as, r)                rset_set(as->weakset, (r))
307
#define ra_noweak(as, r)        rset_clear(as->weakset, (r))
308

309
#define ra_used(ir)                (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
310

311
/* Setup register allocator. */
312
static void ra_setup(ASMState *as)
5,027✔
313
{
314
  Reg r;
5,027✔
315
  /* Initially all regs (except the stack pointer) are free for use. */
316
  as->freeset = RSET_INIT;
5,027✔
317
  as->modset = RSET_EMPTY;
5,027✔
318
  as->weakset = RSET_EMPTY;
5,027✔
319
  as->phiset = RSET_EMPTY;
5,027✔
320
  memset(as->phireg, 0, sizeof(as->phireg));
5,027✔
321
  for (r = RID_MIN_GPR; r < RID_MAX; r++)
165,891✔
322
    as->cost[r] = REGCOST(~0u, 0u);
160,864✔
323
}
5,027✔
324

325
/* Rematerialize constants. */
326
static Reg ra_rematk(ASMState *as, IRRef ref)
13,901✔
327
{
328
  IRIns *ir;
13,901✔
329
  Reg r;
13,901✔
330
  if (ra_iskref(ref)) {
13,901✔
331
    r = ra_krefreg(ref);
332
    lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
333
    ra_free(as, r);
334
    ra_modified(as, r);
335
#if LJ_64
336
    emit_loadu64(as, r, ra_krefk(as, ref));
337
#else
338
    emit_loadi(as, r, ra_krefk(as, ref));
339
#endif
340
    return r;
341
  }
342
  ir = IR(ref);
13,901✔
343
  r = ir->r;
13,901✔
344
  lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
13,901✔
345
  lj_assertA(!ra_hasspill(ir->s),
13,901✔
346
             "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
347
  ra_free(as, r);
13,901✔
348
  ra_modified(as, r);
13,901✔
349
  ir->r = RID_INIT;  /* Do not keep any hint. */
13,901✔
350
  RA_DBGX((as, "remat     $i $r", ir, r));
13,901✔
351
#if !LJ_SOFTFP32
352
  if (ir->o == IR_KNUM) {
13,901✔
353
    emit_loadk64(as, r, ir);
5,040✔
354
  } else
355
#endif
356
  if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
8,861✔
357
    ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
1,992✔
358
    emit_getgl(as, r, jit_base);
1,992✔
359
  } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
6,869✔
360
    /* REF_NIL stores ASMREF_L register. */
361
    lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
3,038✔
362
    emit_getgl(as, r, cur_L);
3,038✔
363
#if LJ_64
364
  } else if (ir->o == IR_KINT64) {
3,831✔
365
    emit_loadu64(as, r, ir_kint64(ir)->u64);
34✔
366
#if LJ_GC64
367
  } else if (ir->o == IR_KGC) {
3,797✔
368
    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
3,658✔
369
  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
139✔
370
    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
23✔
371
#endif
372
#endif
373
  } else {
374
    lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
116✔
375
               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
376
               "rematk of bad IR op %d", ir->o);
377
    emit_loadi(as, r, ir->i);
116✔
378
  }
379
  return r;
13,901✔
380
}
381

382
/* Force a spill. Allocate a new spill slot if needed. */
383
static int32_t ra_spill(ASMState *as, IRIns *ir)
384
{
385
  int32_t slot = ir->s;
386
  lj_assertA(ir >= as->ir + REF_TRUE,
387
             "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
388
  if (!ra_hasspill(slot)) {
389
    if (irt_is64(ir->t)) {
390
      slot = as->evenspill;
391
      as->evenspill += 2;
392
    } else if (as->oddspill) {
393
      slot = as->oddspill;
394
      as->oddspill = 0;
395
    } else {
396
      slot = as->evenspill;
397
      as->oddspill = slot+1;
398
      as->evenspill += 2;
399
    }
400
    if (as->evenspill > 256)
401
      lj_trace_err(as->J, LJ_TRERR_SPILLOV);
402
    ir->s = (uint8_t)slot;
403
  }
404
  return sps_scale(slot);
405
}
406

407
/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
408
static Reg ra_releasetmp(ASMState *as, IRRef ref)
4,532✔
409
{
410
  IRIns *ir = IR(ref);
4,532✔
411
  Reg r = ir->r;
4,532✔
412
  lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
4,532✔
413
  lj_assertA(!ra_hasspill(ir->s),
4,532✔
414
             "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
415
  ra_free(as, r);
4,532✔
416
  ra_modified(as, r);
4,532✔
417
  ir->r = RID_INIT;
4,532✔
418
  return r;
2,375✔
419
}
420

421
/* Restore a register (marked as free). Rematerialize or force a spill. */
422
static Reg ra_restore(ASMState *as, IRRef ref)
12,111✔
423
{
424
  if (emit_canremat(ref)) {
12,111✔
425
    return ra_rematk(as, ref);
6,310✔
426
  } else {
427
    IRIns *ir = IR(ref);
5,801✔
428
    int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
5,801✔
429
    Reg r = ir->r;
5,801✔
430
    lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
5,801✔
431
    ra_sethint(ir->r, r);  /* Keep hint. */
5,801✔
432
    ra_free(as, r);
5,801✔
433
    if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
5,801✔
434
      ra_modified(as, r);
5,386✔
435
      RA_DBGX((as, "restore   $i $r", ir, r));
5,386✔
436
      emit_spload(as, ir, r, ofs);
5,386✔
437
    }
438
    return r;
5,801✔
439
  }
440
}
441

442
/* Save a register to a spill slot. */
443
static void ra_save(ASMState *as, IRIns *ir, Reg r)
4,372✔
444
{
445
  RA_DBGX((as, "save      $i $r", ir, r));
4,372✔
446
  emit_spstore(as, ir, r, sps_scale(ir->s));
4,372✔
447
}
3,431✔
448

449
#define MINCOST(name) \
450
  if (rset_test(RSET_ALL, RID_##name) && \
451
      LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \
452
    cost = as->cost[RID_##name];
453

454
/* Evict the register with the lowest cost, forcing a restore. */
455
static Reg ra_evict(ASMState *as, RegSet allow)
2,137✔
456
{
457
  IRRef ref;
2,137✔
458
  RegCost cost = ~(RegCost)0;
2,137✔
459
  lj_assertA(allow != RSET_EMPTY, "evict from empty set");
2,137✔
460
  if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
2,137✔
461
    GPRDEF(MINCOST)
1,719✔
462
  } else {
463
    FPRDEF(MINCOST)
418✔
464
  }
465
  ref = regcost_ref(cost);
2,137✔
466
  lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
2,137✔
467
             "evict of out-of-range IR %04d", ref - REF_BIAS);
468
  /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
469
  if (!irref_isk(ref) && (as->weakset & allow)) {
2,137✔
470
    IRIns *ir = IR(ref);
18✔
471
    if (!rset_test(as->weakset, ir->r))
18✔
472
      ref = regcost_ref(as->cost[rset_pickbot((as->weakset & allow))]);
12✔
473
  }
474
  return ra_restore(as, ref);
2,137✔
475
}
476

477
/* Pick any register (marked as free). Evict on-demand. */
478
static Reg ra_pick(ASMState *as, RegSet allow)
121,799✔
479
{
480
  RegSet pick = as->freeset & allow;
121,799✔
481
  if (!pick)
121,799✔
482
    return ra_evict(as, allow);
20✔
483
  else
484
    return rset_picktop(pick);
121,779✔
485
}
486

487
/* Get a scratch register (marked as free). */
488
static Reg ra_scratch(ASMState *as, RegSet allow)
121,749✔
489
{
490
  Reg r = ra_pick(as, allow);
121,749✔
491
  ra_modified(as, r);
121,749✔
492
  RA_DBGX((as, "scratch        $r", r));
121,749✔
493
  return r;
121,749✔
494
}
495

496
/* Evict all registers from a set (if not free). */
497
static void ra_evictset(ASMState *as, RegSet drop)
6,912✔
498
{
499
  RegSet work;
6,912✔
500
  as->modset |= drop;
6,912✔
501
#if !LJ_SOFTFP
502
  work = (drop & ~as->freeset) & RSET_FPR;
6,912✔
503
  while (work) {
6,912✔
504
    Reg r = rset_pickbot(work);
2,947✔
505
    ra_restore(as, regcost_ref(as->cost[r]));
2,947✔
506
    rset_clear(work, r);
2,947✔
507
    checkmclim(as);
9,859✔
508
  }
509
#endif
510
  work = (drop & ~as->freeset);
6,912✔
511
  while (work) {
6,912✔
512
    Reg r = rset_pickbot(work);
6,787✔
513
    ra_restore(as, regcost_ref(as->cost[r]));
6,787✔
514
    rset_clear(work, r);
6,787✔
515
    checkmclim(as);
13,699✔
516
  }
517
}
6,911✔
518

519
/* Evict (rematerialize) all registers allocated to constants. */
520
static void ra_evictk(ASMState *as)
3,873✔
521
{
522
  RegSet work;
3,873✔
523
#if !LJ_SOFTFP
524
  work = ~as->freeset & RSET_FPR;
3,873✔
525
  while (work) {
7,497✔
526
    Reg r = rset_pickbot(work);
3,624✔
527
    IRRef ref = regcost_ref(as->cost[r]);
3,624✔
528
    if (emit_canremat(ref) && irref_isk(ref)) {
3,624✔
529
      ra_rematk(as, ref);
3,258✔
530
      checkmclim(as);
3,258✔
531
    }
532
    rset_clear(work, r);
3,624✔
533
  }
534
#endif
535
  work = ~as->freeset & RSET_GPR;
3,873✔
536
  while (work) {
12,641✔
537
    Reg r = rset_pickbot(work);
8,768✔
538
    IRRef ref = regcost_ref(as->cost[r]);
8,768✔
539
    if (emit_canremat(ref) && irref_isk(ref)) {
8,768✔
540
      ra_rematk(as, ref);
4,112✔
541
      checkmclim(as);
4,112✔
542
    }
543
    rset_clear(work, r);
8,768✔
544
  }
545
}
3,873✔
546

547
#ifdef RID_NUM_KREF
548
/* Allocate a register for a constant. */
549
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
550
{
551
  /* First try to find a register which already holds the same constant. */
552
  RegSet pick, work = ~as->freeset & RSET_GPR;
553
  Reg r;
554
  while (work) {
555
    IRRef ref;
556
    r = rset_pickbot(work);
557
    ref = regcost_ref(as->cost[r]);
558
#if LJ_64
559
    if (ref < ASMREF_L) {
560
      if (ra_iskref(ref)) {
561
        if (k == ra_krefk(as, ref))
562
          return r;
563
      } else {
564
        IRIns *ir = IR(ref);
565
        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
566
#if LJ_GC64
567
            (ir->o == IR_KINT && k == ir->i) ||
568
            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
569
            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
570
             k == (intptr_t)ir_kptr(ir))
571
#else
572
            (ir->o != IR_KINT64 && k == ir->i)
573
#endif
574
           )
575
          return r;
576
      }
577
    }
578
#else
579
    if (ref < ASMREF_L &&
580
        k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
581
      return r;
582
#endif
583
    rset_clear(work, r);
584
  }
585
  pick = as->freeset & allow;
586
  if (pick) {
587
    /* Constants should preferably get unmodified registers. */
588
    if ((pick & ~as->modset))
589
      pick &= ~as->modset;
590
    r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
591
  } else {
592
    r = ra_evict(as, allow);
593
  }
594
  RA_DBGX((as, "allock    $x $r", k, r));
595
  ra_setkref(as, r, k);
596
  rset_clear(as->freeset, r);
597
  ra_noweak(as, r);
598
  return r;
599
}
600

601
/* Allocate a specific register for a constant. */
602
static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
603
{
604
  Reg kr = ra_allock(as, k, RID2RSET(r));
605
  if (kr != r) {
606
    IRIns irdummy;
607
    irdummy.t.irt = IRT_INT;
608
    ra_scratch(as, RID2RSET(r));
609
    emit_movrr(as, &irdummy, r, kr);
610
  }
611
}
612
#else
613
#define ra_allockreg(as, k, r)                emit_loadi(as, (r), (k))
614
#endif
615

616
/* Allocate a register for ref from the allowed set of registers.
617
** Note: this function assumes the ref does NOT have a register yet!
618
** Picks an optimal register, sets the cost and marks the register as non-free.
619
*/
620
static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
409,638✔
621
{
622
  IRIns *ir = IR(ref);
409,638✔
623
  RegSet pick = as->freeset & allow;
409,638✔
624
  Reg r;
409,638✔
625
  lj_assertA(ra_noreg(ir->r),
409,638✔
626
             "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
627
  if (pick) {
409,638✔
628
    /* First check register hint from propagation or PHI. */
629
    if (ra_hashint(ir->r)) {
407,521✔
630
      r = ra_gethint(ir->r);
63,828✔
631
      if (rset_test(pick, r))  /* Use hint register if possible. */
63,828✔
632
        goto found;
57,677✔
633
      /* Rematerialization is cheaper than missing a hint. */
634
      if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) {
6,151✔
635
        ra_rematk(as, regcost_ref(as->cost[r]));
221✔
636
        goto found;
221✔
637
      }
638
      RA_DBGX((as, "hintmiss  $f $r", ref, r));
349,623✔
639
    }
640
    /* Invariants should preferably get unmodified registers. */
641
    if (ref < as->loopref && !irt_isphi(ir->t)) {
349,623✔
642
      if ((pick & ~as->modset))
15,395✔
643
        pick &= ~as->modset;
8,345✔
644
      r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
15,395✔
645
    } else {
646
      /* We've got plenty of regs, so get callee-save regs if possible. */
647
      if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH))
334,228✔
648
        pick &= ~RSET_SCRATCH;
277,010✔
649
      r = rset_picktop(pick);
334,228✔
650
    }
651
  } else {
652
    r = ra_evict(as, allow);
2,117✔
653
  }
654
found:
409,638✔
655
  RA_DBGX((as, "alloc     $f $r", ref, r));
409,638✔
656
  ir->r = (uint8_t)r;
409,638✔
657
  rset_clear(as->freeset, r);
409,638✔
658
  ra_noweak(as, r);
409,638✔
659
  as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t));
409,638✔
660
  return r;
409,638✔
661
}
662

663
/* Allocate a register on-demand. */
664
static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
474,799✔
665
{
666
  Reg r = IR(ref)->r;
474,799✔
667
  /* Note: allow is ignored if the register is already allocated. */
668
  if (ra_noreg(r)) r = ra_allocref(as, ref, allow);
474,799✔
669
  ra_noweak(as, r);
474,799✔
670
  return r;
474,799✔
671
}
672

673
/* Add a register rename to the IR. */
674
static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
675
{
676
  IRRef ren;
677
  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
678
  ren = tref_ref(lj_ir_emit(as->J));
679
  as->J->cur.ir[ren].r = (uint8_t)down;
680
  as->J->cur.ir[ren].s = SPS_NONE;
681
}
682

683
/* Rename register allocation and emit move. */
684
static void ra_rename(ASMState *as, Reg down, Reg up)
726✔
685
{
686
  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
726✔
687
  IRIns *ir = IR(ref);
726✔
688
  ir->r = (uint8_t)up;
726✔
689
  as->cost[down] = 0;
726✔
690
  lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
726✔
691
             "rename between GPR/FPR %d and %d", down, up);
692
  lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
726✔
693
  lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
726✔
694
  ra_free(as, down);  /* 'down' is free ... */
726✔
695
  ra_modified(as, down);
726✔
696
  rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
726✔
697
  ra_noweak(as, up);
726✔
698
  RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
726✔
699
  emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
726✔
700
  if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
726✔
701
    /*
702
    ** The rename is effective at the subsequent (already emitted) exit
703
    ** branch. This is for the current snapshot (as->snapno). Except if we
704
    ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
705
    ** then it belongs to the next snapshot.
706
    ** See also the discussion at asm_snap_checkrename().
707
    */
708
    ra_addrename(as, down, ref, as->snapno + as->snapalloc);
701✔
709
  }
710
}
726✔
711

712
/* Pick a destination register (marked as free).
713
** Caveat: allow is ignored if there's already a destination register.
714
** Use ra_destreg() to get a specific register.
715
*/
716
static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow)
378,852✔
717
{
718
  Reg dest = ir->r;
378,852✔
719
  if (ra_hasreg(dest)) {
378,852✔
720
    ra_free(as, dest);
375,248✔
721
    ra_modified(as, dest);
375,248✔
722
  } else {
723
    if (ra_hashint(dest) && rset_test((as->freeset&allow), ra_gethint(dest))) {
3,604✔
724
      dest = ra_gethint(dest);
754✔
725
      ra_modified(as, dest);
754✔
726
      RA_DBGX((as, "dest           $r", dest));
754✔
727
    } else {
728
      dest = ra_scratch(as, allow);
2,850✔
729
    }
730
    ir->r = dest;
3,604✔
731
  }
732
  if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest);
378,852✔
733
  return dest;
378,852✔
734
}
735

736
/* Force a specific destination register (marked as free). */
737
static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
5,894✔
738
{
739
  Reg dest = ra_dest(as, ir, RID2RSET(r));
5,894✔
740
  if (dest != r) {
5,894✔
741
    lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
2,571✔
742
    ra_modified(as, r);
2,571✔
743
    emit_movrr(as, ir, dest, r);
2,571✔
744
  }
745
}
5,894✔
746

747
#if LJ_TARGET_X86ORX64
748
/* Propagate dest register to left reference. Emit moves as needed.
749
** This is a required fixup step for all 2-operand machine instructions.
750
*/
751
static void ra_left(ASMState *as, Reg dest, IRRef lref)
49,718✔
752
{
753
  IRIns *ir = IR(lref);
49,718✔
754
  Reg left = ir->r;
49,718✔
755
  if (ra_noreg(left)) {
49,718✔
756
    if (irref_isk(lref)) {
48,635✔
757
      if (ir->o == IR_KNUM) {
903✔
758
        /* FP remat needs a load except for +0. Still better than eviction. */
759
        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
49✔
760
          emit_loadk64(as, dest, ir);
42✔
761
          return;
42✔
762
        }
763
#if LJ_64
764
      } else if (ir->o == IR_KINT64) {
854✔
765
        emit_loadk64(as, dest, ir);
5✔
766
        return;
5✔
767
#if LJ_GC64
768
      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
849✔
769
        emit_loadk64(as, dest, ir);
777✔
770
        return;
777✔
771
#endif
772
#endif
773
      } else if (ir->o != IR_KPRI) {
72✔
774
        lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
71✔
775
                   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
776
                   "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
777
        emit_loadi(as, dest, ir->i);
71✔
778
        return;
71✔
779
      }
780
    }
781
    if (!ra_hashint(left) && !iscrossref(as, lref))
47,740✔
782
      ra_sethint(ir->r, dest);  /* Propagate register hint. */
44,570✔
783
    left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR);
90,866✔
784
  }
785
  ra_noweak(as, left);
48,823✔
786
  /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */
787
  if (dest != left) {
48,823✔
788
    /* Use register renaming if dest is the PHI reg. */
789
    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
1,107✔
790
      ra_modified(as, left);
129✔
791
      ra_rename(as, left, dest);
129✔
792
    } else {
793
      emit_movrr(as, ir, dest, left);
978✔
794
    }
795
  }
796
}
797
#else
798
/* Similar to ra_left, except we override any hints. */
799
static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
800
{
801
  IRIns *ir = IR(lref);
802
  Reg left = ir->r;
803
  if (ra_noreg(left)) {
804
    ra_sethint(ir->r, dest);  /* Propagate register hint. */
805
    left = ra_allocref(as, lref,
806
                       (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR);
807
  }
808
  ra_noweak(as, left);
809
  if (dest != left) {
810
    /* Use register renaming if dest is the PHI reg. */
811
    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
812
      ra_modified(as, left);
813
      ra_rename(as, left, dest);
814
    } else {
815
      emit_movrr(as, ir, dest, left);
816
    }
817
  }
818
}
819
#endif
820

821
#if !LJ_64
822
/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
823
static void ra_destpair(ASMState *as, IRIns *ir)
824
{
825
  Reg destlo = ir->r, desthi = (ir+1)->r;
826
  /* First spill unrelated refs blocking the destination registers. */
827
  if (!rset_test(as->freeset, RID_RETLO) &&
828
      destlo != RID_RETLO && desthi != RID_RETLO)
829
    ra_restore(as, regcost_ref(as->cost[RID_RETLO]));
830
  if (!rset_test(as->freeset, RID_RETHI) &&
831
      destlo != RID_RETHI && desthi != RID_RETHI)
832
    ra_restore(as, regcost_ref(as->cost[RID_RETHI]));
833
  /* Next free the destination registers (if any). */
834
  if (ra_hasreg(destlo)) {
835
    ra_free(as, destlo);
836
    ra_modified(as, destlo);
837
  } else {
838
    destlo = RID_RETLO;
839
  }
840
  if (ra_hasreg(desthi)) {
841
    ra_free(as, desthi);
842
    ra_modified(as, desthi);
843
  } else {
844
    desthi = RID_RETHI;
845
  }
846
  /* Check for conflicts and shuffle the registers as needed. */
847
  if (destlo == RID_RETHI) {
848
    if (desthi == RID_RETLO) {
849
#if LJ_TARGET_X86
850
      *--as->mcp = XI_XCHGa + RID_RETHI;
851
#else
852
      emit_movrr(as, ir, RID_RETHI, RID_TMP);
853
      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
854
      emit_movrr(as, ir, RID_TMP, RID_RETLO);
855
#endif
856
    } else {
857
      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
858
      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
859
    }
860
  } else if (desthi == RID_RETLO) {
861
    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
862
    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
863
  } else {
864
    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
865
    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
866
  }
867
  /* Restore spill slots (if any). */
868
  if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
869
  if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
870
}
871
#endif
872

873
/* -- Snapshot handling --------------------------------------------------- */
874

875
/* Can we rematerialize a KNUM instead of forcing a spill? */
876
static int asm_snap_canremat(ASMState *as)
877
{
878
  Reg r;
879
  for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++)
340✔
880
    if (irref_isk(regcost_ref(as->cost[r])))
320✔
881
      return 1;
882
  return 0;
883
}
884

885
/* Check whether a sunk store corresponds to an allocation. */
886
static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
887
{
888
  if (irs->s == 255) {
889
    if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
890
        irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
891
      IRIns *irk = IR(irs->op1);
892
      if (irk->o == IR_AREF || irk->o == IR_HREFK)
893
        irk = IR(irk->op1);
894
      return (IR(irk->op1) == ira);
895
    }
896
    return 0;
897
  } else {
898
    return (ira + irs->s == irs);  /* Quick check. */
899
  }
900
}
901

902
/* Allocate register or spill slot for a ref that escapes to a snapshot. */
903
static void asm_snap_alloc1(ASMState *as, IRRef ref)
61,745✔
904
{
905
  IRIns *ir = IR(ref);
61,996✔
906
  if (!irref_isk(ref) && ir->r != RID_SUNK) {
61,996✔
907
    bloomset(as->snapfilt1, ref);
61,629✔
908
    bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
61,629✔
909
    if (ra_used(ir)) return;
61,629✔
910
    if (ir->r == RID_SINK) {
4,853✔
911
      ir->r = RID_SUNK;
337✔
912
#if LJ_HASFFI
913
      if (ir->o == IR_CNEWI) {  /* Allocate CNEWI value. */
337✔
914
        asm_snap_alloc1(as, ir->op2);
215✔
915
        if (LJ_32 && (ir+1)->o == IR_HIOP)
916
          asm_snap_alloc1(as, (ir+1)->op2);
917
      } else
918
#endif
919
      {  /* Allocate stored values for TNEW, TDUP and CNEW. */
920
        IRIns *irs;
122✔
921
        lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
122✔
922
                   "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
923
        for (irs = IR(as->snapref-1); irs > ir; irs--)
1,428✔
924
          if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
1,306✔
925
            lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
246✔
926
                       irs->o == IR_FSTORE || irs->o == IR_XSTORE,
927
                       "sunk store IR %04d has bad op %d",
928
                       (int)(irs - as->ir) - REF_BIAS, irs->o);
929
            asm_snap_alloc1(as, irs->op2);
246✔
930
            if (LJ_32 && (irs+1)->o == IR_HIOP)
246✔
931
              asm_snap_alloc1(as, (irs+1)->op2);
932
          }
933
      }
934
    } else {
935
      RegSet allow;
4,516✔
936
      if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
4,516✔
937
        IRIns *irc;
54✔
938
        for (irc = IR(as->curins); irc > ir; irc--)
468✔
939
          if ((irc->op1 == ref || irc->op2 == ref) &&
432✔
940
              !(irc->r == RID_SINK || irc->r == RID_SUNK))
54✔
941
            goto nosink;  /* Don't sink conversion if result is used. */
18✔
942
        asm_snap_alloc1(as, ir->op1);
36✔
943
        return;
36✔
944
      }
945
    nosink:
4,462✔
946
      allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
4,480✔
947
      if ((as->freeset & allow) ||
4,480✔
948
               (allow == RSET_FPR && asm_snap_canremat(as))) {
20✔
949
        /* Get a weak register if we have a free one or can rematerialize. */
950
        Reg r = ra_allocref(as, ref, allow);  /* Allocate a register. */
4,343✔
951
        if (!irt_isphi(ir->t))
4,343✔
952
          ra_weak(as, r);  /* But mark it as weakly referenced. */
3,716✔
953
        checkmclim(as);
4,343✔
954
        RA_DBGX((as, "snapreg   $f $r", ref, ir->r));
955
      } else {
956
        ra_spill(as, ir);  /* Otherwise force a spill slot. */
137✔
957
        RA_DBGX((as, "snapspill $f $s", ref, ir->s));
61,745✔
958
      }
959
    }
960
  }
961
}
962

963
/* Allocate refs escaping to a snapshot. */
964
static void asm_snap_alloc(ASMState *as, int snapno)
53,994✔
965
{
966
  SnapShot *snap = &as->T->snap[snapno];
53,994✔
967
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
53,994✔
968
  MSize n, nent = snap->nent;
53,994✔
969
  as->snapfilt1 = as->snapfilt2 = 0;
53,994✔
970
  for (n = 0; n < nent; n++) {
134,354✔
971
    SnapEntry sn = map[n];
80,360✔
972
    IRRef ref = snap_ref(sn);
80,360✔
973
    if (!irref_isk(ref)) {
80,360✔
974
      asm_snap_alloc1(as, ref);
61,499✔
975
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
61,499✔
976
        lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
977
                   "snap %d[%d] points to bad SOFTFP IR %04d",
978
                   snapno, n, ref - REF_BIAS);
979
        asm_snap_alloc1(as, ref+1);
980
      }
981
    }
982
  }
983
}
53,994✔
984

985
/* All guards for a snapshot use the same exitno. This is currently the
986
** same as the snapshot number. Since the exact origin of the exit cannot
987
** be determined, all guards for the same snapshot must exit with the same
988
** RegSP mapping.
989
** A renamed ref which has been used in a prior guard for the same snapshot
990
** would cause an inconsistency. The easy way out is to force a spill slot.
991
*/
992
static int asm_snap_checkrename(ASMState *as, IRRef ren)
40✔
993
{
994
  if (bloomtest(as->snapfilt1, ren) &&
40✔
995
      bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) {
40✔
996
    IRIns *ir = IR(ren);
40✔
997
    ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
40✔
998
    RA_DBGX((as, "snaprensp $f $s", ren, ir->s));
40✔
999
    return 1;  /* Found. */
40✔
1000
  }
1001
  return 0;  /* Not found. */
1002
}
1003

1004
/* Prepare snapshot for next guard or throwing instruction. */
1005
static void asm_snap_prep(ASMState *as)
266,446✔
1006
{
1007
  if (as->snapalloc) {
266,446✔
1008
    /* Alloc on first invocation for each snapshot. */
1009
    as->snapalloc = 0;
53,955✔
1010
    asm_snap_alloc(as, as->snapno);
53,955✔
1011
    as->snaprename = as->T->nins;
53,955✔
1012
  } else {
1013
    /* Check any renames above the highwater mark. */
1014
    for (; as->snaprename < as->T->nins; as->snaprename++) {
212,531✔
1015
      IRIns *ir = &as->T->ir[as->snaprename];
40✔
1016
      if (asm_snap_checkrename(as, ir->op1))
40✔
1017
        ir->op2 = REF_BIAS-1;  /* Kill rename. */
40✔
1018
    }
1019
  }
1020
}
266,446✔
1021

1022
/* Move to previous snapshot when we cross the current snapshot ref. */
1023
static void asm_snap_prev(ASMState *as)
619,436✔
1024
{
1025
  if (as->curins < as->snapref) {
619,436✔
1026
    ptrdiff_t ofs = as->mctoporig - as->mcp;
56,307✔
1027
    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
56,307✔
1028
    do {
58,916✔
1029
      if (as->snapno == 0) return;
58,916✔
1030
      as->snapno--;
56,818✔
1031
      as->snapref = as->T->snap[as->snapno].ref;
56,818✔
1032
      as->T->snap[as->snapno].mcofs = ofs;  /* Remember mcode offset. */
56,818✔
1033
    } while (as->curins < as->snapref);  /* May have no ins inbetween. */
56,818✔
1034
    as->snapalloc = 1;
54,209✔
1035
  }
1036
}
1037

1038
/* Fixup snapshot mcode offsets. */
1039
static void asm_snap_fixup_mcofs(ASMState *as)
3,739✔
1040
{
1041
  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
3,739✔
1042
  SnapShot *snap = as->T->snap;
3,739✔
1043
  SnapNo i;
3,739✔
1044
  for (i = as->T->nsnap-1; i > 0; i--) {
53,320✔
1045
    /* Compute offset from mcode start and store in correct snapshot. */
1046
    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
49,581✔
1047
  }
1048
  snap[0].mcofs = 0;
3,739✔
1049
}
3,739✔
1050

1051
/* -- Miscellaneous helpers ----------------------------------------------- */
1052

1053
/* Calculate stack adjustment. */
1054
static int32_t asm_stack_adjust(ASMState *as)
3,873✔
1055
{
1056
  if (as->evenspill <= SPS_FIXED)
3,873✔
1057
    return 0;
1058
  return sps_scale(sps_align(as->evenspill));
659✔
1059
}
1060

1061
/* Must match with hash*() in lj_tab.c. */
1062
static uint32_t ir_khash(ASMState *as, IRIns *ir)
1063
{
1064
  uint32_t lo, hi;
1065
  UNUSED(as);
1066
  if (irt_isstr(ir->t)) {
1067
    return ir_kstr(ir)->hash;
1068
  } else if (irt_isnum(ir->t)) {
1069
    lo = ir_knum(ir)->u32.lo;
1070
    hi = ir_knum(ir)->u32.hi << 1;
1071
  } else if (irt_ispri(ir->t)) {
1072
    lj_assertA(!irt_isnil(ir->t), "hash of nil key");
1073
    return irt_type(ir->t)-IRT_FALSE;
1074
  } else {
1075
    lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
1076
    lo = u32ptr(ir_kgc(ir));
1077
#if LJ_GC64
1078
    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
1079
#else
1080
    hi = lo + HASH_BIAS;
1081
#endif
1082
  }
1083
  return hashrot(lo, hi);
1084
}
1085

1086
/* -- Allocations --------------------------------------------------------- */
1087

1088
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args);
1089
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci);
1090

1091
static void asm_snew(ASMState *as, IRIns *ir)
105✔
1092
{
1093
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
105✔
1094
  IRRef args[3];
105✔
1095
  asm_snap_prep(as);
105✔
1096
  args[0] = ASMREF_L;  /* lua_State *L    */
105✔
1097
  args[1] = ir->op1;   /* const char *str */
105✔
1098
  args[2] = ir->op2;   /* size_t len      */
105✔
1099
  as->gcsteps++;
105✔
1100
  asm_setupresult(as, ir, ci);  /* GCstr * */
105✔
1101
  asm_gencall(as, ci, args);
105✔
1102
}
105✔
1103

1104
static void asm_tnew(ASMState *as, IRIns *ir)
182✔
1105
{
1106
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
182✔
1107
  IRRef args[2];
182✔
1108
  asm_snap_prep(as);
182✔
1109
  args[0] = ASMREF_L;     /* lua_State *L    */
182✔
1110
  args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
182✔
1111
  as->gcsteps++;
182✔
1112
  asm_setupresult(as, ir, ci);  /* GCtab * */
182✔
1113
  asm_gencall(as, ci, args);
182✔
1114
  ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1));
182✔
1115
}
182✔
1116

1117
static void asm_tdup(ASMState *as, IRIns *ir)
53✔
1118
{
1119
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
53✔
1120
  IRRef args[2];
53✔
1121
  asm_snap_prep(as);
53✔
1122
  args[0] = ASMREF_L;  /* lua_State *L    */
53✔
1123
  args[1] = ir->op1;   /* const GCtab *kt */
53✔
1124
  as->gcsteps++;
53✔
1125
  asm_setupresult(as, ir, ci);  /* GCtab * */
53✔
1126
  asm_gencall(as, ci, args);
53✔
1127
}
53✔
1128

1129
static void asm_gc_check(ASMState *as);
1130

1131
/* Explicit GC step. */
1132
static void asm_gcstep(ASMState *as, IRIns *ir)
102✔
1133
{
1134
  IRIns *ira;
102✔
1135
  for (ira = IR(as->stopins+1); ira < ir; ira++)
1,645✔
1136
    if ((ira->o == IR_TNEW || ira->o == IR_TDUP ||
1,543✔
1137
         (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI))) &&
1,492✔
1138
        ra_used(ira))
1,492✔
1139
      as->gcsteps++;
1,467✔
1140
  if (as->gcsteps)
102✔
1141
    asm_gc_check(as);
96✔
1142
  as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
102✔
1143
}
102✔
1144

1145
/* -- Buffer operations --------------------------------------------------- */
1146

1147
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1148

1149
static void asm_bufhdr(ASMState *as, IRIns *ir)
780✔
1150
{
1151
  Reg sb = ra_dest(as, ir, RSET_GPR);
780✔
1152
  if ((ir->op2 & IRBUFHDR_APPEND)) {
780✔
1153
    /* Rematerialize const buffer pointer instead of likely spill. */
1154
    IRIns *irp = IR(ir->op1);
47✔
1155
    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
47✔
1156
          (irp == ir-2 && !ra_used(ir-1)))) {
47✔
1157
      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
150✔
1158
        irp = IR(irp->op1);
106✔
1159
      if (irref_isk(irp->op1)) {
44✔
1160
        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
44✔
1161
        ir = irp;
44✔
1162
      }
1163
    }
1164
  } else {
1165
    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
733✔
1166
    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
1167
    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
733✔
1168
    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
733✔
1169
  }
1170
#if LJ_TARGET_X86ORX64
1171
  ra_left(as, sb, ir->op1);
780✔
1172
#else
1173
  ra_leftov(as, sb, ir->op1);
1174
#endif
1175
}
780✔
1176

1177
static void asm_bufput(ASMState *as, IRIns *ir)
1,379✔
1178
{
1179
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1,379✔
1180
  IRRef args[3];
1,379✔
1181
  IRIns *irs;
1,379✔
1182
  int kchar = -129;
1,379✔
1183
  args[0] = ir->op1;  /* SBuf * */
1,379✔
1184
  args[1] = ir->op2;  /* GCstr * */
1,379✔
1185
  irs = IR(ir->op2);
1,379✔
1186
  lj_assertA(irt_isstr(irs->t),
1,379✔
1187
             "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
1188
  if (irs->o == IR_KGC) {
1,379✔
1189
    GCstr *s = ir_kstr(irs);
532✔
1190
    if (s->len == 1) {  /* Optimize put of single-char string constant. */
532✔
1191
      kchar = (int8_t)strdata(s)[0];  /* Signed! */
161✔
1192
      args[1] = ASMREF_TMP1;  /* int, truncated to char */
161✔
1193
      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
161✔
1194
    }
1195
  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
847✔
1196
    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
713✔
1197
      if (irs->op2 == IRTOSTR_NUM) {
246✔
1198
        args[1] = ASMREF_TMP1;  /* TValue * */
12✔
1199
        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
12✔
1200
      } else {
1201
        lj_assertA(irt_isinteger(IR(irs->op1)->t),
234✔
1202
                   "TOSTR of non-numeric IR %04d", irs->op1);
1203
        args[1] = irs->op1;  /* int */
234✔
1204
        if (irs->op2 == IRTOSTR_INT)
234✔
1205
          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1206
        else
1207
          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
170✔
1208
      }
1209
    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
467✔
1210
      args[1] = irs->op1;  /* const void * */
4✔
1211
      args[2] = irs->op2;  /* MSize */
4✔
1212
      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
4✔
1213
    }
1214
  }
1215
  asm_setupresult(as, ir, ci);  /* SBuf * */
1,379✔
1216
  asm_gencall(as, ci, args);
1,379✔
1217
  if (args[1] == ASMREF_TMP1) {
1,379✔
1218
    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
173✔
1219
    if (kchar == -129)
173✔
1220
      asm_tvptr(as, tmp, irs->op1);
12✔
1221
    else
1222
      ra_allockreg(as, kchar, tmp);
161✔
1223
  }
1224
}
1,379✔
1225

1226
static void asm_bufstr(ASMState *as, IRIns *ir)
766✔
1227
{
1228
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
766✔
1229
  IRRef args[1];
766✔
1230
  args[0] = ir->op1;  /* SBuf *sb */
766✔
1231
  as->gcsteps++;
766✔
1232
  asm_setupresult(as, ir, ci);  /* GCstr * */
766✔
1233
  asm_gencall(as, ci, args);
766✔
1234
}
766✔
1235

1236
/* -- Type conversions ---------------------------------------------------- */
1237

1238
static void asm_tostr(ASMState *as, IRIns *ir)
43✔
1239
{
1240
  const CCallInfo *ci;
43✔
1241
  IRRef args[2];
43✔
1242
  asm_snap_prep(as);
43✔
1243
  args[0] = ASMREF_L;
43✔
1244
  as->gcsteps++;
43✔
1245
  if (ir->op2 == IRTOSTR_NUM) {
43✔
1246
    args[1] = ASMREF_TMP1;  /* cTValue * */
24✔
1247
    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
24✔
1248
  } else {
1249
    args[1] = ir->op1;  /* int32_t k */
19✔
1250
    if (ir->op2 == IRTOSTR_INT)
19✔
1251
      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1252
    else
1253
      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
8✔
1254
  }
1255
  asm_setupresult(as, ir, ci);  /* GCstr * */
43✔
1256
  asm_gencall(as, ci, args);
43✔
1257
  if (ir->op2 == IRTOSTR_NUM)
43✔
1258
    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
24✔
1259
}
43✔
1260

1261
#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1262
static void asm_conv64(ASMState *as, IRIns *ir)
1263
{
1264
  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1265
  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1266
  IRCallID id;
1267
  IRRef args[2];
1268
  lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
1269
             "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
1270
  args[LJ_BE] = (ir-1)->op1;
1271
  args[LJ_LE] = ir->op1;
1272
  if (st == IRT_NUM || st == IRT_FLOAT) {
1273
    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1274
    ir--;
1275
  } else {
1276
    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1277
  }
1278
  {
1279
#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1280
    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1281
    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
1282
#else
1283
    const CCallInfo *ci = &lj_ir_callinfo[id];
1284
#endif
1285
    asm_setupresult(as, ir, ci);
1286
    asm_gencall(as, ci, args);
1287
  }
1288
}
1289
#endif
1290

1291
/* -- Memory references --------------------------------------------------- */
1292

1293
static void asm_newref(ASMState *as, IRIns *ir)
1,060✔
1294
{
1295
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1,060✔
1296
  IRRef args[3];
1,060✔
1297
  if (ir->r == RID_SINK)
1,060✔
1298
    return;
33✔
1299
  asm_snap_prep(as);
1,027✔
1300
  args[0] = ASMREF_L;     /* lua_State *L */
1,027✔
1301
  args[1] = ir->op1;      /* GCtab *t     */
1,027✔
1302
  args[2] = ASMREF_TMP1;  /* cTValue *key */
1,027✔
1303
  asm_setupresult(as, ir, ci);  /* TValue * */
1,027✔
1304
  asm_gencall(as, ci, args);
1,027✔
1305
  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1,027✔
1306
}
1307

1308
static void asm_lref(ASMState *as, IRIns *ir)
2✔
1309
{
1310
  Reg r = ra_dest(as, ir, RSET_GPR);
2✔
1311
#if LJ_TARGET_X86ORX64
1312
  ra_left(as, r, ASMREF_L);
2✔
1313
#else
1314
  ra_leftov(as, r, ASMREF_L);
1315
#endif
1316
}
2✔
1317

1318
/* -- Calls --------------------------------------------------------------- */
1319

1320
/* Collect arguments from CALL* and CARG instructions. */
1321
static void asm_collectargs(ASMState *as, IRIns *ir,
1322
                            const CCallInfo *ci, IRRef *args)
1323
{
1324
  uint32_t n = CCI_XNARGS(ci);
1325
  /* Account for split args. */
1326
  lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
1327
  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1328
  while (n-- > 1) {
1329
    ir = IR(ir->op1);
1330
    lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
1331
    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1332
  }
1333
  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1334
  lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
1335
}
1336

1337
/* Reconstruct CCallInfo flags for CALLX*. */
1338
static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1339
{
1340
  uint32_t nargs = 0;
1341
  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
1342
    IRIns *ira = IR(ir->op1);
1343
    nargs++;
1344
    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1345
  }
1346
#if LJ_HASFFI
1347
  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
1348
    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1349
    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1350
    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1351
#if LJ_TARGET_X86
1352
    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1353
#endif
1354
  }
1355
#endif
1356
  return (nargs | (ir->t.irt << CCI_OTSHIFT));
1357
}
1358

1359
static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
303✔
1360
{
1361
  const CCallInfo *ci = &lj_ir_callinfo[id];
303✔
1362
  IRRef args[2];
303✔
1363
  args[0] = ir->op1;
303✔
1364
  args[1] = ir->op2;
303✔
1365
  asm_setupresult(as, ir, ci);
303✔
1366
  asm_gencall(as, ci, args);
302✔
1367
}
302✔
1368

1369
static void asm_call(ASMState *as, IRIns *ir)
496✔
1370
{
1371
  IRRef args[CCI_NARGS_MAX];
496✔
1372
  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
496✔
1373
  asm_collectargs(as, ir, ci, args);
496✔
1374
  asm_setupresult(as, ir, ci);
496✔
1375
  asm_gencall(as, ci, args);
496✔
1376
}
496✔
1377

1378
/* -- PHI and loop handling ----------------------------------------------- */
1379

1380
/* Break a PHI cycle by renaming to a free register (evict if needed). */
1381
static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby,
92✔
1382
                          RegSet allow)
1383
{
1384
  RegSet candidates = blocked & allow;
92✔
1385
  if (candidates) {  /* If this register file has candidates. */
92✔
1386
    /* Note: the set for ra_pick cannot be empty, since each register file
1387
    ** has some registers never allocated to PHIs.
1388
    */
1389
    Reg down, up = ra_pick(as, ~blocked & allow);  /* Get a free register. */
50✔
1390
    if (candidates & ~blockedby)  /* Optimize shifts, else it's a cycle. */
50✔
1391
      candidates = candidates & ~blockedby;
×
1392
    down = rset_picktop(candidates);  /* Pick candidate PHI register. */
50✔
1393
    ra_rename(as, down, up);  /* And rename it to the free register. */
50✔
1394
  }
1395
}
92✔
1396

1397
/* PHI register shuffling.
1398
**
1399
** The allocator tries hard to preserve PHI register assignments across
1400
** the loop body. Most of the time this loop does nothing, since there
1401
** are no register mismatches.
1402
**
1403
** If a register mismatch is detected and ...
1404
** - the register is currently free: rename it.
1405
** - the register is blocked by an invariant: restore/remat and rename it.
1406
** - Otherwise the register is used by another PHI, so mark it as blocked.
1407
**
1408
** The renames are order-sensitive, so just retry the loop if a register
1409
** is marked as blocked, but has been freed in the meantime. A cycle is
1410
** detected if all of the blocked registers are allocated. To break the
1411
** cycle rename one of them to a free register and retry.
1412
**
1413
** Note that PHI spill slots are kept in sync and don't need to be shuffled.
1414
*/
1415
static void asm_phi_shuffle(ASMState *as)
2,425✔
1416
{
1417
  RegSet work;
2,574✔
1418

1419
  /* Find and resolve PHI register mismatches. */
1420
  for (;;) {
2,574✔
1421
    RegSet blocked = RSET_EMPTY;
2,574✔
1422
    RegSet blockedby = RSET_EMPTY;
2,574✔
1423
    RegSet phiset = as->phiset;
2,574✔
1424
    while (phiset) {  /* Check all left PHI operand registers. */
7,446✔
1425
      Reg r = rset_pickbot(phiset);
4,872✔
1426
      IRIns *irl = IR(as->phireg[r]);
4,872✔
1427
      Reg left = irl->r;
4,872✔
1428
      if (r != left) {  /* Mismatch? */
4,872✔
1429
        if (!rset_test(as->freeset, r)) {  /* PHI register blocked? */
1,310✔
1430
          IRRef ref = regcost_ref(as->cost[r]);
646✔
1431
          /* Blocked by other PHI (w/reg)? */
1432
          if (!ra_iskref(ref) && irt_ismarked(IR(ref)->t)) {
646✔
1433
            rset_set(blocked, r);
642✔
1434
            if (ra_hasreg(left))
642✔
1435
              rset_set(blockedby, left);
642✔
1436
            left = RID_NONE;
1437
          } else {  /* Otherwise grab register from invariant. */
1438
            ra_restore(as, ref);
4✔
1439
            checkmclim(as);
4✔
1440
          }
1441
        }
1442
        if (ra_hasreg(left)) {
1,310✔
1443
          ra_rename(as, left, r);
540✔
1444
          checkmclim(as);
540✔
1445
        }
1446
      }
1447
      rset_clear(phiset, r);
4,872✔
1448
    }
1449
    if (!blocked) break;  /* Finished. */
2,574✔
1450
    if (!(as->freeset & blocked)) {  /* Break cycles if none are free. */
149✔
1451
      asm_phi_break(as, blocked, blockedby, RSET_GPR);
46✔
1452
      if (!LJ_SOFTFP) asm_phi_break(as, blocked, blockedby, RSET_FPR);
46✔
1453
      checkmclim(as);
2,620✔
1454
    }  /* Else retry some more renames. */
1455
  }
1456

1457
  /* Restore/remat invariants whose registers are modified inside the loop. */
1458
#if !LJ_SOFTFP
1459
  work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR;
2,425✔
1460
  while (work) {
2,425✔
1461
    Reg r = rset_pickbot(work);
5✔
1462
    ra_restore(as, regcost_ref(as->cost[r]));
5✔
1463
    rset_clear(work, r);
5✔
1464
    checkmclim(as);
2,430✔
1465
  }
1466
#endif
1467
  work = as->modset & ~(as->freeset | as->phiset);
2,425✔
1468
  while (work) {
2,425✔
1469
    Reg r = rset_pickbot(work);
231✔
1470
    ra_restore(as, regcost_ref(as->cost[r]));
231✔
1471
    rset_clear(work, r);
231✔
1472
    checkmclim(as);
2,656✔
1473
  }
1474

1475
  /* Allocate and save all unsaved PHI regs and clear marks. */
1476
  work = as->phiset;
2,425✔
1477
  while (work) {
6,153✔
1478
    Reg r = rset_picktop(work);
3,728✔
1479
    IRRef lref = as->phireg[r];
3,728✔
1480
    IRIns *ir = IR(lref);
3,728✔
1481
    if (ra_hasspill(ir->s)) {  /* Left PHI gained a spill slot? */
3,728✔
1482
      irt_clearmark(ir->t);  /* Handled here, so clear marker now. */
179✔
1483
      ra_alloc1(as, lref, RID2RSET(r));
179✔
1484
      ra_save(as, ir, r);  /* Save to spill slot inside the loop. */
179✔
1485
      checkmclim(as);
179✔
1486
    }
1487
    rset_clear(work, r);
3,728✔
1488
  }
1489
}
2,425✔
1490

1491
/* Copy unsynced left/right PHI spill slots. Rarely needed. */
1492
static void asm_phi_copyspill(ASMState *as)
2,425✔
1493
{
1494
  int need = 0;
2,425✔
1495
  IRIns *ir;
2,425✔
1496
  for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--)
6,223✔
1497
    if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s))
3,798✔
1498
      need |= irt_isfp(ir->t) ? 2 : 1;  /* Unsynced spill slot? */
6✔
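  /* Bit 0 of 'need' requests an integer spill-slot copy, bit 1 an FP copy;
  ** the copy loops below test (need & 1) and (need & 2) accordingly.
  */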
1499
  if ((need & 1)) {  /* Copy integer spill slots. */
2,425✔
1500
#if !LJ_TARGET_X86ORX64
1501
    Reg r = RID_TMP;
1502
#else
1503
    Reg r = RID_RET;
×
1504
    if ((as->freeset & RSET_GPR))
×
1505
      r = rset_pickbot((as->freeset & RSET_GPR));
×
1506
    else
1507
      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1508
#endif
1509
    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
×
1510
      if (ra_hasspill(ir->s)) {
×
1511
        IRIns *irl = IR(ir->op1);
×
1512
        if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
×
1513
          emit_spstore(as, irl, r, sps_scale(irl->s));
×
1514
          emit_spload(as, ir, r, sps_scale(ir->s));
×
1515
          checkmclim(as);
×
1516
        }
1517
      }
1518
    }
1519
#if LJ_TARGET_X86ORX64
1520
    if (!rset_test(as->freeset, r))
×
1521
      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1522
#endif
1523
  }
1524
#if !LJ_SOFTFP
1525
  if ((need & 2)) {  /* Copy FP spill slots. */
2,425✔
1526
#if LJ_TARGET_X86
1527
    Reg r = RID_XMM0;
1528
#else
1529
    Reg r = RID_FPRET;
2✔
1530
#endif
1531
    if ((as->freeset & RSET_FPR))
2✔
1532
      r = rset_pickbot((as->freeset & RSET_FPR));
2✔
1533
    if (!rset_test(as->freeset, r))
2✔
1534
      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1535
    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
39✔
1536
      if (ra_hasspill(ir->s)) {
37✔
1537
        IRIns *irl = IR(ir->op1);
6✔
1538
        if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
6✔
1539
          emit_spstore(as, irl, r, sps_scale(irl->s));
6✔
1540
          emit_spload(as, ir, r, sps_scale(ir->s));
6✔
1541
          checkmclim(as);
37✔
1542
        }
1543
      }
1544
    }
1545
    if (!rset_test(as->freeset, r))
2✔
1546
      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
×
1547
  }
1548
#endif
1549
}
2,425✔
1550

1551
/* Emit renames for left PHIs which are only spilled outside the loop. */
1552
static void asm_phi_fixup(ASMState *as)
3,796✔
1553
{
1554
  RegSet work = as->phiset;
3,796✔
1555
  while (work) {
5,869✔
1556
    Reg r = rset_picktop(work);
2,073✔
1557
    IRRef lref = as->phireg[r];
2,073✔
1558
    IRIns *ir = IR(lref);
2,073✔
1559
    if (irt_ismarked(ir->t)) {
2,073✔
1560
      irt_clearmark(ir->t);
1,942✔
1561
      /* Left PHI gained a spill slot before the loop? */
1562
      if (ra_hasspill(ir->s)) {
1,942✔
1563
        ra_addrename(as, r, lref, as->loopsnapno);
22✔
1564
      }
1565
    }
1566
    rset_clear(work, r);
2,073✔
1567
  }
1568
}
3,796✔
1569

1570
/* Setup right PHI reference. */
1571
static void asm_phi(ASMState *as, IRIns *ir)
3,919✔
1572
{
1573
  RegSet allow = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) &
3,919✔
1574
                 ~as->phiset;
3,919✔
1575
  RegSet afree = (as->freeset & allow);
3,919✔
1576
  IRIns *irl = IR(ir->op1);
3,919✔
1577
  IRIns *irr = IR(ir->op2);
3,919✔
1578
  if (ir->r == RID_SINK)  /* Sink PHI. */
3,919✔
1579
    return;
1580
  /* Spill slot shuffling is not implemented yet (but rarely needed). */
1581
  if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
3,855✔
1582
    lj_trace_err(as->J, LJ_TRERR_NYIPHI);
×
1583
  /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */
1584
  if ((afree & (afree-1))) {  /* Two or more free registers? */
3,855✔
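    /* afree & (afree-1) clears the lowest set bit, so it is non-zero only
    ** when at least two registers in 'allow' remain free, e.g.
    ** afree = 0b0110 -> afree-1 = 0b0101 -> afree & (afree-1) = 0b0100.
    */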
1585
    Reg r;
3,836✔
1586
    if (ra_noreg(irr->r)) {  /* Get a register for the right PHI. */
3,836✔
1587
      r = ra_allocref(as, ir->op2, allow);
3,834✔
1588
    } else {  /* Duplicate right PHI, need a copy (rare). */
1589
      r = ra_scratch(as, allow);
2✔
1590
      emit_movrr(as, irr, r, irr->r);
2✔
1591
    }
1592
    ir->r = (uint8_t)r;
3,836✔
1593
    rset_set(as->phiset, r);
3,836✔
1594
    as->phireg[r] = (IRRef1)ir->op1;
3,836✔
1595
    irt_setmark(irl->t);  /* Marks left PHIs _with_ register. */
3,836✔
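    /* The mark is how asm_phi_shuffle tells a register blocked by another
    ** PHI from one blocked by an invariant; asm_phi_shuffle and
    ** asm_phi_fixup clear the marks once each left PHI has been handled.
    */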
1596
    if (ra_noreg(irl->r))
3,836✔
1597
      ra_sethint(irl->r, r); /* Set register hint for left PHI. */
3,618✔
1598
  } else {  /* Otherwise allocate a spill slot. */
1599
    /* This is overly restrictive, but it triggers only on synthetic code. */
1600
    if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
19✔
1601
      lj_trace_err(as->J, LJ_TRERR_NYIPHI);
7✔
1602
    ra_spill(as, ir);
12✔
1603
    irr->s = ir->s;  /* Set right PHI spill slot. Sync left slot later. */
12✔
1604
  }
1605
}
1606

1607
static void asm_loop_fixup(ASMState *as);
1608

1609
/* Middle part of a loop. */
1610
static void asm_loop(ASMState *as)
2,425✔
1611
{
1612
  MCode *mcspill;
2,425✔
1613
  /* LOOP is a guard, so the snapno is up to date. */
1614
  as->loopsnapno = as->snapno;
2,425✔
1615
  if (as->gcsteps)
2,425✔
1616
    asm_gc_check(as);
245✔
1617
  /* LOOP marks the transition from the variant to the invariant part. */
1618
  as->flagmcp = as->invmcp = NULL;
2,425✔
1619
  as->sectref = 0;
2,425✔
1620
  if (!neverfuse(as)) as->fuseref = 0;
2,425✔
1621
  asm_phi_shuffle(as);
2,425✔
1622
  mcspill = as->mcp;
2,425✔
1623
  asm_phi_copyspill(as);
2,425✔
1624
  asm_loop_fixup(as);
2,425✔
1625
  as->mcloop = as->mcp;
2,425✔
1626
  RA_DBGX((as, "===== LOOP ====="));
2,425✔
1627
  if (!as->realign) RA_DBG_FLUSH();
2,425✔
1628
  if (as->mcp != mcspill)
2,425✔
1629
    emit_jmp(as, mcspill);
2✔
1630
}
2,425✔
1631

1632
/* -- Target-specific assembler ------------------------------------------- */
1633

1634
#if LJ_TARGET_X86ORX64
1635
#include "lj_asm_x86.h"
1636
#elif LJ_TARGET_ARM
1637
#include "lj_asm_arm.h"
1638
#elif LJ_TARGET_ARM64
1639
#include "lj_asm_arm64.h"
1640
#elif LJ_TARGET_PPC
1641
#include "lj_asm_ppc.h"
1642
#elif LJ_TARGET_MIPS
1643
#include "lj_asm_mips.h"
1644
#else
1645
#error "Missing assembler for target CPU"
1646
#endif
1647

1648
/* -- Common instruction helpers ------------------------------------------ */
1649

1650
#if !LJ_SOFTFP32
1651
#if !LJ_TARGET_X86ORX64
1652
#define asm_ldexp(as, ir)        asm_callid(as, ir, IRCALL_ldexp)
1653
#endif
1654

1655
static void asm_pow(ASMState *as, IRIns *ir)
234✔
1656
{
1657
#if LJ_64 && LJ_HASFFI
1658
  if (!irt_isnum(ir->t))
234✔
1659
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
×
1660
                                          IRCALL_lj_carith_powu64);
1661
  else
1662
#endif
1663
  asm_callid(as, ir, IRCALL_pow);
234✔
1664
}
234✔
1665

1666
static void asm_div(ASMState *as, IRIns *ir)
138✔
1667
{
1668
#if LJ_64 && LJ_HASFFI
1669
  if (!irt_isnum(ir->t))
138✔
1670
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
×
1671
                                          IRCALL_lj_carith_divu64);
1672
  else
1673
#endif
1674
    asm_fpdiv(as, ir);
138✔
1675
}
138✔
1676
#endif
1677

1678
static void asm_mod(ASMState *as, IRIns *ir)
69✔
1679
{
1680
#if LJ_64 && LJ_HASFFI
1681
  if (!irt_isint(ir->t))
69✔
1682
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
×
1683
                                          IRCALL_lj_carith_modu64);
1684
  else
1685
#endif
1686
    asm_callid(as, ir, IRCALL_lj_vm_modi);
69✔
1687
}
68✔
1688

1689
static void asm_fuseequal(ASMState *as, IRIns *ir)
122,278✔
1690
{
1691
  /* Fuse HREF + EQ/NE. */
1692
  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
122,278✔
1693
    as->curins--;
1,074✔
1694
    asm_href(as, ir-1, (IROp)ir->o);
1,074✔
1695
  } else {
1696
    asm_equal(as, ir);
121,204✔
1697
  }
1698
}
122,278✔
1699

1700
/* -- Instruction dispatch ------------------------------------------------ */
1701

1702
/* Assemble a single instruction. */
1703
static void asm_ir(ASMState *as, IRIns *ir)
602,056✔
1704
{
1705
  switch ((IROp)ir->o) {
602,056✔
1706
  /* Miscellaneous ops. */
1707
  case IR_LOOP: asm_loop(as); break;
2,425✔
1708
  case IR_NOP: case IR_XBAR:
1709
    lj_assertA(!ra_used(ir),
1710
               "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
1711
    break;
1712
  case IR_USE:
5✔
1713
    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
10✔
1714
  case IR_PHI: asm_phi(as, ir); break;
3,919✔
1715
  case IR_HIOP: asm_hiop(as, ir); break;
1716
  case IR_GCSTEP: asm_gcstep(as, ir); break;
102✔
1717
  case IR_PROF: asm_prof(as, ir); break;
1✔
1718

1719
  /* Guarded assertions. */
1720
  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
8,635✔
1721
  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1722
  case IR_ABC:
1723
    asm_comp(as, ir);
8,635✔
1724
    break;
8,635✔
1725
  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
122,278✔
1726

1727
  case IR_RETF: asm_retf(as, ir); break;
1,065✔
1728

1729
  /* Bit ops. */
1730
  case IR_BNOT: asm_bnot(as, ir); break;
3✔
1731
  case IR_BSWAP: asm_bswap(as, ir); break;
9✔
1732
  case IR_BAND: asm_band(as, ir); break;
247✔
1733
  case IR_BOR: asm_bor(as, ir); break;
84✔
1734
  case IR_BXOR: asm_bxor(as, ir); break;
35✔
1735
  case IR_BSHL: asm_bshl(as, ir); break;
84✔
1736
  case IR_BSHR: asm_bshr(as, ir); break;
31✔
1737
  case IR_BSAR: asm_bsar(as, ir); break;
12✔
1738
  case IR_BROL: asm_brol(as, ir); break;
23✔
1739
  case IR_BROR: asm_bror(as, ir); break;
3✔
1740

1741
  /* Arithmetic ops. */
1742
  case IR_ADD: asm_add(as, ir); break;
28,625✔
1743
  case IR_SUB: asm_sub(as, ir); break;
741✔
1744
  case IR_MUL: asm_mul(as, ir); break;
18,377✔
1745
  case IR_MOD: asm_mod(as, ir); break;
69✔
1746
  case IR_NEG: asm_neg(as, ir); break;
23✔
1747
#if LJ_SOFTFP32
1748
  case IR_DIV: case IR_POW: case IR_ABS:
1749
  case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
1750
    /* Unused for LJ_SOFTFP32. */
1751
    lj_assertA(0, "IR %04d with unused op %d",
1752
                  (int)(ir - as->ir) - REF_BIAS, ir->o);
1753
    break;
1754
#else
1755
  case IR_DIV: asm_div(as, ir); break;
138✔
1756
  case IR_POW: asm_pow(as, ir); break;
234✔
1757
  case IR_ABS: asm_abs(as, ir); break;
4✔
1758
  case IR_LDEXP: asm_ldexp(as, ir); break;
270✔
1759
  case IR_FPMATH: asm_fpmath(as, ir); break;
159✔
1760
  case IR_TOBIT: asm_tobit(as, ir); break;
65✔
1761
#endif
1762
  case IR_MIN: asm_min(as, ir); break;
17✔
1763
  case IR_MAX: asm_max(as, ir); break;
494✔
1764

1765
  /* Overflow-checking arithmetic ops. */
1766
  case IR_ADDOV: asm_addov(as, ir); break;
61✔
1767
  case IR_SUBOV: asm_subov(as, ir); break;
64✔
1768
  case IR_MULOV: asm_mulov(as, ir); break;
2✔
1769

1770
  /* Memory references. */
1771
  case IR_AREF: asm_aref(as, ir); break;
35✔
1772
  case IR_HREF: asm_href(as, ir, 0); break;
38✔
1773
  case IR_HREFK: asm_hrefk(as, ir); break;
78,034✔
1774
  case IR_NEWREF: asm_newref(as, ir); break;
1,060✔
1775
  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
423✔
1776
  case IR_FREF: asm_fref(as, ir); break;
×
1777
  case IR_STRREF: asm_strref(as, ir); break;
313✔
1778
  case IR_LREF: asm_lref(as, ir); break;
2✔
1779

1780
  /* Loads and stores. */
1781
  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
41,966✔
1782
    asm_ahuvload(as, ir);
41,966✔
1783
    break;
41,966✔
1784
  case IR_FLOAD: asm_fload(as, ir); break;
191,271✔
1785
  case IR_XLOAD: asm_xload(as, ir); break;
444✔
1786
  case IR_SLOAD: asm_sload(as, ir); break;
12,745✔
1787

1788
  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
40,837✔
1789
  case IR_FSTORE: asm_fstore(as, ir); break;
75✔
1790
  case IR_XSTORE: asm_xstore(as, ir); break;
614✔
1791

1792
  /* Allocations. */
1793
  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
105✔
1794
  case IR_TNEW: asm_tnew(as, ir); break;
182✔
1795
  case IR_TDUP: asm_tdup(as, ir); break;
53✔
1796
  case IR_CNEW: case IR_CNEWI:
1,624✔
1797
#if LJ_HASFFI
1798
    asm_cnew(as, ir);
1,624✔
1799
#else
1800
    lj_assertA(0, "IR %04d with unused op %d",
1801
                  (int)(ir - as->ir) - REF_BIAS, ir->o);
1802
#endif
1803
    break;
1,624✔
1804

1805
  /* Buffer operations. */
1806
  case IR_BUFHDR: asm_bufhdr(as, ir); break;
780✔
1807
  case IR_BUFPUT: asm_bufput(as, ir); break;
1,379✔
1808
  case IR_BUFSTR: asm_bufstr(as, ir); break;
766✔
1809

1810
  /* Write barriers. */
1811
  case IR_TBAR: asm_tbar(as, ir); break;
38,255✔
1812
  case IR_OBAR: asm_obar(as, ir); break;
4✔
1813

1814
  /* Type conversions. */
1815
  case IR_CONV: asm_conv(as, ir); break;
1,971✔
1816
  case IR_TOSTR: asm_tostr(as, ir); break;
43✔
1817
  case IR_STRTO: asm_strto(as, ir); break;
160✔
1818

1819
  /* Calls. */
1820
  case IR_CALLA:
3✔
1821
    as->gcsteps++;
3✔
1822
    /* fallthrough */
1823
  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
496✔
1824
  case IR_CALLXS: asm_callx(as, ir); break;
19✔
1825
  case IR_CARG: break;
1826

1827
  default:
×
1828
    setintV(&as->J->errinfo, ir->o);
×
1829
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
×
1830
    break;
602,048✔
1831
  }
1832
}
602,048✔
1833

1834
/* -- Head of trace ------------------------------------------------------- */
1835

1836
/* Head of a root trace. */
1837
static void asm_head_root(ASMState *as)
2,158✔
1838
{
1839
  int32_t spadj;
2,158✔
1840
  asm_head_root_base(as);
2,158✔
1841
  emit_setvmstate(as, (int32_t)as->T->traceno);
2,158✔
1842
  spadj = asm_stack_adjust(as);
2,158✔
1843
  as->T->spadjust = (uint16_t)spadj;
2,158✔
1844
  emit_spsub(as, spadj);
2,158✔
1845
  /* Root traces assume a checked stack for the starting proto. */
1846
  as->T->topslot = gcref(as->T->startpt)->pt.framesize;
2,158✔
1847
}
2,158✔
1848

1849
/* Head of a side trace.
1850
**
1851
** The current simplistic algorithm requires that all slots inherited
1852
** from the parent are live in a register between pass 2 and pass 3. This
1853
** avoids the complexity of stack slot shuffling. But of course this may
1854
** overflow the register set in some cases and cause the dreaded error:
1855
** "NYI: register coalescing too complex". A refined algorithm is needed.
1856
*/
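/* The first scan below coalesces registers that already match the parent
** and flags the rest: pass 2 allocates registers for SLOADs that only have
** a spill slot, pass 3 reloads target registers whose parent value lives in
** a parent spill slot, and the final loop shuffles the remaining live
** parent registers into place, breaking cycles via a temporary register.
*/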
1857
static void asm_head_side(ASMState *as)
1,715✔
1858
{
1859
  IRRef1 sloadins[RID_MAX];
1,715✔
1860
  RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
1,715✔
1861
  RegSet live = RSET_EMPTY;  /* Live parent registers. */
1,715✔
1862
  RegSet pallow = RSET_GPR;  /* Registers needed by the parent stack check. */
1,715✔
1863
  Reg pbase;
1,715✔
1864
  IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
1,715✔
1865
  int32_t spadj, spdelta;
1,715✔
1866
  int pass2 = 0;
1,715✔
1867
  int pass3 = 0;
1,715✔
1868
  IRRef i;
1,715✔
1869

1870
  if (as->snapno && as->topslot > as->parent->topslot) {
1,715✔
1871
    /* Force snap #0 alloc to prevent register overwrite in stack check. */
1872
    asm_snap_alloc(as, 0);
39✔
1873
  }
1874
  pbase = asm_head_side_base(as, irp);
1,715✔
1875
  if (pbase != RID_NONE) {
1,715✔
1876
    rset_clear(allow, pbase);
1,032✔
1877
    rset_clear(pallow, pbase);
1,032✔
1878
  }
1879

1880
  /* Scan all parent SLOADs and collect register dependencies. */
1881
  for (i = as->stopins; i > REF_BASE; i--) {
5,383✔
1882
    IRIns *ir = IR(i);
3,668✔
1883
    RegSP rs;
3,668✔
1884
    lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
3,668✔
1885
               (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
1886
               "IR %04d has bad parent op %d",
1887
               (int)(ir - as->ir) - REF_BIAS, ir->o);
1888
    rs = as->parentmap[i - REF_FIRST];
3,668✔
1889
    if (ra_hasreg(ir->r)) {
3,668✔
1890
      rset_clear(allow, ir->r);
1,655✔
1891
      if (ra_hasspill(ir->s)) {
1,655✔
1892
        ra_save(as, ir, ir->r);
31✔
1893
        checkmclim(as);
31✔
1894
      }
1895
    } else if (ra_hasspill(ir->s)) {
2,013✔
1896
      irt_setmark(ir->t);
1,904✔
1897
      pass2 = 1;
1,904✔
1898
    }
1899
    if (ir->r == rs) {  /* Coalesce matching registers right now. */
3,668✔
1900
      ra_free(as, ir->r);
1,043✔
1901
    } else if (ra_hasspill(regsp_spill(rs))) {
2,625✔
1902
      if (ra_hasreg(ir->r))
1,695✔
1903
        pass3 = 1;
451✔
1904
    } else if (ra_used(ir)) {
930✔
1905
      sloadins[rs] = (IRRef1)i;
909✔
1906
      rset_set(live, rs);  /* Block live parent register. */
909✔
1907
    }
1908
    if (!ra_hasspill(regsp_spill(rs))) rset_clear(pallow, regsp_reg(rs));
3,668✔
1909
  }
1910

1911
  /* Calculate stack frame adjustment. */
1912
  spadj = asm_stack_adjust(as);
1,715✔
1913
  spdelta = spadj - (int32_t)as->parent->spadjust;
1,715✔
1914
  if (spdelta < 0) {  /* Don't shrink the stack frame. */
1,715✔
1915
    spadj = (int32_t)as->parent->spadjust;
324✔
1916
    spdelta = 0;
324✔
1917
  }
1918
  as->T->spadjust = (uint16_t)spadj;
1,715✔
1919

1920
  /* Reload spilled target registers. */
1921
  if (pass2) {
1,715✔
1922
    for (i = as->stopins; i > REF_BASE; i--) {
1,249✔
1923
      IRIns *ir = IR(i);
1,200✔
1924
      if (irt_ismarked(ir->t)) {
1,200✔
1925
        RegSet mask;
826✔
1926
        Reg r;
826✔
1927
        RegSP rs;
826✔
1928
        irt_clearmark(ir->t);
826✔
1929
        rs = as->parentmap[i - REF_FIRST];
826✔
1930
        if (!ra_hasspill(regsp_spill(rs)))
826✔
1931
          ra_sethint(ir->r, rs);  /* Hint may be gone, set it again. */
638✔
1932
        else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
188✔
1933
          continue;  /* Same spill slot, do nothing. */
18✔
1934
        mask = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
808✔
1935
        if (mask == RSET_EMPTY)
808✔
1936
          lj_trace_err(as->J, LJ_TRERR_NYICOAL);
77✔
1937
        r = ra_allocref(as, i, mask);
731✔
1938
        ra_save(as, ir, r);
731✔
1939
        rset_clear(allow, r);
731✔
1940
        if (r == rs) {  /* Coalesce matching registers right now. */
731✔
1941
          ra_free(as, r);
328✔
1942
          rset_clear(live, r);
328✔
1943
        } else if (ra_hasspill(regsp_spill(rs))) {
403✔
1944
          pass3 = 1;
170✔
1945
        }
1946
        checkmclim(as);
1,123✔
1947
      }
1948
    }
1949
  }
1950

1951
  /* Store trace number and adjust stack frame relative to the parent. */
1952
  emit_setvmstate(as, (int32_t)as->T->traceno);
1,638✔
1953
  emit_spsub(as, spdelta);
1,638✔
1954

1955
#if !LJ_TARGET_X86ORX64
1956
  /* Restore BASE register from parent spill slot. */
1957
  if (ra_hasspill(irp->s))
1958
    emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s));
1959
#endif
1960

1961
  /* Restore target registers from parent spill slots. */
1962
  if (pass3) {
1,638✔
1963
    RegSet work = ~as->freeset & RSET_ALL;
56✔
1964
    while (work) {
56✔
1965
      Reg r = rset_pickbot(work);
103✔
1966
      IRRef ref = regcost_ref(as->cost[r]);
103✔
1967
      RegSP rs = as->parentmap[ref - REF_FIRST];
103✔
1968
      rset_clear(work, r);
103✔
1969
      if (ra_hasspill(regsp_spill(rs))) {
103✔
1970
        int32_t ofs = sps_scale(regsp_spill(rs));
82✔
1971
        ra_free(as, r);
82✔
1972
        emit_spload(as, IR(ref), r, ofs);
82✔
1973
        checkmclim(as);
241✔
1974
      }
1975
    }
1976
  }
1977

1978
  /* Shuffle registers to match up target regs with parent regs. */
1979
  for (;;) {
1980
    RegSet work;
1981

1982
    /* Repeatedly coalesce free live registers by moving to their target. */
1983
    while ((work = as->freeset & live) != RSET_EMPTY) {
1,808✔
1984
      Reg rp = rset_pickbot(work);
163✔
1985
      IRIns *ir = IR(sloadins[rp]);
163✔
1986
      rset_clear(live, rp);
163✔
1987
      rset_clear(allow, rp);
163✔
1988
      ra_free(as, ir->r);
163✔
1989
      emit_movrr(as, ir, ir->r, rp);
163✔
1990
      checkmclim(as);
163✔
1991
    }
1992

1993
    /* We're done if no live registers remain. */
1994
    if (live == RSET_EMPTY)
1,645✔
1995
      break;
1996

1997
    /* Break cycles by renaming one target to a temp. register. */
1998
    if (live & RSET_GPR) {
7✔
1999
      RegSet tmpset = as->freeset & ~live & allow & RSET_GPR;
6✔
2000
      if (tmpset == RSET_EMPTY)
6✔
2001
        lj_trace_err(as->J, LJ_TRERR_NYICOAL);
×
2002
      ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset));
6✔
2003
    }
2004
    if (!LJ_SOFTFP && (live & RSET_FPR)) {
7✔
2005
      RegSet tmpset = as->freeset & ~live & allow & RSET_FPR;
1✔
2006
      if (tmpset == RSET_EMPTY)
1✔
2007
        lj_trace_err(as->J, LJ_TRERR_NYICOAL);
×
2008
      ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset));
1✔
2009
    }
2010
    checkmclim(as);
1,815✔
2011
    /* Continue with coalescing to fix up the broken cycle(s). */
2012
  }
2013

2014
  /* Inherit top stack slot already checked by parent trace. */
2015
  as->T->topslot = as->parent->topslot;
1,638✔
2016
  if (as->topslot > as->T->topslot) {  /* Need to check for higher slot? */
1,638✔
2017
#ifdef EXITSTATE_CHECKEXIT
2018
    /* Highest exit + 1 indicates stack check. */
2019
    ExitNo exitno = as->T->nsnap;
2020
#else
2021
    /* Reuse the parent exit in the context of the parent trace. */
2022
    ExitNo exitno = as->J->exitno;
118✔
2023
#endif
2024
    as->T->topslot = (uint8_t)as->topslot;  /* Remember for child traces. */
118✔
2025
    asm_stack_check(as, as->topslot, irp, pallow, exitno);
118✔
2026
  }
2027
}
1,638✔
2028

2029
/* -- Tail of trace ------------------------------------------------------- */
2030

2031
/* Get base slot for a snapshot. */
2032
static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
2,593✔
2033
{
2034
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
2,593✔
2035
  MSize n;
2,593✔
2036
  for (n = snap->nent; n > 0; n--) {
8,947✔
2037
    SnapEntry sn = map[n-1];
7,658✔
2038
    if ((sn & SNAP_FRAME)) {
7,658✔
2039
      *gotframe = 1;
1,304✔
2040
      return snap_slot(sn) - LJ_FR2;
1,304✔
2041
    }
2042
  }
2043
  return 0;
2044
}
2045

2046
/* Link to another trace. */
2047
static void asm_tail_link(ASMState *as)
2,593✔
2048
{
2049
  SnapNo snapno = as->T->nsnap-1;  /* Last snapshot. */
2,593✔
2050
  SnapShot *snap = &as->T->snap[snapno];
2,593✔
2051
  int gotframe = 0;
2,593✔
2052
  BCReg baseslot = asm_baseslot(as, snap, &gotframe);
2,593✔
2053

2054
  as->topslot = snap->topslot;
2,593✔
2055
  checkmclim(as);
2,593✔
2056
  ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
2,593✔
2057

2058
  if (as->T->link == 0) {
2,593✔
2059
    /* Setup fixed registers for exit to interpreter. */
2060
    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
493✔
2061
    int32_t mres;
493✔
2062
    if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
493✔
2063
      BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
2✔
2064
      if (bc_isret(bc_op(*retpc)))
2✔
2065
        pc = retpc;
2✔
2066
    }
2067
#if LJ_GC64
2068
    emit_loadu64(as, RID_LPC, u64ptr(pc));
493✔
2069
#else
2070
    ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
2071
    ra_allockreg(as, i32ptr(pc), RID_LPC);
2072
#endif
2073
    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
493✔
2074
    switch (bc_op(*pc)) {
493✔
2075
    case BC_CALLM: case BC_CALLMT:
×
2076
      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
×
UNCOV
2077
    case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
×
2078
    case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
×
2079
    default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
493✔
2080
    }
2081
    ra_allockreg(as, mres, RID_RET);  /* Return MULTRES or 0. */
493✔
2082
  } else if (baseslot) {
2,100✔
2083
    /* Save modified BASE for linking to trace with higher start frame. */
2084
    emit_setgl(as, RID_BASE, jit_base);
784✔
2085
  }
2086
  emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
2,593✔
2087

2088
  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
2,593✔
2089
    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
165✔
2090
    IR(as->J->ktrace)->o = IR_KGC;
165✔
2091
  }
2092

2093
  /* Sync the interpreter state with the on-trace state. */
2094
  asm_stack_restore(as, snap);
2,593✔
2095

2096
  /* Root traces that add frames need to check the stack at the end. */
2097
  if (!as->parent && gotframe)
2,592✔
2098
    asm_stack_check(as, as->topslot, NULL, as->freeset & RSET_GPR, snapno);
110✔
2099
}
2,592✔
2100

2101
/* -- Trace setup --------------------------------------------------------- */
2102

2103
/* Clear reg/sp for all instructions and add register hints. */
2104
static void asm_setup_regsp(ASMState *as)
5,027✔
2105
{
2106
  GCtrace *T = as->T;
5,027✔
2107
  int sink = T->sinktags;
5,027✔
2108
  IRRef nins = T->nins;
5,027✔
2109
  IRIns *ir, *lastir;
5,027✔
2110
  int inloop;
5,027✔
2111
#if LJ_TARGET_ARM
2112
  uint32_t rload = 0xa6402a64;
2113
#endif
2114

2115
  ra_setup(as);
5,027✔
2116

2117
  /* Clear reg/sp for constants. */
2118
  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
113,386✔
2119
    ir->prev = REGSP_INIT;
108,359✔
2120
    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
108,359✔
2121
#if LJ_GC64
2122
      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
2123
      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
52,748✔
2124
#else
2125
      /* Make life easier for backends by putting address of constant in i. */
2126
      ir->i = (int32_t)(intptr_t)(ir+1);
2127
#endif
2128
      ir++;
52,748✔
2129
    }
2130
  }
2131

2132
  /* REF_BASE is used for implicit references to the BASE register. */
2133
  lastir->prev = REGSP_HINT(RID_BASE);
5,027✔
2134

2135
  as->snaprename = nins;
5,027✔
2136
  as->snapref = nins;
5,027✔
2137
  as->snapno = T->nsnap;
5,027✔
2138
  as->snapalloc = 0;
5,027✔
2139

2140
  as->stopins = REF_BASE;
5,027✔
2141
  as->orignins = nins;
5,027✔
2142
  as->curins = nins;
5,027✔
2143

2144
  /* Setup register hints for parent link instructions. */
2145
  ir = IR(REF_FIRST);
5,027✔
2146
  if (as->parent) {
5,027✔
2147
    uint16_t *p;
1,843✔
2148
    lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
1,843✔
2149
    if (lastir - ir > LJ_MAX_JSLOTS)
1,843✔
2150
      lj_trace_err(as->J, LJ_TRERR_NYICOAL);
×
2151
    as->stopins = (IRRef)((lastir-1) - as->ir);
1,843✔
2152
    for (p = as->parentmap; ir < lastir; ir++) {
5,521✔
2153
      RegSP rs = ir->prev;
3,678✔
2154
      *p++ = (uint16_t)rs;  /* Copy original parent RegSP to parentmap. */
3,678✔
2155
      if (!ra_hasspill(regsp_spill(rs)))
3,678✔
2156
        ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
1,982✔
2157
      else
2158
        ir->prev = REGSP_INIT;
1,696✔
2159
    }
2160
  }
2161

2162
  inloop = 0;
5,027✔
2163
  as->evenspill = SPS_FIRST;
5,027✔
2164
  for (lastir = IR(nins); ir < lastir; ir++) {
660,472✔
2165
    if (sink) {
655,445✔
2166
      if (ir->r == RID_SINK)
8,845✔
2167
        continue;
1,013✔
2168
      if (ir->r == RID_SUNK) {  /* Revert after ASM restart. */
7,832✔
2169
        ir->r = RID_SINK;
103✔
2170
        continue;
103✔
2171
      }
2172
    }
2173
    switch (ir->o) {
654,329✔
2174
    case IR_LOOP:
2,434✔
2175
      inloop = 1;
2,434✔
2176
      break;
2,434✔
2177
#if LJ_TARGET_ARM
2178
    case IR_SLOAD:
2179
      if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP))
2180
        break;
2181
      /* fallthrough */
2182
    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2183
      if (!LJ_SOFTFP && irt_isnum(ir->t)) break;
2184
      ir->prev = (uint16_t)REGSP_HINT((rload & 15));
2185
      rload = lj_ror(rload, 4);
2186
      continue;
2187
#endif
2188
    case IR_CALLXS: {
22✔
2189
      CCallInfo ci;
22✔
2190
      ci.flags = asm_callx_flags(as, ir);
22✔
2191
      ir->prev = asm_setup_call_slots(as, ir, &ci);
22✔
2192
      if (inloop)
22✔
2193
        as->modset |= RSET_SCRATCH;
7✔
2194
      continue;
22✔
2195
      }
2196
    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
616✔
2197
      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
616✔
2198
      ir->prev = asm_setup_call_slots(as, ir, ci);
616✔
2199
      if (inloop)
616✔
2200
        as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
218✔
2201
                      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
218✔
2202
      continue;
616✔
2203
      }
2204
#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
2205
    case IR_HIOP:
2206
      switch ((ir-1)->o) {
2207
#if LJ_SOFTFP && LJ_TARGET_ARM
2208
      case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2209
        if (ra_hashint((ir-1)->r)) {
2210
          ir->prev = (ir-1)->prev + 1;
2211
          continue;
2212
        }
2213
        break;
2214
#endif
2215
#if !LJ_SOFTFP && LJ_NEED_FP64
2216
      case IR_CONV:
2217
        if (irt_isfp((ir-1)->t)) {
2218
          ir->prev = REGSP_HINT(RID_FPRET);
2219
          continue;
2220
        }
2221
#endif
2222
      /* fallthrough */
2223
      case IR_CALLN: case IR_CALLXS:
2224
#if LJ_SOFTFP
2225
      case IR_MIN: case IR_MAX:
2226
#endif
2227
        (ir-1)->prev = REGSP_HINT(RID_RETLO);
2228
        ir->prev = REGSP_HINT(RID_RETHI);
2229
        continue;
2230
      default:
2231
        break;
2232
      }
2233
      break;
2234
#endif
2235
#if LJ_SOFTFP
2236
    case IR_MIN: case IR_MAX:
2237
      if ((ir+1)->o != IR_HIOP) break;
2238
#endif
2239
    /* fallthrough */
2240
    /* C calls evict all scratch regs and return results in RID_RET. */
2241
    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
2242
      if (REGARG_NUMGPR < 3 && as->evenspill < 3)
2243
        as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
2244
#if LJ_TARGET_X86 && LJ_HASFFI
2245
      if (0) {
2246
    case IR_CNEW:
2247
        if (ir->op2 != REF_NIL && as->evenspill < 4)
2248
          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
2249
      }
2250
      /* fallthrough */
2251
#else
2252
      /* fallthrough */
2253
    case IR_CNEW:
2254
#endif
2255
      /* fallthrough */
2256
    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2257
    case IR_BUFSTR:
2258
      ir->prev = REGSP_HINT(RID_RET);
5,918✔
2259
      if (inloop)
5,918✔
2260
        as->modset = RSET_SCRATCH;
1,337✔
2261
      continue;
5,918✔
2262
    case IR_STRTO: case IR_OBAR:
173✔
2263
      if (inloop)
173✔
2264
        as->modset = RSET_SCRATCH;
74✔
2265
      break;
2266
#if !LJ_SOFTFP
2267
#if !LJ_TARGET_X86ORX64
2268
    case IR_LDEXP:
2269
#endif
2270
#endif
2271
      /* fallthrough */
2272
    case IR_POW:
463✔
2273
      if (!LJ_SOFTFP && irt_isnum(ir->t)) {
463✔
2274
        if (inloop)
463✔
2275
          as->modset |= RSET_SCRATCH;
2✔
2276
#if LJ_TARGET_X86
2277
        if (irt_isnum(IR(ir->op2)->t)) {
2278
          if (as->evenspill < 4)  /* Leave room to call pow(). */
2279
            as->evenspill = 4;
2280
        }
2281
        break;
2282
#else
2283
        ir->prev = REGSP_HINT(RID_FPRET);
463✔
2284
        continue;
463✔
2285
#endif
2286
      }
2287
      /* fallthrough */ /* for integer POW */
2288
    case IR_DIV: case IR_MOD:
2289
      if (!irt_isnum(ir->t)) {
215✔
2290
        ir->prev = REGSP_HINT(RID_RET);
71✔
2291
        if (inloop)
71✔
2292
          as->modset |= (RSET_SCRATCH & RSET_GPR);
35✔
2293
        continue;
71✔
2294
      }
2295
      break;
2296
    case IR_FPMATH:
169✔
2297
#if LJ_TARGET_X86ORX64
2298
      if (ir->op2 <= IRFPM_TRUNC) {
169✔
2299
        if (!(as->flags & JIT_F_SSE4_1)) {
165✔
2300
          ir->prev = REGSP_HINT(RID_XMM0);
×
2301
          if (inloop)
×
2302
            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
×
2303
          continue;
×
2304
        }
2305
        break;
2306
      }
2307
#endif
2308
      if (inloop)
4✔
2309
        as->modset |= RSET_SCRATCH;
2✔
2310
#if LJ_TARGET_X86
2311
      break;
2312
#else
2313
      ir->prev = REGSP_HINT(RID_FPRET);
4✔
2314
      continue;
4✔
2315
#endif
2316
#if LJ_TARGET_X86ORX64
2317
    /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
2318
    case IR_BSHL: case IR_BSHR: case IR_BSAR:
409✔
2319
      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
409✔
2320
        break;
2321
      /* fallthrough */
2322
    case IR_BROL: case IR_BROR:
2323
      if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
34✔
2324
        IR(ir->op2)->r = REGSP_HINT(RID_ECX);
10✔
2325
        if (inloop)
10✔
2326
          rset_set(as->modset, RID_ECX);
3✔
2327
      }
2328
      break;
2329
#endif
2330
    /* Do not propagate hints across type conversions or loads. */
2331
    case IR_TOBIT:
2332
    case IR_XLOAD:
2333
#if !LJ_TARGET_ARM
2334
    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2335
#endif
2336
      break;
2337
    case IR_CONV:
2,448✔
2338
      if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM ||
2,448✔
2339
          (ir->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
2340
        break;
2341
      /* fallthrough */
2342
    default:
2343
      /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
2344
      if (irref_isk(ir->op2) && !irref_isk(ir->op1) &&
596,416✔
2345
          ra_hashint(regsp_reg(IR(ir->op1)->prev))) {
478,102✔
2346
        ir->prev = IR(ir->op1)->prev;
4,559✔
2347
        continue;
4,559✔
2348
      }
2349
      break;
2350
    }
2351
    ir->prev = REGSP_INIT;
642,676✔
2352
  }
2353
  if ((as->evenspill & 1))
5,027✔
2354
    as->oddspill = as->evenspill++;
×
2355
  else
2356
    as->oddspill = 0;
5,027✔
2357
}
5,027✔
2358

2359
/* -- Assembler core ------------------------------------------------------ */
2360

2361
/* Assemble a trace. */
2362
void lj_asm_trace(jit_State *J, GCtrace *T)
3,867✔
2363
{
2364
  ASMState as_;
3,867✔
2365
  ASMState *as = &as_;
3,867✔
2366

2367
  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
2368
  {
2369
    IRRef nins = T->nins;
3,867✔
2370
    IRIns *ir = &T->ir[nins-1];
3,867✔
2371
    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
3,867✔
2372
      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
121✔
2373
      T->nins = nins;
121✔
2374
    }
2375
  }
2376

2377
  /* Ensure an initialized instruction beyond the last one for HIOP checks. */
2378
  /* This also allows one RENAME to be added without reallocating curfinal. */
2379
  as->orignins = lj_ir_nextins(J);
3,867✔
2380
  J->cur.ir[as->orignins].o = IR_NOP;
3,867✔
2381

2382
  /* Setup initial state. Copy some fields to reduce indirections. */
2383
  as->J = J;
3,867✔
2384
  as->T = T;
3,867✔
2385
  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
3,867✔
2386
  as->flags = J->flags;
3,867✔
2387
  as->loopref = J->loopref;
3,867✔
2388
  as->realign = NULL;
3,867✔
2389
  as->loopinv = 0;
3,867✔
2390
  as->parent = J->parent ? traceref(J, J->parent) : NULL;
3,867✔
2391

2392
  /* Reserve MCode memory. */
2393
  as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
3,867✔
2394
  as->mcp = as->mctop;
3,867✔
2395
  as->mclim = as->mcbot + MCLIM_REDZONE;
3,867✔
2396
  asm_setup_target(as);
3,867✔
2397

2398
  /*
2399
  ** This is a loop, because the MCode may have to be (re-)assembled
2400
  ** multiple times:
2401
  **
2402
  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
2403
  **    backend wants the MCode to be aligned differently.
2404
  **
2405
  **    This is currently only the case on x86/x64, where small loops get
2406
  **    an aligned loop body plus a short branch. Not much effort is wasted,
2407
  **    because the abort happens very quickly and only once.
2408
  **
2409
  ** 2. The IR is immovable, since the MCode embeds pointers to various
2410
  **    constants inside the IR. But RENAMEs may need to be added to the IR
2411
  **    during assembly, which might grow and reallocate the IR. We check
2412
  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
2413
  **    copy (in J->curfinal.ir) and try again.
2414
  **
2415
  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
2416
  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
2417
  **    always have one spare slot in the IR (see above), which means we
2418
  **    have to redo the assembly for only ~2% of all traces.
2419
  **
2420
  **    Very, very rarely, this needs to be done repeatedly, since the
2421
  **    location of constants inside the IR (actually, reachability from
2422
  **    a global pointer) may affect register allocation and thus the
2423
  **    number of RENAMEs.
2424
  */
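  /* Concretely: the realign retry below simply falls through via 'continue'
  ** once the backend has set as->realign, while the RENAME retry frees
  ** J->curfinal and allocates a fresh, larger copy once T->nins has
  ** outgrown it.
  */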
2425
  for (;;) {
5,027✔
2426
    as->mcp = as->mctop;
5,027✔
2427
#ifdef LUA_USE_ASSERT
2428
    as->mcp_prev = as->mcp;
2429
#endif
2430
    as->ir = J->curfinal->ir;  /* Use the copied IR. */
5,027✔
2431
    as->curins = J->cur.nins = as->orignins;
5,027✔
2432

2433
    RA_DBG_START();
5,027✔
2434
    RA_DBGX((as, "===== STOP ====="));
5,027✔
2435

2436
    /* General trace setup. Emit tail of trace. */
2437
    asm_tail_prep(as);
5,027✔
2438
    as->mcloop = NULL;
5,027✔
2439
    as->flagmcp = NULL;
5,027✔
2440
    as->topslot = 0;
5,027✔
2441
    as->gcsteps = 0;
5,027✔
2442
    as->sectref = as->loopref;
5,027✔
2443
    as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
5,027✔
2444
    asm_setup_regsp(as);
5,027✔
2445
    if (!as->loopref)
5,027✔
2446
      asm_tail_link(as);
2,593✔
2447

2448
    /* Assemble a trace in linear backwards order. */
2449
    for (as->curins--; as->curins > as->stopins; as->curins--) {
620,673✔
2450
      IRIns *ir = IR(as->curins);
615,697✔
2451
      /* 64 bit types handled by SPLIT for 32 bit archs. */
2452
      lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
615,697✔
2453
                 "IR %04d has unsplit 64 bit type",
2454
                 (int)(ir - as->ir) - REF_BIAS);
2455
      asm_snap_prev(as);
615,697✔
2456
      if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
615,697✔
2457
        continue;  /* Dead-code elimination can be soooo easy. */
13,599✔
2458
      if (irt_isguard(ir->t))
602,098✔
2459
        asm_snap_prep(as);
264,626✔
2460
      RA_DBG_REF();
602,098✔
2461
      checkmclim(as);
602,098✔
2462
      asm_ir(as, ir);
602,056✔
2463
    }
2464

2465
    if (as->realign && J->curfinal->nins >= T->nins)
4,976✔
2466
      continue;  /* Retry in case only the MCode needs to be realigned. */
1,103✔
2467

2468
    /* Emit head of trace. */
2469
    RA_DBG_REF();
3,873✔
2470
    checkmclim(as);
3,873✔
2471
    if (as->gcsteps > 0) {
3,873✔
2472
      as->curins = as->T->snap[0].ref;
410✔
2473
      asm_snap_prep(as);  /* The GC check is a guard. */
410✔
2474
      asm_gc_check(as);
410✔
2475
      as->curins = as->stopins;
410✔
2476
    }
2477
    ra_evictk(as);
3,873✔
2478
    if (as->parent)
3,873✔
2479
      asm_head_side(as);
1,715✔
2480
    else
2481
      asm_head_root(as);
2,158✔
2482
    asm_phi_fixup(as);
3,796✔
2483

2484
    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
3,796✔
2485
      lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
3,739✔
2486
      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
3,739✔
2487
             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
3,739✔
2488
      T->nins = J->curfinal->nins;
3,739✔
2489
      /* Fill mcofs of any unprocessed snapshots. */
2490
      as->curins = REF_FIRST;
3,739✔
2491
      asm_snap_prev(as);
3,739✔
2492
      break;  /* Done. */
3,739✔
2493
    }
2494

2495
    /* Otherwise try again with a bigger IR. */
2496
    lj_trace_free(J2G(J), J->curfinal);
57✔
2497
    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
57✔
2498
    J->curfinal = lj_trace_alloc(J->L, T);
57✔
2499
    as->realign = NULL;
57✔
2500
  }
2501

2502
  RA_DBGX((as, "===== START ===="));
3,739✔
2503
  RA_DBG_FLUSH();
3,739✔
2504
  if (as->freeset != RSET_ALL)
3,739✔
2505
    lj_trace_err(as->J, LJ_TRERR_BADRA);  /* Ouch! Should never happen. */
×
2506

2507
  /* Set trace entry point before fixing up tail to allow link to self. */
2508
  T->mcode = as->mcp;
3,739✔
2509
  T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
3,739✔
2510
  if (!as->loopref)
3,739✔
2511
    asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
2,475✔
2512
  T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
3,739✔
2513
  asm_snap_fixup_mcofs(as);
3,739✔
2514
#if LJ_TARGET_MCODE_FIXUP
2515
  asm_mcode_fixup(T->mcode, T->szmcode);
2516
#endif
2517
  lj_mcode_sync(T->mcode, as->mctoporig);
3,739✔
2518
}
3,739✔
2519

2520
#undef IR
2521

2522
#endif