
nickg / nvc, build 20645692657 (push via github)

01 Jan 2026 09:14PM UTC coverage: 92.653% (+0.08%) from 92.573%

Commit by nickg:
Allocate virtual registers in MIR

Fixes #1259

160 of 161 new or added lines in 9 files covered. (99.38%)

579 existing lines in 12 files now uncovered.

76033 of 82062 relevant lines covered (92.65%)

466822.97 hits per line

Source File: /src/jit/jit-optim.c (98.92% covered)
1
//
2
//  Copyright (C) 2022-2025  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "array.h"
20
#include "jit/jit-priv.h"
21

22
#include <assert.h>
23
#include <stdlib.h>
24
#include <string.h>
25
#include <limits.h>
26

27
////////////////////////////////////////////////////////////////////////////////
28
// Control flow graph construction
29

30
static bool cfg_is_terminator(jit_func_t *func, jit_ir_t *ir)
16,115,494✔
31
{
32
   if (ir->op == MACRO_CASE)
16,115,494✔
33
      return ir + 1 < func->irbuf + func->nirs && (ir + 1)->op != MACRO_CASE;
139,186✔
34
   else
35
      return ir->op == J_JUMP || ir->op == J_RET || ir->op == MACRO_REEXEC;
31,307,871✔
36
}
37

38
static void cfg_add_one_edge(jit_edge_list_t *list, unsigned edge)
246,020✔
39
{
40
   if (list->count < 4)
246,020✔
41
      list->u.edges[list->count++] = edge;
243,179✔
42
   else if (list->count == 4) {
2,841✔
43
      unsigned *ptr = xmalloc_array(16, sizeof(unsigned));
718✔
44
      memcpy(ptr, list->u.edges, 4 * sizeof(unsigned));
718✔
45

46
      list->max = 16;
718✔
47
      list->u.external = ptr;
718✔
48
      list->u.external[list->count++] = edge;
718✔
49
   }
50
   else if (list->count == list->max) {
2,123✔
51
      list->max *= 2;
49✔
52
      list->u.external =
98✔
53
         xrealloc_array(list->u.external, list->max, sizeof(unsigned));
49✔
54
      list->u.external[list->count++] = edge;
49✔
55
   }
56
   else
57
      list->u.external[list->count++] = edge;
2,074✔
58
}
246,020✔
59

60
static void cfg_add_edge(jit_cfg_t *cfg, jit_block_t *from, jit_block_t *to)
123,010✔
61
{
62
   cfg_add_one_edge(&(from->out), to - cfg->blocks);
123,010✔
63
   cfg_add_one_edge(&(to->in), from - cfg->blocks);
123,010✔
64
}
123,010✔
65

66
static jit_reg_t cfg_get_reg(jit_value_t value)
2,785,244✔
67
{
68
   switch (value.kind) {
2,785,244✔
69
   case JIT_VALUE_REG:
674,385✔
70
   case JIT_ADDR_REG:
71
      return value.reg;
674,385✔
72
   default:
73
      return JIT_REG_INVALID;
74
   }
75
}
76

77
static inline bool cfg_reads_result(jit_ir_t *ir)
2,913,336✔
78
{
79
   return ir->op == MACRO_COPY || ir->op == MACRO_CASE || ir->op == MACRO_BZERO
2,913,336✔
80
      || ir->op == MACRO_MOVE || ir->op == MACRO_MEMSET;
2,913,336✔
81
}
82

83
static inline bool cfg_writes_result(jit_ir_t *ir)
1,392,622✔
84
{
85
   return ir->result != JIT_REG_INVALID && ir->op != MACRO_CASE;
1,392,622✔
86
}
87

88
static void cfg_liveness(jit_cfg_t *cfg, jit_func_t *f)
16,386✔
89
{
90
   // Algorithm from "Engineering a Compiler" chapter 8.6
91

92
   for (int i = 0; i < cfg->nblocks; i++) {
135,701✔
93
      jit_block_t *b = &(cfg->blocks[i]);
119,315✔
94
      mask_init(&b->livein, f->nregs + 1);
119,315✔
95
      mask_init(&b->varkill, f->nregs + 1);
119,315✔
96
      mask_init(&b->liveout, f->nregs + 1);
119,315✔
97

98
      for (int j = b->first; j <= b->last; j++) {
1,511,937✔
99
         jit_ir_t *ir = &(f->irbuf[j]);
1,392,622✔
100

101
         jit_reg_t reg1 = cfg_get_reg(ir->arg1);
1,392,622✔
102
         if (reg1 != JIT_REG_INVALID && !mask_test(&b->varkill, reg1))
1,392,622✔
103
            mask_set(&b->livein, reg1);
113,991✔
104

105
         jit_reg_t reg2 = cfg_get_reg(ir->arg2);
1,392,622✔
106
         if (reg2 != JIT_REG_INVALID && !mask_test(&b->varkill, reg2))
1,392,622✔
107
            mask_set(&b->livein, reg2);
97,399✔
108

109
         if (cfg_reads_result(ir) && !mask_test(&b->varkill, ir->result))
1,392,622✔
110
            mask_set(&b->livein, ir->result);
321✔
111

112
         if (cfg_writes_result(ir))
1,392,622✔
113
            mask_set(&b->varkill, ir->result);
433,774✔
114

115
         if (jit_writes_flags(ir))
1,392,622✔
116
            mask_set(&b->varkill, f->nregs);
71,264✔
117
         else if (jit_reads_flags(ir) && !mask_test(&b->varkill, f->nregs))
1,321,358✔
118
            mask_set(&b->livein, f->nregs);
1,392,622✔
119
      }
120
   }
121

122
   bit_mask_t new, tmp;
16,386✔
123
   mask_init(&new, f->nregs + 1);
16,386✔
124
   mask_init(&tmp, f->nregs + 1);
16,386✔
125

126
   bool changed;
29,224✔
127
   do {
29,224✔
128
      changed = false;
29,224✔
129

130
      for (int i = cfg->nblocks - 1; i >= 0; i--) {
315,473✔
131
         jit_block_t *b = &(cfg->blocks[i]);
286,249✔
132
         mask_clearall(&new);
286,249✔
133

134
         for (int j = 0; j < b->out.count; j++) {
596,779✔
135
            jit_block_t *succ = &(cfg->blocks[jit_get_edge(&b->out, j)]);
310,530✔
136
            mask_copy(&tmp, &succ->liveout);
310,530✔
137
            mask_subtract(&tmp, &succ->varkill);
310,530✔
138
            mask_union(&tmp, &succ->livein);
310,530✔
139
            mask_union(&new, &tmp);
310,530✔
140
         }
141

142
         if (!mask_eq(&new, &b->liveout)) {
286,249✔
143
            mask_copy(&b->liveout, &new);
78,128✔
144
            changed = true;
78,128✔
145
         }
146
      }
147
   } while (changed);
29,224✔
148

149
   // Replaced "upward exposed variables" set with live-in
150
   for (int i = 0; i < cfg->nblocks; i++) {
135,701✔
151
      jit_block_t *b = &(cfg->blocks[i]);
119,315✔
152
      mask_copy(&tmp, &b->liveout);
119,315✔
153
      mask_subtract(&tmp, &b->varkill);
119,315✔
154
      mask_union(&b->livein, &tmp);
119,315✔
155
   }
156

157
   mask_free(&new);
16,386✔
158
   mask_free(&tmp);
16,386✔
159
}
16,386✔
160

161
jit_cfg_t *jit_get_cfg(jit_func_t *f)
16,386✔
162
{
163
   int nb = 1;
16,386✔
164
   for (int i = 0, first = 0; i < f->nirs; i++) {
1,409,008✔
165
      jit_ir_t *ir = &(f->irbuf[i]);
1,392,622✔
166
      if (ir->target && i > 0 && first != i)
1,392,622✔
167
         first = i, nb++;
31,054✔
168
      if (cfg_is_terminator(f, ir) && i + 1 < f->nirs)
1,392,622✔
169
         first = i + 1, nb++;
71,875✔
170
   }
171

172
   jit_cfg_t *cfg = xcalloc_flex(sizeof(jit_cfg_t), nb, sizeof(jit_block_t));
16,386✔
173
   cfg->nblocks = nb;
16,386✔
174

175
   jit_block_t *bb = cfg->blocks;
16,386✔
176
   for (int i = 0; i < f->nirs; i++) {
1,409,008✔
177
      jit_ir_t *ir = &(f->irbuf[i]);
1,392,622✔
178
      if (ir->target && i > 0 && bb->first != i) {
1,392,622✔
179
         if (!bb->returns && !bb->aborts)
31,054✔
180
            cfg_add_edge(cfg, bb, bb + 1);
13,798✔
181
         (++bb)->first = i;
31,054✔
182
      }
183

184
      bb->last = i;
1,392,622✔
185

186
      if (ir->op == J_RET)
1,392,622✔
187
         bb->returns = 1;
29,449✔
188
      else if (jit_will_abort(ir))
1,363,173✔
189
         bb->aborts = 1;
17,659✔
190

191
      if (cfg_is_terminator(f, ir) && i + 1 < f->nirs) {
1,392,622✔
192
         if ((ir->op == J_JUMP && ir->cc != JIT_CC_NONE)
71,875✔
193
             || ir->op == MACRO_CASE)
27,582✔
194
            cfg_add_edge(cfg, bb, bb + 1);   // Fall-through case
45,977✔
195
         (++bb)->first = i + 1;
71,875✔
196
      }
197
   }
198

199
   for (int i = 0; i < f->nirs; i++) {
1,409,008✔
200
      jit_ir_t *ir = &(f->irbuf[i]);
1,392,622✔
201
      jit_label_t label = JIT_LABEL_INVALID;
1,392,622✔
202

203
      if (ir->op == J_JUMP)
1,392,622✔
204
         label = ir->arg1.label;
56,396✔
205
      else if (ir->op == MACRO_CASE)
1,336,226✔
206
         label = ir->arg2.label;
6,839✔
207

208
      if (label != JIT_LABEL_INVALID) {
63,235✔
209
         assert(label < f->nirs);
63,235✔
210
         jit_block_t *from = jit_block_for(cfg, i);
63,235✔
211
         jit_block_t *to = jit_block_for(cfg, label);
63,235✔
212
         cfg_add_edge(cfg, from, to);
63,235✔
213
      }
214
   }
215

216
   cfg_liveness(cfg, f);
16,386✔
217

218
   return cfg;
16,386✔
219
}
220

221
void jit_free_cfg(jit_cfg_t *cfg)
16,381✔
222
{
223
   for (int i = 0; i < cfg->nblocks; i++) {
135,676✔
224
      jit_block_t *b = &(cfg->blocks[i]);
119,295✔
225
      mask_free(&b->livein);
119,295✔
226
      mask_free(&b->liveout);
119,295✔
227
      mask_free(&b->varkill);
119,295✔
228

229
      if (b->in.max > 4) free(b->in.u.external);
119,295✔
230
      if (b->out.max > 4) free(b->out.u.external);
119,295✔
231
   }
232

233
   free(cfg);
16,381✔
234
}
16,381✔
235

236
jit_block_t *jit_block_for(jit_cfg_t *cfg, int pos)
126,470✔
237
{
238
   for (int low = 0, high = cfg->nblocks - 1; low <= high; ) {
486,089✔
239
      const int mid = (low + high) / 2;
486,089✔
240
      jit_block_t *bb = &(cfg->blocks[mid]);
486,089✔
241

242
      if (bb->last < pos)
486,089✔
243
         low = mid + 1;
193,761✔
244
      else if (bb->first > pos)
292,328✔
245
         high = mid - 1;
165,858✔
246
      else
247
         return bb;
126,470✔
248
   }
249

250
   fatal_trace("operation %d is not in any block", pos);
251
}
252

253
int jit_get_edge(jit_edge_list_t *list, int nth)
552,607✔
254
{
255
   assert(nth < list->count);
552,607✔
256
   if (list->max <= 4)
552,607✔
257
      return list->u.edges[nth];
536,327✔
258
   else
259
      return list->u.external[nth];
16,280✔
260
}
261

262
////////////////////////////////////////////////////////////////////////////////
263
// Local value numbering and simple peepholes
264

265
#define FOR_ALL_JIT_SIZES(size, macro) do {             \
266
      switch (size) {                                   \
267
      case JIT_SZ_8: macro(int8_t); break;              \
268
      case JIT_SZ_16: macro(int16_t); break;            \
269
      case JIT_SZ_32: macro(int32_t); break;            \
270
      case JIT_SZ_UNSPEC:                               \
271
      case JIT_SZ_64: macro(int64_t); break;            \
272
      }                                                 \
273
   } while (0)
274

275
typedef unsigned valnum_t;
276

277
#define VN_INVALID  UINT_MAX
278
#define SMALL_CONST 100
279
#define MAX_CONSTS  32
280
#define FIRST_VN    (SMALL_CONST + MAX_CONSTS)
281
#define TRACK_ARGS  8
282

283
#define LVN_REG(r) ((jit_value_t){ .kind = JIT_VALUE_REG, .reg = (r) })
284
#define LVN_CONST(i) ((jit_value_t){ .kind = JIT_VALUE_INT64, .int64 = (i) })
285

286
typedef struct {
287
   jit_ir_t *ir;
288
   valnum_t  vn;
289
   int       tuple[3];
290
} lvn_tab_t;
291

292
typedef struct {
293
   jit_func_t *func;
294
   valnum_t   *regvn;
295
   valnum_t    nextvn;
296
   lvn_tab_t  *hashtab;
297
   size_t      tabsz;
298
   int64_t     consttab[MAX_CONSTS];
299
   unsigned    nconsts;
300
} lvn_state_t;
301

302
static void jit_lvn_mov(jit_ir_t *ir, lvn_state_t *state);
303

304
static inline valnum_t lvn_new_value(lvn_state_t *state)
979,339✔
305
{
306
   return state->nextvn++;
979,339✔
307
}
308

309
static bool lvn_get_const(valnum_t vn, lvn_state_t *state, int64_t *cval)
1,108,648✔
310
{
311
   if (vn < SMALL_CONST) {
1,108,648✔
312
      *cval = vn;
168,261✔
313
      return true;
168,261✔
314
   }
315
   else if (vn < SMALL_CONST + MAX_CONSTS) {
940,387✔
316
      *cval = state->consttab[vn - SMALL_CONST];
17,127✔
317
      return true;
17,127✔
318
   }
319
   else
320
      return false;
321
}
322

323
static bool lvn_is_const(jit_value_t value, lvn_state_t *state, int64_t *cval)
1,402,077✔
324
{
325
   switch (value.kind) {
1,402,077✔
326
   case JIT_VALUE_INT64:
429,952✔
327
      *cval = value.int64;
429,952✔
328
      return true;
429,952✔
329
   case JIT_VALUE_REG:
972,122✔
330
      return lvn_get_const(state->regvn[value.reg], state, cval);
972,122✔
331
   default:
332
      return false;
333
   }
334
}
335

336
static inline bool lvn_can_fold(jit_ir_t *ir, lvn_state_t *state,
604,800✔
337
                                int64_t *arg1, int64_t *arg2)
338
{
339
   return lvn_is_const(ir->arg1, state, arg1)
604,800✔
340
      && lvn_is_const(ir->arg2, state, arg2);
604,800✔
341
}
342

343
static void lvn_convert_mov(jit_ir_t *ir, lvn_state_t *state, jit_value_t value)
254,823✔
344
{
345
   ir->op        = J_MOV;
254,823✔
346
   ir->size      = JIT_SZ_UNSPEC;
254,823✔
347
   ir->cc        = JIT_CC_NONE;
254,823✔
348
   ir->arg1      = value;
254,823✔
349
   ir->arg2.kind = JIT_VALUE_INVALID;
254,823✔
350

351
   jit_lvn_mov(ir, state);
254,823✔
352
}
254,823✔
353

354
static void lvn_convert_nop(jit_ir_t *ir)
376,940✔
355
{
356
   ir->op        = J_NOP;
376,940✔
357
   ir->size      = JIT_SZ_UNSPEC;
376,940✔
358
   ir->cc        = JIT_CC_NONE;
376,940✔
359
   ir->result    = JIT_REG_INVALID;
376,940✔
360
   ir->arg1.kind = JIT_VALUE_INVALID;
376,940✔
361
   ir->arg2.kind = JIT_VALUE_INVALID;
376,940✔
362
}
376,940✔
363

364
static valnum_t lvn_value_num(jit_value_t value, lvn_state_t *state)
2,349,537✔
365
{
366
   switch (value.kind) {
2,349,537✔
367
   case JIT_VALUE_REG:
1,345,867✔
368
      if (state->regvn[value.reg] != VN_INVALID)
1,345,867✔
369
         return state->regvn[value.reg];
370
      else
371
         return (state->regvn[value.reg] = lvn_new_value(state));
497,034✔
372

373
   case JIT_VALUE_INT64:
505,266✔
374
      if (value.int64 >= 0 && value.int64 < SMALL_CONST)
505,266✔
375
         return value.int64;
431,190✔
376
      else {
377
         for (int i = 0; i < state->nconsts; i++) {
157,330✔
378
            if (state->consttab[i] == value.int64)
142,948✔
379
               return SMALL_CONST + i;
59,694✔
380
         }
381

382
         if (state->nconsts < MAX_CONSTS) {
14,382✔
383
            state->consttab[state->nconsts] = value.int64;
14,268✔
384
            return SMALL_CONST + state->nconsts++;
14,268✔
385
         }
386
         else
387
            return lvn_new_value(state);
114✔
388
      }
389

390
   case JIT_VALUE_INVALID:
391
      return VN_INVALID;
392

393
   case JIT_VALUE_HANDLE:
14,092✔
394
   case JIT_VALUE_DOUBLE:
395
      return lvn_new_value(state);
14,092✔
396

UNCOV
397
   default:
×
398
      fatal_trace("cannot handle value kind %d in lvn_value_num", value.kind);
399
   }
400
}
401

402
static inline bool lvn_is_commutative(jit_op_t op)
882,534✔
403
{
404
   return op == J_ADD || op == J_MUL || op == J_AND || op == J_OR;
882,534✔
405
}
406

407
static inline void lvn_kill_flags(lvn_state_t *state)
26,561✔
408
{
409
   state->regvn[state->func->nregs] = VN_INVALID;
26,561✔
410
}
26,561✔
411

412
static void lvn_kill_args(lvn_state_t *state)
194,922✔
413
{
414
   for (int i = 0; i < TRACK_ARGS; i++)
1,754,298✔
415
      state->regvn[state->func->nregs + i + 1] = VN_INVALID;
1,559,376✔
416
}
194,922✔
417

418
static void lvn_commute_const(jit_ir_t *ir, lvn_state_t *state)
167,381✔
419
{
420
   assert(lvn_is_commutative(ir->op));
167,381✔
421

422
   int64_t cval;
167,381✔
423
   if (lvn_is_const(ir->arg1, state, &cval)) {
167,381✔
424
      jit_value_t tmp = ir->arg1;
4,281✔
425
      ir->arg1 = ir->arg2;
4,281✔
426
      ir->arg2 = tmp;
4,281✔
427
   }
428
}
167,381✔
429

430
static void lvn_get_tuple(jit_ir_t *ir, lvn_state_t *state, int tuple[3])
715,153✔
431
{
432
   tuple[0] = ir->op | ir->size << 8 | ir->cc << 11;
715,153✔
433

434
   const valnum_t vn1 = lvn_value_num(ir->arg1, state);
715,153✔
435
   const valnum_t vn2 = lvn_value_num(ir->arg2, state);
715,153✔
436

437
   if (lvn_is_commutative(ir->op) && vn1 > vn2) {
715,153✔
438
      tuple[1] = vn2;
76,028✔
439
      tuple[2] = vn1;
76,028✔
440
   }
441
   else {
442
      tuple[1] = vn1;
639,125✔
443
      tuple[2] = vn2;
639,125✔
444
   }
445
}
715,153✔
446

447
static void jit_lvn_generic(jit_ir_t *ir, lvn_state_t *state, valnum_t vn)
715,153✔
448
{
449
   assert(ir->result != JIT_REG_INVALID);
715,153✔
450

451
   int tuple[3];
715,153✔
452
   lvn_get_tuple(ir, state, tuple);
715,153✔
453

454
   const uint32_t hash =
715,153✔
455
      mix_bits_32(tuple[0]*29 + tuple[1]*1093 + tuple[2]*6037);
715,153✔
456

457
   for (int idx = hash & (state->tabsz - 1), limit = 0, stale = -1; limit < 10;
844,288✔
458
        idx = (idx + 1) & (state->tabsz - 1), limit++) {
129,135✔
459

460
      lvn_tab_t *tab = &(state->hashtab[idx]);
844,288✔
461
      if (tab->ir == NULL) {
844,288✔
462
         if (stale >= 0)  // Reuse earlier stale entry if possible
608,665✔
463
            tab = &(state->hashtab[stale]);
107,173✔
464
         tab->ir = ir;
608,665✔
465
         tab->vn = state->regvn[ir->result] =
1,217,330✔
466
            (vn == VN_INVALID ? lvn_new_value(state) : vn);
608,665✔
467
         memcpy(tab->tuple, tuple, sizeof(tuple));
608,665✔
468
         return;
715,153✔
469
      }
470
      else if (tab->vn != state->regvn[tab->ir->result]) {
235,623✔
471
         // Stale entry may be reused if we do not find matching value
472
         stale = idx;
122,349✔
473
         continue;
122,349✔
474
      }
475
      else if (memcmp(tuple, tab->tuple, sizeof(tuple)) == 0) {
113,274✔
476
         assert(tab->ir->result != JIT_REG_INVALID);
106,488✔
477

478
         ir->op   = J_MOV;
106,488✔
479
         ir->size = JIT_SZ_UNSPEC;
106,488✔
480
         ir->cc   = JIT_CC_NONE;
106,488✔
481

482
         // Propagate constants where possible
483
         int64_t cval;
106,488✔
484
         if (lvn_get_const(state->regvn[tab->ir->result], state, &cval))
106,488✔
485
            ir->arg1 = LVN_CONST(cval);
56,629✔
486
         else
487
            ir->arg1 = LVN_REG(tab->ir->result);
49,859✔
488

489
         ir->arg2.kind = JIT_VALUE_INVALID;
106,488✔
490

491
         state->regvn[ir->result] = tab->vn;
106,488✔
492
         return;
106,488✔
493
      }
494
   }
495

UNCOV
496
   state->regvn[ir->result] = VN_INVALID;   // Reached iteration limit
×
497
}
498

499
static void jit_lvn_mul(jit_ir_t *ir, lvn_state_t *state)
77,814✔
500
{
501
   if (ir->cc != JIT_CC_NONE)
77,814✔
502
      lvn_kill_flags(state);
904✔
503

504
   int64_t lhs, rhs;
77,814✔
505
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
77,814✔
506
#define FOLD_MUL(type) do {                                             \
507
         type result;                                                   \
508
         if (!__builtin_mul_overflow(lhs, rhs, &result)) {              \
509
            lvn_convert_mov(ir, state, LVN_CONST(result));              \
510
            return;                                                     \
511
         }                                                              \
512
      } while (0)
513

514
      FOR_ALL_JIT_SIZES(ir->size, FOLD_MUL);
21,470✔
515
#undef FOLD_MUL
516
   }
517

518
   lvn_commute_const(ir, state);
56,345✔
519

520
   if (lvn_is_const(ir->arg2, state, &rhs)) {
56,345✔
521
      if (rhs == 0) {
55,810✔
522
         lvn_convert_mov(ir, state, LVN_CONST(0));
1✔
523
         return;
1✔
524
      }
525
      else if (rhs == 1) {
55,809✔
526
         lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
38,449✔
527
         return;
38,449✔
528
      }
529
      else if (rhs > 0 && is_power_of_2(rhs) && ir->size == JIT_SZ_UNSPEC) {
17,360✔
530
         ir->op = J_SHL;
14,404✔
531
         ir->arg2 = LVN_CONST(ilog2(rhs));
14,404✔
532
         jit_lvn_generic(ir, state, VN_INVALID);
14,404✔
533
         return;
14,404✔
534
      }
535
   }
536

537
   jit_lvn_generic(ir, state, VN_INVALID);
3,491✔
538
}
539

540
static void jit_lvn_div(jit_ir_t *ir, lvn_state_t *state)
1,317✔
541
{
542
   if (ir->cc != JIT_CC_NONE)
1,317✔
UNCOV
543
      lvn_kill_flags(state);
×
544

545
   int64_t lhs, rhs;
1,317✔
546
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs != 0) {
1,317✔
547
      // XXX: potential bug here with INT_MIN/-1
548
#define FOLD_DIV(type) do {                                             \
549
         type result = (type)lhs / (type)rhs;                           \
550
         lvn_convert_mov(ir, state, LVN_CONST(result));                 \
551
         return;                                                        \
552
      } while (0)
553

554
      FOR_ALL_JIT_SIZES(ir->size, FOLD_DIV);
2✔
555
#undef FOLD_DIV
556
   }
557
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 1) {
1,316✔
558
      lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
1✔
559
      return;
1✔
560
   }
561

562
   jit_lvn_generic(ir, state, VN_INVALID);
1,315✔
563
}
564

565
static void jit_lvn_add(jit_ir_t *ir, lvn_state_t *state)
130,676✔
566
{
567
   if (ir->cc != JIT_CC_NONE)
130,676✔
568
      lvn_kill_flags(state);
4,751✔
569

570
   int64_t lhs, rhs;
130,676✔
571
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
130,676✔
572
#define FOLD_ADD(type) do {                                             \
573
         type result;                                                   \
574
         if (!__builtin_add_overflow(lhs, rhs, &result)) {              \
575
            lvn_convert_mov(ir, state, LVN_CONST(result));              \
576
            return;                                                     \
577
         }                                                              \
578
      } while (0)
579

580
      FOR_ALL_JIT_SIZES(ir->size, FOLD_ADD);
19,640✔
581
#undef FOLD_ADD
582
   }
583

584
   lvn_commute_const(ir, state);
111,036✔
585

586
   if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0) {
111,036✔
587
      lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
4,975✔
588
      return;
4,975✔
589
   }
590

591
   jit_lvn_generic(ir, state, VN_INVALID);
106,061✔
592
}
593

594
static void jit_lvn_sub(jit_ir_t *ir, lvn_state_t *state)
99,782✔
595
{
596
   if (ir->cc != JIT_CC_NONE)
99,782✔
597
      lvn_kill_flags(state);
1,649✔
598

599
   int64_t lhs, rhs;
99,782✔
600
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
99,782✔
601
#define FOLD_SUB(type) do {                                             \
602
         type result;                                                   \
603
         if (!__builtin_sub_overflow(lhs, rhs, &result)) {              \
604
            lvn_convert_mov(ir, state, LVN_CONST(result));              \
605
            return;                                                     \
606
         }                                                              \
607
      } while (0)
608

609
      FOR_ALL_JIT_SIZES(ir->size, FOLD_SUB);
38,912✔
610
#undef FOLD_SUB
611
   }
612

613
   if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0) {
60,870✔
614
      lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
7,760✔
615
      return;
7,760✔
616
   }
617
   else if (lvn_is_const(ir->arg1, state, &lhs) && lhs == 0
53,110✔
618
            && ir->cc == JIT_CC_NONE && ir->size == JIT_SZ_UNSPEC) {
4,867✔
619
     ir->op        = J_NEG;
4,866✔
620
     ir->arg1      = ir->arg2;
4,866✔
621
     ir->arg2.kind = JIT_VALUE_INVALID;
4,866✔
622
     jit_lvn_generic(ir, state, VN_INVALID);
4,866✔
623
     return;
4,866✔
624
   }
625

626
   jit_lvn_generic(ir, state, VN_INVALID);
48,244✔
627
}
628

629
static void jit_lvn_neg(jit_ir_t *ir, lvn_state_t *state)
33,237✔
630
{
631
   int64_t value;
33,237✔
632
   if (lvn_is_const(ir->arg1, state, &value))
33,237✔
633
      lvn_convert_mov(ir, state, LVN_CONST(-value));
28,518✔
634
   else
635
      jit_lvn_generic(ir, state, VN_INVALID);
4,719✔
636
}
33,237✔
637

638
static void jit_lvn_or(jit_ir_t *ir, lvn_state_t *state)
7,458✔
639
{
640
   int64_t lhs, rhs;
7,458✔
641
   if (lvn_can_fold(ir, state, &lhs, &rhs))
7,458✔
642
      lvn_convert_mov(ir, state, LVN_CONST(lhs | rhs));
1,298✔
643
   else if (lvn_is_const(ir->arg1, state, &lhs) && lhs == 0)
6,160✔
644
      lvn_convert_mov(ir, state, ir->arg2);
81✔
645
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
6,079✔
646
      lvn_convert_mov(ir, state, ir->arg1);
1,581✔
647
   else
648
      jit_lvn_generic(ir, state, VN_INVALID);
4,498✔
649
}
7,458✔
650

651
static void jit_lvn_xor(jit_ir_t *ir, lvn_state_t *state)
53,284✔
652
{
653
   int64_t lhs, rhs;
53,284✔
654
   if (lvn_can_fold(ir, state, &lhs, &rhs))
53,284✔
655
      lvn_convert_mov(ir, state, LVN_CONST(lhs ^ rhs));
19,388✔
656
   else if (lvn_is_const(ir->arg1, state, &lhs) && lhs == 0)
33,896✔
657
      lvn_convert_mov(ir, state, ir->arg2);
6,055✔
658
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
27,841✔
659
      lvn_convert_mov(ir, state, ir->arg1);
1✔
660
   else
661
      jit_lvn_generic(ir, state, VN_INVALID);
27,840✔
662
}
53,284✔
663

664
static void jit_lvn_shl(jit_ir_t *ir, lvn_state_t *state)
2,204✔
665
{
666
   int64_t lhs, rhs;
2,204✔
667
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs >= 0 && rhs < 64)
2,204✔
668
      lvn_convert_mov(ir, state, LVN_CONST(lhs << rhs));
841✔
669
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
1,363✔
670
      lvn_convert_mov(ir, state, ir->arg1);
88✔
671
   else
672
      jit_lvn_generic(ir, state, VN_INVALID);
1,275✔
673
}
2,204✔
674

675
static void jit_lvn_shr(jit_ir_t *ir, lvn_state_t *state)
596✔
676
{
677
   int64_t lhs, rhs;
596✔
678
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs >= 0 && rhs < 64)
596✔
679
      lvn_convert_mov(ir, state, LVN_CONST(lhs >> rhs));
406✔
680
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
190✔
681
      lvn_convert_mov(ir, state, ir->arg1);
133✔
682
   else
683
      jit_lvn_generic(ir, state, VN_INVALID);
57✔
684
}
596✔
685

686
static void jit_lvn_asr(jit_ir_t *ir, lvn_state_t *state)
23,348✔
687
{
688
   int64_t lhs, rhs;
23,348✔
689
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs >= 0 && rhs < 64)
23,348✔
690
      lvn_convert_mov(ir, state, LVN_CONST(lhs >> rhs));
31✔
691
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
23,317✔
692
      lvn_convert_mov(ir, state, ir->arg1);
1✔
693
   else
694
      jit_lvn_generic(ir, state, VN_INVALID);
23,316✔
695
}
23,348✔
696

697
static void jit_lvn_mov(jit_ir_t *ir, lvn_state_t *state)
532,907✔
698
{
699
   if (ir->arg1.kind == JIT_VALUE_REG && ir->arg1.reg == ir->result) {
532,907✔
700
      lvn_convert_nop(ir);
2,007✔
701
      return;
2,007✔
702
   }
703

704
   valnum_t vn = lvn_value_num(ir->arg1, state);
530,900✔
705
   if (state->regvn[ir->result] == vn) {
530,900✔
706
      // Result register already contains this value
707
      lvn_convert_nop(ir);
75,495✔
708
      return;
75,495✔
709
   }
710

711
   jit_lvn_generic(ir, state, vn);
455,405✔
712
}
713

714
static void jit_lvn_cmp(jit_ir_t *ir, lvn_state_t *state)
208,319✔
715
{
716
   int64_t lhs, rhs;
208,319✔
717
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
208,319✔
718
      bool result = false;
39,213✔
719
      switch (ir->cc) {
39,213✔
720
      case JIT_CC_EQ: result = (lhs == rhs); break;
37,437✔
721
      case JIT_CC_NE: result = (lhs != rhs); break;
741✔
UNCOV
722
      case JIT_CC_LT: result = (lhs < rhs); break;
×
723
      case JIT_CC_GT: result = (lhs > rhs); break;
40✔
724
      case JIT_CC_LE: result = (lhs <= rhs); break;
30✔
725
      case JIT_CC_GE: result = (lhs >= rhs); break;
965✔
UNCOV
726
      default:
×
727
         fatal_trace("unhandled condition code in jit_lvn_cmp");
728
      }
729

730
      state->regvn[state->func->nregs] = result;
39,213✔
731
   }
732
   else
733
      state->regvn[state->func->nregs] = VN_INVALID;
169,106✔
734
}
208,319✔
735

736
static void jit_lvn_ccmp(jit_ir_t *ir, lvn_state_t *state)
19,889✔
737
{
738
   const int fconst = state->regvn[state->func->nregs];
19,889✔
739
   if (fconst == 0)
19,889✔
740
      lvn_convert_nop(ir);
3✔
741
   else if (fconst != VN_INVALID) {
19,886✔
742
      ir->op = J_CMP;
968✔
743
      jit_lvn_cmp(ir, state);
968✔
744
   }
745
   else
746
      lvn_kill_flags(state);
18,918✔
747
}
19,889✔
748

749
static void jit_lvn_csel(jit_ir_t *ir, lvn_state_t *state)
95,008✔
750
{
751
   const int fconst = state->regvn[state->func->nregs];
95,008✔
752
   if (fconst != VN_INVALID)
95,008✔
753
      lvn_convert_mov(ir, state, fconst ? ir->arg1 : ir->arg2);
45,978✔
754
   else
755
      state->regvn[ir->result] = VN_INVALID;
49,030✔
756
}
95,008✔
757

758
static void jit_lvn_cset(jit_ir_t *ir, lvn_state_t *state)
56,599✔
759
{
760
   const int fconst = state->regvn[state->func->nregs];
56,599✔
761
   if (fconst != VN_INVALID)
56,599✔
762
      lvn_convert_mov(ir, state, LVN_CONST(fconst));
7✔
763
   else
764
      state->regvn[ir->result] = VN_INVALID;
56,592✔
765
}
56,599✔
766

767
static void jit_lvn_jump(jit_ir_t *ir, lvn_state_t *state)
179,393✔
768
{
769
   assert(ir->arg1.label < state->func->nirs);
179,393✔
770
   jit_ir_t *dest = &(state->func->irbuf[ir->arg1.label]);
179,393✔
771

772
   const int fconst = state->regvn[state->func->nregs];
179,393✔
773
   if (ir->cc != JIT_CC_NONE && fconst != VN_INVALID) {
179,393✔
774
      if (fconst == (ir->cc == JIT_CC_T))
926✔
775
         ir->cc = JIT_CC_NONE;
89✔
776
      else {
777
         lvn_convert_nop(ir);
837✔
778
         return;
837✔
779
      }
780
   }
781

782
   if (dest == ir + 1)
178,556✔
783
      lvn_convert_nop(ir);
17,340✔
784
   else if (dest->op == J_JUMP && dest->cc == JIT_CC_NONE)
161,216✔
785
      ir->arg1 = dest->arg1;     // Simple jump threading
842✔
786
}
787

788
static void jit_lvn_clamp(jit_ir_t *ir, lvn_state_t *state)
38,529✔
789
{
790
   int64_t value;
38,529✔
791
   if (lvn_is_const(ir->arg1, state, &value))
38,529✔
792
      lvn_convert_mov(ir, state, LVN_CONST(value < 0 ? 0 : value));
19,207✔
793
   else
794
      jit_lvn_generic(ir, state, VN_INVALID);
19,322✔
795
}
38,529✔
796

797
static void jit_lvn_recv(jit_ir_t *ir, lvn_state_t *state)
184,031✔
798
{
799
   const unsigned nth = ir->arg1.int64;
184,031✔
800
   if (nth >= TRACK_ARGS)
184,031✔
801
      return;
802

803
   const unsigned vreg = state->func->nregs + 1 + nth;
182,952✔
804

805
   if (state->regvn[vreg] != VN_INVALID)
182,952✔
806
      state->regvn[ir->result] = state->regvn[vreg];
1✔
807
   else {
808
      valnum_t vn = lvn_new_value(state);
182,951✔
809
      state->regvn[vreg] = vn;
182,951✔
810
      state->regvn[ir->result] = vn;
182,951✔
811
   }
812
}
813

814
static void jit_lvn_send(jit_ir_t *ir, lvn_state_t *state)
883,790✔
815
{
816
   const unsigned nth = ir->arg1.int64;
883,790✔
817
   if (nth >= TRACK_ARGS)
883,790✔
818
      return;
819
   else if (ir->arg2.kind != JIT_VALUE_REG)
877,559✔
820
      return;
821

822
   const unsigned vreg = state->func->nregs + 1 + nth;
388,331✔
823

824
   valnum_t vn = lvn_value_num(ir->arg2, state);
388,331✔
825
   if (vn != VN_INVALID && state->regvn[vreg] == vn)
388,331✔
826
      lvn_convert_nop(ir);   // Already in argument slot
21,307✔
827
}
828

829
static void jit_lvn_copy(jit_ir_t *ir, lvn_state_t *state)
24,645✔
830
{
831
   if (ir->op == MACRO_MOVE && ir->arg2.kind == JIT_ADDR_CPOOL)
24,645✔
832
      ir->op = MACRO_COPY;   // Cannot overlap when copying from constant pool
14,606✔
833

834
   int64_t count;
24,645✔
835
   if (lvn_get_const(state->regvn[ir->result], state, &count)) {
24,645✔
836
      if (count == 0) {
18,683✔
837
         lvn_convert_nop(ir);
1✔
838
         return;
1,907✔
839
      }
840
      else if (count > 0 && count <= 8 && is_power_of_2(count)
18,682✔
841
               && ir->arg2.kind == JIT_ADDR_CPOOL) {
2,772✔
842
         // Replace a small copy from the constant pool with a store
843
         const void *src = state->func->cpool + ir->arg2.int64;
1,905✔
844
         ir->op = J_STORE;
1,905✔
845
         ir->arg2 = ir->arg1;
1,905✔
846
         ir->result = JIT_REG_INVALID;
1,905✔
847

848
         switch (count) {
1,905✔
849
         case 8:
642✔
850
            ir->size = JIT_SZ_64;
642✔
851
            ir->arg1 = LVN_CONST(unaligned_load(src, uint64_t));
642✔
852
            break;
642✔
853
         case 4:
521✔
854
            ir->size = JIT_SZ_32;
521✔
855
            ir->arg1 = LVN_CONST(unaligned_load(src, uint32_t));
521✔
856
            break;
521✔
857
         case 2:
458✔
858
            ir->size = JIT_SZ_16;
458✔
859
            ir->arg1 = LVN_CONST(unaligned_load(src, uint16_t));
458✔
860
            break;
458✔
861
         case 1:
284✔
862
            ir->size = JIT_SZ_8;
284✔
863
            ir->arg1 = LVN_CONST(*(uint8_t *)src);
284✔
864
            break;
284✔
UNCOV
865
         default:
×
UNCOV
866
            jit_dump_with_mark(state->func, ir - state->func->irbuf);
×
867
            fatal_trace("unhandled constant copy from constant pool");
868
         }
869

870
         return;
1,905✔
871
      }
872
   }
873

874
   // Clobbers the count register
875
   state->regvn[ir->result] = lvn_new_value(state);
22,739✔
876
}
877

878
static void jit_lvn_bzero(jit_ir_t *ir, lvn_state_t *state)
3,835✔
879
{
880
   int64_t count;
3,835✔
881
   if (lvn_get_const(state->regvn[ir->result], state, &count)) {
3,835✔
882
      if (count == 0) {
1,485✔
883
         lvn_convert_nop(ir);
1✔
884
         return;
397✔
885
      }
886
      else if (count > 0 && count <= 8 && is_power_of_2(count)) {
1,484✔
887
         // Convert to a store of zero
888
         ir->op     = J_STORE;
395✔
889
         ir->arg2   = ir->arg1;
395✔
890
         ir->arg1   = LVN_CONST(0);
395✔
891
         ir->result = JIT_REG_INVALID;
395✔
892
         ir->size   = ilog2(count);
395✔
893
         return;
395✔
894
      }
895
   }
896

897
   // Clobbers the count register
898
   state->regvn[ir->result] = lvn_new_value(state);
3,439✔
899
}
900

901
static void jit_lvn_memset(jit_ir_t *ir, lvn_state_t *state)
1,612✔
902
{
903
   int64_t value;
1,612✔
904
   if (lvn_is_const(ir->arg2, state, &value)) {
1,612✔
905
      static const uint64_t compress[JIT_SZ_UNSPEC] = {
1,565✔
906
         0x01, 0x0101, 0x01010101, 0x0101010101010101
907
      };
908
      static const uint64_t expand[JIT_SZ_UNSPEC] = {
1,565✔
909
         0x0101010101010101, 0x0001000100010001, 0x100000001, 1,
910
      };
911
      static const uint64_t mask[JIT_SZ_UNSPEC] = {
1,565✔
912
         0xff, 0xffff, 0xffffffff, 0xffffffffffffffff
913
      };
914

915
      uint64_t bits = value;
1,565✔
916
      if (value == 0) {
1,565✔
917
         ir->op = MACRO_BZERO;
7✔
918
         ir->size = JIT_SZ_UNSPEC;
7✔
919
         ir->arg2.kind = JIT_VALUE_INVALID;
7✔
920
         jit_lvn_bzero(ir, state);
7✔
921
         return;
149✔
922
      }
923
      else if (bits == (bits & 0xff) * compress[ir->size]) {
1,558✔
924
         ir->size = JIT_SZ_8;
650✔
925
         ir->arg2 = LVN_CONST((bits &= 0xff));
650✔
926
      }
927

928
      int64_t bytes;
1,558✔
929
      if (lvn_get_const(state->regvn[ir->result], state, &bytes)) {
1,558✔
930
         if (bytes == 0) {
952✔
931
            lvn_convert_nop(ir);
1✔
932
            return;
1✔
933
         }
934
         else if (bytes > 0 && bytes <= 8 && is_power_of_2(bytes)) {
951✔
935
            // Convert to a store
936
            const jit_size_t newsz = ilog2(bytes);
134✔
937
            ir->op     = J_STORE;
134✔
938
            ir->arg2   = ir->arg1;
134✔
939
            ir->arg1   = LVN_CONST((bits * expand[ir->size]) & mask[newsz]);
134✔
940
            ir->result = JIT_REG_INVALID;
134✔
941
            ir->size   = newsz;
134✔
942
            return;
134✔
943
         }
944
      }
945
   }
946

947
   // Clobbers the count register
948
   state->regvn[ir->result] = lvn_new_value(state);
1,470✔
949
}
950

951
static void jit_lvn_exp(jit_ir_t *ir, lvn_state_t *state)
341✔
952
{
953
   int64_t base, exp;
341✔
954
   if (ir->cc != JIT_CC_NONE) {
341✔
955
      lvn_kill_flags(state);
339✔
956
      jit_lvn_generic(ir, state, VN_INVALID);
339✔
957
   }
958
   else if (lvn_can_fold(ir, state, &base, &exp))
2✔
959
      lvn_convert_mov(ir, state, LVN_CONST(ipow(base, exp)));
1✔
960
   else if (lvn_is_const(ir->arg1, state, &base) && base == 2) {
1✔
961
      ir->op = J_SHL;
1✔
962
      ir->arg1.int64 = 1;
1✔
963
      jit_lvn_generic(ir, state, VN_INVALID);
1✔
964
   }
965
   else
UNCOV
966
      jit_lvn_generic(ir, state, VN_INVALID);
×
967
}
341✔
968

969
void jit_do_lvn(jit_func_t *f)
42,257✔
970
{
971
   lvn_state_t state = {
42,257✔
972
      .tabsz  = next_power_of_2(f->nirs),
42,257✔
973
      .func   = f,
974
      .nextvn = FIRST_VN
975
   };
976

977
   state.regvn = xmalloc_array(f->nregs + 1 + TRACK_ARGS, sizeof(valnum_t));
42,257✔
978
   state.hashtab = xcalloc_array(state.tabsz, sizeof(lvn_tab_t));
42,257✔
979

980
   bool reset = true;
42,257✔
981
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs;
4,504,138✔
982
        reset = cfg_is_terminator(f, ir), ir++) {
4,461,881✔
983

984
      if (reset || ir->target) {
4,461,881✔
985
         for (int j = 0; j < f->nregs + 1 + TRACK_ARGS; j++)
26,870,504✔
986
            state.regvn[j] = VN_INVALID;
26,532,549✔
987
      }
988

989
      switch (ir->op) {
4,461,881✔
990
      case J_MUL: jit_lvn_mul(ir, &state); break;
77,814✔
991
      case J_DIV: jit_lvn_div(ir, &state); break;
1,317✔
992
      case J_ADD: jit_lvn_add(ir, &state); break;
130,676✔
993
      case J_SUB: jit_lvn_sub(ir, &state); break;
99,782✔
994
      case J_NEG: jit_lvn_neg(ir, &state); break;
33,237✔
995
      case J_OR:  jit_lvn_or(ir, &state); break;
7,458✔
996
      case J_XOR: jit_lvn_xor(ir, &state); break;
53,284✔
997
      case J_SHL: jit_lvn_shl(ir, &state); break;
2,204✔
998
      case J_SHR: jit_lvn_shr(ir, &state); break;
596✔
999
      case J_ASR: jit_lvn_asr(ir, &state); break;
23,348✔
1000
      case J_MOV: jit_lvn_mov(ir, &state); break;
278,084✔
1001
      case J_CMP: jit_lvn_cmp(ir, &state); break;
207,351✔
1002
      case J_CCMP: jit_lvn_ccmp(ir, &state); break;
19,889✔
1003
      case J_CSEL: jit_lvn_csel(ir, &state); break;
95,008✔
1004
      case J_CSET: jit_lvn_cset(ir, &state); break;
56,599✔
1005
      case J_JUMP: jit_lvn_jump(ir, &state); break;
179,393✔
1006
      case J_CLAMP: jit_lvn_clamp(ir, &state); break;
38,529✔
1007
      case J_RECV: jit_lvn_recv(ir, &state); break;
184,031✔
1008
      case J_SEND: jit_lvn_send(ir, &state); break;
883,790✔
1009
      case MACRO_MOVE:
24,645✔
1010
      case MACRO_COPY: jit_lvn_copy(ir, &state); break;
24,645✔
1011
      case MACRO_BZERO: jit_lvn_bzero(ir, &state); break;
3,828✔
1012
      case MACRO_MEMSET: jit_lvn_memset(ir, &state); break;
1,612✔
1013
      case MACRO_EXP: jit_lvn_exp(ir, &state); break;
341✔
1014
      case MACRO_EXIT:
194,922✔
1015
      case MACRO_VEC4OP:
1016
      case MACRO_VEC2OP:
1017
      case MACRO_PACK:
1018
      case MACRO_UNPACK:
1019
      case J_CALL:
1020
         lvn_kill_args(&state);
194,922✔
1021
         // Fall-through
1022
      default:
2,059,065✔
1023
         if (jit_writes_flags(ir))
2,059,065✔
1024
            state.regvn[f->nregs] = VN_INVALID;
2,985✔
1025
         if (ir->result != JIT_REG_INVALID)
2,059,065✔
1026
            state.regvn[ir->result] = VN_INVALID;
488,338✔
1027
      }
1028
   }
1029

1030
   free(state.regvn);
42,257✔
1031
   free(state.hashtab);
42,257✔
1032
}
42,257✔
1033

1034
////////////////////////////////////////////////////////////////////////////////
1035
// Copy propagation
1036

1037
void jit_do_cprop(jit_func_t *f)
42,246✔
1038
{
1039
   jit_value_t *map LOCAL = xmalloc_array(f->nregs, sizeof(jit_value_t));
84,492✔
1040

1041
   bool reset = true;
42,246✔
1042
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs;
4,504,010✔
1043
        reset = cfg_is_terminator(f, ir), ir++) {
4,461,764✔
1044

1045
      if (reset || ir->target) {
4,461,764✔
1046
         for (int j = 0; j < f->nregs; j++)
23,828,713✔
1047
            map[j].kind = JIT_VALUE_INVALID;
23,490,777✔
1048
      }
1049

1050
      if (ir->arg1.kind == JIT_VALUE_REG) {
4,461,764✔
1051
         jit_value_t copy = map[ir->arg1.reg];
905,530✔
1052
         if (copy.kind != JIT_VALUE_INVALID)
905,530✔
1053
            ir->arg1 = copy;
211,125✔
1054
      }
1055

1056
      if (ir->arg2.kind == JIT_VALUE_REG) {
4,461,764✔
1057
         jit_value_t copy = map[ir->arg2.reg];
684,199✔
1058
         if (copy.kind != JIT_VALUE_INVALID)
684,199✔
1059
            ir->arg2 = copy;
149,787✔
1060
      }
1061

1062
      if (ir->result == JIT_REG_INVALID)
4,461,764✔
1063
         continue;
2,941,045✔
1064
      else if (ir->op == MACRO_MEMSET || ir->op == MACRO_COPY
1,520,719✔
1065
               || ir->op == MACRO_MOVE || ir->op == MACRO_BZERO)
1,506,552✔
1066
         continue;   // Does not write to result
27,641✔
1067
      else if (map[ir->result].kind != JIT_VALUE_INVALID) {
1,493,078✔
1068
         // Invalidate any existing copies of this register
1069
         for (int j = 0; j < f->nregs; j++) {
17,703,376✔
1070
            if (map[j].kind == JIT_VALUE_REG && map[j].reg == ir->result)
17,492,576✔
1071
               map[j].kind = JIT_VALUE_INVALID;
134,593✔
1072
         }
1073
      }
1074

1075
      if (ir->op == J_MOV) {
1,493,078✔
1076
         map[ir->result] = ir->arg1;
457,608✔
1077
         if (ir->arg1.kind == JIT_VALUE_REG)
457,608✔
1078
            map[ir->arg1.reg] = ir->arg1;
255,205✔
1079
      }
1080
      else
1081
         map[ir->result].kind = JIT_VALUE_INVALID;
1,035,470✔
1082
   }
1083
}
42,246✔
1084

1085
////////////////////////////////////////////////////////////////////////////////
1086
// Dead code elimination
1087

1088
typedef struct {
1089
   int       *count;
1090
   jit_reg_t *renumber;
1091
   jit_reg_t  next;
1092
} dce_state_t;
1093

1094
static void dce_count_use(jit_value_t value, dce_state_t *state)
8,923,526✔
1095
{
1096
   if (value.kind == JIT_VALUE_REG || value.kind == JIT_ADDR_REG) {
8,923,526✔
1097
      if (state->count[value.reg]++ == 0)
2,031,731✔
1098
         state->renumber[value.reg] = state->next++;
528,051✔
1099
   }
1100
}
8,923,526✔
1101

1102
static void dce_renumber(jit_value_t *value, dce_state_t *state)
8,923,526✔
1103
{
1104
   if (value->kind == JIT_VALUE_REG || value->kind == JIT_ADDR_REG)
8,923,526✔
1105
      value->reg = state->renumber[value->reg];
2,031,731✔
1106
}
8,923,526✔
1107

1108
void jit_do_dce(jit_func_t *f)
42,246✔
1109
{
1110
   dce_state_t state = {};
42,246✔
1111
   state.count = xcalloc_array(f->nregs, sizeof(int));
42,246✔
1112
   state.renumber = xcalloc_array(f->nregs, sizeof(jit_reg_t));
42,246✔
1113

1114
#ifdef DEBUG
1115
   for (int i = 0; i < f->nregs; i++)
732,408✔
1116
      state.renumber[i] = JIT_REG_INVALID;
690,162✔
1117
#endif
1118

1119
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs; ir++) {
4,504,009✔
1120
      dce_count_use(ir->arg1, &state);
4,461,763✔
1121
      dce_count_use(ir->arg2, &state);
4,461,763✔
1122

1123
      const bool must_keep_result =
8,923,526✔
1124
         ir->result != JIT_REG_INVALID &&
4,461,763✔
1125
         (cfg_reads_result(ir) || jit_writes_flags(ir));
1,520,714✔
1126

1127
      if (must_keep_result && state.count[ir->result]++ == 0)
4,461,763✔
1128
         state.renumber[ir->result] = state.next++;
11,418✔
1129
   }
1130

1131
   for (jit_ir_t *ir = f->irbuf, *cmp = NULL; ir < f->irbuf + f->nirs; ir++) {
4,504,009✔
1132
      dce_renumber(&ir->arg1, &state);
4,461,763✔
1133
      dce_renumber(&ir->arg2, &state);
4,461,763✔
1134

1135
      if (jit_reads_flags(ir) || cfg_is_terminator(f, ir))
4,461,763✔
1136
         cmp = NULL;   // Consumed flags
1137
      else if (jit_writes_flags(ir)) {
4,094,237✔
1138
         if (cmp != NULL)
218,240✔
1139
            lvn_convert_nop(cmp);   // Flags are never read
31,324✔
1140
         if (ir->op == J_CMP || ir->op == J_FCMP)
218,240✔
1141
            cmp = ir;
210,598✔
1142
         else
1143
            cmp = NULL;
1144
      }
1145

1146
      if (ir->result == JIT_REG_INVALID)
4,461,763✔
1147
         continue;
2,941,049✔
1148
      else if (state.count[ir->result] == 0 && !jit_writes_flags(ir))
1,520,714✔
1149
         lvn_convert_nop(ir);
228,624✔
1150
      else
1151
         ir->result = state.renumber[ir->result];
1,292,090✔
1152
   }
1153

1154
   f->nregs = state.next;
42,246✔
1155

1156
   free(state.count);
42,246✔
1157
   free(state.renumber);
42,246✔
1158
}
42,246✔
1159

1160
////////////////////////////////////////////////////////////////////////////////
1161
// NOP deletion
1162

1163
void jit_delete_nops(jit_func_t *f)
42,246✔
1164
{
1165
   jit_label_t *map LOCAL = xmalloc_array(f->nirs, sizeof(jit_label_t));
42,246✔
1166

1167
   int wptr = 0;
42,246✔
1168
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs; ir++) {
4,504,011✔
1169
      map[ir - f->irbuf] = wptr;
4,461,765✔
1170

1171
      if (ir->op != J_NOP) {
4,461,765✔
1172
         jit_ir_t *dest = f->irbuf + wptr++;
4,084,789✔
1173
         if (dest != ir)
4,084,789✔
1174
            *dest = *ir;
2,998,024✔
1175
      }
1176
   }
1177

1178
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs; ir++) {
4,504,011✔
1179
      if (ir->arg1.kind == JIT_VALUE_LABEL) {
4,461,765✔
1180
         ir->arg1.label = map[ir->arg1.label];
176,893✔
1181
         f->irbuf[ir->arg1.label].target = 1;
176,893✔
1182
      }
1183
      if (ir->arg2.kind == JIT_VALUE_LABEL) {
4,461,765✔
1184
         ir->arg2.label = map[ir->arg2.label];
21,668✔
1185
         f->irbuf[ir->arg2.label].target = 1;
21,668✔
1186
      }
1187
   }
1188

1189
   f->nirs = wptr;
42,246✔
1190
}
42,246✔
1191

1192
////////////////////////////////////////////////////////////////////////////////
1193
// Memory to register promotion
1194

1195
static inline jit_reg_t get_value_reg(jit_value_t value)
2,068,544✔
1196
{
1197
   switch (value.kind) {
2,068,544✔
1198
   case JIT_ADDR_REG:
614,333✔
1199
   case JIT_VALUE_REG:
1200
      return value.reg;
614,333✔
1201
   default:
1202
      return JIT_REG_INVALID;
1203
   }
1204
}
1205

1206
static bool try_promote_allocation(jit_func_t *f, jit_ir_t *alloc)
16,808✔
1207
{
1208
   assert(alloc->arg2.kind == JIT_VALUE_INT64);
16,808✔
1209

1210
   if (alloc->arg2.int64 > 8 || !is_power_of_2(alloc->arg2.int64))
16,808✔
1211
      return false;
1212

1213
   jit_size_t sz = JIT_SZ_UNSPEC;
12,498✔
1214
   switch (alloc->arg2.int64) {
12,498✔
1215
   case 1: sz = JIT_SZ_8; break;
1216
   case 2: sz = JIT_SZ_16; break;
1217
   case 4: sz = JIT_SZ_32; break;
1218
   case 8: sz = JIT_SZ_64; break;
1219
   }
1220

1221
   jit_reg_t reg = alloc->result;
12,498✔
1222
   assert(reg != JIT_REG_INVALID);
12,498✔
1223

1224
   int first = -1, last = -1;
1225
   for (int i = 0; i < f->nirs; i++) {
1,079,861✔
1226
      jit_ir_t *ir = &(f->irbuf[i]);
1,079,812✔
1227
      if (ir->result == reg && ir != alloc)
1,079,812✔
1228
         return false;
1229
      else if (ir->op == J_LOAD || ir->op == J_STORE) {
1,079,812✔
1230
         jit_value_t addr = ir->op == J_LOAD ? ir->arg1 : ir->arg2;
90,944✔
1231
         if (get_value_reg(addr) != reg)
90,944✔
1232
            continue;
78,818✔
1233
         else if (addr.disp != 0 || ir->size != sz)
12,126✔
1234
            return false;
1235
         else if (ir->op == J_STORE && get_value_reg(ir->arg1) == reg)
2✔
1236
            return false;
1237
         else if (first == -1)
2✔
1238
            first = last = i;
1239
         else
1240
            last = i;
1✔
1241
      }
1242
      else if (get_value_reg(ir->arg1) == reg)
988,868✔
1243
         return false;
1244
      else if (get_value_reg(ir->arg2) == reg)
988,728✔
1245
         return false;
1246
   }
1247

1248
   if (first != -1) {
49✔
1249
      for (jit_ir_t *ir = f->irbuf + first; ir <= f->irbuf + last; ir++) {
4✔
1250
         if (ir->op == J_STORE && get_value_reg(ir->arg2) == reg) {
3✔
1251
            ir->op        = J_MOV;
1✔
1252
            ir->size      = JIT_SZ_UNSPEC;
1✔
1253
            ir->cc        = JIT_CC_NONE;
1✔
1254
            ir->result    = reg;
1✔
1255
            ir->arg2.kind = JIT_VALUE_INVALID;
1✔
1256
         }
1257
         else if (ir->op == J_LOAD && get_value_reg(ir->arg1) == reg) {
2✔
1258
            ir->op        = J_MOV;
1✔
1259
            ir->size      = JIT_SZ_UNSPEC;
1✔
1260
            ir->cc        = JIT_CC_NONE;
1✔
1261
            ir->arg1.kind = JIT_VALUE_REG;
1✔
1262
            ir->arg1.reg  = reg;
1✔
1263
         }
1264
      }
1265
   }
1266

1267
   alloc->op        = J_NOP;
49✔
1268
   alloc->size      = JIT_SZ_UNSPEC;
49✔
1269
   alloc->cc        = JIT_CC_NONE;
49✔
1270
   alloc->result    = JIT_REG_INVALID;
49✔
1271
   alloc->arg1.kind = JIT_VALUE_INVALID;
49✔
1272
   alloc->arg2.kind = JIT_VALUE_INVALID;
49✔
1273

1274
   return true;
49✔
1275
}
1276

1277
void jit_do_mem2reg(jit_func_t *f)
42,245✔
1278
{
1279
   if (f->framesz == 0)
42,245✔
1280
      return;
1281

1282
   int replaced = 0;
1283
   for (int i = 0; i < f->nirs; i++) {
195,099✔
1284
      jit_ir_t *ir = &(f->irbuf[i]);
195,098✔
1285
      if (ir->op == MACRO_SALLOC && try_promote_allocation(f, ir))
195,098✔
1286
         replaced++;
49✔
1287
      else if (cfg_is_terminator(f, ir))
195,049✔
1288
         break;
1289
   }
1290

1291
   if (replaced == 0)
6,466✔
1292
      return;
1293

1294
   int newsize = 0;
1295
   for (int i = 0; i < f->nirs; i++) {
247✔
1296
      jit_ir_t *ir = &(f->irbuf[i]);
246✔
1297
      if (ir->op == MACRO_SALLOC) {
246✔
1298
         assert(ir->arg1.kind == JIT_VALUE_INT64);
2✔
1299
         assert(ir->arg1.int64 >= newsize);
2✔
1300
         ir->arg1.int64 = newsize;
2✔
1301

1302
         assert(ir->arg2.kind == JIT_VALUE_INT64);
2✔
1303
         newsize += ALIGN_UP(ir->arg2.int64, 8);
2✔
1304
      }
1305
      else if (cfg_is_terminator(f, ir))
244✔
1306
         break;
1307
   }
1308

1309
   assert(newsize <= f->framesz);
49✔
1310
   f->framesz = newsize;
49✔
1311
}
1312

1313
////////////////////////////////////////////////////////////////////////////////
1314
// Register allocation
1315

1316
typedef struct {
1317
   jit_reg_t   reg;
1318
   unsigned    first;
1319
   unsigned    last;
1320
} lscan_interval_t;
1321

1322
static void lscan_grow_range(jit_reg_t reg, lscan_interval_t *li, int pos)
405✔
1323
{
1324
   assert(reg != JIT_REG_INVALID);
405✔
1325
   li[reg].first = MIN(li[reg].first, pos);
405✔
1326
   li[reg].last = MAX(li[reg].last, pos);
405✔
1327
}
405✔
1328

1329
static void lscan_walk_cfg(jit_func_t *f, jit_cfg_t *cfg, int bi,
75✔
1330
                           lscan_interval_t *li, bit_mask_t *visited)
1331
{
1332
   if (mask_test(visited, bi))
75✔
1333
      return;
1334

1335
   mask_set(visited, bi);
73✔
1336

1337
   jit_block_t *b = &(cfg->blocks[bi]);
73✔
1338

1339
   for (size_t bit = -1; mask_iter(&b->livein, &bit);)
78✔
1340
      lscan_grow_range(bit, li, b->first);
5✔
1341

1342
   for (int i = b->first; i <= b->last; i++) {
427✔
1343
      jit_ir_t *ir = &(f->irbuf[i]);
354✔
1344

1345
      if (ir->result != JIT_REG_INVALID)
354✔
1346
         lscan_grow_range(ir->result, li, i);
207✔
1347

1348
      if (ir->arg1.kind == JIT_VALUE_REG || ir->arg1.kind == JIT_ADDR_REG)
354✔
1349
         lscan_grow_range(ir->arg1.reg, li, i);
79✔
1350

1351
      if (ir->arg2.kind == JIT_VALUE_REG || ir->arg2.kind == JIT_ADDR_REG)
354✔
1352
         lscan_grow_range(ir->arg2.reg, li, i);
108✔
1353
   }
1354

1355
   for (size_t bit = -1; mask_iter(&b->liveout, &bit); )
79✔
1356
      lscan_grow_range(bit, li, b->last);
6✔
1357

1358
   for (int i = 0; i < b->out.count; i++) {
81✔
1359
      const int next = jit_get_edge(&(b->out), i);
8✔
1360
      lscan_walk_cfg(f, cfg, next, li, visited);
8✔
1361
   }
1362
}
1363

1364
static int lscan_interval_cmp(const void *a, const void *b)
267✔
1365
{
1366
   const lscan_interval_t *la = a;
267✔
1367
   const lscan_interval_t *lb = b;
267✔
1368

1369
   if (la->first < lb->first)
267✔
1370
      return -1;
1371
   else if (la->first > lb->first)
6✔
1372
      return 1;
1373
   else
1374
      return 0;
2✔
1375
}
1376

1377
static inline void lscan_shift_active(lscan_interval_t **active, unsigned to,
193✔
1378
                                      unsigned from, unsigned count)
1379
{
1380
   if (to != from && count == 1)
193✔
1381
      active[to] = active[from];
7✔
1382
   else if (to != from && count > 1)
186✔
UNCOV
1383
      memmove(active + to, active + from, count * sizeof(lscan_interval_t *));
×
1384
}
193✔
1385

1386
int jit_do_lscan(jit_func_t *f, phys_slot_t *slots, uint64_t badmask)
67✔
1387
{
1388
   //
1389
   // Massimiliano Poletto and Vivek Sarkar
1390
   // Linear Scan Register Allocation
1391
   // ACM Trans. Program. Lang. Syst., Vol. 21, 5 (sep 1999), 895--913
1392
   // https://doi.org/10.1145/330249.330250
1393
   //
1394

1395
   jit_cfg_t *cfg = jit_get_cfg(f);
67✔
1396

1397
   lscan_interval_t *li LOCAL =
134✔
1398
      xmalloc_array(f->nregs, sizeof(lscan_interval_t));
67✔
1399
   for (int i = 0; i < f->nregs; i++) {
258✔
1400
      li[i].reg = i;
191✔
1401
      li[i].first = UINT_MAX;
191✔
1402
      li[i].last = 0;
191✔
1403
      slots[i] = UINT_MAX;
191✔
1404
   }
1405

1406
   bit_mask_t visited;
67✔
1407
   mask_init(&visited, cfg->nblocks);
67✔
1408
   lscan_walk_cfg(f, cfg, 0, li, &visited);
67✔
1409
   mask_free(&visited);
67✔
1410

1411
   jit_free_cfg(cfg);
67✔
1412

1413
   qsort(li, f->nregs, sizeof(lscan_interval_t), lscan_interval_cmp);
67✔
1414

1415
   const int Rint = 32 - __builtin_popcountl(badmask & 0xffffffff);
67✔
1416
   uint32_t freeregs = ~(badmask & 0xffffffff);
67✔
1417

1418
   lscan_interval_t **active LOCAL =
67✔
1419
      xmalloc_array(f->nregs, sizeof(lscan_interval_t *));
67✔
1420
   unsigned nactive = 0, next_spill = STACK_BASE;
67✔
1421

1422
   for (int i = 0; i < f->nregs; i++) {
258✔
1423
      // Expire old intervals
1424
      int expire = 0;
1425
      for (; expire < nactive; expire++) {
229✔
1426
         const lscan_interval_t *next = active[expire];
147✔
1427
         if (next->last >= li[i].first)
147✔
1428
            break;
1429
         else {
1430
            assert(slots[next->reg] < STACK_BASE);
38✔
1431
            freeregs |= (1 << slots[next->reg]);
38✔
1432
         }
1433
      }
1434

1435
      lscan_shift_active(active, 0, expire, nactive - expire);
191✔
1436
      nactive -= expire;
191✔
1437

1438
      const lscan_interval_t *this = &li[i];
191✔
1439

1440
      bool allocated = false;
191✔
1441
      if (nactive == Rint) {
191✔
1442
         // Spill at interval
1443
         const lscan_interval_t *spill = active[nactive - 1];
44✔
1444
         if (spill->last > this->last) {
44✔
1445
            slots[this->reg] = slots[spill->reg];
2✔
1446
            slots[spill->reg] = next_spill++;
2✔
1447
            nactive--;
2✔
1448
            allocated = true;
2✔
1449
         }
1450
         else
1451
            slots[this->reg] = next_spill++;
42✔
1452
      }
1453
      else {
1454
         const int bit = __builtin_ffsl(freeregs) - 1;
147✔
1455
         assert(bit >= 0);
147✔
1456
         assert(freeregs & (1 << bit));
147✔
1457
         freeregs &= ~(1 << bit);
147✔
1458
         slots[this->reg] = bit;
147✔
1459
         allocated = true;
147✔
1460
      }
1461

1462
      if (allocated) {
191✔
1463
         // Add to active list, sorted by increasing end point
1464
         int pos = 0;
1465
         for (; pos < nactive && active[pos]->last <= li[i].last; pos++)
214✔
1466
            assert(active[pos] != &li[i]);
65✔
1467
         assert(pos < f->nregs);
149✔
1468
         if (pos < nactive)
149✔
1469
            lscan_shift_active(active, pos + 1, pos, nactive - pos);
2✔
1470
         active[pos] = &li[i];
149✔
1471
         nactive++;
149✔
1472
      }
1473
   }
1474

1475
   return next_spill - STACK_BASE;
67✔
1476
}