• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nickg / nvc / 25327563677

04 May 2026 03:17PM UTC coverage: 92.23% (+0.03%) from 92.197%
25327563677

push

github

nickg
Fix $readmemh when bit width is not multiple of 4

10 of 14 new or added lines in 1 file covered. (71.43%)

473 existing lines in 11 files now uncovered.

77515 of 84045 relevant lines covered (92.23%)

626937.27 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.56
/src/jit/jit-optim.c
1
//
2
//  Copyright (C) 2022-2025  Nick Gasson
3
//
4
//  This program is free software: you can redistribute it and/or modify
5
//  it under the terms of the GNU General Public License as published by
6
//  the Free Software Foundation, either version 3 of the License, or
7
//  (at your option) any later version.
8
//
9
//  This program is distributed in the hope that it will be useful,
10
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
//  GNU General Public License for more details.
13
//
14
//  You should have received a copy of the GNU General Public License
15
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

18
#include "util.h"
19
#include "array.h"
20
#include "jit/jit-priv.h"
21

22
#include <assert.h>
23
#include <stdlib.h>
24
#include <string.h>
25
#include <limits.h>
26

27
////////////////////////////////////////////////////////////////////////////////
28
// Control flow graph construction
29

30
static bool cfg_is_terminator(jit_func_t *func, jit_ir_t *ir)
26,297,868✔
31
{
32
   if (ir->op == MACRO_CASE)
26,297,868✔
33
      return ir + 1 < func->irbuf + func->nirs && (ir + 1)->op != MACRO_CASE;
213,477✔
34
   else
35
      return ir->op == J_JUMP || ir->op == J_RET || ir->op == MACRO_REEXEC;
51,326,719✔
36
}
37

38
static void cfg_add_one_edge(jit_edge_list_t *list, unsigned edge)
257,936✔
39
{
40
   if (list->count < 4)
257,936✔
41
      list->u.edges[list->count++] = edge;
252,917✔
42
   else if (list->count == 4) {
5,019✔
43
      unsigned *ptr = xmalloc_array(16, sizeof(unsigned));
784✔
44
      memcpy(ptr, list->u.edges, 4 * sizeof(unsigned));
784✔
45

46
      list->max = 16;
784✔
47
      list->u.external = ptr;
784✔
48
      list->u.external[list->count++] = edge;
784✔
49
   }
50
   else if (list->count == list->max) {
4,235✔
51
      list->max *= 2;
77✔
52
      list->u.external =
154✔
53
         xrealloc_array(list->u.external, list->max, sizeof(unsigned));
77✔
54
      list->u.external[list->count++] = edge;
77✔
55
   }
56
   else
57
      list->u.external[list->count++] = edge;
4,158✔
58
}
257,936✔
59

60
static void cfg_add_edge(jit_cfg_t *cfg, jit_block_t *from, jit_block_t *to)
128,968✔
61
{
62
   cfg_add_one_edge(&(from->out), to - cfg->blocks);
128,968✔
63
   cfg_add_one_edge(&(to->in), from - cfg->blocks);
128,968✔
64
}
128,968✔
65

66
static jit_reg_t cfg_get_reg(jit_value_t value)
3,972,656✔
67
{
68
   switch (value.kind) {
3,972,656✔
69
   case JIT_VALUE_REG:
1,081,583✔
70
   case JIT_ADDR_REG:
71
      return value.reg;
1,081,583✔
72
   default:
73
      return JIT_REG_INVALID;
74
   }
75
}
76

77
static inline bool cfg_reads_result(jit_ir_t *ir)
4,686,406✔
78
{
79
   return ir->op == MACRO_COPY || ir->op == MACRO_CASE || ir->op == MACRO_BZERO
4,686,406✔
80
      || ir->op == MACRO_MOVE || ir->op == MACRO_MEMSET;
4,686,406✔
81
}
82

83
static inline bool cfg_writes_result(jit_ir_t *ir)
1,986,328✔
84
{
85
   return ir->result != JIT_REG_INVALID && ir->op != MACRO_CASE;
1,986,328✔
86
}
87

88
static void cfg_liveness(jit_cfg_t *cfg, jit_func_t *f)
15,232✔
89
{
90
   // Algorithm from "Engineering a Compiler" chapter 8.6
91

92
   for (int i = 0; i < cfg->nblocks; i++) {
135,810✔
93
      jit_block_t *b = &(cfg->blocks[i]);
120,578✔
94
      mask_init(&b->livein, f->nregs + 1);
120,578✔
95
      mask_init(&b->varkill, f->nregs + 1);
120,578✔
96
      mask_init(&b->liveout, f->nregs + 1);
120,578✔
97

98
      for (int j = b->first; j <= b->last; j++) {
2,106,906✔
99
         jit_ir_t *ir = &(f->irbuf[j]);
1,986,328✔
100

101
         jit_reg_t reg1 = cfg_get_reg(ir->arg1);
1,986,328✔
102
         if (reg1 != JIT_REG_INVALID && !mask_test(&b->varkill, reg1))
1,986,328✔
103
            mask_set(&b->livein, reg1);
153,365✔
104

105
         jit_reg_t reg2 = cfg_get_reg(ir->arg2);
1,986,328✔
106
         if (reg2 != JIT_REG_INVALID && !mask_test(&b->varkill, reg2))
1,986,328✔
107
            mask_set(&b->livein, reg2);
121,460✔
108

109
         if (cfg_reads_result(ir) && !mask_test(&b->varkill, ir->result))
1,986,328✔
110
            mask_set(&b->livein, ir->result);
344✔
111

112
         if (cfg_writes_result(ir))
1,986,328✔
113
            mask_set(&b->varkill, ir->result);
755,934✔
114

115
         if (jit_writes_flags(ir))
1,986,328✔
116
            mask_set(&b->varkill, f->nregs);
108,442✔
117
         else if (jit_reads_flags(ir) && !mask_test(&b->varkill, f->nregs))
1,877,886✔
118
            mask_set(&b->livein, f->nregs);
1,986,328✔
119
      }
120
   }
121

122
   bit_mask_t new, tmp;
15,232✔
123
   mask_init(&new, f->nregs + 1);
15,232✔
124
   mask_init(&tmp, f->nregs + 1);
15,232✔
125

126
   bool changed;
26,775✔
127
   do {
26,775✔
128
      changed = false;
26,775✔
129

130
      for (int i = cfg->nblocks - 1; i >= 0; i--) {
334,314✔
131
         jit_block_t *b = &(cfg->blocks[i]);
307,539✔
132
         mask_clearall(&new);
307,539✔
133

134
         for (int j = 0; j < b->out.count; j++) {
652,385✔
135
            jit_block_t *succ = &(cfg->blocks[jit_get_edge(&b->out, j)]);
344,846✔
136
            mask_copy(&tmp, &succ->liveout);
344,846✔
137
            mask_subtract(&tmp, &succ->varkill);
344,846✔
138
            mask_union(&tmp, &succ->livein);
344,846✔
139
            mask_union(&new, &tmp);
344,846✔
140
         }
141

142
         if (!mask_eq(&new, &b->liveout)) {
307,539✔
143
            mask_copy(&b->liveout, &new);
87,249✔
144
            changed = true;
87,249✔
145
         }
146
      }
147
   } while (changed);
26,775✔
148

149
   // Replaced "upward exposed variables" set with live-in
150
   for (int i = 0; i < cfg->nblocks; i++) {
135,810✔
151
      jit_block_t *b = &(cfg->blocks[i]);
120,578✔
152
      mask_copy(&tmp, &b->liveout);
120,578✔
153
      mask_subtract(&tmp, &b->varkill);
120,578✔
154
      mask_union(&b->livein, &tmp);
120,578✔
155
   }
156

157
   mask_free(&new);
15,232✔
158
   mask_free(&tmp);
15,232✔
159
}
15,232✔
160

161
jit_cfg_t *jit_get_cfg(jit_func_t *f)
15,232✔
162
{
163
   int nb = 1;
15,232✔
164
   for (int i = 0, first = 0; i < f->nirs; i++) {
2,001,560✔
165
      jit_ir_t *ir = &(f->irbuf[i]);
1,986,328✔
166
      if (ir->target && i > 0 && first != i)
1,986,328✔
167
         first = i, nb++;
31,514✔
168
      if (cfg_is_terminator(f, ir) && i + 1 < f->nirs)
1,986,328✔
169
         first = i + 1, nb++;
73,832✔
170
   }
171

172
   jit_cfg_t *cfg = xcalloc_flex(sizeof(jit_cfg_t), nb, sizeof(jit_block_t));
15,232✔
173
   cfg->nblocks = nb;
15,232✔
174

175
   jit_block_t *bb = cfg->blocks;
15,232✔
176
   for (int i = 0; i < f->nirs; i++) {
2,001,560✔
177
      jit_ir_t *ir = &(f->irbuf[i]);
1,986,328✔
178
      if (ir->target && i > 0 && bb->first != i) {
1,986,328✔
179
         if (!bb->returns && !bb->aborts)
31,514✔
180
            cfg_add_edge(cfg, bb, bb + 1);
16,636✔
181
         (++bb)->first = i;
31,514✔
182
      }
183

184
      bb->last = i;
1,986,328✔
185

186
      if (ir->op == J_RET)
1,986,328✔
187
         bb->returns = 1;
29,100✔
188
      else if (jit_will_abort(ir))
1,957,228✔
189
         bb->aborts = 1;
15,265✔
190

191
      if (cfg_is_terminator(f, ir) && i + 1 < f->nirs) {
1,986,328✔
192
         if ((ir->op == J_JUMP && ir->cc != JIT_CC_NONE)
73,832✔
193
             || ir->op == MACRO_CASE)
30,125✔
194
            cfg_add_edge(cfg, bb, bb + 1);   // Fall-through case
45,545✔
195
         (++bb)->first = i + 1;
73,832✔
196
      }
197
   }
198

199
   for (int i = 0; i < f->nirs; i++) {
2,001,560✔
200
      jit_ir_t *ir = &(f->irbuf[i]);
1,986,328✔
201
      jit_label_t label = JIT_LABEL_INVALID;
1,986,328✔
202

203
      if (ir->op == J_JUMP)
1,986,328✔
204
         label = ir->arg1.label;
57,426✔
205
      else if (ir->op == MACRO_CASE)
1,928,902✔
206
         label = ir->arg2.label;
9,361✔
207

208
      if (label != JIT_LABEL_INVALID) {
66,787✔
209
         assert(label < f->nirs);
66,787✔
210
         jit_block_t *from = jit_block_for(cfg, i);
66,787✔
211
         jit_block_t *to = jit_block_for(cfg, label);
66,787✔
212
         cfg_add_edge(cfg, from, to);
66,787✔
213
      }
214
   }
215

216
   cfg_liveness(cfg, f);
15,232✔
217

218
   return cfg;
15,232✔
219
}
220

221
void jit_free_cfg(jit_cfg_t *cfg)
15,227✔
222
{
223
   for (int i = 0; i < cfg->nblocks; i++) {
135,785✔
224
      jit_block_t *b = &(cfg->blocks[i]);
120,558✔
225
      mask_free(&b->livein);
120,558✔
226
      mask_free(&b->liveout);
120,558✔
227
      mask_free(&b->varkill);
120,558✔
228

229
      if (b->in.max > 4) free(b->in.u.external);
120,558✔
230
      if (b->out.max > 4) free(b->out.u.external);
120,558✔
231
   }
232

233
   free(cfg);
15,227✔
234
}
15,227✔
235

236
jit_block_t *jit_block_for(jit_cfg_t *cfg, int pos)
133,574✔
237
{
238
   for (int low = 0, high = cfg->nblocks - 1; low <= high; ) {
575,147✔
239
      const int mid = (low + high) / 2;
575,147✔
240
      jit_block_t *bb = &(cfg->blocks[mid]);
575,147✔
241

242
      if (bb->last < pos)
575,147✔
243
         low = mid + 1;
229,299✔
244
      else if (bb->first > pos)
345,848✔
245
         high = mid - 1;
212,274✔
246
      else
247
         return bb;
133,574✔
248
   }
249

250
   fatal_trace("operation %d is not in any block", pos);
251
}
252

253
int jit_get_edge(jit_edge_list_t *list, int nth)
611,529✔
254
{
255
   assert(nth < list->count);
611,529✔
256
   if (list->max <= 4)
611,529✔
257
      return list->u.edges[nth];
585,710✔
258
   else
259
      return list->u.external[nth];
25,819✔
260
}
261

262
////////////////////////////////////////////////////////////////////////////////
263
// Local value numbering and simple peepholes
264

265
#define FOR_ALL_JIT_SIZES(size, macro) do {             \
266
      switch (size) {                                   \
267
      case JIT_SZ_8: macro(int8_t); break;              \
268
      case JIT_SZ_16: macro(int16_t); break;            \
269
      case JIT_SZ_32: macro(int32_t); break;            \
270
      case JIT_SZ_UNSPEC:                               \
271
      case JIT_SZ_64: macro(int64_t); break;            \
272
      }                                                 \
273
   } while (0)
274

275
typedef unsigned valnum_t;
276

277
#define VN_INVALID  UINT_MAX
278
#define SMALL_CONST 100
279
#define MAX_CONSTS  32
280
#define FIRST_VN    (SMALL_CONST + MAX_CONSTS)
281
#define TRACK_ARGS  8
282

283
#define LVN_REG(r) ((jit_value_t){ .kind = JIT_VALUE_REG, .reg = (r) })
284
#define LVN_CONST(i) ((jit_value_t){ .kind = JIT_VALUE_INT64, .int64 = (i) })
285

286
typedef struct {
287
   jit_ir_t *ir;
288
   valnum_t  vn;
289
   int       tuple[3];
290
} lvn_tab_t;
291

292
typedef struct {
293
   jit_func_t *func;
294
   valnum_t   *regvn;
295
   valnum_t    nextvn;
296
   lvn_tab_t  *hashtab;
297
   size_t      tabsz;
298
   int64_t     consttab[MAX_CONSTS];
299
   unsigned    nconsts;
300
} lvn_state_t;
301

302
static void jit_lvn_mov(jit_ir_t *ir, lvn_state_t *state);
303

304
static inline valnum_t lvn_new_value(lvn_state_t *state)
1,740,020✔
305
{
306
   return state->nextvn++;
1,740,020✔
307
}
308

309
static bool lvn_get_const(valnum_t vn, lvn_state_t *state, int64_t *cval)
1,976,228✔
310
{
311
   if (vn < SMALL_CONST) {
1,976,228✔
312
      *cval = vn;
283,156✔
313
      return true;
283,156✔
314
   }
315
   else if (vn < SMALL_CONST + MAX_CONSTS) {
1,693,072✔
316
      *cval = state->consttab[vn - SMALL_CONST];
23,702✔
317
      return true;
23,702✔
318
   }
319
   else
320
      return false;
321
}
322

323
static bool lvn_is_const(jit_value_t value, lvn_state_t *state, int64_t *cval)
2,397,633✔
324
{
325
   switch (value.kind) {
2,397,633✔
326
   case JIT_VALUE_INT64:
651,205✔
327
      *cval = value.int64;
651,205✔
328
      return true;
651,205✔
329
   case JIT_VALUE_REG:
1,746,424✔
330
      return lvn_get_const(state->regvn[value.reg], state, cval);
1,746,424✔
331
   default:
332
      return false;
333
   }
334
}
335

336
static inline bool lvn_can_fold(jit_ir_t *ir, lvn_state_t *state,
1,029,411✔
337
                                int64_t *arg1, int64_t *arg2)
338
{
339
   return lvn_is_const(ir->arg1, state, arg1)
1,029,411✔
340
      && lvn_is_const(ir->arg2, state, arg2);
1,029,411✔
341
}
342

343
static void lvn_convert_mov(jit_ir_t *ir, lvn_state_t *state, jit_value_t value)
389,708✔
344
{
345
   ir->op        = J_MOV;
389,708✔
346
   ir->size      = JIT_SZ_UNSPEC;
389,708✔
347
   ir->cc        = JIT_CC_NONE;
389,708✔
348
   ir->arg1      = value;
389,708✔
349
   ir->arg2.kind = JIT_VALUE_INVALID;
389,708✔
350

351
   jit_lvn_mov(ir, state);
389,708✔
352
}
389,708✔
353

354
static void lvn_convert_nop(jit_ir_t *ir)
554,485✔
355
{
356
   ir->op        = J_NOP;
554,485✔
357
   ir->size      = JIT_SZ_UNSPEC;
554,485✔
358
   ir->cc        = JIT_CC_NONE;
554,485✔
359
   ir->result    = JIT_REG_INVALID;
554,485✔
360
   ir->arg1.kind = JIT_VALUE_INVALID;
554,485✔
361
   ir->arg2.kind = JIT_VALUE_INVALID;
554,485✔
362
}
554,485✔
363

364
static valnum_t lvn_value_num(jit_value_t value, lvn_state_t *state)
3,979,749✔
365
{
366
   switch (value.kind) {
3,979,749✔
367
   case JIT_VALUE_REG:
2,294,318✔
368
      if (state->regvn[value.reg] != VN_INVALID)
2,294,318✔
369
         return state->regvn[value.reg];
370
      else
371
         return (state->regvn[value.reg] = lvn_new_value(state));
852,323✔
372

373
   case JIT_VALUE_INT64:
861,391✔
374
      if (value.int64 >= 0 && value.int64 < SMALL_CONST)
861,391✔
375
         return value.int64;
717,117✔
376
      else {
377
         for (int i = 0; i < state->nconsts; i++) {
1,528,101✔
378
            if (state->consttab[i] == value.int64)
1,467,829✔
379
               return SMALL_CONST + i;
84,002✔
380
         }
381

382
         if (state->nconsts < MAX_CONSTS) {
60,272✔
383
            state->consttab[state->nconsts] = value.int64;
20,860✔
384
            return SMALL_CONST + state->nconsts++;
20,860✔
385
         }
386
         else
387
            return lvn_new_value(state);
39,412✔
388
      }
389

390
   case JIT_VALUE_INVALID:
391
      return VN_INVALID;
392

393
   case JIT_VALUE_HANDLE:
3,744✔
394
   case JIT_VALUE_DOUBLE:
395
      return lvn_new_value(state);
3,744✔
396

397
   default:
×
398
      fatal_trace("cannot handle value kind %d in lvn_value_num", value.kind);
399
   }
400
}
401

402
static inline bool lvn_is_commutative(jit_op_t op)
1,506,880✔
403
{
404
   return op == J_ADD || op == J_MUL || op == J_AND || op == J_OR;
1,506,880✔
405
}
406

407
static inline void lvn_kill_flags(lvn_state_t *state)
39,673✔
408
{
409
   state->regvn[state->func->nregs] = VN_INVALID;
39,673✔
410
}
39,673✔
411

412
static void lvn_kill_args(lvn_state_t *state)
341,732✔
413
{
414
   for (int i = 0; i < TRACK_ARGS; i++)
3,075,588✔
415
      state->regvn[state->func->nregs + i + 1] = VN_INVALID;
2,733,856✔
416
}
341,732✔
417

418
static void lvn_commute_const(jit_ir_t *ir, lvn_state_t *state)
259,582✔
419
{
420
   assert(lvn_is_commutative(ir->op));
259,582✔
421

422
   int64_t cval;
259,582✔
423
   if (lvn_is_const(ir->arg1, state, &cval)) {
259,582✔
424
      jit_value_t tmp = ir->arg1;
6,280✔
425
      ir->arg1 = ir->arg2;
6,280✔
426
      ir->arg2 = tmp;
6,280✔
427
   }
428
}
259,582✔
429

430
static void lvn_get_tuple(jit_ir_t *ir, lvn_state_t *state, int tuple[3])
1,247,298✔
431
{
432
   tuple[0] = ir->op | ir->size << 8 | ir->cc << 11;
1,247,298✔
433

434
   const valnum_t vn1 = lvn_value_num(ir->arg1, state);
1,247,298✔
435
   const valnum_t vn2 = lvn_value_num(ir->arg2, state);
1,247,298✔
436

437
   if (lvn_is_commutative(ir->op) && vn1 > vn2) {
1,247,298✔
438
      tuple[1] = vn2;
136,251✔
439
      tuple[2] = vn1;
136,251✔
440
   }
441
   else {
442
      tuple[1] = vn1;
1,111,047✔
443
      tuple[2] = vn2;
1,111,047✔
444
   }
445
}
1,247,298✔
446

447
static void jit_lvn_generic(jit_ir_t *ir, lvn_state_t *state, valnum_t vn)
1,247,298✔
448
{
449
   assert(ir->result != JIT_REG_INVALID);
1,247,298✔
450

451
   int tuple[3];
1,247,298✔
452
   lvn_get_tuple(ir, state, tuple);
1,247,298✔
453

454
   const uint32_t hash =
1,247,298✔
455
      mix_bits_32(tuple[0]*29 + tuple[1]*1093 + tuple[2]*6037);
1,247,298✔
456

457
   for (int idx = hash & (state->tabsz - 1), limit = 0, stale = -1; limit < 10;
1,455,314✔
458
        idx = (idx + 1) & (state->tabsz - 1), limit++) {
208,016✔
459

460
      lvn_tab_t *tab = &(state->hashtab[idx]);
1,455,314✔
461
      if (tab->ir == NULL) {
1,455,314✔
462
         if (stale >= 0)  // Reuse earlier stale entry if possible
1,067,294✔
463
            tab = &(state->hashtab[stale]);
179,902✔
464
         tab->ir = ir;
1,067,294✔
465
         tab->vn = state->regvn[ir->result] =
2,134,588✔
466
            (vn == VN_INVALID ? lvn_new_value(state) : vn);
1,067,294✔
467
         memcpy(tab->tuple, tuple, sizeof(tuple));
1,067,294✔
468
         return;
1,247,298✔
469
      }
470
      else if (tab->vn != state->regvn[tab->ir->result]) {
388,020✔
471
         // Stale entry may be reused if we do not find matching value
472
         stale = idx;
198,002✔
473
         continue;
198,002✔
474
      }
475
      else if (memcmp(tuple, tab->tuple, sizeof(tuple)) == 0) {
190,018✔
476
         assert(tab->ir->result != JIT_REG_INVALID);
180,004✔
477

478
         ir->op   = J_MOV;
180,004✔
479
         ir->size = JIT_SZ_UNSPEC;
180,004✔
480
         ir->cc   = JIT_CC_NONE;
180,004✔
481

482
         // Propagate constants where possible
483
         int64_t cval;
180,004✔
484
         if (lvn_get_const(state->regvn[tab->ir->result], state, &cval))
180,004✔
485
            ir->arg1 = LVN_CONST(cval);
94,886✔
486
         else
487
            ir->arg1 = LVN_REG(tab->ir->result);
85,118✔
488

489
         ir->arg2.kind = JIT_VALUE_INVALID;
180,004✔
490

491
         state->regvn[ir->result] = tab->vn;
180,004✔
492
         return;
180,004✔
493
      }
494
   }
495

496
   state->regvn[ir->result] = VN_INVALID;   // Reached iteration limit
×
497
}
498

499
static void jit_lvn_mul(jit_ir_t *ir, lvn_state_t *state)
132,288✔
500
{
501
   if (ir->cc != JIT_CC_NONE)
132,288✔
502
      lvn_kill_flags(state);
1,279✔
503

504
   int64_t lhs, rhs;
132,288✔
505
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
132,288✔
506
#define FOLD_MUL(type) do {                                             \
507
         type result;                                                   \
508
         if (!__builtin_mul_overflow(lhs, rhs, &result)) {              \
509
            lvn_convert_mov(ir, state, LVN_CONST(result));              \
510
            return;                                                     \
511
         }                                                              \
512
      } while (0)
513

514
      FOR_ALL_JIT_SIZES(ir->size, FOLD_MUL);
38,069✔
515
#undef FOLD_MUL
516
   }
517

518
   lvn_commute_const(ir, state);
94,220✔
519

520
   if (lvn_is_const(ir->arg2, state, &rhs)) {
94,220✔
521
      if (rhs == 0) {
92,562✔
522
         lvn_convert_mov(ir, state, LVN_CONST(0));
21✔
523
         return;
21✔
524
      }
525
      else if (rhs == 1) {
92,541✔
526
         lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
67,493✔
527
         return;
67,493✔
528
      }
529
      else if (rhs > 0 && is_power_of_2(rhs) && ir->size == JIT_SZ_UNSPEC) {
25,048✔
530
         ir->op = J_SHL;
20,757✔
531
         ir->arg2 = LVN_CONST(ilog2(rhs));
20,757✔
532
         jit_lvn_generic(ir, state, VN_INVALID);
20,757✔
533
         return;
20,757✔
534
      }
535
   }
536

537
   jit_lvn_generic(ir, state, VN_INVALID);
5,949✔
538
}
539

540
static void jit_lvn_div(jit_ir_t *ir, lvn_state_t *state)
2,970✔
541
{
542
   if (ir->cc != JIT_CC_NONE)
2,970✔
543
      lvn_kill_flags(state);
×
544

545
   int64_t lhs, rhs;
2,970✔
546
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs != 0) {
2,970✔
547
      // XXX: potential bug here with INT_MIN/-1
548
#define FOLD_DIV(type) do {                                             \
549
         type result = (type)lhs / (type)rhs;                           \
550
         lvn_convert_mov(ir, state, LVN_CONST(result));                 \
551
         return;                                                        \
552
      } while (0)
553

554
      FOR_ALL_JIT_SIZES(ir->size, FOLD_DIV);
38✔
555
#undef FOLD_DIV
556
   }
557
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 1) {
2,969✔
558
      lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
37✔
559
      return;
37✔
560
   }
561

562
   jit_lvn_generic(ir, state, VN_INVALID);
2,932✔
563
}
564

565
static void jit_lvn_add(jit_ir_t *ir, lvn_state_t *state)
192,704✔
566
{
567
   if (ir->cc != JIT_CC_NONE)
192,704✔
568
      lvn_kill_flags(state);
6,412✔
569

570
   int64_t lhs, rhs;
192,704✔
571
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
192,704✔
572
#define FOLD_ADD(type) do {                                             \
573
         type result;                                                   \
574
         if (!__builtin_add_overflow(lhs, rhs, &result)) {              \
575
            lvn_convert_mov(ir, state, LVN_CONST(result));              \
576
            return;                                                     \
577
         }                                                              \
578
      } while (0)
579

580
      FOR_ALL_JIT_SIZES(ir->size, FOLD_ADD);
27,342✔
581
#undef FOLD_ADD
582
   }
583

584
   lvn_commute_const(ir, state);
165,362✔
585

586
   if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0) {
165,362✔
587
      lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
2,001✔
588
      return;
2,001✔
589
   }
590

591
   jit_lvn_generic(ir, state, VN_INVALID);
163,361✔
592
}
593

594
static void jit_lvn_sub(jit_ir_t *ir, lvn_state_t *state)
136,496✔
595
{
596
   if (ir->cc != JIT_CC_NONE)
136,496✔
597
      lvn_kill_flags(state);
1,839✔
598

599
   int64_t lhs, rhs;
136,496✔
600
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
136,496✔
601
#define FOLD_SUB(type) do {                                             \
602
         type result;                                                   \
603
         if (!__builtin_sub_overflow(lhs, rhs, &result)) {              \
604
            lvn_convert_mov(ir, state, LVN_CONST(result));              \
605
            return;                                                     \
606
         }                                                              \
607
      } while (0)
608

609
      FOR_ALL_JIT_SIZES(ir->size, FOLD_SUB);
54,460✔
610
#undef FOLD_SUB
611
   }
612

613
   if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0) {
82,036✔
614
      lvn_convert_mov(ir, state, LVN_REG(ir->arg1.reg));
8,987✔
615
      return;
8,987✔
616
   }
617
   else if (lvn_is_const(ir->arg1, state, &lhs) && lhs == 0
73,049✔
618
            && ir->cc == JIT_CC_NONE && ir->size == JIT_SZ_UNSPEC) {
5,465✔
619
     ir->op        = J_NEG;
5,464✔
620
     ir->arg1      = ir->arg2;
5,464✔
621
     ir->arg2.kind = JIT_VALUE_INVALID;
5,464✔
622
     jit_lvn_generic(ir, state, VN_INVALID);
5,464✔
623
     return;
5,464✔
624
   }
625

626
   jit_lvn_generic(ir, state, VN_INVALID);
67,585✔
627
}
628

629
static void jit_lvn_neg(jit_ir_t *ir, lvn_state_t *state)
46,305✔
630
{
631
   int64_t value;
46,305✔
632
   if (lvn_is_const(ir->arg1, state, &value))
46,305✔
633
      lvn_convert_mov(ir, state, LVN_CONST(-value));
38,834✔
634
   else
635
      jit_lvn_generic(ir, state, VN_INVALID);
7,471✔
636
}
46,305✔
637

638
static void jit_lvn_or(jit_ir_t *ir, lvn_state_t *state)
79,527✔
639
{
640
   int64_t lhs, rhs;
79,527✔
641
   if (lvn_can_fold(ir, state, &lhs, &rhs))
79,527✔
642
      lvn_convert_mov(ir, state, LVN_CONST(lhs | rhs));
4,100✔
643
   else if (lvn_is_const(ir->arg1, state, &lhs) && lhs == 0)
75,427✔
644
      lvn_convert_mov(ir, state, ir->arg2);
2,712✔
645
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
72,715✔
646
      lvn_convert_mov(ir, state, ir->arg1);
12,968✔
647
   else
648
      jit_lvn_generic(ir, state, VN_INVALID);
59,747✔
649
}
79,527✔
650

651
static void jit_lvn_xor(jit_ir_t *ir, lvn_state_t *state)
104,348✔
652
{
653
   int64_t lhs, rhs;
104,348✔
654
   if (lvn_can_fold(ir, state, &lhs, &rhs))
104,348✔
655
      lvn_convert_mov(ir, state, LVN_CONST(lhs ^ rhs));
30,571✔
656
   else if (lvn_is_const(ir->arg1, state, &lhs) && lhs == 0)
73,777✔
657
      lvn_convert_mov(ir, state, ir->arg2);
8,015✔
658
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
65,762✔
659
      lvn_convert_mov(ir, state, ir->arg1);
1,029✔
660
   else
661
      jit_lvn_generic(ir, state, VN_INVALID);
64,733✔
662
}
104,348✔
663

664
static void jit_lvn_shl(jit_ir_t *ir, lvn_state_t *state)
5,809✔
665
{
666
   int64_t lhs, rhs;
5,809✔
667
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs >= 0 && rhs < 64 && lhs >= 0)
5,809✔
668
      lvn_convert_mov(ir, state, LVN_CONST(lhs << rhs));
2,184✔
669
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
3,625✔
670
      lvn_convert_mov(ir, state, ir->arg1);
157✔
671
   else
672
      jit_lvn_generic(ir, state, VN_INVALID);
3,468✔
673
}
5,809✔
674

675
static void jit_lvn_shr(jit_ir_t *ir, lvn_state_t *state)
4,442✔
676
{
677
   int64_t lhs, rhs;
4,442✔
678
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs >= 0 && rhs < 64)
4,442✔
679
      lvn_convert_mov(ir, state, LVN_CONST(lhs >> rhs));
357✔
680
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
4,085✔
681
      lvn_convert_mov(ir, state, ir->arg1);
353✔
682
   else
683
      jit_lvn_generic(ir, state, VN_INVALID);
3,732✔
684
}
4,442✔
685

686
static void jit_lvn_asr(jit_ir_t *ir, lvn_state_t *state)
34,662✔
687
{
688
   int64_t lhs, rhs;
34,662✔
689
   if (lvn_can_fold(ir, state, &lhs, &rhs) && rhs >= 0 && rhs < 64)
34,662✔
690
      lvn_convert_mov(ir, state, LVN_CONST(lhs >> rhs));
49✔
691
   else if (lvn_is_const(ir->arg2, state, &rhs) && rhs == 0)
34,613✔
692
      lvn_convert_mov(ir, state, ir->arg1);
1✔
693
   else
694
      jit_lvn_generic(ir, state, VN_INVALID);
34,612✔
695
}
34,662✔
696

697
static void jit_lvn_mov(jit_ir_t *ir, lvn_state_t *state)
902,846✔
698
{
699
   if (ir->arg1.kind == JIT_VALUE_REG && ir->arg1.reg == ir->result) {
902,846✔
700
      lvn_convert_nop(ir);
7,260✔
701
      return;
7,260✔
702
   }
703

704
   valnum_t vn = lvn_value_num(ir->arg1, state);
895,586✔
705
   if (state->regvn[ir->result] == vn) {
895,586✔
706
      // Result register already contains this value
707
      lvn_convert_nop(ir);
113,785✔
708
      return;
113,785✔
709
   }
710

711
   jit_lvn_generic(ir, state, vn);
781,801✔
712
}
713

714
static void jit_lvn_cmp(jit_ir_t *ir, lvn_state_t *state)
336,163✔
715
{
716
   int64_t lhs, rhs;
336,163✔
717
   if (lvn_can_fold(ir, state, &lhs, &rhs)) {
336,163✔
718
      const uint64_t ulhs = lhs, urhs = rhs;
54,397✔
719
      bool result = false;
54,397✔
720
      switch (ir->cc) {
54,397✔
721
      case JIT_CC_EQ: result = (lhs == rhs); break;
51,458✔
722
      case JIT_CC_NE: result = (lhs != rhs); break;
1,512✔
UNCOV
723
      case JIT_CC_LT: result = (lhs < rhs); break;
×
724
      case JIT_CC_GT: result = (lhs > rhs); break;
56✔
725
      case JIT_CC_LE: result = (lhs <= rhs); break;
39✔
726
      case JIT_CC_GE: result = (lhs >= rhs); break;
1,328✔
UNCOV
727
      case JIT_CC_C:  result = (ulhs < urhs); break;
×
728
      case JIT_CC_NC: result = (ulhs >= urhs); break;
4✔
UNCOV
729
      case JIT_CC_O:  result = (ulhs > urhs); break;
×
UNCOV
730
      case JIT_CC_NO: result = (ulhs <= urhs); break;
×
UNCOV
731
      default:
×
732
         fatal_trace("unhandled condition code in jit_lvn_cmp");
733
      }
734

735
      state->regvn[state->func->nregs] = result;
54,397✔
736
   }
737
   else
738
      state->regvn[state->func->nregs] = VN_INVALID;
281,766✔
739
}
336,163✔
740

741
static void jit_lvn_ccmp(jit_ir_t *ir, lvn_state_t *state)
31,466✔
742
{
743
   const int fconst = state->regvn[state->func->nregs];
31,466✔
744
   if (fconst == 0)
31,466✔
745
      lvn_convert_nop(ir);
56✔
746
   else if (fconst != VN_INVALID) {
31,410✔
747
      ir->op = J_CMP;
1,392✔
748
      jit_lvn_cmp(ir, state);
1,392✔
749
   }
750
   else
751
      lvn_kill_flags(state);
30,018✔
752
}
31,466✔
753

754
static void jit_lvn_csel(jit_ir_t *ir, lvn_state_t *state)
153,709✔
755
{
756
   const int fconst = state->regvn[state->func->nregs];
153,709✔
757
   if (fconst != VN_INVALID)
153,709✔
758
      lvn_convert_mov(ir, state, fconst ? ir->arg1 : ir->arg2);
62,616✔
759
   else
760
      state->regvn[ir->result] = VN_INVALID;
91,093✔
761
}
153,709✔
762

763
static void jit_lvn_cset(jit_ir_t *ir, lvn_state_t *state)
113,707✔
764
{
765
   const int fconst = state->regvn[state->func->nregs];
113,707✔
766
   if (fconst != VN_INVALID)
113,707✔
767
      lvn_convert_mov(ir, state, LVN_CONST(fconst));
517✔
768
   else
769
      state->regvn[ir->result] = VN_INVALID;
113,190✔
770
}
113,707✔
771

772
static void jit_lvn_jump(jit_ir_t *ir, lvn_state_t *state)
240,737✔
773
{
774
   assert(ir->arg1.label < state->func->nirs);
240,737✔
775
   jit_ir_t *dest = &(state->func->irbuf[ir->arg1.label]);
240,737✔
776

777
   const int fconst = state->regvn[state->func->nregs];
240,737✔
778
   if (ir->cc != JIT_CC_NONE && fconst != VN_INVALID) {
240,737✔
779
      if (fconst == (ir->cc == JIT_CC_T))
1,487✔
780
         ir->cc = JIT_CC_NONE;
192✔
781
      else {
782
         lvn_convert_nop(ir);
1,295✔
783
         return;
1,295✔
784
      }
785
   }
786

787
   if (dest == ir + 1)
239,442✔
788
      lvn_convert_nop(ir);
24,746✔
789
   else if (dest->op == J_JUMP && dest->cc == JIT_CC_NONE)
214,696✔
790
      ir->arg1 = dest->arg1;     // Simple jump threading
1,269✔
791
}
792

793
static void jit_lvn_clamp(jit_ir_t *ir, lvn_state_t *state)
52,394✔
794
{
795
   int64_t value;
52,394✔
796
   if (lvn_is_const(ir->arg1, state, &value))
52,394✔
797
      lvn_convert_mov(ir, state, LVN_CONST(value < 0 ? 0 : value));
26,834✔
798
   else
799
      jit_lvn_generic(ir, state, VN_INVALID);
25,560✔
800
}
52,394✔
801

802
static void jit_lvn_recv(jit_ir_t *ir, lvn_state_t *state)
337,875✔
803
{
804
   const unsigned nth = ir->arg1.int64;
337,875✔
805
   if (nth >= TRACK_ARGS)
337,875✔
806
      return;
807

808
   const unsigned vreg = state->func->nregs + 1 + nth;
335,949✔
809

810
   if (state->regvn[vreg] != VN_INVALID)
335,949✔
811
      state->regvn[ir->result] = state->regvn[vreg];
1✔
812
   else {
813
      valnum_t vn = lvn_new_value(state);
335,948✔
814
      state->regvn[vreg] = vn;
335,948✔
815
      state->regvn[ir->result] = vn;
335,948✔
816
   }
817
}
818

819
static void jit_lvn_send(jit_ir_t *ir, lvn_state_t *state)
1,413,906✔
820
{
821
   const unsigned nth = ir->arg1.int64;
1,413,906✔
822
   if (nth >= TRACK_ARGS)
1,413,906✔
823
      return;
824
   else if (ir->arg2.kind != JIT_VALUE_REG)
1,405,704✔
825
      return;
826

827
   const unsigned vreg = state->func->nregs + 1 + nth;
589,567✔
828

829
   valnum_t vn = lvn_value_num(ir->arg2, state);
589,567✔
830
   if (vn != VN_INVALID && state->regvn[vreg] == vn)
589,567✔
831
      lvn_convert_nop(ir);   // Already in argument slot
33,104✔
832
}
833

834
static void jit_lvn_copy(jit_ir_t *ir, lvn_state_t *state)
42,486✔
835
{
836
   if (ir->op == MACRO_MOVE && ir->arg2.kind == JIT_ADDR_CPOOL)
42,486✔
837
      ir->op = MACRO_COPY;   // Cannot overlap when copying from constant pool
27,666✔
838

839
   int64_t count;
42,486✔
840
   if (lvn_get_const(state->regvn[ir->result], state, &count)) {
42,486✔
841
      if (count == 0) {
34,074✔
842
         lvn_convert_nop(ir);
1✔
843
         return;
2,782✔
844
      }
845
      else if (count > 0 && count <= 8 && is_power_of_2(count)
34,073✔
846
               && ir->arg2.kind == JIT_ADDR_CPOOL) {
4,154✔
847
         // Replace a small copy from the constant pool with a store
848
         const void *src = state->func->cpool + ir->arg2.int64;
2,780✔
849
         ir->op = J_STORE;
2,780✔
850
         ir->arg2 = ir->arg1;
2,780✔
851
         ir->result = JIT_REG_INVALID;
2,780✔
852

853
         switch (count) {
2,780✔
854
         case 8:
1,022✔
855
            ir->size = JIT_SZ_64;
1,022✔
856
            ir->arg1 = LVN_CONST(unaligned_load(src, uint64_t));
1,022✔
857
            break;
1,022✔
858
         case 4:
738✔
859
            ir->size = JIT_SZ_32;
738✔
860
            ir->arg1 = LVN_CONST(unaligned_load(src, uint32_t));
738✔
861
            break;
738✔
862
         case 2:
629✔
863
            ir->size = JIT_SZ_16;
629✔
864
            ir->arg1 = LVN_CONST(unaligned_load(src, uint16_t));
629✔
865
            break;
629✔
866
         case 1:
391✔
867
            ir->size = JIT_SZ_8;
391✔
868
            ir->arg1 = LVN_CONST(*(uint8_t *)src);
391✔
869
            break;
391✔
UNCOV
870
         default:
×
UNCOV
871
            jit_dump_with_mark(state->func, ir - state->func->irbuf);
×
872
            fatal_trace("unhandled constant copy from constant pool");
873
         }
874

875
         return;
2,780✔
876
      }
877
   }
878

879
   // Clobbers the count register
880
   state->regvn[ir->result] = lvn_new_value(state);
39,705✔
881
}
882

883
static void jit_lvn_bzero(jit_ir_t *ir, lvn_state_t *state)
5,176✔
884
{
885
   int64_t count;
5,176✔
886
   if (lvn_get_const(state->regvn[ir->result], state, &count)) {
5,176✔
887
      if (count == 0) {
2,101✔
888
         lvn_convert_nop(ir);
1✔
889
         return;
578✔
890
      }
891
      else if (count > 0 && count <= 8 && is_power_of_2(count)) {
2,100✔
892
         // Convert to a store of zero
893
         ir->op     = J_STORE;
576✔
894
         ir->arg2   = ir->arg1;
576✔
895
         ir->arg1   = LVN_CONST(0);
576✔
896
         ir->result = JIT_REG_INVALID;
576✔
897
         ir->size   = ilog2(count);
576✔
898
         return;
576✔
899
      }
900
   }
901

902
   // Clobbers the count register
903
   state->regvn[ir->result] = lvn_new_value(state);
4,599✔
904
}
905

906
static void jit_lvn_memset(jit_ir_t *ir, lvn_state_t *state)
2,210✔
907
{
908
   int64_t value;
2,210✔
909
   if (lvn_is_const(ir->arg2, state, &value)) {
2,210✔
910
      static const uint64_t compress[JIT_SZ_UNSPEC] = {
2,147✔
911
         0x01, 0x0101, 0x01010101, 0x0101010101010101
912
      };
913
      static const uint64_t expand[JIT_SZ_UNSPEC] = {
2,147✔
914
         0x0101010101010101, 0x0001000100010001, 0x100000001, 1,
915
      };
916
      static const uint64_t mask[JIT_SZ_UNSPEC] = {
2,147✔
917
         0xff, 0xffff, 0xffffffff, 0xffffffffffffffff
918
      };
919

920
      uint64_t bits = value;
2,147✔
921
      if (value == 0) {
2,147✔
922
         ir->op = MACRO_BZERO;
9✔
923
         ir->size = JIT_SZ_UNSPEC;
9✔
924
         ir->arg2.kind = JIT_VALUE_INVALID;
9✔
925
         jit_lvn_bzero(ir, state);
9✔
926
         return;
204✔
927
      }
928
      else if (bits == (bits & 0xff) * compress[ir->size]) {
2,138✔
929
         ir->size = JIT_SZ_8;
842✔
930
         ir->arg2 = LVN_CONST((bits &= 0xff));
842✔
931
      }
932

933
      int64_t bytes;
2,138✔
934
      if (lvn_get_const(state->regvn[ir->result], state, &bytes)) {
2,138✔
935
         if (bytes == 0) {
1,357✔
936
            lvn_convert_nop(ir);
1✔
937
            return;
1✔
938
         }
939
         else if (bytes > 0 && bytes <= 8 && is_power_of_2(bytes)) {
1,356✔
940
            // Convert to a store
941
            const jit_size_t newsz = ilog2(bytes);
185✔
942
            ir->op     = J_STORE;
185✔
943
            ir->arg2   = ir->arg1;
185✔
944
            ir->arg1   = LVN_CONST((bits * expand[ir->size]) & mask[newsz]);
185✔
945
            ir->result = JIT_REG_INVALID;
185✔
946
            ir->size   = newsz;
185✔
947
            return;
185✔
948
         }
949
      }
950
   }
951

952
   // Clobbers the count register
953
   state->regvn[ir->result] = lvn_new_value(state);
2,015✔
954
}
955

956
static void jit_lvn_exp(jit_ir_t *ir, lvn_state_t *state)
127✔
957
{
958
   int64_t base, exp;
127✔
959
   if (ir->cc != JIT_CC_NONE) {
127✔
960
      lvn_kill_flags(state);
125✔
961
      jit_lvn_generic(ir, state, VN_INVALID);
125✔
962
   }
963
   else if (lvn_can_fold(ir, state, &base, &exp))
2✔
964
      lvn_convert_mov(ir, state, LVN_CONST(ipow(base, exp)));
1✔
965
   else if (lvn_is_const(ir->arg1, state, &base) && base == 2) {
1✔
966
      ir->op = J_SHL;
1✔
967
      ir->arg1.int64 = 1;
1✔
968
      jit_lvn_generic(ir, state, VN_INVALID);
1✔
969
   }
970
   else
UNCOV
971
      jit_lvn_generic(ir, state, VN_INVALID);
×
972
}
127✔
973

974
void jit_do_lvn(jit_func_t *f)
67,313✔
975
{
976
   lvn_state_t state = {
67,313✔
977
      .tabsz  = next_power_of_2(f->nirs),
67,313✔
978
      .func   = f,
979
      .nextvn = FIRST_VN
980
   };
981

982
   state.regvn = xmalloc_array(f->nregs + 1 + TRACK_ARGS, sizeof(valnum_t));
67,313✔
983
   state.hashtab = xcalloc_array(state.tabsz, sizeof(lvn_tab_t));
67,313✔
984

985
   bool reset = true;
67,313✔
986
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs;
7,542,939✔
987
        reset = cfg_is_terminator(f, ir), ir++) {
7,475,626✔
988

989
      if (reset || ir->target) {
7,475,626✔
990
         for (int j = 0; j < f->nregs + 1 + TRACK_ARGS; j++)
84,160,010✔
991
            state.regvn[j] = VN_INVALID;
83,697,522✔
992
      }
993

994
      switch (ir->op) {
7,475,626✔
995
      case J_MUL: jit_lvn_mul(ir, &state); break;
132,288✔
996
      case J_DIV: jit_lvn_div(ir, &state); break;
2,970✔
997
      case J_ADD: jit_lvn_add(ir, &state); break;
192,704✔
998
      case J_SUB: jit_lvn_sub(ir, &state); break;
136,496✔
999
      case J_NEG: jit_lvn_neg(ir, &state); break;
46,305✔
1000
      case J_OR:  jit_lvn_or(ir, &state); break;
79,527✔
1001
      case J_XOR: jit_lvn_xor(ir, &state); break;
104,348✔
1002
      case J_SHL: jit_lvn_shl(ir, &state); break;
5,809✔
1003
      case J_SHR: jit_lvn_shr(ir, &state); break;
4,442✔
1004
      case J_ASR: jit_lvn_asr(ir, &state); break;
34,662✔
1005
      case J_MOV: jit_lvn_mov(ir, &state); break;
513,138✔
1006
      case J_CMP: jit_lvn_cmp(ir, &state); break;
334,771✔
1007
      case J_CCMP: jit_lvn_ccmp(ir, &state); break;
31,466✔
1008
      case J_CSEL: jit_lvn_csel(ir, &state); break;
153,709✔
1009
      case J_CSET: jit_lvn_cset(ir, &state); break;
113,707✔
1010
      case J_JUMP: jit_lvn_jump(ir, &state); break;
240,737✔
1011
      case J_CLAMP: jit_lvn_clamp(ir, &state); break;
52,394✔
1012
      case J_RECV: jit_lvn_recv(ir, &state); break;
337,875✔
1013
      case J_SEND: jit_lvn_send(ir, &state); break;
1,413,906✔
1014
      case MACRO_MOVE:
42,486✔
1015
      case MACRO_COPY: jit_lvn_copy(ir, &state); break;
42,486✔
1016
      case MACRO_BZERO: jit_lvn_bzero(ir, &state); break;
5,167✔
1017
      case MACRO_MEMSET: jit_lvn_memset(ir, &state); break;
2,210✔
1018
      case MACRO_EXP: jit_lvn_exp(ir, &state); break;
127✔
1019
      case MACRO_EXIT:
341,732✔
1020
      case MACRO_VEC4OP:
1021
      case MACRO_VEC2OP:
1022
      case MACRO_PACK:
1023
      case MACRO_UNPACK:
1024
      case J_CALL:
1025
         lvn_kill_args(&state);
341,732✔
1026
         // Fall-through
1027
      default:
3,494,382✔
1028
         if (jit_writes_flags(ir))
3,494,382✔
1029
            state.regvn[f->nregs] = VN_INVALID;
4,142✔
1030
         if (ir->result != JIT_REG_INVALID)
3,494,382✔
1031
            state.regvn[ir->result] = VN_INVALID;
864,381✔
1032
      }
1033
   }
1034

1035
   free(state.regvn);
67,313✔
1036
   free(state.hashtab);
67,313✔
1037
}
67,313✔
1038

1039
////////////////////////////////////////////////////////////////////////////////
1040
// Copy propagation
1041

1042
void jit_do_cprop(jit_func_t *f)
67,302✔
1043
{
1044
   jit_value_t *map LOCAL = xmalloc_array(f->nregs, sizeof(jit_value_t));
134,604✔
1045

1046
   bool reset = true;
67,302✔
1047
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs;
7,542,811✔
1048
        reset = cfg_is_terminator(f, ir), ir++) {
7,475,509✔
1049

1050
      if (reset || ir->target) {
7,475,509✔
1051
         for (int j = 0; j < f->nregs; j++)
79,997,422✔
1052
            map[j].kind = JIT_VALUE_INVALID;
79,534,953✔
1053
      }
1054

1055
      if (ir->arg1.kind == JIT_VALUE_REG) {
7,475,509✔
1056
         jit_value_t copy = map[ir->arg1.reg];
1,674,205✔
1057
         if (copy.kind != JIT_VALUE_INVALID)
1,674,205✔
1058
            ir->arg1 = copy;
380,585✔
1059
      }
1060

1061
      if (ir->arg2.kind == JIT_VALUE_REG) {
7,475,509✔
1062
         jit_value_t copy = map[ir->arg2.reg];
1,121,945✔
1063
         if (copy.kind != JIT_VALUE_INVALID)
1,121,945✔
1064
            ir->arg2 = copy;
258,300✔
1065
      }
1066

1067
      if (ir->result == JIT_REG_INVALID)
7,475,509✔
1068
         continue;
4,775,426✔
1069
      else if (ir->op == MACRO_MEMSET || ir->op == MACRO_COPY
2,700,083✔
1070
               || ir->op == MACRO_MOVE || ir->op == MACRO_BZERO)
2,673,186✔
1071
         continue;   // Does not write to result
46,312✔
1072
      else if (map[ir->result].kind != JIT_VALUE_INVALID) {
2,653,771✔
1073
         // Invalidate any existing copies of this register
1074
         for (int j = 0; j < f->nregs; j++) {
617,473,331✔
1075
            if (map[j].kind == JIT_VALUE_REG && map[j].reg == ir->result)
617,064,621✔
1076
               map[j].kind = JIT_VALUE_INVALID;
344,615✔
1077
         }
1078
      }
1079

1080
      if (ir->op == J_MOV) {
2,653,771✔
1081
         map[ir->result] = ir->arg1;
784,979✔
1082
         if (ir->arg1.kind == JIT_VALUE_REG)
784,979✔
1083
            map[ir->arg1.reg] = ir->arg1;
457,435✔
1084
      }
1085
      else
1086
         map[ir->result].kind = JIT_VALUE_INVALID;
1,868,792✔
1087
   }
1088
}
67,302✔
1089

1090
////////////////////////////////////////////////////////////////////////////////
1091
// Dead code elimination
1092

1093
typedef struct {
1094
   int       *count;
1095
   jit_reg_t *renumber;
1096
   jit_reg_t  next;
1097
} dce_state_t;
1098

1099
static void dce_count_use(jit_value_t value, dce_state_t *state)
14,951,016✔
1100
{
1101
   if (value.kind == JIT_VALUE_REG || value.kind == JIT_ADDR_REG) {
14,951,016✔
1102
      if (state->count[value.reg]++ == 0)
3,622,846✔
1103
         state->renumber[value.reg] = state->next++;
812,173✔
1104
   }
1105
}
14,951,016✔
1106

1107
static void dce_renumber(jit_value_t *value, dce_state_t *state)
14,951,016✔
1108
{
1109
   if (value->kind == JIT_VALUE_REG || value->kind == JIT_ADDR_REG)
14,951,016✔
1110
      value->reg = state->renumber[value->reg];
3,622,846✔
1111
}
14,951,016✔
1112

1113
void jit_do_dce(jit_func_t *f)
67,302✔
1114
{
1115
   dce_state_t state = {};
67,302✔
1116
   state.count = xcalloc_array(f->nregs, sizeof(int));
67,302✔
1117
   state.renumber = xcalloc_array(f->nregs, sizeof(jit_reg_t));
67,302✔
1118

1119
#ifdef DEBUG
1120
   for (int i = 0; i < f->nregs; i++)
1,111,947✔
1121
      state.renumber[i] = JIT_REG_INVALID;
1,044,645✔
1122
#endif
1123

1124
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs; ir++) {
7,542,810✔
1125
      dce_count_use(ir->arg1, &state);
7,475,508✔
1126
      dce_count_use(ir->arg2, &state);
7,475,508✔
1127

1128
      const bool must_keep_result =
14,951,016✔
1129
         ir->result != JIT_REG_INVALID &&
7,475,508✔
1130
         (cfg_reads_result(ir) || jit_writes_flags(ir));
2,700,078✔
1131

1132
      if (must_keep_result && state.count[ir->result]++ == 0)
7,475,508✔
1133
         state.renumber[ir->result] = state.next++;
15,251✔
1134
   }
1135

1136
   for (jit_ir_t *ir = f->irbuf, *cmp = NULL; ir < f->irbuf + f->nirs; ir++) {
7,542,810✔
1137
      dce_renumber(&ir->arg1, &state);
7,475,508✔
1138
      dce_renumber(&ir->arg2, &state);
7,475,508✔
1139

1140
      if (jit_reads_flags(ir) || cfg_is_terminator(f, ir))
7,475,508✔
1141
         cmp = NULL;   // Consumed flags
1142
      else if (jit_writes_flags(ir)) {
6,895,864✔
1143
         if (cmp != NULL)
349,020✔
1144
            lvn_convert_nop(cmp);   // Flags are never read
44,068✔
1145
         if (ir->op == J_CMP || ir->op == J_FCMP)
349,020✔
1146
            cmp = ir;
339,366✔
1147
         else
1148
            cmp = NULL;
1149
      }
1150

1151
      if (ir->result == JIT_REG_INVALID)
7,475,508✔
1152
         continue;
4,775,430✔
1153
      else if (state.count[ir->result] == 0 && !jit_writes_flags(ir))
2,700,078✔
1154
         lvn_convert_nop(ir);
330,168✔
1155
      else
1156
         ir->result = state.renumber[ir->result];
2,369,910✔
1157
   }
1158

1159
   f->nregs = state.next;
67,302✔
1160

1161
   free(state.count);
67,302✔
1162
   free(state.renumber);
67,302✔
1163
}
67,302✔
1164

1165
////////////////////////////////////////////////////////////////////////////////
1166
// NOP deletion
1167

1168
void jit_delete_nops(jit_func_t *f)
67,302✔
1169
{
1170
   jit_label_t *map LOCAL = xmalloc_array(f->nirs, sizeof(jit_label_t));
67,302✔
1171

1172
   int wptr = 0;
67,302✔
1173
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs; ir++) {
7,542,812✔
1174
      map[ir - f->irbuf] = wptr;
7,475,510✔
1175

1176
      if (ir->op != J_NOP) {
7,475,510✔
1177
         jit_ir_t *dest = f->irbuf + wptr++;
6,920,973✔
1178
         if (dest != ir)
6,920,973✔
1179
            *dest = *ir;
5,209,875✔
1180
      }
1181
   }
1182

1183
   for (jit_ir_t *ir = f->irbuf; ir < f->irbuf + f->nirs; ir++) {
7,542,812✔
1184
      if (ir->arg1.kind == JIT_VALUE_LABEL) {
7,475,510✔
1185
         ir->arg1.label = map[ir->arg1.label];
236,308✔
1186
         f->irbuf[ir->arg1.label].target = 1;
236,308✔
1187
      }
1188
      if (ir->arg2.kind == JIT_VALUE_LABEL) {
7,475,510✔
1189
         ir->arg2.label = map[ir->arg2.label];
33,430✔
1190
         f->irbuf[ir->arg2.label].target = 1;
33,430✔
1191
      }
1192
   }
1193

1194
   f->nirs = wptr;
67,302✔
1195
}
67,302✔
1196

1197
////////////////////////////////////////////////////////////////////////////////
1198
// Memory to register promotion
1199

1200
static inline jit_reg_t get_value_reg(jit_value_t value)
3,142,628✔
1201
{
1202
   switch (value.kind) {
3,142,628✔
1203
   case JIT_ADDR_REG:
961,546✔
1204
   case JIT_VALUE_REG:
1205
      return value.reg;
961,546✔
1206
   default:
1207
      return JIT_REG_INVALID;
1208
   }
1209
}
1210

1211
static bool try_promote_allocation(jit_func_t *f, jit_ir_t *alloc)
23,704✔
1212
{
1213
   assert(alloc->arg2.kind == JIT_VALUE_INT64);
23,704✔
1214

1215
   if (alloc->arg2.int64 > 8 || !is_power_of_2(alloc->arg2.int64))
23,704✔
1216
      return false;
1217

1218
   jit_size_t sz = JIT_SZ_UNSPEC;
17,550✔
1219
   switch (alloc->arg2.int64) {
17,550✔
1220
   case 1: sz = JIT_SZ_8; break;
1221
   case 2: sz = JIT_SZ_16; break;
1222
   case 4: sz = JIT_SZ_32; break;
1223
   case 8: sz = JIT_SZ_64; break;
1224
   }
1225

1226
   jit_reg_t reg = alloc->result;
17,550✔
1227
   assert(reg != JIT_REG_INVALID);
17,550✔
1228

1229
   int first = -1, last = -1;
1230
   for (int i = 0; i < f->nirs; i++) {
1,640,520✔
1231
      jit_ir_t *ir = &(f->irbuf[i]);
1,640,455✔
1232
      if (ir->result == reg && ir != alloc)
1,640,455✔
1233
         return false;
1234
      else if (ir->op == J_LOAD || ir->op == J_STORE) {
1,640,455✔
1235
         jit_value_t addr = ir->op == J_LOAD ? ir->arg1 : ir->arg2;
138,153✔
1236
         if (get_value_reg(addr) != reg)
138,153✔
1237
            continue;
121,125✔
1238
         else if (addr.disp != 0 || ir->size != sz)
17,028✔
1239
            return false;
1240
         else if (ir->op == J_STORE && get_value_reg(ir->arg1) == reg)
2✔
1241
            return false;
1242
         else if (first == -1)
2✔
1243
            first = last = i;
1244
         else
1245
            last = i;
1✔
1246
      }
1247
      else if (get_value_reg(ir->arg1) == reg)
1,502,302✔
1248
         return false;
1249
      else if (get_value_reg(ir->arg2) == reg)
1,502,169✔
1250
         return false;
1251
   }
1252

1253
   if (first != -1) {
65✔
1254
      for (jit_ir_t *ir = f->irbuf + first; ir <= f->irbuf + last; ir++) {
4✔
1255
         if (ir->op == J_STORE && get_value_reg(ir->arg2) == reg) {
3✔
1256
            ir->op        = J_MOV;
1✔
1257
            ir->size      = JIT_SZ_UNSPEC;
1✔
1258
            ir->cc        = JIT_CC_NONE;
1✔
1259
            ir->result    = reg;
1✔
1260
            ir->arg2.kind = JIT_VALUE_INVALID;
1✔
1261
         }
1262
         else if (ir->op == J_LOAD && get_value_reg(ir->arg1) == reg) {
2✔
1263
            ir->op        = J_MOV;
1✔
1264
            ir->size      = JIT_SZ_UNSPEC;
1✔
1265
            ir->cc        = JIT_CC_NONE;
1✔
1266
            ir->arg1.kind = JIT_VALUE_REG;
1✔
1267
            ir->arg1.reg  = reg;
1✔
1268
         }
1269
      }
1270
   }
1271

1272
   alloc->op        = J_NOP;
65✔
1273
   alloc->size      = JIT_SZ_UNSPEC;
65✔
1274
   alloc->cc        = JIT_CC_NONE;
65✔
1275
   alloc->result    = JIT_REG_INVALID;
65✔
1276
   alloc->arg1.kind = JIT_VALUE_INVALID;
65✔
1277
   alloc->arg2.kind = JIT_VALUE_INVALID;
65✔
1278

1279
   return true;
65✔
1280
}
1281

1282
void jit_do_mem2reg(jit_func_t *f)
67,301✔
1283
{
1284
   if (f->framesz == 0)
67,301✔
1285
      return;
1286

1287
   int replaced = 0;
1288
   for (int i = 0; i < f->nirs; i++) {
297,710✔
1289
      jit_ir_t *ir = &(f->irbuf[i]);
297,709✔
1290
      if (ir->op == MACRO_SALLOC && try_promote_allocation(f, ir))
297,709✔
1291
         replaced++;
65✔
1292
      else if (cfg_is_terminator(f, ir))
297,644✔
1293
         break;
1294
   }
1295

1296
   if (replaced == 0)
9,003✔
1297
      return;
1298

1299
   int newsize = 0;
1300
   for (int i = 0; i < f->nirs; i++) {
327✔
1301
      jit_ir_t *ir = &(f->irbuf[i]);
326✔
1302
      if (ir->op == MACRO_SALLOC) {
326✔
1303
         assert(ir->arg1.kind == JIT_VALUE_INT64);
2✔
1304
         assert(ir->arg1.int64 >= newsize);
2✔
1305
         ir->arg1.int64 = newsize;
2✔
1306

1307
         assert(ir->arg2.kind == JIT_VALUE_INT64);
2✔
1308
         newsize += ALIGN_UP(ir->arg2.int64, 8);
2✔
1309
      }
1310
      else if (cfg_is_terminator(f, ir))
324✔
1311
         break;
1312
   }
1313

1314
   assert(newsize <= f->framesz);
65✔
1315
   f->framesz = newsize;
65✔
1316
}
1317

1318
////////////////////////////////////////////////////////////////////////////////
1319
// Register allocation
1320

1321
typedef struct {
1322
   jit_reg_t   reg;
1323
   unsigned    first;
1324
   unsigned    last;
1325
} lscan_interval_t;
1326

1327
static void lscan_grow_range(jit_reg_t reg, lscan_interval_t *li, int pos)
465✔
1328
{
1329
   assert(reg != JIT_REG_INVALID);
465✔
1330
   li[reg].first = MIN(li[reg].first, pos);
465✔
1331
   li[reg].last = MAX(li[reg].last, pos);
465✔
1332
}
465✔
1333

1334
static void lscan_walk_cfg(jit_func_t *f, jit_cfg_t *cfg, int bi,
85✔
1335
                           lscan_interval_t *li, bit_mask_t *visited)
1336
{
1337
   if (mask_test(visited, bi))
85✔
1338
      return;
1339

1340
   mask_set(visited, bi);
83✔
1341

1342
   jit_block_t *b = &(cfg->blocks[bi]);
83✔
1343

1344
   for (size_t bit = -1; mask_iter(&b->livein, &bit);)
88✔
1345
      lscan_grow_range(bit, li, b->first);
5✔
1346

1347
   for (int i = b->first; i <= b->last; i++) {
497✔
1348
      jit_ir_t *ir = &(f->irbuf[i]);
414✔
1349

1350
      if (ir->result != JIT_REG_INVALID)
414✔
1351
         lscan_grow_range(ir->result, li, i);
237✔
1352

1353
      if (ir->arg1.kind == JIT_VALUE_REG || ir->arg1.kind == JIT_ADDR_REG)
414✔
1354
         lscan_grow_range(ir->arg1.reg, li, i);
89✔
1355

1356
      if (ir->arg2.kind == JIT_VALUE_REG || ir->arg2.kind == JIT_ADDR_REG)
414✔
1357
         lscan_grow_range(ir->arg2.reg, li, i);
128✔
1358
   }
1359

1360
   for (size_t bit = -1; mask_iter(&b->liveout, &bit); )
89✔
1361
      lscan_grow_range(bit, li, b->last);
6✔
1362

1363
   for (int i = 0; i < b->out.count; i++) {
91✔
1364
      const int next = jit_get_edge(&(b->out), i);
8✔
1365
      lscan_walk_cfg(f, cfg, next, li, visited);
8✔
1366
   }
1367
}
1368

1369
static int lscan_interval_cmp(const void *a, const void *b)
307✔
1370
{
1371
   const lscan_interval_t *la = a;
307✔
1372
   const lscan_interval_t *lb = b;
307✔
1373

1374
   if (la->first < lb->first)
307✔
1375
      return -1;
1376
   else if (la->first > lb->first)
6✔
1377
      return 1;
1378
   else
1379
      return 0;
2✔
1380
}
1381

1382
static inline void lscan_shift_active(lscan_interval_t **active, unsigned to,
223✔
1383
                                      unsigned from, unsigned count)
1384
{
1385
   if (to != from && count == 1)
223✔
1386
      active[to] = active[from];
7✔
1387
   else if (to != from && count > 1)
216✔
UNCOV
1388
      memmove(active + to, active + from, count * sizeof(lscan_interval_t *));
×
1389
}
223✔
1390

1391
int jit_do_lscan(jit_func_t *f, phys_slot_t *slots, uint64_t badmask)
77✔
1392
{
1393
   //
1394
   // Massimiliano Poletto and Vivek Sarkar
1395
   // Linear Scan Register Allocation
1396
   // ACM Trans. Program. Lang. Syst., Vol. 21, 5 (sep 1999), 895--913
1397
   // https://doi.org/10.1145/330249.330250
1398
   //
1399

1400
   jit_cfg_t *cfg = jit_get_cfg(f);
77✔
1401

1402
   lscan_interval_t *li LOCAL =
154✔
1403
      xmalloc_array(f->nregs, sizeof(lscan_interval_t));
77✔
1404
   for (int i = 0; i < f->nregs; i++) {
298✔
1405
      li[i].reg = i;
221✔
1406
      li[i].first = UINT_MAX;
221✔
1407
      li[i].last = 0;
221✔
1408
      slots[i] = UINT_MAX;
221✔
1409
   }
1410

1411
   bit_mask_t visited;
77✔
1412
   mask_init(&visited, cfg->nblocks);
77✔
1413
   lscan_walk_cfg(f, cfg, 0, li, &visited);
77✔
1414
   mask_free(&visited);
77✔
1415

1416
   jit_free_cfg(cfg);
77✔
1417

1418
   qsort(li, f->nregs, sizeof(lscan_interval_t), lscan_interval_cmp);
77✔
1419

1420
   const int Rint = 32 - __builtin_popcountl(badmask & 0xffffffff);
77✔
1421
   uint32_t freeregs = ~(badmask & 0xffffffff);
77✔
1422

1423
   lscan_interval_t **active LOCAL =
77✔
1424
      xmalloc_array(f->nregs, sizeof(lscan_interval_t *));
77✔
1425
   unsigned nactive = 0, next_spill = STACK_BASE;
77✔
1426

1427
   for (int i = 0; i < f->nregs; i++) {
298✔
1428
      // Expire old intervals
1429
      int expire = 0;
1430
      for (; expire < nactive; expire++) {
279✔
1431
         const lscan_interval_t *next = active[expire];
177✔
1432
         if (next->last >= li[i].first)
177✔
1433
            break;
1434
         else {
1435
            assert(slots[next->reg] < STACK_BASE);
58✔
1436
            freeregs |= (1 << slots[next->reg]);
58✔
1437
         }
1438
      }
1439

1440
      lscan_shift_active(active, 0, expire, nactive - expire);
221✔
1441
      nactive -= expire;
221✔
1442

1443
      const lscan_interval_t *this = &li[i];
221✔
1444

1445
      bool allocated = false;
221✔
1446
      if (nactive == Rint) {
221✔
1447
         // Spill at interval
1448
         const lscan_interval_t *spill = active[nactive - 1];
44✔
1449
         if (spill->last > this->last) {
44✔
1450
            slots[this->reg] = slots[spill->reg];
2✔
1451
            slots[spill->reg] = next_spill++;
2✔
1452
            nactive--;
2✔
1453
            allocated = true;
2✔
1454
         }
1455
         else
1456
            slots[this->reg] = next_spill++;
42✔
1457
      }
1458
      else {
1459
         const int bit = __builtin_ffsl(freeregs) - 1;
177✔
1460
         assert(bit >= 0);
177✔
1461
         assert(freeregs & (1 << bit));
177✔
1462
         freeregs &= ~(1 << bit);
177✔
1463
         slots[this->reg] = bit;
177✔
1464
         allocated = true;
177✔
1465
      }
1466

1467
      if (allocated) {
221✔
1468
         // Add to active list, sorted by increasing end point
1469
         int pos = 0;
1470
         for (; pos < nactive && active[pos]->last <= li[i].last; pos++)
254✔
1471
            assert(active[pos] != &li[i]);
75✔
1472
         assert(pos < f->nregs);
179✔
1473
         if (pos < nactive)
179✔
1474
            lscan_shift_active(active, pos + 1, pos, nactive - pos);
2✔
1475
         active[pos] = &li[i];
179✔
1476
         nactive++;
179✔
1477
      }
1478
   }
1479

1480
   return next_spill - STACK_BASE;
77✔
1481
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc