• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tarantool / luajit / 6312339805

26 Sep 2023 11:32AM UTC coverage: 88.208% (+0.01%) from 88.198%
6312339805

push

github

igormunkin
Handle table unsinking in the presence of IRFL_TAB_NOMM.

Reported by Sergey Kaplun.

(cherry-picked from commit 0ef51b495)

Table `NEWREF` storage for non-constant keys also emits `FREF` IR with
`IRFL_TAB_NOMM` to invalidate the metamethod cache. When table creation
and `NEWREF` are sinked, the corresponding `FSTORE` is sinked too and
should be restored on trace exit. However, `snap_unsink()` doesn't
expect anything except `IRFL_TAB_META` as the second operand of `FREF`,
so the corresponding assertion fails.

This patch adds a switch-case statement to handle the `IRFL_TAB_NOMM`
case. Since `FREF` with `IRFL_TAB_NOMM` always follows some hash store,
we can avoid a duplication of the cache invalidation, so this case just
does nothing.

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#8825

Reviewed-by: Maxim Kokryashkin <m.kokryashkin@tarantool.org>
Reviewed-by: Sergey Bronnikov <sergeyb@tarantool.org>
Signed-off-by: Igor Munkin <imun@tarantool.org>

5337 of 5972 branches covered (0.0%)

Branch coverage included in aggregate %.

20477 of 23293 relevant lines covered (87.91%)

2732866.64 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.74
/src/lj_str.c
1
/*
2
** String handling.
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_str_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10
#include "lj_gc.h"
11
#include "lj_err.h"
12
#include "lj_str.h"
13
#include "lj_char.h"
14

15
#if LUAJIT_USE_ASAN
16
/* These functions may read past a buffer end, that's ok. */
17
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
18
  __attribute__((no_sanitize_address));
19

20
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
21
  __attribute__((no_sanitize_address));
22

23
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
24
  __attribute__((no_sanitize_address));
25
#endif
26

27
/* -- String helpers ------------------------------------------------------ */
28

29
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
30
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
103,370✔
31
{
32
  MSize i, n = a->len > b->len ? b->len : a->len;
103,370✔
33
  for (i = 0; i < n; i += 4) {
109,870✔
34
    /* Note: innocuous access up to end of string + 3. */
35
    uint32_t va = *(const uint32_t *)(strdata(a)+i);
103,472✔
36
    uint32_t vb = *(const uint32_t *)(strdata(b)+i);
103,472✔
37
    if (va != vb) {
103,472✔
38
#if LJ_LE
39
      va = lj_bswap(va); vb = lj_bswap(vb);
96,972✔
40
#endif
41
      i -= n;
96,972✔
42
      if ((int32_t)i >= -3) {
96,972✔
43
        va >>= 32+(i<<3); vb >>= 32+(i<<3);
96,789✔
44
        if (va == vb) break;
96,789✔
45
      }
46
      return va < vb ? -1 : 1;
96,508✔
47
    }
48
  }
49
  return (int32_t)(a->len - b->len);
6,862✔
50
}
51

52
/* Fast string data comparison. Caveat: unaligned access to 1st string! */
53
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
7,837,169✔
54
{
55
  MSize i = 0;
7,837,169✔
56
  lj_assertX(len > 0, "fast string compare with zero length");
14,859,126✔
57
  lj_assertX((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4,
14,859,126✔
58
             "fast string compare crossing page boundary");
59
  do {  /* Note: innocuous access up to end of string + 3. */
14,859,126✔
60
    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
14,859,126✔
61
    if (v) {
14,859,126✔
62
      i -= len;
2,546,236✔
63
#if LJ_LE
64
      return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
2,546,236✔
65
#else
66
      return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
67
#endif
68
    }
69
    i += 4;
12,312,890✔
70
  } while (i < len);
12,312,890✔
71
  return 0;
72
}
73

74
/* Find fixed string p inside string s. */
75
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
1,701✔
76
{
77
  if (plen <= slen) {
1,701✔
78
    if (plen == 0) {
1,627✔
79
      return s;
80
    } else {
81
      int c = *(const uint8_t *)p++;
1,624✔
82
      plen--; slen -= plen;
1,624✔
83
      while (slen) {
3,090✔
84
        const char *q = (const char *)memchr(s, c, slen);
3,041✔
85
        if (!q) break;
3,041✔
86
        if (memcmp(q+1, p, plen) == 0) return q;
1,740✔
87
        q++; slen -= (MSize)(q-s); s = q;
1,466✔
88
      }
89
    }
90
  }
91
  return NULL;
92
}
93

94
/* Check whether a string has a pattern matching character. */
95
int lj_str_haspattern(GCstr *s)
1,976✔
96
{
97
  const char *p = strdata(s), *q = p + s->len;
1,976✔
98
  while (p < q) {
4,702✔
99
    int c = *(const uint8_t *)p++;
3,626✔
100
    if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
3,626✔
101
      return 1;  /* Found a pattern matching char. */
102
  }
103
  return 0;  /* No pattern matching chars found. */
104
}
105

106
/* -- String interning ---------------------------------------------------- */
107

108
/* Resize the string hash table (grow and shrink). */
109
void lj_str_resize(lua_State *L, MSize newmask)
681✔
110
{
111
  global_State *g = G(L);
681✔
112
  GCRef *newhash;
681✔
113
  MSize i;
681✔
114
  if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
681✔
115
    return;  /* No resizing during GC traversal or if already too big. */
116
  newhash = lj_mem_newvec(L, newmask+1, GCRef);
681✔
117
  memset(newhash, 0, (newmask+1)*sizeof(GCRef));
681✔
118
  for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
1,743,017✔
119
    GCobj *p = gcref(g->strhash[i]);
1,742,336✔
120
    while (p) {  /* Follow each hash chain and reinsert all strings. */
2,453,097✔
121
      MSize h = gco2str(p)->hash & newmask;
710,761✔
122
      GCobj *next = gcnext(p);
710,761✔
123
      /* NOBARRIER: The string table is a GC root. */
124
      setgcrefr(p->gch.nextgc, newhash[h]);
710,761✔
125
      setgcref(newhash[h], p);
710,761✔
126
      p = next;
710,761✔
127
    }
128
  }
129
  lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
681✔
130
  g->strmask = newmask;
681✔
131
  g->strhash = newhash;
681✔
132
}
133

134
#if LUAJIT_SMART_STRINGS
135
static LJ_AINLINE uint32_t
136
lj_fullhash(const uint8_t *v, MSize len)
137
{
138
  MSize a = 0, b = 0;
139
  MSize c = 0xcafedead;
140
  MSize d = 0xdeadbeef;
141
  MSize h = len;
2,722,038✔
142
  lj_assertX(len >= 12, "full hash calculation for too short (%d) string", len);
143
  for(; len>8; len-=8, v+=8) {
2,722,038✔
144
    a ^= lj_getu32(v);
2,680,131✔
145
    b ^= lj_getu32(v+4);
2,680,131✔
146
    c += a;
2,680,131✔
147
    d += b;
2,680,131✔
148
    a = lj_rol(a, 5) - d;
2,680,131✔
149
    b = lj_rol(b, 7) - c;
2,680,131✔
150
    c = lj_rol(c, 24) ^ a;
2,680,131✔
151
    d = lj_rol(d, 1) ^ b;
2,680,131✔
152
  }
153
  a ^= lj_getu32(v+len-8);
41,907✔
154
  b ^= lj_getu32(v+len-4);
41,907✔
155
  c += b; c -= lj_rol(a, 9);
41,907✔
156
  d += a; d -= lj_rol(b, 18);
41,907✔
157
  h -= lj_rol(a^b,7);
41,907✔
158
  h += c; h += lj_rol(d,13);
41,907✔
159
  d ^= c;  d -= lj_rol(c,25);
41,907✔
160
  h ^= d; h -= lj_rol(d,16);
41,907✔
161
  c ^= h; c -= lj_rol(h,4);
41,907✔
162
  d ^= c;  d -= lj_rol(c,14);
41,907✔
163
  h ^= d; h -= lj_rol(d,24);
41,907✔
164
  return h;
41,907✔
165
}
166
#endif
167

168
/* Intern a string and return string object. */
169
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
8,258,341✔
170
{
171
  global_State *g;
8,258,341✔
172
  GCstr *s;
8,258,341✔
173
  GCobj *o;
8,258,341✔
174
  MSize len = (MSize)lenx;
8,258,341✔
175
  uint8_t strflags = 0;
8,258,341✔
176
#if LUAJIT_SMART_STRINGS
177
  unsigned collisions = 0;
8,258,341✔
178
#endif
179
  if (lenx >= LJ_MAX_STR)
8,258,341✔
180
    lj_err_msg(L, LJ_ERR_STROV);
×
181
  g = G(L);
8,258,341✔
182
  if (len == 0)
8,258,341✔
183
    return &g->strempty;
4,197✔
184
  /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
185
  MSize h = lua_hash(str, len);
8,254,144✔
186
  /* Check if the string has already been interned. */
187
  o = gcref(g->strhash[h & g->strmask]);
8,254,144✔
188
#if LUAJIT_SMART_STRINGS
189
/*
190
** The default "fast" string hash function samples only a few positions
191
** in a string, the remaining bytes don't affect the function's result.
192
** The function performs well for short strings; however long strings
193
** can yield extremely high collision rates.
194
**
195
** An adaptive schema was implemented. Two hash functions are used
196
** simultaneously. A bucket is picked based on the output of the fast
197
** hash function. If an item is to be inserted in a collision chain
198
** longer than a certain threshold, another bucket is picked based on
199
** the stronger hash function. Since two hash functions are used
200
** simultaneously, insert should consider two buckets. The second bucket
201
** is often NOT considered thanks to the bloom filter. The filter is
202
** rebuilt during GC cycle.
203
**
204
** Parameters below were tuned on a set of benchmarks. Max_collisions is
205
** also backed by theory: the expected maximum length of a collision
206
** chain in a hash table with the fill factor of 1.0 is
207
** O(log(N)/log(log(N))), assuming uniformly distributed random keys.
208
** The upper bound for N=65,000 is 10, hence 40 is a clear indication of
209
** an anomaly.
210
**/
211
#define max_collisions 40
212
#define inc_collision_soft() (collisions++)
213
/* If different strings yield the same hash sum, grow counter faster. */
214
#define inc_collision_hard() (collisions+=1+(len>>4), 1)
215
#else
216
#define inc_collision_hard() (1)
217
#define inc_collision_soft()
218
#endif
219
  if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
8,254,144✔
220
    while (o != NULL) {
12,156,391✔
221
      GCstr *sx = gco2str(o);
11,551,130✔
222
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
11,551,130✔
223
                      str_fastcmp(str, strdata(sx), len) == 0) {
15,465,323✔
224
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
225
        if (isdead(g, o)) flipwhite(o);
7,647,749✔
226
        g->strhash_hit++;
7,647,749✔
227
        return sx;  /* Return existing string. */
7,647,749✔
228
      }
229
      o = gcnext(o);
3,903,381✔
230
      inc_collision_soft();
3,903,381✔
231
    }
232
  } else {  /* Slow path: end of string is too close to a page boundary. */
233
    while (o != NULL) {
2,184✔
234
      GCstr *sx = gco2str(o);
2,152✔
235
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
2,152✔
236
                      memcmp(str, strdata(sx), len) == 0) {
1,102✔
237
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
238
        if (isdead(g, o)) flipwhite(o);
1,102✔
239
        g->strhash_hit++;
1,102✔
240
        return sx;  /* Return existing string. */
1,102✔
241
      }
242
      o = gcnext(o);
1,050✔
243
      inc_collision_soft();
1,050✔
244
    }
245
  }
246
#if LUAJIT_SMART_STRINGS
247
  /* "Fast" hash function consumes all bytes of a string <= 12 bytes. */
248
  if (len > 12) {
605,293✔
249
    /*
250
    ** The bloom filter is keyed with the high 12 bits of the fast
251
    ** hash sum. The filter is rebuilt during GC cycle. It's beneficial
252
    ** to have these bits readily available and avoid hash sum
253
    ** recalculation during GC. High 6 bits are included in the "full"
254
    ** hash sum, and bits 19-25 are stored in s->strflags.
255
    **/
256
    int search_fullh =
362,980✔
257
       bloomtest(g->strbloom.cur[0], h>>(sizeof(h)*8- 6)) != 0 &&
181,490✔
258
       bloomtest(g->strbloom.cur[1], h>>(sizeof(h)*8-12)) != 0;
49,579✔
259
    if (LJ_UNLIKELY(search_fullh || collisions > max_collisions)) {
181,490✔
260
      MSize fh = lj_fullhash((const uint8_t*)str, len);
41,907✔
261
#define high6mask ((~(MSize)0)<<(sizeof(MSize)*8-6))
262
      fh = (fh >> 6) | (h & high6mask);
41,907✔
263
      if (search_fullh) {
41,907✔
264
        /* Recheck if the string has already been interned with "harder" hash. */
265
        o = gcref(g->strhash[fh & g->strmask]);
40,602✔
266
        if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
40,602✔
267
          while (o != NULL) {
53,850✔
268
            GCstr *sx = gco2str(o);
32,843✔
269
            if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
52,438✔
270
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
271
              if (isdead(g, o)) flipwhite(o);
19,595✔
272
              g->strhash_hit++;
19,595✔
273
              return sx;  /* Return existing string. */
19,595✔
274
            }
275
            o = gcnext(o);
13,248✔
276
          }
277
        } else {  /* Slow path: end of string is too close to a page boundary. */
278
          while (o != NULL) {
×
279
            GCstr *sx = gco2str(o);
×
280
            if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
×
281
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
282
              if (isdead(g, o)) flipwhite(o);
×
283
              g->strhash_hit++;
×
284
              return sx;  /* Return existing string. */
×
285
            }
286
            o = gcnext(o);
×
287
          }
288
        }
289
      }
290
      if (collisions > max_collisions) {
22,312✔
291
        strflags = 0xc0 | ((h>>(sizeof(h)*8-12))&0x3f);
19,671✔
292
        bloomset(g->strbloom.cur[0], h>>(sizeof(h)*8- 6));
19,671✔
293
        bloomset(g->strbloom.cur[1], h>>(sizeof(h)*8-12));
19,671✔
294
        bloomset(g->strbloom.next[0], h>>(sizeof(h)*8- 6));
19,671✔
295
        bloomset(g->strbloom.next[1], h>>(sizeof(h)*8-12));
19,671✔
296
        h = fh;
19,671✔
297
      }
298
    }
299
  }
300
#endif
301
  g->strhash_miss++;
585,698✔
302
  /* Nope, create a new string. */
303
  s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
585,698✔
304
  newwhite(g, s);
585,698✔
305
  s->gct = ~LJ_TSTR;
585,698✔
306
  s->len = len;
585,698✔
307
  s->hash = h;
585,698✔
308
  s->reserved = 0;
585,698✔
309
  s->strflags = strflags;
585,698✔
310
  memcpy(strdatawr(s), str, len);
585,698✔
311
  strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
585,698✔
312
  /* Add it to string hash table. */
313
  h &= g->strmask;
585,698✔
314
  s->nextgc = g->strhash[h];
585,698✔
315
  /* NOBARRIER: The string table is a GC root. */
316
  setgcref(g->strhash[h], obj2gco(s));
585,698✔
317
  if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
585,698✔
318
    lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
349✔
319
  return s;  /* Return newly interned string. */
320
}
321

322
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
582,966✔
323
{
324
  g->strnum--;
582,966✔
325
  lj_mem_free(g, s, sizestring(s));
582,966✔
326
}
582,966✔
327

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc