• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tarantool / luajit / 12298038079

12 Dec 2024 01:33PM UTC coverage: 92.888% (-0.003%) from 92.891%
12298038079

push

github

Buristan
test: skip <string/dump.lua> test for table bump

If the `foo()` function itself starts to be recorded on the very first
call, it leads to the changing of TNEW bytecode when table bump
optimization is enabled. This patch skips the test for this type of
build.

Reviewed-by: Maxim Kokryashkin <m.kokryashkin@tarantool.org>
Reviewed-by: Sergey Bronnikov <sergeyb@tarantool.org>
Signed-off-by: Sergey Kaplun <skaplun@tarantool.org>
(cherry picked from commit 5daf61c7f)

5673 of 6014 branches covered (94.33%)

Branch coverage included in aggregate %.

21609 of 23357 relevant lines covered (92.52%)

868792.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.74
/src/lj_str.c
1
/*
2
** String handling.
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_str_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10
#include "lj_gc.h"
11
#include "lj_err.h"
12
#include "lj_str.h"
13
#include "lj_char.h"
14

15
#if LUAJIT_USE_ASAN
16
/* These functions may read past a buffer end, that's ok. */
17
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
18
  __attribute__((no_sanitize_address));
19

20
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
21
  __attribute__((no_sanitize_address));
22

23
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
24
  __attribute__((no_sanitize_address));
25
#endif
26

27
/* -- String helpers ------------------------------------------------------ */
28

29
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
30
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
103,370✔
31
{
32
  MSize i, n = a->len > b->len ? b->len : a->len;
103,370✔
33
  for (i = 0; i < n; i += 4) {
109,869✔
34
    /* Note: innocuous access up to end of string + 3. */
35
    uint32_t va = *(const uint32_t *)(strdata(a)+i);
103,472✔
36
    uint32_t vb = *(const uint32_t *)(strdata(b)+i);
103,472✔
37
    if (va != vb) {
103,472✔
38
#if LJ_LE
39
      va = lj_bswap(va); vb = lj_bswap(vb);
96,973✔
40
#endif
41
      i -= n;
96,973✔
42
      if ((int32_t)i >= -3) {
96,973✔
43
        va >>= 32+(i<<3); vb >>= 32+(i<<3);
96,790✔
44
        if (va == vb) break;
96,790✔
45
      }
46
      return va < vb ? -1 : 1;
96,509✔
47
    }
48
  }
49
  return (int32_t)(a->len - b->len);
6,861✔
50
}
51

52
/* Fast string data comparison. Caveat: unaligned access to 1st string! */
53
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
8,790,334✔
54
{
55
  MSize i = 0;
8,790,334✔
56
  lj_assertX(len > 0, "fast string compare with zero length");
16,898,443✔
57
  lj_assertX((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4,
16,898,443✔
58
             "fast string compare crossing page boundary");
59
  do {  /* Note: innocuous access up to end of string + 3. */
16,898,443✔
60
    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
16,898,443✔
61
    if (v) {
16,898,443✔
62
      i -= len;
3,237,904✔
63
#if LJ_LE
64
      return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
3,237,904✔
65
#else
66
      return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
67
#endif
68
    }
69
    i += 4;
13,660,539✔
70
  } while (i < len);
13,660,539✔
71
  return 0;
72
}
73

74
/* Find fixed string p inside string s. */
75
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
2,683✔
76
{
77
  if (plen <= slen) {
2,683✔
78
    if (plen == 0) {
2,606✔
79
      return s;
80
    } else {
81
      int c = *(const uint8_t *)p++;
2,603✔
82
      plen--; slen -= plen;
2,603✔
83
      while (slen) {
9,033✔
84
        const char *q = (const char *)memchr(s, c, slen);
8,932✔
85
        if (!q) break;
8,932✔
86
        if (memcmp(q+1, p, plen) == 0) return q;
6,709✔
87
        q++; slen -= (MSize)(q-s); s = q;
6,430✔
88
      }
89
    }
90
  }
91
  return NULL;
92
}
93

94
/* Check whether a string has a pattern matching character. */
95
int lj_str_haspattern(GCstr *s)
2,691✔
96
{
97
  const char *p = strdata(s), *q = p + s->len;
2,691✔
98
  while (p < q) {
6,062✔
99
    int c = *(const uint8_t *)p++;
4,705✔
100
    if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
4,705✔
101
      return 1;  /* Found a pattern matching char. */
102
  }
103
  return 0;  /* No pattern matching chars found. */
104
}
105

106
/* -- String interning ---------------------------------------------------- */
107

108
/* Resize the string hash table (grow and shrink). */
109
void lj_str_resize(lua_State *L, MSize newmask)
980✔
110
{
111
  global_State *g = G(L);
980✔
112
  GCRef *newhash;
980✔
113
  MSize i;
980✔
114
  if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
980✔
115
    return;  /* No resizing during GC traversal or if already too big. */
116
  newhash = lj_mem_newvec(L, newmask+1, GCRef);
980✔
117
  memset(newhash, 0, (newmask+1)*sizeof(GCRef));
980✔
118
  for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
2,244,052✔
119
    GCobj *p = gcref(g->strhash[i]);
2,243,072✔
120
    while (p) {  /* Follow each hash chain and reinsert all strings. */
3,245,699✔
121
      MSize h = gco2str(p)->hash & newmask;
1,002,627✔
122
      GCobj *next = gcnext(p);
1,002,627✔
123
      /* NOBARRIER: The string table is a GC root. */
124
      setgcrefr(p->gch.nextgc, newhash[h]);
1,002,627✔
125
      setgcref(newhash[h], p);
1,002,627✔
126
      p = next;
1,002,627✔
127
    }
128
  }
129
  lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
980✔
130
  g->strmask = newmask;
980✔
131
  g->strhash = newhash;
980✔
132
}
133

134
#if LUAJIT_SMART_STRINGS
135
static LJ_AINLINE uint32_t
136
lj_fullhash(const uint8_t *v, MSize len)
137
{
138
  MSize a = 0, b = 0;
139
  MSize c = 0xcafedead;
140
  MSize d = 0xdeadbeef;
141
  MSize h = len;
3,026,589✔
142
  lj_assertX(len >= 12, "full hash calculation for too short (%d) string", len);
143
  for(; len>8; len-=8, v+=8) {
3,026,589✔
144
    a ^= lj_getu32(v);
2,982,645✔
145
    b ^= lj_getu32(v+4);
2,982,645✔
146
    c += a;
2,982,645✔
147
    d += b;
2,982,645✔
148
    a = lj_rol(a, 5) - d;
2,982,645✔
149
    b = lj_rol(b, 7) - c;
2,982,645✔
150
    c = lj_rol(c, 24) ^ a;
2,982,645✔
151
    d = lj_rol(d, 1) ^ b;
2,982,645✔
152
  }
153
  a ^= lj_getu32(v+len-8);
43,944✔
154
  b ^= lj_getu32(v+len-4);
43,944✔
155
  c += b; c -= lj_rol(a, 9);
43,944✔
156
  d += a; d -= lj_rol(b, 18);
43,944✔
157
  h -= lj_rol(a^b,7);
43,944✔
158
  h += c; h += lj_rol(d,13);
43,944✔
159
  d ^= c;  d -= lj_rol(c,25);
43,944✔
160
  h ^= d; h -= lj_rol(d,16);
43,944✔
161
  c ^= h; c -= lj_rol(h,4);
43,944✔
162
  d ^= c;  d -= lj_rol(c,14);
43,944✔
163
  h ^= d; h -= lj_rol(d,24);
43,944✔
164
  return h;
43,944✔
165
}
166
#endif
167

168
/* Intern a string and return string object. */
169
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
9,487,569✔
170
{
171
  global_State *g;
9,487,569✔
172
  GCstr *s;
9,487,569✔
173
  GCobj *o;
9,487,569✔
174
  MSize len = (MSize)lenx;
9,487,569✔
175
  uint8_t strflags = 0;
9,487,569✔
176
#if LUAJIT_SMART_STRINGS
177
  unsigned collisions = 0;
9,487,569✔
178
#endif
179
  if (lenx >= LJ_MAX_STR)
9,487,569✔
180
    lj_err_msg(L, LJ_ERR_STROV);
×
181
  g = G(L);
9,487,569✔
182
  if (len == 0)
9,487,569✔
183
    return &g->strempty;
5,750✔
184
  /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
185
  MSize h = lua_hash(str, len);
9,481,819✔
186
  /* Check if the string has already been interned. */
187
  o = gcref(g->strhash[h & g->strmask]);
9,481,819✔
188
#if LUAJIT_SMART_STRINGS
189
/*
190
** The default "fast" string hash function samples only a few positions
191
** in a string, the remaining bytes don't affect the function's result.
192
** The function performs well for short strings; however long strings
193
** can yield extremely high collision rates.
194
**
195
** An adaptive schema was implemented. Two hash functions are used
196
** simultaneously. A bucket is picked based on the output of the fast
197
** hash function. If an item is to be inserted in a collision chain
198
** longer than a certain threshold, another bucket is picked based on
199
** the stronger hash function. Since two hash functions are used
200
** simultaneously, insert should consider two buckets. The second bucket
201
** is often NOT considered thanks to the bloom filter. The filter is
202
** rebuilt during GC cycle.
203
**
204
** Parameters below were tuned on a set of benchmarks. Max_collisions is
205
** also backed by theory: the expected maximum length of a collision
206
** chain in a hash table with the fill factor of 1.0 is
207
** O(log(N)/log(log(N))), assuming uniformly distributed random keys.
208
** The upper bound for N=65,000 is 10, hence 40 is a clear indication of
209
** an anomaly.
210
**/
211
#define max_collisions 40
212
#define inc_collision_soft() (collisions++)
213
/* If different strings yield the same hash sum, grow counter faster. */
214
#define inc_collision_hard() (collisions+=1+(len>>4), 1)
215
#else
216
#define inc_collision_hard() (1)
217
#define inc_collision_soft()
218
#endif
219
  if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
9,481,819✔
220
    while (o != NULL) {
13,715,894✔
221
      GCstr *sx = gco2str(o);
12,827,540✔
222
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
12,827,540✔
223
                      str_fastcmp(str, strdata(sx), len) == 0) {
17,361,848✔
224
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
225
        if (isdead(g, o)) flipwhite(o);
8,591,109✔
226
        g->strhash_hit++;
8,591,109✔
227
        return sx;  /* Return existing string. */
8,591,109✔
228
      }
229
      o = gcnext(o);
4,236,431✔
230
      inc_collision_soft();
4,236,431✔
231
    }
232
  } else {  /* Slow path: end of string is too close to a page boundary. */
233
    while (o != NULL) {
4,456✔
234
      GCstr *sx = gco2str(o);
4,241✔
235
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
4,241✔
236
                      memcmp(str, strdata(sx), len) == 0) {
2,141✔
237
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
238
        if (isdead(g, o)) flipwhite(o);
2,141✔
239
        g->strhash_hit++;
2,141✔
240
        return sx;  /* Return existing string. */
2,141✔
241
      }
242
      o = gcnext(o);
2,100✔
243
      inc_collision_soft();
2,100✔
244
    }
245
  }
246
#if LUAJIT_SMART_STRINGS
247
  /* "Fast" hash function consumes all bytes of a string <= 12 bytes. */
248
  if (len > 12) {
888,569✔
249
    /*
250
    ** The bloom filter is keyed with the high 12 bits of the fast
251
    ** hash sum. The filter is rebuilt during GC cycle. It's beneficial
252
    ** to have these bits readily available and avoid hash sum
253
    ** recalculation during GC. High 6 bits are included in the "full"
254
    ** hash sum, and bits 19-25 are stored in s->strflags.
255
    **/
256
    int search_fullh =
439,438✔
257
       bloomtest(g->strbloom.cur[0], h>>(sizeof(h)*8- 6)) != 0 &&
219,719✔
258
       bloomtest(g->strbloom.cur[1], h>>(sizeof(h)*8-12)) != 0;
53,213✔
259
    if (LJ_UNLIKELY(search_fullh || collisions > max_collisions)) {
219,719✔
260
      MSize fh = lj_fullhash((const uint8_t*)str, len);
43,944✔
261
#define high6mask ((~(MSize)0)<<(sizeof(MSize)*8-6))
262
      fh = (fh >> 6) | (h & high6mask);
43,944✔
263
      if (search_fullh) {
43,944✔
264
        /* Recheck if the string has already been interned with "harder" hash. */
265
        o = gcref(g->strhash[fh & g->strmask]);
42,632✔
266
        if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
42,632✔
267
          while (o != NULL) {
57,324✔
268
            GCstr *sx = gco2str(o);
34,287✔
269
            if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
53,882✔
270
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
271
              if (isdead(g, o)) flipwhite(o);
19,595✔
272
              g->strhash_hit++;
19,595✔
273
              return sx;  /* Return existing string. */
19,595✔
274
            }
275
            o = gcnext(o);
14,692✔
276
          }
277
        } else {  /* Slow path: end of string is too close to a page boundary. */
278
          while (o != NULL) {
×
279
            GCstr *sx = gco2str(o);
×
280
            if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
×
281
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
282
              if (isdead(g, o)) flipwhite(o);
×
283
              g->strhash_hit++;
×
284
              return sx;  /* Return existing string. */
×
285
            }
286
            o = gcnext(o);
×
287
          }
288
        }
289
      }
290
      if (collisions > max_collisions) {
24,349✔
291
        strflags = 0xc0 | ((h>>(sizeof(h)*8-12))&0x3f);
21,253✔
292
        bloomset(g->strbloom.cur[0], h>>(sizeof(h)*8- 6));
21,253✔
293
        bloomset(g->strbloom.cur[1], h>>(sizeof(h)*8-12));
21,253✔
294
        bloomset(g->strbloom.next[0], h>>(sizeof(h)*8- 6));
21,253✔
295
        bloomset(g->strbloom.next[1], h>>(sizeof(h)*8-12));
21,253✔
296
        h = fh;
21,253✔
297
      }
298
    }
299
  }
300
#endif
301
  g->strhash_miss++;
868,974✔
302
  /* Nope, create a new string. */
303
  s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
868,974✔
304
  newwhite(g, s);
868,974✔
305
  s->gct = ~LJ_TSTR;
868,974✔
306
  s->len = len;
868,974✔
307
  s->hash = h;
868,974✔
308
  s->reserved = 0;
868,974✔
309
  s->strflags = strflags;
868,974✔
310
  memcpy(strdatawr(s), str, len);
868,974✔
311
  strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
868,974✔
312
  /* Add it to string hash table. */
313
  h &= g->strmask;
868,974✔
314
  s->nextgc = g->strhash[h];
868,974✔
315
  /* NOBARRIER: The string table is a GC root. */
316
  setgcref(g->strhash[h], obj2gco(s));
868,974✔
317
  if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
868,974✔
318
    lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
535✔
319
  return s;  /* Return newly interned string. */
320
}
321

322
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
864,788✔
323
{
324
  g->strnum--;
864,788✔
325
  lj_mem_free(g, s, sizestring(s));
864,788✔
326
}
864,788✔
327

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc