• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tarantool / luajit / 5784751762

07 Aug 2023 11:54AM UTC coverage: 84.282% (-3.1%) from 87.373%
5784751762

push

github

ligurio
setup-gcovr [TO SQUASH]

5157 of 6002 branches covered (85.92%)

Branch coverage included in aggregate %.

19632 of 23410 relevant lines covered (83.86%)

229380.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.61
/src/lj_str.c
1
/*
2
** String handling.
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_str_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10
#include "lj_gc.h"
11
#include "lj_err.h"
12
#include "lj_str.h"
13
#include "lj_char.h"
14

15
#if LUAJIT_USE_ASAN
16
/* These functions may read past a buffer end, that's ok. */
17
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
18
  __attribute__((no_sanitize_address));
19

20
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
21
  __attribute__((no_sanitize_address));
22

23
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
24
  __attribute__((no_sanitize_address));
25
#endif
26

27
/* -- String helpers ------------------------------------------------------ */
28

29
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
30
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
103,370✔
31
{
32
  MSize i, n = a->len > b->len ? b->len : a->len;
103,370✔
33
  for (i = 0; i < n; i += 4) {
109,869✔
34
    /* Note: innocuous access up to end of string + 3. */
35
    uint32_t va = *(const uint32_t *)(strdata(a)+i);
103,472✔
36
    uint32_t vb = *(const uint32_t *)(strdata(b)+i);
103,472✔
37
    if (va != vb) {
103,472✔
38
#if LJ_LE
39
      va = lj_bswap(va); vb = lj_bswap(vb);
96,973✔
40
#endif
41
      i -= n;
96,973✔
42
      if ((int32_t)i >= -3) {
96,973✔
43
        va >>= 32+(i<<3); vb >>= 32+(i<<3);
96,790✔
44
        if (va == vb) break;
96,790✔
45
      }
46
      return va < vb ? -1 : 1;
96,509✔
47
    }
48
  }
49
  return (int32_t)(a->len - b->len);
6,861✔
50
}
51

52
/* Fast string data comparison. Caveat: unaligned access to 1st string! */
53
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
7,904,332✔
54
{
55
  MSize i = 0;
7,904,332✔
56
  lua_assert(len > 0);
15,056,445✔
57
  lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
15,056,445✔
58
  do {  /* Note: innocuous access up to end of string + 3. */
15,056,445✔
59
    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
15,056,445✔
60
    if (v) {
15,056,445✔
61
      i -= len;
2,569,864✔
62
#if LJ_LE
63
      return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
2,569,864✔
64
#else
65
      return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
66
#endif
67
    }
68
    i += 4;
12,486,581✔
69
  } while (i < len);
12,486,581✔
70
  return 0;
71
}
72

73
/* Find fixed string p inside string s. */
74
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
1,682✔
75
{
76
  if (plen <= slen) {
1,682✔
77
    if (plen == 0) {
1,608✔
78
      return s;
79
    } else {
80
      int c = *(const uint8_t *)p++;
1,605✔
81
      plen--; slen -= plen;
1,605✔
82
      while (slen) {
3,071✔
83
        const char *q = (const char *)memchr(s, c, slen);
3,022✔
84
        if (!q) break;
3,022✔
85
        if (memcmp(q+1, p, plen) == 0) return q;
1,740✔
86
        q++; slen -= (MSize)(q-s); s = q;
1,466✔
87
      }
88
    }
89
  }
90
  return NULL;
91
}
92

93
/* Check whether a string has a pattern matching character. */
94
int lj_str_haspattern(GCstr *s)
1,959✔
95
{
96
  const char *p = strdata(s), *q = p + s->len;
1,959✔
97
  while (p < q) {
4,668✔
98
    int c = *(const uint8_t *)p++;
3,609✔
99
    if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
3,609✔
100
      return 1;  /* Found a pattern matching char. */
101
  }
102
  return 0;  /* No pattern matching chars found. */
103
}
104

105
/* -- String interning ---------------------------------------------------- */
106

107
/* Resize the string hash table (grow and shrink). */
108
void lj_str_resize(lua_State *L, MSize newmask)
640✔
109
{
110
  global_State *g = G(L);
640✔
111
  GCRef *newhash;
640✔
112
  MSize i;
640✔
113
  if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
640✔
114
    return;  /* No resizing during GC traversal or if already too big. */
115
  newhash = lj_mem_newvec(L, newmask+1, GCRef);
640✔
116
  memset(newhash, 0, (newmask+1)*sizeof(GCRef));
640✔
117
  for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
1,734,016✔
118
    GCobj *p = gcref(g->strhash[i]);
1,733,376✔
119
    while (p) {  /* Follow each hash chain and reinsert all strings. */
2,434,818✔
120
      MSize h = gco2str(p)->hash & newmask;
701,442✔
121
      GCobj *next = gcnext(p);
701,442✔
122
      /* NOBARRIER: The string table is a GC root. */
123
      setgcrefr(p->gch.nextgc, newhash[h]);
701,442✔
124
      setgcref(newhash[h], p);
701,442✔
125
      p = next;
701,442✔
126
    }
127
  }
128
  lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
640✔
129
  g->strmask = newmask;
640✔
130
  g->strhash = newhash;
640✔
131
}
132

133
#if LUAJIT_SMART_STRINGS
134
static LJ_AINLINE uint32_t
135
lj_fullhash(const uint8_t *v, MSize len)
136
{
137
  MSize a = 0, b = 0;
138
  MSize c = 0xcafedead;
139
  MSize d = 0xdeadbeef;
140
  MSize h = len;
2,763,222✔
141
  lua_assert(len >= 12);
142
  for(; len>8; len-=8, v+=8) {
2,763,222✔
143
    a ^= lj_getu32(v);
2,721,104✔
144
    b ^= lj_getu32(v+4);
2,721,104✔
145
    c += a;
2,721,104✔
146
    d += b;
2,721,104✔
147
    a = lj_rol(a, 5) - d;
2,721,104✔
148
    b = lj_rol(b, 7) - c;
2,721,104✔
149
    c = lj_rol(c, 24) ^ a;
2,721,104✔
150
    d = lj_rol(d, 1) ^ b;
2,721,104✔
151
  }
152
  a ^= lj_getu32(v+len-8);
42,118✔
153
  b ^= lj_getu32(v+len-4);
42,118✔
154
  c += b; c -= lj_rol(a, 9);
42,118✔
155
  d += a; d -= lj_rol(b, 18);
42,118✔
156
  h -= lj_rol(a^b,7);
42,118✔
157
  h += c; h += lj_rol(d,13);
42,118✔
158
  d ^= c;  d -= lj_rol(c,25);
42,118✔
159
  h ^= d; h -= lj_rol(d,16);
42,118✔
160
  c ^= h; c -= lj_rol(h,4);
42,118✔
161
  d ^= c;  d -= lj_rol(c,14);
42,118✔
162
  h ^= d; h -= lj_rol(d,24);
42,118✔
163
  return h;
42,118✔
164
}
165
#endif
166

167
/* Intern a string and return string object. */
168
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
8,307,147✔
169
{
170
  global_State *g;
8,307,147✔
171
  GCstr *s;
8,307,147✔
172
  GCobj *o;
8,307,147✔
173
  MSize len = (MSize)lenx;
8,307,147✔
174
  uint8_t strflags = 0;
8,307,147✔
175
#if LUAJIT_SMART_STRINGS
176
  unsigned collisions = 0;
8,307,147✔
177
#endif
178
  if (lenx >= LJ_MAX_STR)
8,307,147✔
179
    lj_err_msg(L, LJ_ERR_STROV);
×
180
  g = G(L);
8,307,147✔
181
  if (len == 0)
8,307,147✔
182
    return &g->strempty;
3,682✔
183
  /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
184
  MSize h = lua_hash(str, len);
8,303,465✔
185
  /* Check if the string has already been interned. */
186
  o = gcref(g->strhash[h & g->strmask]);
8,303,465✔
187
#if LUAJIT_SMART_STRINGS
188
/*
189
** The default "fast" string hash function samples only a few positions
190
** in a string, the remaining bytes don't affect the function's result.
191
** The function performs well for short strings; however long strings
192
** can yield extremely high collision rates.
193
**
194
** An adaptive schema was implemented. Two hash functions are used
195
** simultaneously. A bucket is picked based on the output of the fast
196
** hash function. If an item is to be inserted in a collision chain
197
** longer than a certain threshold, another bucket is picked based on
198
** the stronger hash function. Since two hash functions are used
199
** simultaneously, insert should consider two buckets. The second bucket
200
** is often NOT considered thanks to the bloom filter. The filter is
201
** rebuilt during GC cycle.
202
**
203
** Parameters below were tuned on a set of benchmarks. Max_collisions is
204
** also backed by theory: the expected maximum length of a collision
205
** chain in a hash table with the fill factor of 1.0 is
206
** O(log(N)/log(log(N))), assuming uniformly distributed random keys.
207
** The upper bound for N=65,000 is 10, hence 40 is a clear indication of
208
** an anomaly.
209
**/
210
#define max_collisions 40
211
#define inc_collision_soft() (collisions++)
212
/* If different strings yield the same hash sum, grow counter faster. */
213
#define inc_collision_hard() (collisions+=1+(len>>4), 1)
214
#else
215
#define inc_collision_hard() (1)
216
#define inc_collision_soft()
217
#endif
218
  if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
8,303,465✔
219
    while (o != NULL) {
12,247,402✔
220
      GCstr *sx = gco2str(o);
11,653,444✔
221
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
11,653,444✔
222
                      str_fastcmp(str, strdata(sx), len) == 0) {
15,592,706✔
223
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
224
        if (isdead(g, o)) flipwhite(o);
7,708,056✔
225
        g->strhash_hit++;
7,708,056✔
226
        return sx;  /* Return existing string. */
7,708,056✔
227
      }
228
      o = gcnext(o);
3,945,388✔
229
      inc_collision_soft();
3,945,388✔
230
    }
231
  } else {  /* Slow path: end of string is too close to a page boundary. */
232
    while (o != NULL) {
2,439✔
233
      GCstr *sx = gco2str(o);
2,320✔
234
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
2,320✔
235
                      memcmp(str, strdata(sx), len) == 0) {
1,359✔
236
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
237
        if (isdead(g, o)) flipwhite(o);
1,332✔
238
        g->strhash_hit++;
1,332✔
239
        return sx;  /* Return existing string. */
1,332✔
240
      }
241
      o = gcnext(o);
988✔
242
      inc_collision_soft();
988✔
243
    }
244
  }
245
#if LUAJIT_SMART_STRINGS
246
  /* "Fast" hash function consumes all bytes of a string <= 12 bytes. */
247
  if (len > 12) {
594,077✔
248
    /*
249
    ** The bloom filter is keyed with the high 12 bits of the fast
250
    ** hash sum. The filter is rebuilt during GC cycle. It's beneficial
251
    ** to have these bits readily available and avoid hash sum
252
    ** recalculation during GC. High 6 bits are included in the "full"
253
    ** hash sum, and bits 19-25 are stored in s->strflags.
254
    **/
255
    int search_fullh =
359,014✔
256
       bloomtest(g->strbloom.cur[0], h>>(sizeof(h)*8- 6)) != 0 &&
179,507✔
257
       bloomtest(g->strbloom.cur[1], h>>(sizeof(h)*8-12)) != 0;
49,789✔
258
    if (LJ_UNLIKELY(search_fullh || collisions > max_collisions)) {
179,507✔
259
      MSize fh = lj_fullhash((const uint8_t*)str, len);
42,118✔
260
#define high6mask ((~(MSize)0)<<(sizeof(MSize)*8-6))
261
      fh = (fh >> 6) | (h & high6mask);
42,118✔
262
      if (search_fullh) {
42,118✔
263
        /* Recheck if the string has already been interned with "harder" hash. */
264
        o = gcref(g->strhash[fh & g->strmask]);
40,811✔
265
        if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
40,811✔
266
          while (o != NULL) {
54,140✔
267
            GCstr *sx = gco2str(o);
33,013✔
268
            if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
52,695✔
269
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
270
              if (isdead(g, o)) flipwhite(o);
19,682✔
271
              g->strhash_hit++;
19,682✔
272
              return sx;  /* Return existing string. */
19,682✔
273
            }
274
            o = gcnext(o);
13,331✔
275
          }
276
        } else {  /* Slow path: end of string is too close to a page boundary. */
277
          while (o != NULL) {
5✔
278
            GCstr *sx = gco2str(o);
3✔
279
            if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
3✔
280
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
281
              if (isdead(g, o)) flipwhite(o);
×
282
              g->strhash_hit++;
×
283
              return sx;  /* Return existing string. */
×
284
            }
285
            o = gcnext(o);
3✔
286
          }
287
        }
288
      }
289
      if (collisions > max_collisions) {
22,436✔
290
        strflags = 0xc0 | ((h>>(sizeof(h)*8-12))&0x3f);
19,757✔
291
        bloomset(g->strbloom.cur[0], h>>(sizeof(h)*8- 6));
19,757✔
292
        bloomset(g->strbloom.cur[1], h>>(sizeof(h)*8-12));
19,757✔
293
        bloomset(g->strbloom.next[0], h>>(sizeof(h)*8- 6));
19,757✔
294
        bloomset(g->strbloom.next[1], h>>(sizeof(h)*8-12));
19,757✔
295
        h = fh;
19,757✔
296
      }
297
    }
298
  }
299
#endif
300
  g->strhash_miss++;
574,395✔
301
  /* Nope, create a new string. */
302
  s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
574,395✔
303
  newwhite(g, s);
574,395✔
304
  s->gct = ~LJ_TSTR;
574,395✔
305
  s->len = len;
574,395✔
306
  s->hash = h;
574,395✔
307
  s->reserved = 0;
574,395✔
308
  s->strflags = strflags;
574,395✔
309
  memcpy(strdatawr(s), str, len);
574,395✔
310
  strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
574,395✔
311
  /* Add it to string hash table. */
312
  h &= g->strmask;
574,395✔
313
  s->nextgc = g->strhash[h];
574,395✔
314
  /* NOBARRIER: The string table is a GC root. */
315
  setgcref(g->strhash[h], obj2gco(s));
574,395✔
316
  if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
574,395✔
317
    lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
325✔
318
  return s;  /* Return newly interned string. */
319
}
320

321
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
571,940✔
322
{
323
  g->strnum--;
571,940✔
324
  lj_mem_free(g, s, sizestring(s));
571,940✔
325
}
571,940✔
326

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc