• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tarantool / luajit / 10940225480

19 Sep 2024 11:22AM UTC coverage: 92.845% (-0.06%) from 92.909%
10940225480

push

github

Buristan
FFI: Workaround for platform dlerror() returning NULL.

Contributed by mcclure.

(cherry picked from commit 478bcfe52)

The `ffi.load()` implementation assumes the string returned from
`dlerror()` is non-NULL and immediately dereferences it. This may lead
to a crash on POSIX platforms [1] where it is possible.

This patch adds the corresponding check and the default "dlopen failed"
error message.

Sergey Kaplun:
* added the description and the test for the problem

[1]: https://pubs.opengroup.org/onlinepubs/009695399/functions/dlerror.html

Part of tarantool/tarantool#10199

Reviewed-by: Sergey Bronnikov <sergeyb@tarantool.org>
Reviewed-by: Maxim Kokryashkin <m.kokryashkin@tarantool.org>
Signed-off-by: Sergey Kaplun <skaplun@tarantool.org>

5684 of 6027 branches covered (94.31%)

Branch coverage included in aggregate %.

2 of 2 new or added lines in 1 file covered. (100.0%)

26 existing lines in 5 files now uncovered.

21670 of 23435 relevant lines covered (92.47%)

2912061.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.74
/src/lj_str.c
1
/*
2
** String handling.
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_str_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10
#include "lj_gc.h"
11
#include "lj_err.h"
12
#include "lj_str.h"
13
#include "lj_char.h"
14

15
#if LUAJIT_USE_ASAN
16
/* These functions may read past a buffer end, that's ok. */
17
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
18
  __attribute__((no_sanitize_address));
19

20
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
21
  __attribute__((no_sanitize_address));
22

23
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
24
  __attribute__((no_sanitize_address));
25
#endif
26

27
/* -- String helpers ------------------------------------------------------ */
28

29
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
30
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
103,367✔
31
{
32
  MSize i, n = a->len > b->len ? b->len : a->len;
103,367✔
33
  for (i = 0; i < n; i += 4) {
109,861✔
34
    /* Note: innocuous access up to end of string + 3. */
35
    uint32_t va = *(const uint32_t *)(strdata(a)+i);
103,469✔
36
    uint32_t vb = *(const uint32_t *)(strdata(b)+i);
103,469✔
37
    if (va != vb) {
103,469✔
38
#if LJ_LE
39
      va = lj_bswap(va); vb = lj_bswap(vb);
96,975✔
40
#endif
41
      i -= n;
96,975✔
42
      if ((int32_t)i >= -3) {
96,975✔
43
        va >>= 32+(i<<3); vb >>= 32+(i<<3);
96,792✔
44
        if (va == vb) break;
96,792✔
45
      }
46
      return va < vb ? -1 : 1;
96,517✔
47
    }
48
  }
49
  return (int32_t)(a->len - b->len);
6,850✔
50
}
51

52
/* Fast string data comparison. Caveat: unaligned access to 1st string! */
53
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
8,832,647✔
54
{
55
  MSize i = 0;
8,832,647✔
56
  lj_assertX(len > 0, "fast string compare with zero length");
16,951,123✔
57
  lj_assertX((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4,
16,951,123✔
58
             "fast string compare crossing page boundary");
59
  do {  /* Note: innocuous access up to end of string + 3. */
16,951,123✔
60
    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
16,951,123✔
61
    if (v) {
16,951,123✔
62
      i -= len;
3,257,179✔
63
#if LJ_LE
64
      return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
3,257,179✔
65
#else
66
      return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
67
#endif
68
    }
69
    i += 4;
13,693,944✔
70
  } while (i < len);
13,693,944✔
71
  return 0;
72
}
73

74
/* Find fixed string p inside string s. */
75
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
2,676✔
76
{
77
  if (plen <= slen) {
2,676✔
78
    if (plen == 0) {
2,599✔
79
      return s;
80
    } else {
81
      int c = *(const uint8_t *)p++;
2,596✔
82
      plen--; slen -= plen;
2,596✔
83
      while (slen) {
9,022✔
84
        const char *q = (const char *)memchr(s, c, slen);
8,921✔
85
        if (!q) break;
8,921✔
86
        if (memcmp(q+1, p, plen) == 0) return q;
6,704✔
87
        q++; slen -= (MSize)(q-s); s = q;
6,426✔
88
      }
89
    }
90
  }
91
  return NULL;
92
}
93

94
/* Check whether a string has a pattern matching character. */
95
int lj_str_haspattern(GCstr *s)
2,674✔
96
{
97
  const char *p = strdata(s), *q = p + s->len;
2,674✔
98
  while (p < q) {
5,993✔
99
    int c = *(const uint8_t *)p++;
4,652✔
100
    if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
4,652✔
101
      return 1;  /* Found a pattern matching char. */
102
  }
103
  return 0;  /* No pattern matching chars found. */
104
}
105

106
/* -- String interning ---------------------------------------------------- */
107

108
/* Resize the string hash table (grow and shrink). */
109
void lj_str_resize(lua_State *L, MSize newmask)
965✔
110
{
111
  global_State *g = G(L);
965✔
112
  GCRef *newhash;
965✔
113
  MSize i;
965✔
114
  if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
965✔
115
    return;  /* No resizing during GC traversal or if already too big. */
116
  newhash = lj_mem_newvec(L, newmask+1, GCRef);
965✔
117
  memset(newhash, 0, (newmask+1)*sizeof(GCRef));
965✔
118
  for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
2,439,365✔
119
    GCobj *p = gcref(g->strhash[i]);
2,438,400✔
120
    while (p) {  /* Follow each hash chain and reinsert all strings. */
3,534,906✔
121
      MSize h = gco2str(p)->hash & newmask;
1,096,506✔
122
      GCobj *next = gcnext(p);
1,096,506✔
123
      /* NOBARRIER: The string table is a GC root. */
124
      setgcrefr(p->gch.nextgc, newhash[h]);
1,096,506✔
125
      setgcref(newhash[h], p);
1,096,506✔
126
      p = next;
1,096,506✔
127
    }
128
  }
129
  lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
965✔
130
  g->strmask = newmask;
965✔
131
  g->strhash = newhash;
965✔
132
}
133

134
#if LUAJIT_SMART_STRINGS
135
static LJ_AINLINE uint32_t
136
lj_fullhash(const uint8_t *v, MSize len)
137
{
138
  MSize a = 0, b = 0;
139
  MSize c = 0xcafedead;
140
  MSize d = 0xdeadbeef;
141
  MSize h = len;
3,082,749✔
142
  lj_assertX(len >= 12, "full hash calculation for too short (%d) string", len);
143
  for(; len>8; len-=8, v+=8) {
3,082,749✔
144
    a ^= lj_getu32(v);
3,038,496✔
145
    b ^= lj_getu32(v+4);
3,038,496✔
146
    c += a;
3,038,496✔
147
    d += b;
3,038,496✔
148
    a = lj_rol(a, 5) - d;
3,038,496✔
149
    b = lj_rol(b, 7) - c;
3,038,496✔
150
    c = lj_rol(c, 24) ^ a;
3,038,496✔
151
    d = lj_rol(d, 1) ^ b;
3,038,496✔
152
  }
153
  a ^= lj_getu32(v+len-8);
44,253✔
154
  b ^= lj_getu32(v+len-4);
44,253✔
155
  c += b; c -= lj_rol(a, 9);
44,253✔
156
  d += a; d -= lj_rol(b, 18);
44,253✔
157
  h -= lj_rol(a^b,7);
44,253✔
158
  h += c; h += lj_rol(d,13);
44,253✔
159
  d ^= c;  d -= lj_rol(c,25);
44,253✔
160
  h ^= d; h -= lj_rol(d,16);
44,253✔
161
  c ^= h; c -= lj_rol(h,4);
44,253✔
162
  d ^= c;  d -= lj_rol(c,14);
44,253✔
163
  h ^= d; h -= lj_rol(d,24);
44,253✔
164
  return h;
44,253✔
165
}
166
#endif
167

168
/* Intern a string and return string object. */
169
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
9,568,271✔
170
{
171
  global_State *g;
9,568,271✔
172
  GCstr *s;
9,568,271✔
173
  GCobj *o;
9,568,271✔
174
  MSize len = (MSize)lenx;
9,568,271✔
175
  uint8_t strflags = 0;
9,568,271✔
176
#if LUAJIT_SMART_STRINGS
177
  unsigned collisions = 0;
9,568,271✔
178
#endif
179
  if (lenx >= LJ_MAX_STR)
9,568,271✔
180
    lj_err_msg(L, LJ_ERR_STROV);
×
181
  g = G(L);
9,568,271✔
182
  if (len == 0)
9,568,271✔
183
    return &g->strempty;
5,611✔
184
  /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
185
  MSize h = lua_hash(str, len);
9,562,660✔
186
  /* Check if the string has already been interned. */
187
  o = gcref(g->strhash[h & g->strmask]);
9,562,660✔
188
#if LUAJIT_SMART_STRINGS
189
/*
190
** The default "fast" string hash function samples only a few positions
191
** in a string, the remaining bytes don't affect the function's result.
192
** The function performs well for short strings; however long strings
193
** can yield extremely high collision rates.
194
**
195
** An adaptive schema was implemented. Two hash functions are used
196
** simultaneously. A bucket is picked based on the output of the fast
197
** hash function. If an item is to be inserted in a collision chain
198
** longer than a certain threshold, another bucket is picked based on
199
** the stronger hash function. Since two hash functions are used
200
** simultaneously, insert should consider two buckets. The second bucket
201
** is often NOT considered thanks to the bloom filter. The filter is
202
** rebuilt during GC cycle.
203
**
204
** Parameters below were tuned on a set of benchmarks. Max_collisions is
205
** also backed by theory: the expected maximum length of a collision
206
** chain in a hash table with the fill factor of 1.0 is
207
** O(log(N)/log(log(N))), assuming uniformly distributed random keys.
208
** The upper bound for N=65,000 is 10, hence 40 is a clear indication of
209
** an anomaly.
210
**/
211
#define max_collisions 40
212
#define inc_collision_soft() (collisions++)
213
/* If different strings yield the same hash sum, grow counter faster. */
214
#define inc_collision_hard() (collisions+=1+(len>>4), 1)
215
#else
216
#define inc_collision_hard() (1)
217
#define inc_collision_soft()
218
#endif
219
  if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
9,562,660✔
220
    while (o != NULL) {
13,889,417✔
221
      GCstr *sx = gco2str(o);
12,961,304✔
222
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
12,961,304✔
223
                      str_fastcmp(str, strdata(sx), len) == 0) {
17,445,838✔
224
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
225
        if (isdead(g, o)) flipwhite(o);
8,632,785✔
226
        g->strhash_hit++;
8,632,785✔
227
        return sx;  /* Return existing string. */
8,632,785✔
228
      }
229
      o = gcnext(o);
4,328,519✔
230
      inc_collision_soft();
4,328,519✔
231
    }
232
  } else {  /* Slow path: end of string is too close to a page boundary. */
233
    while (o != NULL) {
2,661✔
234
      GCstr *sx = gco2str(o);
2,434✔
235
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
2,434✔
236
                      memcmp(str, strdata(sx), len) == 0) {
1,538✔
237
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
238
        if (isdead(g, o)) flipwhite(o);
1,535✔
239
        g->strhash_hit++;
1,535✔
240
        return sx;  /* Return existing string. */
1,535✔
241
      }
242
      o = gcnext(o);
899✔
243
      inc_collision_soft();
899✔
244
    }
245
  }
246
#if LUAJIT_SMART_STRINGS
247
  /* "Fast" hash function consumes all bytes of a string <= 12 bytes. */
248
  if (len > 12) {
928,340✔
249
    /*
250
    ** The bloom filter is keyed with the high 12 bits of the fast
251
    ** hash sum. The filter is rebuilt during GC cycle. It's beneficial
252
    ** to have these bits readily available and avoid hash sum
253
    ** recalculation during GC. High 6 bits are included in the "full"
254
    ** hash sum, and bits 19-25 are stored in s->strflags.
255
    **/
256
    int search_fullh =
442,866✔
257
       bloomtest(g->strbloom.cur[0], h>>(sizeof(h)*8- 6)) != 0 &&
221,433✔
258
       bloomtest(g->strbloom.cur[1], h>>(sizeof(h)*8-12)) != 0;
53,549✔
259
    if (LJ_UNLIKELY(search_fullh || collisions > max_collisions)) {
221,433✔
260
      MSize fh = lj_fullhash((const uint8_t*)str, len);
44,253✔
261
#define high6mask ((~(MSize)0)<<(sizeof(MSize)*8-6))
262
      fh = (fh >> 6) | (h & high6mask);
44,253✔
263
      if (search_fullh) {
44,253✔
264
        /* Recheck if the string has already been interned with "harder" hash. */
265
        o = gcref(g->strhash[fh & g->strmask]);
42,937✔
266
        if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
42,937✔
267
          while (o != NULL) {
57,743✔
268
            GCstr *sx = gco2str(o);
34,400✔
269
            if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
53,994✔
270
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
271
              if (isdead(g, o)) flipwhite(o);
19,594✔
272
              g->strhash_hit++;
19,594✔
273
              return sx;  /* Return existing string. */
19,594✔
274
            }
275
            o = gcnext(o);
14,806✔
276
          }
277
        } else {  /* Slow path: end of string is too close to a page boundary. */
UNCOV
278
          while (o != NULL) {
×
UNCOV
279
            GCstr *sx = gco2str(o);
×
UNCOV
280
            if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
×
281
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
282
              if (isdead(g, o)) flipwhite(o);
×
283
              g->strhash_hit++;
×
284
              return sx;  /* Return existing string. */
×
285
            }
UNCOV
286
            o = gcnext(o);
×
287
          }
288
        }
289
      }
290
      if (collisions > max_collisions) {
24,659✔
291
        strflags = 0xc0 | ((h>>(sizeof(h)*8-12))&0x3f);
21,551✔
292
        bloomset(g->strbloom.cur[0], h>>(sizeof(h)*8- 6));
21,551✔
293
        bloomset(g->strbloom.cur[1], h>>(sizeof(h)*8-12));
21,551✔
294
        bloomset(g->strbloom.next[0], h>>(sizeof(h)*8- 6));
21,551✔
295
        bloomset(g->strbloom.next[1], h>>(sizeof(h)*8-12));
21,551✔
296
        h = fh;
21,551✔
297
      }
298
    }
299
  }
300
#endif
301
  g->strhash_miss++;
908,746✔
302
  /* Nope, create a new string. */
303
  s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
908,746✔
304
  newwhite(g, s);
908,746✔
305
  s->gct = ~LJ_TSTR;
908,746✔
306
  s->len = len;
908,746✔
307
  s->hash = h;
908,746✔
308
  s->reserved = 0;
908,746✔
309
  s->strflags = strflags;
908,746✔
310
  memcpy(strdatawr(s), str, len);
908,746✔
311
  strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
908,746✔
312
  /* Add it to string hash table. */
313
  h &= g->strmask;
908,746✔
314
  s->nextgc = g->strhash[h];
908,746✔
315
  /* NOBARRIER: The string table is a GC root. */
316
  setgcref(g->strhash[h], obj2gco(s));
908,746✔
317
  if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
908,746✔
318
    lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
529✔
319
  return s;  /* Return newly interned string. */
320
}
321

322
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
904,468✔
323
{
324
  g->strnum--;
904,468✔
325
  lj_mem_free(g, s, sizestring(s));
904,468✔
326
}
904,468✔
327

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc