• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tarantool / luajit / 10902590989

17 Sep 2024 12:01PM UTC coverage: 92.776% (-0.02%) from 92.796%
10902590989

push

github

Buristan
Fix limit check in narrow_conv_backprop().

Thanks to Sergey Kaplun.

(cherry picked from commit e45fd4cb7)

`narrow_conv_backprop()` misses the stack pointer (`nc->sp`) limit check
after a bunch of recursive calls that may change its value. As a result,
it leads to stack-buffer-overflow during the instruction narrowing. This
patch adds a missing check.

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#10199

5674 of 6025 branches covered (94.17%)

Branch coverage included in aggregate %.

1 of 1 new or added line in 1 file covered. (100.0%)

7 existing lines in 1 file now uncovered.

21654 of 23431 relevant lines covered (92.42%)

2939401.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.74
/src/lj_str.c
1
/*
2
** String handling.
3
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
*/
5

6
#define lj_str_c
7
#define LUA_CORE
8

9
#include "lj_obj.h"
10
#include "lj_gc.h"
11
#include "lj_err.h"
12
#include "lj_str.h"
13
#include "lj_char.h"
14

15
#if LUAJIT_USE_ASAN
16
/* These functions may read past a buffer end, that's ok. */
17
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
18
  __attribute__((no_sanitize_address));
19

20
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
21
  __attribute__((no_sanitize_address));
22

23
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
24
  __attribute__((no_sanitize_address));
25
#endif
26

27
/* -- String helpers ------------------------------------------------------ */
28

29
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
30
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
103,370✔
31
{
32
  MSize i, n = a->len > b->len ? b->len : a->len;
103,370✔
33
  for (i = 0; i < n; i += 4) {
109,869✔
34
    /* Note: innocuous access up to end of string + 3. */
35
    uint32_t va = *(const uint32_t *)(strdata(a)+i);
103,472✔
36
    uint32_t vb = *(const uint32_t *)(strdata(b)+i);
103,472✔
37
    if (va != vb) {
103,472✔
38
#if LJ_LE
39
      va = lj_bswap(va); vb = lj_bswap(vb);
96,973✔
40
#endif
41
      i -= n;
96,973✔
42
      if ((int32_t)i >= -3) {
96,973✔
43
        va >>= 32+(i<<3); vb >>= 32+(i<<3);
96,790✔
44
        if (va == vb) break;
96,790✔
45
      }
46
      return va < vb ? -1 : 1;
96,509✔
47
    }
48
  }
49
  return (int32_t)(a->len - b->len);
6,861✔
50
}
51

52
/* Fast string data comparison. Caveat: unaligned access to 1st string! */
53
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
8,699,770✔
54
{
55
  MSize i = 0;
8,699,770✔
56
  lj_assertX(len > 0, "fast string compare with zero length");
16,717,803✔
57
  lj_assertX((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4,
16,717,803✔
58
             "fast string compare crossing page boundary");
59
  do {  /* Note: innocuous access up to end of string + 3. */
16,717,803✔
60
    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
16,717,803✔
61
    if (v) {
16,717,803✔
62
      i -= len;
3,148,684✔
63
#if LJ_LE
64
      return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
3,148,684✔
65
#else
66
      return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
67
#endif
68
    }
69
    i += 4;
13,569,119✔
70
  } while (i < len);
13,569,119✔
71
  return 0;
72
}
73

74
/* Find fixed string p inside string s. */
75
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
2,570✔
76
{
77
  if (plen <= slen) {
2,570✔
78
    if (plen == 0) {
2,497✔
79
      return s;
80
    } else {
81
      int c = *(const uint8_t *)p++;
2,494✔
82
      plen--; slen -= plen;
2,494✔
83
      while (slen) {
8,540✔
84
        const char *q = (const char *)memchr(s, c, slen);
8,458✔
85
        if (!q) break;
8,458✔
86
        if (memcmp(q+1, p, plen) == 0) return q;
6,323✔
87
        q++; slen -= (MSize)(q-s); s = q;
6,046✔
88
      }
89
    }
90
  }
91
  return NULL;
92
}
93

94
/* Check whether a string has a pattern matching character. */
95
int lj_str_haspattern(GCstr *s)
2,513✔
96
{
97
  const char *p = strdata(s), *q = p + s->len;
2,513✔
98
  while (p < q) {
5,667✔
99
    int c = *(const uint8_t *)p++;
4,332✔
100
    if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
4,332✔
101
      return 1;  /* Found a pattern matching char. */
102
  }
103
  return 0;  /* No pattern matching chars found. */
104
}
105

106
/* -- String interning ---------------------------------------------------- */
107

108
/* Resize the string hash table (grow and shrink). */
109
void lj_str_resize(lua_State *L, MSize newmask)
961✔
110
{
111
  global_State *g = G(L);
961✔
112
  GCRef *newhash;
961✔
113
  MSize i;
961✔
114
  if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
961✔
115
    return;  /* No resizing during GC traversal or if already too big. */
116
  newhash = lj_mem_newvec(L, newmask+1, GCRef);
961✔
117
  memset(newhash, 0, (newmask+1)*sizeof(GCRef));
961✔
118
  for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
1,852,865✔
119
    GCobj *p = gcref(g->strhash[i]);
1,851,904✔
120
    while (p) {  /* Follow each hash chain and reinsert all strings. */
2,690,033✔
121
      MSize h = gco2str(p)->hash & newmask;
838,129✔
122
      GCobj *next = gcnext(p);
838,129✔
123
      /* NOBARRIER: The string table is a GC root. */
124
      setgcrefr(p->gch.nextgc, newhash[h]);
838,129✔
125
      setgcref(newhash[h], p);
838,129✔
126
      p = next;
838,129✔
127
    }
128
  }
129
  lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
961✔
130
  g->strmask = newmask;
961✔
131
  g->strhash = newhash;
961✔
132
}
133

134
#if LUAJIT_SMART_STRINGS
135
static LJ_AINLINE uint32_t
136
lj_fullhash(const uint8_t *v, MSize len)
137
{
138
  MSize a = 0, b = 0;
139
  MSize c = 0xcafedead;
140
  MSize d = 0xdeadbeef;
141
  MSize h = len;
3,014,944✔
142
  lj_assertX(len >= 12, "full hash calculation for too short (%d) string", len);
143
  for(; len>8; len-=8, v+=8) {
3,014,944✔
144
    a ^= lj_getu32(v);
2,971,257✔
145
    b ^= lj_getu32(v+4);
2,971,257✔
146
    c += a;
2,971,257✔
147
    d += b;
2,971,257✔
148
    a = lj_rol(a, 5) - d;
2,971,257✔
149
    b = lj_rol(b, 7) - c;
2,971,257✔
150
    c = lj_rol(c, 24) ^ a;
2,971,257✔
151
    d = lj_rol(d, 1) ^ b;
2,971,257✔
152
  }
153
  a ^= lj_getu32(v+len-8);
43,687✔
154
  b ^= lj_getu32(v+len-4);
43,687✔
155
  c += b; c -= lj_rol(a, 9);
43,687✔
156
  d += a; d -= lj_rol(b, 18);
43,687✔
157
  h -= lj_rol(a^b,7);
43,687✔
158
  h += c; h += lj_rol(d,13);
43,687✔
159
  d ^= c;  d -= lj_rol(c,25);
43,687✔
160
  h ^= d; h -= lj_rol(d,16);
43,687✔
161
  c ^= h; c -= lj_rol(h,4);
43,687✔
162
  d ^= c;  d -= lj_rol(c,14);
43,687✔
163
  h ^= d; h -= lj_rol(d,24);
43,687✔
164
  return h;
43,687✔
165
}
166
#endif
167

168
/* Intern a string and return string object. */
169
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
9,300,002✔
170
{
171
  global_State *g;
9,300,002✔
172
  GCstr *s;
9,300,002✔
173
  GCobj *o;
9,300,002✔
174
  MSize len = (MSize)lenx;
9,300,002✔
175
  uint8_t strflags = 0;
9,300,002✔
176
#if LUAJIT_SMART_STRINGS
177
  unsigned collisions = 0;
9,300,002✔
178
#endif
179
  if (lenx >= LJ_MAX_STR)
9,300,002✔
180
    lj_err_msg(L, LJ_ERR_STROV);
×
181
  g = G(L);
9,300,002✔
182
  if (len == 0)
9,300,002✔
183
    return &g->strempty;
5,526✔
184
  /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
185
  MSize h = lua_hash(str, len);
9,294,476✔
186
  /* Check if the string has already been interned. */
187
  o = gcref(g->strhash[h & g->strmask]);
9,294,476✔
188
#if LUAJIT_SMART_STRINGS
189
/*
190
** The default "fast" string hash function samples only a few positions
191
** in a string, the remaining bytes don't affect the function's result.
192
** The function performs well for short strings; however long strings
193
** can yield extremely high collision rates.
194
**
195
** An adaptive schema was implemented. Two hash functions are used
196
** simultaneously. A bucket is picked based on the output of the fast
197
** hash function. If an item is to be inserted in a collision chain
198
** longer than a certain threshold, another bucket is picked based on
199
** the stronger hash function. Since two hash functions are used
200
** simultaneously, insert should consider two buckets. The second bucket
201
** is often NOT considered thanks to the bloom filter. The filter is
202
** rebuilt during GC cycle.
203
**
204
** Parameters below were tuned on a set of benchmarks. Max_collisions is
205
** also backed by theory: the expected maximum length of a collision
206
** chain in a hash table with the fill factor of 1.0 is
207
** O(log(N)/log(log(N))), assuming uniformly distributed random keys.
208
** The upper bound for N=65,000 is 10, hence 40 is a clear indication of
209
** an anomaly.
210
**/
211
#define max_collisions 40
212
#define inc_collision_soft() (collisions++)
213
/* If different strings yield the same hash sum, grow counter faster. */
214
#define inc_collision_hard() (collisions+=1+(len>>4), 1)
215
#else
216
#define inc_collision_hard() (1)
217
#define inc_collision_soft()
218
#endif
219
  if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
9,294,476✔
220
    while (o != NULL) {
13,448,294✔
221
      GCstr *sx = gco2str(o);
12,655,898✔
222
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
12,655,898✔
223
                      str_fastcmp(str, strdata(sx), len) == 0) {
17,180,918✔
224
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
225
        if (isdead(g, o)) flipwhite(o);
8,500,713✔
226
        g->strhash_hit++;
8,500,713✔
227
        return sx;  /* Return existing string. */
8,500,713✔
228
      }
229
      o = gcnext(o);
4,155,185✔
230
      inc_collision_soft();
4,155,185✔
231
    }
232
  } else {  /* Slow path: end of string is too close to a page boundary. */
233
    while (o != NULL) {
2,208✔
234
      GCstr *sx = gco2str(o);
2,123✔
235
      if (sx->hash == h && sx->len == len && inc_collision_hard() &&
2,123✔
236
                      memcmp(str, strdata(sx), len) == 0) {
1,285✔
237
        /* Resurrect if dead. Can only happen with fixstring() (keywords). */
238
        if (isdead(g, o)) flipwhite(o);
1,282✔
239
        g->strhash_hit++;
1,282✔
240
        return sx;  /* Return existing string. */
1,282✔
241
      }
242
      o = gcnext(o);
841✔
243
      inc_collision_soft();
841✔
244
    }
245
  }
246
#if LUAJIT_SMART_STRINGS
247
  /* "Fast" hash function consumes all bytes of a string <= 12 bytes. */
248
  if (len > 12) {
792,481✔
249
    /*
250
    ** The bloom filter is keyed with the high 12 bits of the fast
251
    ** hash sum. The filter is rebuilt during GC cycle. It's beneficial
252
    ** to have these bits readily available and avoid hash sum
253
    ** recalculation during GC. High 6 bits are included in the "full"
254
    ** hash sum, and bits 19-25 are stored in s->strflags.
255
    **/
256
    int search_fullh =
439,356✔
257
       bloomtest(g->strbloom.cur[0], h>>(sizeof(h)*8- 6)) != 0 &&
219,678✔
258
       bloomtest(g->strbloom.cur[1], h>>(sizeof(h)*8-12)) != 0;
52,335✔
259
    if (LJ_UNLIKELY(search_fullh || collisions > max_collisions)) {
219,678✔
260
      MSize fh = lj_fullhash((const uint8_t*)str, len);
43,687✔
261
#define high6mask ((~(MSize)0)<<(sizeof(MSize)*8-6))
262
      fh = (fh >> 6) | (h & high6mask);
43,687✔
263
      if (search_fullh) {
43,687✔
264
        /* Recheck if the string has already been interned with "harder" hash. */
265
        o = gcref(g->strhash[fh & g->strmask]);
42,361✔
266
        if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
42,361✔
267
          while (o != NULL) {
56,893✔
268
            GCstr *sx = gco2str(o);
34,097✔
269
            if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
53,662✔
270
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
271
              if (isdead(g, o)) flipwhite(o);
19,565✔
272
              g->strhash_hit++;
19,565✔
273
              return sx;  /* Return existing string. */
19,565✔
274
            }
275
            o = gcnext(o);
14,532✔
276
          }
277
        } else {  /* Slow path: end of string is too close to a page boundary. */
UNCOV
278
          while (o != NULL) {
×
UNCOV
279
            GCstr *sx = gco2str(o);
×
UNCOV
280
            if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
×
281
              /* Resurrect if dead. Can only happen with fixstring() (keywords). */
UNCOV
282
              if (isdead(g, o)) flipwhite(o);
×
UNCOV
283
              g->strhash_hit++;
×
UNCOV
284
              return sx;  /* Return existing string. */
×
285
            }
UNCOV
286
            o = gcnext(o);
×
287
          }
288
        }
289
      }
290
      if (collisions > max_collisions) {
24,122✔
291
        strflags = 0xc0 | ((h>>(sizeof(h)*8-12))&0x3f);
21,188✔
292
        bloomset(g->strbloom.cur[0], h>>(sizeof(h)*8- 6));
21,188✔
293
        bloomset(g->strbloom.cur[1], h>>(sizeof(h)*8-12));
21,188✔
294
        bloomset(g->strbloom.next[0], h>>(sizeof(h)*8- 6));
21,188✔
295
        bloomset(g->strbloom.next[1], h>>(sizeof(h)*8-12));
21,188✔
296
        h = fh;
21,188✔
297
      }
298
    }
299
  }
300
#endif
301
  g->strhash_miss++;
772,916✔
302
  /* Nope, create a new string. */
303
  s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
772,916✔
304
  newwhite(g, s);
772,916✔
305
  s->gct = ~LJ_TSTR;
772,916✔
306
  s->len = len;
772,916✔
307
  s->hash = h;
772,916✔
308
  s->reserved = 0;
772,916✔
309
  s->strflags = strflags;
772,916✔
310
  memcpy(strdatawr(s), str, len);
772,916✔
311
  strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
772,916✔
312
  /* Add it to string hash table. */
313
  h &= g->strmask;
772,916✔
314
  s->nextgc = g->strhash[h];
772,916✔
315
  /* NOBARRIER: The string table is a GC root. */
316
  setgcref(g->strhash[h], obj2gco(s));
772,916✔
317
  if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
772,916✔
318
    lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
527✔
319
  return s;  /* Return newly interned string. */
320
}
321

322
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
768,641✔
323
{
324
  g->strnum--;
768,641✔
325
  lj_mem_free(g, s, sizestring(s));
768,641✔
326
}
768,641✔
327

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc