• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

processone / stringprep / 16

12 Feb 2025 11:58AM UTC coverage: 86.275% (+2.0%) from 84.249%
16

Pull #14

github

web-flow
Merge 3e54bf3f1 into 7688cf9ce
Pull Request #14: Use on_load module attribute

220 of 255 relevant lines covered (86.27%)

836.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.27
/c_src/stringprep.cpp
1
/*
2
 * Copyright (C) 2002-2021 ProcessOne, SARL. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 *
16
 */
17

18
#include <string.h>
19
#include <stdint.h>
20
#include <erl_nif.h>
21

22
#include "uni_data.c"
23
#include "uni_norm.c"
24

25
/* Hangul constants */
26
#define SBase 0xAC00
27
#define LBase 0x1100
28
#define VBase 0x1161
29
#define TBase 0x11A7
30
#define LCount 19
31
#define VCount 21
32
#define TCount 28
33
#define NCount (VCount * TCount)
34
#define SCount (LCount * NCount)
35

36
static int compose(int ch1, int ch2) {
1,227✔
37
  int info1, info2;
1,227✔
38

39
  if (LBase <= ch1 && ch1 < LBase + LCount &&
1,227✔
40
          VBase <= ch2 && ch2 < VBase + VCount) {
×
41
        return SBase + ((ch1 - LBase) * VCount + (ch2 - VBase)) * TCount;
×
42
  }
43

44
  if (SBase <= ch1 && ch1 < SBase + SCount && ((ch1 - SBase) % TCount) == 0 &&
1,227✔
45
          TBase <= ch2 && ch2 < TBase + TCount) {
×
46
        return ch1 + ch2 - TBase;
×
47
  }
48

49
  info1 = GetUniCharCompInfo(ch1);
1,227✔
50
  if (info1 != -1 && info1 & CompSingleMask) {
1,227✔
51
        if (!(info1 & CompSecondMask) &&
111✔
52
                ch2 == compFirstList[info1 & CompMask][0]) {
111✔
53
          return compFirstList[info1 & CompMask][1];
3✔
54
        } else
55
          return 0;
56
  }
57

58
  info2 = GetUniCharCompInfo(ch2);
1,116✔
59
  if (info2 != -1 && info2 & CompSingleMask) {
1,116✔
60
        if ((info2 & CompSecondMask) &&
57✔
61
                ch1 == compSecondList[info2 & CompMask][0]) {
×
62
          return compSecondList[info2 & CompMask][1];
×
63
        } else
64
          return 0;
65
  }
66

67
  if (info1 != -1 && info2 != -1 &&
1,059✔
68
          !(info1 & CompSecondMask) && (info2 & CompSecondMask))
606✔
69
        return compBothList[info1][info2 & CompMask];
207✔
70
  else
71
        return 0;
72
}
73

74
template<class T, int N>
75
class MaybeStaticBuf {
76
 public:
77
  MaybeStaticBuf() : pos(0), size(N), len(0), buf(static_buf) { }
564✔
78
  ~MaybeStaticBuf() {
1,128✔
79
        if (buf != static_buf)
1,128✔
80
          enif_free(buf);
×
81
  }
82
  T init(T ch) {
2,598✔
83
        len = 1;
2,598✔
84
        pos = 0;
2,598✔
85
        return buf[0] = ch;
2,598✔
86
  }
87
  T add(T ch) {
2,592✔
88
        if (len >= size) {
2,592✔
89
          if (buf == static_buf) {
×
90
                T *old = buf;
×
91
                buf = (T *) enif_alloc(sizeof(T) * size * 2);
×
92
                if (!buf)
×
93
                  return -2;
94
                memcpy(buf, old, size * sizeof(T));
×
95
          } else {
96
                buf = (T *) enif_realloc(buf, sizeof(T) * size * 2);
×
97
                if (!buf)
×
98
                  return -2;
99
          }
100
          size *= 2;
×
101
        }
102
        buf[len++] = ch;
2,592✔
103
        return ch;
2,592✔
104
  }
105

106
  void empty() {
1,857✔
107
        pos = len = 0;
1,857✔
108
  }
109

110
  void swap(int p1, int p2) {
×
111
        T ch = buf[p1];
×
112
        buf[p1] = buf[p2];
×
113
        buf[p2] = ch;
×
114
  }
115

116
  T operator[](int index) {
4,623✔
117
        return buf[index];
4,623✔
118
  }
119

120
  int pos;
121
  int size;
122
  int len;
123
 private:
124
  T static_buf[N];
125
  T *buf;
126
};
127

128
class UTF8DecoderStream {
129
 public:
130
  UTF8DecoderStream(ErlNifBinary *input) : input(input), pos(0) { };
564✔
131

132
  void reset() {
201✔
133
        pos = 0;
201✔
134
  }
201✔
135

136
  ErlNifBinary *getBinary() {
123✔
137
        return input;
123✔
138
  }
139

140
  int32_t getNext() {
3,873✔
141
        if (pos >= input->size)
3,873✔
142
          return -1;
143
        unsigned char c = input->data[pos++];
2,427✔
144
        if (c <= 0x80) {
2,427✔
145
          return c;
972✔
146
        } else if (c < 0xC0) {
1,455✔
147
          return -2;
148
        } else if (c < 0xE0) {
1,455✔
149
          if (pos < input->size && (input->data[pos] & 0xC0) == 0x80) {
627✔
150
                return ((c & 0x1F) << 6) | (input->data[pos++] & 0x3F);
603✔
151
          }
152
        } else if (c < 0xF0) {
828✔
153
          if (pos + 1 < input->size && (input->data[pos] & 0xC0) == 0x80 &&
624✔
154
                  (input->data[pos + 1] & 0xC0) == 0x80) {
624✔
155
                pos += 2;
624✔
156
                return ((c & 0x0F) << 12)
624✔
157
                        | ((input->data[pos - 2] & 0x3F) << 6)
624✔
158
                        | (input->data[pos - 1] & 0x3F);
624✔
159
          }
160
        } else if (c < 0xF8) {
204✔
161
          if (pos + 2 < input->size &&
204✔
162
                  (input->data[pos] & 0xC0) == 0x80 &&
204✔
163
                  (input->data[pos + 1] & 0xC0) == 0x80 &&
204✔
164
                  (input->data[pos + 2] & 0xC0) == 0x80) {
204✔
165
                int32_t wc = ((c & 0x07) << 18)
204✔
166
                        | ((input->data[pos] & 0x3F) << 12)
204✔
167
                        | ((input->data[pos + 1] & 0x3F) << 6)
204✔
168
                        | (input->data[pos + 2] & 0x3F);
204✔
169
                pos += 3;
204✔
170
                if (wc <= 0x10FFFF)
204✔
171
                  return wc;
172
          }
173
        }
174
        return -2;
175
  }
176

177
 private:
178
  ErlNifBinary *input;
179
  size_t pos;
180
};
181

182
class PreprocessStream {
183
 public:
184
  PreprocessStream(UTF8DecoderStream *source, bool toLower) :
564✔
185
          source(source), buf(NULL), pos(0), len(0), toLower(toLower) {
564✔
186
  }
187

188
  int32_t getNext() {
2,826✔
189
        if (pos < len)
2,826✔
190
          return buf[pos++];
210✔
191

192
        loop:
2,616✔
193
        int32_t ch = source->getNext();
2,772✔
194
        if (ch < 0)
2,772✔
195
          return ch;
1,335✔
196
        int info = GetUniCharInfo(ch);
1,437✔
197

198
        if (!(info & B1Mask)) {
1,437✔
199
          if (toLower) {
1,281✔
200
                if (!(info & MCMask)) {
897✔
201
                  return ch + GetDelta(info);
753✔
202
                } else {
203
                  buf = GetMC(info) + 1;
144✔
204
                  len = buf[-1];
144✔
205
                  pos = 1;
144✔
206
                  return buf[0];
144✔
207
                }
208
          } else {
209
                return ch;
210
          }
211
        } else
212
          goto loop;
156✔
213
  }
214
 private:
215
  UTF8DecoderStream *source;
216
  int32_t *buf;
217
  int pos;
218
  int len;
219
  bool toLower;
220
};
221

222
class DecompositeStream {
223
 public:
224
  DecompositeStream(PreprocessStream *source) : source(source), pos(0), len(0) { }
564✔
225

226
  int32_t getNext() {
3,114✔
227
        if (pos < len)
3,114✔
228
          return decompList[pos++];
288✔
229

230
        int32_t ch = source->getNext();
2,826✔
231

232
        if (ch < 0)
2,826✔
233
          return ch;
234

235
        int info = GetUniCharDecompInfo(ch);
1,491✔
236
        if (info >= 0) {
1,491✔
237
          pos = GetDecompShift(info);
246✔
238
          len = pos + GetDecompLen(info);
246✔
239
          return decompList[pos++];
246✔
240
        } else
241
          return ch;
242
  }
243

244
 private:
245
  PreprocessStream *source;
246
  int pos;
247
  int len;
248
};
249

250
class CanonicalizeStream {
×
251
 public:
252
  CanonicalizeStream(DecompositeStream *source) : source(source), buf() {
564✔
253
  }
254

255
  int32_t getNext() {
2,598✔
256
        if (buf.pos < buf.len - 1)
2,598✔
257
          return buf[buf.pos++];
×
258

259
        int32_t ch, ch2;
2,598✔
260
        if (buf.len > 0) {
2,598✔
261
          ch = buf.init(buf[buf.len - 1]);
2,034✔
262
        } else {
263
          ch = buf.init(source->getNext());
564✔
264

265
          if (ch < 0)
564✔
266
                return ch;
267
        }
268

269
        buf.pos++;
2,550✔
270

271
        int last = GetUniCharCClass(ch);
2,550✔
272
        while ((ch2 = buf.add(source->getNext())) >= 0) {
2,550✔
273
          int next = GetUniCharCClass(ch2);
1,263✔
274
          if (next != 0 && last > next) {
1,263✔
275
                for (int j = buf.len - 2; j >= 0; j--) {
×
276
                  if (GetUniCharCClass(buf[j]) <= next)
×
277
                        break;
278
                  buf.swap(j, j + 1);
×
279
                }
280
          } else {
281
                return buf[0];
1,263✔
282
          }
283
        }
284
        return buf[0];
1,287✔
285
  }
286

287
 private:
288
  DecompositeStream *source;
289
  MaybeStaticBuf<int32_t, 8> buf;
290
};
291

292
class ComposeStream {
×
293
 public:
294
  ComposeStream(CanonicalizeStream *source) : source(source), buf(), lastCh(-1) {
564✔
295
  }
296

297
  int32_t getNext() {
1,896✔
298
        int32_t ch, nch;
1,896✔
299

300
        if (buf.pos < buf.len)
1,896✔
301
          return buf[buf.pos++];
39✔
302
        else
303
          buf.empty();
1,857✔
304

305
        if (lastCh < 0) {
1,857✔
306
          ch = source->getNext();
843✔
307
          if (ch < 0)
843✔
308
                return ch;
309
        } else {
310
          ch = lastCh;
311
        }
312

313
        int cclass1 = GetUniCharCClass(ch);
1,530✔
314
        while ((lastCh = source->getNext()) >= 0) {
1,755✔
315
          int cclass2 = GetUniCharCClass(lastCh);
1,257✔
316
          if ((cclass1 == 0 || cclass2 > cclass1) &&
1,257✔
317
                  (nch = compose(ch, lastCh))) {
1,227✔
318
                ch = nch;
319
          } else if (cclass2 == 0) {
1,074✔
320
                return ch;
1,032✔
321
          } else {
322
                buf.add(lastCh);
42✔
323
                cclass1 = cclass2;
42✔
324
          }
325
        }
326

327
        if (lastCh >= -1)
498✔
328
          return ch;
329
        else
330
          return lastCh;
×
331
  }
332
 private:
333
  CanonicalizeStream *source;
334
  MaybeStaticBuf<int32_t, 8> buf;
335
  int32_t lastCh;
336
};
337

338
class PrepCheckStream {
339
 public:
340
  PrepCheckStream(ComposeStream *source, int32_t prohibit) :
564✔
341
          source(source), prohibit(prohibit), first_ral(-1),
564✔
342
          last_ral(0), have_ral(0), have_l(0) {
564✔
343
  }
344

345
  int32_t getNext() {
1,896✔
346
        int32_t ch = source->getNext();
1,896✔
347
        if (ch < 0)
1,896✔
348
          return ch;
349

350
        int32_t info = GetUniCharInfo(ch);
1,569✔
351

352
        if (info & prohibit) {
1,569✔
353
          return -2;
354
        }
355
        if (first_ral < 0)
1,332✔
356
          first_ral = (info & D1Mask) != 0;
291✔
357

358
        last_ral = (info & D1Mask) != 0;
1,332✔
359
        have_ral = have_ral || last_ral;
1,332✔
360
        have_l = have_l || (info & D2Mask) != 0;
1,332✔
361

362
        return ch;
1,332✔
363
  }
364

365
  bool was_valid() {
315✔
366
        return !(have_ral && (!first_ral || !last_ral || have_l));
315✔
367
  }
368
 private:
369
  ComposeStream *source;
370
  int32_t prohibit;
371
  char first_ral;
372
  char last_ral;
373
  char have_ral;
374
  char have_l;
375
};
376

377
class UTF8Encoder {
378
 public:
379
  UTF8Encoder(size_t initial_size, UTF8DecoderStream *input) : input(*input), pos(0) {
564✔
380
        binary.size = initial_size < 4 ? 4 : initial_size;
564✔
381
        binary.data = NULL;
564✔
382
  }
383

384
  ~UTF8Encoder() {
564✔
385
        if (binary.data)
564✔
386
          enif_release_binary(&binary);
201✔
387
  }
564✔
388

389
  ErlNifBinary *encode_stream(PrepCheckStream *source) {
564✔
390
        int32_t ch, ich;
564✔
391
        int idx = 0;
564✔
392

393
        while ((ch = source->getNext()) == (ich = input.getNext()) && ch >= 0) {
954✔
394
          idx++;
390✔
395
        }
396
        if (ch < -1)
564✔
397
          return NULL;
398
        if (ch != ich) {
324✔
399
          input.reset();
201✔
400
          while (idx-- > 0)
348✔
401
                if (put_char(input.getNext()) < 0)
147✔
402
                  return NULL;
403
          if (ch >= 0) {
201✔
404
                do {
942✔
405
                  if (put_char(ch) < 0)
942✔
406
                        return NULL;
407
                } while ((ch = source->getNext()) >= 0);
942✔
408
                if (ch < -1)
177✔
409
                  return NULL;
410
          }
411
        } else {
412
          return input.getBinary();
123✔
413
        }
414

415
        if (binary.data) {
192✔
416
          if (pos != binary.size && !enif_realloc_binary(&binary, pos))
168✔
417
                return NULL;
418
        } else if (!enif_alloc_binary(0, &binary))
24✔
419
          return NULL;
420

421
        return &binary;
192✔
422
  }
423

424
  int put_char(int32_t ch) {
1,089✔
425
        if (ch <= 0x7F) {
1,089✔
426
          if (!buf_size_inc(1)) return -2;
714✔
427
          binary.data[pos++] = (unsigned char) ch;
714✔
428
        } else if (ch <= 0x7FF) {
375✔
429
          if (!buf_size_inc(2)) return -2;
228✔
430
          binary.data[pos] = (unsigned char) ((ch >> 6) | 0xC0);
228✔
431
          binary.data[pos + 1] = (unsigned char) ((ch | 0x80) & 0xBF);
228✔
432
          pos += 2;
228✔
433
        } else if (ch <= 0xFFFF) {
147✔
434
          if (!buf_size_inc(3)) return -2;
147✔
435
          binary.data[pos] = (unsigned char) ((ch >> 12) | 0xE0);
147✔
436
          binary.data[pos + 1] = (unsigned char) (((ch >> 6) | 0x80) & 0xBF);
147✔
437
          binary.data[pos + 2] = (unsigned char) ((ch | 0x80) & 0xBF);
147✔
438
          pos += 3;
147✔
439
        } else if (ch <= 0x1FFFFF) {
×
440
          if (!buf_size_inc(4)) return -2;
×
441
          binary.data[pos] = (unsigned char) ((ch >> 18) | 0xF0);
×
442
          binary.data[pos + 1] = (unsigned char) (((ch >> 12) | 0x80) & 0xBF);
×
443
          binary.data[pos + 2] = (unsigned char) (((ch >> 6) | 0x80) & 0xBF);
×
444
          binary.data[pos + 3] = (unsigned char) ((ch | 0x80) & 0xBF);
×
445
          pos += 4;
×
446
        } else
447
          return -2;
448
        return 0;
449
  }
450
 private:
451
  int buf_size_inc(int inc) {
1,089✔
452
        int res = 1;
1,089✔
453

454
        if (!binary.data)
1,089✔
455
          res = enif_alloc_binary(binary.size, &binary);
177✔
456

457
        if (pos + inc > binary.size)
1,089✔
458
          res = enif_realloc_binary(&binary, binary.size * 2);
66✔
459

460
        return res;
1,089✔
461
  }
462

463
  UTF8DecoderStream input;
464
  ErlNifBinary binary;
465
  size_t pos;
466
};
467

468
/* NIF load callback: nothing to set up, always reports success (0). */
static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) {
  (void) env;
  (void) priv;
  (void) load_info;
  return 0;
}
471

472
/*
 * Shared implementation of all exported NIFs.
 *
 * argv[0] must be an iolist/binary; it is pushed through the
 * decode -> map -> decompose -> reorder -> compose -> check -> encode
 * pipeline. `prohibit` is the character-info mask that makes a character
 * illegal, `toLower` enables case mapping.
 *
 * Returns the resulting binary, the atom `error` when processing or the
 * final validity check fails, or badarg for a wrong arity or argument type.
 */
static ERL_NIF_TERM prep(ErlNifEnv *env, int argc,
                         const ERL_NIF_TERM argv[],
                         int prohibit, bool toLower) {
  ErlNifBinary input;

  if (argc != 1 || !enif_inspect_iolist_as_binary(env, argv[0], &input))
    return enif_make_badarg(env);

  // Each stage pulls from the previous one.
  UTF8DecoderStream decoder(&input);
  PreprocessStream mapper(&decoder, toLower);
  DecompositeStream decomposer(&mapper);
  CanonicalizeStream reorderer(&decomposer);
  ComposeStream composer(&reorderer);
  PrepCheckStream checker(&composer, prohibit);
  UTF8Encoder encoder(input.size, &decoder);

  ErlNifBinary *res = encoder.encode_stream(&checker);

  // was_valid() is only meaningful once the stream has been fully consumed,
  // so it is consulted only after a successful encode.
  if (res && checker.was_valid())
    return enif_make_binary(env, res);

  return enif_make_atom(env, "error");
}
564✔
498

499
/* NIF nodeprep/1: case mapping on; prohibits ACMask, C11Mask, C21Mask and XNPMask characters. */
static ERL_NIF_TERM nodeprep(ErlNifEnv *env, int argc,
                             const ERL_NIF_TERM argv[]) {
  const int prohibited = ACMask | C11Mask | C21Mask | XNPMask;
  return prep(env, argc, argv, prohibited, true);
}
503

504
/* NIF nameprep/1: case mapping on; prohibits ACMask characters. */
static ERL_NIF_TERM nameprep(ErlNifEnv *env, int argc,
                             const ERL_NIF_TERM argv[]) {
  const int prohibited = ACMask;
  return prep(env, argc, argv, prohibited, true);
}
508

509
/* NIF resourceprep/1: case is preserved; prohibits ACMask and C21Mask characters. */
static ERL_NIF_TERM resourceprep(ErlNifEnv *env, int argc,
                                 const ERL_NIF_TERM argv[]) {
  const int prohibited = ACMask | C21Mask;
  return prep(env, argc, argv, prohibited, false);
}
513

514
/* NIF tolower/1: case mapping on; prohibits ACMask characters. */
static ERL_NIF_TERM to_lower(ErlNifEnv *env, int argc,
                             const ERL_NIF_TERM argv[]) {
  const int prohibited = ACMask;
  return prep(env, argc, argv, prohibited, true);
}
518

519
/* NIF tolower_nofilter/1: case mapping on; no character is prohibited. */
static ERL_NIF_TERM to_lower_no_filter(ErlNifEnv *env, int argc,
                                       const ERL_NIF_TERM argv[]) {
  const int prohibited = 0;
  return prep(env, argc, argv, prohibited, true);
}
523

524
static ErlNifFunc nif_funcs[] =
525
        {
526
                {"nodeprep", 1, nodeprep},
527
                {"nameprep", 1, nameprep},
528
                {"resourceprep", 1, resourceprep},
529
                {"tolower", 1, to_lower},
530
                {"tolower_nofilter", 1, to_lower_no_filter}
531
        };
532

533
ERL_NIF_INIT(stringprep, nif_funcs, load, NULL, NULL, NULL)
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc