• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ascii-boxes / boxes / 6518013212

14 Oct 2023 01:37PM UTC coverage: 81.211% (-0.4%) from 81.608%
6518013212

push

github

tsjensen
remove

2349 of 3210 branches covered (0.0%)

Branch coverage included in aggregate %.

1 of 1 new or added line in 1 file covered. (100.0%)

3767 of 4321 relevant lines covered (87.18%)

7801.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.05
/src/unicode.c
1
/*
2
 * boxes - Command line filter to draw/remove ASCII boxes around text
3
 * Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
4
 *
5
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
6
 * License, version 3, as published by the Free Software Foundation.
7
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
8
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
9
 * details.
10
 * You should have received a copy of the GNU General Public License along with this program.
11
 * If not, see <https://www.gnu.org/licenses/>.
12
 *
13
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
14
 */
15

16
/*
17
 * Functions and constants for handling unicode strings with libunistring.
18
 */
19

20
#include "config.h"
21
#include <errno.h>
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25

26
#include <uniconv.h>
27
#include <unictype.h>
28
#include <unistr.h>
29

30
#include "boxes.h"
31
#include "tools.h"
32
#include "unicode.h"
33

34

35

36
/* effective character encoding of input and output text */
37
const char *encoding;
38

39
/* ucs4_t character '\t' (tab)  */
40
const ucs4_t char_tab = 0x00000009;
41

42
/* ucs4_t character ' '  (space) */
43
const ucs4_t char_space = 0x00000020;
44

45
/* ucs4_t character '\r' (carriage return) */
46
const ucs4_t char_cr = 0x0000000d;
47

48
/* ucs4_t character '\n' (newline) */
49
const ucs4_t char_newline = 0x0000000a;
50

51
/* ucs4_t character 0x1b (escape)  */
52
const ucs4_t char_esc = 0x0000001b;
53

54
/* ucs4_t character '\0' (zero) */
55
const ucs4_t char_nul = 0x00000000;
56

57

58

59
/**
 * Test whether the character stored at position `idx` of `text` is `expected_char`.
 *
 * @param text the UTF-32 string to inspect, may be NULL
 * @param idx index of the character to look at; not range-checked here
 * @param expected_char the character to compare against
 * @return 1 if `text` is not NULL and `text[idx]` equals `expected_char`, 0 otherwise
 */
int is_char_at(const uint32_t *text, const size_t idx, const ucs4_t expected_char)
{
    if (text == NULL) {
        return 0;
    }
    return text[idx] == expected_char ? 1 : 0;
}
63

64

65

66
/**
 * Overwrite the single character at position `idx` of `text` with `char_to_set`.
 *
 * @param text the UTF-32 buffer to modify; must not be NULL (not checked)
 * @param idx index of the character to overwrite; not range-checked here
 * @param char_to_set the character to store
 */
void set_char_at(uint32_t *text, const size_t idx, const ucs4_t char_to_set)
{
    uint32_t *target = text + idx;
    *target = char_to_set;
}
38,844✔
70

71

72

73
int is_empty(const uint32_t *text)
617,611✔
74
{
75
    return text == NULL || is_char_at(text, 0, char_nul);
617,611✔
76
}
77

78

79

80
int u32_is_blank(const uint32_t *text)
1✔
81
{
82
    if (is_empty(text)) {
1!
83
        return 1;
×
84
    }
85

86
    for (const uint32_t *c = text; *c != char_nul; c++) {
5✔
87
        if (!is_blank(*c)) {
4!
88
            return 0;
×
89
        }
90
    }
91
    return 1;
1✔
92
}
93

94

95

96
/**
 * Determine whether a character lies in the printable ASCII range.
 *
 * @param c the character to check
 * @return 1 if `c` is between 0x20 (space) and 0x7e (tilde) inclusive, 0 otherwise
 */
int is_ascii_printable(const ucs4_t c)
{
    if (c < 0x20) {
        return 0;
    }
    return c <= 0x7e ? 1 : 0;
}
100

101

102

103
int is_allowed_anywhere(const ucs4_t c)
712,657✔
104
{
105
    /* ESC, CR, LF, and TAB are control characters */
106
    return !uc_is_cntrl(c) || c == char_tab || c == char_cr || c == char_newline || c == char_esc;
712,657✔
107
}
108

109

110

111
int is_allowed_in_shape(const ucs4_t c)
23,929✔
112
{
113
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline;
23,929✔
114
}
115

116

117

118
/**
 * Check whether a character passes the "sample" filter. This applies no restriction beyond
 * is_allowed_anywhere().
 *
 * @param c the character to check
 * @return the result of is_allowed_anywhere() for `c`
 */
int is_allowed_in_sample(const ucs4_t c)
{
    const int allowed = is_allowed_anywhere(c);
    return allowed;
}
122

123

124

125
int is_allowed_in_filename(const ucs4_t c)
246✔
126
{
127
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
246✔
128
}
129

130

131

132
int is_allowed_in_kv_string(const ucs4_t c)
22,619✔
133
{
134
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
22,619✔
135
}
136

137

138

139
int is_blank(const ucs4_t c)
1,179,770✔
140
{
141
    return c == char_tab || uc_is_blank(c);
1,179,770✔
142
}
143

144

145

146
/**
 * Allocate a new, empty UTF-32 string, i.e. a buffer holding only the zero terminator.
 *
 * @return pointer to the newly allocated string, for which the caller becomes responsible
 *         (release with free()), or NULL if the allocation failed
 */
uint32_t *new_empty_string32(void)
{
    /* calloc() zero-fills, so the single element already is the terminator */
    uint32_t *result = calloc(1, sizeof *result);
    return result;
}
150

151

152

153
ucs4_t to_utf32(char ascii)
1,057✔
154
{
155
    ucs4_t c = char_nul;
1,057✔
156
    if (ascii >= 0x20 && ascii < 0x7f) {
1,057!
157
        char *bytes = (char *) (&c);
1,056✔
158
        bytes[0] = ascii;
1,056✔
159
    }
160
    return c;
1,057✔
161
}
162

163

164

165
uint32_t *advance_next32(const uint32_t *s, size_t *invis)
614,107✔
166
{
167
    if (is_empty(s)) {
614,107!
168
        return (uint32_t *) s;
×
169
    }
170

171
    int ansipos = 0;
614,107✔
172
    (*invis) = 0;
614,107✔
173
    ucs4_t c;
174
    const uint32_t *rest = s;
614,107✔
175
    while ((rest = u32_next(&c, rest))) {
676,234✔
176
        if (ansipos == 0 && c == char_esc) {
676,233✔
177
            /* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
178
            (*invis)++;
9,994✔
179
            ansipos++;
9,994✔
180
        } else if (ansipos == 1 && (c == '[' || c == '(')) {
666,239!
181
            /* Found '[' char after ESC. A CSI sequence has started. */
182
            (*invis)++;
9,994✔
183
            ansipos++;
9,994✔
184
        } else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) { /* between '@' and '_' (mostly uppercase letters) */
656,245!
185
            /* Found a byte designating the end of a two-byte escape sequence */
186
            (*invis)++;
×
187
            ansipos = 0;
×
188
            break;
×
189
        } else if (ansipos == 2) {
656,245✔
190
            /* Inside CSI sequence - Keep counting chars as invisible */
191
            (*invis)++;
52,132✔
192

193
            /* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
194
            if (c >= 0x40 && c <= 0x7e) {
52,132!
195
                ansipos = 0;
9,993✔
196
                break;
9,993✔
197
            }
198
        } else {
199
            break;
604,113✔
200
        }
201
    }
202
    if (rest == NULL) {
614,107✔
203
        rest = s + u32_strlen(s);
1✔
204
    }
205
    return (uint32_t *) rest;
614,107✔
206
}
207

208

209

210
uint32_t *advance32(uint32_t *s, const size_t offset)
×
211
{
212
    if (is_empty(s)) {
×
213
        return new_empty_string32();
×
214
    }
215
    if (offset == 0) {
×
216
        return s;
×
217
    }
218

219
    size_t count = 0;                 /* the count of visible characters */
×
220
    int visible = 1;                  /* flag indicating whether the previous char was a visible char */
×
221
    const uint32_t *last_esc = NULL;  /* pointer to the start of the last escape sequence encountered */
×
222
    const uint32_t *rest = s;         /* pointer to the next character coming up */
×
223
    size_t step_invis = 0;            /* unused, but required for advance_next32() call */
×
224

225
    for (ucs4_t c = s[0]; c != char_nul; c = rest[0]) {
×
226
        if (c == char_esc) {
×
227
            last_esc = rest;
×
228
            visible = 0;
×
229
        } else {
230
            if (count++ == offset) {
×
231
                if (!visible && last_esc != NULL) {
×
232
                    return (uint32_t *) last_esc;
×
233
                }
234
                break;
×
235
            }
236
            visible = 1;
×
237
        }
238
        rest = advance_next32(rest, &step_invis);
×
239
    }
240
    return (uint32_t *) rest;         /* may point to zero terminator when offset too large */
×
241
}
242

243

244

245
uint32_t *u32_strconv_from_input(const char *src)
666✔
246
{
247
    return u32_strconv_from_arg(src, encoding);
666✔
248
}
249

250

251

252
uint32_t *u32_strconv_from_arg(const char *src, const char *sourceEncoding)
17,969✔
253
{
254
    if (src == NULL) {
17,969✔
255
        return NULL;
2✔
256
    }
257
    if (src[0] == '\0') {
17,967✔
258
        return new_empty_string32();
60✔
259
    }
260

261
    uint32_t *result = u32_strconv_from_encoding(
17,907✔
262
            src,                    /* the source string to convert */
263
            sourceEncoding,         /* the character encoding from which to convert */
264
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
265

266
    if (result == NULL) {
17,907!
267
        fprintf(stderr, "%s: failed to convert from '%s' to UTF-32: %s\n", PROJECT, sourceEncoding, strerror(errno));
×
268
    }
269
    return result;
17,907✔
270
}
271

272

273

274
char *u32_strconv_to_output(const uint32_t *src)
1,283✔
275
{
276
    return u32_strconv_to_arg(src, encoding);
1,283✔
277
}
278

279

280

281
char *u32_strconv_to_arg(const uint32_t *src, const char *targetEncoding)
1,447✔
282
{
283
    if (src == NULL) {
1,447!
284
        return NULL;
×
285
    }
286
    if (is_empty(src)) {
1,447!
287
        return strdup("");
×
288
    }
289

290
    char *result = u32_strconv_to_encoding(
1,447✔
291
            src,                    /* the source string to convert */
292
            targetEncoding,         /* the character encoding to which to convert */
293
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
294

295
    if (result == NULL) {
1,447!
296
        fprintf(stderr, "%s: failed to convert from UTF-32 to '%s': %s\n", PROJECT, targetEncoding, strerror(errno));
×
297
    }
298
    return result;
1,447✔
299
}
300

301

302

303
const char *check_encoding(const char *manual_encoding, const char *system_encoding)
160✔
304
{
305
    if (manual_encoding != NULL) {
160✔
306
        uint32_t *unicode = u32_strconv_from_encoding(" ", manual_encoding, iconveh_error);
13✔
307
        if (unicode != NULL) {
13✔
308
            BFREE(unicode);
12!
309
            return manual_encoding;
12✔
310
        }
311
        fprintf(stderr, "%s: Invalid character encoding: %s - falling back to %s\n",
1✔
312
                PROJECT, manual_encoding, system_encoding);
313
        fflush(stderr);
1✔
314
    }
315
    return system_encoding;
148✔
316
}
317

318

319

320
char *to_utf8(uint32_t *src)
177✔
321
{
322
    if (src == NULL) {
177!
323
        return NULL;
×
324
    }
325
    if (is_empty(src)) {
177!
326
        return (char *) strdup("");
×
327
    }
328
    char *result = u32_strconv_to_encoding(src, "UTF-8", iconveh_error);
177✔
329
    if (result == NULL) {
177!
330
        bx_fprintf(stderr, "%s: failed to convert a string to UTF-8: %s\n", PROJECT, strerror(errno));
×
331
        return NULL;
×
332
    }
333
    return result;
177✔
334
}
335

336

337

338
uint32_t *u32_nspaces(const size_t n)
916✔
339
{
340
    uint32_t *result = (uint32_t *) malloc((n + 1) * sizeof(uint32_t));
916✔
341
    if (result == NULL) {
916!
342
        perror(PROJECT);
×
343
        return NULL;
×
344
    }
345
    if (n > 0) {
916✔
346
        u32_set(result, char_space, n);
716✔
347
    }
348
    set_char_at(result, n, char_nul);
916✔
349
    return result;
916✔
350
}
351

352

353
// TODO It seems skip is always 0, can we remove that parameter?
354
/**
 * Search `haystack` backwards for `needle`, comparing at most `needle_len` characters at each
 * candidate position.
 *
 * The search starts at the last occurrence of the first needle character and walks towards the
 * start of `haystack`. It stops at `haystack` itself, so no pointer before the start of the
 * string is ever formed.
 *
 * @param haystack the zero-terminated string to search in, may be NULL
 * @param needle the zero-terminated string to search for, may be NULL
 * @param needle_len the number of characters of `needle` to compare
 * @param skip the number of matches, counted from the end, to pass over before a match is
 *         returned; negative values are treated as 0
 * @return `haystack` if `needle` is empty; NULL if `haystack` is empty or no (further) match
 *         exists; otherwise a pointer to the start of the match inside `haystack`
 */
uint32_t *u32_strnrstr(const uint32_t *haystack, const uint32_t *needle, const size_t needle_len, int skip)
{
    if (is_empty(needle)) {
        return (uint32_t *) haystack;
    }
    if (is_empty(haystack)) {
        return NULL;
    }
    if (skip < 0) {
        skip = 0;
    }

    uint32_t *p = u32_strrchr(haystack, needle[0]);
    if (!p) {
        return NULL;
    }

    for (;;) {
        if (u32_strncmp(p, needle, needle_len) == 0) {
            if (skip == 0) {
                return p;
            }
            skip--;
        }
        if (p == haystack) {
            /* Start of the string reached; stepping further back would leave the array */
            return NULL;
        }
        --p;
    }
}
388

389

390

391
void u32_insert_space_at(uint32_t **s, const size_t idx, const size_t n)
4✔
392
{
393
    if (s == NULL || n == 0) {
4✔
394
        return;
2✔
395
    }
396

397
    size_t len = u32_strlen(*s);
2✔
398
    size_t x = idx;
2✔
399
    if (idx > len) {
2✔
400
        x = len;
1✔
401
    }
402

403
    *s = (uint32_t *) realloc(*s, (len + 1 + n) * sizeof(uint32_t));
2✔
404
    u32_move(*s + x + n, *s + x, len - x);
2✔
405
    u32_set(*s + x, char_space, n);
2✔
406
}
407

408

409
/* vim: set cindent sw=4: */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc