• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ascii-boxes / boxes / 6523970155

15 Oct 2023 12:12PM UTC coverage: 81.863% (+0.4%) from 81.504%
6523970155

push

github

tsjensen
Remove obsolete parameter 'skip' from u32_strnrstr() in 'unicode' module

2352 of 3188 branches covered (0.0%)

Branch coverage included in aggregate %.

6 of 6 new or added lines in 2 files covered. (100.0%)

3773 of 4294 relevant lines covered (87.87%)

7848.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.04
/src/unicode.c
1
/*
2
 * boxes - Command line filter to draw/remove ASCII boxes around text
3
 * Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
4
 *
5
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
6
 * License, version 3, as published by the Free Software Foundation.
7
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
8
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
9
 * details.
10
 * You should have received a copy of the GNU General Public License along with this program.
11
 * If not, see <https://www.gnu.org/licenses/>.
12
 *
13
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
14
 */
15

16
/*
17
 * Functions and constants for handling unicode strings with libunistring.
18
 */
19

20
#include "config.h"
21
#include <errno.h>
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25

26
#include <uniconv.h>
27
#include <unictype.h>
28
#include <unistr.h>
29

30
#include "boxes.h"
31
#include "tools.h"
32
#include "unicode.h"
33

34

35

36
/*
 * Name of the effective character encoding of input and output text. It is handed to the
 * conversion routines by u32_strconv_from_input() and u32_strconv_to_output(). It has no
 * initializer here, so it is NULL until some code outside this excerpt assigns it.
 */
const char *encoding;

/* ucs4_t character '\t' (tab), U+0009 */
const ucs4_t char_tab = 0x00000009;

/* ucs4_t character ' ' (space), U+0020 */
const ucs4_t char_space = 0x00000020;

/* ucs4_t character '\r' (carriage return), U+000D */
const ucs4_t char_cr = 0x0000000d;

/* ucs4_t character '\n' (newline), U+000A */
const ucs4_t char_newline = 0x0000000a;

/* ucs4_t character 0x1b (escape), U+001B - introduces the escape sequences skipped by advance_next32() */
const ucs4_t char_esc = 0x0000001b;

/* ucs4_t character '\0' (zero), U+0000 - the string terminator of UTF-32 strings */
const ucs4_t char_nul = 0x00000000;
56

57

58

59
/**
 * Check whether the UTF-32 unit at position `idx` of `text` equals `expected_char`.
 * No bounds checking is performed on `idx`.
 *
 * @param text the string to look into, may be NULL
 * @param idx zero-based index of the unit to compare
 * @param expected_char the character that is expected at that position
 * @return 1 if `text` is not NULL and the unit at `idx` matches, 0 otherwise
 */
int is_char_at(const uint32_t *text, const size_t idx, const ucs4_t expected_char)
{
    if (text == NULL) {
        return 0;
    }
    const uint32_t *unit = text + idx;
    return u32_cmp(unit, &expected_char, 1) == 0;
}
63

64

65

66
/**
 * Overwrite the UTF-32 unit at position `idx` of `text` with `char_to_set`.
 * Neither `text` nor `idx` is validated.
 *
 * @param text the string to modify
 * @param idx zero-based index of the unit to overwrite
 * @param char_to_set the character to store at that position
 */
void set_char_at(uint32_t *text, const size_t idx, const ucs4_t char_to_set)
{
    uint32_t *target = text + idx;
    u32_set(target, char_to_set, 1);
}
38,849✔
70

71

72

73
int is_empty(const uint32_t *text)
617,631✔
74
{
75
    return text == NULL || is_char_at(text, 0, char_nul);
617,631✔
76
}
77

78

79

80
int u32_is_blank(const uint32_t *text)
2✔
81
{
82
    if (is_empty(text)) {
2✔
83
        return 1;
1✔
84
    }
85

86
    for (const uint32_t *c = text; *c != char_nul; c++) {
5✔
87
        if (!is_blank(*c)) {
4!
88
            return 0;
×
89
        }
90
    }
91
    return 1;
1✔
92
}
93

94

95

96
/**
 * Determine whether a character is in the printable ASCII range, 0x20 (space) through 0x7e ('~').
 *
 * @param c the character to check
 * @return 1 if `c` is printable ASCII, 0 otherwise
 */
int is_ascii_printable(const ucs4_t c)
{
    if (c < 0x20) {
        return 0;
    }
    return c <= 0x7e;
}
100

101

102

103
int is_allowed_anywhere(const ucs4_t c)
712,678✔
104
{
105
    /* ESC, CR, LF, and TAB are control characters */
106
    return !uc_is_cntrl(c) || c == char_tab || c == char_cr || c == char_newline || c == char_esc;
712,678✔
107
}
108

109

110

111
int is_allowed_in_shape(const ucs4_t c)
23,929✔
112
{
113
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline;
23,929✔
114
}
115

116

117

118
/**
 * Sample filter: applies exactly the same rule as is_allowed_anywhere().
 *
 * @param c the character to check
 * @return 1 if the character is allowed, 0 otherwise
 */
int is_allowed_in_sample(const ucs4_t c)
{
    const int allowed = is_allowed_anywhere(c);
    return allowed;
}
122

123

124

125
int is_allowed_in_filename(const ucs4_t c)
246✔
126
{
127
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
246✔
128
}
129

130

131

132
int is_allowed_in_kv_string(const ucs4_t c)
22,619✔
133
{
134
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
22,619✔
135
}
136

137

138

139
int is_blank(const ucs4_t c)
1,179,804✔
140
{
141
    return c == char_tab || uc_is_blank(c);
1,179,804✔
142
}
143

144

145

146
/**
 * Allocate a new, empty UTF-32 string, consisting of a single zeroed unit (the terminator).
 *
 * @return the new string, for which the caller takes ownership (release with free()),
 *      or NULL if the allocation failed
 */
uint32_t *new_empty_string32(void)
{
    /* calloc() zero-fills, so the single unit already is the terminator */
    return calloc(1, sizeof(uint32_t));
}
150

151

152

153
ucs4_t to_utf32(char ascii)
1,057✔
154
{
155
    ucs4_t c = char_nul;
1,057✔
156
    if (ascii >= 0x20 && ascii < 0x7f) {
1,057!
157
        char *bytes = (char *) (&c);
1,056✔
158
        bytes[0] = ascii;
1,056✔
159
    }
160
    return c;
1,057✔
161
}
162

163

164

165
/**
 * Advance over the next item of a UTF-32 string: either a single ordinary character, or one
 * complete escape sequence started by ESC, whose characters are counted as invisible.
 *
 * The scan is a small state machine driven by `ansipos`:
 *   0 = not inside an escape sequence,
 *   1 = the previous character was ESC,
 *   2 = inside a sequence introduced by ESC '[' or ESC '('.
 * The order of the branches matters, because '[' (0x5b) also lies within the 0x40..0x5f range that
 * terminates a two-character escape sequence.
 *
 * @param s the string to scan, may be NULL or empty
 * @param invis output: number of characters counted as invisible. NOTE(review): it is NOT written
 *      when `s` is NULL or empty, so callers should initialize it themselves - confirm against callers.
 * @return pointer to the position following the consumed item; `s` itself if `s` is NULL or empty;
 *      the address of the terminator if the scan reached the end of the string
 */
uint32_t *advance_next32(const uint32_t *s, size_t *invis)
{
    if (is_empty(s)) {
        /* nothing to consume; *invis is left untouched on this path */
        return (uint32_t *) s;
    }

    int ansipos = 0;
    (*invis) = 0;
    ucs4_t c;
    const uint32_t *rest = s;
    /* u32_next() stores the current character in c; presumably it returns NULL at the terminator */
    while ((rest = u32_next(&c, rest))) {
        if (ansipos == 0 && c == char_esc) {
            /* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
            (*invis)++;
            ansipos++;
        } else if (ansipos == 1 && (c == '[' || c == '(')) {
            /* Found '[' or '(' right after ESC. A multi-character sequence has started. */
            (*invis)++;
            ansipos++;
        } else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) { /* between '@' and '_' (mostly uppercase letters) */
            /* Found a byte designating the end of a two-byte escape sequence */
            (*invis)++;
            ansipos = 0;
            break;
        } else if (ansipos == 2) {
            /* Inside CSI sequence - Keep counting chars as invisible */
            (*invis)++;

            /* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
            if (c >= 0x40 && c <= 0x7e) {
                ansipos = 0;
                break;
            }
        } else {
            /* An ordinary character (or an unexpected one right after ESC): consumed, but not counted */
            break;
        }
    }
    if (rest == NULL) {
        /* The loop ran off the end of the string: return the address of the terminator instead of NULL */
        rest = s + u32_strlen(s);
    }
    return (uint32_t *) rest;
}
207

208

209

210
uint32_t *u32_strconv_from_input(const char *src)
666✔
211
{
212
    return u32_strconv_from_arg(src, encoding);
666✔
213
}
214

215

216

217
uint32_t *u32_strconv_from_arg(const char *src, const char *sourceEncoding)
17,971✔
218
{
219
    if (src == NULL) {
17,971✔
220
        return NULL;
2✔
221
    }
222
    if (src[0] == '\0') {
17,969✔
223
        return new_empty_string32();
60✔
224
    }
225

226
    uint32_t *result = u32_strconv_from_encoding(
17,909✔
227
            src,                    /* the source string to convert */
228
            sourceEncoding,         /* the character encoding from which to convert */
229
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
230

231
    if (result == NULL) {
17,909!
232
        fprintf(stderr, "%s: failed to convert from '%s' to UTF-32: %s\n", PROJECT, sourceEncoding, strerror(errno));
×
233
    }
234
    return result;
17,909✔
235
}
236

237

238

239
char *u32_strconv_to_output(const uint32_t *src)
1,283✔
240
{
241
    return u32_strconv_to_arg(src, encoding);
1,283✔
242
}
243

244

245

246
char *u32_strconv_to_arg(const uint32_t *src, const char *targetEncoding)
1,447✔
247
{
248
    if (src == NULL) {
1,447!
249
        return NULL;
×
250
    }
251
    if (is_empty(src)) {
1,447!
252
        return strdup("");
×
253
    }
254

255
    char *result = u32_strconv_to_encoding(
1,447✔
256
            src,                    /* the source string to convert */
257
            targetEncoding,         /* the character encoding to which to convert */
258
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
259

260
    if (result == NULL) {
1,447!
261
        fprintf(stderr, "%s: failed to convert from UTF-32 to '%s': %s\n", PROJECT, targetEncoding, strerror(errno));
×
262
    }
263
    return result;
1,447✔
264
}
265

266

267

268
const char *check_encoding(const char *manual_encoding, const char *system_encoding)
160✔
269
{
270
    if (manual_encoding != NULL) {
160✔
271
        uint32_t *unicode = u32_strconv_from_encoding(" ", manual_encoding, iconveh_error);
13✔
272
        if (unicode != NULL) {
13✔
273
            BFREE(unicode);
12!
274
            return manual_encoding;
12✔
275
        }
276
        fprintf(stderr, "%s: Invalid character encoding: %s - falling back to %s\n",
1✔
277
                PROJECT, manual_encoding, system_encoding);
278
        fflush(stderr);
1✔
279
    }
280
    return system_encoding;
148✔
281
}
282

283

284

285
char *to_utf8(uint32_t *src)
177✔
286
{
287
    if (src == NULL) {
177!
288
        return NULL;
×
289
    }
290
    if (is_empty(src)) {
177!
291
        return (char *) strdup("");
×
292
    }
293
    char *result = u32_strconv_to_encoding(src, "UTF-8", iconveh_error);
177✔
294
    if (result == NULL) {
177!
295
        bx_fprintf(stderr, "%s: failed to convert a string to UTF-8: %s\n", PROJECT, strerror(errno));
×
296
        return NULL;
×
297
    }
298
    return result;
177✔
299
}
300

301

302

303
uint32_t *u32_nspaces(const size_t n)
917✔
304
{
305
    uint32_t *result = (uint32_t *) malloc((n + 1) * sizeof(uint32_t));
917✔
306
    if (result == NULL) {
917!
307
        perror(PROJECT);
×
308
        return NULL;
×
309
    }
310
    if (n > 0) {
917✔
311
        u32_set(result, char_space, n);
717✔
312
    }
313
    set_char_at(result, n, char_nul);
917✔
314
    return result;
917✔
315
}
316

317

318

319
/**
 * Search `haystack` backwards for the last position at which the first `needle_len` units of
 * `needle` match.
 *
 * The search starts at the last occurrence of the first needle character and walks towards the
 * start of `haystack`. The walk stops AT the first element: the pointer is never decremented
 * below `haystack`, since forming such a pointer is undefined behavior in C.
 *
 * @param haystack the string to search in, may be NULL
 * @param needle the string to search for, may be NULL
 * @param needle_len the number of units of `needle` to compare
 * @return `haystack` if `needle` is NULL or empty; NULL if `haystack` is NULL or empty or no
 *      match exists; otherwise a pointer into `haystack` at the start of the last match
 */
uint32_t *u32_strnrstr(const uint32_t *haystack, const uint32_t *needle, const size_t needle_len)
{
    if (is_empty(needle)) {
        return (uint32_t *) haystack;
    }
    if (is_empty(haystack)) {
        return NULL;
    }

    /* no match can start behind the last occurrence of the needle's first character */
    uint32_t *p = u32_strrchr(haystack, needle[0]);
    if (p == NULL) {
        return NULL;
    }

    for (;;) {
        if (u32_strncmp(p, needle, needle_len) == 0) {
            return p;
        }
        if (p == haystack) {
            break;    /* start of string reached without a match */
        }
        --p;
    }

    return NULL;
}
342

343

344

345
void u32_insert_space_at(uint32_t **s, const size_t idx, const size_t n)
4✔
346
{
347
    if (s == NULL || n == 0) {
4✔
348
        return;
2✔
349
    }
350

351
    size_t len = u32_strlen(*s);
2✔
352
    size_t x = idx;
2✔
353
    if (idx > len) {
2✔
354
        x = len;
1✔
355
    }
356

357
    *s = (uint32_t *) realloc(*s, (len + 1 + n) * sizeof(uint32_t));
2✔
358
    u32_move(*s + x + n, *s + x, len - x);
2✔
359
    u32_set(*s + x, char_space, n);
2✔
360
}
361

362

363
/* vim: set cindent sw=4: */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc