• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ascii-boxes / boxes / 25991660202

17 May 2026 01:04PM UTC coverage: 82.822%. Remained the same
25991660202

push

github

tsjensen
Fix a Heisenbug in u32_insert_space_at() in unicode.c

2776 of 3695 branches covered (75.13%)

Branch coverage included in aggregate %.

5 of 7 new or added lines in 1 file covered. (71.43%)

297 existing lines in 19 files now uncovered.

4345 of 4903 relevant lines covered (88.62%)

98937.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.66
/src/unicode.c
1
/*
2
 * boxes - Command line filter to draw/remove ASCII boxes around text
3
 * SPDX-FileCopyrightText: Copyright (c) 1999-2026 Thomas Jensen and the boxes contributors
4
 * SPDX-License-Identifier: GPL-3.0-only
5
 *
6
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
7
 * License, version 3, as published by the Free Software Foundation.
8
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
9
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
10
 * details.
11
 * You should have received a copy of the GNU General Public License along with this program.
12
 * If not, see <https://www.gnu.org/licenses/>.
13
 *
14
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
15
 */
16

17
/*
18
 * Functions and constants for handling unicode strings with libunistring.
19
 */
20

21
#include "config.h"
22
#include <errno.h>
23
#include <stdio.h>
24
#include <stdlib.h>
25
#include <string.h>
26

27
#include <uniconv.h>
28
#include <unictype.h>
29
#include <unistr.h>
30

31
#include "boxes.h"
32
#include "tools.h"
33
#include "unicode.h"
34

35

36

37
/* effective character encoding of input and output text */
38
const char *encoding;
39

40
/* ucs4_t character '\t' (tab)  */
41
const ucs4_t char_tab = 0x00000009;
42

43
/* ucs4_t character ' '  (space) */
44
const ucs4_t char_space = 0x00000020;
45

46
/* ucs4_t character '\r' (carriage return) */
47
const ucs4_t char_cr = 0x0000000d;
48

49
/* ucs4_t character '\n' (newline) */
50
const ucs4_t char_newline = 0x0000000a;
51

52
/* ucs4_t character 0x1b (escape)  */
53
const ucs4_t char_esc = 0x0000001b;
54

55
/* ucs4_t character '\0' (zero) */
56
const ucs4_t char_nul = 0x00000000;
57

58

59

60
/*
 * Check whether the character at position `idx` of `text` is `expected_char`.
 *
 * text:          the UTF-32 string to inspect; may be NULL
 * idx:           index into `text`; not range-checked here
 * expected_char: the character to compare against
 *
 * Returns 1 if `text` is not NULL and the character matches, 0 otherwise.
 */
int is_char_at(const uint32_t *text, const size_t idx, const ucs4_t expected_char)
{
    if (text == NULL) {
        return 0;
    }
    return u32_cmp(&text[idx], &expected_char, 1) == 0 ? 1 : 0;
}
64

65

66

67
/*
 * Overwrite the single character at position `idx` of `text` with `char_to_set`.
 *
 * text:        the UTF-32 buffer to modify; not checked for NULL
 * idx:         index of the character to overwrite; not range-checked here
 * char_to_set: the new character value
 */
void set_char_at(uint32_t *text, const size_t idx, const ucs4_t char_to_set)
{
    uint32_t *target = text + idx;
    u32_set(target, char_to_set, 1);
}
71

72

73

74
int is_empty(const uint32_t *text)
9,145,118✔
75
{
76
    return text == NULL || is_char_at(text, 0, char_nul);
9,145,118✔
77
}
78

79

80

81
int u32_is_blank(const uint32_t *text)
56✔
82
{
83
    if (is_empty(text)) {
56✔
84
        return 1;
1✔
85
    }
86

87
    for (const uint32_t *c = text; *c != char_nul; c++) {
297✔
88
        if (!is_blank(*c)) {
242!
UNCOV
89
            return 0;
×
90
        }
91
    }
92
    return 1;
55✔
93
}
94

95

96

97
/*
 * Check whether `c` lies in the range 0x20 (space) to 0x7e ('~'), inclusive.
 *
 * Returns 1 if it does, 0 otherwise.
 */
int is_ascii_printable(const ucs4_t c)
{
    if (c < 0x20) {
        return 0;
    }
    return c < 0x7f;
}
101

102

103

104
int is_allowed_anywhere(const ucs4_t c)
9,522,609✔
105
{
106
    /* ESC, CR, LF, and TAB are control characters */
107
    return !uc_is_cntrl(c) || c == char_tab || c == char_cr || c == char_newline || c == char_esc;
9,522,609✔
108
}
109

110

111

112
int is_allowed_in_shape(const ucs4_t c)
109,913✔
113
{
114
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline;
109,913✔
115
}
116

117

118

119
/*
 * Check whether `c` may appear in a sample. The rule is exactly the one
 * implemented by is_allowed_anywhere().
 *
 * Returns the result of is_allowed_anywhere(c).
 */
int is_allowed_in_sample(const ucs4_t c)
{
    const int allowed = is_allowed_anywhere(c);
    return allowed;
}
123

124

125

126
int is_allowed_in_filename(const ucs4_t c)
281✔
127
{
128
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
281✔
129
}
130

131

132

133
int is_allowed_in_kv_string(const ucs4_t c)
29,443✔
134
{
135
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
29,443✔
136
}
137

138

139

140
int is_blank(const ucs4_t c)
18,148,948✔
141
{
142
    return c == char_tab || uc_is_blank(c);
18,148,948✔
143
}
144

145

146

147
/*
 * Allocate a new, empty UTF-32 string, i.e. a single zero-initialized
 * uint32_t that serves as the terminator.
 *
 * Returns the newly allocated string, or NULL if calloc() fails. The caller
 * owns the memory and releases it with free().
 */
uint32_t *new_empty_string32(void)
{
    /* calloc() zero-fills, so the one element is already the terminator */
    uint32_t *result = calloc(1, sizeof *result);
    return result;
}
151

152

153

154
ucs4_t to_utf32(char ascii)
2,296✔
155
{
156
    ucs4_t c = char_nul;
2,296✔
157
    if (ascii >= 0x20 && ascii < 0x7f) {
2,296!
158
        c = (ucs4_t)ascii;  // Store the ASCII value directly in c
2,295✔
159
    }
160
    return c;
2,296✔
161
}
162

163

164

165
uint32_t *advance_next32(const uint32_t *s, size_t *invis)
9,125,255✔
166
{
167
    if (is_empty(s)) {
9,125,255!
UNCOV
168
        return (uint32_t *) s;
×
169
    }
170

171
    int ansipos = 0;
9,125,255✔
172
    (*invis) = 0;
9,125,255✔
173
    ucs4_t c;
174
    const uint32_t *rest = s;
9,125,255✔
175
    while ((rest = u32_next(&c, rest))) {
9,294,976✔
176
        if (ansipos == 0 && c == char_esc) {
9,294,975✔
177
            /* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
178
            (*invis)++;
31,702✔
179
            ansipos++;
31,702✔
180
        } else if (ansipos == 1 && (c == '[' || c == '(')) {
9,263,273!
181
            /* Found '[' char after ESC. A CSI sequence has started. */
182
            (*invis)++;
31,702✔
183
            ansipos++;
31,702✔
184
        } else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) { /* between '@' and '_' (mostly uppercase letters) */
9,231,571!
185
            /* Found a byte designating the end of a two-byte escape sequence */
186
            (*invis)++;
×
187
            ansipos = 0;
×
UNCOV
188
            break;
×
189
        } else if (ansipos == 2) {
9,231,571✔
190
            /* Inside CSI sequence - Keep counting chars as invisible */
191
            (*invis)++;
138,018✔
192

193
            /* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
194
            if (c >= 0x40 && c <= 0x7e) {
138,018!
195
                ansipos = 0;
31,701✔
196
                break;
31,701✔
197
            }
198
        } else {
199
            break;
9,093,553✔
200
        }
201
    }
202
    if (rest == NULL) {
9,125,255✔
203
        rest = s + u32_strlen(s);
1✔
204
    }
205
    return (uint32_t *) rest;
9,125,255✔
206
}
207

208

209

210
uint32_t *u32_strconv_from_input(const char *src)
2,635✔
211
{
212
    return u32_strconv_from_arg(src, encoding);
2,635✔
213
}
214

215

216

217
uint32_t *u32_strconv_from_arg(const char *src, const char *sourceEncoding)
287,998✔
218
{
219
    if (src == NULL) {
287,998✔
220
        return NULL;
2✔
221
    }
222
    if (src[0] == '\0') {
287,996✔
223
        return new_empty_string32();
63✔
224
    }
225

226
    uint32_t *result = u32_strconv_from_encoding(
287,933✔
227
            src,                    /* the source string to convert */
228
            sourceEncoding,         /* the character encoding from which to convert */
229
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
230

231
    if (result == NULL) {
287,933!
UNCOV
232
        fprintf(stderr, "%s: failed to convert from '%s' to UTF-32: %s\n", PROJECT, sourceEncoding, strerror(errno));
×
233
    }
234
    return result;
287,933✔
235
}
236

237

238

239
char *u32_strconv_to_output(const uint32_t *src)
3,367✔
240
{
241
    return u32_strconv_to_arg(src, encoding);
3,367✔
242
}
243

244

245

246
char *u32_strconv_to_arg(const uint32_t *src, const char *targetEncoding)
3,764✔
247
{
248
    if (src == NULL) {
3,764!
UNCOV
249
        return NULL;
×
250
    }
251
    if (is_empty(src)) {
3,764✔
252
        return strdup("");
14✔
253
    }
254

255
    char *result = u32_strconv_to_encoding(
3,750✔
256
            src,                    /* the source string to convert */
257
            targetEncoding,         /* the character encoding to which to convert */
258
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
259

260
    if (result == NULL) {
3,750!
UNCOV
261
        fprintf(stderr, "%s: failed to convert from UTF-32 to '%s': %s\n", PROJECT, targetEncoding, strerror(errno));
×
262
    }
263
    return result;
3,750✔
264
}
265

266

267

268
const char *check_encoding(const char *manual_encoding, const char *system_encoding)
391✔
269
{
270
    if (manual_encoding != NULL) {
391✔
271
        uint32_t *unicode = u32_strconv_from_encoding(" ", manual_encoding, iconveh_error);
13✔
272
        if (unicode != NULL) {
13✔
273
            BFREE(unicode);
12!
274
            return manual_encoding;
12✔
275
        }
276
        fprintf(stderr, "%s: Invalid character encoding: %s - falling back to %s\n",
1✔
277
                PROJECT, manual_encoding, system_encoding);
278
        fflush(stderr);
1✔
279
    }
280
    return system_encoding;
379✔
281
}
282

283

284

285
char *to_utf8(uint32_t *src)
412✔
286
{
287
    if (src == NULL) {
412!
UNCOV
288
        return NULL;
×
289
    }
290
    if (is_empty(src)) {
412!
UNCOV
291
        return (char *) strdup("");
×
292
    }
293
    char *result = u32_strconv_to_encoding(src, "UTF-8", iconveh_error);
412✔
294
    if (result == NULL) {
412!
295
        bx_fprintf(stderr, "%s: failed to convert a string to UTF-8: %s\n", PROJECT, strerror(errno));
×
UNCOV
296
        return NULL;
×
297
    }
298
    return result;
412✔
299
}
300

301

302

303
uint32_t *u32_nspaces(const size_t n)
1,760✔
304
{
305
    uint32_t *result = (uint32_t *) malloc((n + 1) * sizeof(uint32_t));
1,760✔
306
    if (result == NULL) {
1,760!
307
        perror(PROJECT);
×
UNCOV
308
        return NULL;
×
309
    }
310
    if (n > 0) {
1,760✔
311
        u32_set(result, char_space, n);
1,339✔
312
    }
313
    set_char_at(result, n, char_nul);
1,760✔
314
    return result;
1,760✔
315
}
316

317

318

319
/*
 * Search `haystack` from the back for the first `needle_len` characters of
 * `needle`.
 *
 * haystack:   the string to search in; may be NULL or empty
 * needle:     the string to search for; may be NULL or empty
 * needle_len: the number of characters of `needle` to compare at each
 *             position (passed on to u32_strncmp())
 *
 * Returns `haystack` if `needle` is empty; NULL if `haystack` is empty or
 * nothing matches; otherwise a pointer to the last position in `haystack`,
 * at or before the last occurrence of needle[0], where the comparison
 * succeeds.
 */
uint32_t *u32_strnrstr(const uint32_t *haystack, const uint32_t *needle, const size_t needle_len)
{
    if (is_empty(needle)) {
        return (uint32_t *) haystack;
    }
    if (is_empty(haystack)) {
        return NULL;
    }

    /* No match can start after the last occurrence of the needle's first character. */
    const uint32_t *p = u32_strrchr(haystack, needle[0]);
    if (p == NULL) {
        return NULL;
    }

    for (;;) {
        if (u32_strncmp(p, needle, needle_len) == 0) {
            return (uint32_t *) p;
        }
        if (p == haystack) {
            /* Stop here: moving p in front of the start of haystack would be undefined behavior. */
            break;
        }
        --p;
    }

    return NULL;
}
342

343

344

345
void u32_insert_space_at(uint32_t **s, const size_t idx, const size_t n)
4✔
346
{
347
    if (s == NULL || *s == NULL || n == 0) {
4!
348
        return;
2✔
349
    }
350

351
    size_t len = u32_strlen(*s);
2✔
352
    size_t x = idx;
2✔
353
    if (idx > len) {
2✔
354
        x = len;
1✔
355
    }
356

357
    uint32_t *tmp = (uint32_t *) realloc(*s, (len + 1 + n) * sizeof(uint32_t));
2✔
358
    if (tmp == NULL) {
2!
NEW
359
        perror(PROJECT);
×
NEW
360
        return;
×
361
    }
362

363
    *s = tmp;
2✔
364
    u32_move(*s + x + n, *s + x, len - x + 1);
2✔
365
    u32_set(*s + x, char_space, n);
2✔
366
}
367

368

369
/* vim: set cindent sw=4: */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc