• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ascii-boxes / boxes / 5017901761

18 May 2023 08:22PM UTC coverage: 81.608% (-0.3%) from 81.947%
5017901761

push

github

Thomas Jensen
Enable terminal colors globally for tests

2190 of 2965 branches covered (73.86%)

Branch coverage included in aggregate %.

3414 of 3902 relevant lines covered (87.49%)

6909.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.68
/src/unicode.c
1
/*
2
 * boxes - Command line filter to draw/remove ASCII boxes around text
3
 * Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
4
 *
5
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
6
 * License, version 3, as published by the Free Software Foundation.
7
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
8
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
9
 * details.
10
 * You should have received a copy of the GNU General Public License along with this program.
11
 * If not, see <https://www.gnu.org/licenses/>.
12
 *
13
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
14
 */
15

16
/*
17
 * Functions and constants for handling Unicode strings with libunistring.
18
 */
19

20
#include "config.h"
21
#include <errno.h>
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25

26
#include <uniconv.h>
27
#include <unictype.h>
28
#include <unistr.h>
29

30
#include "boxes.h"
31
#include "tools.h"
32
#include "unicode.h"
33

34

35

36
/* effective character encoding of input and output text */
37
const char *encoding;
38

39
/* ucs4_t character '\t' (tab)  */
40
const ucs4_t char_tab = 0x00000009;
41

42
/* ucs4_t character ' '  (space) */
43
const ucs4_t char_space = 0x00000020;
44

45
/* ucs4_t character '\r' (carriage return) */
46
const ucs4_t char_cr = 0x0000000d;
47

48
/* ucs4_t character '\n' (newline) */
49
const ucs4_t char_newline = 0x0000000a;
50

51
/* ucs4_t character 0x1b (escape)  */
52
const ucs4_t char_esc = 0x0000001b;
53

54
/* ucs4_t character '\0' (zero) */
55
const ucs4_t char_nul = 0x00000000;
56

57

58

59
/*
 * Tell whether the character stored at index idx of text equals expected_char.
 * A NULL text never matches. Returns 1 on a match, 0 otherwise.
 */
int is_char_at(const uint32_t *text, const size_t idx, const ucs4_t expected_char)
{
    if (text == NULL) {
        return 0;
    }
    return u32_cmp(&text[idx], &expected_char, 1) == 0;
}
63

64

65

66
/*
 * Store char_to_set at index idx of text by delegating to u32_set() with a
 * count of one. A NULL text is ignored, in line with the NULL tolerance of
 * is_char_at() and is_empty(). The caller must make sure idx is within the
 * buffer.
 */
void set_char_at(uint32_t *text, const size_t idx, const ucs4_t char_to_set)
{
    if (text == NULL) {
        return;
    }
    u32_set(text + idx, char_to_set, 1);
}
33,580✔
70

71

72

73
int is_empty(const uint32_t *text)
518,479✔
74
{
75
    return text == NULL || is_char_at(text, 0, char_nul);
518,479!
76
}
77

78

79

80
/*
 * Tell whether c lies in the range 0x20 (space) up to and including 0x7e ('~').
 * Returns 1 if so, 0 otherwise.
 */
int is_ascii_printable(const ucs4_t c)
{
    return !(c < 0x20 || c >= 0x7f);
}
84

85

86

87
int is_allowed_anywhere(const ucs4_t c)
524,112✔
88
{
89
    /* ESC, CR, LF, and TAB are control characters */
90
    return !uc_is_cntrl(c) || c == char_tab || c == char_cr || c == char_newline || c == char_esc;
524,112✔
91
}
92

93

94

95
int is_allowed_in_shape(const ucs4_t c)
14,092✔
96
{
97
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline;
14,092✔
98
}
99

100

101

102
/*
 * Character check for samples: no restriction beyond is_allowed_anywhere(),
 * whose result is passed through unchanged.
 */
int is_allowed_in_sample(const ucs4_t c)
{
    const int allowed = is_allowed_anywhere(c);
    return allowed;
}
106

107

108

109
int is_allowed_in_filename(const ucs4_t c)
246✔
110
{
111
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
246✔
112
}
113

114

115

116
int is_allowed_in_kv_string(const ucs4_t c)
16,768✔
117
{
118
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
16,768✔
119
}
120

121

122

123
int is_blank(const ucs4_t c)
895,780✔
124
{
125
    return c == char_tab || uc_is_blank(c);
895,780✔
126
}
127

128

129

130
/*
 * Allocate a new, empty UTF-32 string consisting of just the zero terminator.
 * Returns the zero-initialized single-element buffer obtained from calloc(),
 * or NULL if the allocation fails. The caller owns the memory and releases it
 * with free().
 */
uint32_t *new_empty_string32(void)
{
    return (uint32_t *) calloc(1, sizeof(uint32_t));
}
134

135

136

137
ucs4_t to_utf32(char ascii)
834✔
138
{
139
    ucs4_t c = char_nul;
834✔
140
    if (ascii >= 0x20 && ascii < 0x7f) {
834!
141
        char *bytes = (char *) (&c);
833✔
142
        bytes[0] = ascii;
833✔
143
    }
144
    return c;
834✔
145
}
146

147

148

149
uint32_t *advance_next32(const uint32_t *s, size_t *invis)
513,459✔
150
{
151
    if (is_empty(s)) {
513,459!
152
        return (uint32_t *) s;
×
153
    }
154

155
    int ansipos = 0;
513,459✔
156
    (*invis) = 0;
513,459✔
157
    ucs4_t c;
158
    const uint32_t *rest = s;
513,459✔
159
    while ((rest = u32_next(&c, rest))) {
561,892✔
160
        if (ansipos == 0 && c == char_esc) {
561,891✔
161
            /* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
162
            (*invis)++;
7,830✔
163
            ansipos++;
7,830✔
164
        } else if (ansipos == 1 && (c == '[' || c == '(')) {
554,061!
165
            /* Found '[' char after ESC. A CSI sequence has started. */
166
            (*invis)++;
7,830✔
167
            ansipos++;
7,830✔
168
        } else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) { /* between '@' and '_' (mostly uppercase letters) */
546,231!
169
            /* Found a byte designating the end of a two-byte escape sequence */
170
            (*invis)++;
×
171
            ansipos = 0;
×
172
            break;
×
173
        } else if (ansipos == 2) {
546,231✔
174
            /* Inside CSI sequence - Keep counting chars as invisible */
175
            (*invis)++;
40,602✔
176

177
            /* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
178
            if (c >= 0x40 && c <= 0x7e) {
40,602!
179
                ansipos = 0;
7,829✔
180
                break;
7,829✔
181
            }
182
        } else {
183
            break;
505,629✔
184
        }
185
    }
186
    if (rest == NULL) {
513,459✔
187
        rest = s + u32_strlen(s);
1✔
188
    }
189
    return (uint32_t *) rest;
513,459✔
190
}
191

192

193

194
uint32_t *advance32(uint32_t *s, const size_t offset)
1,214✔
195
{
196
    if (is_empty(s)) {
1,214✔
197
        return new_empty_string32();
36✔
198
    }
199
    if (offset == 0) {
1,178✔
200
        return s;
981✔
201
    }
202

203
    size_t count = 0;                 /* the count of visible characters */
197✔
204
    int visible = 1;                  /* flag indicating whether the previous char was a visible char */
197✔
205
    const uint32_t *last_esc = NULL;  /* pointer to the start of the last escape sequence encountered */
197✔
206
    const uint32_t *rest = s;         /* pointer to the next character coming up */
197✔
207
    size_t step_invis = 0;            /* unused, but required for advance_next32() call */
197✔
208

209
    for (ucs4_t c = s[0]; c != char_nul; c = rest[0]) {
1,666!
210
        if (c == char_esc) {
1,666✔
211
            last_esc = rest;
787✔
212
            visible = 0;
787✔
213
        } else {
214
            if (count++ == offset) {
879✔
215
                if (!visible && last_esc != NULL) {
197!
216
                    return (uint32_t *) last_esc;
111✔
217
                }
218
                break;
86✔
219
            }
220
            visible = 1;
682✔
221
        }
222
        rest = advance_next32(rest, &step_invis);
1,469✔
223
    }
224
    return (uint32_t *) rest;         /* may point to zero terminator when offset too large */
86✔
225
}
226

227

228

229
uint32_t *u32_strconv_from_input(const char *src)
629✔
230
{
231
    return u32_strconv_from_arg(src, encoding);
629✔
232
}
233

234

235

236
uint32_t *u32_strconv_from_arg(const char *src, const char *sourceEncoding)
15,413✔
237
{
238
    if (src == NULL) {
15,413✔
239
        return NULL;
2✔
240
    }
241
    if (src[0] == '\0') {
15,411✔
242
        return new_empty_string32();
9✔
243
    }
244

245
    uint32_t *result = u32_strconv_from_encoding(
15,402✔
246
            src,                    /* the source string to convert */
247
            sourceEncoding,         /* the character encoding from which to convert */
248
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
249

250
    if (result == NULL) {
15,402!
251
        fprintf(stderr, "%s: failed to convert from '%s' to UTF-32: %s\n", PROJECT, sourceEncoding, strerror(errno));
×
252
    }
253
    return result;
15,402✔
254
}
255

256

257

258
char *u32_strconv_to_output(const uint32_t *src)
1,271✔
259
{
260
    return u32_strconv_to_arg(src, encoding);
1,271✔
261
}
262

263

264

265
char *u32_strconv_to_arg(const uint32_t *src, const char *targetEncoding)
1,432✔
266
{
267
    if (src == NULL) {
1,432!
268
        return NULL;
×
269
    }
270
    if (is_empty(src)) {
1,432!
271
        return strdup("");
×
272
    }
273

274
    char *result = u32_strconv_to_encoding(
1,432✔
275
            src,                    /* the source string to convert */
276
            targetEncoding,         /* the character encoding to which to convert */
277
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
278

279
    if (result == NULL) {
1,432!
280
        fprintf(stderr, "%s: failed to convert from UTF-32 to '%s': %s\n", PROJECT, targetEncoding, strerror(errno));
×
281
    }
282
    return result;
1,432✔
283
}
284

285

286

287
const char *check_encoding(const char *manual_encoding, const char *system_encoding)
157✔
288
{
289
    if (manual_encoding != NULL) {
157✔
290
        uint32_t *unicode = u32_strconv_from_encoding(" ", manual_encoding, iconveh_error);
13✔
291
        if (unicode != NULL) {
13✔
292
            BFREE(unicode);
12!
293
            return manual_encoding;
12✔
294
        }
295
        fprintf(stderr, "%s: Invalid character encoding: %s - falling back to %s\n",
1✔
296
                PROJECT, manual_encoding, system_encoding);
297
        fflush(stderr);
1✔
298
    }
299
    return system_encoding;
145✔
300
}
301

302

303

304
char *to_utf8(uint32_t *src)
174✔
305
{
306
    if (src == NULL) {
174!
307
        return NULL;
×
308
    }
309
    if (is_empty(src)) {
174!
310
        return (char *) strdup("");
×
311
    }
312
    char *result = u32_strconv_to_encoding(src, "UTF-8", iconveh_error);
174✔
313
    if (result == NULL) {
174!
314
        bx_fprintf(stderr, "%s: failed to convert a string to UTF-8: %s\n", PROJECT, strerror(errno));
×
315
        return NULL;
×
316
    }
317
    return result;
174✔
318
}
319

320

321

322
uint32_t *u32_nspaces(const size_t n)
905✔
323
{
324
    uint32_t *result = (uint32_t *) malloc((n + 1) * sizeof(uint32_t));
905✔
325
    if (result == NULL) {
905!
326
        perror(PROJECT);
×
327
        return NULL;
×
328
    }
329
    if (n > 0) {
905✔
330
        u32_set(result, char_space, n);
705✔
331
    }
332
    set_char_at(result, n, char_nul);
905✔
333
    return result;
905✔
334
}
335

336

337
/* vim: set cindent sw=4: */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc