• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Tehreer / SheenBidi / 21298031641

23 Jan 2026 07:10PM UTC coverage: 95.567% (-0.8%) from 96.403%
21298031641

push

github

mta452
Make text editing and analysis API optional

2393 of 2504 relevant lines covered (95.57%)

743160.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

56.95
/Source/API/SBCodepoint.c
1
/*
2
 * Copyright (C) 2025 Muhammad Tayyab Akram
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *      http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
#include <stddef.h>
18

19
#include <API/SBAssert.h>
20
#include <API/SBBase.h>
21
#include <Data/BidiTypeLookup.h>
22
#include <Data/GeneralCategoryLookup.h>
23
#include <Data/PairingLookup.h>
24
#include <Data/ScriptLookup.h>
25

26
#include "SBCodepoint.h"
27

28
typedef struct {
29
    SBUInt8 valid;
30
    SBUInt8 total;
31
    SBUInt8 start;
32
    SBUInt8 end;
33
} UTF8State;
34

35
/**
 * Decoding states indexed by the values stored in UTF8LookupTable. Each entry is laid out as
 * { valid, total, start, end }; the start/end range constrains only the second byte of a
 * sequence, since the decoder resets it to 0x80..0xBF after the first accepted trailing byte.
 */
static const UTF8State UTF8StateTable[9] = {
    { 1, 0, 0x00, 0x00 },   /* 0: single byte sequence */
    { 0, 0, 0x00, 0x00 },   /* 1: byte that cannot begin a sequence */
    { 1, 2, 0x80, 0xBF },   /* 2: two bytes */
    { 1, 3, 0xA0, 0xBF },   /* 3: three bytes, second byte limited to A0..BF */
    { 1, 3, 0x80, 0xBF },   /* 4: three bytes */
    { 1, 3, 0x80, 0x9F },   /* 5: three bytes, second byte limited to 80..9F */
    { 1, 4, 0x90, 0xBF },   /* 6: four bytes, second byte limited to 90..BF */
    { 1, 4, 0x80, 0xBF },   /* 7: four bytes */
    { 1, 4, 0x80, 0x8F }    /* 8: four bytes, second byte limited to 80..8F */
};
40

41
/**
 * Maps each possible first byte of a UTF-8 sequence to an index into UTF8StateTable. Bytes
 * that cannot begin a sequence (80..BF, C0..C1 and F5..FF) are mapped to state 1.
 */
static const SBUInt8 UTF8LookupTable[256] = {
    /* 00..0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20..2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30..3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 40..4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 50..5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 60..6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 70..7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 80..8F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 90..9F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* A0..AF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* B0..BF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* C0..CF */ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* D0..DF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* E0..EF */ 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
    /* F0..FF */ 6, 7, 7, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
76

77
SB_INTERNAL const void *SBCodepointGetBufferOffset(const void *buffer,
×
78
    SBStringEncoding encoding, SBUInteger index)
79
{
80
    switch (encoding) {
×
81
    case SBStringEncodingUTF8:
×
82
        return (const SBUInt8 *)buffer + index;
×
83
    
84
    case SBStringEncodingUTF16:
×
85
        return (const SBUInt16 *)buffer + index;
×
86

87
    case SBStringEncodingUTF32:
×
88
        return (const SBUInt32 *)buffer + index;
×
89

90
    default:
×
91
        return NULL;
×
92
    }
93
}
94

95
SB_INTERNAL void SBCodepointSkipToStart(const void *buffer, SBUInteger length,
×
96
    SBStringEncoding encoding, SBUInteger *index)
97
{
98
    /* Index MUST be valid */
99
    SBAssert(*index < length);
×
100

101
    switch (encoding) {
×
102
    case SBStringEncodingUTF8: {
×
103
        const SBUInt8 *codeUnits = buffer;
×
104
        SBUInteger start = *index;
×
105

106
        /* Advance 4 code units to take trailing ones into account */
107
        if (*index + 4 < length) {
×
108
            *index += 4;
×
109
        } else {
110
            *index = length;
×
111
        }
112

113
        /* Get previous code point until start is reached/passed */
114
        do {
115
            SBCodepointDecodePreviousFromUTF8(codeUnits, length, index);
×
116
        } while (*index > start);
×
117

118
        break;
×
119
    }
120

121
    case SBStringEncodingUTF16: {
×
122
        const SBUInt16 *codeUnits = buffer;
×
123
        SBUInteger start = *index;
×
124

125
        /* Advance one code unit to take surrogate into account. */
126
        if (*index < length) {
×
127
            *index += 1;
×
128
        }
129

130
        /* Get previous code point until start is reached/passed. */
131
        do {
132
            SBCodepointDecodePreviousFromUTF16(codeUnits, length, index);
×
133
        } while (*index > start);
×
134

135
        break;
×
136
    }
137

138
    case SBStringEncodingUTF32:
×
139
        /* Nothing to do here. */
140
        break;
×
141
    }
142
}
×
143

144
SB_INTERNAL void SBCodepointSkipToEnd(const void *buffer, SBUInteger length,
×
145
    SBStringEncoding encoding, SBUInteger *index)
146
{
147
    /* Index MUST be valid */
148
    SBAssert(*index < length);
×
149

150
    switch (encoding) {
×
151
    case SBStringEncodingUTF8: {
×
152
        const SBUInt8 *codeUnits = buffer;
×
153
        SBUInteger end = *index + 1;
×
154

155
        /* Advance 4 code units to take leading ones into account. */
156
        if (*index > 4) {
×
157
            *index -= 4;
×
158
        } else {
159
            *index = 0;
×
160
        }
161

162
        /* Get next code point until end is reached/passed. */
163
        do {
164
            SBCodepointDecodeNextFromUTF8(codeUnits, length, index);
×
165
        } while (*index < end);
×
166

167
        break;
×
168
    }
169

170
    case SBStringEncodingUTF16: {
×
171
        const SBUInt16 *codeUnits = buffer;
×
172
        SBUInteger end = *index + 1;
×
173

174
        /* Advance one code unit to take surrogate into account. */
175
        if (*index > 1) {
×
176
            *index -= 1;
×
177
        }
178

179
        /* Get next code point until end is reached/passed. */
180
        do {
181
            SBCodepointDecodeNextFromUTF16(codeUnits, length, index);
×
182
        } while (*index < end);
×
183

184
        break;
×
185
    }
186

187
    case SBStringEncodingUTF32:
×
188
        *index += 1;
×
189
        break;
×
190
    }
191
}
×
192

193
/**
 * Tells whether `bracket` matches `codepoint`, either exactly or through the two pairs that
 * this function treats as interchangeable: U+2329 with U+3008, and U+232A with U+3009.
 *
 * @param codepoint Code point to compare against.
 * @param bracket   Candidate bracket code point.
 * @return Non-zero if `bracket` equals `codepoint` or its interchangeable counterpart.
 */
SB_INTERNAL SBBoolean SBCodepointIsCanonicalEquivalentBracket(
    SBCodepoint codepoint, SBCodepoint bracket)
{
    /* Code points without a counterpart are only equivalent to themselves. */
    SBCodepoint counterpart = codepoint;

    if (codepoint == 0x2329) {
        counterpart = 0x3008;
    } else if (codepoint == 0x3008) {
        counterpart = 0x2329;
    } else if (codepoint == 0x232A) {
        counterpart = 0x3009;
    } else if (codepoint == 0x3009) {
        counterpart = 0x232A;
    }

    return bracket == codepoint || bracket == counterpart;
}
220

221
/**
 * Returns the bidirectional type of a code point, as reported by LookupBidiType.
 *
 * @param codepoint Code point to look up.
 * @return The value produced by LookupBidiType for `codepoint`.
 */
SBBidiType SBCodepointGetBidiType(SBCodepoint codepoint)
{
    const SBBidiType bidiType = LookupBidiType(codepoint);

    return bidiType;
}
225

226
/**
 * Returns the general category of a code point, as reported by LookupGeneralCategory.
 *
 * @param codepoint Code point to look up.
 * @return The value produced by LookupGeneralCategory for `codepoint`.
 */
SBGeneralCategory SBCodepointGetGeneralCategory(SBCodepoint codepoint)
{
    const SBGeneralCategory generalCategory = LookupGeneralCategory(codepoint);

    return generalCategory;
}
230

231
/**
 * Returns the mirror of a code point, as reported by LookupMirror.
 *
 * @param codepoint Code point to look up.
 * @return The value produced by LookupMirror for `codepoint`.
 */
SBCodepoint SBCodepointGetMirror(SBCodepoint codepoint)
{
    const SBCodepoint mirror = LookupMirror(codepoint);

    return mirror;
}
235

236
/**
 * Returns the script of a code point, as reported by LookupScript.
 *
 * @param codepoint Code point to look up.
 * @return The value produced by LookupScript for `codepoint`.
 */
SBScript SBCodepointGetScript(SBCodepoint codepoint)
{
    const SBScript script = LookupScript(codepoint);

    return script;
}
240

241
/**
 * Decodes the UTF-8 sequence that begins at `*index` and advances `*index` past the code
 * units it consumed.
 *
 * @param buffer UTF-8 code units.
 * @param length Number of code units in the buffer.
 * @param index  In/out position. Left untouched when it is not below `length`; otherwise it
 *               always advances by at least one code unit. When an unacceptable trailing
 *               byte is met, the index stops on that byte without consuming it.
 * @return The decoded code point, SBCodepointFaulty if the sequence is malformed or cut off
 *         by the end of the buffer, or SBCodepointInvalid if `*index` is out of range.
 */
SBCodepoint SBCodepointDecodeNextFromUTF8(const SBUInt8 *buffer, SBUInteger length, SBUInteger *index)
{
    SBUInt8 lead;
    UTF8State state;
    SBUInteger limit;
    SBCodepoint codepoint;

    if (*index >= length) {
        return SBCodepointInvalid;
    }

    lead = buffer[*index];
    state = UTF8StateTable[UTF8LookupTable[lead]];
    limit = *index + state.total;

    /* A sequence running past the buffer is faulty, but its available bytes are still scanned. */
    if (limit > length) {
        limit = length;
        state.valid = SBFalse;
    }

    /* Keep only the payload bits of the first byte. */
    codepoint = lead & (0x7F >> state.total);

    for (*index += 1; *index < limit; *index += 1) {
        SBUInt8 byte = buffer[*index];

        if (byte < state.start || byte > state.end) {
            state.valid = SBFalse;
            break;
        }

        codepoint = (codepoint << 6) | (byte & 0x3F);

        /* Only the second byte has a restricted range; the rest accept any trailing byte. */
        state.start = 0x80;
        state.end = 0xBF;
    }

    return state.valid ? codepoint : SBCodepointFaulty;
}
278

279
/**
 * Decodes the UTF-8 sequence that ends immediately before `*index` and moves `*index` back
 * to where that sequence begins.
 *
 * @param buffer UTF-8 code units.
 * @param length Number of code units in the buffer.
 * @param index  In/out position. Left untouched when `*index - 1` is not below `length`
 *               (which includes an index of zero). Moves to the start of the sequence on
 *               success, or back by exactly one code unit when no sequence ends at it.
 * @return The decoded code point, SBCodepointFaulty if no well-formed sequence ends exactly
 *         at `*index`, or SBCodepointInvalid if the position is out of range.
 */
SBCodepoint SBCodepointDecodePreviousFromUTF8(const SBUInt8 *buffer, SBUInteger length, SBUInteger *index)
{
    SBUInteger startIndex;
    SBUInteger limitIndex;
    SBUInteger lookback;
    SBCodepoint codepoint;

    /* Unsigned wrap-around makes this reject an index of zero as well. */
    if ((*index - 1) >= length) {
        return SBCodepointInvalid;
    }

    startIndex = *index;

    /* Step back over at most 4 code units, stopping at the buffer start or a non-trailing byte. */
    for (lookback = 4; lookback != 0; lookback--) {
        startIndex -= 1;

        if (startIndex == 0) {
            break;
        }
        if ((buffer[startIndex] & 0xC0) != 0x80) {
            break;
        }
    }

    /* Decode forwards from the candidate start; it must end exactly at the original index. */
    limitIndex = startIndex;
    codepoint = SBCodepointDecodeNextFromUTF8(buffer, length, &limitIndex);

    if (limitIndex != *index) {
        *index -= 1;
        return SBCodepointFaulty;
    }

    *index = startIndex;
    return codepoint;
}
311

312
/**
 * Decodes the UTF-16 code point that begins at `*index` and advances `*index` past the code
 * units it consumed.
 *
 * @param buffer UTF-16 code units.
 * @param length Number of code units in the buffer.
 * @param index  In/out position. Left untouched when it is not below `length`; otherwise it
 *               advances by two for a complete surrogate pair and by one in every other case.
 * @return The decoded code point, SBCodepointFaulty for a surrogate that is not part of a
 *         complete pair, or SBCodepointInvalid if `*index` is out of range.
 */
SBCodepoint SBCodepointDecodeNextFromUTF16(const SBUInt16 *buffer, SBUInteger length, SBUInteger *index)
{
    SBUInt16 lead;
    SBUInt16 trail;

    if (*index >= length) {
        return SBCodepointInvalid;
    }

    lead = buffer[*index];
    *index += 1;

    if (!SBCodepointIsSurrogate(lead)) {
        return lead;
    }

    /* A surrogate above 0xDBFF cannot open a pair, and a pair needs one more code unit. */
    if (lead > 0xDBFF || *index >= length) {
        return SBCodepointFaulty;
    }

    trail = buffer[*index];

    if (!SBUInt16InRange(trail, 0xDC00, 0xDFFF)) {
        return SBCodepointFaulty;
    }

    *index += 1;

    return (lead << 10) + trail - ((0xD800 << 10) + 0xDC00 - 0x10000);
}
340

341
/**
 * Decodes the UTF-16 code point that ends immediately before `*index` and moves `*index`
 * back to where it begins.
 *
 * @param buffer UTF-16 code units.
 * @param length Number of code units in the buffer.
 * @param index  In/out position. Left untouched when `*index - 1` is not below `length`
 *               (which includes an index of zero); otherwise it moves back by two for a
 *               complete surrogate pair and by one in every other case.
 * @return The decoded code point, SBCodepointFaulty for a surrogate that is not part of a
 *         complete pair, or SBCodepointInvalid if the position is out of range.
 */
SBCodepoint SBCodepointDecodePreviousFromUTF16(const SBUInt16 *buffer, SBUInteger length, SBUInteger *index)
{
    SBUInt16 trail;
    SBUInt16 lead;

    /* Unsigned wrap-around makes this reject an index of zero as well. */
    if ((*index - 1) >= length) {
        return SBCodepointInvalid;
    }

    *index -= 1;
    trail = buffer[*index];

    if (!SBCodepointIsSurrogate(trail)) {
        return trail;
    }

    /* A surrogate below 0xDC00 cannot close a pair, and a pair needs a preceding code unit. */
    if (trail < 0xDC00 || *index == 0) {
        return SBCodepointFaulty;
    }

    lead = buffer[*index - 1];

    if (!SBUInt16InRange(lead, 0xD800, 0xDBFF)) {
        return SBCodepointFaulty;
    }

    *index -= 1;

    return (lead << 10) + trail - ((0xD800 << 10) + 0xDC00 - 0x10000);
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc