• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5913888333

19 Aug 2023 10:40PM UTC coverage: 22.078% (-0.6%) from 22.694%
5913888333

push

github

neon-sunset
feat: faster and non-faulting ascii change case

122 of 788 branches covered (15.48%)

Branch coverage included in aggregate %.

88 of 88 new or added lines in 3 files covered. (100.0%)

439 of 1753 relevant lines covered (25.04%)

29147.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Text;
2
using U8Primitives.InteropServices;
3

4
namespace U8Primitives;
5

6
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
7
public readonly partial struct U8String
8
{
9
    // TODO: Optimize/deduplicate Concat variants
10
    // TODO: Investigate whether it is possible to fold validation for u8 literals
11
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value placed first in the result.</param>
    /// <param name="right">The value placed after <paramref name="left"/>.</param>
    /// <returns>
    /// The other operand unchanged when one operand is empty; otherwise a new
    /// <see cref="U8String"/> over a freshly allocated array containing the bytes of both.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        // An empty operand contributes nothing: hand back the other one without allocating.
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        var leftLength = left.Length;
        var total = leftLength + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(leftLength));

        // No validation is performed here; both inputs are already U8String instances.
        return new U8String(buffer, 0, total);
    }
31

32
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value placed first in the result.</param>
    /// <param name="right">
    /// The bytes appended after <paramref name="left"/>. When non-empty they are passed to
    /// <c>Validate</c> before being used (presumably throwing on malformed UTF-8 — confirm against <c>Validate</c>).
    /// </param>
    /// <returns>
    /// <paramref name="left"/> unchanged when <paramref name="right"/> is empty; a new
    /// <see cref="U8String"/> created from <paramref name="right"/> when <paramref name="left"/> is empty;
    /// otherwise a new <see cref="U8String"/> over a freshly allocated array containing both.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // The span is validated on its own, before any allocation takes place.
        Validate(right);

        if (left.IsEmpty)
        {
            // Already validated just above, so the constructor is told to skip validation.
            return new U8String(right, skipValidation: true);
        }

        var leftLength = left.Length;
        var total = leftLength + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.CopyTo(buffer.AsSpan(leftLength));

        return new U8String(buffer, 0, total);
    }
53

54
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">
    /// The bytes placed first in the result. When non-empty they are passed to
    /// <c>Validate</c> before being used (presumably throwing on malformed UTF-8 — confirm against <c>Validate</c>).
    /// </param>
    /// <param name="right">The value appended after <paramref name="left"/>.</param>
    /// <returns>
    /// <paramref name="right"/> unchanged when <paramref name="left"/> is empty; a new
    /// <see cref="U8String"/> created from <paramref name="left"/> when <paramref name="right"/> is empty;
    /// otherwise a new <see cref="U8String"/> over a freshly allocated array containing both.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // The span is validated on its own, before any allocation takes place.
        Validate(left);

        if (right.IsEmpty)
        {
            // Already validated just above, so the constructor is told to skip validation.
            return new U8String(left, skipValidation: true);
        }

        var leftLength = left.Length;
        var total = leftLength + right.Length;
        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(leftLength));

        return new U8String(buffer, 0, total);
    }
75

76
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first in the result.</param>
    /// <param name="right">The bytes appended after <paramref name="left"/>.</param>
    /// <returns>
    /// <see langword="default"/> when the combined length is zero; otherwise a new
    /// <see cref="U8String"/> over a freshly allocated array containing both inputs.
    /// </returns>
    /// <remarks>
    /// Unlike the overloads taking a <see cref="U8String"/>, <c>Validate</c> is called once on the
    /// combined buffer rather than on each input separately.
    /// </remarks>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // Validation runs over the joined bytes, after both halves have been copied.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
92

93
    /// <summary>
    /// Intended to normalize the current <see cref="U8String"/> to the specified Unicode
    /// normalization form (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The requested Unicode normalization form.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">Always thrown: the method has no implementation yet.</exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
101

102
    /// <summary>
    /// Replaces a byte value in the current string by forwarding to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The byte to look for.</param>
    /// <param name="newValue">The byte to substitute for <paramref name="oldValue"/>.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
107

108
    /// <summary>
    /// Replaces a <see cref="char"/> value in the current string by forwarding to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The character to look for.</param>
    /// <param name="newValue">The character to substitute for <paramref name="oldValue"/>.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
113

114
    /// <summary>
    /// Replaces a <see cref="Rune"/> value in the current string by forwarding to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The rune to look for.</param>
    /// <param name="newValue">The rune to substitute for <paramref name="oldValue"/>.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
119

120
    /// <summary>
    /// Replaces a byte sequence in the current string by forwarding to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The byte sequence to look for.</param>
    /// <param name="newValue">The byte sequence to substitute for <paramref name="oldValue"/>.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
125

126
    /// <summary>
    /// Replaces a <see cref="U8String"/> value in the current string by forwarding to
    /// <c>U8Manipulation.ReplaceUnchecked</c>.
    /// </summary>
    /// <param name="oldValue">The value to look for.</param>
    /// <param name="newValue">The value to substitute for <paramref name="oldValue"/>.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.ReplaceUnchecked</c>.</returns>
    /// <remarks>
    /// This overload uses the unchecked helper, unlike the span-based overload; presumably because
    /// both arguments are already <see cref="U8String"/> instances — confirm against <c>U8Manipulation</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.ReplaceUnchecked(this, oldValue, newValue);
131

132
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var source = this;

        // Slicing the destination throws ArgumentOutOfRangeException on its own
        // when index lies outside the array.
        var target = destination.AsSpan().Slice(index);
        if (target.Length < source.Length)
        {
            // Not enough room after index to hold every byte of this string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        source.UnsafeSpan.CopyTo(target);
    }
×
144

145
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or <see langword="default"/>
    /// when <paramref name="start"/> equals the length of this instance.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring would begin
    /// in the middle of a code point and would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start)
    {
        var source = this;

        // Same check as ReadOnly/Span<T>.Slice(int): the unsigned comparison
        // rejects a negative start as well as one past the end.
        if ((ulong)(uint)start > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = source.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        // The first byte of the result must not be a continuation byte.
        if (U8Info.IsContinuationByte(in source.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new(source._value, source.Offset + start, remaining);
    }
179

180
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and has the specified length in bytes.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or <see langword="default"/> when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The substring would begin or end in the middle of a UTF-8 code point and would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // Cannot overflow: the range check above guarantees start + length <= source.Length.
            var end = start + length;

            // The byte at 'start' must begin a code point, and the byte right after the slice
            // (if there is one) must begin a code point too. The trailing check is guarded by
            // 'end < source.Length' so that a slice reaching the end of the string never reads
            // the byte past it.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) ||
                (end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
218

219
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string,
    /// or <see langword="default"/> when nothing remains.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (source.IsEmpty)
        {
            return default;
        }

        ref var ptr = ref source.UnsafeRef;

        // Forward scan: skip whole whitespace code points, advancing by their encoded size.
        var start = 0;
        while (start < source.Length)
        {
            if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
            {
                break;
            }
            start += size;
        }

        // Backward scan: walk bytes from the end, skipping continuation bytes until the
        // first byte of a code point is found, then classify the code point starting there.
        var end = source.Length - 1;
        for (var endSearch = end; endSearch >= start; endSearch--)
        {
            var b = ptr.Add(endSearch);
            if (U8Info.IsContinuationByte(b))
            {
                continue;
            }

            // The code point starts at endSearch, so that is where decoding must begin.
            if (U8Info.IsAsciiByte(b)
                ? U8Info.IsAsciiWhitespace(b)
                : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
            {
                // Save the last found whitespace code point offset and continue searching
                // for more whitespace byte sequences from their end. If we don't do this,
                // we will end up trimming away continuation bytes at the end of the string.
                end = endSearch - 1;
            }
            else
            {
                break;
            }
        }

        // An all-whitespace string trims to nothing; return default like the other
        // empty-result paths in this type instead of a zero-length slice.
        var length = end - start + 1;
        if (length <= 0)
        {
            return default;
        }

        return U8Marshal.Slice(source, start, length);
    }
273

274
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string,
    /// or <see langword="default"/> when nothing remains.
    /// </returns>
    public U8String TrimStart()
    {
        var source = this;
        if (source.IsEmpty)
        {
            return default;
        }

        ref var ptr = ref source.UnsafeRef;
        var b = ptr;

        // Fast path: the first byte is ASCII and not whitespace, so there is nothing to trim.
        if (U8Info.IsAsciiByte(b) && !U8Info.IsAsciiWhitespace(b))
        {
            return source;
        }

        // Skip whole whitespace code points, advancing by their encoded size.
        var start = 0;
        while (start < source.Length)
        {
            if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
            {
                break;
            }
            start += size;
        }

        // An all-whitespace string trims to nothing; return default like the other
        // empty-result paths in this type instead of a zero-length slice.
        if (start >= source.Length)
        {
            return default;
        }

        return U8Marshal.Slice(source, start);
    }
309

310
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string,
    /// or <see langword="default"/> when nothing remains.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (source.IsEmpty)
        {
            return default;
        }

        ref var ptr = ref source.UnsafeRef;

        // Walk bytes from the end, skipping continuation bytes until the first byte of a
        // code point is found, then classify the code point starting there.
        var end = source.Length - 1;
        for (var endSearch = end; endSearch >= 0; endSearch--)
        {
            var b = ptr.Add(endSearch);
            if (U8Info.IsContinuationByte(b))
            {
                continue;
            }

            // The code point starts at endSearch, so that is where decoding must begin.
            if (U8Info.IsAsciiByte(b)
                ? U8Info.IsAsciiWhitespace(b)
                : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
            {
                // Remember the byte just before this whitespace code point as the new end,
                // so the continuation bytes of a retained code point are never cut off.
                end = endSearch - 1;
            }
            else
            {
                break;
            }
        }

        // An all-whitespace string trims to nothing; return default like the other
        // empty-result paths in this type instead of a zero-length slice.
        if (end < 0)
        {
            return default;
        }

        return U8Marshal.Slice(source, 0, end + 1);
    }
348

349
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start and end of the current string,
    /// or <see langword="default"/> when the remaining range is empty.
    /// </returns>
    public U8String TrimAscii()
    {
        var source = this;
        var trimmed = Ascii.Trim(source);

        // An empty range maps to the default (empty) string rather than a zero-length slice.
        if (trimmed.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, trimmed);
    }
365

366
    /// <summary>
    /// Removes all leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start of the current string,
    /// or <see langword="default"/> when the remaining range is empty.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var source = this;
        var trimmed = Ascii.TrimStart(source);

        // An empty range maps to the default (empty) string rather than a zero-length slice.
        if (trimmed.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, trimmed);
    }
382

383
    /// <summary>
    /// Removes all trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the end of the current string,
    /// or <see langword="default"/> when the remaining range is empty.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var source = this;
        var trimmed = Ascii.TrimEnd(source);

        // An empty range maps to the default (empty) string rather than a zero-length slice.
        if (trimmed.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, trimmed);
    }
399

400
    /// <summary>
    /// Creates a new string of the same length whose bytes are produced by
    /// <c>U8Manipulation.ToLowerAscii</c> from the bytes of the current string.
    /// </summary>
    /// <returns>
    /// A newly allocated <see cref="U8String"/>, or <see langword="default"/> when the current string is empty.
    /// </returns>
    /// <remarks>
    /// Presumably only ASCII letters are converted to lower case and all other bytes are copied
    /// unchanged — confirm against <c>U8Manipulation.ToLowerAscii</c>.
    /// </remarks>
    public U8String ToLowerAscii()
    {
        var source = this;
        var length = source.Length;
        if (length <= 0)
        {
            return default;
        }

        // The helper writes its output directly into the freshly allocated result buffer.
        var result = new U8String(new byte[length], 0, length);
        U8Manipulation.ToLowerAscii(ref source.UnsafeRef, ref result.UnsafeRef, (uint)length);

        return result;
    }
418

419
    /// <summary>
    /// Creates a new string of the same length whose bytes are produced by
    /// <c>U8Manipulation.ToUpperAscii</c> from the bytes of the current string.
    /// </summary>
    /// <returns>
    /// A newly allocated <see cref="U8String"/>, or <see langword="default"/> when the current string is empty.
    /// </returns>
    /// <remarks>
    /// Presumably only ASCII letters are converted to upper case and all other bytes are copied
    /// unchanged — confirm against <c>U8Manipulation.ToUpperAscii</c>.
    /// </remarks>
    public U8String ToUpperAscii()
    {
        var source = this;
        var length = source.Length;
        if (length <= 0)
        {
            return default;
        }

        // The helper writes its output directly into the freshly allocated result buffer.
        var result = new U8String(new byte[length], 0, length);
        U8Manipulation.ToUpperAscii(ref source.UnsafeRef, ref result.UnsafeRef, (uint)length);

        return result;
    }
436
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc