• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5922049812

21 Aug 2023 04:39AM UTC coverage: 20.136% (-1.6%) from 21.736%
5922049812

push

github

neon-sunset
feat: work in progress - comparers/globalization, planning, splitters and native string prototype scaffolding

122 of 856 branches covered (14.25%)

Branch coverage included in aggregate %.

188 of 188 new or added lines in 9 files covered. (100.0%)

439 of 1930 relevant lines covered (22.75%)

26474.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Buffers;
2
using System.Diagnostics.CodeAnalysis;
3
using System.Runtime.InteropServices;
4
using System.Text;
5
using U8Primitives.InteropServices;
6

7
namespace U8Primitives;
8

9
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
10
public readonly partial struct U8String
11
{
12
    // TODO: Optimize/deduplicate Concat variants
13
    // TODO: Investigate if it is possible fold validation for u8 literals
14
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value placed first.</param>
    /// <param name="right">The value placed second.</param>
    /// <returns>
    /// The other operand as-is when either side is empty; otherwise a new
    /// <see cref="U8String"/> over a freshly allocated buffer holding both operands.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        // Left bytes go to the front, right bytes immediately after them.
        left.UnsafeSpan.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
34

35
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value placed first.</param>
    /// <param name="right">The bytes placed second; passed through <c>Validate</c> when not empty.</param>
    /// <returns>
    /// <paramref name="left"/> as-is when <paramref name="right"/> is empty; a new
    /// <see cref="U8String"/> created from <paramref name="right"/> when <paramref name="left"/>
    /// is empty; otherwise a new <see cref="U8String"/> holding both.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        Validate(right);

        if (left.IsEmpty)
        {
            // Already validated above, so the constructor is told to skip validation.
            return new U8String(right, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
56

57
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first; passed through <c>Validate</c> when not empty.</param>
    /// <param name="right">The value placed second.</param>
    /// <returns>
    /// <paramref name="right"/> as-is when <paramref name="left"/> is empty; a new
    /// <see cref="U8String"/> created from <paramref name="left"/> when <paramref name="right"/>
    /// is empty; otherwise a new <see cref="U8String"/> holding both.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        Validate(left);

        if (right.IsEmpty)
        {
            // Already validated above, so the constructor is told to skip validation.
            return new U8String(left, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
78

79
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first.</param>
    /// <param name="right">The bytes placed second.</param>
    /// <returns>
    /// <c>default</c> when the combined length is zero; otherwise a new
    /// <see cref="U8String"/> over a freshly allocated buffer holding both inputs.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // Validation runs on the combined buffer rather than on each input separately.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
95

96
    /// <summary>
    /// Normalizes current <see cref="U8String"/> to the specified Unicode normalization form (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The requested Unicode normalization form.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">Always thrown: normalization is not implemented yet.</exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
104

105
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given byte values.
    /// </summary>
    /// <param name="oldValue">The byte to be replaced.</param>
    /// <param name="newValue">The byte to put in its place.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
110

111
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given <see cref="char"/> values.
    /// </summary>
    /// <param name="oldValue">The character to be replaced.</param>
    /// <param name="newValue">The character to put in its place.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
116

117
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given <see cref="Rune"/> values.
    /// </summary>
    /// <param name="oldValue">The rune to be replaced.</param>
    /// <param name="newValue">The rune to put in its place.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
122

123
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given byte sequences.
    /// </summary>
    /// <param name="oldValue">The byte sequence to be replaced.</param>
    /// <param name="newValue">The byte sequence to put in its place.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
128

129
    /// <summary>
    /// Forwards to <c>U8Manipulation.ReplaceUnchecked</c> with this instance and the given
    /// <see cref="U8String"/> values.
    /// </summary>
    /// <param name="oldValue">The value to be replaced.</param>
    /// <param name="newValue">The value to put in its place.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.ReplaceUnchecked</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.ReplaceUnchecked(this, oldValue, newValue);
134

135
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var self = this;

        // Everything in the destination array from 'index' onwards is the writable window.
        var target = destination.AsSpan()[index..];
        if (target.Length < self.Length)
        {
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        self.UnsafeSpan.CopyTo(target);
    }
×
147

148
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at a specified
    /// byte offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or <c>default</c>
    /// when no bytes remain after <paramref name="start"/>.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring
    /// would begin in the middle of a code point.
    /// </exception>
    public U8String Slice(int start)
    {
        var self = this;

        // Same range check as the ReadOnly/Span<T> Slice(int) implementation:
        // a negative start becomes a huge unsigned value and fails the comparison.
        if ((ulong)(uint)start > (ulong)(uint)self.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = self.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        if (U8Info.IsContinuationByte(in self.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new(self._value, self.Offset + start, remaining);
    }
182

183
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at a specified
    /// byte offset and has a specified length.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or <c>default</c> when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The substring would begin or end in the middle of a UTF-8 code point and would result in an invalid UTF-8 string.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // The byte at 'start' must not be a continuation byte, and neither must the byte
            // right after the slice. The trailing check is gated on 'start + length' (not on
            // 'length' alone) so that a slice ending exactly at the end of this instance never
            // inspects the byte one past its last element. The sum cannot overflow here because
            // the range check above already bounded it by source.Length.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                start + length < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(start + length))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
221

222
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Forward scan: advance past whitespace runes, one whole rune at a time.
            var start = 0;
            while (start < source.Length)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
                {
                    break;
                }
                start += size;
            }

            // Backward scan: walk bytes from the end, skipping continuation bytes until the
            // first byte of a code point is reached, then classify that code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // The non-ASCII check must look at 'endSearch', where the code point
                    // starts; 'end' may still point at one of its continuation bytes.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Save the last found whitespace code point offset and continue searching
                        // for more whitespace byte sequences from their end. If we don't do this,
                        // we will end up trimming away continuation bytes at the end of the string.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }
276

277
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string.
    /// </returns>
    public U8String TrimStart()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return default;
        }

        ref var first = ref self.UnsafeRef;
        var lead = first;

        // Fast path: the string starts with an ASCII byte that is not whitespace.
        if (U8Info.IsAsciiByte(lead) && !U8Info.IsAsciiWhitespace(lead))
        {
            return self;
        }

        // Advance past whitespace runes, one whole rune at a time.
        var offset = 0;
        while (offset < self.Length
            && U8Info.IsWhitespaceRune(ref first.Add(offset), out var size))
        {
            offset += size;
        }

        return U8Marshal.Slice(self, offset);
    }
312

313
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Walk bytes from the end, skipping continuation bytes until the first byte
            // of a code point is reached, then classify that code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // The non-ASCII check must look at 'endSearch', where the code point
                    // starts; 'end' may still point at one of its continuation bytes.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Everything from this code point onwards is whitespace: move the
                        // end of the kept range to the byte just before it and keep going.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }
351

352
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start and end of the current string.
    /// </returns>
    public U8String TrimAscii()
    {
        var self = this;

        // Ascii.Trim yields the range to keep; the slice is taken over that range.
        return U8Marshal.Slice(self, Ascii.Trim(self));
    }
366

367
    /// <summary>
    /// Removes all the leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start of the current string.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var self = this;

        // Ascii.TrimStart yields the range to keep; the slice is taken over that range.
        return U8Marshal.Slice(self, Ascii.TrimStart(self));
    }
381

382
    /// <summary>
    /// Removes all the trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the end of the current string.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var self = this;

        // Ascii.TrimEnd yields the range to keep; the slice is taken over that range.
        return U8Marshal.Slice(self, Ascii.TrimEnd(self));
    }
396

397
    // TODO:
    // - Complete impl. depends on porting of InlineArray-based array builder for letters
    // which have different lengths in upper/lower case.
    // - Remove/rename to ToLowerFallback or move to something like "FallbackInvariantComparer"
    // clearly indicating it being slower and inferior alternative to proper implementations
    // which call into ICU/NLS/Hybrid-provided case change exports.
    /// <summary>
    /// Creates a lowercase copy of the current string: an ASCII pass via
    /// <see cref="Ascii.ToLower(ReadOnlySpan{byte}, Span{byte}, out int)"/> followed,
    /// when non-ASCII data is encountered, by a per-rune <see cref="Rune.ToLowerInvariant(Rune)"/> pass.
    /// </summary>
    /// <returns>A new lowercase <see cref="U8String"/>, or <c>default</c> when this instance is empty.</returns>
    /// <exception cref="NotImplementedException">
    /// The output does not fit the fixed-size buffer (fewer than four bytes remain when a rune is about to be stored).
    /// </exception>
    public U8String ToLower()
    {
        var source = this;
        if (source.Length > 0)
        {
            // Each rune below is stored with a single 4-byte write, so the buffer carries
            // 3 bytes of slack (same as ToUpper). Without it, any non-ASCII rune within
            // the last 3 bytes of the input would fail the capacity check.
            var lowercase = new byte[source.Length + 3];
            var destination = lowercase.AsSpan();

            var result = Ascii.ToLower(source, destination, out var consumed);
            if (result is OperationStatus.InvalidData)
            {
                // Continue rune by rune from where the ASCII pass stopped.
                foreach (var rune in U8Marshal.Slice(source, consumed).Runes)
                {
                    var lower = Rune.ToLowerInvariant(rune);
                    var (scalar, length) = U8Conversions.RuneToCodepoint(lower);
                    if (consumed + 4 > destination.Length)
                    {
                        [DoesNotReturn]
                        static void Unimpl()
                        {
                            throw new NotImplementedException();
                        }

                        Unimpl();
                    }

                    // Store 4 bytes at once, then advance only by the encoded rune length.
                    Unsafe.As<byte, uint>(ref destination.AsRef(consumed)) = scalar;
                    consumed += length;
                }
            }

            return new(lowercase, 0, consumed);
        }

        return default;
    }
439

440
    /// <summary>
    /// Creates an uppercase copy of the current string: an ASCII pass via
    /// <see cref="Ascii.ToUpper(ReadOnlySpan{byte}, Span{byte}, out int)"/> followed,
    /// when non-ASCII data is encountered, by a per-rune <see cref="Rune.ToUpperInvariant(Rune)"/> pass.
    /// </summary>
    /// <returns>A new uppercase <see cref="U8String"/>, or <c>default</c> when this instance is empty.</returns>
    /// <exception cref="NotImplementedException">
    /// The output does not fit the fixed-size buffer (fewer than four bytes remain when a rune is about to be stored).
    /// </exception>
    public U8String ToUpper()
    {
        var self = this;
        if (self.Length <= 0)
        {
            return default;
        }

        // 3 bytes of slack because each rune below is stored with a single 4-byte write.
        var buffer = new byte[self.Length + 3];
        var output = buffer.AsSpan();

        var status = Ascii.ToUpper(self, output, out var written);
        if (status is not OperationStatus.InvalidData)
        {
            return new(buffer, 0, written);
        }

        // Continue rune by rune from where the ASCII pass stopped.
        foreach (var rune in U8Marshal.Slice(self, written).Runes)
        {
            var (scalar, size) = U8Conversions.RuneToCodepoint(Rune.ToUpperInvariant(rune));
            if (written + 4 > output.Length)
            {
                Unimpl();
            }

            // Store 4 bytes at once, then advance only by the encoded rune length.
            Unsafe.As<byte, uint>(ref output.AsRef(written)) = scalar;
            written += size;
        }

        return new(buffer, 0, written);

        [DoesNotReturn]
        static void Unimpl() => throw new NotImplementedException();
    }
476

477
    /// <summary>
    /// Creates a copy of the current string by running <c>U8Manipulation.ToLowerAscii</c>
    /// over its bytes into a new buffer of the same length.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> with the same length as this instance,
    /// or <c>default</c> when this instance is empty.
    /// </returns>
    public U8String ToLowerAscii()
    {
        var self = this;
        if (self.Length <= 0)
        {
            return default;
        }

        var buffer = new byte[self.Length];

        U8Manipulation.ToLowerAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)self.Length);

        return new(buffer, 0, self.Length);
    }
495

496
    /// <summary>
    /// Creates a copy of the current string by running <c>U8Manipulation.ToUpperAscii</c>
    /// over its bytes into a new buffer of the same length.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> with the same length as this instance,
    /// or <c>default</c> when this instance is empty.
    /// </returns>
    public U8String ToUpperAscii()
    {
        var self = this;
        if (self.Length <= 0)
        {
            return default;
        }

        var buffer = new byte[self.Length];

        U8Manipulation.ToUpperAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)self.Length);

        return new(buffer, 0, self.Length);
    }
513
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc