• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5955483049

23 Aug 2023 07:11PM UTC coverage: 20.597% (-0.8%) from 21.364%
5955483049

push

github

neon-sunset
perf: additional methods specializations on enumerators to avoid unnecessary overhead

133 of 910 branches covered (14.62%)

Branch coverage included in aggregate %.

84 of 84 new or added lines in 4 files covered. (100.0%)

481 of 2071 relevant lines covered (23.23%)

39801.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Buffers;
2
using System.Diagnostics.CodeAnalysis;
3
using System.Runtime.InteropServices;
4
using System.Text;
5
using U8Primitives.InteropServices;
6

7
namespace U8Primitives;
8

9
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
10
public readonly partial struct U8String
11
{
12
    // TODO: Optimize/deduplicate Concat variants
13
    // TODO: Investigate if it is possible to fold validation for u8 literals
14
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value placed first.</param>
    /// <param name="right">The value placed second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty, <paramref name="left"/> when
    /// <paramref name="right"/> is empty; otherwise the result of <c>U8Manipulation.ConcatUnchecked</c>
    /// applied to both operands.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        // No Validate call here: both operands are already U8String instances.
        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right.UnsafeSpan);
    }
30

31
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value placed first.</param>
    /// <param name="right">The bytes placed second; passed to <c>Validate</c> when non-empty.</param>
    /// <returns>
    /// <paramref name="left"/> when <paramref name="right"/> is empty; a new <see cref="U8String"/> created from
    /// <paramref name="right"/> when <paramref name="left"/> is empty; otherwise the concatenation of both.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // The raw bytes are validated once up front; both paths below skip re-validation.
        Validate(right);

        if (left.IsEmpty)
        {
            return new U8String(right, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right);
    }
46

47
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first; passed to <c>Validate</c> when non-empty.</param>
    /// <param name="right">The value placed second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty; a new <see cref="U8String"/> created from
    /// <paramref name="left"/> when <paramref name="right"/> is empty; otherwise the concatenation of both.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // The raw bytes are validated once up front; both paths below skip re-validation.
        Validate(left);

        if (right.IsEmpty)
        {
            return new U8String(left, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left, right.UnsafeSpan);
    }
62

63
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first.</param>
    /// <param name="right">The bytes placed second.</param>
    /// <returns>
    /// A <see cref="U8String"/> over a freshly allocated array holding both inputs,
    /// or <see langword="default"/> when the combined length is zero.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var length = left.Length + right.Length;
        if (length == 0)
        {
            return default;
        }

        var buffer = new byte[length];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // Validation runs over the combined bytes rather than over each input separately.
        Validate(buffer);
        return new U8String(buffer, 0, length);
    }
79

80
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var source = this;

        // Slicing throws on its own when index lies outside the destination array.
        var target = destination.AsSpan()[index..];
        if (target.Length < source.Length)
        {
            // Not enough room after index to hold every byte of this string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        source.UnsafeSpan.CopyTo(target);
    }
×
92

93
    /// <summary>
    /// Normalizes current <see cref="U8String"/> to the specified Unicode normalization form (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The Unicode normalization form to apply.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">Always thrown: normalization is not implemented yet.</exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
101

102
    /// <summary>
    /// Returns the result of <c>U8Manipulation.Replace</c> for this string with the given byte values.
    /// </summary>
    /// <param name="oldValue">The byte to be replaced.</param>
    /// <param name="newValue">The byte to substitute for <paramref name="oldValue"/>.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
107

108
    /// <summary>
    /// Returns the result of <c>U8Manipulation.Replace</c> for this string with the given UTF-16 code units.
    /// </summary>
    /// <param name="oldValue">The character to be replaced.</param>
    /// <param name="newValue">The character to substitute for <paramref name="oldValue"/>.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
113

114
    /// <summary>
    /// Returns the result of <c>U8Manipulation.Replace</c> for this string with the given runes.
    /// </summary>
    /// <param name="oldValue">The rune to be replaced.</param>
    /// <param name="newValue">The rune to substitute for <paramref name="oldValue"/>.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
119

120
    /// <summary>
    /// Returns the result of <c>U8Manipulation.Replace</c> for this string with the given byte sequences.
    /// </summary>
    /// <param name="oldValue">The byte sequence to be replaced.</param>
    /// <param name="newValue">The byte sequence to substitute for <paramref name="oldValue"/>.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
125

126
    /// <summary>
    /// Returns the result of <c>U8Manipulation.Replace</c> for this string with the given <see cref="U8String"/> values.
    /// </summary>
    /// <param name="oldValue">The string to be replaced.</param>
    /// <param name="newValue">The string to substitute for <paramref name="oldValue"/>.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
131

132
    /// <summary>
    /// Replaces <c>"\r\n"</c> sequences with <c>"\n"</c> on non-Windows platforms.
    /// </summary>
    /// <returns>
    /// The current string when it is empty; otherwise the result of <c>U8Manipulation.ReplaceCore</c>.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The string is not empty and the current platform is Windows.
    /// </exception>
    public U8String ReplaceLineEndings()
    {
        var source = this;
        if (source.IsEmpty)
        {
            return source;
        }

        if (OperatingSystem.IsWindows())
        {
            // This needs manual loop which is sad
            throw new NotImplementedException();
        }

        return U8Manipulation.ReplaceCore(
            source, "\r\n"u8, "\n"u8, validate: false);
    }
149

150
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at a specified
    /// byte offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or <see langword="default"/>
    /// when no bytes remain after <paramref name="start"/>.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring would begin
    /// in the middle of a code point and would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start)
    {
        var source = this;

        // Same check as ReadOnly/Span<T>.Slice(int): the unsigned compare also rejects negative values.
        if ((ulong)(uint)start > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = source.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        if (U8Info.IsContinuationByte(in source.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new(source._value, source.Offset + start, remaining);
    }
184

185
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at a specified
    /// byte offset and has a specified length.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or <see langword="default"/> when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// Either edge of the requested range falls in the middle of a UTF-8 code point,
    /// so the substring would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;

        // Same check as ReadOnly/Span<T>.Slice(int, int): the unsigned math also rejects negative values.
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // Cannot overflow: the range check above guarantees start + length <= source.Length.
            var end = start + length;

            // The byte after the slice is inspected only when it still belongs to this string.
            // Guarding on 'length < source.Length' instead would read one byte past the end
            // whenever start > 0 and the slice runs to the end of the string.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
223

224
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Forward scan: skip whole whitespace code points from the start.
            var start = 0;
            while (start < source.Length)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
                {
                    break;
                }
                start += size;
            }

            // Backward scan: 'end' is the last byte to keep, 'endSearch' walks towards 'start'.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (U8Info.IsContinuationByte(b))
                {
                    // Keep walking back until the first byte of this code point is reached.
                    continue;
                }

                // The code point starts at 'endSearch', so a non-ASCII sequence must be inspected
                // from there; 'end' still points at the last byte of the same sequence.
                if (U8Info.IsAsciiByte(b)
                    ? U8Info.IsAsciiWhitespace(b)
                    : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                {
                    // Save the last found whitespace code point offset and continue searching
                    // for more whitespace byte sequences from their end. If we don't do this,
                    // we will end up trimming away continuation bytes at the end of the string.
                    end = endSearch - 1;
                }
                else
                {
                    break;
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }
278

279
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String TrimStart()
    {
        var source = this;
        if (source.IsEmpty)
        {
            return default;
        }

        ref var ptr = ref source.UnsafeRef;
        var first = ptr;

        // Fast path: the string starts with a non-whitespace ASCII byte, nothing to trim.
        if (U8Info.IsAsciiByte(first) && !U8Info.IsAsciiWhitespace(first))
        {
            return source;
        }

        // Advance over whole whitespace code points until one is not whitespace or the end is reached.
        var offset = 0;
        while (offset < source.Length
            && U8Info.IsWhitespaceRune(ref ptr.Add(offset), out var size))
        {
            offset += size;
        }

        return U8Marshal.Slice(source, offset);
    }
314

315
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // 'end' is the last byte to keep, 'endSearch' walks back towards the start.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (U8Info.IsContinuationByte(b))
                {
                    // Keep walking back until the first byte of this code point is reached.
                    continue;
                }

                // The code point starts at 'endSearch', so a non-ASCII sequence must be inspected
                // from there; 'end' still points at the last byte of the same sequence.
                if (U8Info.IsAsciiByte(b)
                    ? U8Info.IsAsciiWhitespace(b)
                    : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                {
                    // Drop this whitespace code point and keep searching from the byte before it.
                    end = endSearch - 1;
                }
                else
                {
                    break;
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }
353

354
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.Trim(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimAscii()
    {
        var source = this;

        return U8Marshal.Slice(source, Ascii.Trim(source));
    }
368

369
    /// <summary>
    /// Removes all the leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.TrimStart(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var source = this;

        return U8Marshal.Slice(source, Ascii.TrimStart(source));
    }
383

384
    /// <summary>
    /// Removes all the trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.TrimEnd(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var source = this;

        return U8Marshal.Slice(source, Ascii.TrimEnd(source));
    }
398

399
    // TODO:
400
    // - Complete impl. depends on porting of InlineArray-based array builder for letters
401
    // which have different lengths in upper/lower case.
402
    // - Remove/rename to ToLowerFallback or move to something like "FallbackInvariantComparer"
403
    // clearly indicating it being slower and inferior alternative to proper implementations
404
    // which call into ICU/NLS/Hybrid-provided case change exports.
405
    /// <summary>
    /// Creates a lowercase copy of the current string: the leading ASCII run is converted with
    /// <c>Ascii.ToLower</c>, the remainder rune by rune with <see cref="Rune.ToLowerInvariant(Rune)"/>.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> over a freshly allocated buffer,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// Fewer than four bytes remain in the output buffer before a rune is stored.
    /// </exception>
    public U8String ToLower()
    {
        var source = this;
        if (source.Length <= 0)
        {
            return default;
        }

        // Three spare bytes at the end of the buffer.
        var buffer = new byte[source.Length + 3];
        var destination = buffer.AsSpan();
        ref var dst = ref destination.AsRef();

        var status = Ascii.ToLower(source, destination, out var written);
        if (status is OperationStatus.InvalidData)
        {
            // Non-ASCII data was hit: continue rune by rune from where the ASCII pass stopped.
            foreach (var rune in U8Marshal.Slice(source, written).Runes)
            {
                var scalar = U8Scalar.Create(Rune.ToLowerInvariant(rune));
                if (written + 4 > destination.Length)
                {
                    Unimpl();
                }

                scalar.StoreUnsafe(ref dst.Add(written));
                written += scalar.Size;
            }
        }

        return new(buffer, 0, written);

        [DoesNotReturn]
        static void Unimpl()
        {
            throw new NotImplementedException();
        }
    }
442

443
    /// <summary>
    /// Creates an uppercase copy of the current string: the leading ASCII run is converted with
    /// <c>Ascii.ToUpper</c>, the remainder rune by rune with <see cref="Rune.ToUpperInvariant(Rune)"/>.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> over a freshly allocated buffer,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// Fewer than four bytes remain in the output buffer before a rune is stored.
    /// </exception>
    public U8String ToUpper()
    {
        var source = this;
        if (source.Length <= 0)
        {
            return default;
        }

        // Three spare bytes at the end of the buffer.
        var buffer = new byte[source.Length + 3];
        var destination = buffer.AsSpan();
        ref var dst = ref destination.AsRef();

        var status = Ascii.ToUpper(source, destination, out var written);
        if (status is OperationStatus.InvalidData)
        {
            // Non-ASCII data was hit: continue rune by rune from where the ASCII pass stopped.
            foreach (var rune in U8Marshal.Slice(source, written).Runes)
            {
                var scalar = U8Scalar.Create(Rune.ToUpperInvariant(rune));
                if (written + 4 > destination.Length)
                {
                    Unimpl();
                }

                scalar.StoreUnsafe(ref dst.Add(written));
                written += scalar.Size;
            }
        }

        return new(buffer, 0, written);

        [DoesNotReturn]
        static void Unimpl()
        {
            throw new NotImplementedException();
        }
    }
480

481
    // TODO: docs
482
    /// <summary>
    /// Creates a copy of the current string produced by <c>U8Manipulation.ToLowerAscii</c>.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> of the same length over a freshly allocated array,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String ToLowerAscii()
    {
        var source = this;
        if (source.Length <= 0)
        {
            return default;
        }

        var buffer = new byte[source.Length];

        U8Manipulation.ToLowerAscii(
            ref source.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)source.Length);

        return new(buffer, 0, source.Length);
    }
499

500
    /// <summary>
    /// Creates a copy of the current string produced by <c>U8Manipulation.ToUpperAscii</c>.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> of the same length over a freshly allocated array,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String ToUpperAscii()
    {
        var source = this;
        if (source.Length <= 0)
        {
            return default;
        }

        var buffer = new byte[source.Length];

        U8Manipulation.ToUpperAscii(
            ref source.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)source.Length);

        return new(buffer, 0, source.Length);
    }
517
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc