• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5950717515

23 Aug 2023 11:30AM UTC coverage: 23.255% (-0.8%) from 24.028%
5950717515

push

github

neon-sunset
feat: implement .Replace() variants, clean up searching overloads

154 of 856 branches covered (17.99%)

Branch coverage included in aggregate %.

86 of 86 new or added lines in 4 files covered. (100.0%)

499 of 1952 relevant lines covered (25.56%)

42232.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Buffers;
2
using System.Diagnostics.CodeAnalysis;
3
using System.Runtime.InteropServices;
4
using System.Text;
5
using U8Primitives.InteropServices;
6

7
namespace U8Primitives;
8

9
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
10
public readonly partial struct U8String
11
{
12
    // TODO: Optimize/deduplicate Concat variants
13
    // TODO: Investigate if it is possible to fold validation for u8 literals
14
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value that comes first in the result.</param>
    /// <param name="right">The value that comes second in the result.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty, <paramref name="left"/> when
    /// <paramref name="right"/> is empty, otherwise the result of <c>U8Manipulation.ConcatUnchecked</c>.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        // An empty operand contributes nothing, so the other operand is handed back as is.
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        // Both operands are already U8String instances, hence no validation call here.
        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right.UnsafeSpan);
    }
30

31
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value that comes first in the result.</param>
    /// <param name="right">The bytes appended after <paramref name="left"/>; checked with <c>Validate</c> when not empty.</param>
    /// <returns>
    /// <paramref name="left"/> when <paramref name="right"/> is empty; a new string built from
    /// <paramref name="right"/> alone when <paramref name="left"/> is empty; otherwise the concatenation of both.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // Only the raw bytes need checking; `left` is already a U8String.
        Validate(right);

        if (left.IsEmpty)
        {
            // Validation has just been performed above, so the constructor may skip it.
            return new U8String(right, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right);
    }
46

47
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first in the result; checked with <c>Validate</c> when not empty.</param>
    /// <param name="right">The value appended after <paramref name="left"/>.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty; a new string built from
    /// <paramref name="left"/> alone when <paramref name="right"/> is empty; otherwise the concatenation of both.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // Only the raw bytes need checking; `right` is already a U8String.
        Validate(left);

        if (right.IsEmpty)
        {
            // Validation has just been performed above, so the constructor may skip it.
            return new U8String(left, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left, right.UnsafeSpan);
    }
62

63
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first in the result.</param>
    /// <param name="right">The bytes that come second in the result.</param>
    /// <returns>
    /// The default (empty) value when both inputs are empty; otherwise a string backed by a
    /// freshly allocated array holding <paramref name="left"/> followed by <paramref name="right"/>.
    /// </returns>
    /// <remarks>
    /// <c>Validate</c> is applied to the combined buffer, not to each input separately.
    /// </remarks>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // Checked after copying, on the joined bytes.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
79

80
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var self = this;

        // Slicing throws on its own when `index` lies outside of `destination`.
        var target = destination.AsSpan().Slice(index);
        if (target.Length < self.Length)
        {
            // Not enough room after `index` to hold all bytes of this string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        self.UnsafeSpan.CopyTo(target);
    }
×
92

93
    /// <summary>
    /// Normalizes current <see cref="U8String"/> to the specified Unicode normalization form
    /// (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The Unicode normalization form to apply.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">
    /// Always thrown at the moment: normalization has not been implemented yet.
    /// </exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC) =>
        throw new NotImplementedException();
101

102
    /// <summary>
    /// Replaces the byte <paramref name="oldValue"/> with <paramref name="newValue"/> in the current string.
    /// </summary>
    /// <param name="oldValue">The byte to look for.</param>
    /// <param name="newValue">The byte to put in its place.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c> for this instance.</returns>
    /// <remarks>
    /// Thin inlined forwarder; matching and validation rules are defined by <c>U8Manipulation.Replace</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue) =>
        U8Manipulation.Replace(this, oldValue, newValue);
107

108
    /// <summary>
    /// Replaces the character <paramref name="oldValue"/> with <paramref name="newValue"/> in the current string.
    /// </summary>
    /// <param name="oldValue">The UTF-16 character to look for.</param>
    /// <param name="newValue">The UTF-16 character to put in its place.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c> for this instance.</returns>
    /// <remarks>
    /// Thin inlined forwarder; matching and validation rules are defined by <c>U8Manipulation.Replace</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue) =>
        U8Manipulation.Replace(this, oldValue, newValue);
113

114
    /// <summary>
    /// Replaces the rune <paramref name="oldValue"/> with <paramref name="newValue"/> in the current string.
    /// </summary>
    /// <param name="oldValue">The Unicode scalar value to look for.</param>
    /// <param name="newValue">The Unicode scalar value to put in its place.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c> for this instance.</returns>
    /// <remarks>
    /// Thin inlined forwarder; matching and validation rules are defined by <c>U8Manipulation.Replace</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue) =>
        U8Manipulation.Replace(this, oldValue, newValue);
119

120
    /// <summary>
    /// Replaces the byte sequence <paramref name="oldValue"/> with <paramref name="newValue"/> in the current string.
    /// </summary>
    /// <param name="oldValue">The bytes to look for.</param>
    /// <param name="newValue">The bytes to put in their place.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c> for this instance.</returns>
    /// <remarks>
    /// Thin inlined forwarder; matching and validation rules are defined by <c>U8Manipulation.Replace</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue) =>
        U8Manipulation.Replace(this, oldValue, newValue);
125

126
    /// <summary>
    /// Replaces the string <paramref name="oldValue"/> with <paramref name="newValue"/> in the current string.
    /// </summary>
    /// <param name="oldValue">The string to look for.</param>
    /// <param name="newValue">The string to put in its place.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c> for this instance.</returns>
    /// <remarks>
    /// Thin inlined forwarder; matching rules are defined by <c>U8Manipulation.Replace</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue) =>
        U8Manipulation.Replace(this, oldValue, newValue);
131

132
    /// <summary>
    /// Replaces line endings in the current string with the one used by the current platform.
    /// </summary>
    /// <returns>
    /// The current string unchanged when it is empty; otherwise, on non-Windows platforms,
    /// the result of replacing every <c>\r\n</c> sequence with <c>\n</c>.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The string is not empty and the current platform is Windows (not implemented yet).
    /// </exception>
    public U8String ReplaceLineEndings()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return self;
        }

        if (OperatingSystem.IsWindows())
        {
            throw new NotImplementedException();
        }

        // Both literals are UTF-8 literals, so ReplaceCore is asked not to validate them.
        return U8Manipulation.ReplaceCore(
            self, "\r\n"u8, "\n"u8, validate: false);
    }
148

149
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based offset in this instance at which the substring begins.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or the default (empty) value
    /// when <paramref name="start"/> equals the length of this instance.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring
    /// would begin in the middle of a code point and be invalid UTF-8.
    /// </exception>
    public U8String Slice(int start)
    {
        var self = this;

        // Single unsigned comparison rejects both negative and too large offsets,
        // same trick as in the ReadOnly/Span<T> Slice(int) implementation.
        if ((uint)start > (uint)self.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = self.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        if (U8Info.IsContinuationByte(in self.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        // The view shares the backing array of this instance; only offset and length change.
        return new U8String(self._value, self.Offset + start, remaining);
    }
183

184
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// offset and has the specified length.
    /// </summary>
    /// <param name="start">The zero-based offset in this instance at which the substring begins.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or the default (empty) value when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of
    /// <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The substring would begin or end in the middle of a UTF-8 code point and be invalid UTF-8.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // Cannot overflow: the range check above guarantees start + length <= source.Length.
            var end = start + length;

            // The byte at `start` and the byte right after the substring must not be continuation
            // bytes. The trailing byte is inspected only when it still belongs to this string
            // (end < source.Length); a substring that reaches the end of the source needs no check
            // there, and reading source[source.Length] would be out of bounds.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
222

223
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Forward scan: skip whole whitespace code points from the start.
            var start = 0;
            while (start < source.Length)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
                {
                    break;
                }
                start += size;
            }

            // Backward scan: walk bytes from the end, never going below `start`.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // `endSearch` is on the first byte of a code point here, so a non-ASCII
                    // sequence must be classified starting from this byte. `end` still refers
                    // to the last (continuation) byte of that same sequence.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Save the last found whitespace code point offset and continue searching
                        // for more whitespace byte sequences from their end. If we don't do this,
                        // we will end up trimming away continuation bytes at the end of the string.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }
277

278
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String TrimStart()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return default;
        }

        ref var head = ref self.UnsafeRef;
        var first = head;

        // Fast path: an ASCII non-whitespace first byte means there is nothing to trim.
        if (U8Info.IsAsciiByte(first) && !U8Info.IsAsciiWhitespace(first))
        {
            return self;
        }

        // Advance over whole whitespace code points until a non-whitespace one is met
        // or the string is exhausted.
        var offset = 0;
        while (offset < self.Length
            && U8Info.IsWhitespaceRune(ref head.Add(offset), out var width))
        {
            offset += width;
        }

        return U8Marshal.Slice(self, offset);
    }
313

314
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Walk bytes from the end; `end` tracks the last byte that is kept.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // `endSearch` is on the first byte of a code point here, so a non-ASCII
                    // sequence must be classified starting from this byte. `end` still refers
                    // to the last (continuation) byte of that same sequence.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Drop the whole whitespace code point and keep looking further left.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }
352

353
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice of the current string covering the range reported by <see cref="Ascii.Trim(ReadOnlySpan{byte})"/>.
    /// </returns>
    public U8String TrimAscii()
    {
        var self = this;

        // Ascii.Trim only computes the range to keep; the slice itself shares this string's data.
        return U8Marshal.Slice(self, Ascii.Trim(self));
    }
367

368
    /// <summary>
    /// Removes all the leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice of the current string covering the range reported by <see cref="Ascii.TrimStart(ReadOnlySpan{byte})"/>.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var self = this;

        // Ascii.TrimStart only computes the range to keep; the slice itself shares this string's data.
        return U8Marshal.Slice(self, Ascii.TrimStart(self));
    }
382

383
    /// <summary>
    /// Removes all the trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice of the current string covering the range reported by <see cref="Ascii.TrimEnd(ReadOnlySpan{byte})"/>.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var self = this;

        // Ascii.TrimEnd only computes the range to keep; the slice itself shares this string's data.
        return U8Marshal.Slice(self, Ascii.TrimEnd(self));
    }
397

398
    // TODO:
    // - Complete impl. depends on porting of InlineArray-based array builder for letters
    // which have different lengths in upper/lower case.
    // - Remove/rename to ToLowerFallback or move to something like "FallbackInvariantComparer"
    // clearly indicating it being slower and inferior alternative to proper implementations
    // which call into ICU/NLS/Hybrid-provided case change exports.

    /// <summary>
    /// Converts the current string to lower case using invariant casing rules.
    /// </summary>
    /// <returns>
    /// A new lower-cased string, or the default (empty) value when the current string is empty.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The lower-cased output does not fit into the pre-sized buffer (growing is not implemented yet).
    /// </exception>
    public U8String ToLower()
    {
        var self = this;
        if (self.Length <= 0)
        {
            return default;
        }

        // Three spare bytes on top of the source length; the loop below requires
        // four bytes of room before every store.
        var buffer = new byte[self.Length + 3];
        var span = buffer.AsSpan();
        ref var dst = ref span.AsRef();

        // ASCII fast path. `written` is the count reported by Ascii.ToLower.
        var status = Ascii.ToLower(self, span, out var written);
        if (status is not OperationStatus.InvalidData)
        {
            return new(buffer, 0, written);
        }

        // Non-ASCII data was encountered: continue rune by rune from where the fast path stopped.
        foreach (var rune in U8Marshal.Slice(self, written).Runes)
        {
            var scalar = U8Scalar.Create(Rune.ToLowerInvariant(rune));
            if (written + 4 > span.Length)
            {
                Unimpl();
            }

            scalar.StoreUnsafe(ref dst.Add(written));
            written += scalar.Size;
        }

        return new(buffer, 0, written);

        [DoesNotReturn]
        static void Unimpl()
        {
            throw new NotImplementedException();
        }
    }
441

442
    /// <summary>
    /// Converts the current string to upper case using invariant casing rules.
    /// </summary>
    /// <returns>
    /// A new upper-cased string, or the default (empty) value when the current string is empty.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The upper-cased output does not fit into the pre-sized buffer (growing is not implemented yet).
    /// </exception>
    public U8String ToUpper()
    {
        var self = this;
        if (self.Length <= 0)
        {
            return default;
        }

        // Three spare bytes on top of the source length; the loop below requires
        // four bytes of room before every store.
        var buffer = new byte[self.Length + 3];
        var span = buffer.AsSpan();
        ref var dst = ref span.AsRef();

        // ASCII fast path. `written` is the count reported by Ascii.ToUpper.
        var status = Ascii.ToUpper(self, span, out var written);
        if (status is not OperationStatus.InvalidData)
        {
            return new(buffer, 0, written);
        }

        // Non-ASCII data was encountered: continue rune by rune from where the fast path stopped.
        foreach (var rune in U8Marshal.Slice(self, written).Runes)
        {
            var scalar = U8Scalar.Create(Rune.ToUpperInvariant(rune));
            if (written + 4 > span.Length)
            {
                Unimpl();
            }

            scalar.StoreUnsafe(ref dst.Add(written));
            written += scalar.Size;
        }

        return new(buffer, 0, written);

        [DoesNotReturn]
        static void Unimpl()
        {
            throw new NotImplementedException();
        }
    }
479

480
    // TODO: docs
    /// <summary>
    /// Produces a copy of the current string processed by <c>U8Manipulation.ToLowerAscii</c>.
    /// </summary>
    /// <returns>
    /// A new string of the same length backed by a freshly allocated array,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String ToLowerAscii()
    {
        var self = this;
        var length = self.Length;
        if (length <= 0)
        {
            return default;
        }

        var buffer = new byte[length];

        // Source and destination have the same length; the helper works on raw references.
        U8Manipulation.ToLowerAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)length);

        return new(buffer, 0, length);
    }
498

499
    /// <summary>
    /// Produces a copy of the current string processed by <c>U8Manipulation.ToUpperAscii</c>.
    /// </summary>
    /// <returns>
    /// A new string of the same length backed by a freshly allocated array,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String ToUpperAscii()
    {
        var self = this;
        var length = self.Length;
        if (length <= 0)
        {
            return default;
        }

        var buffer = new byte[length];

        // Source and destination have the same length; the helper works on raw references.
        U8Manipulation.ToUpperAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)length);

        return new(buffer, 0, length);
    }
516
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc