• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5992664408

27 Aug 2023 06:57PM UTC coverage: 18.359% (+0.04%) from 18.315%
5992664408

push

github

neon-sunset
feat: make ascii comparer len hint inlineable, fix ToUpper

134 of 1038 branches covered (0.0%)

Branch coverage included in aggregate %.

5 of 5 new or added lines in 3 files covered. (100.0%)

479 of 2301 relevant lines covered (20.82%)

35781.08 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Runtime.InteropServices;
2
using System.Text;
3

4
using U8Primitives.Abstractions;
5
using U8Primitives.InteropServices;
6

7
namespace U8Primitives;
8

9
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
10
public readonly partial struct U8String
11
{
12
    // TODO: Optimize/deduplicate Concat variants
13
    // TODO: Investigate if it is possible to fold validation for u8 literals
14
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value that comes first.</param>
    /// <param name="right">The value that comes second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty, <paramref name="left"/> when
    /// <paramref name="right"/> is empty, otherwise the result of
    /// <c>U8Manipulation.ConcatUnchecked</c> applied to both operands.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        // An empty operand contributes nothing: hand back the other one untouched.
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        // No Validate call is made here: both inputs are already U8String instances.
        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right.UnsafeSpan);
    }
30

31
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value that comes first.</param>
    /// <param name="right">The bytes that come second; passed to <c>Validate</c> when not empty.</param>
    /// <returns>
    /// <paramref name="left"/> when <paramref name="right"/> is empty; a string created from
    /// <paramref name="right"/> alone when <paramref name="left"/> is empty; otherwise the
    /// concatenation of both.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // The span is validated exactly once, before either construction path uses it.
        Validate(right);

        if (left.IsEmpty)
        {
            return new U8String(right, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right);
    }
46

47
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first; passed to <c>Validate</c> when not empty.</param>
    /// <param name="right">The value that comes second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty; a string created from
    /// <paramref name="left"/> alone when <paramref name="right"/> is empty; otherwise the
    /// concatenation of both.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // The span is validated exactly once, before either construction path uses it.
        Validate(left);

        if (right.IsEmpty)
        {
            return new U8String(left, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left, right.UnsafeSpan);
    }
62

63
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first.</param>
    /// <param name="right">The bytes that come second.</param>
    /// <returns>
    /// <see langword="default"/> when both spans are empty; otherwise a string backed by a newly
    /// allocated array holding <paramref name="left"/> followed by <paramref name="right"/>.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // Validation runs on the joined bytes rather than on each span separately.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
79

80
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var self = this;

        // Slicing the destination span performs the range check on index.
        var target = destination.AsSpan().Slice(index);
        if (target.Length < self.Length)
        {
            // Not enough room after index to hold every byte of this string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        self.UnsafeSpan.CopyTo(target);
    }
×
92

93
    /// <summary>
    /// Normalizes current <see cref="U8String"/> to the specified Unicode normalization form
    /// (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The requested Unicode normalization form.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">
    /// Always thrown: this method has no implementation yet.
    /// </exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
101

102
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given byte values.
    /// </summary>
    /// <param name="oldValue">The byte to look for.</param>
    /// <param name="newValue">The byte to substitute.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
107

108
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given UTF-16 code units.
    /// </summary>
    /// <param name="oldValue">The character to look for.</param>
    /// <param name="newValue">The character to substitute.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
113

114
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given runes.
    /// </summary>
    /// <param name="oldValue">The rune to look for.</param>
    /// <param name="newValue">The rune to substitute.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
119

120
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given byte sequences.
    /// </summary>
    /// <param name="oldValue">The byte sequence to look for.</param>
    /// <param name="newValue">The byte sequence to substitute.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
125

126
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given strings.
    /// </summary>
    /// <param name="oldValue">The string to look for.</param>
    /// <param name="newValue">The string to substitute.</param>
    /// <returns>The value produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
131

132
    /// <summary>
    /// Replaces <c>\r\n</c> sequences with <c>\n</c> on non-Windows platforms.
    /// </summary>
    /// <returns>
    /// This instance when it is empty; otherwise the result of <c>U8Manipulation.ReplaceCore</c>.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// Thrown for a non-empty string when running on Windows, where no implementation exists yet.
    /// </exception>
    public U8String ReplaceLineEndings()
    {
        var deref = this;
        if (deref.IsEmpty)
        {
            return deref;
        }

        if (OperatingSystem.IsWindows())
        {
            // This needs manual loop which is sad
            throw new NotImplementedException();
        }

        return U8Manipulation.ReplaceCore(deref, "\r\n"u8, "\n"u8, validate: false);
    }
149

150
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or <see langword="default"/>
    /// when <paramref name="start"/> equals the length of this instance.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring would
    /// begin in the middle of a code point and be invalid UTF-8.
    /// </exception>
    public U8String Slice(int start)
    {
        var self = this;

        // Same single unsigned comparison as ReadOnly/Span<T>.Slice(int): a negative start
        // turns into a huge value and fails the check together with start > Length.
        if ((ulong)(uint)start > (ulong)(uint)self.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = self.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        // The first byte of the result must not be a continuation byte.
        if (U8Info.IsContinuationByte(in self.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new U8String(self._value, self.Offset + start, remaining);
    }
184

185
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and has the specified length in bytes.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or <see langword="default"/> when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of
    /// <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The substring would begin or end in the middle of a UTF-8 code point and be invalid UTF-8.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // The range check above guarantees 0 <= start and start + length <= source.Length,
            // so this sum cannot overflow.
            var end = start + length;

            // Leading edge: offset 0 is always a valid boundary, otherwise the first byte of the
            // result must not be a continuation byte.
            // Trailing edge: the end of the string is always a valid boundary, so the byte that
            // follows the result is inspected only when it still belongs to this string. Comparing
            // `length` (rather than `end`) against source.Length would read one byte past the end
            // whenever start > 0 and the slice reaches the end of the string.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
223

224
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Forward scan: skip whole whitespace code points, advancing by the size each one reports.
            var start = 0;
            while (start < source.Length)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
                {
                    break;
                }
                start += size;
            }

            // Backward scan: `end` is the index of the last byte to keep, `endSearch` is the cursor.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // `endSearch` now sits on the first byte of a code point, so that is the
                    // position that has to be classified. `end` still points at the last
                    // (continuation) byte of a multi-byte sequence and must not be used here.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Save the last found whitespace code point offset and continue searching
                        // for more whitespace byte sequences from their end. If we don't do this,
                        // we will end up trimming away continuation bytes at the end of the string.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }
278

279
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String TrimStart()
    {
        var deref = this;
        if (deref.IsEmpty)
        {
            return default;
        }

        ref var head = ref deref.UnsafeRef;
        var first = head;

        // Fast path: the string starts with an ASCII byte that is not whitespace, nothing to trim.
        if (U8Info.IsAsciiByte(first) && !U8Info.IsAsciiWhitespace(first))
        {
            return deref;
        }

        // Skip whole whitespace code points, advancing by the size each one reports.
        var offset = 0;
        while (offset < deref.Length
            && U8Info.IsWhitespaceRune(ref head.Add(offset), out var runeSize))
        {
            offset += runeSize;
        }

        return U8Marshal.Slice(deref, offset);
    }
314

315
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string,
    /// or <see langword="default"/> when the current string is empty.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // `end` is the index of the last byte to keep, `endSearch` is the backward cursor.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // `endSearch` now sits on the first byte of a code point, so that is the
                    // position that has to be classified. `end` still points at the last
                    // (continuation) byte of a multi-byte sequence and must not be used here.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Only move `end` once a whole whitespace code point has been seen,
                        // so continuation bytes of a kept code point are never cut off.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }
353

354
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.Trim(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimAscii()
    {
        var deref = this;

        return U8Marshal.Slice(deref, Ascii.Trim(deref));
    }
368

369
    /// <summary>
    /// Removes all the leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.TrimStart(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var deref = this;

        return U8Marshal.Slice(deref, Ascii.TrimStart(deref));
    }
383

384
    /// <summary>
    /// Removes all the trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.TrimEnd(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var deref = this;

        return U8Marshal.Slice(deref, Ascii.TrimEnd(deref));
    }
398

399
    // TODO:
400
    // - Complete impl. depends on porting of InlineArray-based array builder for letters
401
    // which have different lengths in upper/lower case.
402
    // - Remove/rename to ToLowerFallback or move to something like "FallbackInvariantComparer"
403
    // clearly indicating it being slower and inferior alternative to proper implementations
404
    // which call into ICU/NLS/Hybrid-provided case change exports.
405
    /// <summary>
    /// Converts this string to lower case using the supplied converter.
    /// </summary>
    /// <typeparam name="T">The case converter type.</typeparam>
    /// <param name="converter">
    /// Supplies <c>LowercaseHint</c> (offset of the first change and the result length)
    /// and <c>ToLower</c> (the conversion itself).
    /// </param>
    /// <returns>
    /// This instance when it is empty or when the hinted offset falls outside of it;
    /// otherwise a string backed by a newly allocated buffer.
    /// </returns>
    public U8String ToLower<T>(T converter)
        where T : IU8CaseConverter
    {
        // 1. Ask the converter where the first change happens and how long the result is
        // 2. Allocate the result buffer and copy the untouched prefix into it
        // 3. Let the converter write the converted remainder after the prefix
        // 4. Wrap the written part of the buffer into a new string

        var deref = this;
        if (deref.IsEmpty)
        {
            return deref;
        }

        var input = deref.UnsafeSpan;
        var (changeOffset, outputLength) = converter.LowercaseHint(input);

        // The unsigned comparison also rejects negative offsets.
        if ((uint)changeOffset >= (uint)input.Length)
        {
            return deref;
        }

        var buffer = new byte[outputLength];
        var output = buffer.AsSpan();

        input.Slice(0, changeOffset).CopyTo(output);

        var tail = input.Slice(changeOffset);
        var tailOutput = output.Slice(changeOffset);
        var written = converter.ToLower(tail, tailOutput);

        return new U8String(buffer, 0, changeOffset + written);
    }
438

439
    /// <summary>
    /// Converts this string to upper case using the supplied converter.
    /// </summary>
    /// <typeparam name="T">The case converter type.</typeparam>
    /// <param name="converter">
    /// Supplies <c>UppercaseHint</c> (offset of the first change and the result length)
    /// and <c>ToUpper</c> (the conversion itself).
    /// </param>
    /// <returns>
    /// This instance when it is empty or when the hinted offset falls outside of it;
    /// otherwise a string backed by a newly allocated buffer.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String ToUpper<T>(T converter)
        where T : IU8CaseConverter
    {
        var deref = this;
        if (deref.IsEmpty)
        {
            return deref;
        }

        var input = deref.UnsafeSpan;
        var (changeOffset, outputLength) = converter.UppercaseHint(input);

        // The unsigned comparison also rejects negative offsets.
        if ((uint)changeOffset >= (uint)input.Length)
        {
            return deref;
        }

        var buffer = new byte[outputLength];
        var output = buffer.AsSpan();

        // Bytes before the first change are copied verbatim.
        input.Slice(0, changeOffset).CopyTo(output);

        var tail = input.Slice(changeOffset);
        var tailOutput = output.Slice(changeOffset);
        var written = converter.ToUpper(tail, tailOutput);

        return new U8String(buffer, 0, changeOffset + written);
    }
466

467
    // TODO: docs
468
    // TODO 2: scan for lower/uppercase chars and only allocate if there are any
469
    /// <summary>
    /// Creates a copy of this string processed by <c>U8Manipulation.ToLowerAscii</c>
    /// (ASCII lower-casing, judging by the helper's name).
    /// </summary>
    /// <returns>
    /// <see langword="default"/> when this string is empty; otherwise a string of the same
    /// length backed by a newly allocated array.
    /// </returns>
    public U8String ToLowerAscii()
    {
        var deref = this;
        var length = deref.Length;
        if (length <= 0)
        {
            return default;
        }

        var buffer = new byte[length];
        ref var output = ref MemoryMarshal.GetArrayDataReference(buffer);

        U8Manipulation.ToLowerAscii(ref deref.UnsafeRef, ref output, (uint)length);

        return new U8String(buffer, 0, length);
    }
486

487
    /// <summary>
    /// Creates a copy of this string processed by <c>U8Manipulation.ToUpperAscii</c>
    /// (ASCII upper-casing, judging by the helper's name).
    /// </summary>
    /// <returns>
    /// <see langword="default"/> when this string is empty; otherwise a string of the same
    /// length backed by a newly allocated array.
    /// </returns>
    public U8String ToUpperAscii()
    {
        var deref = this;
        var length = deref.Length;
        if (length <= 0)
        {
            return default;
        }

        var buffer = new byte[length];
        ref var output = ref MemoryMarshal.GetArrayDataReference(buffer);

        U8Manipulation.ToUpperAscii(ref deref.UnsafeRef, ref output, (uint)length);

        return new U8String(buffer, 0, length);
    }
504
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc