• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5929604283

21 Aug 2023 05:34PM UTC coverage: 20.165% (+0.2%) from 19.993%
5929604283

push

github

neon-sunset
feat: dedup concat, add concat byte

122 of 858 branches covered (14.22%)

Branch coverage included in aggregate %.

11 of 11 new or added lines in 3 files covered. (100.0%)

439 of 1924 relevant lines covered (22.82%)

26556.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Buffers;
2
using System.Diagnostics.CodeAnalysis;
3
using System.Runtime.InteropServices;
4
using System.Text;
5
using U8Primitives.InteropServices;
6

7
namespace U8Primitives;
8

9
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
10
public readonly partial struct U8String
11
{
12
    // TODO: Optimize/deduplicate Concat variants
13
    // TODO: Investigate if it is possible to fold validation for u8 literals
14
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The first operand.</param>
    /// <param name="right">The second operand.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty, <paramref name="left"/> when
    /// <paramref name="right"/> is empty, otherwise the result of <c>U8Manipulation.ConcatUnchecked</c>
    /// applied to both operands.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right.UnsafeSpan);
    }
30

31
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The first operand.</param>
    /// <param name="right">The bytes to append; passed to <c>Validate</c> when not empty.</param>
    /// <returns>
    /// <paramref name="left"/> unchanged when <paramref name="right"/> is empty; a new
    /// <see cref="U8String"/> built from <paramref name="right"/> when <paramref name="left"/> is empty;
    /// otherwise the concatenation of both.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // Validation happens once here, which is why the paths below skip it.
        Validate(right);

        if (left.IsEmpty)
        {
            return new U8String(right, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right);
    }
46

47
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes to prepend; passed to <c>Validate</c> when not empty.</param>
    /// <param name="right">The second operand.</param>
    /// <returns>
    /// <paramref name="right"/> unchanged when <paramref name="left"/> is empty; a new
    /// <see cref="U8String"/> built from <paramref name="left"/> when <paramref name="right"/> is empty;
    /// otherwise the concatenation of both.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // Validation happens once here, which is why the paths below skip it.
        Validate(left);

        if (right.IsEmpty)
        {
            return new U8String(left, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left, right.UnsafeSpan);
    }
62

63
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first.</param>
    /// <param name="right">The bytes placed second.</param>
    /// <returns>
    /// The default (empty) <see cref="U8String"/> when the combined length is zero; otherwise a
    /// string backed by a freshly allocated array holding both inputs.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // The joined buffer is validated as a whole, after both halves have been copied in.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
79

80
    /// <summary>
    /// Normalizes the current <see cref="U8String"/> to the requested Unicode normalization form.
    /// When no form is given, <see cref="NormalizationForm.FormC"/> is used.
    /// </summary>
    /// <param name="form">The target Unicode normalization form.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">Always thrown; this method is not implemented yet.</exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
88

89
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given byte values.
    /// </summary>
    /// <param name="oldValue">The byte to look for.</param>
    /// <param name="newValue">The byte to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
94

95
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given <see cref="char"/> values.
    /// </summary>
    /// <param name="oldValue">The character to look for.</param>
    /// <param name="newValue">The character to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
100

101
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given <see cref="Rune"/> values.
    /// </summary>
    /// <param name="oldValue">The rune to look for.</param>
    /// <param name="newValue">The rune to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
106

107
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given byte sequences.
    /// </summary>
    /// <param name="oldValue">The byte sequence to look for.</param>
    /// <param name="newValue">The byte sequence to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
112

113
    /// <summary>
    /// Forwards to <c>U8Manipulation.ReplaceUnchecked</c> with this instance and the given
    /// <see cref="U8String"/> values.
    /// </summary>
    /// <param name="oldValue">The string to look for.</param>
    /// <param name="newValue">The string to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.ReplaceUnchecked</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.ReplaceUnchecked(this, oldValue, newValue);
118

119
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var self = this;

        // Slicing the destination throws ArgumentOutOfRangeException when index lies outside the array.
        var target = destination.AsSpan().Slice(index);
        if (target.Length < self.Length)
        {
            // The remaining space after index cannot hold the whole string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        self.UnsafeSpan.CopyTo(target);
    }
×
131

132
    /// <summary>
    /// Returns the part of this instance that begins at the given byte offset and
    /// runs to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or the default (empty) value
    /// when nothing remains after <paramref name="start"/>.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring would begin
    /// in the middle of a code point and be invalid UTF-8.
    /// </exception>
    public U8String Slice(int start)
    {
        var self = this;

        // Same unsigned comparison as ReadOnly/Span<T>.Slice(int): a negative start
        // becomes a huge unsigned value and is rejected together with too-large offsets.
        if ((ulong)(uint)start > (ulong)(uint)self.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = self.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        if (U8Info.IsContinuationByte(in self.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new(self._value, self.Offset + start, remaining);
    }
166

167
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at a specified
    /// byte position and has a specified length in bytes.
    /// </summary>
    /// <param name="start">The zero-based starting byte position of a substring in this instance.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or the default (empty) value when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of
    /// <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The substring would begin or end in the middle of a UTF-8 code point and would be invalid UTF-8.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // Cannot overflow: the range check above guarantees start + length <= source.Length.
            var end = start + length;

            // The byte right after the slice may only be inspected when it still belongs to this
            // string. When the slice ends exactly at the end of the string there is nothing to
            // check, and reading that position would touch memory past the string.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
205

206
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Forward scan: advance past leading whitespace one code point at a time.
            var start = 0;
            while (start < source.Length)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
                {
                    break;
                }
                start += size;
            }

            // Backward scan: `end` is the last byte to keep, `endSearch` walks back over
            // continuation bytes until it reaches the first byte of a code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // Classify the code point from its first byte (endSearch), not from `end`,
                    // which points at its last byte for multi-byte sequences.
                    // NOTE(review): assumes IsNonAsciiWhitespace decodes forward from the
                    // referenced byte, as its `out` size parameter suggests — confirm.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Save the last found whitespace code point offset and continue searching
                        // for more whitespace byte sequences from their end. If we don't do this,
                        // we will end up trimming away continuation bytes at the end of the string.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }
260

261
    /// <summary>
    /// Strips every leading whitespace character from the current string.
    /// </summary>
    /// <returns>
    /// The sub-slice left over once all whitespace characters at the start
    /// of the current string have been skipped.
    /// </returns>
    public U8String TrimStart()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return default;
        }

        ref var first = ref self.UnsafeRef;
        var lead = first;

        // Fast path: an ASCII first byte that is not whitespace means there is nothing to trim.
        if (U8Info.IsAsciiByte(lead) && !U8Info.IsAsciiWhitespace(lead))
        {
            return self;
        }

        // Walk forward one code point at a time while whitespace keeps being found.
        var offset = 0;
        while (offset < self.Length
            && U8Info.IsWhitespaceRune(ref first.Add(offset), out var width))
        {
            offset += width;
        }

        return U8Marshal.Slice(self, offset);
    }
296

297
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // `end` is the last byte to keep, `endSearch` walks back over continuation
            // bytes until it reaches the first byte of a code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // Classify the code point from its first byte (endSearch), not from `end`,
                    // which points at its last byte for multi-byte sequences.
                    // NOTE(review): assumes IsNonAsciiWhitespace decodes forward from the
                    // referenced byte, as its `out` size parameter suggests — confirm.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Everything from endSearch onward is whitespace; keep scanning backwards.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }
335

336
    /// <summary>
    /// Strips ASCII whitespace from both ends of the current string.
    /// </summary>
    /// <returns>
    /// The sub-slice selected by the range that <see cref="Ascii.Trim(ReadOnlySpan{byte})"/>
    /// reports for the current string.
    /// </returns>
    public U8String TrimAscii()
    {
        var self = this;
        return U8Marshal.Slice(self, Ascii.Trim(self));
    }
350

351
    /// <summary>
    /// Strips ASCII whitespace from the start of the current string.
    /// </summary>
    /// <returns>
    /// The sub-slice selected by the range that <see cref="Ascii.TrimStart(ReadOnlySpan{byte})"/>
    /// reports for the current string.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var self = this;
        return U8Marshal.Slice(self, Ascii.TrimStart(self));
    }
365

366
    /// <summary>
    /// Strips ASCII whitespace from the end of the current string.
    /// </summary>
    /// <returns>
    /// The sub-slice selected by the range that <see cref="Ascii.TrimEnd(ReadOnlySpan{byte})"/>
    /// reports for the current string.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var self = this;
        return U8Marshal.Slice(self, Ascii.TrimEnd(self));
    }
380

381
    // TODO:
382
    // - Complete impl. depends on porting of InlineArray-based array builder for letters
383
    // which have different lengths in upper/lower case.
384
    // - Remove/rename to ToLowerFallback or move to something like "FallbackInvariantComparer"
385
    // clearly indicating it being slower and inferior alternative to proper implementations
386
    // which call into ICU/NLS/Hybrid-provided case change exports.
387
    /// <summary>
    /// Produces a lowercase copy of the current string: an ASCII pass via
    /// <c>Ascii.ToLower</c> first, then, if that reports non-ASCII data, a per-rune
    /// <see cref="Rune.ToLowerInvariant(Rune)"/> pass over the remainder.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> holding the converted bytes, or the default (empty) value
    /// when the current string is empty.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The output buffer has fewer than four bytes left before a rune is written.
    /// </exception>
    public U8String ToLower()
    {
        var source = this;
        if (source.Length > 0)
        {
            // Three bytes of padding, matching ToUpper: each rune is stored with a single 4-byte
            // write, so a code point near the end of the string needs the extra room. Without it
            // the capacity check below rejects any non-ASCII rune in the last three bytes.
            var lowercase = new byte[source.Length + 3];
            var destination = lowercase.AsSpan();

            var result = Ascii.ToLower(source, destination, out var consumed);
            if (result is OperationStatus.InvalidData)
            {
                // The ASCII pass stopped at `consumed`; convert the rest rune by rune.
                foreach (var rune in U8Marshal.Slice(source, consumed).Runes)
                {
                    var lower = Rune.ToLowerInvariant(rune);
                    var (scalar, length) = U8Conversions.RuneToCodepoint(lower);
                    if (consumed + 4 > destination.Length)
                    {
                        [DoesNotReturn]
                        static void Unimpl()
                        {
                            throw new NotImplementedException();
                        }

                        Unimpl();
                    }

                    // Store four bytes at once; only `length` of them count, the rest are
                    // overwritten by the next rune or cut off by the final length.
                    Unsafe.As<byte, uint>(ref destination.AsRef(consumed)) = scalar;
                    consumed += length;
                }
            }

            return new(lowercase, 0, consumed);
        }

        return default;
    }
423

424
    /// <summary>
    /// Produces an uppercase copy of the current string: an ASCII pass via
    /// <c>Ascii.ToUpper</c> first, then, if that reports non-ASCII data, a per-rune
    /// <see cref="Rune.ToUpperInvariant(Rune)"/> pass over the remainder.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> holding the converted bytes, or the default (empty) value
    /// when the current string is empty.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The output buffer has fewer than four bytes left before a rune is written.
    /// </exception>
    public U8String ToUpper()
    {
        var self = this;
        if (self.Length <= 0)
        {
            return default;
        }

        // The buffer carries three spare bytes because each rune is stored with one 4-byte write.
        var buffer = new byte[self.Length + 3];
        var output = buffer.AsSpan();

        var status = Ascii.ToUpper(self, output, out var written);
        if (status is OperationStatus.InvalidData)
        {
            // The ASCII pass stopped at `written`; convert the rest rune by rune.
            foreach (var rune in U8Marshal.Slice(self, written).Runes)
            {
                var (scalar, size) = U8Conversions.RuneToCodepoint(Rune.ToUpperInvariant(rune));
                if (written + 4 > output.Length)
                {
                    Unimpl();
                }

                Unsafe.As<byte, uint>(ref output.AsRef(written)) = scalar;
                written += size;
            }
        }

        return new(buffer, 0, written);

        [DoesNotReturn]
        static void Unimpl()
        {
            throw new NotImplementedException();
        }
    }
460

461
    // TODO: docs
462
    /// <summary>
    /// Creates a copy of the current string by running <c>U8Manipulation.ToLowerAscii</c>
    /// from this string's bytes into a newly allocated array of the same length.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> over the filled array, or the default (empty) value
    /// when the current string is empty.
    /// </returns>
    public U8String ToLowerAscii()
    {
        var self = this;
        var count = self.Length;
        if (count <= 0)
        {
            return default;
        }

        var buffer = new byte[count];

        U8Manipulation.ToLowerAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)count);

        return new(buffer, 0, count);
    }
479

480
    /// <summary>
    /// Creates a copy of the current string by running <c>U8Manipulation.ToUpperAscii</c>
    /// from this string's bytes into a newly allocated array of the same length.
    /// </summary>
    /// <returns>
    /// A new <see cref="U8String"/> over the filled array, or the default (empty) value
    /// when the current string is empty.
    /// </returns>
    public U8String ToUpperAscii()
    {
        var self = this;
        var count = self.Length;
        if (count <= 0)
        {
            return default;
        }

        var buffer = new byte[count];

        U8Manipulation.ToUpperAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)count);

        return new(buffer, 0, count);
    }
497
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc