• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5914393035

20 Aug 2023 12:40AM UTC coverage: 21.769% (-0.3%) from 22.078%
5914393035

push

github

neon-sunset
perf: optimize ascii case change

122 of 796 branches covered (15.33%)

Branch coverage included in aggregate %.

118 of 118 new or added lines in 2 files covered. (100.0%)

439 of 1781 relevant lines covered (24.65%)

28688.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Runtime.InteropServices;
2
using System.Text;
3
using U8Primitives.InteropServices;
4

5
namespace U8Primitives;
6

7
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
8
public readonly partial struct U8String
9
{
10
    // TODO: Optimize/deduplicate Concat variants
11
    // TODO: Investigate if it is possible fold validation for u8 literals
12
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value whose bytes come first.</param>
    /// <param name="right">The value whose bytes come second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty, <paramref name="left"/> when
    /// <paramref name="right"/> is empty, otherwise a new <see cref="U8String"/> over a freshly
    /// allocated buffer that holds the bytes of both operands.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        // An empty operand contributes nothing: hand back the other operand without copying.
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
32

33
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value whose bytes come first.</param>
    /// <param name="right">The bytes to append; passed through <c>Validate</c> when non-empty.</param>
    /// <returns>
    /// <paramref name="left"/> when <paramref name="right"/> is empty, otherwise a new
    /// <see cref="U8String"/> containing the bytes of <paramref name="left"/> followed by
    /// <paramref name="right"/>.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // Only the raw span needs checking here; it is validated once, before any allocation.
        Validate(right);

        if (left.IsEmpty)
        {
            // Already validated above, so the constructor is told to skip its own validation.
            return new U8String(right, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
54

55
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first; passed through <c>Validate</c> when non-empty.</param>
    /// <param name="right">The value whose bytes come second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty, otherwise a new
    /// <see cref="U8String"/> containing the bytes of <paramref name="left"/> followed by
    /// <paramref name="right"/>.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // Only the raw span needs checking here; it is validated once, before any allocation.
        Validate(left);

        if (right.IsEmpty)
        {
            // Already validated above, so the constructor is told to skip its own validation.
            return new U8String(left, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
76

77
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first.</param>
    /// <param name="right">The bytes that come second.</param>
    /// <returns>
    /// The default (empty) <see cref="U8String"/> when both spans are empty, otherwise a new
    /// string over a freshly allocated buffer holding both spans back to back.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // Validation runs on the combined buffer, i.e. after both halves have been copied.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
93

94
    /// <summary>
    /// Intended to normalize the current <see cref="U8String"/> to the specified Unicode
    /// normalization form (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The requested normalization form.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">
    /// Always thrown: this method currently has no implementation.
    /// </exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
102

103
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given byte values;
    /// the replacement semantics are defined by that helper.
    /// </summary>
    /// <param name="oldValue">The byte to look for.</param>
    /// <param name="newValue">The byte to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
108

109
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given UTF-16 code units;
    /// the replacement semantics are defined by that helper.
    /// </summary>
    /// <param name="oldValue">The character to look for.</param>
    /// <param name="newValue">The character to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
114

115
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given runes;
    /// the replacement semantics are defined by that helper.
    /// </summary>
    /// <param name="oldValue">The rune to look for.</param>
    /// <param name="newValue">The rune to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
120

121
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this string and the given byte sequences;
    /// the replacement semantics (including any validation of the spans) are defined by that helper.
    /// </summary>
    /// <param name="oldValue">The byte sequence to look for.</param>
    /// <param name="newValue">The byte sequence to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
126

127
    /// <summary>
    /// Forwards to <c>U8Manipulation.ReplaceUnchecked</c> with this string and the given values.
    /// </summary>
    /// <remarks>
    /// Unlike the other overloads this one calls the <c>Unchecked</c> helper, presumably because
    /// <see cref="U8String"/> arguments are already validated UTF-8 (confirm against
    /// <c>U8Manipulation</c>).
    /// </remarks>
    /// <param name="oldValue">The string to look for.</param>
    /// <param name="newValue">The string to substitute.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.ReplaceUnchecked</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.ReplaceUnchecked(this, oldValue, newValue);
132

133
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var source = this;

        // Everything from index onwards is the space available for the copy.
        var target = destination.AsSpan()[index..];
        if (target.Length < source.Length)
        {
            // Not enough room after index to hold the whole string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        source.UnsafeSpan.CopyTo(target);
    }
×
145

146
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based offset at which the substring begins in this instance.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or the default (empty)
    /// value when <paramref name="start"/> equals the length of this instance.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> falls in the middle of a UTF-8 code point, so the resulting
    /// substring would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int) implementation:
        // the unsigned comparison rejects negative values and values past the end in one check.
        if ((ulong)(uint)start > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = source.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        // The first byte of the result must not be a continuation byte.
        if (U8Info.IsContinuationByte(in source.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new(source._value, source.Offset + start, remaining);
    }
180

181
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// offset and has the specified length.
    /// </summary>
    /// <param name="start">The zero-based offset at which the substring begins in this instance.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or the default (empty) value when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The requested range begins or ends in the middle of a UTF-8 code point, so the resulting
    /// substring would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // Cannot overflow: start + length <= source.Length was established above.
            var end = start + length;

            // The byte following the slice may only be inspected when it actually belongs to this
            // string, i.e. when end < source.Length. Guarding on length < source.Length instead
            // would read one byte past the end whenever start > 0 and the slice reaches the end.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
219

220
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Forward scan: advance past whole whitespace code points at the front.
            var start = 0;
            while (start < source.Length)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
                {
                    break;
                }
                start += size;
            }

            // Backward scan: walk bytes from the end, skipping continuation bytes until the
            // first byte of a code point is found, then classify that code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // A non-ASCII code point must be examined from its first byte (endSearch).
                    // 'end' still points at the last byte of the sequence, which is a
                    // continuation byte and cannot be decoded as the start of a code point.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Save the last found whitespace code point offset and continue searching
                        // for more whitespace byte sequences from their end. If we don't do this,
                        // we will end up trimming away continuation bytes at the end of the string.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }
274

275
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string.
    /// </returns>
    public U8String TrimStart()
    {
        var source = this;
        if (source.IsEmpty)
        {
            return default;
        }

        ref var first = ref source.UnsafeRef;
        var lead = first;

        // Fast path: an ASCII, non-whitespace first byte means there is nothing to trim.
        if (U8Info.IsAsciiByte(lead) && !U8Info.IsAsciiWhitespace(lead))
        {
            return source;
        }

        // Advance past whole whitespace code points, using the size reported for each one.
        var offset = 0;
        while (offset < source.Length
            && U8Info.IsWhitespaceRune(ref first.Add(offset), out var size))
        {
            offset += size;
        }

        return U8Marshal.Slice(source, offset);
    }
310

311
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Walk bytes from the end, skipping continuation bytes until the first byte of a
            // code point is found, then classify that code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // A non-ASCII code point must be examined from its first byte (endSearch).
                    // 'end' still points at the last byte of the sequence, which is a
                    // continuation byte and cannot be decoded as the start of a code point.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Whitespace: the kept range now ends just before this code point.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }
349

350
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start and end of the current string,
    /// or the default (empty) value when nothing remains.
    /// </returns>
    public U8String TrimAscii()
    {
        var source = this;
        var kept = Ascii.Trim(source);

        if (kept.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, kept);
    }
366

367
    /// <summary>
    /// Removes all the leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start of the current string,
    /// or the default (empty) value when nothing remains.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var source = this;
        var kept = Ascii.TrimStart(source);

        if (kept.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, kept);
    }
383

384
    /// <summary>
    /// Removes all the trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the end of the current string,
    /// or the default (empty) value when nothing remains.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var source = this;
        var kept = Ascii.TrimEnd(source);

        if (kept.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, kept);
    }
400

401
    /// <summary>
    /// Produces a new string by running <c>U8Manipulation.ToLowerAscii</c> over the bytes of the
    /// current string into a freshly allocated buffer of the same length.
    /// </summary>
    /// <remarks>
    /// Presumably only ASCII letters are changed and all other bytes are copied as-is
    /// (confirm against <c>U8Manipulation.ToLowerAscii</c>).
    /// </remarks>
    /// <returns>
    /// The converted string, or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String ToLowerAscii()
    {
        var source = this;
        var count = source.Length;
        if (count <= 0)
        {
            return default;
        }

        var buffer = new byte[count];
        ref var src = ref source.UnsafeRef;
        ref var dst = ref MemoryMarshal.GetArrayDataReference(buffer);

        U8Manipulation.ToLowerAscii(ref src, ref dst, (uint)count);

        return new(buffer, 0, count);
    }
419

420
    /// <summary>
    /// Produces a new string by running <c>U8Manipulation.ToUpperAscii</c> over the bytes of the
    /// current string into a freshly allocated buffer of the same length.
    /// </summary>
    /// <remarks>
    /// Presumably only ASCII letters are changed and all other bytes are copied as-is
    /// (confirm against <c>U8Manipulation.ToUpperAscii</c>).
    /// </remarks>
    /// <returns>
    /// The converted string, or the default (empty) value when the current string is empty.
    /// </returns>
    public U8String ToUpperAscii()
    {
        var source = this;
        var count = source.Length;
        if (count <= 0)
        {
            return default;
        }

        var buffer = new byte[count];
        ref var src = ref source.UnsafeRef;
        ref var dst = ref MemoryMarshal.GetArrayDataReference(buffer);

        U8Manipulation.ToUpperAscii(ref src, ref dst, (uint)count);

        return new(buffer, 0, count);
    }
437
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc