• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5898062255

18 Aug 2023 02:15AM UTC coverage: 21.646% (+1.8%) from 19.883%
5898062255

push

github

neon-sunset
feat: more whitespace work, fix trimming

115 of 776 branches covered (14.82%)

Branch coverage included in aggregate %.

149 of 149 new or added lines in 6 files covered. (100.0%)

411 of 1654 relevant lines covered (24.85%)

17239.56 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Buffers;
2
using System.Text;
3
using U8Primitives.InteropServices;
4

5
namespace U8Primitives;
6

7
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
8
public readonly partial struct U8String
9
{
10
    // TODO: Optimize/deduplicate Concat variants
11
    // TODO: Investigate if it is possible to fold validation for u8 literals
12
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value whose bytes come first.</param>
    /// <param name="right">The value whose bytes come second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty,
    /// <paramref name="left"/> when <paramref name="right"/> is empty,
    /// otherwise a new <see cref="U8String"/> backed by a freshly allocated array.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        // Either operand being empty means the other one already is the result.
        if (left.IsEmpty)
        {
            return right;
        }

        if (right.IsEmpty)
        {
            return left;
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        // No validation call here: both inputs are already U8String instances.
        return new U8String(buffer, 0, total);
    }
32

33
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value whose bytes come first.</param>
    /// <param name="right">The bytes to append; passed to <c>Validate</c> when not empty.</param>
    /// <returns>
    /// <paramref name="left"/> when <paramref name="right"/> is empty; otherwise a new
    /// <see cref="U8String"/> containing the bytes of both operands.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // The raw span is checked once up front, before either non-empty path uses it.
        Validate(right);

        if (left.IsEmpty)
        {
            // Already validated above, so the constructor is told to skip it.
            return new U8String(right, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
54

55
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first; passed to <c>Validate</c> when not empty.</param>
    /// <param name="right">The value whose bytes come second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty; otherwise a new
    /// <see cref="U8String"/> containing the bytes of both operands.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // The raw span is checked once up front, before either non-empty path uses it.
        Validate(left);

        if (right.IsEmpty)
        {
            // Already validated above, so the constructor is told to skip it.
            return new U8String(left, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
76

77
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes that come first.</param>
    /// <param name="right">The bytes that come second.</param>
    /// <returns>
    /// The default (empty) <see cref="U8String"/> when the combined length is zero; otherwise
    /// a new instance over the combined bytes.
    /// </returns>
    /// <remarks>
    /// <c>Validate</c> is applied to the combined buffer rather than to each input separately.
    /// </remarks>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
93

94
    /// <summary>
    /// Intended to normalize the current <see cref="U8String"/> to the specified Unicode
    /// normalization form (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The requested Unicode normalization form.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">
    /// Always thrown: normalization is not implemented yet.
    /// </exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
102

103
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given byte values.
    /// </summary>
    /// <param name="oldValue">The byte to be replaced.</param>
    /// <param name="newValue">The byte to replace it with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
108

109
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given UTF-16 code units.
    /// </summary>
    /// <param name="oldValue">The character to be replaced.</param>
    /// <param name="newValue">The character to replace it with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
114

115
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given runes.
    /// </summary>
    /// <param name="oldValue">The rune to be replaced.</param>
    /// <param name="newValue">The rune to replace it with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
120

121
    /// <summary>
    /// Forwards to <c>U8Manipulation.Replace</c> with this instance and the given byte sequences.
    /// </summary>
    /// <param name="oldValue">The byte sequence to be replaced.</param>
    /// <param name="newValue">The byte sequence to replace it with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
126

127
    /// <summary>
    /// Forwards to <c>U8Manipulation.ReplaceUnchecked</c> with this instance and the given values.
    /// </summary>
    /// <param name="oldValue">The value to be replaced.</param>
    /// <param name="newValue">The value to replace it with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.ReplaceUnchecked</c>.</returns>
    /// <remarks>
    /// Unlike the span-based overload, this one uses the <c>Unchecked</c> variant —
    /// presumably because both arguments are already <see cref="U8String"/> instances; confirm
    /// against <c>U8Manipulation</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.ReplaceUnchecked(this, oldValue, newValue);
132

133
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var self = this;

        // Everything from index to the end of the destination array is the writable window.
        var target = destination.AsSpan()[index..];
        if (target.Length < self.Length)
        {
            // The window is too small to hold this string's bytes.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        self.UnsafeSpan.CopyTo(target);
    }
×
145

146
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <returns>
    /// A substring view over the same underlying buffer that begins at <paramref name="start"/>,
    /// or the default (empty) value when nothing remains after <paramref name="start"/>.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring would begin
    /// in the middle of a code point and would not be valid UTF-8.
    /// </exception>
    public U8String Slice(int start)
    {
        var self = this;
        var total = self.Length;

        // Same single unsigned comparison as ReadOnly/Span<T>.Slice(int): a negative
        // start becomes a large unsigned value and fails the check as well.
        if ((ulong)(uint)start > (ulong)(uint)total)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = total - start;
        if (remaining <= 0)
        {
            return default;
        }

        // The first byte of the result must begin a code point.
        if (U8Info.IsContinuationByte(in self.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new U8String(self._value, self.Offset + start, remaining);
    }
180

181
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and has the specified length in bytes.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view over the same underlying buffer that begins at <paramref name="start"/>
    /// and has <paramref name="length"/> bytes, or the default (empty) value when
    /// <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The substring would begin or end in the middle of a UTF-8 code point and would not be valid UTF-8.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // Cannot overflow: the range check above guarantees start + length <= source.Length.
            var end = start + length;

            // The byte right after the slice is only inspected when it still belongs to this
            // string. Guarding on `length < source.Length` is not sufficient: with start > 0 and
            // end == source.Length it would read one byte past the end of the string.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
219

220
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Leading side: advance until the first position that is not whitespace.
            // NOTE(review): this steps one byte at a time, so correctness for multi-byte
            // whitespace depends on how IsWhitespaceRune treats continuation bytes - confirm.
            var start = 0;
            for (; start < source.Length; start++)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start)))
                {
                    break;
                }
            }

            // Trailing side: walk backwards, skipping continuation bytes until the lead byte
            // of each code point is reached. `end` is the index of the last byte to keep.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // The code point starts at endSearch, so that is where it has to be
                    // evaluated from. In this branch `end` is the final (continuation) byte
                    // of the same sequence and must not be used as the decoding position.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch)))
                    {
                        // Save the last found whitespace code point offset and continue searching
                        // for more whitespace byte sequences from their end. If we don't do this,
                        // we will end up trimming away continuation bytes at the end of the string.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }
273

274
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// The current string itself when its first byte is a non-whitespace ASCII byte; otherwise
    /// a sub-slice that begins at the first position not reported as whitespace.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
    public U8String TrimStart()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return default;
        }

        ref var head = ref self.UnsafeRef;
        var first = head;

        // Fast path: an ASCII first byte that is not whitespace means there is nothing to trim.
        if (U8Info.IsAsciiByte(first) && !U8Info.IsAsciiWhitespace(first))
        {
            return self;
        }

        var offset = 0;
        while (offset < self.Length && U8Info.IsWhitespaceRune(ref head.Add(offset)))
        {
            offset++;
        }

        return U8Marshal.Slice(self, offset);
    }
309

310
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Walk backwards, skipping continuation bytes until the lead byte of each
            // code point is reached. `end` is the index of the last byte to keep.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // The code point starts at endSearch, so that is where it has to be
                    // evaluated from. In this branch `end` is the final (continuation) byte
                    // of the same sequence and must not be used as the decoding position.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch)))
                    {
                        // Everything from this lead byte onwards is whitespace; keep scanning
                        // from the byte just before it.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }
348

349
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <c>Ascii.Trim</c>, or the default (empty)
    /// value when that range is empty.
    /// </returns>
    public U8String TrimAscii()
    {
        var self = this;
        var bounds = Ascii.Trim(self);

        if (bounds.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(self, bounds);
    }
365

366
    /// <summary>
    /// Removes all the leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <c>Ascii.TrimStart</c>, or the default (empty)
    /// value when that range is empty.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var self = this;
        var bounds = Ascii.TrimStart(self);

        if (bounds.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(self, bounds);
    }
382

383
    /// <summary>
    /// Removes all the trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <c>Ascii.TrimEnd</c>, or the default (empty)
    /// value when that range is empty.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var self = this;
        var bounds = Ascii.TrimEnd(self);

        if (bounds.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(self, bounds);
    }
399

400
    /// <summary>
    /// Returns a copy of this ASCII string converted to lower case.
    /// </summary>
    /// <returns>
    /// A lowercase equivalent of the current ASCII string backed by a newly allocated array,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The current string is not a valid ASCII sequence.
    /// </exception>
    public U8String ToLowerAscii()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return default;
        }

        var bytes = self.UnsafeSpan;
        var lowered = new byte[bytes.Length];

        // InvalidData is the only status acted upon here.
        if (Ascii.ToLower(bytes, lowered, out _) is OperationStatus.InvalidData)
        {
            ThrowHelpers.InvalidAscii();
        }

        return new U8String(lowered, 0, bytes.Length);
    }
425

426
    /// <summary>
    /// Returns a copy of this ASCII string converted to upper case.
    /// </summary>
    /// <returns>
    /// The uppercase equivalent of the current ASCII string backed by a newly allocated array,
    /// or the default (empty) value when the current string is empty.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The current string is not a valid ASCII sequence.
    /// </exception>
    public U8String ToUpperAscii()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return default;
        }

        var bytes = self.UnsafeSpan;
        var raised = new byte[bytes.Length];

        // InvalidData is the only status acted upon here.
        if (Ascii.ToUpper(bytes, raised, out _) is OperationStatus.InvalidData)
        {
            ThrowHelpers.InvalidAscii();
        }

        return new U8String(raised, 0, bytes.Length);
    }
451
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc