• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5876236943

pending completion
5876236943

push

github

neon-sunset
fix: do not trim away continuation bytes

102 of 768 branches covered (13.28%)

Branch coverage included in aggregate %.

7 of 7 new or added lines in 1 file covered. (100.0%)

371 of 1631 relevant lines covered (22.75%)

436.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/U8String.Manipulation.cs
1
using System.Buffers;
2
using System.Text;
3
using U8Primitives.InteropServices;
4

5
namespace U8Primitives;
6

7
#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
8
public readonly partial struct U8String
9
{
10
    // TODO: Optimize/deduplicate Concat variants
11
    // TODO: Investigate if it is possible fold validation for u8 literals
12
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The value placed first.</param>
    /// <param name="right">The value placed second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty,
    /// <paramref name="left"/> when <paramref name="right"/> is empty,
    /// otherwise a new <see cref="U8String"/> backed by a freshly allocated array.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        // Nothing to prepend: the right operand is the result as-is.
        if (left.IsEmpty)
        {
            return right;
        }

        // Nothing to append: the left operand is the result as-is.
        if (right.IsEmpty)
        {
            return left;
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
32

33
    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The value placed first.</param>
    /// <param name="right">The bytes placed second; passed to <c>Validate</c> when non-empty.</param>
    /// <returns>
    /// <paramref name="left"/> when <paramref name="right"/> is empty, otherwise a new
    /// <see cref="U8String"/> containing the bytes of both operands.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // NOTE(review): Validate presumably throws on malformed UTF-8 - confirm against its definition.
        Validate(right);

        if (left.IsEmpty)
        {
            // Already validated above, so the constructor does not need to repeat the check.
            return new U8String(right, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.UnsafeSpan.CopyTo(buffer);
        right.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
54

55
    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first; passed to <c>Validate</c> when non-empty.</param>
    /// <param name="right">The value placed second.</param>
    /// <returns>
    /// <paramref name="right"/> when <paramref name="left"/> is empty, otherwise a new
    /// <see cref="U8String"/> containing the bytes of both operands.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // NOTE(review): Validate presumably throws on malformed UTF-8 - confirm against its definition.
        Validate(left);

        if (right.IsEmpty)
        {
            // Already validated above, so the constructor does not need to repeat the check.
            return new U8String(left, skipValidation: true);
        }

        var total = left.Length + right.Length;
        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.UnsafeSpan.CopyTo(buffer.AsSpan(left.Length));

        return new U8String(buffer, 0, total);
    }
76

77
    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The bytes placed first.</param>
    /// <param name="right">The bytes placed second.</param>
    /// <returns>
    /// The default (empty) <see cref="U8String"/> when both spans are empty, otherwise a new
    /// <see cref="U8String"/> over a freshly allocated array holding both operands.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // The combined buffer is validated as a whole, after both halves have been copied.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }
93

94
    /// <summary>
    /// Normalizes current <see cref="U8String"/> to the specified Unicode normalization form (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The Unicode normalization form to apply.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">Always thrown: normalization is not implemented yet.</exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();
102

103
    /// <summary>
    /// Byte overload of <c>Replace</c>; forwards to <c>U8Manipulation.Replace</c> with this instance as the source.
    /// </summary>
    /// <param name="oldValue">The byte to be replaced.</param>
    /// <param name="newValue">The byte to replace <paramref name="oldValue"/> with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
108

109
    /// <summary>
    /// <see cref="char"/> overload of <c>Replace</c>; forwards to <c>U8Manipulation.Replace</c> with this instance as the source.
    /// </summary>
    /// <param name="oldValue">The character to be replaced.</param>
    /// <param name="newValue">The character to replace <paramref name="oldValue"/> with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
114

115
    /// <summary>
    /// <see cref="Rune"/> overload of <c>Replace</c>; forwards to <c>U8Manipulation.Replace</c> with this instance as the source.
    /// </summary>
    /// <param name="oldValue">The rune to be replaced.</param>
    /// <param name="newValue">The rune to replace <paramref name="oldValue"/> with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
120

121
    /// <summary>
    /// Byte-span overload of <c>Replace</c>; forwards to <c>U8Manipulation.Replace</c> with this instance as the source.
    /// </summary>
    /// <param name="oldValue">The byte sequence to be replaced.</param>
    /// <param name="newValue">The byte sequence to replace <paramref name="oldValue"/> with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);
126

127
    /// <summary>
    /// <see cref="U8String"/> overload of <c>Replace</c>; forwards to <c>U8Manipulation.ReplaceUnchecked</c>
    /// with this instance as the source.
    /// </summary>
    /// <param name="oldValue">The string to be replaced.</param>
    /// <param name="newValue">The string to replace <paramref name="oldValue"/> with.</param>
    /// <returns>The <see cref="U8String"/> produced by <c>U8Manipulation.ReplaceUnchecked</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        // NOTE(review): the unchecked variant is presumably safe because both operands are
        // already U8String values - confirm against U8Manipulation.
        => U8Manipulation.ReplaceUnchecked(this, oldValue, newValue);
132

133
    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var source = this;

        // Window of the destination that starts at the requested index.
        var target = destination.AsSpan()[index..];
        if (target.Length < source.Length)
        {
            // Not enough room after 'index' to hold every byte of this string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        source.UnsafeSpan.CopyTo(target);
    }
×
145

146
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at a specified
    /// byte offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based starting byte offset of a substring in this instance.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or the default (empty)
    /// <see cref="U8String"/> when <paramref name="start"/> equals the length of this instance.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the resulting substring
    /// would begin in the middle of a code point and be an invalid UTF-8 string.
    /// </exception>
    public U8String Slice(int start)
    {
        var source = this;

        // Single unsigned comparison, as in ReadOnly/Span<T>.Slice(int):
        // a negative 'start' wraps to a large value and is rejected together with start > Length.
        if ((uint)start > (uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = source.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        if (U8Info.IsContinuationByte(in source.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new(source._value, source.Offset + start, remaining);
    }
180

181
    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at a specified
    /// byte offset and has a specified length in bytes.
    /// </summary>
    /// <param name="start">The zero-based starting byte offset of a substring in this instance.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or the default (empty) <see cref="U8String"/> when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// Either end of the requested range falls inside a UTF-8 code point, so the resulting
    /// substring would be an invalid UTF-8 string.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // Cannot overflow: start + length <= source.Length was established above.
            var end = start + length;

            // The byte at 'end' is the first byte AFTER the slice. It only exists when the slice
            // stops short of the end of the string, so the guard must compare 'end' (not 'length')
            // against source.Length; otherwise a slice with start > 0 that runs to the end of the
            // string would read one byte past the string's data.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }
219

220
    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        ref var ptr = ref source.DangerousRef;

        // Forward scan: stop at the first byte that starts a non-whitespace code point.
        // Continuation bytes are stepped over; they can only follow a whitespace lead byte
        // here because a non-whitespace lead byte ends the scan.
        var start = 0;
        for (; start < source.Length; start++)
        {
            var b = ptr.Add(start);
            if (!U8Info.IsContinuationByte(b) && !(
                U8Info.IsAsciiByte(b)
                    ? U8Info.IsAsciiWhitespace(b)
                    : U8Info.IsNonAsciiWhitespace(ref ptr.Add(start))))
            {
                break;
            }
        }

        // Backward scan: 'end' is the index of the last byte to keep and only moves when a whole
        // whitespace code point has been recognised, so continuation bytes of a trailing
        // non-whitespace character are never trimmed away.
        var end = source.Length - 1;
        for (var endSearch = end; endSearch >= start; endSearch--)
        {
            var b = ptr.Add(endSearch);
            if (!U8Info.IsContinuationByte(b))
            {
                // The multi-byte check must decode from the lead byte just found (endSearch),
                // not from 'end', which still points at the last continuation byte.
                if (U8Info.IsAsciiByte(b)
                    ? U8Info.IsAsciiWhitespace(b)
                    : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch)))
                {
                    end = endSearch - 1;
                }
                else
                {
                    break;
                }
            }
        }

        return U8Marshal.Slice(source, start, end - start + 1);
    }
269

270
    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
    public U8String TrimStart()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;
            var first = ptr;

            // Fast path: the string starts with an ASCII non-whitespace byte, nothing to trim.
            if (U8Info.IsAsciiByte(first) && !U8Info.IsAsciiWhitespace(first))
            {
                return source;
            }

            // Each byte is read only after 'start' has been checked against the length, so the
            // scan never touches the byte past the end when the whole string is whitespace.
            var start = 0;
            for (; start < source.Length; start++)
            {
                var b = ptr.Add(start);
                if (!U8Info.IsContinuationByte(b) && !(
                    U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(start))))
                {
                    break;
                }
            }

            return U8Marshal.Slice(source, start);
        }

        return default;
    }
310

311
    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        ref var ptr = ref source.DangerousRef;

        // 'end' is the index of the last byte to keep. It is moved only after a complete
        // whitespace code point has been recognised at 'endSearch'. Using the scan index itself
        // as the cut point would drop the continuation bytes of a trailing multi-byte
        // non-whitespace character and produce an invalid UTF-8 slice.
        var end = source.Length - 1;
        for (var endSearch = end; endSearch >= 0; endSearch--)
        {
            var b = ptr.Add(endSearch);
            if (!U8Info.IsContinuationByte(b))
            {
                if (U8Info.IsAsciiByte(b)
                    ? U8Info.IsAsciiWhitespace(b)
                    : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch)))
                {
                    end = endSearch - 1;
                }
                else
                {
                    break;
                }
            }
        }

        return U8Marshal.Slice(source, 0, end + 1);
    }
338

339
    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start and end of the current string,
    /// or the default (empty) <see cref="U8String"/> when nothing remains.
    /// </returns>
    public U8String TrimAscii()
    {
        var source = this;
        var range = Ascii.Trim(source);

        if (range.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, range);
    }
355

356
    /// <summary>
    /// Removes all the leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the start of the current string,
    /// or the default (empty) <see cref="U8String"/> when nothing remains.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var source = this;
        var range = Ascii.TrimStart(source);

        if (range.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, range);
    }
372

373
    /// <summary>
    /// Removes all the trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all ASCII whitespace characters
    /// are removed from the end of the current string,
    /// or the default (empty) <see cref="U8String"/> when nothing remains.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var source = this;
        var range = Ascii.TrimEnd(source);

        if (range.IsEmpty())
        {
            return default;
        }

        return U8Marshal.Slice(source, range);
    }
389

390
    /// <summary>
    /// Returns a copy of this ASCII string converted to lower case.
    /// </summary>
    /// <returns>
    /// A lowercase equivalent of the current ASCII string, backed by a newly allocated array,
    /// or the default (empty) <see cref="U8String"/> when this instance is empty.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The current string is not a valid ASCII sequence.
    /// </exception>
    public U8String ToLowerAscii()
    {
        var source = this;
        if (source.IsEmpty)
        {
            return default;
        }

        var bytes = source.UnsafeSpan;
        var lowered = new byte[bytes.Length];

        // Ascii.ToLower reports InvalidData when it encounters a non-ASCII byte.
        if (Ascii.ToLower(bytes, lowered, out _) == OperationStatus.InvalidData)
        {
            ThrowHelpers.InvalidAscii();
        }

        return new U8String(lowered, 0, bytes.Length);
    }
415

416
    /// <summary>
    /// Returns a copy of this ASCII string converted to upper case.
    /// </summary>
    /// <returns>
    /// The uppercase equivalent of the current ASCII string, backed by a newly allocated array,
    /// or the default (empty) <see cref="U8String"/> when this instance is empty.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The current string is not a valid ASCII sequence.
    /// </exception>
    public U8String ToUpperAscii()
    {
        var source = this;
        if (source.IsEmpty)
        {
            return default;
        }

        var bytes = source.UnsafeSpan;
        var uppered = new byte[bytes.Length];

        // Ascii.ToUpper reports InvalidData when it encounters a non-ASCII byte.
        if (Ascii.ToUpper(bytes, uppered, out _) == OperationStatus.InvalidData)
        {
            ThrowHelpers.InvalidAscii();
        }

        return new U8String(uppered, 0, bytes.Length);
    }
441
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc