• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5863290061

pending completion
5863290061

push

github

neon-sunset
tests: fix U8Chars_EnumeratesToCorrectValues due to some underlying IComparer issue

105 of 706 branches covered (14.87%)

Branch coverage included in aggregate %.

3 of 3 new or added lines in 2 files covered. (100.0%)

372 of 1522 relevant lines covered (24.44%)

467.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.96
/src/U8String.Enumeration.cs
1
using System.Buffers;
2
using System.Collections;
3
using System.Diagnostics;
4
using System.Runtime.InteropServices;
5
using System.Text;
6
using U8Primitives.Abstractions;
7

8
using Rune = System.Text.Rune;
9

10
namespace U8Primitives;
11

12
#pragma warning disable IDE0032, IDE0057 // Use auto property and index operator. Why: Perf, struct layout, accuracy and codegen.
13
public readonly partial struct U8String
14
{
15
    /// <summary>
16
    /// Returns a collection of <see cref="char"/>s over the provided string.
17
    /// </summary>
18
    /// <remarks>
19
    /// This is a lazily-evaluated allocation-free collection.
20
    /// </remarks>
21
    public U8Chars Chars
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Only wraps this string in a U8Chars view; nothing is decoded here.
            return new U8Chars(this);
        }
    }
26

27
    /// <summary>
28
    /// Returns a collection of <see cref="Rune"/>s over the provided string.
29
    /// </summary>
30
    /// <remarks>
31
    /// This is a lazily-evaluated allocation-free collection.
32
    /// </remarks>
33
    public U8Runes Runes
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Only wraps this string in a U8Runes view; nothing is decoded here.
            return new U8Runes(this);
        }
    }
38

39
    /// <summary>
40
    /// Returns a collection of lines over the provided string.
41
    /// </summary>
42
    /// <remarks>
43
    /// This is a lazily-evaluated allocation-free collection.
44
    /// </remarks>
45
    public U8Lines Lines
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Only wraps this string in a U8Lines view; no splitting happens here.
            return new U8Lines(this);
        }
    }
50

51
    // Bad codegen still :(
52
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
53
    public Enumerator GetEnumerator()
    {
        return new Enumerator(this);
    }

    // Interface forms: converting the struct enumerator to an interface type boxes it.
    IEnumerator<byte> IEnumerable<byte>.GetEnumerator()
    {
        return new Enumerator(this);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return new Enumerator(this);
    }
6✔
57

58
    /// <summary>
    /// Enumerates the individual bytes of a <see cref="U8String"/>.
    /// </summary>
    public struct Enumerator : IEnumerator<byte>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Position relative to _offset; -1 means "before the first byte".
        int _index;

        /// <summary>
        /// Creates an enumerator positioned before the first byte of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string whose bytes are enumerated.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            (_value, _offset, _length, _index) = (value._value, value.Offset, value.Length, -1);
        }

        /// <summary>
        /// The byte at the current position. Only meaningful after
        /// <see cref="MoveNext"/> has returned <see langword="true"/>.
        /// </summary>
        // Plain array indexing; the original author notes this is still cheaper
        // than the MemoryMarshal-based variants that were tried.
        public readonly byte Current
        {
            get
            {
                var position = _offset + _index;
                return _value![position];
            }
        }

        /// <summary>
        /// Advances to the next byte.
        /// </summary>
        /// <returns><see langword="true"/> while the new position is inside the string.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var next = _index + 1;
            _index = next;
            return next < _length;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position, before the first byte.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _index = -1;
        }

        // Non-generic access boxes the byte.
        readonly object IEnumerator.Current
        {
            get { return Current; }
        }

        // Nothing to release.
        readonly void IDisposable.Dispose()
        {
        }
    }
100
}
101

102
/// <summary>
103
/// A collection of chars in a provided <see cref="U8String"/>.
104
/// </summary>
105
public struct U8Chars : ICollection<char>
106
{
107
    readonly U8String _value;
108

109
    int _count;
110

111
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
112
    public U8Chars(U8String value)
    {
        if (value.IsEmpty)
        {
            // Both fields keep their defaults: a default string and a
            // count of 0, which the Count getter returns as-is.
            return;
        }

        // -1 marks the count as "not computed yet"; the Count getter fills it in.
        _count = -1;
        _value = value;
    }
24✔
120

121
    /// <summary>
122
    /// The number of chars in the current <see cref="U8String"/>.
123
    /// </summary>
124
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // A negative value means the count has not been computed yet.
            var cached = _count;
            if (cached < 0)
            {
                cached = CountChars(_value.UnsafeSpan);
                // Cache the result in this struct instance for later calls.
                _count = cached;
            }

            return cached;

            static int CountChars(ReadOnlySpan<byte> utf8)
            {
                // The constructor only stores -1 for non-empty strings.
                Debug.Assert(!utf8.IsEmpty);

                // TODO (from the original author): is this enough?
                return Encoding.UTF8.GetCharCount(utf8);
            }
        }
    }
146

147
    // TODO: Wow, this seems to be terribly broken on surrogate chars and 
148
    // there is no easy way to fix it without sacrificing performance.
149
    // Perhaps it is worth just doing the transcoding iteration here and warning the users
150
    // instead of straight up producing UB or throwing exceptions???
151
    /// <summary>
    /// Forwards the lookup of <paramref name="item"/> to the underlying <see cref="U8String"/>.
    /// </summary>
    /// <param name="item">The UTF-16 code unit to look for.</param>
    public readonly bool Contains(char item)
    {
        return _value.Contains(item);
    }
×
152

153
    /// <summary>
    /// Transcodes the string to UTF-16 and writes the chars into
    /// <paramref name="destination"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the chars.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    public readonly void CopyTo(char[] destination, int index)
    {
        var source = _value;
        if (source.IsEmpty)
        {
            // Nothing to write; the arguments are not inspected in this case.
            return;
        }

        var utf8 = source.UnsafeSpan;
        var target = destination.AsSpan(index);
        Encoding.UTF8.GetChars(utf8, target);
    }
5✔
161

162
    /// <summary>
    /// Returns a struct enumerator over the chars of the string.
    /// </summary>
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    // Interface forms: converting the struct enumerator to an interface type boxes it.
    readonly IEnumerator<char> IEnumerable<char>.GetEnumerator()
    {
        return GetEnumerator();
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
6✔
166

167
    /// <summary>
    /// Enumerates a <see cref="U8String"/> as UTF-16 code units, decoding one
    /// UTF-8 sequence at a time. Code points outside the BMP are yielded as a
    /// high surrogate followed by a low surrogate.
    /// </summary>
    public struct Enumerator : IEnumerator<char>
    {
        // TODO: refactor layout
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Byte position (relative to _offset) of the next sequence to decode.
        int _nextByteIdx;

        // Low 16 bits: the char exposed by Current.
        // High 16 bits: a pending low surrogate that the next MoveNext must
        // yield, or 0 when nothing is pending.
        uint _currentCharPair;

        /// <summary>
        /// Creates an enumerator positioned before the first char of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string to enumerate. An empty string leaves every field at its default.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            if (!value.IsEmpty)
            {
                _value = value._value;
                _offset = value.Offset;
                _length = value.Length;
                _nextByteIdx = 0;
            }
        }

        /// <summary>
        /// The current UTF-16 code unit (the low 16 bits of the internal state).
        /// </summary>
        public readonly char Current => (char)_currentCharPair;

        /// <summary>
        /// Advances to the next UTF-16 code unit.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a code unit is available in <see cref="Current"/>;
        /// <see langword="false"/> once all bytes have been consumed.
        /// </returns>
        // TODO: This looks terrible, there must be a better way
        // to convert UTF-8 to UTF-16 with an enumerator.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var (offset, length, nextByteIdx, currentCharPair) =
                (_offset, _length, _nextByteIdx, _currentCharPair);

            // No pending low surrogate when the state fits in 16 bits. The
            // comparison is inclusive: U+FFFF is a BMP scalar stored as exactly
            // 0xFFFF, and a strict '<' would treat it as a pair and yield a
            // spurious '\0' on the following call.
            if (currentCharPair <= char.MaxValue)
            {
                if ((uint)nextByteIdx < (uint)length)
                {
                    var span = _value!.SliceUnsafe(offset + nextByteIdx, length - nextByteIdx);
                    var firstByte = MemoryMarshal.GetReference(span);
                    if (U8Info.IsAsciiByte(firstByte))
                    {
                        // Fast path because Rune.DecodeFromUtf8 won't inline.
                        _nextByteIdx = nextByteIdx + 1;
                        _currentCharPair = firstByte;
                        return true;
                    }

                    var status = Rune.DecodeFromUtf8(span, out var rune, out var bytesConsumed);
                    Debug.Assert(status is OperationStatus.Done);

                    _nextByteIdx = nextByteIdx + bytesConsumed;

                    if (rune.IsBmp)
                    {
                        _currentCharPair = (uint)rune.Value;
                        return true;
                    }

                    // Split a supplementary-plane scalar into a surrogate pair.
                    // This is plain integer arithmetic on the scalar value, not a
                    // reinterpretation of memory, so it does not depend on endianness.
                    var runeValue = (uint)rune.Value;
                    var highSurrogate = (char)((runeValue + ((0xD800u - 0x40u) << 10)) >> 10);
                    var lowSurrogate = (char)((runeValue & 0x3FFu) + 0xDC00u);

                    // Expose the high surrogate now and park the low one in the upper half.
                    _currentCharPair = highSurrogate + ((uint)lowSurrogate << 16);
                    return true;
                }

                return false;
            }

            // A low surrogate is pending: move it into the low half, which also
            // clears the upper half so the next call decodes fresh input.
            _currentCharPair = currentCharPair >> 16;
            return true;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _nextByteIdx = 0;

            // Also drop any pending low surrogate; otherwise a Reset issued
            // between the two halves of a pair would make the next MoveNext
            // yield the stale low surrogate instead of the first char.
            _currentCharPair = 0;
        }

        // Non-generic access boxes the char.
        readonly object IEnumerator.Current => Current;

        // Nothing to release.
        readonly void IDisposable.Dispose() { }
    }
246

247
    // The collection is a read-only view: every mutating ICollection<char>
    // member throws NotSupportedException.
    readonly bool ICollection<char>.IsReadOnly
    {
        get { return true; }
    }

    readonly void ICollection<char>.Add(char item)
    {
        throw new NotSupportedException();
    }

    readonly void ICollection<char>.Clear()
    {
        throw new NotSupportedException();
    }

    readonly bool ICollection<char>.Remove(char item)
    {
        throw new NotSupportedException();
    }
×
251
}
252

253
/// <summary>
254
/// A collection of Runes (unicode scalar values) in a provided <see cref="U8String"/>.
255
/// </summary>
256
public struct U8Runes : ICollection<Rune>
257
{
258
    readonly U8String _value;
259

260
    // If we bring up non-ascii counting to ascii level, we might not need this
261
    // similar to LineCollection.
262
    int _count;
263

264
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
265
    public U8Runes(U8String value)
    {
        if (value.IsEmpty)
        {
            // Both fields keep their defaults: a default string and a
            // count of 0, which the Count getter returns as-is.
            return;
        }

        // -1 marks the count as "not computed yet"; the Count getter fills it in.
        _count = -1;
        _value = value;
    }
30✔
273

274
    /// <summary>
275
    /// The number of Runes (unicode scalar values) in the current <see cref="U8String"/>.
276
    /// </summary>
277
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // A negative value means the count has not been computed yet.
            var cached = _count;
            if (cached < 0)
            {
                cached = CountRunes(_value.UnsafeSpan);
                // Cache the result in this struct instance for later calls.
                _count = cached;
            }

            return cached;

            static int CountRunes(ReadOnlySpan<byte> utf8)
            {
                // The constructor only stores -1 for non-empty strings.
                Debug.Assert(!utf8.IsEmpty);

                // Every byte in the leading ASCII run is one rune.
                // TODO: SIMD non-continuation byte counting
                var asciiPrefix = (int)(nint)Polyfills.Text.Ascii.GetIndexOfFirstNonAsciiByte(utf8);
                var tail = utf8.SliceUnsafe(asciiPrefix);

                // Walk the remainder one sequence at a time, stepping by the
                // length that U8Info.CharLength reports for each lead byte.
                var total = asciiPrefix;
                var position = 0;
                while ((uint)position < (uint)tail.Length)
                {
                    total++;
                    position += U8Info.CharLength(tail.AsRef(position));
                }

                return total;
            }
        }
    }
308

309
    /// <summary>
    /// Forwards the lookup of <paramref name="item"/> to the underlying <see cref="U8String"/>.
    /// </summary>
    /// <param name="item">The Unicode scalar value to look for.</param>
    public readonly bool Contains(Rune item)
    {
        return _value.Contains(item);
    }
642✔
310

311
    /// <summary>
    /// Writes every rune of the string into <paramref name="destination"/>,
    /// starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the runes.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    /// <remarks>
    /// There is no up-front size check: if the array is too small, the array
    /// indexer throws after the runes that did fit have already been written.
    /// </remarks>
    public readonly void CopyTo(Rune[] destination, int index)
    {
        // TODO: Consistency and correctness? Implement single-pass vectorized conversion?
        var runes = GetEnumerator();
        while (runes.MoveNext())
        {
            destination[index] = runes.Current;
            index++;
        }
    }
5✔
319

320
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
321
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    // Interface forms: converting the struct enumerator to an interface type boxes it.
    readonly IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return new Enumerator(_value);
    }
6✔
325

326
    /// <summary>
    /// Enumerates the <see cref="Rune"/>s (Unicode scalar values) of a
    /// <see cref="U8String"/>, decoding one UTF-8 sequence per step.
    /// </summary>
    public struct Enumerator : IEnumerator<Rune>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Byte position (relative to _offset) of the next sequence to decode.
        // Starts at 0, unlike the byte enumerator which starts at -1.
        int _index;

        /// <summary>
        /// Creates an enumerator positioned before the first rune of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string to enumerate. An empty string leaves every field at its default.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            if (!value.IsEmpty)
            {
                _value = value._value;
                _offset = value.Offset;
                _length = value.Length;
            }
        }

        /// <summary>
        /// The rune decoded by the most recent successful <see cref="MoveNext"/>.
        /// </summary>
        public Rune Current { get; private set; }

        /// <summary>
        /// Decodes the next rune and advances past its bytes.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a rune was decoded into <see cref="Current"/>;
        /// <see langword="false"/> once all bytes have been consumed.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            // TODO: Optimize for codegen, this one isn't great
            var index = _index;
            if (index < _length)
            {
                Rune.DecodeFromUtf8(
                    _value!.SliceUnsafe(_offset + index, _length - index),
                    out var rune,
                    out var consumed);

                Current = rune;
                _index = index + consumed;
                return true;
            }

            return false;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            // The start state is index 0. Resetting to -1 would make the next
            // MoveNext slice from _offset - 1, i.e. decode from one byte
            // before the start of the string.
            _index = 0;
            Current = default;
        }

        // Non-generic access boxes the rune.
        readonly object IEnumerator.Current => Current;

        // Nothing to release.
        readonly void IDisposable.Dispose() { }
    }
372

373
    // The collection is a read-only view. Mutating members throw
    // NotSupportedException, which is what ICollection<T> specifies for
    // read-only collections and what U8Chars and U8Lines in this file throw
    // (NotImplementedException would suggest unfinished work instead).
    readonly bool ICollection<Rune>.IsReadOnly => true;
    readonly void ICollection<Rune>.Add(Rune item) => throw new NotSupportedException();
    readonly void ICollection<Rune>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<Rune>.Remove(Rune item) => throw new NotSupportedException();
×
377
}
378

379
/// <summary>
380
/// A collection of lines in a provided <see cref="U8String"/>.
381
/// </summary>
382
public struct U8Lines : ICollection<U8String>, IU8Enumerable<U8Lines.Enumerator>
383
{
384
    readonly U8String _value;
385

386
    // We might not need this. Although counting is O(n), the absolute performance
387
    // is very good, and on AVX2/512 - it's basically instantaneous.
388
    int _count;
389

390
    /// <summary>
391
    /// Creates a new line enumeration over the provided string.
392
    /// </summary>
393
    /// <param name="value">The string to enumerate over.</param>
394
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
395
    public U8Lines(U8String value)
    {
        if (value.IsEmpty)
        {
            // Both fields keep their defaults: a default string and a
            // count of 0, which the Count getter returns as-is.
            return;
        }

        // -1 marks the count as "not computed yet"; the Count getter fills it in.
        _count = -1;
        _value = value;
    }
×
403

404
    /// <summary>
405
    /// The number of lines in the current <see cref="U8String"/>.
406
    /// </summary>
407
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // A negative value means the count has not been computed yet.
            var cached = _count;
            if (cached < 0)
            {
                // Number of '\n' bytes plus one, which matches the behavior of
                // string.Split('\n').Length for "hello\n".
                // TODO: Should we break consistency and not count the very last segment if it is empty?
                // (likely no - an empty line is still a line)
                // NOTE(review): Enumerator.MoveNext stops once the remaining range is
                // empty, so for text ending in '\n' enumeration yields one item fewer
                // than this count - confirm which of the two is intended.
                var newlines = _value.UnsafeSpan.Count((byte)'\n');
                cached = newlines + 1;
                _count = cached;
            }

            return cached;
        }
    }
424

425
    /// <summary>
    /// Reports whether <paramref name="item"/> contains no line feed and is
    /// found by <c>Contains</c> on the underlying string.
    /// </summary>
    /// <param name="item">The candidate text.</param>
    // NOTE(review): the second check runs against the whole underlying string,
    // not against individual lines - presumably a substring search, so a
    // fragment of a line would also match; confirm against U8String.Contains.
    public readonly bool Contains(U8String item)
    {
        if (item.Contains((byte)'\n'))
        {
            // Anything spanning a line break is rejected up front.
            return false;
        }

        return _value.Contains(item);
    }
429

430
    /// <summary>
    /// Copies the enumerated lines into <paramref name="destination"/>,
    /// starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the lines.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    /// <remarks>
    /// The arguments are rejected through <c>ThrowHelpers.ArgumentOutOfRange()</c>
    /// when <paramref name="index"/> lies outside the array or fewer than
    /// <see cref="Count"/> slots remain after it.
    /// </remarks>
    public void CopyTo(U8String[] destination, int index)
    {
        var count = Count;
        var dst = destination.AsSpan();

        // Validate 'index' on its own before subtracting. A single comparison
        // against (uint)dst.Length - (uint)index wraps around when index is
        // negative or greater than dst.Length, which lets the bad index through
        // and surfaces later as IndexOutOfRangeException from the span indexer.
        if ((uint)index > (uint)dst.Length || (uint)count > (uint)(dst.Length - index))
        {
            // TODO: EH UX
            ThrowHelpers.ArgumentOutOfRange();
        }

        if (count > 0)
        {
            foreach (var line in this)
            {
                dst[index++] = line;
            }
        }
    }
×
448

449
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
450
    // Forwards to the generic Deconstruct helper (declared outside this file);
    // presumably it assigns the first two enumerated lines - confirm there.
    public readonly void Deconstruct(out U8String first, out U8String second)
        => this.Deconstruct<U8Lines, Enumerator>(out first, out second);
×
454

455
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
456
    // Forwards to the generic Deconstruct helper (declared outside this file);
    // presumably it assigns the first three enumerated lines - confirm there.
    public readonly void Deconstruct(out U8String first, out U8String second, out U8String third)
        => this.Deconstruct<U8Lines, Enumerator>(out first, out second, out third);
×
460

461
    /// <summary>
462
    /// Returns a <see cref="Enumerator"/> over the provided string.
463
    /// </summary>
464
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
465
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    // Interface forms: converting the struct enumerator to an interface type boxes it.
    readonly IEnumerator<U8String> IEnumerable<U8String>.GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return new Enumerator(_value);
    }
×
469

470
    // The collection is a read-only view: every mutating ICollection<U8String>
    // member throws NotSupportedException.
    readonly bool ICollection<U8String>.IsReadOnly
    {
        get { return true; }
    }

    readonly void ICollection<U8String>.Add(U8String item)
    {
        throw new NotSupportedException();
    }

    readonly void ICollection<U8String>.Clear()
    {
        throw new NotSupportedException();
    }

    readonly bool ICollection<U8String>.Remove(U8String item)
    {
        throw new NotSupportedException();
    }
×
474

475
    /// <summary>
476
    /// A struct that enumerates lines over a string.
477
    /// </summary>
478
    public struct Enumerator : IU8Enumerator
    {
        // TODO 1: Ensure this is aligned with Rust's .lines() implementation, or not?
        // private static readonly SearchValues<byte> NewLine = SearchValues.Create("\r\n"u8);
        // TODO 2: Consider using 'InnerOffsets'
        private readonly byte[]? _value;

        // Range of the text that has not been split yet.
        private U8Range _remaining;

        // Range of the line exposed through Current.
        private U8Range _current;

        /// <summary>
        /// Initializes the enumerator so that it splits <paramref name="value"/> into lines.
        /// </summary>
        /// <param name="value">The string to enumerate over. An empty string leaves every field at its default.</param>
        public Enumerator(U8String value)
        {
            if (value.IsEmpty)
            {
                return;
            }

            _remaining = value._inner;
            _value = value._value;
        }

        /// <summary>
        /// The line found by the most recent successful <see cref="MoveNext"/>.
        /// </summary>
        public readonly U8String Current
        {
            get
            {
                var line = _current;
                return new U8String(_value, line.Offset, line.Length);
            }
        }

        /// <summary>
        /// Moves to the next line, if any text is left.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a line was stored in <see cref="Current"/>;
        /// <see langword="false"/> when no unsplit text remains.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // Surprisingly smaller codegen than when not inlined
        public bool MoveNext()
        {
            var rest = _remaining;
            if (rest.Length <= 0)
            {
                return false;
            }

            var span = _value!.SliceUnsafe(rest.Offset, rest.Length);
            var newlineAt = span.IndexOf((byte)'\n');

            if ((uint)newlineAt >= (uint)span.Length)
            {
                // No further line feed: the remaining text is the final line.
                // Still report 'true' so the caller can read Current once more.
                _current = new U8Range(rest.Offset, rest.Length);
                _remaining = default;
                return true;
            }

            // Exclude a carriage return directly before the line feed, so that
            // "\r\n" terminators are stripped as a whole.
            var lineLength = newlineAt;
            if (newlineAt > 0 && span.AsRef().Add(newlineAt - 1) is (byte)'\r')
            {
                lineLength--;
            }

            var consumed = newlineAt + 1;
            _current = new U8Range(rest.Offset, lineLength);
            _remaining = new U8Range(rest.Offset + consumed, rest.Length - consumed);
            return true;
        }

        // Non-generic access boxes the U8String.
        readonly object IEnumerator.Current
        {
            get { return Current; }
        }

        // Restarting is not supported by this enumerator.
        readonly void IEnumerator.Reset()
        {
            throw new NotSupportedException();
        }

        // Nothing to release.
        readonly void IDisposable.Dispose()
        {
        }
    }
546
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc