• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5868414274

pending completion
5868414274

push

github

neon-sunset
feat: consolidate CopyTo/ToArray/List logic, naming and Rune methods

105 of 704 branches covered (14.91%)

Branch coverage included in aggregate %.

46 of 46 new or added lines in 6 files covered. (100.0%)

372 of 1547 relevant lines covered (24.05%)

460.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.88
/src/U8String.Enumeration.cs
1
using System.Buffers;
2
using System.Collections;
3
using System.Diagnostics;
4
using System.Runtime.InteropServices;
5
using System.Text;
6
using U8Primitives.Abstractions;
7

8
using Rune = System.Text.Rune;
9

10
namespace U8Primitives;
11

12
#pragma warning disable IDE0032, IDE0057 // Use auto property and index operator. Why: Perf, struct layout, accuracy and codegen.
13
public readonly partial struct U8String
{
    /// <summary>
    /// Gets a view of this string as a collection of UTF-16 <see cref="char"/>s.
    /// </summary>
    /// <remarks>
    /// The view is a struct wrapping this string; creating it neither decodes nor allocates.
    /// </remarks>
    public U8Chars Chars
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Chars(this); }
    }

    /// <summary>
    /// Gets a view of this string as a collection of <see cref="Rune"/>s.
    /// </summary>
    /// <remarks>
    /// The view is a struct wrapping this string; creating it neither decodes nor allocates.
    /// </remarks>
    public U8Runes Runes
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Runes(this); }
    }

    /// <summary>
    /// Gets a view of this string as a collection of lines.
    /// </summary>
    /// <remarks>
    /// The view is a struct wrapping this string; creating it neither scans nor allocates.
    /// </remarks>
    public U8Lines Lines
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Lines(this); }
    }

    /// <summary>
    /// Returns a struct enumerator over the bytes of this string.
    /// </summary>
    // Author's note: the generated code for this is still not great.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Enumerator GetEnumerator()
    {
        return new Enumerator(this);
    }

    // Interface entry points: these box the struct enumerator.
    IEnumerator<byte> IEnumerable<byte>.GetEnumerator()
    {
        return GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Enumerates the bytes of a <see cref="U8String"/> one at a time.
    /// </summary>
    public struct Enumerator : IEnumerator<byte>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Position relative to _offset; -1 means "before the first byte".
        int _index;

        /// <summary>
        /// Creates an enumerator positioned before the first byte of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string whose bytes are enumerated.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            _value = value._value;
            _offset = value.Offset;
            _length = value.Length;
            _index = -1;
        }

        /// <summary>
        /// Gets the byte at the current position.
        /// </summary>
        // Plain array indexing; the author found it cheaper than MemoryMarshal-based variants.
        public readonly byte Current
        {
            get { return _value![_offset + _index]; }
        }

        /// <summary>
        /// Advances to the next byte.
        /// </summary>
        /// <returns><see langword="true"/> while the new position is inside the string.</returns>
        // An alternative that stored Current from inside MoveNext was tried and is not used.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var next = _index + 1;
            _index = next;

            // Unsigned comparison folds the "negative" and "past the end" checks into one.
            return (uint)next < (uint)_length;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position, before the first byte.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _index = -1;
        }

        readonly object IEnumerator.Current
        {
            get { return Current; }
        }

        // Nothing to release.
        readonly void IDisposable.Dispose()
        {
        }
    }
}
101

102
/// <summary>
/// A collection of UTF-16 <see cref="char"/>s obtained by transcoding a provided <see cref="U8String"/>.
/// </summary>
public struct U8Chars : ICollection<char>
{
    readonly U8String _value;

    // Cached char count. -1 means "not computed yet"; an empty source keeps the default 0.
    int _count;

    /// <summary>
    /// Creates a char collection over <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The string to expose as chars.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Chars(U8String value)
    {
        if (!value.IsEmpty)
        {
            _value = value;
            _count = -1;
        }
    }

    /// <summary>
    /// The number of chars in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed with <see cref="Encoding.GetCharCount(ReadOnlySpan{byte})"/> on first access
    /// and cached in this struct afterwards.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var count = _count;
            if (count >= 0)
            {
                return count;
            }
            return _count = Count(_value.UnsafeSpan);

            static int Count(ReadOnlySpan<byte> value)
            {
                Debug.Assert(!value.IsEmpty);

                // TODO: Is this enough?
                return Encoding.UTF8.GetCharCount(value);
            }
        }
    }

    // TODO: This seems to be badly broken for surrogate chars and there is no easy way
    // to fix it without sacrificing performance. Perhaps it is worth just doing the
    // transcoding iteration here and warning the users instead of producing UB or
    // throwing exceptions?
    /// <summary>
    /// Determines whether the source string contains <paramref name="item"/>.
    /// </summary>
    /// <param name="item">The char to look for.</param>
    /// <returns>The result of the source string's own <c>Contains</c> for <paramref name="item"/>.</returns>
    public readonly bool Contains(char item) => _value.Contains(item);

    /// <summary>
    /// Transcodes the source string to UTF-16 into <paramref name="destination"/>,
    /// starting at <paramref name="index"/>. Does nothing when the source is empty.
    /// </summary>
    /// <param name="destination">The array that receives the chars.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    public readonly void CopyTo(char[] destination, int index)
    {
        var value = _value;
        if (!value.IsEmpty)
        {
            Encoding.UTF8.GetChars(value.UnsafeSpan, destination.AsSpan(index));
        }
    }

    /// <summary>
    /// Returns a struct enumerator over the chars of the source string.
    /// </summary>
    public readonly Enumerator GetEnumerator() => new(_value);

    readonly IEnumerator<char> IEnumerable<char>.GetEnumerator() => new Enumerator(_value);
    readonly IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_value);

    /// <summary>
    /// Enumerates the UTF-16 chars of a <see cref="U8String"/>, decoding one scalar value at a time.
    /// Scalar values outside the BMP are yielded as a high surrogate followed by a low surrogate.
    /// </summary>
    public struct Enumerator : IEnumerator<char>
    {
        // TODO: refactor layout
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Index (relative to _offset) of the first byte that has not been decoded yet.
        int _nextByteIdx;

        // Low 16 bits: the char exposed by Current.
        // High 16 bits: the pending low surrogate, or 0 when nothing is pending.
        uint _currentCharPair;

        /// <summary>
        /// Creates an enumerator positioned before the first char of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string whose chars are enumerated.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            if (!value.IsEmpty)
            {
                _value = value._value;
                _offset = value.Offset;
                _length = value.Length;
                _nextByteIdx = 0;
            }
        }

        /// <summary>
        /// Gets the char at the current position (the low 16 bits of the internal state).
        /// </summary>
        public readonly char Current => (char)_currentCharPair;

        /// <summary>
        /// Advances to the next UTF-16 char.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a char was produced; <see langword="false"/> once all bytes
        /// have been consumed and no surrogate half is pending.
        /// </returns>
        // TODO: This looks terrible, there must be a better way
        // to convert UTF-8 to UTF-16 with an enumerator.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var (offset, length, nextByteIdx, currentCharPair) =
                (_offset, _length, _nextByteIdx, _currentCharPair);

            // A state above char.MaxValue means a surrogate pair is packed and its low half is
            // still pending. The comparison must be inclusive: U+FFFF is an ordinary BMP char,
            // and treating it as "pending" would emit a spurious '\0' after it.
            if (currentCharPair <= char.MaxValue)
            {
                if ((uint)nextByteIdx < (uint)length)
                {
                    var span = _value!.SliceUnsafe(offset + nextByteIdx, length - nextByteIdx);
                    var firstByte = MemoryMarshal.GetReference(span);
                    if (U8Info.IsAsciiByte(firstByte))
                    {
                        // Fast path because Rune.DecodeFromUtf8 won't inline
                        // making UTF-8 push us more and more towards anglocentrism.
                        _nextByteIdx = nextByteIdx + 1;
                        _currentCharPair = firstByte;
                        return true;
                    }

                    var status = Rune.DecodeFromUtf8(span, out var rune, out var bytesConsumed);
                    Debug.Assert(status is OperationStatus.Done);

                    _nextByteIdx = nextByteIdx + bytesConsumed;

                    if (rune.IsBmp)
                    {
                        _currentCharPair = (uint)rune.Value;
                        return true;
                    }

                    // Split the supplementary scalar into surrogates and pack both into one uint.
                    // This is pure shift/mask arithmetic, so it does not depend on byte order.
                    var runeValue = (uint)rune.Value;
                    var highSurrogate = (char)((runeValue + ((0xD800u - 0x40u) << 10)) >> 10);
                    var lowSurrogate = (char)((runeValue & 0x3FFu) + 0xDC00u);
                    _currentCharPair = highSurrogate + ((uint)lowSurrogate << 16);
                    return true;
                }

                return false;
            }

            // Emit the pending low surrogate; the shift also clears the pending half.
            _currentCharPair = currentCharPair >> 16;
            return true;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position, before the first char.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _nextByteIdx = 0;

            // Also drop any pending low surrogate; otherwise a reset in the middle of a
            // surrogate pair would replay the stale low half on the next MoveNext.
            _currentCharPair = 0;
        }

        readonly object IEnumerator.Current => Current;
        readonly void IDisposable.Dispose() { }
    }

    // The collection is a read-only view: mutating members are not supported.
    readonly bool ICollection<char>.IsReadOnly => true;
    readonly void ICollection<char>.Add(char item) => throw new NotSupportedException();
    readonly void ICollection<char>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<char>.Remove(char item) => throw new NotSupportedException();
}
252

253
/// <summary>
/// A collection of Runes (unicode scalar values) in a provided <see cref="U8String"/>.
/// </summary>
public struct U8Runes : ICollection<Rune>
{
    readonly U8String _value;

    // Cached rune count. -1 means "not computed yet"; an empty source keeps the default 0.
    // If we bring up non-ascii counting to ascii level, we might not need this
    // similar to LineCollection.
    int _count;

    /// <summary>
    /// Creates a rune collection over <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The string to expose as runes.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Runes(U8String value)
    {
        if (!value.IsEmpty)
        {
            _value = value;
            _count = -1;
        }
    }

    /// <summary>
    /// The number of Runes (unicode scalar values) in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed on first access and cached in this struct afterwards.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var count = _count;
            if (count >= 0)
            {
                return count;
            }

            return _count = Count(_value.UnsafeSpan);

            static int Count(ReadOnlySpan<byte> value)
            {
                Debug.Assert(!value.IsEmpty);

                // The leading ASCII run contributes one rune per byte.
                // TODO: SIMD non-continuation byte counting
                var runeCount = (int)(nint)Polyfills.Text.Ascii.GetIndexOfFirstNonAsciiByte(value);
                value = value.SliceUnsafe(runeCount);

                // For the rest, hop from lead byte to lead byte using the encoded length.
                for (var i = 0; (uint)i < (uint)value.Length; i += U8Info.CharLength(value.AsRef(i)))
                {
                    runeCount++;
                }

                return runeCount;
            }
        }
    }

    /// <summary>
    /// Determines whether the source string contains <paramref name="item"/>.
    /// </summary>
    /// <param name="item">The rune to look for.</param>
    /// <returns>The result of the source string's own <c>Contains</c> for <paramref name="item"/>.</returns>
    public readonly bool Contains(Rune item) => _value.Contains(item);

    /// <summary>
    /// Copies every rune of the source string into <paramref name="destination"/>,
    /// starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the runes.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    public readonly void CopyTo(Rune[] destination, int index)
    {
        // TODO: Simple SIMD widen ASCII to UTF-32 (ideally widen+validate in place instead of double traversal)
        // TODO: Consistency and correctness? Implement single-pass vectorized conversion?
        foreach (var rune in this)
        {
            destination[index++] = rune;
        }
    }

    /// <summary>
    /// Returns a struct enumerator over the runes of the source string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator() => new(_value);

    readonly IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator() => GetEnumerator();
    readonly IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Enumerates the runes of a <see cref="U8String"/> by decoding one scalar value per step.
    /// </summary>
    public struct Enumerator : IEnumerator<Rune>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Index (relative to _offset) of the first byte that has not been decoded yet.
        int _index;

        /// <summary>
        /// Creates an enumerator positioned before the first rune of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string whose runes are enumerated.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            if (!value.IsEmpty)
            {
                _value = value._value;
                _offset = value.Offset;
                _length = value.Length;
            }
        }

        /// <summary>
        /// Gets the rune decoded by the most recent successful <see cref="MoveNext"/>.
        /// </summary>
        public Rune Current { get; private set; }

        /// <summary>
        /// Decodes the next rune and advances past its bytes.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a rune was decoded; <see langword="false"/> once all bytes
        /// have been consumed.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            // TODO: Optimize for codegen, this one isn't great
            var index = _index;
            if (index < _length)
            {
                Rune.DecodeFromUtf8(
                    _value!.SliceUnsafe(_offset + index, _length - index),
                    out var rune,
                    out var consumed);

                Current = rune;
                _index = index + consumed;
                return true;
            }

            return false;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position, before the first rune.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            // _index is the next byte to decode, so the start position is 0. Resetting to -1
            // would make the next MoveNext slice from one byte before the string.
            _index = 0;
            Current = default;
        }

        readonly object IEnumerator.Current => Current;
        readonly void IDisposable.Dispose() { }
    }

    // The collection is a read-only view: mutating members are not supported.
    readonly bool ICollection<Rune>.IsReadOnly => true;
    readonly void ICollection<Rune>.Add(Rune item) => throw new NotSupportedException();
    readonly void ICollection<Rune>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<Rune>.Remove(Rune item) => throw new NotSupportedException();
}
379

380
/// <summary>
/// A collection of lines in a provided <see cref="U8String"/>.
/// </summary>
public struct U8Lines : ICollection<U8String>, IU8Enumerable<U8Lines.Enumerator>
{
    readonly U8String _value;

    // Cached line count. -1 means "not computed yet"; an empty source keeps the default 0.
    // We might not need this. Although counting is O(n), the absolute performance
    // is very good, and on AVX2/512 - it's basically instantaneous.
    int _count;

    /// <summary>
    /// Creates a new line enumeration over the provided string.
    /// </summary>
    /// <param name="value">The string to enumerate over.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Lines(U8String value)
    {
        if (!value.IsEmpty)
        {
            _value = value;
            _count = -1;
        }
    }

    /// <summary>
    /// The number of lines in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed as the number of <c>'\n'</c> bytes plus one on first access, then cached.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            var count = _count;
            if (count >= 0)
            {
                return count;
            }

            // Matches the behavior of string.Split('\n').Length for "hello\n"
            // TODO: Should we break consistency and not count the very last segment if it is empty?
            // (likely no - an empty line is still a line)
            // NOTE(review): for input ending in '\n' this counts a trailing empty line, while
            // Enumerator.MoveNext stops as soon as no bytes remain and so never yields that
            // line - confirm which of the two behaviors is intended.
            return _count = _value.UnsafeSpan.Count((byte)'\n') + 1;
        }
    }

    /// <summary>
    /// Determines whether one of the enumerated lines is equal to <paramref name="item"/>.
    /// </summary>
    /// <param name="item">The line to look for.</param>
    /// <returns>
    /// <see langword="true"/> if a line yielded by <see cref="GetEnumerator"/> has exactly the
    /// same bytes as <paramref name="item"/>; otherwise, <see langword="false"/>.
    /// </returns>
    public readonly bool Contains(U8String item)
    {
        // Lines are split on '\n', so an item containing it can never match.
        if (item.Contains((byte)'\n'))
        {
            return false;
        }

        // Compare against whole lines: a plain substring search over the source would
        // report partial matches (e.g. "b" inside the line "abc") as contained.
        var itemLength = item.Length;
        foreach (var line in this)
        {
            if (line.Length == itemLength &&
                (itemLength == 0 || line.UnsafeSpan.SequenceEqual(item.UnsafeSpan)))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Copies the lines into <paramref name="destination"/>, starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the lines.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    public void CopyTo(U8String[] destination, int index)
    {
        this.CopyTo<U8Lines, Enumerator>(destination.AsSpan()[index..]);
    }

    /// <summary>
    /// Deconstructs the collection into two values via the shared <c>Deconstruct</c> helper.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Deconstruct(out U8String first, out U8String second)
    {
        this.Deconstruct<U8Lines, Enumerator>(out first, out second);
    }

    /// <summary>
    /// Deconstructs the collection into three values via the shared <c>Deconstruct</c> helper.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Deconstruct(out U8String first, out U8String second, out U8String third)
    {
        this.Deconstruct<U8Lines, Enumerator>(out first, out second, out third);
    }

    /// <summary>
    /// Materializes the lines into a new array.
    /// </summary>
    public U8String[] ToArray() => this.ToArray<U8Lines, Enumerator>();

    /// <summary>
    /// Materializes the lines into a new <see cref="List{T}"/>.
    /// </summary>
    public List<U8String> ToList() => this.ToList<U8Lines, Enumerator>();

    /// <summary>
    /// Returns a <see cref="Enumerator"/> over the provided string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator() => new(_value);

    readonly IEnumerator<U8String> IEnumerable<U8String>.GetEnumerator() => GetEnumerator();
    readonly IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    // The collection is a read-only view: mutating members are not supported.
    readonly bool ICollection<U8String>.IsReadOnly => true;
    readonly void ICollection<U8String>.Add(U8String item) => throw new NotSupportedException();
    readonly void ICollection<U8String>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<U8String>.Remove(U8String item) => throw new NotSupportedException();

    /// <summary>
    /// A struct that enumerates lines over a string.
    /// </summary>
    /// <remarks>
    /// Lines are delimited by <c>'\n'</c>; a <c>'\r'</c> directly before the delimiter is
    /// excluded from the yielded line.
    /// </remarks>
    public struct Enumerator : IU8Enumerator
    {
        // TODO 1: Ensure this is aligned with Rust's .lines() implementation, or not?
        // private static readonly SearchValues<byte> NewLine = SearchValues.Create("\r\n"u8);
        // TODO 2: Consider using 'InnerOffsets'
        private readonly byte[]? _value;

        // The part of the source that has not been split into lines yet.
        private U8Range _remaining;

        // The range of the line exposed by Current.
        private U8Range _current;

        /// <summary>
        /// Creates a new line enumerator over the provided string.
        /// </summary>
        /// <param name="value">The string to enumerate over.</param>
        public Enumerator(U8String value)
        {
            if (!value.IsEmpty)
            {
                _value = value._value;
                _remaining = value._inner;
            }
        }

        /// <summary>
        /// Returns the current line.
        /// </summary>
        public readonly U8String Current => new(_value, _current.Offset, _current.Length);

        /// <summary>
        /// Advances the enumerator to the next line.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a line was produced; <see langword="false"/> when no
        /// bytes remain.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // Surprisingly smaller codegen than when not inlined
        public bool MoveNext()
        {
            var remaining = _remaining;
            if (remaining.Length > 0)
            {
                var span = _value!.SliceUnsafe(remaining.Offset, remaining.Length);
                var idx = span.IndexOf((byte)'\n');

                // idx is -1 when no delimiter is left; the unsigned compare rejects that too.
                if ((uint)idx < (uint)span.Length)
                {
                    // Exclude a '\r' that directly precedes the '\n' from the line.
                    var cutoff = idx;
                    if (idx > 0 && span.AsRef().Add(idx - 1) is (byte)'\r')
                    {
                        cutoff--;
                    }

                    _current = new(remaining.Offset, cutoff);

                    // Continue right after the '\n'.
                    _remaining = new(remaining.Offset + idx + 1, remaining.Length - idx - 1);
                }
                else
                {
                    // We've reached EOF, but we still need to return 'true' for this final
                    // iteration so that the caller can query the Current property once more.
                    _current = new(remaining.Offset, remaining.Length);
                    _remaining = default;
                }

                return true;
            }

            return false;
        }

        readonly object IEnumerator.Current => Current;
        readonly void IEnumerator.Reset() => throw new NotSupportedException();
        readonly void IDisposable.Dispose() { }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc