• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5955483049

23 Aug 2023 07:11PM UTC coverage: 20.597% (-0.8%) from 21.364%
5955483049

push

github

neon-sunset
perf: additional methods specializations on enumerators to avoid unnecessary overhead

133 of 910 branches covered (14.62%)

Branch coverage included in aggregate %.

84 of 84 new or added lines in 4 files covered. (100.0%)

481 of 2071 relevant lines covered (23.23%)

39801.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

49.77
/src/U8String.Enumeration.cs
1
using System.Buffers;
2
using System.Collections;
3
using System.Diagnostics;
4
using System.Runtime.InteropServices;
5
using System.Text;
6
using U8Primitives.Abstractions;
7

8
using Rune = System.Text.Rune;
9

10
namespace U8Primitives;
11

12
#pragma warning disable IDE0032, IDE0057 // Use auto property and index operator. Why: Perf, struct layout, accuracy and codegen.
13
public readonly partial struct U8String
14
{
15
    /// <summary>
16
    /// Returns a collection of <see cref="char"/>s over the provided string.
17
    /// </summary>
18
    /// <remarks>
19
    /// This is a lazily-evaluated allocation-free collection.
20
    /// </remarks>
21
    public U8Chars Chars
22
    {
23
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
24
        get => new(this);
30✔
25
    }
26

27
    /// <summary>
28
    /// Returns a collection of <see cref="Rune"/>s over the provided string.
29
    /// </summary>
30
    /// <remarks>
31
    /// This is a lazily-evaluated allocation-free collection.
32
    /// </remarks>
33
    public U8Runes Runes
34
    {
35
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
36
        get => new(this);
30✔
37
    }
38

39
    /// <summary>
40
    /// Returns a collection of lines over the provided string.
41
    /// </summary>
42
    /// <remarks>
43
    /// This is a lazily-evaluated allocation-free collection.
44
    /// </remarks>
45
    public U8Lines Lines
46
    {
47
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
48
        get => new(this);
×
49
    }
50

51
    // Bad codegen still :(
52
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
53
    public Enumerator GetEnumerator() => new(this);
12✔
54

55
    IEnumerator<byte> IEnumerable<byte>.GetEnumerator() => GetEnumerator();
×
56
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
6✔
57

58
    public struct Enumerator : IEnumerator<byte>
59
    {
60
        readonly byte[]? _value;
61
        readonly int _offset;
62
        readonly int _length;
63
        int _index;
64

65
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
66
        public Enumerator(U8String value)
67
        {
68
            _value = value._value;
12✔
69
            _offset = value.Offset;
12✔
70
            _length = value.Length;
12✔
71
            _index = -1;
12✔
72
        }
12✔
73

74
        // Still cheaper than MemoryMarshal clever variants
75
        public readonly byte Current => _value![_offset + _index];
3,138✔
76

77
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
78
        public bool MoveNext() => (uint)(++_index) < (uint)_length;
3,156✔
79
        // {
80
        //     var index = _index;
81
        //     if (++index < _length)
82
        //     {
83
        //         // Current = Unsafe.Add(
84
        //         //     ref MemoryMarshal.GetArrayDataReference(_value!),
85
        //         //     (nint)(uint)(_offset + index));
86
        //         Current = _value![_offset + index];
87
        //         _index = index;
88
        //         return true;
89
        //     }
90

91
        //     return false;
92
        // }
93

94
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
95
        public void Reset() => _index = -1;
×
96

97
        readonly object IEnumerator.Current => Current;
1,569✔
98
        readonly void IDisposable.Dispose() { }
6✔
99
    }
100
}
101

102
/// <summary>
103
/// A collection of chars in a provided <see cref="U8String"/>.
104
/// </summary>
105
public struct U8Chars : ICollection<char>, IEnumerable<char, U8Chars.Enumerator>
106
{
107
    readonly U8String _value;
108

109
    int _count;
110

111
    public U8Chars(U8String value)
112
    {
113
        _value = value;
30✔
114
        _count = value.IsEmpty ? 0 : -1;
30✔
115
    }
30✔
116

117
    /// <summary>
118
    /// The number of chars in the current <see cref="U8String"/>.
119
    /// </summary>
120
    public int Count
121
    {
122
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
123
        get
124
        {
125
            // Somehow the codegen here is underwhelming
126
            var count = _count;
23✔
127
            if (count >= 0)
23✔
128
            {
129
                return count;
8✔
130
            }
131
            return _count = Count(_value.UnsafeSpan);
15✔
132

133
            static int Count(ReadOnlySpan<byte> value)
134
            {
135
                Debug.Assert(!value.IsEmpty);
136

137
                // TODO: Is this enough?
138
                return Encoding.UTF8.GetCharCount(value);
15✔
139
            }
140
        }
141
    }
142

143
    // TODO: Wow, this seems to be terribly broken on surrogate chars and 
144
    // there is no easy way to fix it without sacrificing performance.
145
    // Perhaps it is worth just doing the transcoding iteration here and warning the users
146
    // instead of straight up producing UB or throwing exceptions???
147
    public readonly bool Contains(char item) => _value.Contains(item);
1,402✔
148

149
    public readonly void CopyTo(char[] destination, int index)
150
    {
151
        var value = _value;
×
152
        if (!value.IsEmpty)
×
153
        {
154
            Encoding.UTF8.GetChars(value.UnsafeSpan, destination.AsSpan()[index..]);
×
155
        }
156
    }
×
157

158
    public readonly void Deconstruct(out char first, out char second)
159
    {
160
        this.Deconstruct<U8Chars, Enumerator, char>(out first, out second);
×
161
    }
×
162

163
    public readonly void Deconstruct(out char first, out char second, out char third)
164
    {
165
        this.Deconstruct<U8Chars, Enumerator, char>(out first, out second, out third);
×
166
    }
×
167

168
    public readonly char ElementAt(int index)
169
    {
170
        return this.ElementAt<U8Chars, Enumerator, char>(index);
×
171
    }
172

173
    public readonly char ElementAtOrDefault(int index)
174
    {
175
        return this.ElementAtOrDefault<U8Chars, Enumerator, char>(index);
×
176
    }
177

178
    public char[] ToArray()
179
    {
180
        var value = _value;
6✔
181
        if (!value.IsEmpty)
6✔
182
        {
183
            var chars = new char[Count];
5✔
184
            Encoding.UTF8.GetChars(value.UnsafeSpan, chars);
5✔
185
            return chars;
5✔
186
        }
187

188
        return Array.Empty<char>();
1✔
189
    }
190

191
    public List<char> ToList()
192
    {
193
        var value = _value;
×
194
        if (!value.IsEmpty)
×
195
        {
196
            var count = Count;
×
197
            var chars = new List<char>(count);
×
198
            CollectionsMarshal.SetCount(chars, count);
×
199
            var span = CollectionsMarshal.AsSpan(chars);
×
200

201
            Encoding.UTF8.GetChars(value.UnsafeSpan, span);
×
202
            return chars;
×
203
        }
204

205
        return new List<char>();
×
206
    }
207

208
    public readonly Enumerator GetEnumerator() => new(_value);
6✔
209

210
    readonly IEnumerator<char> IEnumerable<char>.GetEnumerator() => new Enumerator(_value);
×
211
    readonly IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_value);
6✔
212

213
    public struct Enumerator : IEnumerator<char>
214
    {
215
        // TODO: refactor layout
216
        readonly byte[]? _value;
217
        readonly int _offset;
218
        readonly int _length;
219
        int _nextByteIdx;
220
        uint _currentCharPair;
221

222
        public Enumerator(U8String value)
223
        {
224
            _value = value._value;
12✔
225
            _offset = value.Offset;
12✔
226
            _length = value.Length;
12✔
227
            _nextByteIdx = 0;
12✔
228
        }
12✔
229

230
        // TODO
231
        public readonly char Current => (char)_currentCharPair;
1,402✔
232

233
        // TODO: This looks terrible, there must be a better way
234
        // to convert UTF-8 to UTF-16 with an enumerator.
235
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
236
        public bool MoveNext()
237
        {
238
            var (offset, length, nextByteIdx, currentCharPair) =
1,420✔
239
                (_offset, _length, _nextByteIdx, _currentCharPair);
1,420✔
240

241
            // A value above char.MaxValue means a surrogate pair is packed in the field
            // (high surrogate in the low 16 bits, low surrogate in the high 16 bits) and
            // the low surrogate is still pending. The comparison must be inclusive:
            // U+FFFF is a valid BMP scalar (UTF-8 EF BF BF) and is stored as exactly 0xFFFF,
            // so a strict '<' would treat it as a pending pair and emit a spurious '\0'.
            if (currentCharPair <= char.MaxValue)
1,420✔
242
            {
243
                if ((uint)nextByteIdx < (uint)length)
1,302✔
244
                {
245
                    var span = _value!.SliceUnsafe(offset + nextByteIdx, length - nextByteIdx);
1,284✔
246
                    var firstByte = MemoryMarshal.GetReference(span);
1,284✔
247
                    if (U8Info.IsAsciiByte(firstByte))
1,284✔
248
                    {
249
                        // Fast path because Rune.DecodeFromUtf8 won't inline
250
                        // making UTF-8 push us more and more towards anglocentrism.
251
                        _nextByteIdx = nextByteIdx + 1;
314✔
252
                        _currentCharPair = firstByte;
314✔
253
                        return true;
314✔
254
                    }
255

256
                    var status = Rune.DecodeFromUtf8(span, out var rune, out var bytesConsumed);
970✔
257
                    Debug.Assert(status is OperationStatus.Done);
258

259
                    _nextByteIdx = nextByteIdx + bytesConsumed;
970✔
260

261
                    if (rune.IsBmp)
970✔
262
                    {
263
                        _currentCharPair = (uint)rune.Value;
852✔
264
                        return true;
852✔
265
                    }
266

267
                    // I wonder if this just explodes on BigEndian
268
                    var runeValue = (uint)rune.Value;
118✔
269
                    var highSurrogate = (char)((runeValue + ((0xD800u - 0x40u) << 10)) >> 10);
118✔
270
                    var lowSurrogate = (char)((runeValue & 0x3FFu) + 0xDC00u);
118✔
271
                    _currentCharPair = highSurrogate + ((uint)lowSurrogate << 16);
118✔
272
                    return true;
118✔
273
                }
274

275
                return false;
18✔
276
            }
277

278
            _currentCharPair = currentCharPair >> 16;
118✔
279
            return true;
118✔
280
        }
281

282
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
283
        // Rewinds the enumerator to the start of the string.
        // The pending surrogate state must be cleared as well: if Reset() is called right
        // after a high surrogate was returned, _currentCharPair still holds the packed pair
        // and the next MoveNext() would yield the stale low surrogate instead of the first char.
        public void Reset()
        {
            _nextByteIdx = 0;
            _currentCharPair = 0;
        }
×
284

285
        readonly object IEnumerator.Current => Current;
701✔
286
        readonly void IDisposable.Dispose() { }
6✔
287
    }
288

289
    readonly bool ICollection<char>.IsReadOnly => true;
×
290
    readonly void ICollection<char>.Add(char item) => throw new NotSupportedException();
×
291
    readonly void ICollection<char>.Clear() => throw new NotSupportedException();
×
292
    readonly bool ICollection<char>.Remove(char item) => throw new NotSupportedException();
×
293
}
294

295
/// <summary>
296
/// A collection of Runes (unicode scalar values) in a provided <see cref="U8String"/>.
297
/// </summary>
298
public struct U8Runes : ICollection<Rune>, IEnumerable<Rune, U8Runes.Enumerator>
299
{
300
    readonly U8String _value;
301

302
    // If we bring up non-ascii counting to ascii level, we might not need this
303
    // similar to LineCollection.
304
    int _count;
305

306
    public U8Runes(U8String value)
307
    {
308
        _value = value;
30✔
309
        _count = value.IsEmpty ? 0 : -1;
30✔
310
    }
30✔
311

312
    /// <summary>
313
    /// The number of Runes (unicode scalar values) in the current <see cref="U8String"/>.
314
    /// </summary>
315
    public int Count
316
    {
317
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
318
        get
319
        {
320
            // Somehow the codegen here is underwhelming
321
            var count = _count;
24✔
322
            if (count >= 0)
24✔
323
            {
324
                return count;
9✔
325
            }
326

327
            return _count = U8Searching.CountRunes(ref _value.UnsafeRef, (uint)_value.Length);
15✔
328
        }
329
    }
330

331
    public readonly bool Contains(Rune item) => _value.Contains(item);
642✔
332

333
    public readonly void CopyTo(Rune[] destination, int index)
334
    {
335
        // TODO: Simple SIMD widen ASCII to UTF-32 (ideally widen+validate in place instead of double traversal)
336
        // TODO: Consistency and correctness? Implement single-pass vectorized conversion?
337
        foreach (var rune in this)
×
338
        {
339
            destination[index++] = rune;
×
340
        }
341
    }
×
342

343
    public readonly void Deconstruct(out Rune first, out Rune second)
344
    {
345
        this.Deconstruct<U8Runes, Enumerator, Rune>(out first, out second);
×
346
    }
×
347

348
    public readonly void Deconstruct(out Rune first, out Rune second, out Rune third)
349
    {
350
        this.Deconstruct<U8Runes, Enumerator, Rune>(out first, out second, out third);
×
351
    }
×
352

353
    public readonly Rune ElementAt(int index)
354
    {
355
        return this.ElementAt<U8Runes, Enumerator, Rune>(index);
×
356
    }
357

358
    public readonly Rune ElementAtOrDefault(int index)
359
    {
360
        return this.ElementAtOrDefault<U8Runes, Enumerator, Rune>(index);
×
361
    }
362

363
    public Rune[] ToArray() => this.ToArray<U8Runes, Enumerator, Rune>();
6✔
364

365
    public List<Rune> ToList() => this.ToList<U8Runes, Enumerator, Rune>();
×
366

367
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
368
    public readonly Enumerator GetEnumerator() => new(_value);
659✔
369

370
    readonly IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator() => GetEnumerator();
642✔
371
    readonly IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
6✔
372

373
    public struct Enumerator : IEnumerator<Rune>
374
    {
375
        readonly byte[]? _value;
376
        readonly int _offset;
377
        readonly int _length;
378
        int _index;
379

380
        public Enumerator(U8String value)
381
        {
382
            _value = value._value;
659✔
383
            _offset = value.Offset;
659✔
384
            _length = value.Length;
659✔
385
        }
659✔
386

387
        public Rune Current { get; private set; }
388

389
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
390
        public bool MoveNext()
391
        {
392
            var index = _index;
55,597✔
393
            if (index < _length)
55,597✔
394
            {
395
                ref var ptr = ref MemoryMarshal
55,574✔
396
                    .GetArrayDataReference(_value!)
55,574✔
397
                    .Add(_offset + index);
55,574✔
398

399
                Current = U8Conversions.CodepointToRune(ref ptr, out var size);
55,574✔
400
                _index = index + size;
55,574✔
401
                return true;
55,574✔
402
            }
403

404
            return false;
23✔
405
        }
406

407
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
408
        // Rewinds the enumerator to the start of the string.
        // _index is the byte offset of the next rune to decode (it starts at 0 and MoveNext
        // only checks 'index < _length'), so the rewind value is 0. Using -1 here would make
        // the next MoveNext() decode from the byte at '_offset - 1', i.e. out of bounds.
        public void Reset() => _index = 0;
×
409

410
        readonly object IEnumerator.Current => Current;
642✔
411
        readonly void IDisposable.Dispose() { }
653✔
412
    }
413

414
    readonly bool ICollection<Rune>.IsReadOnly => true;
×
415
    // The collection is read-only (IsReadOnly is true); mutation is unsupported rather than unimplemented.
    readonly void ICollection<Rune>.Add(Rune item) => throw new NotSupportedException();
×
416
    // The collection is read-only (IsReadOnly is true); mutation is unsupported rather than unimplemented.
    readonly void ICollection<Rune>.Clear() => throw new NotSupportedException();
×
417
    // The collection is read-only (IsReadOnly is true); mutation is unsupported rather than unimplemented.
    readonly bool ICollection<Rune>.Remove(Rune item) => throw new NotSupportedException();
×
418
}
419

420
/// <summary>
421
/// A collection of lines in a provided <see cref="U8String"/>.
422
/// </summary>
423
public struct U8Lines : ICollection<U8String>, IU8Enumerable<U8Lines.Enumerator>
424
{
425
    readonly U8String _value;
426

427
    // We might not need this. Although counting is O(n), the absolute performance
428
    // is very good, and on AVX2/512 - it's basically instantaneous.
429
    int _count;
430

431
    /// <summary>
432
    /// Creates a new line enumeration over the provided string.
433
    /// </summary>
434
    /// <param name="value">The string to enumerate over.</param>
435
    public U8Lines(U8String value)
436
    {
437
        _value = value;
×
438
        _count = value.IsEmpty ? 0 : -1;
×
439
    }
×
440

441
    /// <summary>
442
    /// The number of lines in the current <see cref="U8String"/>.
443
    /// </summary>
444
    public int Count
445
    {
446
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
447
        get
448
        {
449
            var count = _count;
×
450
            if (count >= 0)
×
451
            {
452
                return count;
×
453
            }
454

455
            // Matches the behavior of string.Split('\n').Length for "hello\n"
456
            // TODO: Should we break consistency and not count the very last segment if it is empty?
457
            // (likely no - an empty line is still a line)
458
            return _count = _value.UnsafeSpan.Count((byte)'\n') + 1;
×
459
        }
460
    }
461

462
    public readonly bool Contains(U8String item)
463
    {
464
        return !item.Contains((byte)'\n') && _value.Contains(item);
×
465
    }
466

467
    public void CopyTo(U8String[] destination, int index)
468
    {
469
        this.CopyTo<U8Lines, Enumerator, U8String>(destination.AsSpan()[index..]);
×
470
    }
×
471

472
    public readonly void Deconstruct(out U8String first, out U8String second)
473
    {
474
        this.Deconstruct<U8Lines, Enumerator, U8String>(out first, out second);
×
475
    }
×
476

477
    public readonly void Deconstruct(out U8String first, out U8String second, out U8String third)
478
    {
479
        this.Deconstruct<U8Lines, Enumerator, U8String>(out first, out second, out third);
×
480
    }
×
481

482
    public readonly U8String ElementAt(int index)
483
    {
484
        return this.ElementAt<U8Lines, Enumerator, U8String>(index);
×
485
    }
486

487
    public readonly U8String ElementAtOrDefault(int index)
488
    {
489
        return this.ElementAtOrDefault<U8Lines, Enumerator, U8String>(index);
×
490
    }
491

492
    public U8String[] ToArray() => this.ToArray<U8Lines, Enumerator, U8String>();
×
493
    public List<U8String> ToList() => this.ToList<U8Lines, Enumerator, U8String>();
×
494

495
    /// <summary>
496
    /// Returns an <see cref="Enumerator"/> over the provided string.
497
    /// </summary>
498
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
499
    public readonly Enumerator GetEnumerator() => new(_value);
×
500

501
    readonly IEnumerator<U8String> IEnumerable<U8String>.GetEnumerator() => GetEnumerator();
×
502
    readonly IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
×
503

504
    readonly bool ICollection<U8String>.IsReadOnly => true;
×
505
    readonly void ICollection<U8String>.Add(U8String item) => throw new NotSupportedException();
×
506
    readonly void ICollection<U8String>.Clear() => throw new NotSupportedException();
×
507
    readonly bool ICollection<U8String>.Remove(U8String item) => throw new NotSupportedException();
×
508

509
    /// <summary>
510
    /// A struct that enumerates lines over a string.
511
    /// </summary>
512
    public struct Enumerator : IU8Enumerator
513
    {
514
        // TODO 1: Ensure this is aligned with Rust's .lines() implementation, or not?
515
        // private static readonly SearchValues<byte> NewLine = SearchValues.Create("\r\n"u8);
516
        // TODO 2: Consider using 'InnerOffsets'
517
        private readonly byte[]? _value;
518
        private U8Range _remaining;
519
        private U8Range _current;
520

521
        /// <summary>
522
        /// Creates a new line enumerator over the provided string.
523
        /// </summary>
524
        /// <param name="value">The string to enumerate over.</param>
525
        public Enumerator(U8String value)
526
        {
527
            _value = value._value;
×
528
            _remaining = value._inner;
×
529
        }
×
530

531
        /// <summary>
532
        /// Returns the current line.
533
        /// </summary>
534
        public readonly U8String Current => new(_value, _current.Offset, _current.Length);
×
535

536
        /// <summary>
537
        /// Advances the enumerator to the next line.
538
        /// </summary>
539
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // Surprisingly smaller codegen than when not inlined
540
        public bool MoveNext()
541
        {
542
            var remaining = _remaining;
×
543
            if (remaining.Length > 0)
×
544
            {
545
                var span = _value!.SliceUnsafe(remaining.Offset, remaining.Length);
×
546
                var idx = span.IndexOf((byte)'\n');
×
547

548
                if ((uint)idx < (uint)span.Length)
×
549
                {
550
                    var cutoff = idx;
×
551
                    if (idx > 0 && span.AsRef(idx - 1) is (byte)'\r')
×
552
                    {
553
                        cutoff--;
×
554
                    }
555

556
                    _current = new(remaining.Offset, cutoff);
×
557
                    _remaining = new(remaining.Offset + idx + 1, remaining.Length - idx - 1);
×
558
                }
559
                else
560
                {
561
                    // We've reached EOF, but we still need to return 'true' for this final
562
                    // iteration so that the caller can query the Current property once more.
563
                    _current = new(remaining.Offset, remaining.Length);
×
564
                    _remaining = default;
×
565
                }
566

567
                return true;
×
568
            }
569

570
            return false;
×
571
        }
572

573
        readonly object IEnumerator.Current => Current;
×
574
        readonly void IEnumerator.Reset() => throw new NotSupportedException();
×
575
        readonly void IDisposable.Dispose() { }
×
576
    }
577
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc