• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5912546951

19 Aug 2023 05:00PM UTC coverage: 22.685%. Remained the same
5912546951

push

github

neon-sunset
perf: don't forcibly inline enumerator constructors - let the compiler figure it out to prevent codegen regressions when hitting inlining budget

122 of 776 branches covered (15.72%)

Branch coverage included in aggregate %.

1 of 1 new or added line in 1 file covered. (100.0%)

439 of 1697 relevant lines covered (25.87%)

30109.08 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

52.29
/src/U8String.Enumeration.cs
1
using System.Buffers;
2
using System.Collections;
3
using System.Diagnostics;
4
using System.Runtime.InteropServices;
5
using System.Text;
6
using U8Primitives.Abstractions;
7

8
using Rune = System.Text.Rune;
9

10
namespace U8Primitives;
11

12
#pragma warning disable IDE0032, IDE0057 // Use auto property and index operator. Why: Perf, struct layout, accuracy and codegen.
13
public readonly partial struct U8String
{
    /// <summary>
    /// Exposes this string as a collection of <see cref="char"/>s.
    /// </summary>
    /// <remarks>
    /// The collection is lazily evaluated and allocation-free.
    /// </remarks>
    public U8Chars Chars
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Chars(this); }
    }

    /// <summary>
    /// Exposes this string as a collection of <see cref="Rune"/>s.
    /// </summary>
    /// <remarks>
    /// The collection is lazily evaluated and allocation-free.
    /// </remarks>
    public U8Runes Runes
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Runes(this); }
    }

    /// <summary>
    /// Exposes this string as a collection of lines.
    /// </summary>
    /// <remarks>
    /// The collection is lazily evaluated and allocation-free.
    /// </remarks>
    public U8Lines Lines
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Lines(this); }
    }

    /// <summary>
    /// Returns a struct enumerator over the individual bytes of this string.
    /// </summary>
    // Bad codegen still :(
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Enumerator GetEnumerator()
    {
        return new Enumerator(this);
    }

    IEnumerator<byte> IEnumerable<byte>.GetEnumerator()
    {
        return GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Enumerates the bytes of a <see cref="U8String"/> one at a time.
    /// </summary>
    public struct Enumerator : IEnumerator<byte>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Position relative to _offset; -1 means "before the first byte".
        int _index;

        /// <summary>
        /// Creates an enumerator positioned before the first byte of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string whose bytes are enumerated.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            (_value, _offset, _length, _index) = (value._value, value.Offset, value.Length, -1);
        }

        /// <summary>
        /// The byte at the current position. Only meaningful after
        /// <see cref="MoveNext"/> has returned <see langword="true"/>.
        /// </summary>
        // Still cheaper than MemoryMarshal clever variants
        public readonly byte Current
        {
            get { return _value![_offset + _index]; }
        }

        /// <summary>
        /// Advances to the next byte.
        /// </summary>
        /// <returns><see langword="true"/> while the new position is inside the string.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var next = _index + 1;
            _index = next;

            // Unsigned comparison: a single check that also rejects negative positions.
            return (uint)next < (uint)_length;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position, before the first byte.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _index = -1;
        }

        readonly object IEnumerator.Current
        {
            get { return Current; }
        }

        readonly void IDisposable.Dispose() { }
    }
}
101

102
/// <summary>
103
/// A collection of chars in a provided <see cref="U8String"/>.
104
/// </summary>
105
public struct U8Chars : ICollection<char>, IEnumerable<char, U8Chars.Enumerator>
{
    readonly U8String _value;

    // Cached UTF-16 char count; -1 means "not computed yet".
    int _count;

    /// <summary>
    /// Creates a char collection over the provided string.
    /// </summary>
    /// <param name="value">The string whose chars are exposed.</param>
    public U8Chars(U8String value)
    {
        _value = value;
        _count = value.IsEmpty ? 0 : -1;
    }

    /// <summary>
    /// The number of chars in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed with <see cref="Encoding.GetCharCount(ReadOnlySpan{byte})"/> on first
    /// access and cached in this struct instance afterwards.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var count = _count;
            if (count >= 0)
            {
                return count;
            }

            return _count = Count(_value.UnsafeSpan);

            static int Count(ReadOnlySpan<byte> value)
            {
                Debug.Assert(!value.IsEmpty);

                // TODO: Is this enough?
                return Encoding.UTF8.GetCharCount(value);
            }
        }
    }

    // TODO: Wow, this seems to be terribly broken on surrogate chars and
    // there is no easy way to fix it without sacrificing performance.
    // Perhaps it is worth just do the transcoding iteration here and warn the users
    // instead of straight up producing UB or throwing exceptions???
    /// <summary>
    /// Determines whether the underlying string contains <paramref name="item"/>.
    /// </summary>
    public readonly bool Contains(char item) => _value.Contains(item);

    /// <summary>
    /// Transcodes the string to UTF-16 and writes the chars into
    /// <paramref name="destination"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the chars.</param>
    /// <param name="index">The position in <paramref name="destination"/> to start writing at.</param>
    public readonly void CopyTo(char[] destination, int index)
    {
        var value = _value;
        if (!value.IsEmpty)
        {
            Encoding.UTF8.GetChars(value.UnsafeSpan, destination.AsSpan()[index..]);
        }
    }

    /// <summary>
    /// Deconstructs the collection into its first two chars.
    /// </summary>
    public readonly void Deconstruct(out char first, out char second)
    {
        this.Deconstruct<U8Chars, Enumerator, char>(out first, out second);
    }

    /// <summary>
    /// Deconstructs the collection into its first three chars.
    /// </summary>
    public readonly void Deconstruct(out char first, out char second, out char third)
    {
        this.Deconstruct<U8Chars, Enumerator, char>(out first, out second, out third);
    }

    /// <summary>
    /// Transcodes the string into a new <see cref="char"/> array.
    /// </summary>
    /// <returns>The chars of the string, or an empty array for an empty string.</returns>
    public char[] ToArray()
    {
        var value = _value;
        if (!value.IsEmpty)
        {
            var chars = new char[Count];
            Encoding.UTF8.GetChars(value.UnsafeSpan, chars);
            return chars;
        }

        return Array.Empty<char>();
    }

    /// <summary>
    /// Transcodes the string into a new <see cref="List{T}"/> of chars.
    /// </summary>
    /// <returns>The chars of the string, or an empty list for an empty string.</returns>
    public List<char> ToList()
    {
        var value = _value;
        if (!value.IsEmpty)
        {
            var count = Count;
            var chars = new List<char>(count);

            // Size the list up-front so the transcoder can write straight into its storage.
            CollectionsMarshal.SetCount(chars, count);
            var span = CollectionsMarshal.AsSpan(chars);

            Encoding.UTF8.GetChars(value.UnsafeSpan, span);
            return chars;
        }

        return new List<char>();
    }

    /// <summary>
    /// Returns a struct enumerator over the chars of the string.
    /// </summary>
    public readonly Enumerator GetEnumerator() => new(_value);

    readonly IEnumerator<char> IEnumerable<char>.GetEnumerator() => new Enumerator(_value);
    readonly IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_value);

    /// <summary>
    /// Enumerates the UTF-16 chars of a <see cref="U8String"/>, decoding one
    /// scalar value at a time and yielding supplementary-plane values as a
    /// high/low surrogate pair.
    /// </summary>
    public struct Enumerator : IEnumerator<char>
    {
        // TODO: refactor layout
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;
        int _nextByteIdx;

        // Low 16 bits: the char to report from Current.
        // High 16 bits: a pending low surrogate, or zero when nothing is pending.
        uint _currentCharPair;

        /// <summary>
        /// Creates an enumerator positioned before the first char of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string whose chars are enumerated.</param>
        public Enumerator(U8String value)
        {
            _value = value._value;
            _offset = value.Offset;
            _length = value.Length;
            _nextByteIdx = 0;
        }

        // TODO
        /// <summary>
        /// The char at the current position.
        /// </summary>
        public readonly char Current => (char)_currentCharPair;

        // TODO: This looks terrible, there must be a better way
        // to convert UTF-8 to UTF-16 with an enumerator.
        /// <summary>
        /// Advances to the next char.
        /// </summary>
        /// <returns><see langword="true"/> if a char is available; otherwise <see langword="false"/>.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var (offset, length, nextByteIdx, currentCharPair) =
                (_offset, _length, _nextByteIdx, _currentCharPair);

            // Anything that fits in 16 bits is a single char with no pending low surrogate.
            // The comparison must be inclusive: U+FFFF is a valid BMP scalar value, and an
            // exclusive check would mistake it for a pair and emit a spurious '\0' next.
            if (currentCharPair <= char.MaxValue)
            {
                if ((uint)nextByteIdx < (uint)length)
                {
                    var span = _value!.SliceUnsafe(offset + nextByteIdx, length - nextByteIdx);
                    var firstByte = MemoryMarshal.GetReference(span);
                    if (U8Info.IsAsciiByte(firstByte))
                    {
                        // Fast path because Rune.DecodeFromUtf8 won't inline
                        // making UTF-8 push us more and more towards anglocentrism.
                        _nextByteIdx = nextByteIdx + 1;
                        _currentCharPair = firstByte;
                        return true;
                    }

                    var status = Rune.DecodeFromUtf8(span, out var rune, out var bytesConsumed);
                    Debug.Assert(status is OperationStatus.Done);

                    _nextByteIdx = nextByteIdx + bytesConsumed;

                    if (rune.IsBmp)
                    {
                        _currentCharPair = (uint)rune.Value;
                        return true;
                    }

                    // Split the supplementary-plane value into a surrogate pair. This is
                    // plain integer arithmetic, so it does not depend on byte order.
                    var runeValue = (uint)rune.Value;
                    var highSurrogate = (char)((runeValue + ((0xD800u - 0x40u) << 10)) >> 10);
                    var lowSurrogate = (char)((runeValue & 0x3FFu) + 0xDC00u);
                    _currentCharPair = highSurrogate + ((uint)lowSurrogate << 16);
                    return true;
                }

                return false;
            }

            // A low surrogate is pending: shift it into the reporting position.
            _currentCharPair = currentCharPair >> 16;
            return true;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _nextByteIdx = 0;

            // Drop any pending low surrogate, otherwise a reset in the middle of a
            // surrogate pair would make the next MoveNext yield that stale char.
            _currentCharPair = 0;
        }

        readonly object IEnumerator.Current => Current;
        readonly void IDisposable.Dispose() { }
    }

    // The collection is a read-only view, so all mutating members are unsupported.
    readonly bool ICollection<char>.IsReadOnly => true;
    readonly void ICollection<char>.Add(char item) => throw new NotSupportedException();
    readonly void ICollection<char>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<char>.Remove(char item) => throw new NotSupportedException();
}
284

285
/// <summary>
286
/// A collection of Runes (unicode scalar values) in a provided <see cref="U8String"/>.
287
/// </summary>
288
public struct U8Runes : ICollection<Rune>, IEnumerable<Rune, U8Runes.Enumerator>
{
    readonly U8String _value;

    // Cached rune count; -1 means "not computed yet".
    // If we bring up non-ascii counting to ascii level, we might not need this
    // similar to LineCollection.
    int _count;

    /// <summary>
    /// Creates a rune collection over the provided string.
    /// </summary>
    /// <param name="value">The string whose runes are exposed.</param>
    public U8Runes(U8String value)
    {
        _value = value;
        _count = value.IsEmpty ? 0 : -1;
    }

    /// <summary>
    /// The number of Runes (unicode scalar values) in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed on first access and cached in this struct instance afterwards.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var count = _count;
            if (count >= 0)
            {
                return count;
            }

            return _count = Count(_value.UnsafeSpan);

            static int Count(ReadOnlySpan<byte> value)
            {
                Debug.Assert(!value.IsEmpty);

                // Every byte of the leading ASCII run is one rune; skip past that run.
                // TODO: SIMD non-continuation byte counting
                var runeCount = (int)(nint)Polyfills.Text.Ascii.GetIndexOfFirstNonAsciiByte(value);
                value = value.SliceUnsafe(runeCount);

                // Walk the remainder one rune at a time, stepping by each rune's byte length.
                for (var i = 0; (uint)i < (uint)value.Length; i += U8Info.RuneLength(value.AsRef(i)))
                {
                    runeCount++;
                }

                return runeCount;
            }
        }
    }

    /// <summary>
    /// Determines whether the underlying string contains <paramref name="item"/>.
    /// </summary>
    public readonly bool Contains(Rune item) => _value.Contains(item);

    /// <summary>
    /// Copies the runes of the string into <paramref name="destination"/>
    /// starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the runes.</param>
    /// <param name="index">The position in <paramref name="destination"/> to start writing at.</param>
    public readonly void CopyTo(Rune[] destination, int index)
    {
        // TODO: Simple SIMD widen ASCII to UTF-32 (ideally widen+validate in place instead of double traversal)
        // TODO: Consistency and correctness? Implement single-pass vectorized conversion?
        foreach (var rune in this)
        {
            destination[index++] = rune;
        }
    }

    /// <summary>
    /// Deconstructs the collection into its first two runes.
    /// </summary>
    public readonly void Deconstruct(out Rune first, out Rune second)
    {
        this.Deconstruct<U8Runes, Enumerator, Rune>(out first, out second);
    }

    /// <summary>
    /// Deconstructs the collection into its first three runes.
    /// </summary>
    public readonly void Deconstruct(out Rune first, out Rune second, out Rune third)
    {
        this.Deconstruct<U8Runes, Enumerator, Rune>(out first, out second, out third);
    }

    /// <summary>
    /// Copies the runes of the string into a new array.
    /// </summary>
    public Rune[] ToArray() => this.ToArray<U8Runes, Enumerator, Rune>();

    /// <summary>
    /// Copies the runes of the string into a new <see cref="List{T}"/>.
    /// </summary>
    public List<Rune> ToList() => this.ToList<U8Runes, Enumerator, Rune>();

    /// <summary>
    /// Returns a struct enumerator over the runes of the string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator() => new(_value);

    readonly IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator() => GetEnumerator();
    readonly IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Enumerates the Runes (unicode scalar values) of a <see cref="U8String"/>.
    /// </summary>
    public struct Enumerator : IEnumerator<Rune>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Byte position (relative to _offset) of the next rune to decode.
        int _index;

        /// <summary>
        /// Creates an enumerator positioned at the start of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string whose runes are enumerated.</param>
        public Enumerator(U8String value)
        {
            // _index and Current are not assigned here and keep their default values.
            _value = value._value;
            _offset = value.Offset;
            _length = value.Length;
        }

        /// <summary>
        /// The rune decoded by the most recent successful <see cref="MoveNext"/>.
        /// </summary>
        public Rune Current { get; private set; }

        /// <summary>
        /// Decodes the next rune and advances past its bytes.
        /// </summary>
        /// <returns><see langword="true"/> if a rune was decoded; <see langword="false"/> at the end.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var index = _index;
            if (index < _length)
            {
                // Unchecked reference into the array: _index must never be negative here.
                ref var ptr = ref MemoryMarshal
                    .GetArrayDataReference(_value!)
                    .Add(_offset + index);

                Current = U8Conversions.CodepointToRune(ref ptr, out var size);
                _index = index + size;
                return true;
            }

            return false;
        }

        /// <summary>
        /// Moves the enumerator back to the start of the string.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            // The initial position is 0, not -1: MoveNext reads at _offset + _index
            // through an unchecked reference, so -1 would read one byte before the string.
            _index = 0;
            Current = default;
        }

        readonly object IEnumerator.Current => Current;
        readonly void IDisposable.Dispose() { }
    }

    // The collection is a read-only view, so all mutating members are unsupported.
    readonly bool ICollection<Rune>.IsReadOnly => true;
    readonly void ICollection<Rune>.Add(Rune item) => throw new NotSupportedException();
    readonly void ICollection<Rune>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<Rune>.Remove(Rune item) => throw new NotSupportedException();
}
415

416
/// <summary>
417
/// A collection of lines in a provided <see cref="U8String"/>.
418
/// </summary>
419
public struct U8Lines : ICollection<U8String>, IU8Enumerable<U8Lines.Enumerator>
{
    readonly U8String _value;

    // Cached line count; -1 means "not computed yet".
    // We might not need this. Although counting is O(n), the absolute performance
    // is very good, and on AVX2/512 - it's basically instantaneous.
    int _count;

    /// <summary>
    /// Creates a new line collection over the provided string.
    /// </summary>
    /// <param name="value">The string to split into lines.</param>
    public U8Lines(U8String value)
    {
        _value = value;
        _count = value.IsEmpty ? 0 : -1;
    }

    /// <summary>
    /// The number of lines in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed as the number of <c>'\n'</c> bytes plus one on first access, then cached.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            var cached = _count;
            if (cached < 0)
            {
                // Matches the behavior of string.Split('\n').Length for "hello\n"
                // TODO: Should we break consistency and not count the very last segment if it is empty?
                // (likely no - an empty line is still a line)
                // NOTE(review): the enumerator stops once no bytes remain, so for a string
                // ending in '\n' it yields one item fewer than this count - confirm which
                // of the two behaviors is intended.
                cached = _value.UnsafeSpan.Count((byte)'\n') + 1;
                _count = cached;
            }

            return cached;
        }
    }

    /// <summary>
    /// Determines whether <paramref name="item"/> contains no newline byte and
    /// occurs within the underlying string.
    /// </summary>
    public readonly bool Contains(U8String item)
    {
        if (item.Contains((byte)'\n'))
        {
            return false;
        }

        return _value.Contains(item);
    }

    /// <summary>
    /// Copies the lines into <paramref name="destination"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the lines.</param>
    /// <param name="index">The position in <paramref name="destination"/> to start writing at.</param>
    public void CopyTo(U8String[] destination, int index)
    {
        var target = destination.AsSpan()[index..];
        this.CopyTo<U8Lines, Enumerator, U8String>(target);
    }

    /// <summary>
    /// Deconstructs the collection into its first two lines.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Deconstruct(out U8String first, out U8String second)
    {
        this.Deconstruct<U8Lines, Enumerator, U8String>(out first, out second);
    }

    /// <summary>
    /// Deconstructs the collection into its first three lines.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Deconstruct(out U8String first, out U8String second, out U8String third)
    {
        this.Deconstruct<U8Lines, Enumerator, U8String>(out first, out second, out third);
    }

    /// <summary>
    /// Copies the lines into a new array.
    /// </summary>
    public U8String[] ToArray()
    {
        return this.ToArray<U8Lines, Enumerator, U8String>();
    }

    /// <summary>
    /// Copies the lines into a new <see cref="List{T}"/>.
    /// </summary>
    public List<U8String> ToList()
    {
        return this.ToList<U8Lines, Enumerator, U8String>();
    }

    /// <summary>
    /// Returns an <see cref="Enumerator"/> over the lines of the string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator<U8String> IEnumerable<U8String>.GetEnumerator()
    {
        return GetEnumerator();
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    // The collection is a read-only view, so all mutating members are unsupported.
    readonly bool ICollection<U8String>.IsReadOnly
    {
        get { return true; }
    }

    readonly void ICollection<U8String>.Add(U8String item)
    {
        throw new NotSupportedException();
    }

    readonly void ICollection<U8String>.Clear()
    {
        throw new NotSupportedException();
    }

    readonly bool ICollection<U8String>.Remove(U8String item)
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Enumerates the lines of a string, splitting on <c>'\n'</c> and dropping
    /// a <c>'\r'</c> that immediately precedes it.
    /// </summary>
    public struct Enumerator : IU8Enumerator
    {
        // TODO 1: Ensure this is aligned with Rust's .lines() implementation, or not?
        // private static readonly SearchValues<byte> NewLine = SearchValues.Create("\r\n"u8);
        // TODO 2: Consider using 'InnerOffsets'
        private readonly byte[]? _value;

        // Range of bytes not yet handed out, and range of the line reported by Current.
        private U8Range _remaining;
        private U8Range _current;

        /// <summary>
        /// Creates a new line enumerator over the provided string.
        /// </summary>
        /// <param name="value">The string to enumerate over.</param>
        public Enumerator(U8String value)
        {
            _value = value._value;
            _remaining = value._inner;
        }

        /// <summary>
        /// The line found by the most recent successful <see cref="MoveNext"/>.
        /// </summary>
        public readonly U8String Current
        {
            get { return new U8String(_value, _current.Offset, _current.Length); }
        }

        /// <summary>
        /// Advances the enumerator to the next line.
        /// </summary>
        /// <returns><see langword="true"/> if a line was found; <see langword="false"/> once no bytes remain.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // Surprisingly smaller codegen than when not inlined
        public bool MoveNext()
        {
            var rest = _remaining;
            if (rest.Length <= 0)
            {
                return false;
            }

            var bytes = _value!.SliceUnsafe(rest.Offset, rest.Length);
            var newline = bytes.IndexOf((byte)'\n');

            if ((uint)newline >= (uint)bytes.Length)
            {
                // No newline left: the rest of the input is the final line, and the
                // next call reports the end of the enumeration.
                _current = new(rest.Offset, rest.Length);
                _remaining = default;
                return true;
            }

            // Exclude a carriage return directly before the newline from the line.
            var lineLength = newline;
            if (newline > 0 && bytes.AsRef(newline - 1) is (byte)'\r')
            {
                lineLength--;
            }

            _current = new(rest.Offset, lineLength);
            _remaining = new(rest.Offset + newline + 1, rest.Length - newline - 1);
            return true;
        }

        readonly object IEnumerator.Current
        {
            get { return Current; }
        }

        readonly void IEnumerator.Reset()
        {
            throw new NotSupportedException();
        }

        readonly void IDisposable.Dispose() { }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc