• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5909906392

19 Aug 2023 06:18AM UTC coverage: 22.422% (+0.7%) from 21.682%
5909906392

push

github

neon-sunset
fix+docs: fix tests build and update TODO

120 of 772 branches covered (15.54%)

Branch coverage included in aggregate %.

50 of 50 new or added lines in 1 file covered. (100.0%)

428 of 1672 relevant lines covered (25.6%)

30558.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.38
/src/U8String.Enumeration.cs
1
using System.Buffers;
2
using System.Collections;
3
using System.Diagnostics;
4
using System.Runtime.InteropServices;
5
using System.Text;
6
using U8Primitives.Abstractions;
7

8
using Rune = System.Text.Rune;
9

10
namespace U8Primitives;
11

12
#pragma warning disable IDE0032, IDE0057 // Use auto property and index operator. Why: Perf, struct layout, accuracy and codegen.
13
public readonly partial struct U8String
{
    /// <summary>
    /// Exposes this string as a collection of UTF-16 <see cref="char"/>s.
    /// </summary>
    /// <remarks>
    /// The collection is lazily evaluated and allocation-free.
    /// </remarks>
    public U8Chars Chars
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Chars(this); }
    }

    /// <summary>
    /// Exposes this string as a collection of <see cref="Rune"/>s.
    /// </summary>
    /// <remarks>
    /// The collection is lazily evaluated and allocation-free.
    /// </remarks>
    public U8Runes Runes
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Runes(this); }
    }

    /// <summary>
    /// Exposes this string as a collection of lines.
    /// </summary>
    /// <remarks>
    /// The collection is lazily evaluated and allocation-free.
    /// </remarks>
    public U8Lines Lines
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return new U8Lines(this); }
    }

    /// <summary>
    /// Creates a struct enumerator over the bytes of this string.
    /// </summary>
    // Author's note: the codegen for this is still not ideal.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Enumerator GetEnumerator()
    {
        return new Enumerator(this);
    }

    // Interface paths return the same struct enumerator, boxed behind the interface type.
    IEnumerator<byte> IEnumerable<byte>.GetEnumerator()
    {
        return GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Enumerates the bytes of a <see cref="U8String"/> one at a time.
    /// </summary>
    public struct Enumerator : IEnumerator<byte>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Position relative to _offset; -1 until the first MoveNext call.
        int _index;

        /// <summary>
        /// Creates a byte enumerator positioned before the first byte of the string.
        /// </summary>
        /// <param name="value">The string to enumerate over.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            _value = value._value;
            _offset = value.Offset;
            _length = value.Length;
            _index = -1;
        }

        /// <summary>
        /// The byte at the current position.
        /// </summary>
        // Plain array indexing: the author found it cheaper than MemoryMarshal-based variants.
        public readonly byte Current
        {
            get { return _value![_offset + _index]; }
        }

        /// <summary>
        /// Advances to the next byte.
        /// </summary>
        /// <returns><c>true</c> while the new position is inside the string.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            // A single unsigned comparison; it also fails for a negative index
            // (assuming _length itself is non-negative).
            var next = ++_index;
            return (uint)next < (uint)_length;
        }

        /// <summary>
        /// Rewinds the enumerator to its position before the first byte.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _index = -1;
        }

        readonly object IEnumerator.Current
        {
            get { return Current; }
        }

        // Nothing to release.
        readonly void IDisposable.Dispose()
        {
        }
    }
}
101

102
/// <summary>
/// A collection of chars in a provided <see cref="U8String"/>.
/// </summary>
public struct U8Chars : ICollection<char>
{
    readonly U8String _value;

    // Cached result of Count: 0 for an empty string, -1 until it has been computed.
    int _count;

    /// <summary>
    /// Creates a char collection over the provided string.
    /// </summary>
    /// <param name="value">The string whose chars are exposed.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Chars(U8String value)
    {
        _value = value;
        _count = value.IsEmpty ? 0 : -1;
    }

    /// <summary>
    /// The number of chars in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed on first access with <c>Encoding.UTF8.GetCharCount</c> and then
    /// cached in this instance.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var count = _count;
            if (count >= 0)
            {
                return count;
            }

            return _count = Count(_value.UnsafeSpan);

            static int Count(ReadOnlySpan<byte> value)
            {
                // The empty case is answered by the cached 0 set in the constructor.
                Debug.Assert(!value.IsEmpty);

                // TODO: Is this enough?
                return Encoding.UTF8.GetCharCount(value);
            }
        }
    }

    // TODO: Wow, this seems to be terribly broken on surrogate chars and
    // there is no easy way to fix it without sacrificing performance.
    // Perhaps it is worth just do the transcoding iteration here and warn the users
    // instead of straight up producing UB or throwing exceptions???
    /// <summary>
    /// Determines whether the string contains the given char; delegates to
    /// <c>U8String.Contains(char)</c>.
    /// </summary>
    /// <param name="item">The char to look for.</param>
    public readonly bool Contains(char item) => _value.Contains(item);

    /// <summary>
    /// Transcodes the string to UTF-16 and writes the chars into
    /// <paramref name="destination"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the chars.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    public readonly void CopyTo(char[] destination, int index)
    {
        var value = _value;

        // Nothing is written (and the arguments are not inspected) for an empty string.
        if (!value.IsEmpty)
        {
            Encoding.UTF8.GetChars(value.UnsafeSpan, destination.AsSpan(index));
        }
    }

    /// <summary>
    /// Returns a struct enumerator over the chars of the string.
    /// </summary>
    public readonly Enumerator GetEnumerator() => new(_value);

    readonly IEnumerator<char> IEnumerable<char>.GetEnumerator() => new Enumerator(_value);
    readonly IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_value);

    /// <summary>
    /// Enumerates a <see cref="U8String"/> as UTF-16 chars, decoding one code point at a
    /// time and splitting non-BMP code points into a surrogate pair.
    /// </summary>
    public struct Enumerator : IEnumerator<char>
    {
        // TODO: refactor layout
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Index (relative to _offset) of the next byte to decode.
        int _nextByteIdx;

        // Low 16 bits: the char reported by Current.
        // High 16 bits: a low surrogate that is still pending, or 0 when there is none.
        uint _currentCharPair;

        /// <summary>
        /// Creates a char enumerator positioned before the first char of the string.
        /// </summary>
        /// <param name="value">The string to enumerate over.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            _value = value._value;
            _offset = value.Offset;
            _length = value.Length;
            _nextByteIdx = 0;
            _currentCharPair = 0;
        }

        // TODO
        /// <summary>
        /// The current char: the low 16 bits of the packed state.
        /// </summary>
        public readonly char Current => (char)_currentCharPair;

        // TODO: This looks terrible, there must be a better way
        // to convert UTF-8 to UTF-16 with an enumerator.
        /// <summary>
        /// Advances to the next UTF-16 char.
        /// </summary>
        /// <returns><c>true</c> if a char was produced; <c>false</c> at the end of the string.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var (offset, length, nextByteIdx, currentCharPair) =
                (_offset, _length, _nextByteIdx, _currentCharPair);

            // No pending low surrogate when the packed state fits in 16 bits.
            // The comparison must be inclusive: a current char of exactly U+FFFF has no
            // pending half, and treating it as one would emit a spurious '\0'.
            if (currentCharPair <= char.MaxValue)
            {
                if ((uint)nextByteIdx < (uint)length)
                {
                    var span = _value!.SliceUnsafe(offset + nextByteIdx, length - nextByteIdx);
                    var firstByte = MemoryMarshal.GetReference(span);
                    if (U8Info.IsAsciiByte(firstByte))
                    {
                        // Fast path because Rune.DecodeFromUtf8 won't inline
                        // making UTF-8 push us more and more towards anglocentrism.
                        _nextByteIdx = nextByteIdx + 1;
                        _currentCharPair = firstByte;
                        return true;
                    }

                    var status = Rune.DecodeFromUtf8(span, out var rune, out var bytesConsumed);
                    Debug.Assert(status is OperationStatus.Done);

                    _nextByteIdx = nextByteIdx + bytesConsumed;

                    if (rune.IsBmp)
                    {
                        _currentCharPair = (uint)rune.Value;
                        return true;
                    }

                    // Split the supplementary code point into a surrogate pair and pack both
                    // halves into one uint. This is shift/add arithmetic on values, not a
                    // reinterpretation of memory, so byte order does not affect it.
                    var runeValue = (uint)rune.Value;
                    var highSurrogate = (char)((runeValue + ((0xD800u - 0x40u) << 10)) >> 10);
                    var lowSurrogate = (char)((runeValue & 0x3FFu) + 0xDC00u);
                    _currentCharPair = highSurrogate + ((uint)lowSurrogate << 16);
                    return true;
                }

                return false;
            }

            // Emit the pending low surrogate; the shift also clears the pending half.
            _currentCharPair = currentCharPair >> 16;
            return true;
        }

        /// <summary>
        /// Rewinds the enumerator to the start of the string.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _nextByteIdx = 0;

            // Also drop any pending low surrogate; otherwise it would be returned as the
            // first char after the reset.
            _currentCharPair = 0;
        }

        readonly object IEnumerator.Current => Current;
        readonly void IDisposable.Dispose() { }
    }

    // The collection is read-only: mutating members are not supported.
    readonly bool ICollection<char>.IsReadOnly => true;
    readonly void ICollection<char>.Add(char item) => throw new NotSupportedException();
    readonly void ICollection<char>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<char>.Remove(char item) => throw new NotSupportedException();
}
246

247
/// <summary>
/// A collection of Runes (unicode scalar values) in a provided <see cref="U8String"/>.
/// </summary>
public struct U8Runes : ICollection<Rune>
{
    readonly U8String _value;

    // Cached result of Count: 0 for an empty string, -1 until it has been computed.
    // If we bring up non-ascii counting to ascii level, we might not need this
    // similar to LineCollection.
    int _count;

    /// <summary>
    /// Creates a Rune collection over the provided string.
    /// </summary>
    /// <param name="value">The string whose Runes are exposed.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Runes(U8String value)
    {
        _value = value;
        _count = value.IsEmpty ? 0 : -1;
    }

    /// <summary>
    /// The number of Runes (unicode scalar values) in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed on first access and then cached in this instance.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var count = _count;
            if (count >= 0)
            {
                return count;
            }

            return _count = Count(_value.UnsafeSpan);

            static int Count(ReadOnlySpan<byte> value)
            {
                // The empty case is answered by the cached 0 set in the constructor.
                Debug.Assert(!value.IsEmpty);

                // The helper's result is used both as the rune count so far and as the
                // slice start, i.e. it is treated as the length of a leading ASCII-only
                // prefix in which every byte is one rune.
                // TODO: SIMD non-continuation byte counting
                var runeCount = (int)(nint)Polyfills.Text.Ascii.GetIndexOfFirstNonAsciiByte(value);
                value = value.SliceUnsafe(runeCount);

                // Walk the remainder one code point at a time, stepping by the length that
                // U8Info.RuneLength reports for the byte at the current position.
                for (var i = 0; (uint)i < (uint)value.Length; i += U8Info.RuneLength(value.AsRef(i)))
                {
                    runeCount++;
                }

                return runeCount;
            }
        }
    }

    /// <summary>
    /// Determines whether the string contains the given Rune; delegates to
    /// <c>U8String.Contains(Rune)</c>.
    /// </summary>
    /// <param name="item">The Rune to look for.</param>
    public readonly bool Contains(Rune item) => _value.Contains(item);

    /// <summary>
    /// Writes every Rune of the string into <paramref name="destination"/>, starting at
    /// <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the Runes.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    public readonly void CopyTo(Rune[] destination, int index)
    {
        // TODO: Simple SIMD widen ASCII to UTF-32 (ideally widen+validate in place instead of double traversal)
        // TODO: Consistency and correctness? Implement single-pass vectorized conversion?
        foreach (var rune in this)
        {
            destination[index++] = rune;
        }
    }

    /// <summary>
    /// Returns a struct enumerator over the Runes of the string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator() => new(_value);

    readonly IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator() => GetEnumerator();
    readonly IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Enumerates a <see cref="U8String"/> one Rune at a time.
    /// </summary>
    public struct Enumerator : IEnumerator<Rune>
    {
        readonly byte[]? _value;
        readonly int _offset;
        readonly int _length;

        // Index (relative to _offset) of the next byte to decode; starts at 0.
        int _index;

        /// <summary>
        /// Creates a Rune enumerator positioned before the first Rune of the string.
        /// </summary>
        /// <param name="value">The string to enumerate over.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Enumerator(U8String value)
        {
            _value = value._value;
            _offset = value.Offset;
            _length = value.Length;
            _index = 0;
        }

        /// <summary>
        /// The Rune decoded by the most recent successful <see cref="MoveNext"/> call.
        /// </summary>
        public Rune Current { get; private set; }

        /// <summary>
        /// Decodes the next Rune and advances past its bytes.
        /// </summary>
        /// <returns><c>true</c> if a Rune was decoded; <c>false</c> at the end of the string.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var index = _index;
            if (index < _length)
            {
                ref var ptr = ref MemoryMarshal
                    .GetArrayDataReference(_value!)
                    .Add(_offset + index);

                Current = U8Conversions.CodepointToRune(ref ptr, out var size);
                _index = index + size;
                return true;
            }

            return false;
        }

        /// <summary>
        /// Rewinds the enumerator to the start of the string.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            // The start position is 0, matching the constructor. Using -1 here would make
            // the next MoveNext decode from the byte immediately before the string.
            _index = 0;
        }

        readonly object IEnumerator.Current => Current;
        readonly void IDisposable.Dispose() { }
    }

    // The collection is read-only: mutating members throw NotSupportedException, in line
    // with the ICollection<T> contract and the sibling U8Chars / U8Lines collections.
    readonly bool ICollection<Rune>.IsReadOnly => true;
    readonly void ICollection<Rune>.Add(Rune item) => throw new NotSupportedException();
    readonly void ICollection<Rune>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<Rune>.Remove(Rune item) => throw new NotSupportedException();
}
365

366
/// <summary>
/// A collection of lines in a provided <see cref="U8String"/>.
/// </summary>
public struct U8Lines : ICollection<U8String>, IU8Enumerable<U8Lines.Enumerator>
{
    readonly U8String _value;

    // Cached result of Count: 0 for an empty string, -1 until it has been computed.
    // We might not need this. Although counting is O(n), the absolute performance
    // is very good, and on AVX2/512 - it's basically instantaneous.
    int _count;

    /// <summary>
    /// Creates a new line enumeration over the provided string.
    /// </summary>
    /// <param name="value">The string to enumerate over.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Lines(U8String value)
    {
        _value = value;
        _count = value.IsEmpty ? 0 : -1;
    }

    /// <summary>
    /// The number of lines in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed on first access as the number of <c>'\n'</c> bytes plus one, then cached
    /// in this instance.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            var cached = _count;
            if (cached < 0)
            {
                // Matches the behavior of string.Split('\n').Length for "hello\n"
                // TODO: Should we break consistency and not count the very last segment if it is empty?
                // (likely no - an empty line is still a line)
                // NOTE(review): for input ending in '\n' this is one more than the number
                // of items Enumerator yields, because MoveNext stops as soon as the
                // remaining range is empty - confirm which of the two is intended.
                cached = _value.UnsafeSpan.Count((byte)'\n') + 1;
                _count = cached;
            }

            return cached;
        }
    }

    /// <summary>
    /// Returns <c>false</c> when <paramref name="item"/> contains a <c>'\n'</c> byte;
    /// otherwise returns the result of <c>U8String.Contains</c> on the underlying string.
    /// </summary>
    /// <param name="item">The candidate line.</param>
    public readonly bool Contains(U8String item)
    {
        // NOTE(review): this appears to be a substring check rather than a whole-line
        // match (e.g. a fragment from the middle of a line would pass) - confirm intent.
        if (item.Contains((byte)'\n'))
        {
            return false;
        }

        return _value.Contains(item);
    }

    /// <summary>
    /// Copies the lines into <paramref name="destination"/>, starting at
    /// <paramref name="index"/>, via the shared <c>CopyTo</c> enumerable helper.
    /// </summary>
    /// <param name="destination">The array that receives the lines.</param>
    /// <param name="index">The position in <paramref name="destination"/> at which writing starts.</param>
    public void CopyTo(U8String[] destination, int index)
    {
        var target = destination.AsSpan()[index..];
        this.CopyTo<U8Lines, Enumerator>(target);
    }

    /// <summary>
    /// Deconstructs the collection into two values via the shared <c>Deconstruct</c> helper.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Deconstruct(out U8String first, out U8String second)
    {
        this.Deconstruct<U8Lines, Enumerator>(out first, out second);
    }

    /// <summary>
    /// Deconstructs the collection into three values via the shared <c>Deconstruct</c> helper.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Deconstruct(out U8String first, out U8String second, out U8String third)
    {
        this.Deconstruct<U8Lines, Enumerator>(out first, out second, out third);
    }

    /// <summary>
    /// Materializes the lines into an array via the shared <c>ToArray</c> helper.
    /// </summary>
    public U8String[] ToArray()
    {
        return this.ToArray<U8Lines, Enumerator>();
    }

    /// <summary>
    /// Materializes the lines into a list via the shared <c>ToList</c> helper.
    /// </summary>
    public List<U8String> ToList()
    {
        return this.ToList<U8Lines, Enumerator>();
    }

    /// <summary>
    /// Returns a <see cref="Enumerator"/> over the provided string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator<U8String> IEnumerable<U8String>.GetEnumerator()
    {
        return GetEnumerator();
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    // The collection is read-only: mutating members are not supported.
    readonly bool ICollection<U8String>.IsReadOnly
    {
        get { return true; }
    }

    readonly void ICollection<U8String>.Add(U8String item)
    {
        throw new NotSupportedException();
    }

    readonly void ICollection<U8String>.Clear()
    {
        throw new NotSupportedException();
    }

    readonly bool ICollection<U8String>.Remove(U8String item)
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// A struct that enumerates lines over a string.
    /// </summary>
    public struct Enumerator : IU8Enumerator
    {
        // TODO 1: Ensure this is aligned with Rust's .lines() implementation, or not?
        // private static readonly SearchValues<byte> NewLine = SearchValues.Create("\r\n"u8);
        // TODO 2: Consider using 'InnerOffsets'
        private readonly byte[]? _value;

        // The part of the string that has not been split yet.
        private U8Range _remaining;

        // The range of the line most recently produced by MoveNext.
        private U8Range _current;

        /// <summary>
        /// Creates a new line enumerator over the provided string.
        /// </summary>
        /// <param name="value">The string to enumerate over.</param>
        public Enumerator(U8String value)
        {
            _value = value._value;
            _remaining = value._inner;
        }

        /// <summary>
        /// Returns the current line.
        /// </summary>
        public readonly U8String Current
        {
            get { return new U8String(_value, _current.Offset, _current.Length); }
        }

        /// <summary>
        /// Advances the enumerator to the next line.
        /// </summary>
        /// <returns><c>true</c> if a line was produced; <c>false</c> once nothing remains.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // Surprisingly smaller codegen than when not inlined
        public bool MoveNext()
        {
            var rest = _remaining;
            if (rest.Length <= 0)
            {
                return false;
            }

            var span = _value!.SliceUnsafe(rest.Offset, rest.Length);
            var newline = span.IndexOf((byte)'\n');

            if ((uint)newline >= (uint)span.Length)
            {
                // No further '\n': the rest of the string is the final line. Still report
                // 'true' so that the caller can read Current one more time.
                _current = new U8Range(rest.Offset, rest.Length);
                _remaining = default;
                return true;
            }

            // Exclude a '\r' that directly precedes the '\n' from the line.
            var lineLength = newline;
            if (newline > 0 && span.AsRef().Add(newline - 1) is (byte)'\r')
            {
                lineLength--;
            }

            // Continue after the '\n' on the next call.
            var consumed = newline + 1;
            _current = new U8Range(rest.Offset, lineLength);
            _remaining = new U8Range(rest.Offset + consumed, rest.Length - consumed);
            return true;
        }

        readonly object IEnumerator.Current
        {
            get { return Current; }
        }

        readonly void IEnumerator.Reset()
        {
            throw new NotSupportedException();
        }

        // Nothing to release.
        readonly void IDisposable.Dispose()
        {
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc