• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5993769794

27 Aug 2023 11:22PM UTC coverage: 18.282% (-0.08%) from 18.359%
5993769794

push

github

neon-sunset
perf: interleave CountMatches for V128, change runes enumerator to use common CopyTo impl.

134 of 1038 branches covered (0.0%)

Branch coverage included in aggregate %.

17 of 17 new or added lines in 2 files covered. (100.0%)

479 of 2315 relevant lines covered (20.69%)

35564.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

47.45
/src/U8Enumerators.cs
1
using System.Collections;
2
using System.Diagnostics;
3
using System.Runtime.InteropServices;
4
using System.Text;
5
using U8Primitives.Abstractions;
6

7
namespace U8Primitives;
8

9
/// <summary>
10
/// A collection of chars in a provided <see cref="U8String"/>.
11
/// </summary>
12
public struct U8Chars : ICollection<char>, IEnumerable<char, U8Chars.Enumerator>
13
{
14
    readonly U8String _value;
15

16
    int _count;
17

18
    /// <summary>
    /// Creates a char collection over the provided <see cref="U8String"/>.
    /// </summary>
    /// <param name="value">The string whose chars are exposed.</param>
    public U8Chars(U8String value)
    {
        // A negative count means "not computed yet"; an empty string is known to hold 0 chars.
        var initialCount = -1;
        if (value.IsEmpty)
        {
            initialCount = 0;
        }

        _count = initialCount;
        _value = value;
    }
30✔
23

24
    /// <summary>
    /// The number of chars in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed on first access through <c>Encoding.UTF8.GetCharCount</c> and cached
    /// in this struct for subsequent reads.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var cached = _count;
            if (cached < 0)
            {
                // Negative marks a count that has not been computed yet.
                cached = CountChars(_value.UnsafeSpan);
                _count = cached;
            }

            return cached;

            static int CountChars(ReadOnlySpan<byte> utf8)
            {
                Debug.Assert(!utf8.IsEmpty);

                // TODO: Is this enough?
                return Encoding.UTF8.GetCharCount(utf8);
            }
        }
    }
49

50
    // TODO: Wow, this seems to be terribly broken on surrogate chars and 
51
    // there is no easy way to fix it without sacrificing performance.
52
    // Perhaps it is worth just doing the transcoding iteration here and warning the users
53
    // instead of straight up producing UB or throwing exceptions???
54
    public readonly bool Contains(char item)
    {
        // The search itself is delegated to the underlying string.
        return _value.Contains(item);
    }
1,402✔
55

56
    /// <summary>
    /// Transcodes the string to chars and writes them into <paramref name="destination"/>
    /// starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the chars.</param>
    /// <param name="index">The position in <paramref name="destination"/> to start writing at.</param>
    public readonly void CopyTo(char[] destination, int index)
    {
        var source = _value;
        if (source.IsEmpty)
        {
            // Nothing to transcode; the destination is not touched at all.
            return;
        }

        var target = destination.AsSpan()[index..];
        Encoding.UTF8.GetChars(source.UnsafeSpan, target);
    }
×
64

65
    /// <summary>
    /// Deconstructs the collection into two chars via the shared <c>Deconstruct</c> helper.
    /// </summary>
    public readonly void Deconstruct(out char first, out char second) =>
        this.Deconstruct<U8Chars, Enumerator, char>(out first, out second);

    /// <summary>
    /// Deconstructs the collection into three chars via the shared <c>Deconstruct</c> helper.
    /// </summary>
    public readonly void Deconstruct(out char first, out char second, out char third) =>
        this.Deconstruct<U8Chars, Enumerator, char>(out first, out second, out third);
×
74

75
    /// <summary>
    /// Returns the char at <paramref name="index"/> using the shared <c>ElementAt</c> helper.
    /// </summary>
    public readonly char ElementAt(int index) =>
        this.ElementAt<U8Chars, Enumerator, char>(index);

    /// <summary>
    /// Returns the char at <paramref name="index"/> using the shared <c>ElementAtOrDefault</c> helper.
    /// </summary>
    public readonly char ElementAtOrDefault(int index) =>
        this.ElementAtOrDefault<U8Chars, Enumerator, char>(index);
84

85
    /// <summary>
    /// Transcodes the string into a newly allocated char array.
    /// </summary>
    /// <returns>The chars of the string, or an empty array when the string is empty.</returns>
    public char[] ToArray()
    {
        var source = _value;
        if (source.IsEmpty)
        {
            return Array.Empty<char>();
        }

        // Count supplies the exact number of chars the transcoding will produce.
        var result = new char[Count];
        Encoding.UTF8.GetChars(source.UnsafeSpan, result);
        return result;
    }
97

98
    /// <summary>
    /// Transcodes the string into a newly allocated list of chars.
    /// </summary>
    /// <returns>The chars of the string, or an empty list when the string is empty.</returns>
    public List<char> ToList()
    {
        var source = _value;
        if (source.IsEmpty)
        {
            return new List<char>();
        }

        var length = Count;
        var result = new List<char>(length);

        // Grow the list to its final size up front so the chars can be written
        // straight into its backing storage.
        CollectionsMarshal.SetCount(result, length);
        Encoding.UTF8.GetChars(source.UnsafeSpan, CollectionsMarshal.AsSpan(result));

        return result;
    }
114

115
    /// <summary>
    /// Returns an <see cref="Enumerator"/> over the chars of the string.
    /// </summary>
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator<char> IEnumerable<char>.GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return new Enumerator(_value);
    }
6✔
119

120
    public struct Enumerator : IEnumerator<char>
121
    {
122
        // TODO: refactor layout
123
        readonly byte[]? _value;
124
        readonly int _offset;
125
        readonly int _length;
126
        int _nextByteIdx;
127
        uint _currentCharPair;
128

129
        /// <summary>
        /// Creates a char enumerator positioned before the first byte of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string to enumerate.</param>
        public Enumerator(U8String value)
        {
            (_value, _offset, _length, _nextByteIdx) =
                (value._value, value.Offset, value.Length, 0);
        }
12✔
136

137
        // TODO
138
        public readonly char Current
        {
            // Only the low 16 bits are the current char; the high half may hold a pending low surrogate.
            get { return (char)_currentCharPair; }
        }
1,402✔
139

140
        /// <summary>
        /// Advances the enumerator to the next char of the string.
        /// </summary>
        /// <remarks>
        /// A code point outside the BMP is yielded as two chars: the high surrogate on this call
        /// and the low surrogate on the following one. Both are packed into <c>_currentCharPair</c>
        /// (high surrogate in the low 16 bits, low surrogate in the high 16 bits).
        /// </remarks>
        /// <returns>
        /// <see langword="true"/> if a char was produced; <see langword="false"/> once all bytes are consumed.
        /// </returns>
        // TODO: This is still terrible,
        // refactor to avoid UTF8->Rune->char conversion
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var (offset, length, nextByteIdx, currentCharPair) =
                (_offset, _length, _nextByteIdx, _currentCharPair);

            // A value that fits in 16 bits has no pending low surrogate. The comparison is
            // inclusive because U+FFFF is a valid BMP scalar: excluding it would treat it as a
            // surrogate pair and emit a spurious '\0' on the next call.
            if (currentCharPair <= char.MaxValue)
            {
                if ((uint)nextByteIdx < (uint)length)
                {
                    ref var ptr = ref _value!.AsRef(offset + nextByteIdx);

                    if (U8Info.IsAsciiByte(in ptr))
                    {
                        _nextByteIdx = nextByteIdx + 1;
                        _currentCharPair = ptr;
                        return true;
                    }

                    var rune = U8Conversions.CodepointToRune(
                        ref ptr, out var size, checkAscii: false);
                    _nextByteIdx = nextByteIdx + size;

                    if (rune.IsBmp)
                    {
                        _currentCharPair = (uint)rune.Value;
                        return true;
                    }

                    // Split the supplementary code point into a surrogate pair. The packing is
                    // done with shifts on a uint value, so it does not depend on byte order.
                    var runeValue = (uint)rune.Value;
                    var highSurrogate = (char)((runeValue + ((0xD800u - 0x40u) << 10)) >> 10);
                    var lowSurrogate = (char)((runeValue & 0x3FFu) + 0xDC00u);
                    _currentCharPair = highSurrogate + ((uint)lowSurrogate << 16);
                    return true;
                }

                return false;
            }

            // The high surrogate was returned by the previous call; now expose the low one.
            _currentCharPair = currentCharPair >> 16;
            return true;
        }
185

186
        /// <summary>
        /// Moves the enumerator back to its initial position, before the first char.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            _nextByteIdx = 0;

            // Also drop any pending low surrogate: MoveNext would otherwise emit it
            // as the first char after the reset instead of restarting from byte 0.
            _currentCharPair = 0;
        }
×
188

189
        readonly object IEnumerator.Current
        {
            // Boxes the current char for the non-generic interface.
            get { return Current; }
        }

        readonly void IDisposable.Dispose()
        {
            // Nothing to release.
        }
6✔
191
    }
192

193
    // The collection is a read-only view: every mutating member is unsupported.
    readonly bool ICollection<char>.IsReadOnly
    {
        get { return true; }
    }

    readonly void ICollection<char>.Add(char item)
    {
        throw new NotSupportedException();
    }

    readonly void ICollection<char>.Clear()
    {
        throw new NotSupportedException();
    }

    readonly bool ICollection<char>.Remove(char item)
    {
        throw new NotSupportedException();
    }
×
197
}
198

199
// TODO: Implement span-taking alternatives to work with ROS<T> where T in byte, char, Rune
200
/// <summary>
201
/// A collection of Runes (unicode scalar values) in a provided <see cref="U8String"/>.
202
/// </summary>
203
public struct U8Runes : ICollection<Rune>, IEnumerable<Rune, U8Runes.Enumerator>
204
{
205
    readonly U8String _value;
206

207
    // If we bring up non-ascii counting to ascii level, we might not need this
208
    // similar to LineCollection.
209
    int _count;
210

211
    /// <summary>
    /// Creates a rune collection over the provided <see cref="U8String"/>.
    /// </summary>
    /// <param name="value">The string whose runes are exposed.</param>
    public U8Runes(U8String value)
    {
        // A negative count means "not computed yet"; an empty string is known to hold 0 runes.
        var initialCount = -1;
        if (value.IsEmpty)
        {
            initialCount = 0;
        }

        _count = initialCount;
        _value = value;
    }
30✔
216

217
    /// <summary>
    /// The number of Runes (unicode scalar values) in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Read from <c>U8String.RuneCount</c> on first access and cached in this struct afterwards.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Somehow the codegen here is underwhelming
            var cached = _count;
            if (cached < 0)
            {
                // Negative marks a count that has not been computed yet.
                cached = _value.RuneCount;
                _count = cached;
            }

            return cached;
        }
    }
235

236
    /// <summary>
    /// Determines whether the string contains <paramref name="item"/>.
    /// </summary>
    public readonly bool Contains(Rune item)
    {
        // The search itself is delegated to the underlying string.
        return _value.Contains(item);
    }
642✔
237

238
    /// <summary>
    /// Copies the runes into <paramref name="destination"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the runes.</param>
    /// <param name="index">The position in <paramref name="destination"/> to start writing at.</param>
    public void CopyTo(Rune[] destination, int index)
    {
        // TODO: Simple SIMD widen ASCII to UTF-32 (ideally widen+validate in place instead of double traversal)
        // TODO: Consistency and correctness? Implement single-pass vectorized conversion?
        var target = destination.AsSpan()[index..];
        this.CopyTo<U8Runes, Enumerator, Rune>(target);
    }
×
244

245
    /// <summary>
    /// Deconstructs the collection into two runes via the shared <c>Deconstruct</c> helper.
    /// </summary>
    public readonly void Deconstruct(out Rune first, out Rune second) =>
        this.Deconstruct<U8Runes, Enumerator, Rune>(out first, out second);

    /// <summary>
    /// Deconstructs the collection into three runes via the shared <c>Deconstruct</c> helper.
    /// </summary>
    public readonly void Deconstruct(out Rune first, out Rune second, out Rune third) =>
        this.Deconstruct<U8Runes, Enumerator, Rune>(out first, out second, out third);
×
254

255
    /// <summary>
    /// Returns the rune at <paramref name="index"/> using the shared <c>ElementAt</c> helper.
    /// </summary>
    public readonly Rune ElementAt(int index) =>
        this.ElementAt<U8Runes, Enumerator, Rune>(index);

    /// <summary>
    /// Returns the rune at <paramref name="index"/> using the shared <c>ElementAtOrDefault</c> helper.
    /// </summary>
    public readonly Rune ElementAtOrDefault(int index) =>
        this.ElementAtOrDefault<U8Runes, Enumerator, Rune>(index);
264

265
    /// <summary>
    /// Materializes the runes into an array via the shared <c>ToArray</c> helper.
    /// </summary>
    public Rune[] ToArray()
    {
        return this.ToArray<U8Runes, Enumerator, Rune>();
    }

    /// <summary>
    /// Materializes the runes into a list via the shared <c>ToList</c> helper.
    /// </summary>
    public List<Rune> ToList()
    {
        return this.ToList<U8Runes, Enumerator, Rune>();
    }
×
268

269
    /// <summary>
    /// Returns an <see cref="Enumerator"/> over the runes of the string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator()
    {
        return GetEnumerator();
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
6✔
274

275
    public struct Enumerator : IEnumerator<Rune>
276
    {
277
        readonly byte[]? _value;
278
        readonly int _offset;
279
        readonly int _length;
280
        int _index;
281

282
        /// <summary>
        /// Creates a rune enumerator over <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The string to enumerate.</param>
        public Enumerator(U8String value)
        {
            // _index is left at its default of 0, i.e. the first byte of the string.
            (_value, _offset, _length) = (value._value, value.Offset, value.Length);
        }
659✔
288

289
        /// <summary>
        /// The rune decoded by the most recent successful <see cref="MoveNext"/> call.
        /// </summary>
        public Rune Current { get; private set; }
290

291
        /// <summary>
        /// Decodes the rune at the current byte position and advances past it.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a rune was decoded; <see langword="false"/> once the position
        /// has reached the end of the string.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool MoveNext()
        {
            var position = _index;
            if (position >= _length)
            {
                return false;
            }

            ref var first = ref _value!.AsRef(_offset + position);
            var rune = U8Conversions.CodepointToRune(ref first, out var width);

            // Step over all bytes of the decoded code point.
            Current = rune;
            _index = position + width;
            return true;
        }
306

307
        /// <summary>
        /// Moves the enumerator back to its initial position, before the first rune.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void Reset()
        {
            // MoveNext uses _index directly as a byte offset and the constructor leaves it at 0,
            // so the start position is 0. A value of -1 would make the next MoveNext read the
            // byte in front of the string (or dereference a null array for an empty string).
            _index = 0;
        }
×
309

310
        readonly object IEnumerator.Current
        {
            // Boxes the current rune for the non-generic interface.
            get { return Current; }
        }

        readonly void IDisposable.Dispose()
        {
            // Nothing to release.
        }
653✔
312
    }
313

314
    // The collection is a read-only view. Mutating members throw NotSupportedException,
    // which is what ICollection<T> prescribes for read-only collections and what the
    // other collections in this file (U8Chars, U8Lines) throw as well.
    readonly bool ICollection<Rune>.IsReadOnly => true;
    readonly void ICollection<Rune>.Add(Rune item) => throw new NotSupportedException();
    readonly void ICollection<Rune>.Clear() => throw new NotSupportedException();
    readonly bool ICollection<Rune>.Remove(Rune item) => throw new NotSupportedException();
×
318
}
319

320
/// <summary>
321
/// A collection of lines in a provided <see cref="U8String"/>.
322
/// </summary>
323
public struct U8Lines : ICollection<U8String>, IU8Enumerable<U8Lines.Enumerator>
324
{
325
    readonly U8String _value;
326

327
    // We might not need this. Although counting is O(n), the absolute performance
328
    // is very good, and on AVX2/512 - it's basically instantaneous.
329
    int _count;
330

331
    /// <summary>
    /// Creates a new line enumeration over the provided string.
    /// </summary>
    /// <param name="value">The string to enumerate over.</param>
    public U8Lines(U8String value)
    {
        // A negative count means "not computed yet"; an empty string is recorded as 0 lines.
        var initialCount = -1;
        if (value.IsEmpty)
        {
            initialCount = 0;
        }

        _count = initialCount;
        _value = value;
    }
×
340

341
    /// <summary>
    /// The number of lines in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// Computed on first access as the number of <c>'\n'</c> bytes plus one, then cached.
    /// </remarks>
    public int Count
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            var cached = _count;
            if (cached < 0)
            {
                // Matches the behavior of string.Split('\n').Length for "hello\n"
                // TODO: Should we break consistency and not count the very last segment if it is empty?
                // (likely no - an empty line is still a line)
                // NOTE(review): Enumerator.MoveNext returns false once the remaining range is
                // empty, so for a string ending in '\n' enumeration appears to yield one element
                // fewer than this count - confirm which behavior is intended.
                var lineFeeds = _value.UnsafeSpan.Count((byte)'\n');
                cached = lineFeeds + 1;
                _count = cached;
            }

            return cached;
        }
    }
361

362
    /// <summary>
    /// Determines whether <paramref name="item"/> occurs in the string and holds no line feed.
    /// </summary>
    public readonly bool Contains(U8String item)
    {
        // An item that itself contains a line feed is rejected outright.
        if (item.Contains((byte)'\n'))
        {
            return false;
        }

        // NOTE(review): presumably a substring search over the whole string rather than
        // a per-line comparison - confirm against U8String.Contains.
        return _value.Contains(item);
    }
366

367
    /// <summary>
    /// Copies the lines into <paramref name="destination"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="destination">The array that receives the lines.</param>
    /// <param name="index">The position in <paramref name="destination"/> to start writing at.</param>
    public void CopyTo(U8String[] destination, int index)
    {
        var target = destination.AsSpan()[index..];
        this.CopyTo<U8Lines, Enumerator, U8String>(target);
    }
×
371

372
    /// <summary>
    /// Deconstructs the collection into two lines via the shared <c>Deconstruct</c> helper.
    /// </summary>
    public readonly void Deconstruct(out U8String first, out U8String second) =>
        this.Deconstruct<U8Lines, Enumerator, U8String>(out first, out second);

    /// <summary>
    /// Deconstructs the collection into three lines via the shared <c>Deconstruct</c> helper.
    /// </summary>
    public readonly void Deconstruct(out U8String first, out U8String second, out U8String third) =>
        this.Deconstruct<U8Lines, Enumerator, U8String>(out first, out second, out third);
×
381

382
    /// <summary>
    /// Returns the line at <paramref name="index"/> using the shared <c>ElementAt</c> helper.
    /// </summary>
    public readonly U8String ElementAt(int index) =>
        this.ElementAt<U8Lines, Enumerator, U8String>(index);

    /// <summary>
    /// Returns the line at <paramref name="index"/> using the shared <c>ElementAtOrDefault</c> helper.
    /// </summary>
    public readonly U8String ElementAtOrDefault(int index) =>
        this.ElementAtOrDefault<U8Lines, Enumerator, U8String>(index);
391

392
    /// <summary>
    /// Materializes the lines into an array via the shared <c>ToArray</c> helper.
    /// </summary>
    public U8String[] ToArray()
    {
        return this.ToArray<U8Lines, Enumerator, U8String>();
    }

    /// <summary>
    /// Materializes the lines into a list via the shared <c>ToList</c> helper.
    /// </summary>
    public List<U8String> ToList()
    {
        return this.ToList<U8Lines, Enumerator, U8String>();
    }
×
394

395
    /// <summary>
    /// Returns a <see cref="Enumerator"/> over the provided string.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Enumerator GetEnumerator()
    {
        return new Enumerator(_value);
    }

    readonly IEnumerator<U8String> IEnumerable<U8String>.GetEnumerator()
    {
        return GetEnumerator();
    }

    readonly IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
×
403

404
    // The collection is a read-only view: every mutating member is unsupported.
    readonly bool ICollection<U8String>.IsReadOnly
    {
        get { return true; }
    }

    readonly void ICollection<U8String>.Add(U8String item)
    {
        throw new NotSupportedException();
    }

    readonly void ICollection<U8String>.Clear()
    {
        throw new NotSupportedException();
    }

    readonly bool ICollection<U8String>.Remove(U8String item)
    {
        throw new NotSupportedException();
    }
×
408

409
    /// <summary>
410
    /// A struct that enumerates lines over a string.
411
    /// </summary>
412
    public struct Enumerator : IU8Enumerator
413
    {
414
        // TODO 1: Ensure this is aligned with Rust's .lines() implementation, or not?
415
        // private static readonly SearchValues<byte> NewLine = SearchValues.Create("\r\n"u8);
416
        // TODO 2: Consider using 'InnerOffsets'
417
        private readonly byte[]? _value;
418
        private U8Range _remaining;
419
        private U8Range _current;
420

421
        /// <summary>
        /// Creates a new line enumerator over the provided string.
        /// </summary>
        /// <param name="value">The string to enumerate over.</param>
        public Enumerator(U8String value)
        {
            // The whole range of the string is still to be consumed; _current stays at its default.
            (_value, _remaining) = (value._value, value._inner);
        }
×
430

431
        /// <summary>
        /// Returns the current line.
        /// </summary>
        public readonly U8String Current
        {
            get
            {
                // Builds a string over the same byte array, limited to the current line's range.
                var line = _current;
                return new U8String(_value, line.Offset, line.Length);
            }
        }
×
435

436
        /// <summary>
        /// Advances the enumerator to the next line.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a line was produced; <see langword="false"/> when no bytes remain.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // Surprisingly smaller codegen than when not inlined
        public bool MoveNext()
        {
            var rest = _remaining;
            if (rest.Length <= 0)
            {
                return false;
            }

            var bytes = _value!.SliceUnsafe(rest.Offset, rest.Length);
            var newline = bytes.IndexOf((byte)'\n');

            if ((uint)newline >= (uint)bytes.Length)
            {
                // We've reached EOF, but we still need to return 'true' for this final
                // iteration so that the caller can query the Current property once more.
                _current = new(rest.Offset, rest.Length);
                _remaining = default;
                return true;
            }

            // Exclude a '\r' that directly precedes the '\n' from the line.
            var lineLength = newline > 0 && bytes.AsRef(newline - 1) is (byte)'\r'
                ? newline - 1
                : newline;

            // Skip the line and its '\n' terminator in the remaining range.
            var consumed = newline + 1;
            _current = new(rest.Offset, lineLength);
            _remaining = new(rest.Offset + consumed, rest.Length - consumed);
            return true;
        }
472

473
        readonly object IEnumerator.Current
        {
            // Boxes the current line for the non-generic interface.
            get { return Current; }
        }

        readonly void IEnumerator.Reset()
        {
            throw new NotSupportedException();
        }

        readonly void IDisposable.Dispose()
        {
            // Nothing to release.
        }
×
476
    }
477
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc