• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 6005208900

28 Aug 2023 09:36PM UTC coverage: 18.096% (-0.2%) from 18.326%
6005208900

push

github

neon-sunset
feat: Extend NativeU8String and restructure solution to account for increased line count, add roadmap draft

134 of 1050 branches covered (0.0%)

Branch coverage included in aggregate %.

1058 of 1058 new or added lines in 25 files covered. (100.0%)

478 of 2332 relevant lines covered (20.5%)

35305.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

21.11
/src/Implementations/Shared/U8Searching.cs
1
using System.Diagnostics;
2
using System.Numerics;
3
using System.Runtime.Intrinsics;
4
using System.Runtime.Intrinsics.Arm;
5
using System.Text;
6
using U8Primitives.Abstractions;
7

8
namespace U8Primitives;
9

10
// TODO: Better name?
11
internal static class U8Searching
12
{
13
    /// <summary>
    /// Determines whether <paramref name="source"/> contains the specified value.
    /// </summary>
    /// <remarks>
    /// Designed to be inlined into the caller so the type dispatch can be optimized away on constants.
    /// <para>
    /// Contract: when T is char and a surrogate, the return value is false.
    /// </para>
    /// </remarks>
    /// <param name="source">The UTF-8 bytes to search.</param>
    /// <param name="value">A byte, char, Rune or U8String to look for.</param>
    /// <returns><see langword="true"/> when the value occurs in <paramref name="source"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool Contains<T>(ReadOnlySpan<byte> source, T value)
        where T : struct
    {
        Debug.Assert(value is byte or char or Rune or U8String);

        switch (value)
        {
            case byte b:
                return source.Contains(b);

            case char c:
                if (char.IsAscii(c))
                {
                    return source.Contains((byte)c);
                }

                // A lone surrogate is never searched for; report "not found".
                if (char.IsSurrogate(c))
                {
                    return false;
                }

                return source.IndexOf(U8Scalar.Create(c, checkAscii: false).AsSpan()) >= 0;

            case Rune r:
                if (r.IsAscii)
                {
                    return source.Contains((byte)r.Value);
                }

                return source.IndexOf(U8Scalar.Create(r, checkAscii: false).AsSpan()) >= 0;

            case U8String str:
                return Contains(source, str.AsSpan());

            default:
                return ThrowHelpers.Unreachable<bool>();
        }
    }
46

47
    /// <summary>
    /// Determines whether <paramref name="source"/> contains the byte sequence <paramref name="value"/>.
    /// </summary>
    /// <param name="source">The bytes to search.</param>
    /// <param name="value">The byte sequence to look for.</param>
    /// <returns><see langword="true"/> when <paramref name="value"/> occurs in <paramref name="source"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool Contains(ReadOnlySpan<byte> source, ReadOnlySpan<byte> value)
    {
        // A one-byte needle is routed to the single-element search.
        if (value.Length is 1)
        {
            return source.Contains(value[0]);
        }

        return source.IndexOf(value) >= 0;
    }
54

55
    /// <summary>
    /// Determines whether <paramref name="source"/> contains the specified value,
    /// delegating the actual search to <paramref name="comparer"/>.
    /// </summary>
    /// <remarks>
    /// A char that is a surrogate is not asserted against here; it yields <see langword="false"/>.
    /// </remarks>
    /// <param name="source">The UTF-8 bytes to search.</param>
    /// <param name="value">A byte, char, Rune or U8String to look for.</param>
    /// <param name="comparer">The operator that performs the search.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool Contains<T, C>(ReadOnlySpan<byte> source, T value, C comparer)
        where T : struct
        where C : IU8ContainsOperator
    {
        Debug.Assert(value is byte or char or Rune or U8String);

        if (value is byte b)
        {
            return comparer.Contains(source, b);
        }

        if (value is char c)
        {
            if (char.IsAscii(c))
            {
                return comparer.Contains(source, (byte)c);
            }

            // Surrogates short-circuit to false before any encoding is attempted.
            return !char.IsSurrogate(c)
                && comparer.Contains(source, U8Scalar.Create(c, checkAscii: false).AsSpan());
        }

        if (value is Rune r)
        {
            if (r.IsAscii)
            {
                return comparer.Contains(source, (byte)r.Value);
            }

            return comparer.Contains(source, U8Scalar.Create(r, checkAscii: false).AsSpan());
        }

        if (value is U8String str)
        {
            return Contains(source, str.AsSpan(), comparer);
        }

        return ThrowHelpers.Unreachable<bool>();
    }
81

82
    /// <summary>
    /// Determines whether <paramref name="source"/> contains the byte sequence <paramref name="value"/>,
    /// delegating the search to <paramref name="comparer"/>.
    /// </summary>
    /// <param name="source">The bytes to search; asserted to be non-empty in debug builds.</param>
    /// <param name="value">The byte sequence to look for.</param>
    /// <param name="comparer">The operator that performs the search.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool Contains<T>(ReadOnlySpan<byte> source, ReadOnlySpan<byte> value, T comparer)
        where T : IU8ContainsOperator
    {
        Debug.Assert(!source.IsEmpty);

        if (value.Length is 1)
        {
            // TODO: Verify if this bounds checks
            return comparer.Contains(source, value[0]);
        }

        return comparer.Contains(source, value);
    }
92

93
    /// <summary>
    /// Determines whether splitting <paramref name="value"/> by <paramref name="separator"/>
    /// could yield <paramref name="item"/>.
    /// </summary>
    /// <returns>
    /// <see langword="false"/> when <paramref name="item"/> itself contains the separator;
    /// otherwise whether <paramref name="value"/> contains <paramref name="item"/>.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool SplitContains<T>(
        ReadOnlySpan<byte> value, T separator, ReadOnlySpan<byte> item)
            where T : struct
    {
        // An item that contains the separator would straddle a split boundary,
        // so it can never be one of the split segments.
        if (Contains(item, separator))
        {
            return false;
        }

        return Contains(value, item);
    }
100

101
    /// <summary>
    /// Determines whether splitting <paramref name="value"/> by <paramref name="separator"/>
    /// could yield <paramref name="item"/>.
    /// </summary>
    /// <returns>
    /// <see langword="false"/> when <paramref name="item"/> itself contains the separator;
    /// otherwise whether <paramref name="value"/> contains <paramref name="item"/>.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool SplitContains(
        ReadOnlySpan<byte> value,
        ReadOnlySpan<byte> separator,
        ReadOnlySpan<byte> item)
    {
        // An item that contains the separator would point at a split boundary,
        // which means it will never be found among the split segments.
        if (Contains(item, separator))
        {
            return false;
        }

        return Contains(value, item);
    }
111

112
    /// <summary>
    /// Determines whether splitting <paramref name="value"/> by <paramref name="separator"/>
    /// could yield <paramref name="item"/>, using <paramref name="comparer"/> for both searches.
    /// </summary>
    /// <returns>
    /// <see langword="false"/> when <paramref name="item"/> itself contains the separator;
    /// otherwise whether <paramref name="value"/> contains <paramref name="item"/>.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool SplitContains<T, C>(
        ReadOnlySpan<byte> value, T separator, ReadOnlySpan<byte> item, C comparer)
            where T : struct
            where C : IU8ContainsOperator
    {
        // An item that contains the separator would straddle a split boundary,
        // so it can never be one of the split segments.
        if (Contains(item, separator, comparer))
        {
            return false;
        }

        return Contains(value, item, comparer);
    }
120

121
    /// <summary>
    /// Determines whether splitting <paramref name="value"/> by <paramref name="separator"/>
    /// could yield <paramref name="item"/>, using <paramref name="comparer"/> for both searches.
    /// </summary>
    /// <returns>
    /// <see langword="false"/> when <paramref name="item"/> itself contains the separator;
    /// otherwise whether <paramref name="value"/> contains <paramref name="item"/>.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool SplitContains<T>(
        ReadOnlySpan<byte> value,
        ReadOnlySpan<byte> separator,
        ReadOnlySpan<byte> item,
        T comparer) where T : IU8ContainsOperator
    {
        // An item that contains the separator would point at a split boundary,
        // which means it will never be found among the split segments.
        if (Contains(item, separator, comparer))
        {
            return false;
        }

        return Contains(value, item, comparer);
    }
132

133
    /// <summary>
    /// Counts the occurrences of the specified value in <paramref name="source"/>.
    /// Contract: when T is char, it must never be a surrogate.
    /// </summary>
    /// <param name="source">The UTF-8 bytes to search.</param>
    /// <param name="value">A byte, char, Rune or U8String to count.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int Count<T>(ReadOnlySpan<byte> source, T value)
        where T : struct
    {
        Debug.Assert(value is not char i || !char.IsSurrogate(i));
        Debug.Assert(value is byte or char or Rune or U8String);

        if (value is byte b)
        {
            return source.Count(b);
        }

        if (value is char c)
        {
            if (char.IsAscii(c))
            {
                return source.Count((byte)c);
            }

            return source.Count(U8Scalar.Create(c, checkAscii: false).AsSpan());
        }

        if (value is Rune r)
        {
            if (r.IsAscii)
            {
                return source.Count((byte)r.Value);
            }

            return source.Count(U8Scalar.Create(r, checkAscii: false).AsSpan());
        }

        if (value is U8String str)
        {
            return Count(source, str.AsSpan());
        }

        return ThrowHelpers.Unreachable<int>();
    }
160

161
    /// <summary>
    /// Counts the occurrences of the byte sequence <paramref name="item"/> in <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The bytes to search.</param>
    /// <param name="item">The byte sequence to count.</param>
    // No explicit single-byte fast path here: per the original author's note,
    // the span Count call already checks for Length is 1 internally.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int Count(ReadOnlySpan<byte> value, ReadOnlySpan<byte> item)
        => value.Count(item);
167

168
    /// <summary>
    /// Counts the occurrences of the specified value in <paramref name="source"/>,
    /// delegating the counting to <paramref name="comparer"/>.
    /// Contract: when T is char, it must never be a surrogate.
    /// </summary>
    /// <param name="source">The UTF-8 bytes to search.</param>
    /// <param name="value">A byte, char, Rune or U8String to count.</param>
    /// <param name="comparer">The operator that performs the counting.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int Count<T, C>(ReadOnlySpan<byte> source, T value, C comparer)
        where T : struct
        where C : IU8CountOperator
    {
        Debug.Assert(value is not char i || !char.IsSurrogate(i));
        Debug.Assert(value is byte or char or Rune or U8String);

        switch (value)
        {
            case byte b:
                return comparer.Count(source, b);

            case char c:
                if (char.IsAscii(c))
                {
                    return comparer.Count(source, (byte)c);
                }

                return comparer.Count(source, U8Scalar.Create(c, checkAscii: false).AsSpan());

            case Rune r:
                if (r.IsAscii)
                {
                    return comparer.Count(source, (byte)r.Value);
                }

                return comparer.Count(source, U8Scalar.Create(r, checkAscii: false).AsSpan());

            case U8String str:
                return Count(source, str.AsSpan(), comparer);

            default:
                return ThrowHelpers.Unreachable<int>();
        }
    }
193

194
    /// <summary>
    /// Counts the occurrences of the byte sequence <paramref name="value"/> in
    /// <paramref name="source"/> by forwarding to <paramref name="comparer"/>.
    /// </summary>
    /// <param name="source">The bytes to search.</param>
    /// <param name="value">The byte sequence to count.</param>
    /// <param name="comparer">The operator that performs the counting.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int Count<T>(ReadOnlySpan<byte> source, ReadOnlySpan<byte> value, T comparer)
        where T : IU8CountOperator
        => comparer.Count(source, value);
200

201
    /// <summary>
    /// Counts the occurrences of <paramref name="item"/> in <paramref name="value"/>,
    /// taking <paramref name="options"/> into account.
    /// </summary>
    /// <param name="value">The bytes to search.</param>
    /// <param name="item">The byte sequence to count.</param>
    /// <param name="options">Split options; <c>None</c> selects the plain count.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int Count(ReadOnlySpan<byte> value, ReadOnlySpan<byte> item, U8SplitOptions options)
    {
        // Only non-default options are handed to the slow path.
        return options is U8SplitOptions.None
            ? Count(value, item)
            : CountSlow(value, item, options);
    }
211

212
    /// <summary>
    /// Placeholder for counting <paramref name="item"/> in <paramref name="value"/> with
    /// non-default split options. Not implemented yet.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    internal static int Count<T>(ReadOnlySpan<byte> value, T item, U8SplitOptions options)
    {
        // Callers are expected to reach this overload only with non-default options.
        Debug.Assert(options is not U8SplitOptions.None);

        throw new NotImplementedException();
    }
217

218
    /// <summary>
    /// Placeholder for the options-aware count of <paramref name="item"/> in
    /// <paramref name="value"/>. Not implemented yet.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    internal static int CountSlow(ReadOnlySpan<byte> value, ReadOnlySpan<byte> item, U8SplitOptions options)
    {
        // Callers are expected to reach this method only with non-default options.
        Debug.Assert(options is not U8SplitOptions.None);

        throw new NotImplementedException();
    }
223

224
    // TODO: Count without cast -> lt -> sub vec len?
    // TODO 2: Consider adding AVX512 path?
    /// <summary>
    /// Counts the bytes in the buffer that are not UTF-8 continuation bytes, which for
    /// well-formed UTF-8 is the number of code points (runes).
    /// </summary>
    /// <param name="src">Reference to the first byte of the buffer.</param>
    /// <param name="length">Number of bytes to scan, starting at <paramref name="src"/>.</param>
    /// <returns>The number of non-continuation bytes in the scanned range.</returns>
    internal static int CountRunes(ref byte src, nuint length)
    {
        // Adopted from https://github.com/simdutf/simdutf/blob/master/src/generic/utf8.h#L10
        var count = 0;
        var offset = (nuint)0;
        // Bytes are reinterpreted as signed so that continuation bytes (0x80..0xBF)
        // become the values strictly less than -64.
        ref var ptr = ref Unsafe.As<byte, sbyte>(ref src);

        if (length >= (nuint)Vector256<byte>.Count)
        {
            var continuations = Vector256.Create((sbyte)-64);
            var lastvec = length - (nuint)Vector256<byte>.Count;
            do
            {
                var chunk = Vector256.LoadUnsafe(ref ptr.Add(offset));
                var matches = Vector256.LessThan(chunk, continuations);

                // NOTE(review): CountMatches presumably returns the number of matching lanes - confirm.
                count += 32 - matches.AsByte().CountMatches();
                offset += (nuint)Vector256<byte>.Count;
            } while (offset <= lastvec);
        }

        // Compare against the remaining byte count. `offset` never exceeds `length`, so
        // `length - offset` cannot wrap around, whereas `length - Vector128<byte>.Count`
        // wraps to a huge value for inputs shorter than a vector and would allow an
        // out-of-bounds load.
        if (length - offset >= (nuint)Vector128<byte>.Count)
        {
            var continuations = Vector128.Create((sbyte)-64);
            var chunk = Vector128.LoadUnsafe(ref ptr.Add(offset));
            var matches = Vector128.LessThan(chunk, continuations);

            count += 16 - matches.AsByte().CountMatches();
            offset += (nuint)Vector128<byte>.Count;
        }

        // Same wrap-around-safe remaining-length check for the 8-byte step.
        if (AdvSimd.IsSupported &&
            (length - offset >= (nuint)Vector64<byte>.Count))
        {
            var continuations = Vector64.Create((sbyte)-64);
            var chunk = Vector64.LoadUnsafe(ref ptr.Add(offset));
            var matches = Vector64
                .LessThan(chunk, continuations)
                .AsUInt64()
                .ToScalar();

            // Every matching lane has all 8 bits set, so popcount / 8 is the lane count.
            count += 8 - (BitOperations.PopCount(matches) / 8);
            offset += (nuint)Vector64<byte>.Count;
        }

        while (offset < length)
        {
            // Branchless: x86_64: cmp + setge; arm64: cmn + cset
            count += U8Info.IsContinuationByte((byte)ptr.Add(offset)) ? 0 : 1;
            offset++;
        }

        return count;
    }
280

281
    /// <summary>
    /// Finds the first occurrence of the specified value in <paramref name="source"/>.
    /// Contract: when T is char, it must never be a surrogate.
    /// </summary>
    /// <param name="source">The UTF-8 bytes to search.</param>
    /// <param name="value">A byte, char, Rune or U8String to look for.</param>
    /// <returns>
    /// The offset reported by the underlying span search, paired with the length in bytes
    /// of the searched value.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static (int Offset, int Length) IndexOf<T>(ReadOnlySpan<byte> source, T value)
        where T : struct
    {
        Debug.Assert(value is not char i || !char.IsSurrogate(i));
        Debug.Assert(value is byte or char or Rune or U8String);

        if (value is byte b)
        {
            return (source.IndexOf(b), 1);
        }

        if (value is char c)
        {
            if (!char.IsAscii(c))
            {
                var encodedChar = U8Scalar.Create(c, checkAscii: false);
                return (source.IndexOf(encodedChar.AsSpan()), encodedChar.Length);
            }

            return (source.IndexOf((byte)c), 1);
        }

        if (value is Rune r)
        {
            if (!r.IsAscii)
            {
                var encodedRune = U8Scalar.Create(r, checkAscii: false);
                return (source.IndexOf(encodedRune.AsSpan()), encodedRune.Length);
            }

            return (source.IndexOf((byte)r.Value), 1);
        }

        if (value is U8String str)
        {
            var needle = str.AsSpan();
            return (IndexOf(source, needle), needle.Length);
        }

        return ThrowHelpers.Unreachable<(int, int)>();
    }
322

323
    /// <summary>
    /// Returns the index of the first occurrence of the byte sequence <paramref name="value"/>
    /// in <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The bytes to search.</param>
    /// <param name="value">The byte sequence to look for.</param>
    /// <returns>The index returned by the underlying span search.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int IndexOf(ReadOnlySpan<byte> source, ReadOnlySpan<byte> value)
    {
        // A one-byte needle is routed to the single-element search.
        if (value.Length is 1)
        {
            return source.IndexOf(value[0]);
        }

        return source.IndexOf(value);
    }
328

329
    /// <summary>
    /// Finds the first occurrence of the specified value in <paramref name="source"/>,
    /// delegating the search to <paramref name="comparer"/>.
    /// Contract: when T is char, it must never be a surrogate.
    /// </summary>
    /// <param name="source">The UTF-8 bytes to search.</param>
    /// <param name="value">A byte, char, Rune or U8String to look for.</param>
    /// <param name="comparer">The operator that performs the search.</param>
    /// <returns>The offset and length pair produced by <paramref name="comparer"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static (int Offset, int Length) IndexOf<T, C>(ReadOnlySpan<byte> source, T value, C comparer)
        where T : struct
        where C : IU8IndexOfOperator
    {
        Debug.Assert(value is not char i || !char.IsSurrogate(i));
        Debug.Assert(value is byte or char or Rune or U8String);

        if (value is byte b)
        {
            return comparer.IndexOf(source, b);
        }

        if (value is char c)
        {
            if (!char.IsAscii(c))
            {
                var encodedChar = U8Scalar.Create(c, checkAscii: false);
                return comparer.IndexOf(source, encodedChar.AsSpan());
            }

            return comparer.IndexOf(source, (byte)c);
        }

        if (value is Rune r)
        {
            if (!r.IsAscii)
            {
                var encodedRune = U8Scalar.Create(r, checkAscii: false);
                return comparer.IndexOf(source, encodedRune.AsSpan());
            }

            return comparer.IndexOf(source, (byte)r.Value);
        }

        if (value is U8String str)
        {
            var needle = str.AsSpan();
            return IndexOf(source, needle, comparer);
        }

        return ThrowHelpers.Unreachable<(int, int)>();
    }
368

369
    /// <summary>
    /// Finds the first occurrence of the byte sequence <paramref name="value"/> in
    /// <paramref name="source"/>, delegating the search to <paramref name="comparer"/>.
    /// </summary>
    /// <param name="source">The bytes to search.</param>
    /// <param name="value">The byte sequence to look for.</param>
    /// <param name="comparer">The operator that performs the search.</param>
    /// <returns>The offset and length pair produced by <paramref name="comparer"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static (int Offset, int Length) IndexOf<T>(ReadOnlySpan<byte> source, ReadOnlySpan<byte> value, T comparer)
        where T : IU8IndexOfOperator
    {
        // A one-byte needle is routed to the comparer's single-byte overload.
        if (value.Length is 1)
        {
            return comparer.IndexOf(source, value[0]);
        }

        return comparer.IndexOf(source, value);
    }
377
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc