• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 5953309698

23 Aug 2023 03:23PM UTC coverage: 21.364% (-1.9%) from 23.214%
5953309698

push

github

neon-sunset
perf: implement SIMD rune counting, add 3rd party notice for simdutf

133 of 874 branches covered (15.22%)

Branch coverage included in aggregate %.

53 of 53 new or added lines in 6 files covered. (100.0%)

481 of 2000 relevant lines covered (24.05%)

41214.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

42.31
/src/U8String.cs
1
using System.ComponentModel;
2
using System.Diagnostics;
3
using System.Diagnostics.CodeAnalysis;
4
using System.Runtime.InteropServices;
5
using System.Text;
6
using System.Text.Json.Serialization;
7
using System.Text.Unicode;
8
using U8Primitives.Serialization;
9

10
#pragma warning disable IDE1006 // Naming Styles. Why: Exposing internal fields for perf.
11
namespace U8Primitives;
12

13
/// <summary>
/// An (offset, length) pair describing a run of elements inside a backing buffer.
/// </summary>
internal readonly struct U8Range
{
    /// <summary>The offset component, stored exactly as passed to the constructor.</summary>
    internal readonly int Offset;

    /// <summary>The length component, stored exactly as passed to the constructor.</summary>
    internal readonly int Length;

    /// <summary>
    /// Stores <paramref name="offset"/> and <paramref name="length"/> as given.
    /// </summary>
    /// <param name="offset">Value for <see cref="Offset"/>.</param>
    /// <param name="length">Value for <see cref="Length"/>.</param>
    /// <remarks>
    /// The arguments are only checked by debug assertions; no exception is thrown for bad input.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Range(int offset, int length)
    {
        // A negative int reinterpreted as uint lands above int.MaxValue,
        // so each assertion fires exactly when the argument is negative.
        Debug.Assert((uint)offset <= int.MaxValue);
        Debug.Assert((uint)length <= int.MaxValue);

        (Offset, Length) = (offset, length);
    }
}
28

29
/// <summary>
30
/// Represents a UTF-8 encoded string.
31
/// </summary>
32
/// <remarks>
33
/// <para>U8String is an immutable value type that represents a UTF-8 encoded string.</para>
34
/// <para>It stores UTF-8 code units in the underlying buffer, and provides methods
35
/// for manipulating and accessing the string content. It can be created from or converted
36
/// to a string or a span of bytes, as long as the data is valid and convertible to UTF-8.</para>
37
/// <para>U8String slicing methods are non-copying and return a new U8String that
38
/// references a portion of the original data. Methods which manipulate the
39
/// instances of U8String ensure that the resulting U8String is well-formed and valid UTF-8,
40
/// unless specified otherwise. If an operation would produce invalid UTF-8, an exception is thrown.</para>
41
/// <para>By default, U8String is indexed by the underlying UTF-8 bytes but offers alternate Rune and Char projections.</para>
42
/// </remarks>
43
[DebuggerDisplay("{ToString()}")]
44
[JsonConverter(typeof(U8StringJsonConverter))]
45
[CollectionBuilder(typeof(U8String), nameof(Create))]
46
public readonly partial struct U8String :
47
    IEquatable<U8String>,
48
    IEquatable<U8String?>,
49
    IEquatable<byte[]?>,
50
    IComparable<U8String>,
51
    IComparable<U8String?>,
52
    IComparable<byte[]?>,
53
    IList<byte>,
54
    ICloneable,
55
    ISpanParsable<U8String>,
56
    ISpanFormattable,
57
    IUtf8SpanParsable<U8String>,
58
    IUtf8SpanFormattable
59
{
60
    /// <summary>
    /// Gets the empty <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// This is <see langword="default(U8String)"/>; the two are interchangeable.
    /// </remarks>
    public static U8String Empty
    {
        get => default;
    }
×
67

68
    internal readonly byte[]? _value;
69
    internal readonly U8Range _inner;
70

71
    /// <summary>
    /// The <see cref="U8Range.Offset"/> stored in <see cref="_inner"/>.
    /// </summary>
    internal int Offset
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return _inner.Offset;
        }
    }
76

77
    /// <summary>
    /// Gets the length of the current <see cref="U8String"/> in UTF-8 bytes.
    /// </summary>
    /// <returns>The byte count, read from <see cref="_inner"/>.</returns>
    public int Length
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return _inner.Length;
        }
    }
86

87
    /// <summary>
    /// Gets a value indicating whether the current <see cref="U8String"/> is empty.
    /// </summary>
    /// <returns>
    /// <see langword="true"/> when the backing array is <see langword="null"/>;
    /// otherwise, <see langword="false"/>.
    /// </returns>
    [MemberNotNullWhen(false, nameof(_value))]
    public bool IsEmpty
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // Deliberately a null check and not `Length is 0`:
            // the length-based form regresses Warpskimmer benchmarks.
            return _value is null;
        }
    }
98

99
    /// <summary>
    /// Gets the number of UTF-8 code points in the current <see cref="U8String"/>.
    /// </summary>
    /// <remarks>
    /// The count is recomputed on every access. Evaluation is O(n), but its actual cost is very low.
    /// </remarks>
    /// <returns>
    /// 0 when <see cref="IsEmpty"/> is true; otherwise the result of
    /// <c>U8Searching.CountRunes</c> over the string's bytes.
    /// </returns>
    public int RuneCount
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            // UnsafeRef must not be touched for an empty instance, so bail out first.
            if (IsEmpty)
            {
                return 0;
            }

            return U8Searching.CountRunes(ref UnsafeRef, (nuint)Length);
        }
    }
113

114
    /// <inheritdoc/>
    // The collection view is over bytes, so the element count is Length.
    int ICollection<byte>.Count
    {
        get => Length;
    }

    /// <inheritdoc/>
    // Always reported as read-only.
    bool ICollection<byte>.IsReadOnly
    {
        get => true;
    }
×
119

120
    /// <summary>
    /// Counterpart of <see cref="UnsafeRef"/> that does not throw NRE if <see cref="IsEmpty"/> is true.
    /// </summary>
    /// <remarks>
    /// Starts from a null ref, replaces it with the array data ref when the backing array
    /// is not null, and then unconditionally advances the result by <see cref="Offset"/>.
    /// The shape is meant to let the ref selection compile to a cmov.
    /// </remarks>
    internal ref byte DangerousRef
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            var buffer = _value;
            ref var start = ref Unsafe.NullRef<byte>();
            if (buffer != null)
            {
                start = ref MemoryMarshal.GetArrayDataReference(buffer);
            }

            // Applied even when the ref is still null.
            return ref Unsafe.Add(ref start, Offset);
        }
    }
138

139
    /// <summary>
    /// Reference to the byte at <see cref="Offset"/> within the backing array.
    /// Will throw NRE if <see cref="IsEmpty"/> is true.
    /// </summary>
    internal ref byte UnsafeRef
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            ref var start = ref MemoryMarshal.GetArrayDataReference(_value!);

            // Offset is widened through uint so it is zero-extended, not sign-extended.
            return ref Unsafe.Add(ref start, (nint)(uint)Offset);
        }
    }
148

149
    /// <summary>
    /// Read-only span of <see cref="Length"/> bytes starting at <see cref="UnsafeRef"/>.
    /// Will throw NRE if <see cref="IsEmpty"/> is true.
    /// </summary>
    internal ReadOnlySpan<byte> UnsafeSpan
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            ref var start = ref UnsafeRef;
            return MemoryMarshal.CreateReadOnlySpan(ref start, Length);
        }
    }
157

158
    /// <summary>
    /// Reference to the byte <paramref name="index"/> positions past <see cref="Offset"/>
    /// within the backing array. Will throw NRE if <see cref="IsEmpty"/> is true.
    /// </summary>
    /// <param name="index">Byte index relative to <see cref="Offset"/>. Not bounds-checked here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal ref byte UnsafeRefAdd(int index)
    {
        ref var start = ref MemoryMarshal.GetArrayDataReference(_value!);

        // Both operands are zero-extended to native width before being summed.
        var elementOffset = (nint)(uint)Offset + (nint)(uint)index;
        return ref Unsafe.Add(ref start, elementOffset);
    }
167

168
    /// <summary>
    /// Determines whether the current <see cref="U8String"/> contains only ASCII characters.
    /// </summary>
    /// <returns>The result of <see cref="Ascii"/>.IsValid for this instance.</returns>
    public bool IsAscii()
    {
        return Ascii.IsValid(this);
    }
×
172

173
    /// <summary>
    /// Determines whether the current <see cref="U8String"/> is normalized to the specified
    /// Unicode normalization form (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The normalization form to check against.</param>
    /// <exception cref="NotImplementedException">Always thrown; the check is not implemented yet.</exception>
    public bool IsNormalized(NormalizationForm form = NormalizationForm.FormC)
    {
        throw new NotImplementedException();
    }
×
178

179
    /// <summary>
    /// Checks whether <paramref name="value"/> is a valid UTF-8 byte sequence.
    /// </summary>
    /// <param name="value">The <see cref="ReadOnlySpan{T}"/> to validate.</param>
    /// <returns>
    /// <see langword="true"/> for an empty span; for a single byte, the result of
    /// <c>U8Info.IsAsciiByte</c>; for anything longer, the result of <see cref="Utf8.IsValid"/>.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsValid(ReadOnlySpan<byte> value)
    {
        // Zero and one byte inputs are answered without calling the general validator.
        if (value.Length == 0)
        {
            return true;
        }

        if (value.Length == 1)
        {
            return U8Info.IsAsciiByte(value[0]);
        }

        return Utf8.IsValid(value);
    }
193

194
    /// <summary>
    /// Calls <c>ThrowHelpers.InvalidUtf8()</c> when <paramref name="value"/> fails <see cref="IsValid"/>.
    /// </summary>
    /// <param name="value">The bytes to validate.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void Validate(ReadOnlySpan<byte> value)
    {
        if (IsValid(value))
        {
            return;
        }

        ThrowHelpers.InvalidUtf8();
    }
45✔
202

203
    /// <summary>
    /// Placeholder for slice validation; not implemented yet.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    // TODO: Another method which requires like 10 iterations to achieve good codegen.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool ValidateSlice(ReadOnlySpan<byte> value, int offset, int length)
        => throw new NotImplementedException();
209

210
    /// <summary>
    /// Splits this instance into its backing array, <see cref="Offset"/> and <see cref="Length"/>.
    /// </summary>
    /// <param name="value">Receives the backing array, which may be <see langword="null"/>.</param>
    /// <param name="offset">Receives <see cref="Offset"/>.</param>
    /// <param name="length">Receives <see cref="Length"/>.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal void Deconstruct(out byte[]? value, out int offset, out int length)
    {
        (value, offset, length) = (_value, Offset, Length);
    }
2,044✔
217

218
    /// <summary>
    /// Returns <see cref="DangerousRef"/> as a read-only reference.
    /// </summary>
    /// <remarks>
    /// Hidden from editor completion. This is the method the C# <c>fixed</c> statement looks for on a type.
    /// </remarks>
    [EditorBrowsable(EditorBrowsableState.Never)]
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ref readonly byte GetPinnableReference()
    {
        return ref DangerousRef;
    }
×
221

222
    // U8String reports ICollection<byte>.IsReadOnly == true, so every mutating member of
    // IList<byte>/ICollection<byte> is rejected with NotSupportedException, the exception the
    // interface contracts specify for read-only collections. NotImplementedException would
    // wrongly signal functionality that is still to come.

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    void IList<byte>.Insert(int index, byte item) => throw new NotSupportedException();

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    void IList<byte>.RemoveAt(int index) => throw new NotSupportedException();

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    void ICollection<byte>.Add(byte item) => throw new NotSupportedException();

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    void ICollection<byte>.Clear() => throw new NotSupportedException();

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    bool ICollection<byte>.Remove(byte item) => throw new NotSupportedException();
227
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc