• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neon-sunset / U8String / 6005208900

28 Aug 2023 09:36PM UTC coverage: 18.096% (-0.2%) from 18.326%
6005208900

push

github

neon-sunset
feat: Extend NativeU8String and restructure solution to account for increased line count, add roadmap draft

134 of 1050 branches covered (0.0%)

Branch coverage included in aggregate %.

1058 of 1058 new or added lines in 25 files covered. (100.0%)

478 of 2332 relevant lines covered (20.5%)

35305.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.1
/src/U8String.cs
1
using System.ComponentModel;
2
using System.Diagnostics;
3
using System.Diagnostics.CodeAnalysis;
4
using System.Runtime.InteropServices;
5
using System.Text;
6
using System.Text.Json.Serialization;
7
using System.Text.Unicode;
8
using U8Primitives.Serialization;
9

10
#pragma warning disable IDE1006 // Naming Styles. Why: Exposing internal fields for perf.
11
namespace U8Primitives;
12

13
/// <summary>
/// An offset/length pair. <see cref="U8String"/> keeps one alongside its backing
/// array to locate its bytes inside that array.
/// </summary>
internal readonly struct U8Range
{
    internal readonly int Offset;
    internal readonly int Length;

    /// <summary>
    /// Stores <paramref name="offset"/> and <paramref name="length"/> as given.
    /// </summary>
    /// <remarks>
    /// Negative arguments are only rejected through <c>Debug.Assert</c>;
    /// no other validation is performed here.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8Range(int offset, int length)
    {
        Debug.Assert(offset >= 0);
        Debug.Assert(length >= 0);

        (Offset, Length) = (offset, length);
    }
}
28

29
/// <summary>
30
/// Represents a UTF-8 encoded string.
31
/// </summary>
32
/// <remarks>
33
/// <para>U8String is an immutable value type that represents a UTF-8 encoded string.</para>
34
/// <para>It stores UTF-8 code units in the underlying buffer, and provides methods
35
/// for manipulating and accessing the string content. It can be created from or converted
36
/// to a string or a span of bytes, as long as the data is valid and convertible to UTF-8.</para>
37
/// <para>U8String slicing methods are non-copying and return a new U8String that
38
/// references a portion of the original data. Methods which manipulate the
39
/// instances of U8String ensure that the resulting U8String is well-formed and valid UTF-8,
40
/// unless specified otherwise. If an operation would produce invalid UTF-8, an exception is thrown.</para>
41
/// <para>By default, U8String is indexed by the underlying UTF-8 bytes but offers alternate Rune and Char projections.</para>
42
/// </remarks>
43
[DebuggerDisplay("{ToString()}")]
44
[JsonConverter(typeof(U8StringJsonConverter))]
45
[CollectionBuilder(typeof(U8String), nameof(Create))]
46
public readonly partial struct U8String :
47
    IEquatable<U8String>,
48
    IEquatable<U8String?>,
49
    IEquatable<byte[]?>,
50
    IComparable<U8String>,
51
    IComparable<U8String?>,
52
    IComparable<byte[]?>,
53
    IList<byte>,
54
    ICloneable,
55
    ISpanParsable<U8String>,
56
    ISpanFormattable,
57
    IUtf8SpanParsable<U8String>,
58
    IUtf8SpanFormattable
59
{
60
    /// <summary>
61
    /// Represents an empty <see cref="U8String"/>.
62
    /// </summary>
63
    /// <remarks>
64
    /// Functionally equivalent to <see langword="default(U8String)"/>.
65
    /// </remarks>
66
    public static U8String Empty => default;
×
67

68
    internal readonly byte[]? _value;
69
    internal readonly U8Range _inner;
70

71
    internal int Offset
72
    {
73
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
74
        get => _inner.Offset;
2,762✔
75
    }
76

77
    /// <summary>
78
    /// The number of UTF-8 bytes in the current <see cref="U8String"/>.
79
    /// </summary>
80
    /// <returns>The number of UTF-8 bytes.</returns>
81
    public int Length
82
    {
83
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
84
        get => _inner.Length;
2,762✔
85
    }
86

87
    /// <summary>
88
    /// Indicates whether the current <see cref="U8String"/> is empty.
89
    /// </summary>
90
    /// <returns><see langword="true"/> if the current <see cref="U8String"/> is empty; otherwise, <see langword="false"/>.</returns>
91
    [MemberNotNullWhen(false, nameof(_value))]
92
    public bool IsEmpty
93
    {
94
        // TODO: consolidate to a single discriminant of "emptiness"
95
        // and stop relying on null _value. That is, as long as it
96
        // does not regress Warpskimmer numbers.
97
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
98
        get => _value is null;
81✔
99
    }
100

101
    /// <summary>
102
    /// The number of UTF-8 code points in the current <see cref="U8String"/>.
103
    /// </summary>
104
    /// <remarks>
105
    /// Although evaluation of this property is O(n), its actual cost is very low.
106
    /// </remarks>
107
    public int RuneCount
108
    {
109
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
110
        get
111
        {
112
            return !IsEmpty ? U8Searching.CountRunes(ref UnsafeRef, (nuint)Length) : 0;
15!
113
        }
114
    }
115

116
    /// <inheritdoc/>
117
    int ICollection<byte>.Count => Length;
×
118

119
    /// <inheritdoc/>
120
    bool ICollection<byte>.IsReadOnly => true;
×
121

122
    /// <summary>
123
    /// <summary>
    /// Similar to <see cref="UnsafeRef"/>, but does not throw NRE if <see cref="IsEmpty"/> is true.
    /// </summary>
    /// <remarks>
    /// cmov's the ref out of byte[] if it is not null and unconditionally increments it by <see cref="Offset"/>.
    /// When the backing array is null, the returned reference is derived from a null reference.
    /// </remarks>
    internal ref byte DangerousRef
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            var buffer = _value;

            // Start from a null ref and only swap in the array data ref when there is an array.
            ref var start = ref Unsafe.NullRef<byte>();
            if (buffer is not null)
            {
                start = ref MemoryMarshal.GetArrayDataReference(buffer);
            }

            return ref Unsafe.Add(ref start, Offset);
        }
    }
140

141
    /// <summary>
142
    /// Will throw NRE if <see cref="IsEmpty"/> is true.
143
    /// </summary>
144
    internal ref byte UnsafeRef
145
    {
146
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
147
        get => ref Unsafe.Add(
35✔
148
            ref MemoryMarshal.GetArrayDataReference(_value!), (nint)(uint)Offset);
35✔
149
    }
150

151
    /// <summary>
152
    /// Will throw NRE if <see cref="IsEmpty"/> is true.
153
    /// </summary>
154
    internal ReadOnlySpan<byte> UnsafeSpan
155
    {
156
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
157
        get => MemoryMarshal.CreateReadOnlySpan(ref UnsafeRef, Length);
20✔
158
    }
159

160
    /// <summary>
    /// Returns a reference to the byte at <paramref name="index"/> relative to <see cref="Offset"/>
    /// in the backing array. No bounds check is performed.
    /// Will throw NRE if <see cref="IsEmpty"/> is true.
    /// </summary>
    /// <param name="index">Byte index, added to <see cref="Offset"/> after both are zero-extended.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal ref byte UnsafeRefAdd(int index)
    {
        ref var first = ref MemoryMarshal.GetArrayDataReference(_value!);

        // Zero-extend both operands before summing, exactly as UnsafeRef does for Offset.
        var byteOffset = (nint)(uint)Offset + (nint)(uint)index;

        return ref Unsafe.Add(ref first, byteOffset);
    }
169

170
    /// <summary>
171
    /// Evaluates if the current <see cref="U8String"/> contains only ASCII characters.
172
    /// </summary>
173
    public bool IsAscii() => Ascii.IsValid(this);
×
174

175
    /// <summary>
176
    /// Evaluates if the current <see cref="U8String"/> is normalized to the specified
177
    /// Unicode normalization form (default: <see cref="NormalizationForm.FormC"/>).
178
    /// </summary>
179
    public bool IsNormalized(NormalizationForm form = NormalizationForm.FormC) => throw new NotImplementedException();
×
180

181
    /// <summary>
    /// Validates that the <paramref name="value"/> is a valid UTF-8 byte sequence.
    /// </summary>
    /// <param name="value">The <see cref="ReadOnlySpan{T}"/> to validate.</param>
    /// <returns>
    /// <see langword="true"/> for an empty span; for a single byte, the result of
    /// <c>U8Info.IsAsciiByte</c>; for anything longer, the result of <see cref="Utf8.IsValid"/>.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsValid(ReadOnlySpan<byte> value)
    {
        return value.Length switch
        {
            0 => true,
            // A lone byte is checked with the ASCII test instead of the full validator.
            1 => U8Info.IsAsciiByte(value[0]),
            _ => Utf8.IsValid(value),
        };
    }
200

201
    /// <summary>
    /// Checks <paramref name="value"/> with <see cref="IsValid"/> and calls
    /// <c>ThrowHelpers.InvalidUtf8()</c> when the check fails.
    /// </summary>
    /// <param name="value">The bytes to validate.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void Validate(ReadOnlySpan<byte> value)
    {
        if (IsValid(value))
        {
            return;
        }

        ThrowHelpers.InvalidUtf8();
    }
45✔
209

210
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
211
    internal static bool ValidateSlice(ReadOnlySpan<byte> value, int offset, int length)
212
    {
213
        // TODO: Another method which requires like 10 iterations to achieve good codegen.
214
        throw new NotImplementedException();
×
215
    }
216

217
    /// <summary>
    /// Splits this instance into its backing array, <see cref="Offset"/> and <see cref="Length"/>.
    /// </summary>
    /// <param name="value">Receives the backing array, which may be <see langword="null"/>.</param>
    /// <param name="offset">Receives <see cref="Offset"/>.</param>
    /// <param name="length">Receives <see cref="Length"/>.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal void Deconstruct(out byte[]? value, out int offset, out int length)
    {
        (value, offset, length) = (_value, Offset, Length);
    }
2,044✔
224

225
    [EditorBrowsable(EditorBrowsableState.Never)]
226
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
227
    public ref readonly byte GetPinnableReference() => ref DangerousRef;
×
228

229
    // U8String is immutable and reports ICollection<byte>.IsReadOnly == true, so every
    // mutating member is rejected with NotSupportedException. That is the exception the
    // IList<T>/ICollection<T> contracts specify for read-only collections;
    // NotImplementedException would wrongly signal unfinished work.

    /// <inheritdoc/>
    void IList<byte>.Insert(int index, byte item) => throw new NotSupportedException();

    /// <inheritdoc/>
    void IList<byte>.RemoveAt(int index) => throw new NotSupportedException();

    /// <inheritdoc/>
    void ICollection<byte>.Add(byte item) => throw new NotSupportedException();

    /// <inheritdoc/>
    void ICollection<byte>.Clear() => throw new NotSupportedException();

    /// <inheritdoc/>
    bool ICollection<byte>.Remove(byte item) => throw new NotSupportedException();
×
234
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc