5992664408

Committed 27 Aug 2023 06:57PM UTC coverage: 18.359% (+0.04%) from 18.315%

Build # 5992664408

Build Type

push

github

Committed by

neon-sunset

Commit Message

feat: make ascii comparer len hint inlineable, fix ToUpper

Run Details

134 of 1038 branches covered (0.0%)

Branch coverage included in aggregate %.

5 of 5 new or added lines in 3 files covered. (100.0%)

479 of 2301 relevant lines covered (20.82%)

35781.08 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/src/U8String.Manipulation.cs

using System.Runtime.InteropServices;
using System.Text;

using U8Primitives.Abstractions;
using U8Primitives.InteropServices;

namespace U8Primitives;

#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
public readonly partial struct U8String
{
    // TODO: Optimize/deduplicate Concat variants
    // TODO: Investigate if it is possible fold validation for u8 literals
    /// <summary>
    /// Concatenates two <see cref="U8String"/> values.
    /// </summary>
    /// <param name="left">The leading string.</param>
    /// <param name="right">The trailing string.</param>
    /// <returns>
    /// The other operand unchanged when one of the operands is empty; otherwise the
    /// result of <c>U8Manipulation.ConcatUnchecked</c> over both byte spans.
    /// </returns>
    public static U8String Concat(U8String left, U8String right)
    {
        // Nothing to prepend: the right operand is already the answer.
        if (left.IsEmpty)
        {
            return right;
        }

        // Nothing to append: the left operand is already the answer.
        if (right.IsEmpty)
        {
            return left;
        }

        // Both operands are existing U8String instances, so no validation is requested here.
        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right.UnsafeSpan);
    }

    /// <summary>
    /// Concatenates a <see cref="U8String"/> with a span of bytes.
    /// </summary>
    /// <param name="left">The leading string.</param>
    /// <param name="right">The trailing bytes; passed to <c>Validate</c> when not empty.</param>
    /// <returns>
    /// <paramref name="left"/> unchanged when <paramref name="right"/> is empty; a new string built
    /// from <paramref name="right"/> alone when <paramref name="left"/> is empty; otherwise the
    /// concatenation of both.
    /// </returns>
    public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
    {
        if (right.IsEmpty)
        {
            return left;
        }

        // Validate the raw bytes once up front so both paths below can skip validation.
        Validate(right);

        if (left.IsEmpty)
        {
            return new U8String(right, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right);
    }

    /// <summary>
    /// Concatenates a span of bytes with a <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The leading bytes; passed to <c>Validate</c> when not empty.</param>
    /// <param name="right">The trailing string.</param>
    /// <returns>
    /// <paramref name="right"/> unchanged when <paramref name="left"/> is empty; a new string built
    /// from <paramref name="left"/> alone when <paramref name="right"/> is empty; otherwise the
    /// concatenation of both.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
    {
        if (left.IsEmpty)
        {
            return right;
        }

        // Validate the raw bytes once up front so both paths below can skip validation.
        Validate(left);

        if (right.IsEmpty)
        {
            return new U8String(left, skipValidation: true);
        }

        return U8Manipulation.ConcatUnchecked(left, right.UnsafeSpan);
    }

    /// <summary>
    /// Concatenates two spans of bytes into a new <see cref="U8String"/>.
    /// </summary>
    /// <param name="left">The leading bytes.</param>
    /// <param name="right">The trailing bytes.</param>
    /// <returns>
    /// The default (empty) string when both inputs are empty; otherwise a string backed by a
    /// newly allocated array holding <paramref name="left"/> followed by <paramref name="right"/>.
    /// </returns>
    public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
    {
        var total = left.Length + right.Length;
        if (total == 0)
        {
            return default;
        }

        var buffer = new byte[total];

        left.CopyTo(buffer);
        right.CopyTo(buffer.SliceUnsafe(left.Length, right.Length));

        // The joined bytes are validated as a whole rather than each input separately.
        Validate(buffer);
        return new U8String(buffer, 0, total);
    }

    /// <inheritdoc />
    public void CopyTo(byte[] destination, int index)
    {
        var self = this;

        // Slicing performs the range check on index against the destination array.
        var target = destination.AsSpan().Slice(index);
        if (target.Length < self.Length)
        {
            // The space remaining after index cannot hold the whole string.
            ThrowHelpers.ArgumentOutOfRange(nameof(index));
        }

        self.UnsafeSpan.CopyTo(target);
    }

    /// <summary>
    /// Normalizes the current <see cref="U8String"/> to the specified Unicode normalization form
    /// (default: <see cref="NormalizationForm.FormC"/>).
    /// </summary>
    /// <param name="form">The Unicode normalization form to apply.</param>
    /// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
    /// <exception cref="NotImplementedException">Always thrown; normalization is not implemented yet.</exception>
    public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
        => throw new NotImplementedException();

    /// <summary>
    /// Replaces the byte <paramref name="oldValue"/> with <paramref name="newValue"/>;
    /// the work is forwarded to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The byte to be replaced.</param>
    /// <param name="newValue">The byte to substitute.</param>
    /// <returns>The string produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(byte oldValue, byte newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);

    /// <summary>
    /// Replaces the character <paramref name="oldValue"/> with <paramref name="newValue"/>;
    /// the work is forwarded to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The character to be replaced.</param>
    /// <param name="newValue">The character to substitute.</param>
    /// <returns>The string produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(char oldValue, char newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);

    /// <summary>
    /// Replaces the rune <paramref name="oldValue"/> with <paramref name="newValue"/>;
    /// the work is forwarded to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The rune to be replaced.</param>
    /// <param name="newValue">The rune to substitute.</param>
    /// <returns>The string produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(Rune oldValue, Rune newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);

    /// <summary>
    /// Replaces the byte sequence <paramref name="oldValue"/> with <paramref name="newValue"/>;
    /// the work is forwarded to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The byte sequence to be replaced.</param>
    /// <param name="newValue">The byte sequence to substitute.</param>
    /// <returns>The string produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);

    /// <summary>
    /// Replaces the string <paramref name="oldValue"/> with <paramref name="newValue"/>;
    /// the work is forwarded to <c>U8Manipulation.Replace</c>.
    /// </summary>
    /// <param name="oldValue">The string to be replaced.</param>
    /// <param name="newValue">The string to substitute.</param>
    /// <returns>The string produced by <c>U8Manipulation.Replace</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String Replace(U8String oldValue, U8String newValue)
        => U8Manipulation.Replace(this, oldValue, newValue);

    /// <summary>
    /// Replaces line endings in the current string.
    /// </summary>
    /// <returns>
    /// The current string unchanged when it is empty; on non-Windows platforms, the result of
    /// replacing every <c>"\r\n"</c> sequence with <c>"\n"</c>.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The string is not empty and the current platform is Windows.
    /// </exception>
    public U8String ReplaceLineEndings()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return self;
        }

        if (OperatingSystem.IsWindows())
        {
            // This needs manual loop which is sad
            throw new NotImplementedException();
        }

        return U8Manipulation.ReplaceCore(
            self, "\r\n"u8, "\n"u8, validate: false);
    }

    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and continues to the end of the string.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/>, or the default (empty) string
    /// when <paramref name="start"/> equals the length of this instance.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is less than zero or greater than the length of this instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="start"/> points at a UTF-8 continuation byte, so the substring would begin
    /// in the middle of a code point and would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start)
    {
        var self = this;

        // Unsigned comparison rejects negative offsets and offsets past the end in one check.
        if ((uint)start > (uint)self.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var remaining = self.Length - start;
        if (remaining <= 0)
        {
            return default;
        }

        // The first byte of the result must not be a continuation byte.
        if (U8Info.IsContinuationByte(in self.UnsafeRefAdd(start)))
        {
            ThrowHelpers.InvalidSplit();
        }

        return new(self._value, self.Offset + start, remaining);
    }

    /// <summary>
    /// Retrieves a substring from this instance. The substring starts at the specified
    /// byte offset and has the specified length in bytes.
    /// </summary>
    /// <param name="start">The zero-based byte offset at which the substring begins.</param>
    /// <param name="length">The number of bytes in the substring.</param>
    /// <returns>
    /// A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes,
    /// or the default (empty) string when <paramref name="length"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The substring would begin or end in the middle of a UTF-8 code point and would not be a valid UTF-8 string.
    /// </exception>
    public U8String Slice(int start, int length)
    {
        var source = this;
        // From ReadOnly/Span<T> Slice(int, int) implementation
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)
        {
            ThrowHelpers.ArgumentOutOfRange();
        }

        var result = default(U8String);
        if (length > 0)
        {
            // After the range check both values are non-negative and their sum fits in the string,
            // so this addition cannot overflow.
            var end = start + length;

            // The byte following the slice may only be inspected when it belongs to this string.
            // Guarding on `end < source.Length` (rather than `length < source.Length`) avoids reading
            // one byte past the string when the slice starts after offset 0 and runs to the very end.
            if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (
                end < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(end))))
            {
                // TODO: Exception message UX
                ThrowHelpers.InvalidSplit();
            }

            result = new(source._value, source.Offset + start, length);
        }

        return result;
    }

    /// <summary>
    /// Removes all leading and trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the start and end of the current string.
    /// </returns>
    public U8String Trim()
    {
        // TODO: Optimize fast path on no whitespace
        // TODO 2: Do not convert to runes and have proper
        // whitespace LUT to evaluate code points in a branchless way
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Forward scan: skip whole whitespace code points from the start.
            var start = 0;
            while (start < source.Length)
            {
                if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))
                {
                    break;
                }
                start += size;
            }

            // Backward scan: walk bytes from the end, skipping continuation bytes until the
            // first byte of each code point is reached, then classify that code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= start; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // The non-ASCII check must be given the first byte of the sequence (endSearch);
                    // `end` still refers to the last byte of the code point being examined.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Save the last found whitespace code point offset and continue searching
                        // for more whitespace byte sequences from their end. If we don't do this,
                        // we will end up trimming away continuation bytes at the end of the string.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, start, end - start + 1);
        }

        return default;
    }

    /// <summary>
    /// Removes all leading whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// The current string unchanged when its first byte is a non-whitespace ASCII byte;
    /// otherwise a sub-slice that remains after all whitespace characters
    /// are removed from the start of the current string.
    /// </returns>
    public U8String TrimStart()
    {
        var self = this;
        if (self.IsEmpty)
        {
            return default;
        }

        ref var first = ref self.UnsafeRef;
        var head = first;

        // Fast path: an ASCII first byte that is not whitespace means there is nothing to trim.
        if (U8Info.IsAsciiByte(head) && !U8Info.IsAsciiWhitespace(head))
        {
            return self;
        }

        // Advance one whole code point at a time while it is whitespace.
        var offset = 0;
        while (offset < self.Length
            && U8Info.IsWhitespaceRune(ref first.Add(offset), out var runeLength))
        {
            offset += runeLength;
        }

        return U8Marshal.Slice(self, offset);
    }

    /// <summary>
    /// Removes all trailing whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice that remains after all whitespace characters
    /// are removed from the end of the current string.
    /// </returns>
    public U8String TrimEnd()
    {
        var source = this;
        if (!source.IsEmpty)
        {
            ref var ptr = ref source.UnsafeRef;

            // Walk bytes from the end, skipping continuation bytes until the first byte of
            // each code point is reached, then classify that code point.
            var end = source.Length - 1;
            for (var endSearch = end; endSearch >= 0; endSearch--)
            {
                var b = ptr.Add(endSearch);
                if (!U8Info.IsContinuationByte(b))
                {
                    // The non-ASCII check must be given the first byte of the sequence (endSearch);
                    // `end` still refers to the last byte of the code point being examined.
                    if (U8Info.IsAsciiByte(b)
                        ? U8Info.IsAsciiWhitespace(b)
                        : U8Info.IsNonAsciiWhitespace(ref ptr.Add(endSearch), out _))
                    {
                        // Everything from endSearch onwards is whitespace; keep scanning
                        // from the byte just before it.
                        end = endSearch - 1;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            return U8Marshal.Slice(source, 0, end + 1);
        }

        return default;
    }

    /// <summary>
    /// Removes all leading and trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.Trim(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimAscii()
    {
        var self = this;

        // Ascii.Trim only computes the range to keep; the slice itself is taken by U8Marshal.
        return U8Marshal.Slice(self, Ascii.Trim(self));
    }

    /// <summary>
    /// Removes all leading ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.TrimStart(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimStartAscii()
    {
        var self = this;

        // Ascii.TrimStart only computes the range to keep; the slice itself is taken by U8Marshal.
        return U8Marshal.Slice(self, Ascii.TrimStart(self));
    }

    /// <summary>
    /// Removes all trailing ASCII whitespace characters from the current string.
    /// </summary>
    /// <returns>
    /// A sub-slice covering the range reported by <see cref="Ascii.TrimEnd(ReadOnlySpan{byte})"/>
    /// for the current string.
    /// </returns>
    public U8String TrimEndAscii()
    {
        var self = this;

        // Ascii.TrimEnd only computes the range to keep; the slice itself is taken by U8Marshal.
        return U8Marshal.Slice(self, Ascii.TrimEnd(self));
    }

    // TODO:
    // - Complete impl. depends on porting of InlineArray-based array builder for letters
    // which have different lengths in upper/lower case.
    // - Remove/rename to ToLowerFallback or move to something like "FallbackInvariantComparer"
    // clearly indicating it being slower and inferior alternative to proper implementations
    // which call into ICU/NLS/Hybrid-provided case change exports.
    /// <summary>
    /// Converts the current string to lower case using the supplied case converter.
    /// </summary>
    /// <typeparam name="T">The case converter type.</typeparam>
    /// <param name="converter">
    /// The converter that reports where conversion must start and how large the result is
    /// (<c>LowercaseHint</c>), and that performs the conversion itself (<c>ToLower</c>).
    /// </param>
    /// <returns>
    /// The current string unchanged when it is empty or when the hinted start offset is not
    /// inside the string; otherwise a new string backed by a freshly allocated buffer.
    /// </returns>
    public U8String ToLower<T>(T converter)
        where T : IU8CaseConverter
    {
        var self = this;
        if (self.IsEmpty)
        {
            return self;
        }

        var input = self.UnsafeSpan;

        // Steps 1-2: the converter reports the offset where conversion starts
        // and the length to allocate for the result.
        var (prefixLength, capacity) = converter.LowercaseHint(input);

        // An offset outside the string (including negative values) means no new string is built.
        if ((uint)prefixLength >= (uint)input.Length)
        {
            return self;
        }

        // Step 3: allocate the result and copy the prefix that needs no change verbatim.
        var buffer = new byte[capacity];
        var output = buffer.AsSpan();
        input.Slice(0, prefixLength).CopyTo(output);

        // Step 4: convert the remainder directly into the tail of the buffer.
        var written = converter.ToLower(
            input.Slice(prefixLength),
            output.Slice(prefixLength));

        // Step 5: only the bytes actually produced become part of the new string.
        return new U8String(buffer, 0, prefixLength + written);
    }

    /// <summary>
    /// Converts the current string to upper case using the supplied case converter.
    /// </summary>
    /// <typeparam name="T">The case converter type.</typeparam>
    /// <param name="converter">
    /// The converter that reports where conversion must start and how large the result is
    /// (<c>UppercaseHint</c>), and that performs the conversion itself (<c>ToUpper</c>).
    /// </param>
    /// <returns>
    /// The current string unchanged when it is empty or when the hinted start offset is not
    /// inside the string; otherwise a new string backed by a freshly allocated buffer.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public U8String ToUpper<T>(T converter)
        where T : IU8CaseConverter
    {
        var self = this;
        if (self.IsEmpty)
        {
            return self;
        }

        var input = self.UnsafeSpan;

        // The converter reports the offset where conversion starts and the length to allocate.
        var (prefixLength, capacity) = converter.UppercaseHint(input);

        // An offset outside the string (including negative values) means no new string is built.
        if ((uint)prefixLength >= (uint)input.Length)
        {
            return self;
        }

        // Copy the prefix that needs no change verbatim, then convert the rest into the tail.
        var buffer = new byte[capacity];
        var output = buffer.AsSpan();
        input.Slice(0, prefixLength).CopyTo(output);

        var written = converter.ToUpper(
            input.Slice(prefixLength),
            output.Slice(prefixLength));

        // Only the bytes actually produced become part of the new string.
        return new U8String(buffer, 0, prefixLength + written);
    }

    // TODO: docs
    // TODO 2: scan for lower/uppercase chars and only allocate if there are any
    /// <summary>
    /// Produces a copy of the current string processed by <c>U8Manipulation.ToLowerAscii</c>.
    /// </summary>
    /// <returns>
    /// The default (empty) string when the current string is empty; otherwise a new string of
    /// the same byte length backed by a freshly allocated array.
    /// </returns>
    public U8String ToLowerAscii()
    {
        var self = this;
        var byteCount = self.Length;
        if (byteCount <= 0)
        {
            return default;
        }

        // The output has exactly as many bytes as the input.
        var buffer = new byte[byteCount];

        U8Manipulation.ToLowerAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)byteCount);

        return new(buffer, 0, byteCount);
    }

    /// <summary>
    /// Produces a copy of the current string processed by <c>U8Manipulation.ToUpperAscii</c>.
    /// </summary>
    /// <returns>
    /// The default (empty) string when the current string is empty; otherwise a new string of
    /// the same byte length backed by a freshly allocated array.
    /// </returns>
    public U8String ToUpperAscii()
    {
        var self = this;
        var byteCount = self.Length;
        if (byteCount <= 0)
        {
            return default;
        }

        // The output has exactly as many bytes as the input.
        var buffer = new byte[byteCount];

        U8Manipulation.ToUpperAscii(
            ref self.UnsafeRef,
            ref MemoryMarshal.GetArrayDataReference(buffer),
            (uint)byteCount);

        return new(buffer, 0, byteCount);
    }
}

1	using System.Runtime.InteropServices;
2	using System.Text;
3
4	using U8Primitives.Abstractions;
5	using U8Primitives.InteropServices;
6
7	namespace U8Primitives;
8
9	#pragma warning disable IDE0046, IDE0057 // Why: range slicing and ternary expressions do not produce desired codegen
10	public readonly partial struct U8String
11	{
12	// TODO: Optimize/deduplicate Concat variants
13	// TODO: Investigate if it is possible fold validation for u8 literals
14	public static U8String Concat(U8String left, U8String right)
15	{
16	if (!left.IsEmpty)	×
17	{
18	if (!right.IsEmpty)	×
19	{
20	return U8Manipulation.ConcatUnchecked(	×
21	left.UnsafeSpan,	×
22	right.UnsafeSpan);	×
23	}
24
25	return left;	×
26	}
27
28	return right;	×
29	}
30
31	public static U8String Concat(U8String left, ReadOnlySpan<byte> right)
32	{
33	if (!right.IsEmpty)	×
34	{
35	Validate(right);	×
36	if (!left.IsEmpty)	×
37	{
38	return U8Manipulation.ConcatUnchecked(left.UnsafeSpan, right);	×
39	}
40
41	return new U8String(right, skipValidation: true);	×
42	}
43
44	return left;	×
45	}
46
47	public static U8String Concat(ReadOnlySpan<byte> left, U8String right)
48	{
49	if (!left.IsEmpty)	×
50	{
51	Validate(left);	×
52	if (!right.IsEmpty)	×
53	{
54	return U8Manipulation.ConcatUnchecked(left, right.UnsafeSpan);	×
55	}
56
57	return new U8String(left, skipValidation: true);	×
58	}
59
60	return right;	×
61	}
62
63	public static U8String Concat(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
64	{
65	var length = left.Length + right.Length;	×
66	if (length != 0)	×
67	{
68	var value = new byte[length];	×
69
70	left.CopyTo(value);	×
71	right.CopyTo(value.SliceUnsafe(left.Length, right.Length));	×
72
73	Validate(value);	×
74	return new U8String(value, 0, length);	×
75	}
76
77	return default;	×
78	}
79
80	/// <inheritdoc />
81	public void CopyTo(byte[] destination, int index)
82	{
83	var src = this;	×
84	var dst = destination.AsSpan()[index..];	×
85	if (src.Length > dst.Length)	×
86	{
87	ThrowHelpers.ArgumentOutOfRange(nameof(index));	×
88	}
89
90	src.UnsafeSpan.CopyTo(dst);	×
91	}	×
92
93	/// <summary>
94	/// Normalizes current <see cref="U8String"/> to the specified Unicode normalization form (default: <see cref="NormalizationForm.FormC"/>).
95	/// </summary>
96	/// <returns>A new <see cref="U8String"/> normalized to the specified form.</returns>
97	public U8String Normalize(NormalizationForm form = NormalizationForm.FormC)
98	{
99	throw new NotImplementedException();	×
100	}
101
102	[MethodImpl(MethodImplOptions.AggressiveInlining)]
103	public U8String Replace(byte oldValue, byte newValue)
104	{
105	return U8Manipulation.Replace(this, oldValue, newValue);	×
106	}
107
108	[MethodImpl(MethodImplOptions.AggressiveInlining)]
109	public U8String Replace(char oldValue, char newValue)
110	{
111	return U8Manipulation.Replace(this, oldValue, newValue);	×
112	}
113
114	[MethodImpl(MethodImplOptions.AggressiveInlining)]
115	public U8String Replace(Rune oldValue, Rune newValue)
116	{
117	return U8Manipulation.Replace(this, oldValue, newValue);	×
118	}
119
120	[MethodImpl(MethodImplOptions.AggressiveInlining)]
121	public U8String Replace(ReadOnlySpan<byte> oldValue, ReadOnlySpan<byte> newValue)
122	{
123	return U8Manipulation.Replace(this, oldValue, newValue);	×
124	}
125
126	[MethodImpl(MethodImplOptions.AggressiveInlining)]
127	public U8String Replace(U8String oldValue, U8String newValue)
128	{
129	return U8Manipulation.Replace(this, oldValue, newValue);	×
130	}
131
132	public U8String ReplaceLineEndings()
133	{
134	var source = this;	×
135	if (!source.IsEmpty)	×
136	{
137	if (!OperatingSystem.IsWindows())	×
138	{
139	return U8Manipulation.ReplaceCore(	×
140	source, "\r\n"u8, "\n"u8, validate: false);	×
141	}
142
143	// This needs manual loop which is sad
144	throw new NotImplementedException();	×
145	}
146
147	return source;	×
148	}
149
150	/// <summary>
151	/// Retrieves a substring from this instance. The substring starts at a specified
152	/// character position and continues to the end of the string.
153	/// </summary>
154	/// <param name="start">The zero-based starting character position of a substring in this instance.</param>
155	/// <returns>A substring view that begins at <paramref name="start"/>.</returns>
156	/// <exception cref="ArgumentOutOfRangeException">
157	/// <paramref name="start"/> is less than zero or greater than the length of this instance.
158	/// </exception>
159	/// <exception cref="ArgumentException">
160	/// The resulting substring splits at a UTF-8 code point boundary and would result in an invalid UTF-8 string.
161	/// </exception>
162	public U8String Slice(int start)
163	{
164	var source = this;	×
165	// From ReadOnly/Span<T> Slice(int) implementation
166	if ((ulong)(uint)start > (ulong)(uint)source.Length)	×
167	{
168	ThrowHelpers.ArgumentOutOfRange();	×
169	}
170
171	var length = source.Length - start;	×
172	if (length > 0)	×
173	{
174	if (U8Info.IsContinuationByte(in source.UnsafeRefAdd(start)))	×
175	{
176	ThrowHelpers.InvalidSplit();	×
177	}
178
179	return new(source._value, source.Offset + start, length);	×
180	}
181
182	return default;	×
183	}
184
185	/// <summary>
186	/// Retrieves a substring from this instance. The substring starts at a specified
187	/// character position and has a specified length.
188	/// </summary>
189	/// <param name="start">The zero-based starting character position of a substring in this instance.</param>
190	/// <param name="length">The number of bytes in the substring.</param>
191	/// <returns>A substring view that begins at <paramref name="start"/> and has <paramref name="length"/> bytes.</returns>
192	/// <exception cref="ArgumentOutOfRangeException">
193	/// <paramref name="start"/> or <paramref name="length"/> is less than zero, or the sum of <paramref name="start"/> and <paramref name="length"/> is greater than the length of the current instance.
194	/// </exception>
195	/// <exception cref="ArgumentException">
196	/// The resulting substring splits at a UTF-8 code point boundary and would result in an invalid UTF-8 string.
197	/// </exception>
198	public U8String Slice(int start, int length)
199	{
200	var source = this;	×
201	// From ReadOnly/Span<T> Slice(int, int) implementation
202	if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)source.Length)	×
203	{
204	ThrowHelpers.ArgumentOutOfRange();	×
205	}
206
207	var result = default(U8String);	×
208	if (length > 0)	×
209	{
210	// TODO: Is there really no way to get rid of length < source.Length when checking the last+1 byte?
211	if ((start > 0 && U8Info.IsContinuationByte(source.UnsafeRefAdd(start))) || (	×
212	length < source.Length && U8Info.IsContinuationByte(source.UnsafeRefAdd(start + length))))	×
213	{
214	// TODO: Exception message UX
215	ThrowHelpers.InvalidSplit();	×
216	}
217
218	result = new(source._value, source.Offset + start, length);	×
219	}
220
221	return result;	×
222	}
223
224	/// <summary>
225	/// Removes all leading and trailing whitespace characters from the current string.
226	/// </summary>
227	/// <returns>
228	/// A sub-slice that remains after all whitespace characters
229	/// are removed from the start and end of the current string.
230	/// </returns>
231	public U8String Trim()
232	{
233	// TODO: Optimize fast path on no whitespace
234	// TODO 2: Do not convert to runes and have proper
235	// whitespace LUT to evaluate code points in a branchless way
236	var source = this;	×
237	if (!source.IsEmpty)	×
238	{
239	ref var ptr = ref source.UnsafeRef;	×
240
241	var start = 0;	×
242	while (start < source.Length)	×
243	{
244	if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))	×
245	{
246	break;
247	}
248	start += size;	×
249	}
250
251	var end = source.Length - 1;	×
252	for (var endSearch = end; endSearch >= start; endSearch--)	×
253	{
254	var b = ptr.Add(endSearch);	×
255	if (!U8Info.IsContinuationByte(b))	×
256	{
257	if (U8Info.IsAsciiByte(b)	×
258	? U8Info.IsAsciiWhitespace(b)	×
259	: U8Info.IsNonAsciiWhitespace(ref ptr.Add(end), out _))	×
260	{
261	// Save the last found whitespace code point offset and continue searching
262	// for more whitspace byte sequences from their end. If we don't do this,
263	// we will end up trimming away continuation bytes at the end of the string.
264	end = endSearch - 1;	×
265	}
266	else
267	{
268	break;
269	}
270	}
271	}
272
273	return U8Marshal.Slice(source, start, end - start + 1);	×
274	}
275
276	return default;	×
277	}
278
279	/// <summary>
280	/// Removes all leading whitespace characters from the current string.
281	/// </summary>
282	/// <returns>
283	/// A sub-slice that remains after all whitespace characters
284	/// are removed from the start of the current string.
285	/// </returns>
286	public U8String TrimStart()
287	{
288	var source = this;	×
289	if (!source.IsEmpty)	×
290	{
291	ref var ptr = ref source.UnsafeRef;	×
292	var b = ptr;	×
293
294	if (U8Info.IsAsciiByte(b) && !U8Info.IsAsciiWhitespace(b))	×
295	{
296	return source;	×
297	}
298
299	var start = 0;	×
300	while (start < source.Length)	×
301	{
302	if (!U8Info.IsWhitespaceRune(ref ptr.Add(start), out var size))	×
303	{
304	break;
305	}
306	start += size;	×
307	}
308
309	return U8Marshal.Slice(source, start);	×
310	}
311
312	return default;	×
313	}
314
315	/// <summary>
316	/// Removes all trailing whitespace characters from the current string.
317	/// </summary>
318	/// <returns>
319	/// A sub-slice that remains after all whitespace characters
320	/// are removed from the end of the current string.
321	/// </returns>
322	public U8String TrimEnd()
323	{
324	var source = this;	×
325	if (!source.IsEmpty)	×
326	{
327	ref var ptr = ref source.UnsafeRef;	×
328
329	var end = source.Length - 1;	×
330	for (var endSearch = end; endSearch >= 0; endSearch--)	×
331	{
332	var b = ptr.Add(endSearch);	×
333	if (!U8Info.IsContinuationByte(b))	×
334	{
335	if (U8Info.IsAsciiByte(b)	×
336	? U8Info.IsAsciiWhitespace(b)	×
337	: U8Info.IsNonAsciiWhitespace(ref ptr.Add(end), out _))	×
338	{
339	end = endSearch - 1;	×
340	}
341	else
342	{
343	break;
344	}
345	}
346	}
347
348	return U8Marshal.Slice(source, 0, end + 1);	×
349	}
350
351	return default;	×
352	}
353
354	/// <summary>
355	/// Removes all leading and trailing ASCII whitespace characters from the current string.
356	/// </summary>
357	/// <returns>
358	/// A sub-slice that remains after all ASCII whitespace characters
359	/// are removed from the start and end of the current string.
360	/// </returns>
361	public U8String TrimAscii()
362	{
363	var source = this;	×
364	var range = Ascii.Trim(source);	×
365
366	return U8Marshal.Slice(source, range);	×
367	}
368
369	/// <summary>
370	/// Removes all the leading ASCII whitespace characters from the current string.
371	/// </summary>
372	/// <returns>
373	/// A sub-slice that remains after all whitespace characters
374	/// are removed from the start of the current string.
375	/// </returns>
376	public U8String TrimStartAscii()
377	{
378	var source = this;	×
379	var range = Ascii.TrimStart(source);	×
380
381	return U8Marshal.Slice(source, range);	×
382	}
383
384	/// <summary>
385	/// Removes all the trailing ASCII whitespace characters from the current string.
386	/// </summary>
387	/// <returns>
388	/// A sub-slice that remains after all whitespace characters
389	/// are removed from the end of the current string.
390	/// </returns>
391	public U8String TrimEndAscii()
392	{
393	var source = this;	×
394	var range = Ascii.TrimEnd(source);	×
395
396	return U8Marshal.Slice(source, range);	×
397	}
398
399	// TODO:
400	// - Complete impl. depends on porting of InlineArray-based array builder for letters
401	// which have different lengths in upper/lower case.
402	// - Remove/rename to ToLowerFallback or move to something like "FallbackInvariantComparer"
403	// clearly indicating it being slower and inferior alternative to proper implementations
404	// which call into ICU/NLS/Hybrid-provided case change exports.
405	public U8String ToLower<T>(T converter)
406	where T : IU8CaseConverter
407	{
408	// 1. Estimate the start offset of the conversion (first char requiring case change)
409	// 2. Estimate the length of the conversion (the length of the resulting segment after case change)
410	// 3. Allocate the resulting buffer and copy the pre-offset segment
411	// 4. Perform the conversion which writes to the remainder segment of the buffer
412	// 5. Return the resulting buffer as a new string
413
414	var deref = this;	×
415	if (!deref.IsEmpty)	×
416	{
417	var source = deref.UnsafeSpan;	×
418
419	var (replaceStart, resultLength) = converter.LowercaseHint(source);	×
420
421	if ((uint)replaceStart < (uint)source.Length)	×
422	{
423	var lowercase = new byte[resultLength];	×
424	var destination = lowercase.AsSpan();	×
425
426	source[..replaceStart].CopyTo(destination);	×
427	source = source.Slice(replaceStart);	×
428	destination = destination.Slice(replaceStart);	×
429
430	var convertedLength = converter.ToLower(source, destination);	×
431
432	return new U8String(lowercase, 0, replaceStart + convertedLength);	×
433	}
434	}
435
436	return deref;	×
437	}
438
439	[MethodImpl(MethodImplOptions.AggressiveInlining)]
440	public U8String ToUpper<T>(T converter)
441	where T : IU8CaseConverter
442	{
443	var deref = this;	×
444	if (!deref.IsEmpty)	×
445	{
446	var source = deref.UnsafeSpan;	×
447	var (replaceStart, resultLength) = converter.UppercaseHint(source);	×
448
449	if ((uint)replaceStart < (uint)source.Length)	×
450	{
451	var uppercase = new byte[resultLength];	×
452	var destination = uppercase.AsSpan();	×
453
454	source[..replaceStart].CopyTo(destination);	×
455	source = source.Slice(replaceStart);	×
456	destination = destination.Slice(replaceStart);	×
457
458	var convertedLength = converter.ToUpper(source, destination);	×
459
460	return new U8String(uppercase, 0, replaceStart + convertedLength);	×
461	}
462	}
463
464	return deref;	×
465	}
466
467	// TODO: docs
468	// TODO 2: scan for lower/uppercase chars and only allocate if there are any
469	public U8String ToLowerAscii()
470	{
471	var source = this;	×
472	if (source.Length > 0)	×
473	{
474	var destination = new byte[source.Length];	×
475
476	U8Manipulation.ToLowerAscii(	×
477	ref source.UnsafeRef,	×
478	ref MemoryMarshal.GetArrayDataReference(destination),	×
479	(uint)source.Length);	×
480
481	return new(destination, 0, source.Length);	×
482	}
483
484	return default;	×
485	}
486
487	public U8String ToUpperAscii()
488	{
489	var source = this;	×
490	if (source.Length > 0)	×
491	{
492	var destination = new byte[source.Length];	×
493
494	U8Manipulation.ToUpperAscii(	×
495	ref source.UnsafeRef,	×
496	ref MemoryMarshal.GetArrayDataReference(destination),	×
497	(uint)source.Length);	×
498
499	return new(destination, 0, source.Length);	×
500	}
501
502	return default;	×
503	}
504	}

neon-sunset / U8String / 5992664408

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous