Skip to content

Commit fe53637

Browse files
committed
Improve vectorization of IndexOf(chars, StringComparison.OrdinalIgnoreCase)
Use the same general "Algorithm 1: Generic SIMD" that we do for StringComparison.Ordinal, adapted for OrdinalIgnoreCase.
1 parent 0f06ede commit fe53637

File tree

1 file changed

+193
-5
lines changed
  • src/libraries/System.Private.CoreLib/src/System/Globalization

1 file changed

+193
-5
lines changed

src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs

Lines changed: 193 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Diagnostics;
5-
using System.Text.Unicode;
5+
using System.Numerics;
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
88
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.X86;
10+
using System.Text.Unicode;
911

1012
namespace System.Globalization
1113
{
@@ -295,7 +297,6 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
295297
// A non-linguistic search compares chars directly against one another, so large
296298
// target strings can never be found inside small search spaces. This check also
297299
// handles empty 'source' spans.
298-
299300
return -1;
300301
}
301302

@@ -309,25 +310,39 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
309310
return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true);
310311
}
311312

312-
// If value starts with an ASCII char, we can use a vectorized path
313+
// If value doesn't start with ASCII, fall back to a non-vectorized non-ASCII friendly version.
313314
ref char valueRef = ref MemoryMarshal.GetReference(value);
314315
char valueChar = valueRef;
315-
316316
if (!char.IsAscii(valueChar))
317317
{
318-
// Fallback to a more non-ASCII friendly version
319318
return OrdinalCasing.IndexOf(source, value);
320319
}
321320

322321
// Hoist some expressions from the loop
323322
int valueTailLength = value.Length - 1;
324323
int searchSpaceLength = source.Length - valueTailLength;
324+
int searchSpaceMinusValueTailLength = source.Length - valueTailLength;
325325
ref char searchSpace = ref MemoryMarshal.GetReference(source);
326326
char valueCharU = default;
327327
char valueCharL = default;
328328
nint offset = 0;
329329
bool isLetter = false;
330330

331+
// If the input is long enough and the value ends with ASCII, we can take a special vectorized
332+
// path that compares both the beginning and the end at the same time.
333+
if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128<ushort>.Count)
334+
{
335+
valueCharU = Unsafe.Add(ref valueRef, valueTailLength);
336+
if (char.IsAscii(valueCharU))
337+
{
338+
goto SearchTwoChars;
339+
}
340+
}
341+
342+
// We're searching for the first character and it's known to be ASCII. If it's not a letter,
343+
// then IgnoreCase doesn't impact what it matches and we just need to do a normal search
344+
// for that single character. If it is a letter, then we need to search for both its upper
345+
// and lower-case variants.
331346
if (char.IsAsciiLetter(valueChar))
332347
{
333348
valueCharU = (char)(valueChar & ~0x20);
@@ -370,6 +385,179 @@ ref Unsafe.Add(ref valueRef, 1), valueTailLength))
370385
while (searchSpaceLength > 0);
371386

372387
return -1;
388+
389+
// Based on SpanHelpers.IndexOf(ref char, int, ref char, int), which was in turn based on
390+
// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd. This version has additional
391+
// modifications to support case-insensitive searches.
392+
SearchTwoChars:
393+
// Both the first character in value (valueChar) and the last character in value (valueCharU) are ASCII. Get their lowercase variants.
394+
valueChar = (char)(valueChar | 0x20);
395+
valueCharU = (char)(valueCharU | 0x20);
396+
397+
// The search is more efficient if the two characters being searched for are different. As long as they are equal, walk backwards
398+
// from the last character in the search value until we find a character that's different. Since we're dealing with IgnoreCase,
399+
// we compare the lowercase variants, as that's what we'll be comparing against in the main loop.
400+
nint ch1ch2Distance = valueTailLength;
401+
while (valueCharU == valueChar && ch1ch2Distance > 1)
402+
{
403+
char tmp = Unsafe.Add(ref valueRef, ch1ch2Distance - 1);
404+
if (!char.IsAscii(tmp))
405+
{
406+
break;
407+
}
408+
--ch1ch2Distance;
409+
valueCharU = (char)(tmp | 0x20);
410+
}
411+
412+
// Use Vector256 if the input is long enough.
413+
if (Vector256.IsHardwareAccelerated && searchSpaceMinusValueTailLength - Vector256<ushort>.Count >= 0)
414+
{
415+
// Create a vector for each of the lowercase ASCII characters we're searching for.
416+
Vector256<ushort> ch1 = Vector256.Create((ushort)valueChar);
417+
Vector256<ushort> ch2 = Vector256.Create((ushort)valueCharU);
418+
419+
nint searchSpaceMinusValueTailLengthAndVector = searchSpaceMinusValueTailLength - (nint)Vector256<ushort>.Count;
420+
do
421+
{
422+
// Make sure we don't go out of bounds.
423+
Debug.Assert(offset + ch1ch2Distance + Vector256<ushort>.Count <= searchSpaceLength);
424+
425+
// Load a vector from the current search space offset and another from the offset plus the distance between the two characters.
426+
// For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there
427+
// was no match. If it wasn't, we have to do more work to check for a match.
428+
Vector256<ushort> cmpCh2 = Vector256.Equals(ch2, Vector256.BitwiseOr(Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)), Vector256.Create((ushort)0x20)));
429+
Vector256<ushort> cmpCh1 = Vector256.Equals(ch1, Vector256.BitwiseOr(Vector256.LoadUnsafe(ref searchSpace, (nuint)offset), Vector256.Create((ushort)0x20)));
430+
Vector256<byte> cmpAnd = (cmpCh1 & cmpCh2).AsByte();
431+
if (cmpAnd != Vector256<byte>.Zero)
432+
{
433+
goto CandidateFound;
434+
}
435+
436+
LoopFooter:
437+
// No match. Advance to the next vector.
438+
offset += Vector256<ushort>.Count;
439+
440+
// If we've reached the end of the search space, bail.
441+
if (offset == searchSpaceMinusValueTailLength)
442+
{
443+
return -1;
444+
}
445+
446+
// If we're within a vector's length of the end of the search space, adjust the offset
447+
// to point to the last vector so that our next iteration will process it.
448+
if (offset > searchSpaceMinusValueTailLengthAndVector)
449+
{
450+
offset = searchSpaceMinusValueTailLengthAndVector;
451+
}
452+
453+
continue;
454+
455+
CandidateFound:
456+
// Possible matches at the current location. Extract the bits for each element.
457+
// For each set bit, we'll check if it's a match at that location.
458+
uint mask = cmpAnd.ExtractMostSignificantBits();
459+
do
460+
{
461+
// Do a full IgnoreCase equality comparison. SpanHelpers.IndexOf skips comparing the two characters in some cases,
462+
// but we don't actually know that the two characters are equal, since we compared with | 0x20. So we just compare
463+
// the full string always.
464+
int bitPos = BitOperations.TrailingZeroCount(mask);
465+
nint charPos = (nint)((uint)bitPos / 2); // div by 2 (shr) because we work with 2-byte chars
466+
if (EqualsIgnoreCase(ref Unsafe.Add(ref searchSpace, offset + charPos), ref valueRef, value.Length))
467+
{
468+
// Match! Return the index.
469+
return (int)(offset + charPos);
470+
}
471+
472+
// Clear the two lowest set bits in the mask. If there are no more set bits, we're done.
473+
// If any remain, we loop around to do the next comparison.
474+
if (Bmi1.IsSupported)
475+
{
476+
mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask));
477+
}
478+
else
479+
{
480+
mask &= ~(uint)(0b11 << bitPos);
481+
}
482+
} while (mask != 0);
483+
goto LoopFooter;
484+
485+
} while (true);
486+
}
487+
else // 128bit vector path (SSE2 or AdvSimd)
488+
{
489+
// Create a vector for each of the lowercase ASCII characters we're searching for.
490+
Vector128<ushort> ch1 = Vector128.Create((ushort)valueChar);
491+
Vector128<ushort> ch2 = Vector128.Create((ushort)valueCharU);
492+
493+
nint searchSpaceMinusValueTailLengthAndVector = searchSpaceMinusValueTailLength - (nint)Vector128<ushort>.Count;
494+
do
495+
{
496+
// Make sure we don't go out of bounds.
497+
Debug.Assert(offset + ch1ch2Distance + Vector128<ushort>.Count <= searchSpaceLength);
498+
499+
// Load a vector from the current search space offset and another from the offset plus the distance between the two characters.
500+
// For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there
501+
// was no match. If it wasn't, we have to do more work to check for a match.
502+
Vector128<ushort> cmpCh2 = Vector128.Equals(ch2, Vector128.BitwiseOr(Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)), Vector128.Create((ushort)0x20)));
503+
Vector128<ushort> cmpCh1 = Vector128.Equals(ch1, Vector128.BitwiseOr(Vector128.LoadUnsafe(ref searchSpace, (nuint)offset), Vector128.Create((ushort)0x20)));
504+
Vector128<byte> cmpAnd = (cmpCh1 & cmpCh2).AsByte();
505+
if (cmpAnd != Vector128<byte>.Zero)
506+
{
507+
goto CandidateFound;
508+
}
509+
510+
LoopFooter:
511+
// No match. Advance to the next vector.
512+
offset += Vector128<ushort>.Count;
513+
514+
// If we've reached the end of the search space, bail.
515+
if (offset == searchSpaceMinusValueTailLength)
516+
{
517+
return -1;
518+
}
519+
520+
// If we're within a vector's length of the end of the search space, adjust the offset
521+
// to point to the last vector so that our next iteration will process it.
522+
if (offset > searchSpaceMinusValueTailLengthAndVector)
523+
{
524+
offset = searchSpaceMinusValueTailLengthAndVector;
525+
}
526+
527+
continue;
528+
529+
CandidateFound:
530+
// Possible matches at the current location. Extract the bits for each element.
531+
// For each set bit, we'll check if it's a match at that location.
532+
uint mask = cmpAnd.ExtractMostSignificantBits();
533+
do
534+
{
535+
// Do a full IgnoreCase equality comparison. SpanHelpers.IndexOf skips comparing the two characters in some cases,
536+
// but we don't actually know that the two characters are equal, since we compared with | 0x20. So we just compare
537+
// the full string always.
538+
int bitPos = BitOperations.TrailingZeroCount(mask);
539+
int charPos = (int)((uint)bitPos / 2); // div by 2 (shr) because we work with 2-byte chars
540+
if (EqualsIgnoreCase(ref Unsafe.Add(ref searchSpace, offset + charPos), ref valueRef, value.Length))
541+
{
542+
// Match! Return the index.
543+
return (int)(offset + charPos);
544+
}
545+
546+
// Clear the two lowest set bits in the mask. If there are no more set bits, we're done.
547+
// If any remain, we loop around to do the next comparison.
548+
if (Bmi1.IsSupported)
549+
{
550+
mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask));
551+
}
552+
else
553+
{
554+
mask &= ~(uint)(0b11 << bitPos);
555+
}
556+
} while (mask != 0);
557+
goto LoopFooter;
558+
559+
} while (true);
560+
}
373561
}
374562

375563
internal static int LastIndexOf(string source, string value, int startIndex, int count)

0 commit comments

Comments
 (0)