Created
June 12, 2024 12:50
-
-
Save VAllens/a4790264da85db3ab40e1b9320b341f9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace AllenCai; | |
/// <summary> | |
/// This is an imitation of the Microsoft.VisualBasic.CompilerServices.LikeOperator.LikeString method, | |
/// implementing matching that supports * and ? wildcards and support ignores case rules.<br /> | |
/// The purpose of this implementation is to reduce memory allocation and improve performance. | |
/// </summary> | |
public static class ZeroMemAllocLikeOperator | |
{ | |
/// <summary> | |
/// Supports * and ? wildcards, support case-insensitive matching. | |
/// </summary> | |
public static bool LikeString(string? content, string? pattern, bool ignoreCase = true) | |
{ | |
if (content == null && pattern == null) | |
return true; | |
if (content == null || pattern == null) | |
return false; | |
ReadOnlySpan<char> patternSpan = pattern.AsSpan(); | |
ReadOnlySpan<char> contentSpan = content.AsSpan(); | |
char zeroOrMoreChars = '*'; | |
char oneChar = '?'; | |
// If the pattern is composed of a single asterisk *, there is no need to match, return true directly. | |
if (patternSpan.Length == 1) | |
{ | |
ref readonly char patternItem = ref patternSpan[0]; | |
if (patternItem == zeroOrMoreChars) | |
{ | |
return true; | |
} | |
} | |
// If the length of the content to be matched is only 1, and the pattern is exactly a question mark ?, there is no need to match, return true directly. | |
if (contentSpan.Length == 1) | |
{ | |
ref readonly char patternItem = ref patternSpan[0]; | |
if (patternItem == oneChar) | |
{ | |
return true; | |
} | |
} | |
// If the pattern is composed of multiple asterisks * and question marks ?, there is no need to match, return true directly. | |
int zeroOrMorePatternCount = 0; | |
int onePatternCount = 0; | |
for (int i = 0; i < patternSpan.Length; i++) | |
{ | |
ref readonly char patternItem = ref patternSpan[i]; | |
if (patternItem == zeroOrMoreChars) | |
{ | |
zeroOrMorePatternCount++; | |
} | |
else if (patternItem == oneChar) | |
{ | |
onePatternCount++; | |
} | |
} | |
if (zeroOrMorePatternCount + onePatternCount == patternSpan.Length) | |
{ | |
// As long as there is one or more asterisks *, there is no need to care about the length of the content to be matched. | |
if (zeroOrMorePatternCount > 0) | |
{ | |
return true; | |
} | |
// If there is no asterisk *, all are question marks ?, then check whether the length of the pattern composed of question marks ? is consistent with the length of the content to be matched. If it is consistent, there is no need to match, return true directly. | |
if (patternSpan.Length == contentSpan.Length) | |
{ | |
return true; | |
} | |
} | |
return LikeStringCore(contentSpan, patternSpan, ref zeroOrMoreChars, ref oneChar, ignoreCase); | |
} | |
/// <summary> | |
/// Supports * and ? wildcards, support case-insensitive matching. | |
/// </summary> | |
public static bool LikeString(ReadOnlySpan<char> contentSpan, ReadOnlySpan<char> patternSpan, bool ignoreCase = true) | |
{ | |
char zeroOrMoreChars = '*'; | |
char oneChar = '?'; | |
// If the pattern is composed of a single asterisk *, there is no need to match, return true directly. | |
if (patternSpan.Length == 1) | |
{ | |
ref readonly char patternItem = ref patternSpan[0]; | |
if (patternItem == zeroOrMoreChars) | |
{ | |
return true; | |
} | |
} | |
// If the length of the content to be matched is only 1, and the pattern is exactly a question mark ?, there is no need to match, return true directly. | |
if (contentSpan.Length == 1) | |
{ | |
ref readonly char patternItem = ref patternSpan[0]; | |
if (patternItem == oneChar) | |
{ | |
return true; | |
} | |
} | |
// If the pattern is composed of multiple asterisks * and question marks ?, there is no need to match, return true directly. | |
int zeroOrMorePatternCount = 0; | |
int onePatternCount = 0; | |
for (int i = 0; i < patternSpan.Length; i++) | |
{ | |
ref readonly char patternItem = ref patternSpan[i]; | |
if (patternItem == zeroOrMoreChars) | |
{ | |
zeroOrMorePatternCount++; | |
} | |
else if (patternItem == oneChar) | |
{ | |
onePatternCount++; | |
} | |
} | |
if (zeroOrMorePatternCount + onePatternCount == patternSpan.Length) | |
{ | |
// As long as there is one or more asterisks *, there is no need to care about the length of the content to be matched. | |
if (zeroOrMorePatternCount > 0) | |
{ | |
return true; | |
} | |
// If there is no asterisk *, all are question marks ?, then check whether the length of the pattern composed of question marks ? is consistent with the length of the content to be matched. If it is consistent, there is no need to match, return true directly. | |
if (patternSpan.Length == contentSpan.Length) | |
{ | |
return true; | |
} | |
} | |
return LikeStringCore(contentSpan, patternSpan, ref zeroOrMoreChars, ref oneChar, ignoreCase); | |
} | |
private static bool LikeStringCore(ReadOnlySpan<char> contentSpan, ReadOnlySpan<char> patternSpan, ref char zeroOrMoreChars, ref char oneChar, bool ignoreCase = true) | |
{ | |
// Traverse the pattern, match character by character. | |
int contentIndex = 0; | |
int patternIndex = 0; | |
while (contentIndex < contentSpan.Length && patternIndex < patternSpan.Length) | |
{ | |
ref readonly char patternItem = ref patternSpan[patternIndex]; | |
if (patternItem == zeroOrMoreChars) | |
{ | |
// If the next character in the pattern is an asterisk *, then move patternIndex back until a character that is not an asterisk * is found. | |
while (true) | |
{ | |
if (patternIndex < patternSpan.Length) | |
{ | |
ref readonly char nextPatternItem = ref patternSpan[patternIndex]; | |
if (nextPatternItem == zeroOrMoreChars) | |
{ | |
patternIndex++; | |
continue; | |
} | |
} | |
break; | |
} | |
// If patternIndex has reached the end of the pattern, there is no need to match anymore, return true directly. | |
if (patternIndex == patternSpan.Length) | |
{ | |
return true; | |
} | |
// If patternIndex has not reached the end of the pattern, then start matching from contentIndex. | |
while (contentIndex < contentSpan.Length) | |
{ | |
if (LikeStringCore(contentSpan.Slice(contentIndex), patternSpan.Slice(patternIndex), ref zeroOrMoreChars, ref oneChar, ignoreCase)) | |
{ | |
return true; | |
} | |
contentIndex++; | |
} | |
return false; | |
} | |
if (patternItem == oneChar) | |
{ | |
// If the next character in the pattern is a question mark ?, then match one character. | |
contentIndex++; | |
patternIndex++; | |
} | |
else | |
{ | |
// If the next character in the pattern is not an asterisk *, nor a question mark ?, then match one character. | |
if (contentIndex >= contentSpan.Length) | |
{ | |
return false; | |
} | |
ref readonly char contentItem = ref contentSpan[contentIndex]; | |
if (ignoreCase) | |
{ | |
if (char.ToUpperInvariant(contentItem) != char.ToUpperInvariant(patternItem)) | |
{ | |
return false; | |
} | |
} | |
else | |
{ | |
if (contentItem != patternItem) | |
{ | |
return false; | |
} | |
} | |
contentIndex++; | |
patternIndex++; | |
} | |
} | |
// If all the content is matched, and the pattern has not been traversed, then check whether the remaining patternItem are all asterisks *. If yes, return true, otherwise return false. | |
if (contentIndex == contentSpan.Length) | |
{ | |
// If the next character in the pattern is an asterisk *, then move patternIndex back until a character that is not an asterisk * is found. | |
while (true) | |
{ | |
if (patternIndex < patternSpan.Length) | |
{ | |
ref readonly char nextPatternItem = ref patternSpan[patternIndex]; | |
if (nextPatternItem == zeroOrMoreChars) | |
{ | |
patternIndex++; | |
continue; | |
} | |
} | |
break; | |
} | |
return patternIndex == patternSpan.Length; | |
} | |
return false; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment