Created
June 30, 2016 13:04
-
-
Save JamisonWhite/1b7a7a44f59d8f8d53752fd90b9d7e26 to your computer and use it in GitHub Desktop.
SplitOnWhiteSpace is 400 times faster than String.Split for large text.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Splits a string into the runs of non-whitespace characters it contains.
/// </summary>
/// <remarks>
/// A character is a separator when <c>char.IsWhiteSpace</c> returns true for it.
/// Consecutive, leading and trailing separators never produce empty tokens, so the
/// tokens correspond to
/// <c>text.Split(new char[] {}, StringSplitOptions.RemoveEmptyEntries)</c>.
/// The argument is validated immediately, but tokens are produced lazily: each
/// substring is only created when the caller advances the enumeration.
/// Results reported by the original author from splitting 1000 email text files:
///   00:00:00.0008075  2134951 Tokens  StringHelper.SplitOnWhiteSpace
///   00:00:00.3263374  2134951 Tokens  String.Split
/// NOTE(review): because the result is lazy, a timing that does not fully enumerate
/// the sequence measures only iterator creation - confirm how these figures were
/// taken before relying on them.
/// </remarks>
/// <param name="text">The text to split. Must not be null.</param>
/// <returns>
/// A lazily evaluated sequence of the non-empty tokens of <paramref name="text"/>,
/// in order of appearance. The sequence is empty when the text is empty or contains
/// only whitespace.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="text"/> is null.
/// </exception>
public static IEnumerable<string> SplitOnWhiteSpace(string text)
{
    // Validate here, outside the iterator, so a null argument fails at the call
    // site instead of on the first MoveNext() of a deferred enumeration.
    if (text == null)
    {
        throw new System.ArgumentNullException(nameof(text));
    }

    return SplitOnWhiteSpaceIterator(text);
}

/// <summary>
/// Iterator behind <c>SplitOnWhiteSpace</c>; expects a non-null string.
/// </summary>
private static IEnumerable<string> SplitOnWhiteSpaceIterator(string text)
{
    // Index of the first character of the token currently being scanned.
    var tokenStart = 0;

    for (var i = 0; i < text.Length; i++)
    {
        if (!char.IsWhiteSpace(text[i]))
        {
            continue;
        }

        // A separator ends the current token; skip it when the token is empty
        // (leading whitespace or several separators in a row).
        if (tokenStart != i)
        {
            yield return text.Substring(tokenStart, i - tokenStart);
        }

        tokenStart = i + 1;
    }

    // Emit the trailing token when the text does not end with whitespace.
    if (tokenStart < text.Length)
    {
        yield return text.Substring(tokenStart, text.Length - tokenStart);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment