Created
January 16, 2025 15:43
-
-
Save drolevar/12acc9d5575b76c7dc4edac056c97d79 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.ComponentModel; | |
using System.Diagnostics; | |
using System.Reflection; | |
using System.Text; | |
public static class StringBuilderExtensions | |
{ | |
/// <summary> | |
/// GetChunks returns ChunkEnumerator that follows the IEnumerable pattern and | |
/// thus can be used in a C# 'foreach' statements to retrieve the data in the StringBuilder | |
/// as chunks (ReadOnlyMemory) of characters. An example use is: | |
/// | |
/// foreach (ReadOnlyMemory<char> chunk in sb.GetChunks()) | |
/// foreach (char c in chunk.Span) | |
/// { /* operation on c } | |
/// | |
/// It is undefined what happens if the StringBuilder is modified while the chunk | |
/// enumeration is incomplete. StringBuilder is also not thread-safe, so operating | |
/// on it with concurrent threads is illegal. Finally the ReadOnlyMemory chunks returned | |
/// are NOT guaranteed to remain unchanged if the StringBuilder is modified, so do | |
/// not cache them for later use either. This API's purpose is efficiently extracting | |
/// the data of a CONSTANT StringBuilder. | |
/// | |
/// Creating a ReadOnlySpan from a ReadOnlyMemory (the .Span property) is expensive | |
/// compared to the fetching of the character, so create a local variable for the SPAN | |
/// if you need to use it in a nested for statement. For example | |
/// | |
/// foreach (ReadOnlyMemory<char> chunk in sb.GetChunks()) | |
/// { | |
/// var span = chunk.Span; | |
/// for (int i = 0; i < span.Length; i++) | |
/// { /* operation on span[i] */ } | |
/// } | |
/// </summary> | |
public static ChunkEnumerator GetChunks(this StringBuilder sb) => new ChunkEnumerator(sb); | |
/// <summary> | |
/// ChunkEnumerator supports both the IEnumerable and IEnumerator pattern so foreach | |
/// works (see GetChunks). It needs to be public (so the compiler can use it | |
/// when building a foreach statement) but users typically don't use it explicitly. | |
/// (which is why it is a nested type). | |
/// </summary> | |
public struct ChunkEnumerator | |
{ | |
private readonly StringBuilder _firstChunk; // The first Stringbuilder chunk (which is the end of the logical string) | |
private StringBuilder? _currentChunk; // The chunk that this enumerator is currently returning (Current). | |
private readonly ManyChunkInfo? _manyChunks; // Only used for long string builders with many chunks (see constructor) | |
/// <summary> | |
/// Implement IEnumerable.GetEnumerator() to return 'this' as the IEnumerator | |
/// </summary> | |
[EditorBrowsable(EditorBrowsableState.Never)] // Only here to make foreach work | |
public ChunkEnumerator GetEnumerator() | |
{ | |
return this; | |
} | |
/// <summary> | |
/// Implements the IEnumerator pattern. | |
/// </summary> | |
public bool MoveNext() | |
{ | |
if (_currentChunk == _firstChunk) | |
{ | |
return false; | |
} | |
if (_manyChunks != null) | |
{ | |
return _manyChunks.MoveNext(ref _currentChunk); | |
} | |
StringBuilder next = _firstChunk; | |
while (next.GetChunkPrevious() != _currentChunk) | |
{ | |
Debug.Assert(next.GetChunkPrevious() != null); | |
next = next.GetChunkPrevious(); | |
} | |
_currentChunk = next; | |
return true; | |
} | |
/// <summary> | |
/// Implements the IEnumerator pattern. | |
/// </summary> | |
public ReadOnlyMemory<char> Current | |
{ | |
get | |
{ | |
if (_currentChunk == null) | |
{ | |
throw new InvalidOperationException("Can't happen"); | |
} | |
return new ReadOnlyMemory<char>(_currentChunk.GetChunkChars(), 0, _currentChunk.GetChunkLength()); | |
} | |
} | |
#region private | |
internal ChunkEnumerator(StringBuilder stringBuilder) | |
{ | |
Debug.Assert(stringBuilder != null); | |
_firstChunk = stringBuilder; | |
_currentChunk = null; // MoveNext will find the last chunk if we do this. | |
_manyChunks = null; | |
// There is a performance-vs-allocation tradeoff. Because the chunks | |
// are a linked list with each chunk pointing to its PREDECESSOR, walking | |
// the list FORWARD is not efficient. If there are few chunks (< 8) we | |
// simply scan from the start each time, and tolerate the N*N behavior. | |
// However above this size, we allocate an array to hold reference to all | |
// the chunks and we can be efficient for large N. | |
int chunkCount = ChunkCount(stringBuilder); | |
if (8 < chunkCount) | |
{ | |
_manyChunks = new ManyChunkInfo(stringBuilder, chunkCount); | |
} | |
} | |
private static int ChunkCount(StringBuilder? stringBuilder) | |
{ | |
int ret = 0; | |
while (stringBuilder != null) | |
{ | |
ret++; | |
stringBuilder = stringBuilder.GetChunkPrevious(); | |
} | |
return ret; | |
} | |
/// <summary> | |
/// Used to hold all the chunks indexes when you have many chunks. | |
/// </summary> | |
private sealed class ManyChunkInfo | |
{ | |
private readonly StringBuilder[] _chunks; // These are in normal order (first chunk first) | |
private int _chunkPos; | |
public bool MoveNext(ref StringBuilder? current) | |
{ | |
int pos = ++_chunkPos; | |
if (_chunks.Length <= pos) | |
{ | |
return false; | |
} | |
current = _chunks[pos]; | |
return true; | |
} | |
public ManyChunkInfo(StringBuilder? stringBuilder, int chunkCount) | |
{ | |
_chunks = new StringBuilder[chunkCount]; | |
while (0 <= --chunkCount) | |
{ | |
Debug.Assert(stringBuilder != null); | |
_chunks[chunkCount] = stringBuilder; | |
stringBuilder = stringBuilder.GetChunkPrevious(); | |
} | |
_chunkPos = -1; | |
} | |
} | |
#endregion | |
} | |
// Reflection info for the internal fields on StringBuilder | |
private static readonly FieldInfo ChunkCharsField = | |
typeof(StringBuilder).GetField("m_ChunkChars", BindingFlags.NonPublic | BindingFlags.Instance); | |
private static readonly FieldInfo ChunkPreviousField = | |
typeof(StringBuilder).GetField("m_ChunkPrevious", BindingFlags.NonPublic | BindingFlags.Instance); | |
private static readonly FieldInfo ChunkLengthField = | |
typeof(StringBuilder).GetField("m_ChunkLength", BindingFlags.NonPublic | BindingFlags.Instance); | |
private static readonly FieldInfo ChunkOffsetField = | |
typeof(StringBuilder).GetField("m_ChunkOffset", BindingFlags.NonPublic | BindingFlags.Instance); | |
/// <summary> | |
/// Gets the internal <c>m_ChunkChars</c> array from a <see cref="StringBuilder"/>. | |
/// </summary> | |
private static char[] GetChunkChars(this StringBuilder sb) | |
{ | |
if (sb == null) throw new ArgumentNullException(nameof(sb)); | |
return (char[]) ChunkCharsField.GetValue(sb); | |
} | |
/// <summary> | |
/// Gets the internal <c>m_ChunkPrevious</c> from a <see cref="StringBuilder"/>. | |
/// </summary> | |
private static StringBuilder GetChunkPrevious(this StringBuilder sb) | |
{ | |
if (sb == null) throw new ArgumentNullException(nameof(sb)); | |
return (StringBuilder) ChunkPreviousField.GetValue(sb); | |
} | |
/// <summary> | |
/// Gets the internal <c>m_ChunkLength</c> from a <see cref="StringBuilder"/>. | |
/// </summary> | |
private static int GetChunkLength(this StringBuilder sb) | |
{ | |
if (sb == null) throw new ArgumentNullException(nameof(sb)); | |
return (int) ChunkLengthField.GetValue(sb); | |
} | |
/// <summary> | |
/// Gets the internal <c>m_ChunkOffset</c> from a <see cref="StringBuilder"/>. | |
/// </summary> | |
private static int GetChunkOffset(this StringBuilder sb) | |
{ | |
if (sb == null) throw new ArgumentNullException(nameof(sb)); | |
return (int) ChunkOffsetField.GetValue(sb); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment