Skip to content

Instantly share code, notes, and snippets.

@drolevar
Created January 16, 2025 15:43
Show Gist options
  • Save drolevar/12acc9d5575b76c7dc4edac056c97d79 to your computer and use it in GitHub Desktop.
Save drolevar/12acc9d5575b76c7dc4edac056c97d79 to your computer and use it in GitHub Desktop.
using System;
using System.ComponentModel;
using System.Diagnostics;
using System.Reflection;
using System.Text;
public static class StringBuilderExtensions
{
/// <summary>
/// GetChunks returns ChunkEnumerator that follows the IEnumerable pattern and
/// thus can be used in a C# 'foreach' statements to retrieve the data in the StringBuilder
/// as chunks (ReadOnlyMemory) of characters. An example use is:
///
/// foreach (ReadOnlyMemory&lt;char&gt; chunk in sb.GetChunks())
/// foreach (char c in chunk.Span)
/// { /* operation on c }
///
/// It is undefined what happens if the StringBuilder is modified while the chunk
/// enumeration is incomplete. StringBuilder is also not thread-safe, so operating
/// on it with concurrent threads is illegal. Finally the ReadOnlyMemory chunks returned
/// are NOT guaranteed to remain unchanged if the StringBuilder is modified, so do
/// not cache them for later use either. This API's purpose is efficiently extracting
/// the data of a CONSTANT StringBuilder.
///
/// Creating a ReadOnlySpan from a ReadOnlyMemory (the .Span property) is expensive
/// compared to the fetching of the character, so create a local variable for the SPAN
/// if you need to use it in a nested for statement. For example
///
/// foreach (ReadOnlyMemory&lt;char&gt; chunk in sb.GetChunks())
/// {
/// var span = chunk.Span;
/// for (int i = 0; i &lt; span.Length; i++)
/// { /* operation on span[i] */ }
/// }
/// </summary>
public static ChunkEnumerator GetChunks(this StringBuilder sb) => new ChunkEnumerator(sb);
/// <summary>
/// ChunkEnumerator supports both the IEnumerable and IEnumerator pattern so foreach
/// works (see GetChunks). It needs to be public (so the compiler can use it
/// when building a foreach statement) but users typically don't use it explicitly.
/// (which is why it is a nested type).
/// </summary>
public struct ChunkEnumerator
{
private readonly StringBuilder _firstChunk; // The first Stringbuilder chunk (which is the end of the logical string)
private StringBuilder? _currentChunk; // The chunk that this enumerator is currently returning (Current).
private readonly ManyChunkInfo? _manyChunks; // Only used for long string builders with many chunks (see constructor)
/// <summary>
/// Implement IEnumerable.GetEnumerator() to return 'this' as the IEnumerator
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)] // Only here to make foreach work
public ChunkEnumerator GetEnumerator()
{
return this;
}
/// <summary>
/// Implements the IEnumerator pattern.
/// </summary>
public bool MoveNext()
{
if (_currentChunk == _firstChunk)
{
return false;
}
if (_manyChunks != null)
{
return _manyChunks.MoveNext(ref _currentChunk);
}
StringBuilder next = _firstChunk;
while (next.GetChunkPrevious() != _currentChunk)
{
Debug.Assert(next.GetChunkPrevious() != null);
next = next.GetChunkPrevious();
}
_currentChunk = next;
return true;
}
/// <summary>
/// Implements the IEnumerator pattern.
/// </summary>
public ReadOnlyMemory<char> Current
{
get
{
if (_currentChunk == null)
{
throw new InvalidOperationException("Can't happen");
}
return new ReadOnlyMemory<char>(_currentChunk.GetChunkChars(), 0, _currentChunk.GetChunkLength());
}
}
#region private
internal ChunkEnumerator(StringBuilder stringBuilder)
{
Debug.Assert(stringBuilder != null);
_firstChunk = stringBuilder;
_currentChunk = null; // MoveNext will find the last chunk if we do this.
_manyChunks = null;
// There is a performance-vs-allocation tradeoff. Because the chunks
// are a linked list with each chunk pointing to its PREDECESSOR, walking
// the list FORWARD is not efficient. If there are few chunks (< 8) we
// simply scan from the start each time, and tolerate the N*N behavior.
// However above this size, we allocate an array to hold reference to all
// the chunks and we can be efficient for large N.
int chunkCount = ChunkCount(stringBuilder);
if (8 < chunkCount)
{
_manyChunks = new ManyChunkInfo(stringBuilder, chunkCount);
}
}
private static int ChunkCount(StringBuilder? stringBuilder)
{
int ret = 0;
while (stringBuilder != null)
{
ret++;
stringBuilder = stringBuilder.GetChunkPrevious();
}
return ret;
}
/// <summary>
/// Used to hold all the chunks indexes when you have many chunks.
/// </summary>
private sealed class ManyChunkInfo
{
private readonly StringBuilder[] _chunks; // These are in normal order (first chunk first)
private int _chunkPos;
public bool MoveNext(ref StringBuilder? current)
{
int pos = ++_chunkPos;
if (_chunks.Length <= pos)
{
return false;
}
current = _chunks[pos];
return true;
}
public ManyChunkInfo(StringBuilder? stringBuilder, int chunkCount)
{
_chunks = new StringBuilder[chunkCount];
while (0 <= --chunkCount)
{
Debug.Assert(stringBuilder != null);
_chunks[chunkCount] = stringBuilder;
stringBuilder = stringBuilder.GetChunkPrevious();
}
_chunkPos = -1;
}
}
#endregion
}
// Reflection info for the internal fields on StringBuilder
private static readonly FieldInfo ChunkCharsField =
typeof(StringBuilder).GetField("m_ChunkChars", BindingFlags.NonPublic | BindingFlags.Instance);
private static readonly FieldInfo ChunkPreviousField =
typeof(StringBuilder).GetField("m_ChunkPrevious", BindingFlags.NonPublic | BindingFlags.Instance);
private static readonly FieldInfo ChunkLengthField =
typeof(StringBuilder).GetField("m_ChunkLength", BindingFlags.NonPublic | BindingFlags.Instance);
private static readonly FieldInfo ChunkOffsetField =
typeof(StringBuilder).GetField("m_ChunkOffset", BindingFlags.NonPublic | BindingFlags.Instance);
/// <summary>
/// Gets the internal <c>m_ChunkChars</c> array from a <see cref="StringBuilder"/>.
/// </summary>
private static char[] GetChunkChars(this StringBuilder sb)
{
if (sb == null) throw new ArgumentNullException(nameof(sb));
return (char[]) ChunkCharsField.GetValue(sb);
}
/// <summary>
/// Gets the internal <c>m_ChunkPrevious</c> from a <see cref="StringBuilder"/>.
/// </summary>
private static StringBuilder GetChunkPrevious(this StringBuilder sb)
{
if (sb == null) throw new ArgumentNullException(nameof(sb));
return (StringBuilder) ChunkPreviousField.GetValue(sb);
}
/// <summary>
/// Gets the internal <c>m_ChunkLength</c> from a <see cref="StringBuilder"/>.
/// </summary>
private static int GetChunkLength(this StringBuilder sb)
{
if (sb == null) throw new ArgumentNullException(nameof(sb));
return (int) ChunkLengthField.GetValue(sb);
}
/// <summary>
/// Gets the internal <c>m_ChunkOffset</c> from a <see cref="StringBuilder"/>.
/// </summary>
private static int GetChunkOffset(this StringBuilder sb)
{
if (sb == null) throw new ArgumentNullException(nameof(sb));
return (int) ChunkOffsetField.GetValue(sb);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment