Skip to content

Instantly share code, notes, and snippets.

@ufcpp
Created March 27, 2025 09:42
Show Gist options
  • Save ufcpp/a60294b69404745a5334f487b32d635c to your computer and use it in GitHub Desktop.
Save ufcpp/a60294b69404745a5334f487b32d635c to your computer and use it in GitHub Desktop.
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
/// <summary>
/// とりあえず Add する一方の Dictionary 的な実装。
/// ちゃんと「増えすぎたら古いのから消す」みたいな処理入れるなら自前でやるのちょっとつらく、
/// Ben.StringIntern とかの導入考えた方がよさげ。
/// </summary>
class InternPool
{
public static readonly InternPool Shared = new();
private struct Entry
{
public string? value;
public int hash;
public int next;
}
private int _count;
private int _freeList = -1;
private int[]? _buckets;
private Entry[]? _entries;
private readonly Lock _lock = new();
public string Intern(scoped ReadOnlySpan<byte> utf8Value)
{
char[]? array = null;
var count = utf8Value.Length * 2;
Span<char> span = count <= StackAllocThresholdChars ? stackalloc char[count] : array = ArrayPool<char>.Shared.Rent(count);
System.Text.Unicode.Utf8.ToUtf16(utf8Value, span, out _, out count);
string r = Intern(span[..count]);
if (array != null) ArrayPool<char>.Shared.Return(array);
return r;
}
public string Intern(scoped ReadOnlySpan<char> value)
{
ref string? r = ref Unsafe.NullRef<string?>();
lock (_lock) r = ref GetValueRef(value);
return r ??= new(value);
}
public ref string? GetValueRef(scoped ReadOnlySpan<char> value)
{
var hash = string.GetHashCode(value, StringComparison.Ordinal);
if (_buckets == null)
{
Initialize();
Debug.Assert(_buckets != null);
return ref Add(value, hash, hash & (_buckets.Length - 1));
}
Debug.Assert(_buckets != null);
Debug.Assert(_entries != null);
var buckets = _buckets;
var entries = _entries;
int bucketIndex = hash & (buckets.Length - 1);
int collisionCount = 0;
for (int i = buckets[bucketIndex] - 1;
(uint)i < (uint)entries.Length; i = entries[i].next)
{
ref var e = ref entries[i];
if(e.hash == hash && value.SequenceEqual(e.value))
return ref entries[i].value;
if (collisionCount == entries.Length)
{
ThrowInvalidOperationException_ConcurrentOperationsNotSupported();
}
collisionCount++;
}
return ref Add(value, hash, bucketIndex);
}
private ref string? Add(scoped ReadOnlySpan<char> value, int hash, int bucketIndex)
{
Debug.Assert(_buckets != null);
Debug.Assert(_entries != null);
var buckets = _buckets;
var entries = _entries;
int entryIndex;
if (_freeList != -1)
{
entryIndex = _freeList;
_freeList = -3 - entries[_freeList].next;
}
else
{
if (_count == entries.Length || entries.Length == 1)
{
entries = Resize();
hash = string.GetHashCode(value, StringComparison.Ordinal);
bucketIndex = hash & (buckets.Length - 1);
// entry indexes were not changed by Resize
}
entryIndex = _count;
}
ref var e = ref entries[entryIndex];
e.next = buckets[bucketIndex] - 1;
e.hash = hash;
buckets[bucketIndex] = entryIndex + 1;
_count++;
return ref e.value;
}
private Entry[] Resize()
{
Debug.Assert(_buckets != null);
Debug.Assert(_entries != null);
int count = _count;
int newSize = Math.Max(_entries.Length * 2, MinCapacity);
if ((uint)newSize > (uint)int.MaxValue) // uint cast handles overflow
ThrowCapacityOverflow();
var entries = new Entry[newSize];
Array.Copy(_entries, 0, entries, 0, count);
var newBuckets = new int[entries.Length];
while (count-- > 0)
{
var hash = string.GetHashCode(entries[count].value, StringComparison.Ordinal);
int bucketIndex = hash & (newBuckets.Length - 1);
entries[count].next = newBuckets[bucketIndex] - 1;
newBuckets[bucketIndex] = count + 1;
}
_buckets = newBuckets;
_entries = entries;
return entries;
}
private const int StackAllocThresholdChars = 256;
private const int MinCapacity = 4;
internal static void ThrowInvalidOperationException_ConcurrentOperationsNotSupported() => throw new InvalidOperationException("concurrent operations not supported");
internal static void ThrowCapacityOverflow() => throw new InvalidOperationException("capacity overflow");
private void Initialize()
{
int[] buckets = new int[MinCapacity];
Entry[] entries = new Entry[MinCapacity];
_freeList = -1;
_buckets = buckets;
_entries = entries;
}
}
using BenchmarkDotNet.Attributes;
using System.Runtime.CompilerServices;
using System.Text;
#if DEBUG
var a = new A();
a.Setup();
a.WithIntern();
#else
BenchmarkDotNet.Running.BenchmarkRunner.Run<A>();
#endif
[MemoryDiagnoser]
public class A
{
private const int MaxCharLength = 1;
private const int Loop = 1000;
private byte[] _chars = null!;
[GlobalSetup]
public void Setup()
{
var chars = _chars = new byte[26 * 2];
for (int i = 0; i < 26; i++)
{
chars[i] = (byte)('A' + i);
chars[i + 26] = (byte)('a' + i);
}
}
[Benchmark]
public void WithoutIntern()
{
var buffer = (stackalloc byte[32]);
Random rand = new(0);
for (int i = 0; i < Loop; i++)
{
var len = rand.Next(1, MaxCharLength);
rand.GetItems(_chars, buffer[..len]);
var s = Encoding.UTF8.GetString(buffer[..len]);
}
}
[Benchmark]
public void WithIntern()
{
var buffer = (stackalloc byte[32]);
Random rand = new(0);
for (int i = 0; i < Loop; i++)
{
var len = rand.Next(1, MaxCharLength);
rand.GetItems(_chars, buffer[..len]);
var s = InternPool.Shared.Intern(buffer[..len]);
}
}
}
Method Mean Error StdDev Gen0 Allocated
WithoutIntern 16.99 us 0.081 us 0.063 us 1.8311 24304 B
WithIntern 29.53 us 0.226 us 0.211 us - 304 B
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment