Method | Mean | Error | StdDev | Gen0 | Allocated |
---|---|---|---|---|---|
WithoutIntern | 16.99 us | 0.081 us | 0.063 us | 1.8311 | 24304 B |
WithIntern | 29.53 us | 0.226 us | 0.211 us | - | 304 B |
Created
March 27, 2025 09:42
-
-
Save ufcpp/a60294b69404745a5334f487b32d635c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Buffers; | |
using System.Diagnostics; | |
using System.Runtime.CompilerServices; | |
/// <summary> | |
/// とりあえず Add する一方の Dictionary 的な実装。 | |
/// ちゃんと「増えすぎたら古いのから消す」みたいな処理入れるなら自前でやるのちょっとつらく、 | |
/// Ben.StringIntern とかの導入考えた方がよさげ。 | |
/// </summary> | |
class InternPool | |
{ | |
public static readonly InternPool Shared = new(); | |
private struct Entry | |
{ | |
public string? value; | |
public int hash; | |
public int next; | |
} | |
private int _count; | |
private int _freeList = -1; | |
private int[]? _buckets; | |
private Entry[]? _entries; | |
private readonly Lock _lock = new(); | |
public string Intern(scoped ReadOnlySpan<byte> utf8Value) | |
{ | |
char[]? array = null; | |
var count = utf8Value.Length * 2; | |
Span<char> span = count <= StackAllocThresholdChars ? stackalloc char[count] : array = ArrayPool<char>.Shared.Rent(count); | |
System.Text.Unicode.Utf8.ToUtf16(utf8Value, span, out _, out count); | |
string r = Intern(span[..count]); | |
if (array != null) ArrayPool<char>.Shared.Return(array); | |
return r; | |
} | |
public string Intern(scoped ReadOnlySpan<char> value) | |
{ | |
ref string? r = ref Unsafe.NullRef<string?>(); | |
lock (_lock) r = ref GetValueRef(value); | |
return r ??= new(value); | |
} | |
public ref string? GetValueRef(scoped ReadOnlySpan<char> value) | |
{ | |
var hash = string.GetHashCode(value, StringComparison.Ordinal); | |
if (_buckets == null) | |
{ | |
Initialize(); | |
Debug.Assert(_buckets != null); | |
return ref Add(value, hash, hash & (_buckets.Length - 1)); | |
} | |
Debug.Assert(_buckets != null); | |
Debug.Assert(_entries != null); | |
var buckets = _buckets; | |
var entries = _entries; | |
int bucketIndex = hash & (buckets.Length - 1); | |
int collisionCount = 0; | |
for (int i = buckets[bucketIndex] - 1; | |
(uint)i < (uint)entries.Length; i = entries[i].next) | |
{ | |
ref var e = ref entries[i]; | |
if(e.hash == hash && value.SequenceEqual(e.value)) | |
return ref entries[i].value; | |
if (collisionCount == entries.Length) | |
{ | |
ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); | |
} | |
collisionCount++; | |
} | |
return ref Add(value, hash, bucketIndex); | |
} | |
private ref string? Add(scoped ReadOnlySpan<char> value, int hash, int bucketIndex) | |
{ | |
Debug.Assert(_buckets != null); | |
Debug.Assert(_entries != null); | |
var buckets = _buckets; | |
var entries = _entries; | |
int entryIndex; | |
if (_freeList != -1) | |
{ | |
entryIndex = _freeList; | |
_freeList = -3 - entries[_freeList].next; | |
} | |
else | |
{ | |
if (_count == entries.Length || entries.Length == 1) | |
{ | |
entries = Resize(); | |
hash = string.GetHashCode(value, StringComparison.Ordinal); | |
bucketIndex = hash & (buckets.Length - 1); | |
// entry indexes were not changed by Resize | |
} | |
entryIndex = _count; | |
} | |
ref var e = ref entries[entryIndex]; | |
e.next = buckets[bucketIndex] - 1; | |
e.hash = hash; | |
buckets[bucketIndex] = entryIndex + 1; | |
_count++; | |
return ref e.value; | |
} | |
private Entry[] Resize() | |
{ | |
Debug.Assert(_buckets != null); | |
Debug.Assert(_entries != null); | |
int count = _count; | |
int newSize = Math.Max(_entries.Length * 2, MinCapacity); | |
if ((uint)newSize > (uint)int.MaxValue) // uint cast handles overflow | |
ThrowCapacityOverflow(); | |
var entries = new Entry[newSize]; | |
Array.Copy(_entries, 0, entries, 0, count); | |
var newBuckets = new int[entries.Length]; | |
while (count-- > 0) | |
{ | |
var hash = string.GetHashCode(entries[count].value, StringComparison.Ordinal); | |
int bucketIndex = hash & (newBuckets.Length - 1); | |
entries[count].next = newBuckets[bucketIndex] - 1; | |
newBuckets[bucketIndex] = count + 1; | |
} | |
_buckets = newBuckets; | |
_entries = entries; | |
return entries; | |
} | |
private const int StackAllocThresholdChars = 256; | |
private const int MinCapacity = 4; | |
internal static void ThrowInvalidOperationException_ConcurrentOperationsNotSupported() => throw new InvalidOperationException("concurrent operations not supported"); | |
internal static void ThrowCapacityOverflow() => throw new InvalidOperationException("capacity overflow"); | |
private void Initialize() | |
{ | |
int[] buckets = new int[MinCapacity]; | |
Entry[] entries = new Entry[MinCapacity]; | |
_freeList = -1; | |
_buckets = buckets; | |
_entries = entries; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using BenchmarkDotNet.Attributes; | |
using System.Runtime.CompilerServices; | |
using System.Text; | |
#if DEBUG | |
var a = new A(); | |
a.Setup(); | |
a.WithIntern(); | |
#else | |
BenchmarkDotNet.Running.BenchmarkRunner.Run<A>(); | |
#endif | |
[MemoryDiagnoser] | |
public class A | |
{ | |
private const int MaxCharLength = 1; | |
private const int Loop = 1000; | |
private byte[] _chars = null!; | |
[GlobalSetup] | |
public void Setup() | |
{ | |
var chars = _chars = new byte[26 * 2]; | |
for (int i = 0; i < 26; i++) | |
{ | |
chars[i] = (byte)('A' + i); | |
chars[i + 26] = (byte)('a' + i); | |
} | |
} | |
[Benchmark] | |
public void WithoutIntern() | |
{ | |
var buffer = (stackalloc byte[32]); | |
Random rand = new(0); | |
for (int i = 0; i < Loop; i++) | |
{ | |
var len = rand.Next(1, MaxCharLength); | |
rand.GetItems(_chars, buffer[..len]); | |
var s = Encoding.UTF8.GetString(buffer[..len]); | |
} | |
} | |
[Benchmark] | |
public void WithIntern() | |
{ | |
var buffer = (stackalloc byte[32]); | |
Random rand = new(0); | |
for (int i = 0; i < Loop; i++) | |
{ | |
var len = rand.Next(1, MaxCharLength); | |
rand.GetItems(_chars, buffer[..len]); | |
var s = InternPool.Shared.Intern(buffer[..len]); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment