Created
August 8, 2017 21:47
-
-
Save Arithmomaniac/2a36321ae2e613b87f84a5749ef88c82 to your computer and use it in GitHub Desktop.
GroupBatch naive implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static class GroupBatchExtension | |
{ | |
/// <summary>A simple IGrouping implementation used by GroupBatch.</summary> | |
private class GroupBatchGrouping<TKey, TElement> : List<TElement>, IGrouping<TKey, TElement> | |
{ | |
public GroupBatchGrouping(int rank, TKey key, IEnumerable<TElement> values):base(values) | |
{ | |
Key = key; | |
Rank = rank; | |
} | |
public TKey Key { get; private set; } | |
//an actual implementation would not keep this inside of the returned object. | |
internal int Rank { get; private set; } | |
} | |
/// <summary> | |
/// Yields items in batches of at most a specific size, where each batch shares a key in common | |
/// </summary> | |
/// <typeparam name="TKey">The type of the key for the batch</typeparam> | |
/// <typeparam name="TSource">The type of the enumerable to batch</typeparam> | |
/// <param name="source">THe source enumerable</param> | |
/// <param name="keySelector">The selector to determine the key from the items</param> | |
/// <param name="size">The maximum number of items in a batch.</param> | |
/// <returns>A stream of batches of the method's size, followed by any "incomplete" batches in order | |
/// of when that batch was created.</returns> | |
public static IEnumerable<IGrouping<TKey, TSource>> GroupBatch<TKey, TSource>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, int size) | |
{ | |
if (source == null) throw new ArgumentNullException(nameof(source)); | |
if (size <= 0) throw new ArgumentOutOfRangeException(nameof(size)); | |
if (keySelector == null) throw new ArgumentNullException(nameof(keySelector)); | |
var groupingDict = new Dictionary<TKey, GroupBatchGrouping<TKey, TSource>>(); | |
int i = 0; | |
foreach (var item in source) | |
{ | |
var key = keySelector(item); | |
//ensure the batch exists for the given item | |
GroupBatchGrouping<TKey, TSource> grouping; | |
if (groupingDict.TryGetValue(key, out grouping)) | |
{ | |
groupingDict[key] = grouping = new GroupBatchGrouping<TKey, TSource>(i, key, new List<TSource>()); | |
i++; | |
} | |
//add the item for that key to the batch in progress | |
grouping.Add(item); | |
//yield the batch in progress for that key if full | |
if (grouping.Count == size) | |
{ | |
yield return grouping; | |
groupingDict.Remove(key); | |
} | |
} | |
//yield remaining batches in ascending order of creation | |
foreach (var grouping in groupingDict.Values.OrderBy(x => x.Rank) | |
{ | |
yield return grouping; | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[TestClass] | |
public class GroupBatchExtension | |
{ | |
[TestMethod] | |
public void GroupBatch() | |
{ | |
var itemsToGroup = new[] { "AA", "BA", "BB", "AB", "AC", "BC", "CA", "CB", "BD", "BE" }; | |
var batches = EnumerableExtension.GroupBatch(itemsToGroup, x => x[0], 2).ToList(); | |
//expect five batches. | |
//first, expect one with key of "B", because hits batch size of two first. | |
//then one of "A", because does second. | |
//then one of "C", because does third. | |
//then we hit "A" again. | |
//Nothing hits batch size again, so we then look at when their batch size was reset. | |
//We skip C's batch, because it is empty. | |
// Since "A"'s current batch was reset longer ago then "B" was, "A" goes first, then "B". (even though | |
//B was the first batch reset overall). | |
Assert.AreEqual(6, batches.Count); | |
Assert.AreEqual('B', batches[0].Key); | |
CollectionAssert.AreEqual((List<string>)batches[0], new[] { "BA", "BB" }); | |
Assert.AreEqual('A', batches[1].Key); | |
CollectionAssert.AreEqual((List<string>)batches[1], new[] { "AA", "AB" }); | |
Assert.AreEqual('C', batches[2].Key); | |
CollectionAssert.AreEqual((List<string>)batches[2], new[] { "CA", "CB" }); | |
Assert.AreEqual('B', batches[3].Key); | |
CollectionAssert.AreEqual((List<string>)batches[3], new[] { "BC", "BD" }); | |
Assert.AreEqual('A', batches[4].Key); | |
CollectionAssert.AreEqual((List<string>)batches[4], new[] { "AC" }); | |
Assert.AreEqual('B', batches[5].Key); | |
CollectionAssert.AreEqual((List<string>)batches[5], new[] { "BE" }); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment