Last active
May 11, 2021 05:21
-
-
Save mikeminutillo/b767695599789ccc49551a82e0e99eaf to your computer and use it in GitHub Desktop.
Using a very basic algorithm to extract ids from serialized messages. Running this on my machine reports 6.662 ms per message (117,870 bytes) for JSON messages and 11.328 ms per message (165,034 bytes) for XML.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<Query Kind="Program"> | |
<NuGetReference>Newtonsoft.Json</NuGetReference> | |
<NuGetReference>System.Text.Json</NuGetReference> | |
<Namespace>Newtonsoft.Json</Namespace> | |
<Namespace>System.Text.Json</Namespace> | |
<Namespace>System.Xml.Serialization</Namespace> | |
</Query> | |
/// <summary>
/// Script entry point. Builds one sample order containing 1000 randomly populated
/// line items, registers the JSON and XML extractors together with a single
/// "id$" property-name rule, and then times both formats for 1000 iterations each.
/// </summary>
void Main()
{
    const int lineItemCount = 1000;
    const int iterations = 1000;

    var rng = new Random();

    var message = new MyMessage
    {
        CustomerId = Guid.NewGuid(),
        OrderId = Guid.NewGuid()
    };

    // Each line item draws Qty first and UnitCost second from the same generator.
    message.LineItems = Enumerable.Range(1, lineItemCount)
        .Select(_ => new OrderLineItem
        {
            ProductId = Guid.NewGuid(),
            Qty = rng.Next(1, 100),
            UnitCost = rng.Next(50, 10000) / 100m
        })
        .ToArray();

    var finder = new IndexablePropertyFinder();
    finder.AddExtractor(new JsonIndexablePropertyExtractor());
    finder.AddExtractor(new XmlIndexablePropertyExtractor());
    finder.AddPropertyIndexRule("id$");

    TestJson(message, finder, iterations);
    TestXml(message, finder, iterations);
}
/// <summary>
/// Serializes <paramref name="message"/> to indented JSON with Newtonsoft.Json,
/// encodes it as UTF-8, runs <paramref name="propertyFinder"/> over those bytes
/// <paramref name="iterations"/> times and dumps the byte count and timings
/// under the "Json" heading.
/// </summary>
/// <param name="message">The message to serialize once up front.</param>
/// <param name="propertyFinder">The finder invoked with content type "application/json".</param>
/// <param name="iterations">How many times the finder is invoked; also the divisor for the per-message time.</param>
void TestJson(MyMessage message, IndexablePropertyFinder propertyFinder, int iterations)
{
    var payload = Encoding.UTF8.GetBytes(
        JsonConvert.SerializeObject(message, Newtonsoft.Json.Formatting.Indented));

    var timer = new Stopwatch();
    timer.Start();

    var pass = 0;
    while (pass < iterations)
    {
        propertyFinder.FindProperties(payload, "application/json");
        pass++;
    }

    timer.Stop();

    var elapsedMs = timer.ElapsedMilliseconds;
    var report = new
    {
        TotalBytes = payload.Length,
        TotalMilliseconds = elapsedMs,
        MillisecondsPerMessage = elapsedMs / (decimal)iterations
    };
    report.Dump("Json");
}
/// <summary>
/// Serializes <paramref name="message"/> to XML through its <c>Serialize()</c> extension,
/// encodes it as UTF-8, runs <paramref name="propertyFinder"/> over those bytes
/// <paramref name="iterations"/> times and dumps the byte count and timings
/// under the "Xml" heading.
/// </summary>
/// <param name="message">The message to serialize once up front.</param>
/// <param name="propertyFinder">The finder invoked with content type "text/xml".</param>
/// <param name="iterations">How many times the finder is invoked; also the divisor for the per-message time.</param>
void TestXml(MyMessage message, IndexablePropertyFinder propertyFinder, int iterations)
{
    // No XmlSerializer is created here: message.Serialize() builds its own,
    // so a local instance would be constructed and never used.
    var serialized = message.Serialize();
    var xmlBytes = Encoding.UTF8.GetBytes(serialized);

    var stopwatch = Stopwatch.StartNew();
    for (var i = 0; i < iterations; i++)
    {
        propertyFinder.FindProperties(xmlBytes, "text/xml");
    }
    stopwatch.Stop();

    new
    {
        TotalBytes = xmlBytes.Length,
        TotalMilliseconds = stopwatch.ElapsedMilliseconds,
        MillisecondsPerMessage = stopwatch.ElapsedMilliseconds / (decimal)iterations
    }.Dump("Xml");
}
/// <summary>
/// Serialization helpers.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Serializes <paramref name="obj"/> to an indented XML string using
    /// <see cref="XmlSerializer"/> for the static type <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">The type handed to the <see cref="XmlSerializer"/> constructor.</typeparam>
    /// <param name="obj">The object to serialize.</param>
    /// <returns>The XML document as a string.</returns>
    public static string Serialize<T>(this T obj)
    {
        var serializer = new XmlSerializer(typeof(T));
        var settings = new XmlWriterSettings
        {
            Indent = true,
            NewLineOnAttributes = true,
            Encoding = Encoding.UTF8
        };

        using (var stringWriter = new Utf8StringWriter())
        {
            // The XmlWriter is the buffering layer, not the StringWriter (whose Flush
            // does nothing useful). Close the XmlWriter before reading the result so the
            // full document has been pushed into the StringWriter regardless of whether
            // the serializer flushed on its own.
            using (var xmlWriter = XmlWriter.Create(stringWriter, settings))
            {
                serializer.Serialize(xmlWriter, obj);
                xmlWriter.Flush();
            }

            return stringWriter.ToString();
        }
    }

    /// <summary>
    /// A <see cref="StringWriter"/> that reports UTF-8 as its encoding, so the XML
    /// declaration produced by a writer layered on top of it advertises utf-8.
    /// </summary>
    class Utf8StringWriter : StringWriter
    {
        public override Encoding Encoding => Encoding.UTF8;
    }
}
#region Data Model | |
/// <summary>
/// Sample order message used as the payload for the JSON and XML runs.
/// </summary>
public class MyMessage
{
    /// <summary>Identifier of the customer.</summary>
    public Guid CustomerId { get; set; }

    /// <summary>Identifier of the order.</summary>
    public Guid OrderId { get; set; }

    /// <summary>The line items belonging to the order.</summary>
    public OrderLineItem[] LineItems { get; set; }
}
/// <summary>
/// A single line of an order: which product, how many, and at what unit cost.
/// </summary>
public class OrderLineItem
{
    /// <summary>Identifier of the product.</summary>
    public Guid ProductId { get; set; }

    /// <summary>Quantity ordered.</summary>
    public int Qty { get; set; }

    /// <summary>Cost of one unit.</summary>
    public decimal UnitCost { get; set; }
}
#endregion | |
/// <summary>
/// An immutable name/value pair describing one property found in a message body.
/// </summary>
class IndexableProperty
{
    /// <summary>
    /// Creates a pair from the given name and value; both are stored as supplied.
    /// </summary>
    /// <param name="propertyName">The name of the property.</param>
    /// <param name="propertyValue">The value of the property as text.</param>
    public IndexableProperty(string propertyName, string propertyValue)
    {
        this.PropertyName = propertyName;
        this.PropertyValue = propertyValue;
    }

    /// <summary>The name of the property.</summary>
    public string PropertyName { get; }

    /// <summary>The value of the property as text.</summary>
    public string PropertyValue { get; }
}
/// <summary>
/// Extracts indexable properties from a serialized message body of one content type.
/// </summary>
interface IIndexablePropertyExtractor
{
    /// <summary>
    /// The content type this extractor is registered under (for example "application/json").
    /// </summary>
    string ContentType { get; }

    /// <summary>
    /// Scans <paramref name="bytes"/> and returns the properties whose names are
    /// accepted by <paramref name="shouldIndex"/>.
    /// </summary>
    /// <param name="bytes">The serialized message body.</param>
    /// <param name="shouldIndex">Predicate that receives a property name and returns true when it should be indexed.</param>
    /// <returns>The matching properties.</returns>
    IEnumerable<IndexableProperty> ExtractProperties(byte[] bytes, Func<string, bool> shouldIndex);
}
/// <summary>
/// Routes a serialized message to the extractor registered for its content type and
/// asks it for every property whose name satisfies at least one registered rule.
/// </summary>
class IndexablePropertyFinder
{
    // Content types are protocol tokens, not natural-language text, so they are
    // compared ordinally (ignoring case) rather than with culture-aware rules.
    private readonly IDictionary<string, IIndexablePropertyExtractor> extractors =
        new Dictionary<string, IIndexablePropertyExtractor>(StringComparer.OrdinalIgnoreCase);

    private readonly IList<Func<string, bool>> rules = new List<Func<string, bool>>();

    /// <summary>
    /// Registers <paramref name="extractor"/> under its <see cref="IIndexablePropertyExtractor.ContentType"/>.
    /// </summary>
    /// <param name="extractor">The extractor to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="extractor"/> is null.</exception>
    /// <exception cref="ArgumentException">An extractor is already registered for the same content type.</exception>
    public void AddExtractor(IIndexablePropertyExtractor extractor)
    {
        if (extractor == null)
        {
            throw new ArgumentNullException(nameof(extractor));
        }

        extractors.Add(extractor.ContentType, extractor);
    }

    /// <summary>
    /// Adds a predicate rule; a property is indexed when any rule returns true for its name.
    /// </summary>
    /// <param name="rule">Predicate over the property name.</param>
    /// <exception cref="ArgumentNullException"><paramref name="rule"/> is null.</exception>
    public void AddPropertyIndexRule(Func<string, bool> rule)
    {
        // Reject null here; otherwise it would only surface later as a
        // NullReferenceException in the middle of an extraction.
        if (rule == null)
        {
            throw new ArgumentNullException(nameof(rule));
        }

        rules.Add(rule);
    }

    /// <summary>
    /// Adds a rule expressed as a case-insensitive regular expression over the property name.
    /// </summary>
    /// <param name="rule">The regular expression pattern, for example "id$".</param>
    /// <exception cref="ArgumentNullException"><paramref name="rule"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="rule"/> is not a valid pattern.</exception>
    public void AddPropertyIndexRule(string rule)
    {
        // CultureInvariant keeps case-insensitive matching stable across machines:
        // without it, cultures such as tr-TR case-fold 'I' differently and a pattern
        // like "id$" can stop matching "OrderId".
        var regex = new Regex(
            rule,
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
        rules.Add(regex.IsMatch);
    }

    /// <summary>
    /// Extracts the indexable properties from <paramref name="bytes"/> using the
    /// extractor registered for <paramref name="contentType"/>.
    /// </summary>
    /// <param name="bytes">The serialized message body; passed to the extractor unchanged.</param>
    /// <param name="contentType">The content type of the body; matched case-insensitively.</param>
    /// <returns>
    /// The properties reported by the extractor, or an empty sequence when
    /// <paramref name="contentType"/> is null or has no registered extractor.
    /// </returns>
    public IEnumerable<IndexableProperty> FindProperties(byte[] bytes, string contentType)
    {
        // A missing content type means "no extractor", not an error.
        if (contentType != null && extractors.TryGetValue(contentType, out var extractor))
        {
            return extractor.ExtractProperties(bytes, ShouldIndex);
        }

        return Enumerable.Empty<IndexableProperty>();
    }

    // True when at least one registered rule accepts the property name;
    // with no rules registered nothing is indexed.
    private bool ShouldIndex(string propertyName)
    {
        foreach (var rule in rules)
        {
            if (rule(propertyName))
            {
                return true;
            }
        }

        return false;
    }
}
/// <summary>
/// Extracts indexable properties from a UTF-8 JSON document by scanning its tokens
/// with <see cref="Utf8JsonReader"/>, without building an object model.
/// </summary>
class JsonIndexablePropertyExtractor : IIndexablePropertyExtractor
{
    /// <summary>The content type handled by this extractor: "application/json".</summary>
    public string ContentType { get; } = "application/json";

    /// <summary>
    /// Walks every token in <paramref name="bytes"/>. For each property name accepted by
    /// <paramref name="shouldIndex"/> the following token is inspected: string and number
    /// values are recorded, any other value kind (object, array, true/false/null) is not.
    /// </summary>
    /// <param name="bytes">The UTF-8 encoded JSON document.</param>
    /// <param name="shouldIndex">Predicate that receives a property name and returns true when it should be indexed.</param>
    /// <returns>The matching properties in document order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="shouldIndex"/> is null.</exception>
    public IEnumerable<IndexableProperty> ExtractProperties(byte[] bytes, Func<string, bool> shouldIndex)
    {
        if (shouldIndex == null)
        {
            throw new ArgumentNullException(nameof(shouldIndex));
        }

        var found = new List<IndexableProperty>();
        var reader = new Utf8JsonReader(bytes);

        while (reader.Read())
        {
            if (reader.TokenType != JsonTokenType.PropertyName)
            {
                continue;
            }

            var property = reader.GetString();

            // Advance to the value only for names we care about. For other names the
            // outer loop keeps scanning, which also visits nested objects and arrays.
            if (!shouldIndex(property) || !reader.Read())
            {
                continue;
            }

            switch (reader.TokenType)
            {
                case JsonTokenType.Number:
                    // Format with the invariant culture so the indexed text does not depend
                    // on the machine's regional settings (e.g. "1,5" vs "1.5"). Numbers that
                    // do not fit in a decimal are kept as their raw JSON text instead of
                    // aborting the whole extraction with a FormatException.
                    var numberText = reader.TryGetDecimal(out var number)
                        ? number.ToString(System.Globalization.CultureInfo.InvariantCulture)
                        : Encoding.UTF8.GetString(reader.ValueSpan.ToArray());
                    found.Add(new IndexableProperty(property, numberText));
                    break;

                case JsonTokenType.String:
                    found.Add(new IndexableProperty(property, reader.GetString()));
                    break;
            }
        }

        return found;
    }
}
/// <summary>
/// Extracts indexable properties from an XML document by streaming it through an
/// <see cref="XmlReader"/>.
/// </summary>
class XmlIndexablePropertyExtractor : IIndexablePropertyExtractor
{
    /// <summary>The content type handled by this extractor: "text/xml".</summary>
    public string ContentType { get; } = "text/xml";

    /// <summary>
    /// Reads <paramref name="bytes"/> node by node. Whenever an element whose name is
    /// accepted by <paramref name="shouldIndex"/> is reached, every text node inside that
    /// element's subtree is recorded under the element's name.
    /// </summary>
    /// <param name="bytes">The XML document.</param>
    /// <param name="shouldIndex">Predicate that receives an element name and returns true when it should be indexed.</param>
    /// <returns>The matching properties in document order.</returns>
    public IEnumerable<IndexableProperty> ExtractProperties(byte[] bytes, Func<string, bool> shouldIndex)
    {
        var matches = new List<IndexableProperty>();

        using (var input = new MemoryStream(bytes))
        using (var xml = XmlReader.Create(input))
        {
            while (xml.Read())
            {
                // The predicate is only consulted for element nodes.
                var isCandidate = xml.NodeType == XmlNodeType.Element && shouldIndex(xml.Name);
                if (!isCandidate)
                {
                    continue;
                }

                CollectTextNodes(xml, matches);
            }
        }

        return matches;
    }

    // Reads the subtree of the element the reader is positioned on and appends one
    // entry, named after that element, for each text node found inside it.
    private static void CollectTextNodes(XmlReader element, List<IndexableProperty> sink)
    {
        var elementName = element.Name;

        using (var subtree = element.ReadSubtree())
        {
            while (subtree.Read())
            {
                if (subtree.NodeType != XmlNodeType.Text)
                {
                    continue;
                }

                sink.Add(new IndexableProperty(elementName, subtree.Value));
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment