I ported my simple XML parser to Nokogiri and Ox. Latter seems to have smoked the benchmark.
The code should find its way into Peddler et al.
I ported my simple XML parser to Nokogiri and Ox. Latter seems to have smoked the benchmark.
The code should find its way into Peddler et al.
☁ code ruby sax_parsers.rb | |
Rehearsal -------------------------------------------- | |
ox 0.800000 0.020000 0.820000 ( 0.820952) | |
nokogiri 2.110000 0.020000 2.130000 ( 2.144550) | |
----------------------------------- total: 2.950000sec | |
user system total real | |
ox 0.790000 0.010000 0.800000 ( 0.808858) | |
nokogiri 2.080000 0.030000 2.110000 ( 2.239248) |
require 'benchmark' | |
require 'pp' | |
require 'stringio' | |
require 'nokogiri' | |
require 'ox' | |
io = StringIO.new %{ | |
<?xml version="1.0" encoding="UTF-8"?> | |
<ItemLookupResponse> | |
<Items> | |
<Item> | |
<ASIN>0816614024</ASIN> | |
<ItemAttributes> | |
<Creator Role="Author">Gilles Deleuze</Creator> | |
<Creator Role="Contributor">Felix Guattari</Creator> | |
<Title>Thousand Plateaus</Title> | |
</ItemAttributes> | |
</Item> | |
<Item> | |
<ASIN>0231081596</ASIN> | |
<ItemAttributes> | |
<Creator Role="Author">Gilles Deleuze</Creator> | |
<Title>Difference and Repetition</Title> | |
</ItemAttributes> | |
</Item> | |
</Items> | |
</ItemLookupResponse> | |
}.strip.gsub />\s+</, '><' | |
class OxHandler < Ox::Sax | |
attr :root | |
def initialize | |
@stack = [@node = @root = {}] | |
end | |
def attr(key, val) | |
@node[key] = val | |
end | |
def end_element(key) | |
child = @stack.pop | |
@node = @stack.last | |
case @node[key] | |
when Array | |
@node[key] << child | |
when Hash | |
@node[key] = [@node[key], child] | |
else | |
if child.keys == [:__content__] | |
@node[key] = child[:__content__] | |
else | |
@node[key] = child | |
end | |
end | |
end | |
def start_element(key) | |
@stack << @node = {} | |
end | |
def text(val) | |
@node[:__content__] = val | |
end | |
end | |
class NokogiriHandler < Nokogiri::XML::SAX::Document | |
attr :root | |
def characters(val) | |
(@node['__content__'] ||= '') << val | |
end | |
def end_element(key) | |
child = @stack.pop | |
@node = @stack.last | |
case @node[key] | |
when Array | |
@node[key] << child | |
when Hash | |
@node[key] = [@node[key], child] | |
else | |
if child.keys == ['__content__'] | |
@node[key] = child['__content__'] | |
else | |
@node[key] = child | |
end | |
end | |
end | |
def start_element(key, attrs = []) | |
@stack << @node = {} | |
attrs.each do |attr| | |
key, val = *attr | |
@node[key] = val | |
end | |
end | |
def start_document | |
@stack = [@root = {}] | |
end | |
end | |
n = 10000 | |
Benchmark.bmbm do |b| | |
b.report('ox') do | |
n.times do | |
io.rewind | |
handler = OxHandler.new | |
Ox.sax_parse handler, io | |
end | |
end | |
b.report('nokogiri') do | |
n.times do | |
io.rewind | |
handler = NokogiriHandler.new | |
parser = Nokogiri::XML::SAX::Parser.new handler | |
parser.parse io | |
end | |
end | |
end |