Created
September 15, 2010 23:44
-
-
Save markan/581696 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
# Decodes a variable-length integer from the leading bytes of +s+.
#
# Each byte contributes its low 7 bits, least-significant group first; a set
# high bit (0x80) means another byte follows, a clear high bit ends the value.
#
# @param s [String] bytes to decode; bytes after the terminating one are ignored
# @return [Array(Integer, Integer)] the decoded value and the zero-based index
#   of the byte that terminated it
# @raise [ArgumentError] if +s+ ends before a byte with a clear high bit is seen
def decode_vint(s)
  acc = 0
  s.each_byte.with_index do |byte, index|
    acc += (byte & 0x7f) << (index * 7)
    return [acc, index] if (byte & 0x80).zero?
  end
  # Falling out of the loop means no terminating byte was found (this also
  # covers the empty string).
  raise ArgumentError, "truncated variable-length integer"
end
# Two-byte sample string (0x00, 0x01). Nothing in the visible code reads it;
# presumably a leftover manual test input for decode_vint -- confirm before removing.
@test = "\000\001"
# Reads one variable-length integer from +ios+, consuming exactly the bytes
# that encode it.
#
# Each byte contributes its low 7 bits, least-significant group first; a set
# high bit (0x80) means another byte follows.
#
# @param ios [#readbyte] byte source positioned at the start of the integer
# @return [Integer] the decoded value
# @raise [EOFError] propagated from +readbyte+ (as IO#readbyte does) when the
#   stream ends before the terminating byte
def get_vint(ios)
  acc = 0
  shift = 0
  loop do
    byte = ios.readbyte
    acc += (byte & 0x7f) << shift
    return acc if (byte & 0x80).zero?

    shift += 7
  end
end
# Reads a length-prefixed string from +ios+: a variable-length integer byte
# count (via get_vint) followed by that many bytes.
#
# @param ios [#readbyte, #read] byte source positioned at the length prefix
# @return [String] the payload bytes (empty when the length prefix is 0)
# @raise [EOFError] if the stream ends before the announced number of bytes
#   could be read
def get_string(ios)
  length = get_vint(ios)
  data = ios.read(length)
  # read(length) reports truncation by returning nil or a short string rather
  # than raising, so check it explicitly.
  got = data ? data.bytesize : 0
  raise EOFError, "expected #{length} bytes, got #{got}" if got < length

  data
end
# Reads one record from +ios+: a length-prefixed string (via get_string)
# followed by a single byte.
#
# @param ios [#readbyte, #read] byte source positioned at the start of a record
# @return [Array(String, Integer)] the string and the byte that follows it
#   (presumably per-field flag bits -- meaning not established here; confirm
#   against the file format)
def get_record(ios)
  name = get_string(ios)
  flag = ios.readbyte
  [name, flag]
end
# Consumes the file preamble from +ios+ and returns the integer that follows it.
#
# @param ios [#readbyte] byte source positioned at the start of the stream
# @return [Integer] the variable-length integer read after the header
#   (process_stream treats it as the expected record count)
def get_preamble(ios)
  # Five header bytes of unknown meaning are read and discarded.
  5.times { ios.readbyte }
  get_vint(ios)
end
# Reads every record from +ios+ and collects the distinct record strings.
#
# Reads the preamble first, then records until end of stream, and prints a
# line to stdout comparing the count announced by the preamble with the number
# of records actually read.
#
# @param ios [#readbyte, #read, #eof?] byte source positioned at the start
# @return [Hash{String => true}] one key per distinct record string
def process_stream(ios)
  results = {}
  expected = get_preamble(ios)
  count = 0
  until ios.eof?
    # The byte that follows each string is read but not used here.
    name, _flag = get_record(ios)
    results[name] = true
    count += 1
  end
  puts "Expected #{expected} got #{count}"
  results
end
# Runs process_stream over each file and returns the union of all keys found.
#
# @param files [Array<String>] paths of the files to read
# @return [Array<String>] the distinct keys across all files, sorted
def process_all(files)
  all = {}
  files.each do |file|
    # Binary mode: the content is parsed byte by byte, so no newline
    # translation must be applied on platforms that perform it.
    File.open(file, 'rb') { |stream| all.merge!(process_stream(stream)) }
  end
  all.keys.sort
end
# Script body: collect the keys from every *.fnm file in the current directory
# and write them, one per line, to solr_keys.txt.
files = Dir.glob('*.fnm')
keys = process_all(files)
# Block form so the output file is flushed and closed even if a write fails.
File.open("solr_keys.txt", "w") do |out|
  keys.each { |key| out.write("#{key}\n") }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment