Created
June 5, 2015 11:33
-
-
Save damphyr/0db02fd687ea11391b4f to your computer and use it in GitHub Desktop.
Diesel tokenizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Diesel
  # Mixin providing a line-oriented tokenizer plus small helpers for
  # consuming the resulting token queue (peek / accept / expect).
  #
  # Every helper that detects a problem goes through #fail_verbosely, which
  # prints the tokens consumed so far and raises Diesel::Error.
  module Tokenize
    # Not written by any method in this module (#tokenize works on a local
    # copy of each line); kept so that including classes retain the accessor.
    attr_accessor :input

    # Splits +source+ into a flat array of string tokens.
    #
    # Per line:
    # * everything from '##' to end of line is discarded;
    # * the text before the first '%%' is split on whitespace, with each of
    #   the characters { } [ ] = ; becoming a token of its own;
    # * every '%%'-separated segment after the first is emitted as the pair
    #   "!DOC!", <stripped segment text>.
    #
    # @param source [#each_line, #each] a String or IO (iterated by line) or
    #   any collection of line strings such as an Array
    # @return [Array<String>] the tokens in source order
    def tokenize(source)
      tokens = []
      # Strings have no #each; prefer #each_line when the object offers it
      # and fall back to #each for plain collections of lines.
      iterator = source.respond_to?(:each_line) ? :each_line : :each
      source.public_send(iterator) do |line|
        # Comment type 1: ignore all text between '##' and EOL.
        code = line.gsub(/##.*$/, "").strip
        # Comment type 2: text between '%%' and EOL is documentation.
        parts = code.split("%%")
        # Blank lines (and lines consisting only of '%%') yield no parts.
        next if parts.empty?

        # Pad the punctuation with spaces so it splits off as separate tokens.
        padded = parts.first.gsub(/([{}\[\]=;])/, ' \1 ')
        tokens.concat(padded.split(/\s+/).map(&:strip).reject(&:empty?))

        # Encode comments of type 2 as a marker token followed by the text.
        parts.drop(1).each { |doc| tokens.push("!DOC!", doc.strip) }
      end
      tokens
    end

    # Tokens consumed so far through #get_next, oldest first.
    #
    # @return [Array] the lazily created history list
    def history
      @history ||= []
    end

    # Returns the next token without consuming it.
    #
    # @param tokens [Array<String>] the token queue
    # @return [String, nil] the head of the queue, or nil when it is empty
    def peek(tokens)
      tokens.first
    end

    # Removes the head of +tokens+, records it in #history and returns it.
    #
    # @param tokens [Array<String>] the token queue (mutated)
    # @return [String] the consumed token
    # @raise [Diesel::Error] if the queue was empty or the token is empty
    def get_next(tokens)
      token = tokens.shift
      # Recorded before validation so the failure dump includes the bad entry.
      history << token
      # `nil?` instead of the legacy NIL constant, which current Ruby
      # versions no longer define.
      fail_verbosely "Fatal tokenizing error: NIL token in queue" if token.nil?
      fail_verbosely "Fatal tokenizing error: empty token in queue" if token.empty?
      token
    end

    # Consumes the next token only if it equals +keyword+.
    #
    # @param keyword [String] the token to look for
    # @param tokens [Array<String>] the token queue (mutated on a match)
    # @return [String, nil] the consumed token, or nil when the queue is
    #   empty or its head differs from +keyword+
    def accept_keyword(keyword, tokens)
      return nil if tokens.empty?

      tokens.first == keyword ? get_next(tokens) : nil
    end

    # Consumes the next token and insists that it equals +keyword+.
    #
    # @param keyword [String] the required token
    # @param tokens [Array<String>] the token queue (mutated)
    # @return [String] the consumed token
    # @raise [Diesel::Error] on an empty queue or a different token
    def expect_keyword(keyword, tokens)
      fail_verbosely "Unexpected EOF" if tokens.empty?
      token = get_next(tokens)
      fail_verbosely "Unexpected '#{token}' read (expected '#{keyword}')" unless token == keyword
      token
    end

    # Consumes the next token and insists that +array+ already contains it.
    #
    # @param array [#include?] the known identifiers
    # @param tokens [Array<String>] the token queue (mutated)
    # @return [String] the consumed token
    # @raise [Diesel::Error] on an empty queue or an unknown identifier
    def expect_old(array, tokens)
      fail_verbosely "Unexpected EOF" if tokens.empty?
      token = get_next(tokens)
      fail_verbosely "Unknown identifier '#{token}'" unless array.include?(token)
      token
    end

    # Consumes the next token and insists that +array+ does not contain it.
    #
    # @param array [#include?] the identifiers already taken
    # @param tokens [Array<String>] the token queue (mutated)
    # @return [String] the consumed token
    # @raise [Diesel::Error] on an empty queue or an identifier already in use
    def expect_new(array, tokens)
      fail_verbosely "Unexpected EOF" if tokens.empty?
      token = get_next(tokens)
      fail_verbosely "Identifier '#{token}' already in use" if array.include?(token)
      token
    end

    # Passes +value+ through unless +array+ contains it.
    #
    # @param value [Object] the value to check
    # @param array [#include?] the forbidden values
    # @return [Object] +value+
    # @raise [Diesel::Error] if +value+ is in +array+
    def exclude_value(value, array)
      fail_verbosely "Forbidden value '#{value}'" if array.include?(value)
      value
    end

    # Passes +value+ through only if +array+ contains it.
    #
    # @param value [Object] the value to check
    # @param array [#include?] the permitted values
    # @return [Object] +value+
    # @raise [Diesel::Error] if +value+ is not in +array+
    def accept_allowed(value, array)
      fail_verbosely "Forbidden value '#{value}', should be one of '#{array}'" unless array.include?(value)
      value
    end

    # Prints the consumed-token history to standard output, then raises.
    #
    # @param msg [String] the error message
    # @raise [Diesel::Error] always
    def fail_verbosely(msg)
      puts "Token history at point of error:"
      puts "================================"
      puts history.join("\n")
      puts "================================"
      raise Diesel::Error, msg
    end
  end

  # Raised by Tokenize#fail_verbosely for every tokenizing/parsing failure.
  class Error < RuntimeError
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment