Created
November 20, 2012 03:46
-
-
Save cheald/4115813 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ragel state machine (Ruby host language) recognising an RFC 2822
# address-list. While scanning, the embedded actions copy slices of `data`
# into @parts under the keys :name, :local and :host. When the input holds
# several addresses, later matches overwrite earlier ones.
%%{
  machine rfc2822_header;

  # Remember the offset at which the token currently being scanned starts.
  action mark {
    @pos = p
  }

  action domain {
    # We have to match 'foo . bar' per the tests and transform it into 'foo.bar'
    @parts[:host] = data[@pos..p-1].delete(" ")
  }

  action local {
    @parts[:local] = data[@pos..p-1]
  }

  action name {
    # Enclosing quotes are stripped to make the tests pass. String anchors are
    # used so that only the first and last character can ever be removed, even
    # when a folded display name spans several lines.
    @parts[:name] = data[@pos..p-1].gsub(/\A"|"\z/, "")
  }

  # Common ABNF rules
  cr = "\r";
  lf = "\n";
  crlf = cr lf;
  sp = " ";
  tab = "\t";
  wsp = (sp | tab);
  obs_fws = wsp+ ( crlf wsp+ )*;
  fws = ( ( wsp* crlf )? wsp+ ) | obs_fws;
  NO_WS_CTL = 0x01..0x08 | "\v" | "\f" | 0x0e..0x1f | 0x7f;

  # Rules to consume comments. Comments nest, so an opening parenthesis inside
  # a comment re-enters consumeComments through fcall and the matching closing
  # parenthesis returns through fret. This requires the host code to supply
  # the `stack` array.
  consumeAny = any - ("(" | ")") | "(" @{fcall consumeComments;};
  consumeComments := consumeAny+ ")" $!{fhold; fret; };
  startComment = "(" @{fcall consumeComments;};
  CFWS = ( fws? startComment )* ( ( fws? startComment ) | fws);

  # RFC 2822 section 3.2.3: NO-WS-CTL / %d33-39 / %d42-91 / %d93-126.
  # Not referenced by any rule below; kept for completeness.
  ctext = NO_WS_CTL | 0x21..0x27 | 0x2a..0x5b | 0x5d..0x7e;

  obs_char = 0x00..0x09 | "\v" | "\f" | 0x0e..0x7f;
  obs_text = "\n"* "\r"* ( obs_char "\n"* "\r"* )*;
  text = 0x01..0x09 | "\v" | "\f" | 0x0e..0x7f; # | obs_text; # obs_causes parser ambiguity; track down why.
  obs_qp = "\\" 0x00..0x7f;
  quoted_pair = ( "\\" text ) | obs_qp;

  dtext = NO_WS_CTL | 0x21..0x5a | 0x5E..0x7E;
  dcontent = dtext | quoted_pair;

  atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "-" | "/" | "=" | "?" | "^" | "_" | "`" | "{" | "|" | "}" | "~";
  # Looser than the RFC on purpose: runs of dots and trailing dots are accepted.
  dot_atom_text = atext+ ( "."* atext+ )* "."*;
  dot_atom = CFWS? dot_atom_text CFWS?;

  qtext = NO_WS_CTL | "!" | 0x23..0x5b | 0x5d..0x7e;
  qcontent = qtext | quoted_pair;
  quoted_string = "\"" ( fws? qcontent )* fws? "\"";

  atom = atext+;
  word = atom | quoted_string;
  obs_phrase = word (word | "." | CFWS)+;
  phrase = word+ | obs_phrase;

  obs_domain = atom ( "." atom )*;
  # RFC 2822 section 3.4.1: "[" *([FWS] dcontent) [FWS] "]". The repetition is
  # needed so that literals longer than one character, such as [127.0.0.1],
  # are accepted.
  domain_literal = CFWS? "[" (fws? dcontent)* fws? "]" CFWS?;
  domain = dot_atom ("."? dot_atom)* | domain_literal | obs_domain;

  obs_local_part = word ( "." word )*;
  obs_domain_list = "@" domain ((CFWS | "," )* CFWS? "@" domain)*;
  obs_route = CFWS? obs_domain_list ":" CFWS?;

  # I modified display_name here from `phrase` to account for unquoted names with commas.
  display_name = word (CFWS? (word | ","))*;

  local_part = dot_atom | quoted_string | obs_local_part;
  addr_spec = (local_part >mark %local) "@" (domain >mark %domain);
  obs_angle_addr = CFWS? "<" obs_route? addr_spec ">" CFWS?;
  angle_addr = CFWS? "<" addr_spec ">" CFWS? | obs_angle_addr;
  name_addr = (display_name >mark %name) angle_addr;
  mailbox = name_addr | addr_spec;

  obs_mbox_list = (mailbox? CFWS? "," CFWS?)+ mailbox?;
  mailbox_list = mailbox ("," mailbox)* | obs_mbox_list;
  group = display_name ":" (mailbox_list | CFWS)? ";" CFWS?;
  address = mailbox | group;
  obs_addr_list = (address? CFWS? "," CFWS?)+ address?;
  address_list = address ("," address)* | obs_addr_list;

  main := address_list;
}%%
# Returns a copy of +host+ with every space character removed, so that a
# domain written as "test . example" becomes "test.example".
#
# @param host [String] raw host text
# @return [String] a new string; +host+ itself is not modified
def process_host(host)
  host.delete(' ')
end
def parse(email) | |
@parts = {} | |
@pos = [] | |
data = email | |
eof = data.length | |
%%write data; | |
%%write init; | |
%%write exec; | |
@parts | |
end | |
require 'minitest/spec'
require 'minitest/autorun'

# Specs for the top-level `parse` method. Each example feeds one address
# string through the parser and checks the captured :name, :local and :host.
describe "mail parsing" do
  it "can parse a basic email address" do
    address = parse("ryan@example.com")
    address[:local].must_equal "ryan"
    address[:host].must_equal "example.com"
  end

  it "can parse a named address" do
    address = parse("Ryan Bigg <ryan@example.com>")
    address[:name].must_equal "Ryan Bigg"
    address[:local].must_equal "ryan"
    address[:host].must_equal "example.com"
  end

  # Stolen from Mail
  it "should support |Minero Aoki<aamine@0246.loveruby.net>|" do
    address = parse("Minero Aoki<aamine@0246.loveruby.net>")
    address[:name].must_equal "Minero Aoki"
    address[:local].must_equal "aamine"
    address[:host].must_equal "0246.loveruby.net"
  end

  # Stolen from Mail
  it "should support lots of dots" do
    1.upto(10) do |times|
      dots = "." * times
      address = parse("hoge#{dots}test@docomo.ne.jp")
      address[:local].must_equal "hoge#{dots}test"
      address[:host].must_equal "docomo.ne.jp"
    end
  end

  # Stolen from Mail
  it "should handle trailing dots" do
    1.upto(10) do |times|
      dots = "." * times
      address = parse("hoge#{dots}@docomo.ne.jp")
      address[:local].must_equal "hoge#{dots}"
      address[:host].must_equal "docomo.ne.jp"
    end
  end

  # Stolen from Mail (which actually stole it from a Perl test suite)
  it 'should handle "Joe & J. Harvey" <ddd @Org>' do
    address = parse('"Joe & J. Harvey" <ddd @Org>')
    address[:name].must_equal "Joe & J. Harvey"
    address[:local].must_equal "ddd "
    address[:host].must_equal "Org"
  end

  it "should handle jrh%cup.portal.com@portal.unix.portal.com" do
    address = parse('jrh%cup.portal.com@portal.unix.portal.com')
    address[:local].must_equal "jrh%cup.portal.com"
    address[:host].must_equal "portal.unix.portal.com"
  end

  it "should handle David Apfelbaum <da0g+@andrew.cmu.edu>" do
    address = parse("David Apfelbaum <da0g+@andrew.cmu.edu>")
    address[:name].must_equal "David Apfelbaum"
    address[:local].must_equal "da0g+"
    address[:host].must_equal "andrew.cmu.edu"
  end

  it "should handle Stephen Burke, Liverpool <BURKE@vxdsya.desy.de>" do
    address = parse("Stephen Burke, Liverpool <BURKE@vxdsya.desy.de>")
    address[:name].must_equal "Stephen Burke, Liverpool"
    address[:local].must_equal "BURKE"
    address[:host].must_equal "vxdsya.desy.de"
  end

  it "should handle jdoe@test . example" do
    address = parse("jdoe@test . example")
    address[:local].must_equal "jdoe"
    address[:host].must_equal "test.example"
  end

  it "should handle /G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/@mhs-relay.ac.uk" do
    address = parse("/G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/@mhs-relay.ac.uk")
    address[:local].must_equal "/G=Owen/S=Smith/O=SJ-Research/ADMD=INTERSPAN/C=GB/"
    address[:host].must_equal "mhs-relay.ac.uk"
  end

  it "should handle The Newcastle Info-Server <info-admin@newcastle.ac.uk>" do
    address = parse("The Newcastle Info-Server <info-admin@newcastle.ac.uk>")
    address[:name].must_equal "The Newcastle Info-Server"
    address[:local].must_equal "info-admin"
    address[:host].must_equal "newcastle.ac.uk"
  end

  it "should handle JAMES R. TWINE - THE NERD <TWINE57%SDELVB%SNYDELVA.bitnet@CUNYVM.CUNY.EDU>" do
    address = parse(%Q{"JAMES R. TWINE - THE NERD" <TWINE57%SDELVB%SNYDELVA.bitnet@CUNYVM.CUNY.EDU>})
    address[:name].must_equal "JAMES R. TWINE - THE NERD"
    address[:local].must_equal "TWINE57%SDELVB%SNYDELVA.bitnet"
    address[:host].must_equal "CUNYVM.CUNY.EDU"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment