-- parse.lua
-- Source: GitHub gist szensk/0c5158429f33b4c134e8 (last active August 29, 2015 14:15).
-- Note: GitHub flagged this file as containing hidden or bidirectional Unicode
-- text that may be interpreted or compiled differently than it appears; review
-- it in an editor that reveals hidden Unicode characters.
local util = require"moonscript.util" | |
local lpeg = require"lpeg" | |
local debug_grammar = false | |
local data = require"moonscript.data" | |
local types = require"moonscript.types" | |
local ntype = types.ntype | |
local trim = util.trim | |
local getfenv = util.getfenv | |
local setfenv = util.setfenv | |
local unpack = util.unpack | |
local Stack = data.Stack | |
-- Measures the indentation width of a whitespace run.
-- Each space contributes one column and each tab contributes four; every
-- other character found in `str` is ignored.
-- Returns the total width as a number.
local function count_indent(str)
  -- gsub's second result is the number of substitutions, i.e. occurrences
  local space_count = select(2, str:gsub(" ", ""))
  local tab_count = select(2, str:gsub("\t", ""))
  return space_count + tab_count * 4
end
-- Short aliases for the LPeg constructors and capture builders used below.
local R, S, V, P = lpeg.R, lpeg.S, lpeg.V, lpeg.P
local C, Ct, Cmt, Cg, Cb, Cc = lpeg.C, lpeg.Ct, lpeg.Cmt, lpeg.Cg, lpeg.Cb, lpeg.Cc

-- Raise the limit on LPeg's backtrack stack.
lpeg.setmaxstack(10000)

-- Whitespace and line structure.
local White = S(" \t\r\n")^0              -- any run of blanks and line breaks
local _Space = S(" \t")^0                 -- blanks within a line
local Break = P("\r")^-1 * P("\n")        -- one line break (LF or CRLF)
local Stop = Break + -1                   -- line break or end of input
local Indent = C(S("\t ")^0) / count_indent
local Comment = P("--") * (1 - S("\r\n"))^0 * #Stop
local Space = _Space * Comment^-1         -- optional blanks, optional trailing comment
local SomeSpace = S(" \t")^1 * Comment^-1 -- like Space, but at least one blank
local SpaceBreak = Space * Break
local EmptyLine = SpaceBreak

-- Identifiers.
local AlphaNum = R("az", "AZ", "09", "__")
local _Name = C(R("az", "AZ", "__") * AlphaNum^0)
local Name = Space * _Name

-- Numeric literals; the matched text is wrapped as {"number", text}.
local Num
do
  local digits = R("09")^1
  -- optional u/U followed by at least two l/L characters
  local int_suffix = S("uU")^-1 * S("lL")^2
  local hex = P("0x") * R("09", "af", "AF")^1 * int_suffix^-1
  local suffixed_int = digits * int_suffix
  local exponent = S("eE") * P("-")^-1 * digits
  local decimal = (digits * (P(".") * digits)^-1 + P(".") * digits) * exponent^-1
  local literal = hex + suffixed_int + decimal
  Num = Space * (literal / function(value) return {"number", value} end)
end

-- Operator classes consumed by the Exp rule.
local FactorOp = Space * C(S("+-"))
local TermOp = Space * C(S("*/%^"))

-- "#!" up to (not including) the end of the line.
local Shebang = P("#!") * P(1 - Stop)^0
-- A pattern that never matches. It is built from a match-time function
-- rather than P(false) because P(false) causes preceding patterns not to run.
local Cut = P(function() return false end)

-- Matches `patt` followed by `finally`. If that sequence fails, `finally` is
-- matched on its own (so its match-time callbacks still run) and the overall
-- pattern then fails through Cut.
local function ensure(patt, finally)
  local success_path = patt * finally
  local cleanup_then_fail = finally * Cut
  return success_path + cleanup_then_fail
end
-- Gives `fn` an environment in which unknown capitalized names are declared
-- automatically as grammar variables (lpeg.V).
--
-- Lookups go to fn's previous environment first. A missing name matching
-- ^[A-Z][A-Za-z0-9]*$ is turned into a rule reference and cached in the new
-- environment; any other missing name raises "unknown variable referenced".
-- When debug_grammar is enabled, each rule reference is wrapped so that
-- entering, succeeding and failing a rule are printed to stdout with nesting.
-- Returns the result of setfenv(fn, new_env).
local function wrap_env(fn)
  local outer_env = getfenv(fn)
  local make_rule_ref = V

  if debug_grammar then
    local depth = 0
    local pad = " "

    -- print all arguments, comma separated, indented by the current depth
    local function trace(...)
      local parts = {...}
      for i = 1, #parts do
        parts[i] = tostring(parts[i])
      end
      io.stdout:write(pad:rep(depth) .. table.concat(parts, ", ") .. "\n")
    end

    make_rule_ref = function(name)
      local rule = V(name)
      local on_enter = Cmt("", function()
        trace("* " .. name)
        depth = depth + 1
        return true
      end)
      local on_success = Cmt(rule, function(str, pos, ...)
        trace(name, true)
        depth = depth - 1
        return true, ...
      end)
      local on_failure = Cmt("", function()
        trace(name, false)
        depth = depth - 1
        return false
      end)
      return on_enter * on_success + on_failure
    end
  end

  local function lookup(self, name)
    local inherited = outer_env[name]
    if inherited ~= nil then
      return inherited
    end
    if not name:match"^[A-Z][A-Za-z0-9]*$" then
      error("unknown variable referenced: "..name)
    end
    local ref = make_rule_ref(name)
    -- cache so the same reference object is reused on later lookups
    rawset(self, name, ref)
    return ref
  end

  return setfenv(fn, setmetatable({}, { __index = lookup }))
end
-- Returns the text of `str` from `start_pos` up to, but not including, the
-- next "\n". When no newline follows, the remainder of the string is returned.
local function extract_line(str, start_pos)
  local newline_at = str:find("\n", start_pos, true)
  if newline_at then
    return str:sub(start_pos, newline_at - 1)
  end
  return str:sub(start_pos)
end
-- Returns a node constructor: calling it with any values produces a table
-- whose first element is `name` and whose remaining elements are those values.
local function mark(name)
  local function build(...)
    local node = {name, ...}
    return node
  end
  return build
end
-- Records `pos` on `value` under index -1 when `value` is a table.
-- Non-table values are passed through untouched. Always returns `value`.
local function insert_pos(pos, value)
  if type(value) ~= "table" then
    return value
  end
  value[-1] = pos
  return value
end
-- Wraps `patt` so that the position where it started matching is handed,
-- together with the pattern's captures, to insert_pos.
local function pos(patt)
  local located = lpeg.Cp() * patt
  return located / insert_pos
end
-- Debugging aid: returns a pattern that consumes nothing, always succeeds,
-- and prints `what` together with the text from the current position to the
-- end of the current line.
local function got(what)
  return Cmt("", function(str, pos)
    print("++ got "..what, "["..extract_line(str, pos).."]")
    return true
  end)
end
-- Returns a function that collapses single-element tables: a table holding
-- exactly one element yields that element; any other table gets `name`
-- inserted at index 1 (in place) and is returned.
local function flatten_or_mark(name)
  return function(tbl)
    if #tbl ~= 1 then
      table.insert(tbl, 1, name)
      return tbl
    end
    return tbl[1]
  end
end
-- Node types that may end a chain on the left-hand side of an assignment.
local _chain_assignable = { index = true, dot = true, slice = true }

-- Reports whether `node` may appear as an assignment target.
-- "..." is never assignable. ref, self, value, self_class and table nodes
-- always are. A chain is assignable only when its last item is an index,
-- dot or slice. Returns a boolean.
local function is_assignable(node)
  if node == "..." then
    return false
  end
  local kind = ntype(node)
  if kind == "chain" then
    -- makes sure the last item in a chain is an index
    return _chain_assignable[ntype(node[#node])] == true
  end
  return kind == "ref" or kind == "self" or kind == "value"
    or kind == "self_class" or kind == "table"
end
-- Match-time check: succeeds, producing `value`, only when `value` is an
-- assignable node; otherwise the match fails.
local function check_assignable(str, pos, value)
  if not is_assignable(value) then
    return false
  end
  return true, value
end
local flatten_explist = flatten_or_mark"explist"

-- Combines a list of left-hand expressions with an optional assignment node.
-- Without `assign`, the expression list is flattened and returned.
-- With `assign`, every left-hand expression must be assignable, otherwise a
-- table {node, message} is raised. An "assign" node becomes
-- {"assign", lhs_exps, ...}; an "update" node becomes
-- {"update", lhs_exps[1], ...}, where ... are the elements of `assign` from
-- index 2 on. Any other node type raises "unknown assign expression".
local function format_assign(lhs_exps, assign)
  if not assign then
    return flatten_explist(lhs_exps)
  end

  for _, target in ipairs(lhs_exps) do
    if not is_assignable(target) then
      error {target, "left hand expression is not assignable"}
    end
  end

  local kind = ntype(assign)
  if kind ~= "assign" and kind ~= "update" then
    error "unknown assign expression"
  end

  -- an update only takes the first left-hand expression
  local lhs = lhs_exps
  if kind == "update" then
    lhs = lhs_exps[1]
  end
  return {kind, lhs, unpack(assign, 2)}
end
-- The if statement only takes a single lhs, so it is wrapped in a table to
-- fit the "assign" tuple format expected by format_assign. Without an
-- assignment the lhs is returned as is.
local function format_single_assign(lhs, assign)
  if not assign then
    return lhs
  end
  return format_assign({lhs}, assign)
end
-- Symbol pattern: `chars` preceded by Space.
local function sym(chars)
  local spaced = Space * chars
  return spaced
end
-- Symbol with no leading Space: hands `chars` back unchanged.
local function symx(chars)
  local literal = chars
  return literal
end
-- Builds the pattern for a string literal delimited by `delim`.
-- A string character is an escaped delimiter, an escaped backslash, or
-- anything that does not start the delimiter. With `allow_interpolation`,
-- the contents are captured as alternating text chunks and "interpolate"
-- nodes for #{ Exp } sections; otherwise the contents are one capture.
-- The result is marked as a "string" node whose first value is the delimiter.
local function simple_string(delim, allow_interpolation)
  local char = P('\\'..delim) + "\\\\" + (1 - P(delim))
  local contents
  if allow_interpolation then
    local interp = symx"#{" * V"Exp" * sym"}"
    contents = (C((char - interp)^1) + interp / mark"interpolate")^0
  else
    contents = C(char^0)
  end
  local literal = C(symx(delim)) * contents * sym(delim)
  return literal / mark"string"
end
-- Wraps a single value as the sole argument of a "call" node.
local function wrap_func_arg(value)
  local arguments = {value}
  return {"call", arguments}
end
-- Attaches call arguments to a callee node.
-- With an empty `args` the callee is returned untouched. Otherwise the
-- arguments become a {"call", args} item:
--   * a callee that is not a chain is wrapped as {"chain", callee, call};
--   * a chain ending in a colon_stub has that stub retagged "colon" and the
--     call appended to it (the stub needed arguments);
--   * any other chain gets the call appended as a new item.
-- Chains are modified in place and returned.
local function flatten_func(callee, args)
  if #args == 0 then
    return callee
  end

  local call = {"call", args}
  if ntype(callee) ~= "chain" then
    return {"chain", callee, call}
  end

  local last = callee[#callee]
  if ntype(last) == "colon_stub" then
    last[1] = "colon"
    table.insert(last, call)
  else
    table.insert(callee, call)
  end
  return callee
end
-- Builds a chain that starts from a string literal. Without `chain` the
-- string node is returned alone; otherwise the chain items follow the string
-- inside a new "chain" node, which is combined with `args` by flatten_func.
local function flatten_string_chain(str, chain, args)
  if chain then
    return flatten_func({"chain", str, unpack(chain)}, args)
  end
  return str
end
-- Transforms a statement that has a line decorator into a "decorated" node;
-- a statement without a decorator is returned unchanged.
local function wrap_decorator(stm, dec)
  if dec then
    return {"decorated", stm, dec}
  end
  return stm
end
-- Match-time check for long strings: true when the two bracket captures
-- `left` and `right` have the same length.
local function check_lua_string(str, pos, right, left)
  local left_len, right_len = #left, #right
  return left_len == right_len
end
-- :name in table literal — expands to a key/value pair in which the key is
-- a key_literal node for `name` and the value is `name` itself.
local function self_assign(name)
  local key_node = {"key_literal", name}
  return {key_node, name}
end
-- Format for parse failures: optional message, line number, source line.
local err_msg = "Failed to parse:%s\n [%d] >> %s"

-- Builds a fresh grammar together with the state its match-time callbacks
-- share (indent stack, `do` stack, last checked position).
--
-- The body runs inside wrap_env, so capitalized names that are not defined
-- anywhere (File, Block, Exp, ...) evaluate to lpeg.V references to the rule
-- of the same name.
--
-- Returns a table with:
--   _g    - the complete pattern (grammar surrounded by optional whitespace,
--           anchored at end of input)
--   match - function(self, str, ...) returning the tree, or nil plus an
--           error message
local build_grammar = wrap_env(function()
  local _indent = Stack(0) -- current indent
  local _do_stack = Stack(0)
  local last_pos = 0 -- used to know where to report error

  -- succeeds when the measured indent equals the current indent level;
  -- also remembers the position for error reporting
  local function check_indent(str, pos, indent)
    last_pos = pos
    return _indent:top() == indent
  end

  -- pushes a deeper indent level; fails when the indent is not deeper or
  -- when indentation is currently prevented (top of stack is -1)
  local function advance_indent(str, pos, indent)
    local top = _indent:top()
    if top ~= -1 and indent > top then
      _indent:push(indent)
      return true
    end
    return false
  end

  local function push_indent(str, pos, indent)
    _indent:push(indent)
    return true
  end

  local function pop_indent(str, pos)
    if not _indent:pop() then error("unexpected outdent") end
    return true
  end

  -- lets a `do` node through unless the top of the do stack is false
  local function check_do(str, pos, do_node)
    local top = _do_stack:top()
    if top == nil or top then
      return true, do_node
    end
    return false
  end

  local function disable_do(str, pos)
    _do_stack:push(false)
    return true
  end

  local function pop_do(str, pos)
    if nil == _do_stack:pop() then error("unexpected do pop") end
    return true
  end

  local DisableDo = Cmt("", disable_do)
  local PopDo = Cmt("", pop_do)

  -- every word passed to key() or op() is recorded here
  local keywords = {}

  -- keyword pattern: the word must not be followed by an identifier character
  local function key(chars)
    keywords[chars] = true
    return Space * chars * -AlphaNum
  end

  -- operator pattern capturing the operator; operators made only of
  -- alphanumeric characters are treated as keywords as well
  local function op(word)
    local patt = Space * C(word)
    if word:match("^%w*$") then
      keywords[word] = true
      patt = patt * -AlphaNum
    end
    return patt
  end

  -- make sure name is not a keyword
  local Name = Cmt(Name, function(str, pos, name)
    if keywords[name] then return false end
    return true
  end) / trim

  local SelfName = Space * "@" * (
    "@" * (_Name / mark"self_class" + Cc"self.__class") +
    _Name / mark"self" + Cc"self")

  local KeyName = SelfName + Space * _Name / mark"key_literal"
  local VarArg = Space * P"..." / trim

  local g = lpeg.P{
    File,
    File = Shebang^-1 * (Block + Ct""),
    Block = Ct(Line * (Break^1 * Line)^0),
    CheckIndent = Cmt(Indent, check_indent), -- validates line is in correct indent
    Line = (CheckIndent * Statement + Space * #Stop),

    Statement = pos(
        Import + While + With + For + ForEach + Switch + Return +
        Local + Export + BreakLoop +
        Ct(ExpList) * (Update + Assign)^-1 / format_assign
      ) * Space * ((
        -- statement decorators
        key"if" * Exp * (key"else" * Exp)^-1 * Space / mark"if" +
        key"unless" * Exp / mark"unless" +
        CompInner / mark"comprehension"
      ) * Space)^-1 / wrap_decorator,

    Body = Space^-1 * Break * EmptyLine^0 * InBlock + Ct(Statement), -- either a statement, or an indented block

    Advance = #Cmt(Indent, advance_indent), -- Advances the indent, gives back whitespace for CheckIndent
    PushIndent = Cmt(Indent, push_indent),
    PreventIndent = Cmt(Cc(-1), push_indent),
    PopIndent = Cmt("", pop_indent),
    InBlock = Advance * Block * PopIndent,

    Local = key"local" * ((op"*" + op"^") / mark"declare_glob" + Ct(NameList) / mark"declare_with_shadows"),

    Import = key"import" * Ct(ImportNameList) * SpaceBreak^0 * key"from" * Exp / mark"import",
    ImportName = ((sym"\\" + P'->') * Ct(Cc"colon_stub" * Name) + Name),
    ImportNameList = SpaceBreak^0 * ImportName * ((SpaceBreak^1 + sym"," * SpaceBreak^0) * ImportName)^0,

    BreakLoop = Ct(key"break"/trim) + Ct(key"continue"/trim),

    Return = key"return" * (ExpListLow/mark"explist" + C"") / mark"return",

    WithExp = Ct(ExpList) * Assign^-1 / format_assign,
    With = key"with" * DisableDo * ensure(WithExp, PopDo) * key"do"^-1 * Body / mark"with",

    Switch = key"switch" * DisableDo * ensure(Exp, PopDo) * key"do"^-1 * Space^-1 * Break * SwitchBlock / mark"switch",

    SwitchBlock = EmptyLine^0 * Advance * Ct(SwitchCase * (Break^1 * SwitchCase)^0 * (Break^1 * SwitchElse)^-1) * PopIndent,
    SwitchCase = key"when" * Ct(ExpList) * key"then"^-1 * Body / mark"case",
    SwitchElse = key"else" * Body / mark"else",

    IfCond = Exp * Assign^-1 / format_single_assign,

    If = key"if" * IfCond * key"then"^-1 * Body *
      ((Break * CheckIndent)^-1 * EmptyLine^0 * key"elseif" * pos(IfCond) * key"then"^-1 * Body / mark"elseif")^0 *
      ((Break * CheckIndent)^-1 * EmptyLine^0 * key"else" * Body / mark"else")^-1 / mark"if",

    Unless = key"unless" * IfCond * key"then"^-1 * Body *
      ((Break * CheckIndent)^-1 * EmptyLine^0 * key"else" * Body / mark"else")^-1 / mark"unless",

    While = key"while" * DisableDo * ensure(Exp, PopDo) * key"do"^-1 * Body / mark"while",

    For = key"for" * DisableDo * ensure(Name * sym"=" * Ct(Exp * sym"," * Exp * (sym"," * Exp)^-1), PopDo) *
      key"do"^-1 * Body / mark"for",

    ForEach = key"for" * Ct(AssignableNameList) * key"in" * DisableDo * ensure(Ct(sym"*" * Exp / mark"unpack" + ExpList), PopDo) * key"do"^-1 * Body / mark"foreach",

    Do = key"do" * Body / mark"do",

    Comprehension = sym"[" * Exp * CompInner * sym"]" / mark"comprehension",

    TblComprehension = sym"{" * Ct(Exp * (sym"," * Exp)^-1) * CompInner * sym"}" / mark"tblcomprehension",

    CompInner = Ct((CompForEach + CompFor) * CompClause^0),
    CompForEach = key"for" * Ct(NameList) * key"in" * (sym"*" * Exp / mark"unpack" + Exp) / mark"foreach",
    CompFor = key "for" * Name * sym"=" * Ct(Exp * sym"," * Exp * (sym"," * Exp)^-1) / mark"for",
    CompClause = CompFor + CompForEach + key"when" * Exp / mark"when",

    Assign = sym"=" * (Ct(With + If + Switch) + Ct(TableBlock + ExpListLow)) / mark"assign",
    Update = ((sym"..=" + sym"+=" + sym"-=" + sym"*=" + sym"/=" + sym"%=" + sym"or=" + sym"and=") / trim) * Exp / mark"update",

    -- we can ignore precedence for now
    OtherOps = op"or" + op"and" + op"<=" + op">=" + op"~=" + op"!=" + op"==" + op".." + op"<" + op">",

    Assignable = Cmt(DotChain + Chain, check_assignable) + Name + SelfName,

    Exp = Ct(Value * ((OtherOps + FactorOp + TermOp) * Value)^0) / flatten_or_mark"exp",

    -- Exp = Ct(Factor * (OtherOps * Factor)^0) / flatten_or_mark"exp",
    -- Factor = Ct(Term * (FactorOp * Term)^0) / flatten_or_mark"exp",
    -- Term = Ct(Value * (TermOp * Value)^0) / flatten_or_mark"exp",

    SimpleValue =
      If + Unless +
      Switch +
      With +
      ClassDecl +
      ForEach + For + While +
      Cmt(Do, check_do) +
      sym"-" * -SomeSpace * Exp / mark"minus" +
      sym"#" * Exp / mark"length" +
      key"not" * Exp / mark"not" +
      TblComprehension +
      TableLit +
      Comprehension +
      FunLit +
      Num,

    ChainValue = -- a function call or an object access
      StringChain +
      ((Chain + DotChain + Callable) * Ct(InvokeArgs^-1)) / flatten_func,

    Value = pos(
      SimpleValue +
      Ct(KeyValueList) / mark"table" +
      ChainValue),

    SliceValue = SimpleValue + ChainValue,

    StringChain = String *
      (Ct((ColonCall + ColonSuffix) * ChainTail^-1) * Ct(InvokeArgs^-1))^-1 / flatten_string_chain,

    String = Space * DoubleString + Space * SingleString + LuaString,
    SingleString = simple_string("'"),
    DoubleString = simple_string('"', true),

    LuaString = Cg(LuaStringOpen, "string_open") * Cb"string_open" * Break^-1 *
      C((1 - Cmt(C(LuaStringClose) * Cb"string_open", check_lua_string))^0) *
      LuaStringClose / mark"string",

    LuaStringOpen = sym"[" * P"="^0 * "[" / trim,
    LuaStringClose = "]" * P"="^0 * "]",

    Callable = pos(Name / mark"ref") + SelfName + VarArg + Parens / mark"parens",
    Parens = sym"(" * Exp * sym")",

    FnArgs = symx"(" * Ct(ExpList^-1) * sym")" + sym"!" * -P"=" * Ct"",

    ChainTail = ChainItem^1 * ColonSuffix^-1 + ColonSuffix,

    -- a list of funcalls and indexes on a callable
    Chain = Callable * ChainTail / mark"chain",

    -- shorthand dot call for use in with statement
    DotChain =
      (sym"." * Cc(-1) * (_Name / mark"dot") * ChainTail^-1) / mark"chain" +
      ((sym"->" + P"\\") * Cc(-1) * (
        (_Name * Invoke / mark"colon") * ChainTail^-1 +
        (_Name / mark"colon_stub")
      )) / mark"chain",

    ChainItem =
      Invoke +
      Slice +
      symx"[" * Exp/mark"index" * sym"]" +
      symx"." * _Name/mark"dot" +
      ColonCall,

    Slice = symx"[" * (SliceValue + Cc(1)) * sym"," * (SliceValue + Cc"") *
      (sym"," * SliceValue)^-1 *sym"]" / mark"slice",

    ColonCall = (symx"->" + P"\\") * (_Name * Invoke) / mark"colon",
    ColonSuffix = (symx"->" + P"\\") * _Name / mark"colon_stub",

    Invoke = FnArgs/mark"call" +
      SingleString / wrap_func_arg +
      DoubleString / wrap_func_arg,

    TableValue = KeyValue + Ct(Exp),

    TableLit = sym"{" * Ct(
        TableValueList^-1 * sym","^-1 *
        (SpaceBreak * TableLitLine * (sym","^-1 * SpaceBreak * TableLitLine)^0 * sym","^-1)^-1
      ) * White * sym"}" / mark"table",

    TableValueList = TableValue * (sym"," * TableValue)^0,
    TableLitLine = PushIndent * ((TableValueList * PopIndent) + (PopIndent * Cut)) + Space,

    -- the unbounded table
    TableBlockInner = Ct(KeyValueLine * (SpaceBreak^1 * KeyValueLine)^0),
    TableBlock = SpaceBreak^1 * Advance * ensure(TableBlockInner, PopIndent) / mark"table",

    ClassDecl = key"class" * -P":" * (Assignable + Cc(nil)) * (key"extends" * PreventIndent * ensure(Exp, PopIndent) + C"")^-1 * (ClassBlock + Ct("")) / mark"class",

    ClassBlock = SpaceBreak^1 * Advance *
      Ct(ClassLine * (SpaceBreak^1 * ClassLine)^0) * PopIndent,
    ClassLine = CheckIndent * ((
        KeyValueList / mark"props" +
        Statement / mark"stm" +
        Exp / mark"stm"
      ) * sym","^-1),

    Export = key"export" * (
      Cc"class" * ClassDecl +
      op"*" + op"^" +
      Ct(NameList) * (sym"=" * Ct(ExpListLow))^-1) / mark"export",

    KeyValue = (sym":" * -SomeSpace * Name) / self_assign + Ct((KeyName + sym"[" * Exp * sym"]" + DoubleString + SingleString) * symx":" * (Exp + TableBlock)),
    KeyValueList = KeyValue * (sym"," * KeyValue)^0,
    KeyValueLine = CheckIndent * KeyValueList * sym","^-1,

    FnArgsDef = sym"(" * Ct(FnArgDefList^-1) *
      (key"using" * Ct(NameList + Space * "nil") + Ct"") *
      sym")" + Ct"" * Ct"",

    FnArgDefList = FnArgDef * (sym"," * FnArgDef)^0 * (sym"," * Ct(VarArg))^0 + Ct(VarArg),
    FnArgDef = Ct((Name + SelfName) * (sym"=" * Exp)^-1),

    FunLit = FnArgsDef *
      (sym"->" * Cc"slim" + sym"=>" * Cc"fat") *
      (Body + Ct"") / mark"fndef",

    NameList = Name * (sym"," * Name)^0,
    NameOrDestructure = Name + TableLit,
    AssignableNameList = NameOrDestructure * (sym"," * NameOrDestructure)^0,

    ExpList = Exp * (sym"," * Exp)^0,
    ExpListLow = Exp * ((sym"," + sym";") * Exp)^0,

    InvokeArgs = -P"-" * (ExpList * (sym"," * (TableBlock + SpaceBreak * Advance * ArgBlock * TableBlock^-1) + TableBlock)^-1 + TableBlock),
    ArgBlock = ArgLine * (sym"," * SpaceBreak * ArgLine)^0 * PopIndent,
    ArgLine = CheckIndent * ExpList
  }

  return {
    _g = White * g * White * -1,

    -- Parses `str`; extra arguments are forwarded to the pattern's match.
    -- Returns the tree on success. On failure returns nil and a message:
    -- either the traceback of an ordinary (string) error, or a
    -- "Failed to parse" message naming the offending line.
    match = function(self, str, ...)
      local pos_to_line = function(pos)
        return util.pos_to_line(str, pos)
      end

      local get_line = function(num)
        return util.get_line(str, num)
      end

      local tree
      local parse_args = {...}

      local pass, err = xpcall(function()
        tree = self._g:match(str, unpack(parse_args))
      end, function(err)
        return debug.traceback(err, 2)
      end)

      -- regular error: hand the traceback to the caller
      if type(err) == "string" then
        return nil, err
      end

      if not tree then
        local pos = last_pos
        local msg

        if err then
          -- structured error raised as {node, message}
          local node
          node, msg = unpack(err)
          msg = msg and " " .. msg
          -- Only table nodes are tagged with a position (see insert_pos);
          -- a node such as the string "..." has none, so keep the last
          -- checked position instead of replacing it with nil.
          if type(node) == "table" and node[-1] ~= nil then
            pos = node[-1]
          end
        end

        local line_no = pos_to_line(pos)
        local line_str = get_line(line_no) or ""

        return nil, err_msg:format(msg or "", line_no, trim(line_str))
      end
      return tree
    end
  }
end)
-- Parses a string with a freshly built grammar.
-- Returns the tree, or nil and an error message.
local function parse_string(str)
  return build_grammar():match(str)
end

-- Module interface.
return {
  extract_line = extract_line,
  string = parse_string
}
-- (GitHub page footer: "Sign up for free to join this conversation on GitHub.
-- Already have an account? Sign in to comment.")