Last active
April 29, 2016 13:33
-
-
Save cray0000/a66fc8469f7d497ce696f06926ed2869 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * JavaScript Grammar
 * ==================
 *
 * Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a
 * syntax tree compatible with Mozilla SpiderMonkey Parser API [2]. Properties
 * and node types reflecting features not present in ECMA-262 are not included.
 *
 * Limitations:
 *
 *   * Non-BMP characters are completely ignored to avoid surrogate pair
 *     handling.
 *
 *   * One can create identifiers containing illegal characters using Unicode
 *     escape sequences. For example, "abcd\u0020efgh" is not a valid
 *     identifier, but it is accepted by the parser.
 *
 *   * Strict mode is not recognized. This means that within strict mode code,
 *     "implements", "interface", "let", "package", "private", "protected",
 *     "public", "static" and "yield" can be used as names. Many other
 *     restrictions and exceptions from Annex C are also not applied.
 *
 * All the limitations could be resolved, but the costs would likely outweigh
 * the benefits.
 *
 * Many thanks to inimino [3] for his grammar [4] which helped me to solve some
 * problems (such as automatic semicolon insertion) and also served to double
 * check that I converted the original grammar correctly.
 *
 * [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
 * [2] https://developer.mozilla.org/en-US/docs/SpiderMonkey/Parser_API
 * [3] http://inimino.org/~inimino/blog/
 * [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg
 */
{ | |
/*
 * Maps the node type of a partially built call/member node to the name of
 * the property that must receive the expression built so far: "callee" for
 * a CallExpression and "object" for a MemberExpression.
 */
var TYPES_TO_PROPERTY_NAMES = {
  CallExpression: "callee",
  MemberExpression: "object"
};
/**
 * Creates an array with `count` slots, every one of them set to `value`.
 *
 * @param {number} count - Number of elements in the resulting array.
 * @param {*} value - Value stored in each slot (the same reference is reused).
 * @returns {Array} The newly created, fully populated array.
 */
function filledArray(count, value) {
  var result = new Array(count),
      i;

  for (i = 0; i < count; i++) {
    result[i] = value;
  }

  return result;
}
/**
 * Picks one element out of the match result of an optional sequence.
 *
 * @param {Array|null} optional - The matched sequence, or a falsy value when
 *   the optional part did not match.
 * @param {number} index - Position of the wanted element in the sequence.
 * @returns {*} `optional[index]`, or `null` when `optional` is falsy.
 */
function extractOptional(optional, index) {
  return optional ? optional[index] : null;
}
/**
 * Projects a list of sequences onto a single position.
 *
 * @param {Array<Array>} list - Sequences, e.g. the matches of a `( ... )*`.
 * @param {number} index - Position to take from every sequence.
 * @returns {Array} A new array holding `list[i][index]` for each `i`.
 */
function extractList(list, index) {
  var result = new Array(list.length),
      i;

  for (i = 0; i < list.length; i++) {
    result[i] = list[i][index];
  }

  return result;
}
/**
 * Builds a flat list from a `head (separator item)*` style match.
 *
 * @param {*} head - The first item.
 * @param {Array<Array>} tail - Sequences matched after the first item.
 * @param {number} index - Position of the item inside each tail sequence.
 * @returns {Array} `head` followed by the item taken from every tail sequence.
 */
function buildList(head, tail, index) {
  return [head].concat(extractList(tail, index));
}
/**
 * Folds `tail` into `head` from left to right.
 *
 * @param {*} head - Initial accumulator value.
 * @param {Array} tail - Elements to fold in, in order.
 * @param {function(*, *): *} builder - Called as `builder(accumulator,
 *   element)`; its return value becomes the new accumulator.
 * @returns {*} The final accumulator (`head` itself when `tail` is empty).
 */
function buildTree(head, tail, builder) {
  var result = head,
      i;

  for (i = 0; i < tail.length; i++) {
    result = builder(result, tail[i]);
  }

  return result;
}
/**
 * Builds a left-nested chain of "BinaryExpression" nodes.
 *
 * @param {Object} head - The leftmost operand.
 * @param {Array<Array>} tail - Matched sequences; the operator is read from
 *   position 1 and the right operand from position 3 of each sequence.
 * @returns {Object} `head` when `tail` is empty, otherwise the outermost
 *   BinaryExpression node.
 */
function buildBinaryExpression(head, tail) {
  return buildTree(head, tail, function(result, element) {
    return {
      type: "BinaryExpression",
      operator: element[1],
      left: result,
      right: element[3]
    };
  });
}
/**
 * Builds a left-nested chain of "LogicalExpression" nodes.
 *
 * @param {Object} head - The leftmost operand.
 * @param {Array<Array>} tail - Matched sequences; the operator is read from
 *   position 1 and the right operand from position 3 of each sequence.
 * @returns {Object} `head` when `tail` is empty, otherwise the outermost
 *   LogicalExpression node.
 */
function buildLogicalExpression(head, tail) {
  return buildTree(head, tail, function(result, element) {
    return {
      type: "LogicalExpression",
      operator: element[1],
      left: result,
      right: element[3]
    };
  });
}
/**
 * Normalizes a possibly missing list.
 *
 * @param {Array|null} value - A list, or `null` when nothing was matched.
 * @returns {Array} `value` itself unless it is exactly `null`, in which case
 *   a new empty array is returned.
 */
function optionalList(value) {
  return value !== null ? value : [];
}
} | |
// Entry point: one conditional expression, optionally surrounded by
// whitespace, line breaks and comments. The expression node is the result.
Start
  = __ expression:ConditionalExpression __ { return expression; }
/* ----- A.1 Lexical Grammar ----- */ | |
// Any single character of the input.
SourceCharacter
  = .
// One whitespace character: tab, vertical tab, form feed, space, no-break
// space (U+00A0), U+FEFF, or any character matched by Zs.
WhiteSpace "whitespace"
  = "\t"
  / "\v"
  / "\f"
  / " "
  / "\u00A0"
  / "\uFEFF"
  / Zs
// A single line-terminating character.
LineTerminator
  = [\n\r\u2028\u2029]

// One complete line break. "\r\n" is listed before "\r" so that the pair is
// consumed as a single line break.
LineTerminatorSequence "end of line"
  = "\n"
  / "\r\n"
  / "\r"
  / "\u2028"
  / "\u2029"
// Only single-line comments are recognized by this grammar.
Comment "comment"
  = SingleLineComment

// "//" followed by every character up to, but not including, the next line
// terminator.
SingleLineComment
  = "//" (!LineTerminator SourceCharacter)*
// An identifier name that is not a reserved word. Yields the Identifier node
// produced by IdentifierName.
Identifier
  = !ReservedWord name:IdentifierName { return name; }

// A start character followed by any number of part characters; yields an
// "Identifier" node whose name is the concatenated matched text.
IdentifierName "identifier"
  = head:IdentifierStart tail:IdentifierPart* {
      return {
        type: "Identifier",
        name: head + tail.join("")
      };
    }
// First character of an identifier: an ASCII letter, "$" or "_".
IdentifierStart
  = [a-zA-Z]
  / "$"
  / "_"

// Any subsequent identifier character: a start character or a decimal digit.
IdentifierPart
  = IdentifierStart
  / DecimalDigit
// Words that Identifier refuses: "null", "true" and "false".
ReservedWord
  = NullLiteral
  / BooleanLiteral
// Any literal value: null, boolean, number, string or regular expression.
Literal
  = NullLiteral
  / BooleanLiteral
  / NumericLiteral
  / StringLiteral
  / RegularExpressionLiteral
// The keyword "null" as a "Literal" node with value null.
NullLiteral
  = NullToken { return { type: "Literal", value: null }; }

// The keywords "true" / "false" as "Literal" nodes with the matching boolean.
BooleanLiteral
  = TrueToken  { return { type: "Literal", value: true  }; }
  / FalseToken { return { type: "Literal", value: false }; }
/*
 * The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official
 * grammar, it comes from text in section 7.8.3.
 *
 * Only decimal literals are accepted by this rule.
 */
NumericLiteral "number"
  = literal:DecimalLiteral !(IdentifierStart / DecimalDigit) {
      return literal;
    }
// A decimal number in one of three spellings: "1.", "1.5" / ".5" / "1" (each
// with an optional exponent). The forms containing "." are tried first so
// that the integer-only form does not stop in front of the fraction. Every
// form yields a "Literal" node whose value is parseFloat of the matched text.
DecimalLiteral
  = DecimalIntegerLiteral "." DecimalDigit* ExponentPart? {
      return { type: "Literal", value: parseFloat(text()) };
    }
  / "." DecimalDigit+ ExponentPart? {
      return { type: "Literal", value: parseFloat(text()) };
    }
  / DecimalIntegerLiteral ExponentPart? {
      return { type: "Literal", value: parseFloat(text()) };
    }
// Integer part of a decimal literal: a lone "0", or a non-zero digit followed
// by any digits (so a leading zero is never followed by more digits here).
DecimalIntegerLiteral
  = "0"
  / NonZeroDigit DecimalDigit*

DecimalDigit
  = [0-9]

NonZeroDigit
  = [1-9]

// Exponent suffix such as "e10", "E-3" or "e+7".
ExponentPart
  = ExponentIndicator SignedInteger

// "e" or "E" (case-insensitive match).
ExponentIndicator
  = "e"i

// One or more digits with an optional leading sign.
SignedInteger
  = [+-]? DecimalDigit+
// "0x" / "0X" followed by hex digits; yields a "Literal" node whose value is
// the digits parsed in base 16.
// NOTE(review): NumericLiteral in this file only references DecimalLiteral,
// so this rule appears to be unreachable and input such as 0x1F is rejected.
// Confirm whether hex literals are meant to be supported.
HexIntegerLiteral
  = "0x"i digits:$HexDigit+ {
      return { type: "Literal", value: parseInt(digits, 16) };
    }

// One hexadecimal digit, upper or lower case.
HexDigit
  = [0-9a-f]i
// A double- or single-quoted string; yields a "Literal" node whose value is
// the concatenation of the decoded characters between the quotes.
StringLiteral "string"
  = '"' chars:DoubleStringCharacter* '"' {
      return { type: "Literal", value: chars.join("") };
    }
  / "'" chars:SingleStringCharacter* "'" {
      return { type: "Literal", value: chars.join("") };
    }
// One element of a double-quoted string: a plain character (anything except
// the closing quote, a backslash or a line terminator), an escape sequence
// (yielding its decoded value), or a line continuation.
DoubleStringCharacter
  = !('"' / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

// Same as DoubleStringCharacter, but for single-quoted strings.
SingleStringCharacter
  = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

// A backslash directly followed by a line break; contributes nothing to the
// string value.
LineContinuation
  = "\\" LineTerminatorSequence { return ""; }
// What may follow a backslash inside a string: a character escape, or "0"
// not followed by another digit (yielding the NUL character).
// NOTE(review): EscapeCharacter reserves "u", but no alternative here
// consumes a "\uXXXX" sequence, so strings containing one fail to parse.
// Confirm whether Unicode escapes are meant to be supported.
EscapeSequence
  = CharacterEscapeSequence
  / "0" !DecimalDigit { return "\0"; }

CharacterEscapeSequence
  = SingleEscapeCharacter
  / NonEscapeCharacter

// Escapes with a fixed meaning. Quotes and the backslash stand for
// themselves; the letters map to the corresponding control character.
SingleEscapeCharacter
  = "'"
  / '"'
  / "\\"
  / "b"  { return "\b";   }
  / "f"  { return "\f";   }
  / "n"  { return "\n";   }
  / "r"  { return "\r";   }
  / "t"  { return "\t";   }
  / "v"  { return "\x0B"; }   // IE does not recognize "\v".

// Any other escaped character stands for itself.
NonEscapeCharacter
  = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }

// Characters that NonEscapeCharacter must not treat as "stands for itself".
EscapeCharacter
  = SingleEscapeCharacter
  / DecimalDigit
  / "u"
// "/pattern/flags"; yields a "Literal" node whose value is the RegExp object
// constructed from the matched pattern and flags text. If the RegExp
// constructor throws, its message is passed to error().
RegularExpressionLiteral "regular expression"
  = "/" pattern:$RegularExpressionBody "/" flags:$RegularExpressionFlags {
      var value;

      try {
        value = new RegExp(pattern, flags);
      } catch (e) {
        error(e.message);
      }

      return { type: "Literal", value: value };
    }
// The text between the two slashes; at least one character long.
RegularExpressionBody
  = RegularExpressionFirstChar RegularExpressionChar*

// Like RegularExpressionChar, but additionally must not be "*".
RegularExpressionFirstChar
  = ![*\\/[] RegularExpressionNonTerminator
  / RegularExpressionBackslashSequence
  / RegularExpressionClass

// A plain character (not a backslash, "/" or "["), a backslash sequence, or a
// complete character class.
RegularExpressionChar
  = ![\\/[] RegularExpressionNonTerminator
  / RegularExpressionBackslashSequence
  / RegularExpressionClass

// A backslash followed by any character except a line terminator.
RegularExpressionBackslashSequence
  = "\\" RegularExpressionNonTerminator

RegularExpressionNonTerminator
  = !LineTerminator SourceCharacter

// "[ ... ]"; inside it "/" does not need escaping.
RegularExpressionClass
  = "[" RegularExpressionClassChar* "]"

RegularExpressionClassChar
  = ![\]\\] RegularExpressionNonTerminator
  / RegularExpressionBackslashSequence

// Zero or more identifier characters following the closing slash.
RegularExpressionFlags
  = IdentifierPart*
// [Unicode] Separator, Space
Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
/* Tokens */

// Each keyword must not be directly followed by an identifier character, so
// that e.g. "nullable" is not read as the keyword "null".
FalseToken = "false" !IdentifierPart
NullToken  = "null"  !IdentifierPart
TrueToken  = "true"  !IdentifierPart
/* Skipped */

// Any amount of whitespace, line breaks and comments.
__
  = (WhiteSpace / LineTerminatorSequence / Comment)*

// Any amount of whitespace only (no line breaks, no comments).
_
  = (WhiteSpace)*

/* Automatic Semicolon Insertion */

// End of statement: a line break (optionally preceded by a single-line
// comment), a position directly before "}", or the end of input.
EOS
  = _ SingleLineComment? LineTerminatorSequence
  / _ &"}"
  / __ EOF

// Succeeds only when no input is left.
EOF
  = !.
/* ----- A.2 Number Conversions ----- */ | |
/* Irrelevant. */ | |
/* ----- A.3 Expressions ----- */ | |
// The atoms of an expression. A parenthesized expression yields the inner
// expression node directly; the parentheses leave no node of their own.
PrimaryExpression
  = Identifier
  / Literal
  / ArrayLiteral
  / ObjectLiteral
  / "(" __ expression:Expression __ ")" { return expression; }
// An array literal, yielding an "ArrayExpression" node. Three shapes are
// handled, in this order:
//   1. no elements, only optional elision commas:  []  [,,]
//   2. an element list without trailing comma:     [a, b]
//   3. an element list followed by a comma and optional further elision.
// Elided positions appear as null entries in `elements`.
ArrayLiteral
  = "[" __ elision:(Elision __)? "]" {
      return {
        type: "ArrayExpression",
        elements: optionalList(extractOptional(elision, 0))
      };
    }
  / "[" __ elements:ElementList __ "]" {
      return {
        type: "ArrayExpression",
        elements: elements
      };
    }
  / "[" __ elements:ElementList __ "," __ elision:(Elision __)? "]" {
      return {
        type: "ArrayExpression",
        elements: elements.concat(optionalList(extractOptional(elision, 0)))
      };
    }
// Comma-separated array elements. Each element may be preceded by elision
// commas; head and every tail entry evaluate to an array of the form
// [null, ..., null, element], and the final action concatenates them into
// one flat list.
ElementList
  = head:(
      elision:(Elision __)? element:ConditionalExpression {
        return optionalList(extractOptional(elision, 0)).concat(element);
      }
    )
    tail:(
      __ "," __ elision:(Elision __)? element:ConditionalExpression {
        return optionalList(extractOptional(elision, 0)).concat(element);
      }
    )*
    { return Array.prototype.concat.apply(head, tail); }
// One or more commas; yields an array with one null per comma.
Elision
  = "," commas:(__ ",")* { return filledArray(commas.length + 1, null); }
// An object literal, yielding an "ObjectExpression" node: empty, with a
// property list, or with a property list followed by a trailing comma.
ObjectLiteral
  = "{" __ "}" { return { type: "ObjectExpression", properties: [] }; }
  / "{" __ properties:PropertyNameAndValueList __ "}" {
      return { type: "ObjectExpression", properties: properties };
    }
  / "{" __ properties:PropertyNameAndValueList __ "," __ "}" {
      return { type: "ObjectExpression", properties: properties };
    }
// Comma-separated property assignments, yielded as a flat array. Index 3 is
// the PropertyAssignment inside each `__ "," __ PropertyAssignment` sequence.
PropertyNameAndValueList
  = head:PropertyAssignment tail:(__ "," __ PropertyAssignment)* {
      return buildList(head, tail, 3);
    }
// "name: value"; yields a plain { key, value } pair of nodes.
PropertyAssignment
  = key:PropertyName __ ":" __ value:ConditionalExpression {
      return { key: key, value: value };
    }
// A property key: an identifier name (reserved words are allowed here, since
// IdentifierName rather than Identifier is used), a string or a number.
PropertyName
  = IdentifierName
  / StringLiteral
  / NumericLiteral
// A primary expression followed by any number of property accesses, either
// computed (`a[b]`) or by name (`a.b`). The accesses are folded from left to
// right into nested "MemberExpression" nodes; without any access the primary
// expression itself is the result.
MemberExpression
  = head:(
        PrimaryExpression
    )
    tail:(
        __ "[" __ property:Expression __ "]" {
          return { property: property, computed: true };
        }
      / __ "." __ property:IdentifierName {
          return { property: property, computed: false };
        }
    )*
    {
      return buildTree(head, tail, function(result, element) {
        return {
          type: "MemberExpression",
          object: result,
          property: element.property,
          computed: element.computed
        };
      });
    }
// A member expression that is called, followed by any number of further
// calls and property accesses (`f(x)(y).z[0]`).
//
// Each tail element is built as a node that still lacks its callee/object.
// The final action walks the tail from left to right and stores the
// expression built so far into that missing property, whose name is looked
// up in TYPES_TO_PROPERTY_NAMES by the element's node type.
CallExpression
  = head:(
      callee:MemberExpression __ args:Arguments {
        return { type: "CallExpression", callee: callee, arguments: args };
      }
    )
    tail:(
        __ args:Arguments {
          return { type: "CallExpression", arguments: args };
        }
      / __ "[" __ property:Expression __ "]" {
          return {
            type: "MemberExpression",
            property: property,
            computed: true
          };
        }
      / __ "." __ property:IdentifierName {
          return {
            type: "MemberExpression",
            property: property,
            computed: false
          };
        }
    )*
    {
      return buildTree(head, tail, function(result, element) {
        element[TYPES_TO_PROPERTY_NAMES[element.type]] = result;

        return element;
      });
    }
// A parenthesized argument list; yields an array of argument nodes, which is
// empty for "()".
Arguments
  = "(" __ args:(ArgumentList __)? ")" {
      return optionalList(extractOptional(args, 0));
    }
// One or more comma-separated argument expressions, yielded as a flat array.
ArgumentList
  = head:ConditionalExpression tail:(__ "," __ ConditionalExpression)* {
      return buildList(head, tail, 3);
    }
// CallExpression is tried first; it fails when no argument list follows the
// member expression, in which case the plain MemberExpression is used.
LeftHandSideExpression
  = CallExpression
  / MemberExpression
// `x++` / `x--`, yielding a non-prefix "UpdateExpression" node. Only
// whitespace (`_`), not a line break, may separate operand and operator.
// Without an operator the operand itself is the result.
PostfixExpression
  = argument:LeftHandSideExpression _ operator:PostfixOperator {
      return {
        type: "UpdateExpression",
        operator: operator,
        argument: argument,
        prefix: false
      };
    }
  / LeftHandSideExpression

PostfixOperator
  = "++"
  / "--"
// TODO: Maybe it should have right recursion here
// A postfix expression, or a unary operator applied to another unary
// expression (so operators can be stacked, e.g. `!-x`). The latter yields a
// prefix "UnaryExpression" node.
UnaryExpression
  = PostfixExpression
  / operator:UnaryOperator __ argument:UnaryExpression {
      return {
        type: "UnaryExpression",
        operator: operator,
        argument: argument,
        prefix: true
      };
    }

// "+" and "-" are rejected when directly followed by "=".
UnaryOperator
  = $("+" !"=")
  / $("-" !"=")
  / "~"
  / "!"
// Left-associative chain of "*", "/" and "%" over unary expressions.
MultiplicativeExpression
  = head:UnaryExpression
    tail:(__ MultiplicativeOperator __ UnaryExpression)*
    { return buildBinaryExpression(head, tail); }

// Each operator is rejected when directly followed by "=".
MultiplicativeOperator
  = $("*" !"=")
  / $("/" !"=")
  / $("%" !"=")
// Left-associative chain of "+" and "-" over multiplicative expressions.
AdditiveExpression
  = head:MultiplicativeExpression
    tail:(__ AdditiveOperator __ MultiplicativeExpression)*
    { return buildBinaryExpression(head, tail); }

// "+" must not be followed by "+" or "="; "-" must not be followed by "-"
// or "=".
AdditiveOperator
  = $("+" ![+=])
  / $("-" ![-=])
// Left-associative chain of shift operators over additive expressions.
ShiftExpression
  = head:AdditiveExpression
    tail:(__ ShiftOperator __ AdditiveExpression)*
    { return buildBinaryExpression(head, tail); }

// ">>>" is listed before ">>" so the longer operator wins. Each operator is
// rejected when directly followed by "=".
ShiftOperator
  = $("<<"  !"=")
  / $(">>>" !"=")
  / $(">>"  !"=")
// Left-associative chain of comparison operators over shift expressions.
RelationalExpression
  = head:ShiftExpression
    tail:(__ RelationalOperator __ ShiftExpression)*
    { return buildBinaryExpression(head, tail); }

// "<" and ">" are rejected when directly followed by the same character.
RelationalOperator
  = "<="
  / ">="
  / $("<" !"<")
  / $(">" !">")
// Left-associative chain of equality operators over relational expressions.
EqualityExpression
  = head:RelationalExpression
    tail:(__ EqualityOperator __ RelationalExpression)*
    { return buildBinaryExpression(head, tail); }

// The three-character operators are listed first so they are not cut short
// by their two-character prefixes.
EqualityOperator
  = "==="
  / "!=="
  / "=="
  / "!="
// Left-associative chain of "&" over equality expressions.
BitwiseANDExpression
  = head:EqualityExpression
    tail:(__ BitwiseANDOperator __ EqualityExpression)*
    { return buildBinaryExpression(head, tail); }

// "&" not directly followed by "&" or "=".
BitwiseANDOperator
  = $("&" ![&=])
// Left-associative chain of "^" over bitwise-AND expressions.
BitwiseXORExpression
  = head:BitwiseANDExpression
    tail:(__ BitwiseXOROperator __ BitwiseANDExpression)*
    { return buildBinaryExpression(head, tail); }

// "^" not directly followed by "=".
BitwiseXOROperator
  = $("^" !"=")
// Left-associative chain of "|" over bitwise-XOR expressions.
BitwiseORExpression
  = head:BitwiseXORExpression
    tail:(__ BitwiseOROperator __ BitwiseXORExpression)*
    { return buildBinaryExpression(head, tail); }

// "|" not directly followed by "|" or "=".
BitwiseOROperator
  = $("|" ![|=])
// Left-associative chain of "&&" over bitwise-OR expressions. The chain is
// built with buildLogicalExpression so that the nodes have type
// "LogicalExpression" rather than "BinaryExpression".
LogicalANDExpression
  = head:BitwiseORExpression
    tail:(__ LogicalANDOperator __ BitwiseORExpression)*
    { return buildLogicalExpression(head, tail); }

LogicalANDOperator
  = "&&"
// Left-associative chain of "||" over logical-AND expressions. The chain is
// built with buildLogicalExpression so that the nodes have type
// "LogicalExpression" rather than "BinaryExpression".
LogicalORExpression
  = head:LogicalANDExpression
    tail:(__ LogicalOROperator __ LogicalANDExpression)*
    { return buildLogicalExpression(head, tail); }

LogicalOROperator
  = "||"
// `test ? consequent : alternate`, yielding a "ConditionalExpression" node.
// Both branches are themselves conditional expressions, so the operator
// nests to the right. Without "?" the logical-OR expression is the result.
ConditionalExpression
  = test:LogicalORExpression __
    "?" __ consequent:ConditionalExpression __
    ":" __ alternate:ConditionalExpression
    {
      return {
        type: "ConditionalExpression",
        test: test,
        consequent: consequent,
        alternate: alternate
      };
    }
  / LogicalORExpression
// One or more comma-separated conditional expressions. A single expression
// is yielded unchanged; two or more are wrapped in a "SequenceExpression"
// node.
Expression
  = head:ConditionalExpression tail:(__ "," __ ConditionalExpression)* {
      return tail.length > 0
        ? { type: "SequenceExpression", expressions: buildList(head, tail, 3) }
        : head;
    }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment