Skip to content

Instantly share code, notes, and snippets.

@cray0000
Last active April 29, 2016 13:33
Show Gist options
  • Save cray0000/a66fc8469f7d497ce696f06926ed2869 to your computer and use it in GitHub Desktop.
Save cray0000/a66fc8469f7d497ce696f06926ed2869 to your computer and use it in GitHub Desktop.
/*
* JavaScript Grammar
* ==================
*
* Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a
* syntax tree compatible with Mozilla SpiderMonkey Parser API [2]. Properties
* and node types reflecting features not present in ECMA-262 are not included.
*
* Limitations:
*
* * Non-BMP characters are completely ignored to avoid surrogate pair
* handling.
*
* * One can create identifiers containing illegal characters using Unicode
* escape sequences. For example, "abcd\u0020efgh" is not a valid
* identifier, but it is accepted by the parser.
*
* * Strict mode is not recognized. This means that within strict mode code,
* "implements", "interface", "let", "package", "private", "protected",
* "public", "static" and "yield" can be used as names. Many other
* restrictions and exceptions from Annex C are also not applied.
*
* All the limitations could be resolved, but the costs would likely outweigh
* the benefits.
*
* Many thanks to inimino [3] for his grammar [4] which helped me to solve some
* problems (such as automatic semicolon insertion) and also served to double
* check that I converted the original grammar correctly.
*
* [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
* [2] https://developer.mozilla.org/en-US/docs/SpiderMonkey/Parser_API
* [3] http://inimino.org/~inimino/blog/
* [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg
*/
{
// For each node type that a CallExpression tail element can have, the name of
// the property that receives the expression built so far.
var TYPES_TO_PROPERTY_NAMES = {
  "CallExpression": "callee",
  "MemberExpression": "object"
};
// Returns a new array of `count` slots, each holding `value`.
function filledArray(count, value) {
  var slots = new Array(count);
  var position = 0;
  while (position < count) {
    slots[position] = value;
    position += 1;
  }
  return slots;
}
// Returns element `index` of an optional match, or null when the optional
// part did not match (i.e. `optional` is falsy).
function extractOptional(optional, index) {
  if (optional) {
    return optional[index];
  }
  return null;
}
// Collects element `index` of every entry of `list` into a new array.
function extractList(list, index) {
  var picked = [];
  var position;
  for (position = 0; position < list.length; position += 1) {
    picked.push(list[position][index]);
  }
  return picked;
}
// Builds a flat list out of a "head (separator item)*" match: `head` followed
// by element `index` of every entry of `tail`.
function buildList(head, tail, index) {
  var items = extractList(tail, index);
  items.unshift(head);
  return items;
}
// Folds `tail` into `head` from left to right: `builder` is called with the
// value accumulated so far and the next tail element, and its return value
// becomes the new accumulated value. Returns `head` itself for an empty tail.
function buildTree(head, tail, builder) {
  var accumulated = head;
  var position = 0;
  while (position < tail.length) {
    accumulated = builder(accumulated, tail[position]);
    position += 1;
  }
  return accumulated;
}
// Turns a "head (__ operator __ operand)*" match into a left-nested chain of
// "BinaryExpression" nodes. In each tail element the operator is at index 1
// and the right operand at index 3.
function buildBinaryExpression(head, tail) {
  function combine(left, element) {
    var node = { type: "BinaryExpression" };
    node.operator = element[1];
    node.left = left;
    node.right = element[3];
    return node;
  }
  return buildTree(head, tail, combine);
}
// Turns a "head (__ operator __ operand)*" match into a left-nested chain of
// "LogicalExpression" nodes. In each tail element the operator is at index 1
// and the right operand at index 3.
function buildLogicalExpression(head, tail) {
  function combine(left, element) {
    var node = { type: "LogicalExpression" };
    node.operator = element[1];
    node.left = left;
    node.right = element[3];
    return node;
  }
  return buildTree(head, tail, combine);
}
// Substitutes a fresh empty array for null; any other value is returned as is.
function optionalList(value) {
  if (value === null) {
    return [];
  }
  return value;
}
}
// Entry rule: one conditional expression, optionally surrounded by skipped
// whitespace, line breaks and comments. Returns the expression node.
Start
  = __ root:ConditionalExpression __ { return root; }
/* ----- A.1 Lexical Grammar ----- */
// Matches any single input character.
SourceCharacter
= .
// A single whitespace character: tab, vertical tab, form feed, space,
// no-break space (U+00A0), byte order mark (U+FEFF) or any character of the
// Zs class. The quoted name is what parse errors report for this rule.
WhiteSpace "whitespace"
= "\t"
/ "\v"
/ "\f"
/ " "
/ "\u00A0"
/ "\uFEFF"
/ Zs
// A single line-terminating character: LF, CR, U+2028 or U+2029.
LineTerminator
= [\n\r\u2028\u2029]
// One complete line break. "\r\n" is tried before "\r" so that a CR LF pair
// is consumed as a single sequence.
LineTerminatorSequence "end of line"
= "\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
// The only comment form this rule accepts is the single-line comment.
Comment "comment"
= SingleLineComment
// "//" followed by every character up to, but not including, the next line
// terminator.
SingleLineComment
= "//" (!LineTerminator SourceCharacter)*
// An identifier name that is not a reserved word. Returns the "Identifier"
// node produced by IdentifierName.
Identifier
  = !ReservedWord id:IdentifierName { return id; }
// One start character followed by any number of part characters. Returns an
// "Identifier" node whose name is the concatenation of all matched characters.
IdentifierName "identifier"
  = first:IdentifierStart rest:IdentifierPart* {
      var node = { type: "Identifier" };
      node.name = first + rest.join("");
      return node;
    }
// First character of an identifier: an ASCII letter, "$" or "_".
IdentifierStart
= [a-zA-Z]
/ "$"
/ "_"
// Any later character of an identifier: a start character or a decimal digit.
IdentifierPart
= IdentifierStart
/ DecimalDigit
// Words that Identifier refuses to match: whatever NullLiteral or
// BooleanLiteral accepts.
ReservedWord
= NullLiteral
/ BooleanLiteral
// Any literal value: null, boolean, number, string or regular expression,
// tried in that order.
Literal
= NullLiteral
/ BooleanLiteral
/ NumericLiteral
/ StringLiteral
/ RegularExpressionLiteral
// The null keyword as a "Literal" node with value null.
NullLiteral
  = NullToken {
      var node = { type: "Literal", value: null };
      return node;
    }

// The true / false keywords as "Literal" nodes with the matching boolean value.
BooleanLiteral
  = TrueToken {
      var node = { type: "Literal", value: true };
      return node;
    }
  / FalseToken {
      var node = { type: "Literal", value: false };
      return node;
    }
/*
 * A decimal literal that is not directly followed by an identifier start
 * character or a digit. The "!(IdentifierStart / DecimalDigit)" predicate is
 * not part of the official grammar, it comes from text in section 7.8.3.
 */
NumericLiteral "number"
  = number:DecimalLiteral !(IdentifierStart / DecimalDigit) { return number; }
// The three shapes of a decimal number: "1.", "1.5", "1.5e3" / ".5" / "15",
// "15e3". In every case the matched text is converted with parseFloat and
// returned as a "Literal" node.
DecimalLiteral
  = DecimalIntegerLiteral "." DecimalDigit* ExponentPart? {
      var parsed = parseFloat(text());
      return { type: "Literal", value: parsed };
    }
  / "." DecimalDigit+ ExponentPart? {
      var parsed = parseFloat(text());
      return { type: "Literal", value: parsed };
    }
  / DecimalIntegerLiteral ExponentPart? {
      var parsed = parseFloat(text());
      return { type: "Literal", value: parsed };
    }
// Integer part of a decimal number: a lone "0", or a non-zero digit followed
// by any number of digits (a multi-digit integer part never starts with "0").
DecimalIntegerLiteral
= "0"
/ NonZeroDigit DecimalDigit*
DecimalDigit
= [0-9]
NonZeroDigit
= [1-9]
// Exponent suffix such as "e10" or "E-3".
ExponentPart
= ExponentIndicator SignedInteger
// The trailing "i" makes the match case-insensitive.
ExponentIndicator
= "e"i
// One or more digits with an optional leading sign.
SignedInteger
= [+-]? DecimalDigit+
// "0x" / "0X" followed by one or more hex digits, returned as a "Literal" node
// holding the parsed integer.
// NOTE(review): NumericLiteral only references DecimalLiteral, so this rule
// appears to be unreachable from Start -- confirm whether that is intended.
HexIntegerLiteral
  = "0x"i hex:$HexDigit+ {
      var parsed = parseInt(hex, 16);
      return { type: "Literal", value: parsed };
    }
// A single hexadecimal digit; the "i" flag accepts both letter cases.
HexDigit
= [0-9a-f]i
// A double- or single-quoted string. The decoded characters are joined and
// returned as the value of a "Literal" node.
StringLiteral "string"
  = '"' pieces:DoubleStringCharacter* '"' {
      var joined = pieces.join("");
      return { type: "Literal", value: joined };
    }
  / "'" pieces:SingleStringCharacter* "'" {
      var joined = pieces.join("");
      return { type: "Literal", value: joined };
    }
// One unit of a double-quoted string: a plain character (anything except the
// closing quote, a backslash or a line terminator), a backslash escape, or a
// line continuation.
DoubleStringCharacter
  = !('"' / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" escaped:EscapeSequence { return escaped; }
  / LineContinuation

// Same as above for single-quoted strings.
SingleStringCharacter
  = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" escaped:EscapeSequence { return escaped; }
  / LineContinuation
// A backslash immediately followed by a line break inside a string literal.
// It contributes nothing to the string value.
LineContinuation
= "\\" LineTerminatorSequence { return ""; }
// The part of an escape that follows the backslash. "0" not followed by a
// digit yields the NUL character.
EscapeSequence
= CharacterEscapeSequence
/ "0" !DecimalDigit { return "\0"; }
CharacterEscapeSequence
= SingleEscapeCharacter
/ NonEscapeCharacter
// Characters with a dedicated meaning after a backslash. The quotes and the
// backslash stand for themselves; each letter maps to a control character.
SingleEscapeCharacter
= "'"
/ '"'
/ "\\"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "v" { return "\x0B"; } // IE does not recognize "\v".
// Any escaped character that is neither an EscapeCharacter nor a line
// terminator; it stands for itself.
NonEscapeCharacter
= !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }
// Characters excluded from NonEscapeCharacter.
EscapeCharacter
= SingleEscapeCharacter
/ DecimalDigit
/ "u"
// "/body/flags". Body and flags are captured as text and compiled with the
// RegExp constructor; a constructor failure is reported through error() with
// the exception message. Returns a "Literal" node holding the RegExp object.
RegularExpressionLiteral "regular expression"
  = "/" body:$RegularExpressionBody "/" modifiers:$RegularExpressionFlags {
      var compiled;

      try {
        compiled = new RegExp(body, modifiers);
      } catch (failure) {
        error(failure.message);
      }

      return { type: "Literal", value: compiled };
    }
// The pattern between the slashes; it contains at least one character.
RegularExpressionBody
= RegularExpressionFirstChar RegularExpressionChar*
// Same as RegularExpressionChar, except that a plain first character may not
// be "*" either.
RegularExpressionFirstChar
= ![*\\/[] RegularExpressionNonTerminator
/ RegularExpressionBackslashSequence
/ RegularExpressionClass
// A plain character (not "\", "/" or "["), a backslash sequence, or a
// bracketed character class.
RegularExpressionChar
= ![\\/[] RegularExpressionNonTerminator
/ RegularExpressionBackslashSequence
/ RegularExpressionClass
// A backslash followed by any character that is not a line terminator.
RegularExpressionBackslashSequence
= "\\" RegularExpressionNonTerminator
RegularExpressionNonTerminator
= !LineTerminator SourceCharacter
// "[...]". Inside the brackets only "]" and "\" are special, so an unescaped
// "/" is accepted here.
RegularExpressionClass
= "[" RegularExpressionClassChar* "]"
RegularExpressionClassChar
= ![\]\\] RegularExpressionNonTerminator
/ RegularExpressionBackslashSequence
// Zero or more identifier characters following the closing slash.
RegularExpressionFlags
= IdentifierPart*
// [Unicode] Separator, Space. Used as the last alternative of WhiteSpace.
Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
/* Tokens */
// Each keyword must not be directly followed by an identifier character, so
// that e.g. "nullable" is not read as the keyword "null".
FalseToken = "false" !IdentifierPart
NullToken = "null" !IdentifierPart
TrueToken = "true" !IdentifierPart
/* Skipped */
// Any amount of whitespace, line breaks and comments.
__
= (WhiteSpace / LineTerminatorSequence / Comment)*
// Any amount of whitespace only (no line breaks, no comments).
_
= (WhiteSpace)*
/* Automatic Semicolon Insertion */
// End of statement: a line break (optionally preceded by whitespace and a
// single-line comment), an upcoming "}" that is looked at but not consumed,
// or the end of input.
// NOTE(review): no other rule visible in this grammar references EOS --
// confirm whether it is still needed.
EOS
= _ SingleLineComment? LineTerminatorSequence
/ _ &"}"
/ __ EOF
// End of input: succeeds only when no character is left.
EOF
= !.
/* ----- A.2 Number Conversions ----- */
/* Irrelevant. */
/* ----- A.3 Expressions ----- */
// The atoms of an expression: identifier, literal, array literal, object
// literal, or a parenthesized expression (the parentheses leave no node of
// their own; the inner expression is returned directly).
PrimaryExpression
  = Identifier
  / Literal
  / ArrayLiteral
  / ObjectLiteral
  / "(" __ inner:Expression __ ")" { return inner; }
// Array literal in its three shapes: only (optional) elisions, an element
// list, or an element list followed by a comma and optional elisions.
// Elided positions appear as null entries in "elements".
ArrayLiteral
  = "[" __ holes:(Elision __)? "]" {
      var elements = optionalList(extractOptional(holes, 0));
      return { type: "ArrayExpression", elements: elements };
    }
  / "[" __ items:ElementList __ "]" {
      return { type: "ArrayExpression", elements: items };
    }
  / "[" __ items:ElementList __ "," __ holes:(Elision __)? "]" {
      var trailing = optionalList(extractOptional(holes, 0));
      return { type: "ArrayExpression", elements: items.concat(trailing) };
    }
// Comma-separated array elements, each optionally preceded by elisions.
// Every group yields an array of its leading holes plus the element; the
// groups are then concatenated into one flat array.
ElementList
  = first:(
      holes:(Elision __)? item:ConditionalExpression {
        var leading = optionalList(extractOptional(holes, 0));
        return leading.concat(item);
      }
    )
    rest:(
      __ "," __ holes:(Elision __)? item:ConditionalExpression {
        var leading = optionalList(extractOptional(holes, 0));
        return leading.concat(item);
      }
    )*
    { return first.concat.apply(first, rest); }
// One or more consecutive commas. Returns an array with one null per comma.
Elision
  = "," extra:(__ ",")* {
      var holeCount = extra.length + 1;
      return filledArray(holeCount, null);
    }
// Object literal: empty, with a property list, or with a property list and a
// trailing comma. Returns an "ObjectExpression" node.
ObjectLiteral
  = "{" __ "}" {
      return {
        type: "ObjectExpression",
        properties: []
      };
    }
  / "{" __ members:PropertyNameAndValueList __ "}" {
      return {
        type: "ObjectExpression",
        properties: members
      };
    }
  / "{" __ members:PropertyNameAndValueList __ "," __ "}" {
      return {
        type: "ObjectExpression",
        properties: members
      };
    }
// One or more comma-separated property assignments, returned as a flat array.
// The assignment sits at index 3 of each repeated group.
PropertyNameAndValueList
  = first:PropertyAssignment rest:(__ "," __ PropertyAssignment)* {
      return buildList(first, rest, 3);
    }
// "name: value". Returns a plain { key, value } pair.
PropertyAssignment
  = name:PropertyName __ ":" __ assigned:ConditionalExpression {
      var pair = { key: name };
      pair.value = assigned;
      return pair;
    }
// A property key: an identifier name (IdentifierName, so the ReservedWord
// check of Identifier does not apply), a string or a number.
PropertyName
= IdentifierName
/ StringLiteral
/ NumericLiteral
// A primary expression followed by any number of property accesses, either
// computed ("[expr]") or static (".name"). The accesses are folded from left
// to right into nested "MemberExpression" nodes.
MemberExpression
  = base:PrimaryExpression
    accessors:(
        __ "[" __ key:Expression __ "]" {
          return { property: key, computed: true };
        }
      / __ "." __ key:IdentifierName {
          return { property: key, computed: false };
        }
    )*
    {
      function wrap(object, accessor) {
        return {
          type: "MemberExpression",
          object: object,
          property: accessor.property,
          computed: accessor.computed
        };
      }

      return buildTree(base, accessors, wrap);
    }
// A member expression with an argument list, followed by any number of
// further calls or property accesses. Each tail element is created as a node
// without its inner expression; while folding, the expression built so far is
// stored under the property that TYPES_TO_PROPERTY_NAMES gives for the node's
// type, and the node becomes the new result.
CallExpression
  = first:(
      target:MemberExpression __ params:Arguments {
        return { type: "CallExpression", callee: target, arguments: params };
      }
    )
    rest:(
        __ params:Arguments {
          return { type: "CallExpression", arguments: params };
        }
      / __ "[" __ key:Expression __ "]" {
          return { type: "MemberExpression", property: key, computed: true };
        }
      / __ "." __ key:IdentifierName {
          return { type: "MemberExpression", property: key, computed: false };
        }
    )*
    {
      return buildTree(first, rest, function(previous, node) {
        var slot = TYPES_TO_PROPERTY_NAMES[node.type];
        node[slot] = previous;
        return node;
      });
    }
// A parenthesized, possibly empty argument list. Returns the array of
// argument nodes (an empty array when there are none).
Arguments
  = "(" __ list:(ArgumentList __)? ")" {
      var found = extractOptional(list, 0);
      return optionalList(found);
    }
// One or more comma-separated conditional expressions, returned as a flat
// array. The expression sits at index 3 of each repeated group.
ArgumentList
  = first:ConditionalExpression rest:(__ "," __ ConditionalExpression)* {
      return buildList(first, rest, 3);
    }
// CallExpression is tried first; MemberExpression is the fallback when no
// argument list follows the member expression.
LeftHandSideExpression
= CallExpression
/ MemberExpression
// A left-hand-side expression, optionally followed by "++" or "--". Only
// whitespace (no line break) may separate operand and operator. With an
// operator the result is a postfix "UpdateExpression" node.
PostfixExpression
  = operand:LeftHandSideExpression _ op:PostfixOperator {
      var node = { type: "UpdateExpression" };
      node.operator = op;
      node.argument = operand;
      node.prefix = false;
      return node;
    }
  / LeftHandSideExpression
// The two postfix update operators.
PostfixOperator
= "++"
/ "--"
// A postfix expression, or a prefix operator applied to another unary
// expression (so operators can be stacked, e.g. "!-x").
// TODO: Maybe it should have right recursion here
UnaryExpression
  = PostfixExpression
  / op:UnaryOperator __ operand:UnaryExpression {
      var node = { type: "UnaryExpression" };
      node.operator = op;
      node.argument = operand;
      node.prefix = true;
      return node;
    }
// Prefix operators. "+" and "-" are rejected when directly followed by "=".
// $(...) yields the matched text rather than the match structure.
UnaryOperator
= $("+" !"=")
/ $("-" !"=")
/ "~"
/ "!"
// Left-nested chain of "*", "/" and "%" operations over unary expressions.
MultiplicativeExpression
  = first:UnaryExpression
    rest:(__ MultiplicativeOperator __ UnaryExpression)*
    { return buildBinaryExpression(first, rest); }

// The lookahead rejects an operator character directly followed by "=".
MultiplicativeOperator
  = $("*" !"=")
  / $("/" !"=")
  / $("%" !"=")
// Left-nested chain of "+" and "-" operations over multiplicative expressions.
AdditiveExpression
  = first:MultiplicativeExpression
    rest:(__ AdditiveOperator __ MultiplicativeExpression)*
    { return buildBinaryExpression(first, rest); }

// "+" not followed by "+" or "=", and "-" not followed by "-" or "=".
AdditiveOperator
  = $("+" ![+=])
  / $("-" ![-=])
// Left-nested chain of shift operations over additive expressions.
ShiftExpression
  = first:AdditiveExpression
    rest:(__ ShiftOperator __ AdditiveExpression)*
    { return buildBinaryExpression(first, rest); }

// ">>>" is tried before ">>" so the longer operator wins; none of the
// operators may be directly followed by "=".
ShiftOperator
  = $("<<" !"=")
  / $(">>>" !"=")
  / $(">>" !"=")
// Left-nested chain of comparison operations over shift expressions.
RelationalExpression
  = first:ShiftExpression
    rest:(__ RelationalOperator __ ShiftExpression)*
    { return buildBinaryExpression(first, rest); }

// "<" and ">" are rejected when doubled, i.e. when they start a shift
// operator.
RelationalOperator
  = "<="
  / ">="
  / $("<" !"<")
  / $(">" !">")
// Left-nested chain of equality operations over relational expressions.
EqualityExpression
  = first:RelationalExpression
    rest:(__ EqualityOperator __ RelationalExpression)*
    { return buildBinaryExpression(first, rest); }

// The three-character operators are tried before their two-character
// prefixes.
EqualityOperator
  = "==="
  / "!=="
  / "=="
  / "!="
// Left-nested chain of "&" operations over equality expressions.
BitwiseANDExpression
  = first:EqualityExpression
    rest:(__ BitwiseANDOperator __ EqualityExpression)*
    { return buildBinaryExpression(first, rest); }

// "&" not followed by "&" or "=".
BitwiseANDOperator
  = $("&" ![&=])
// Left-nested chain of "^" operations over bitwise-AND expressions.
BitwiseXORExpression
  = first:BitwiseANDExpression
    rest:(__ BitwiseXOROperator __ BitwiseANDExpression)*
    { return buildBinaryExpression(first, rest); }

// "^" not followed by "=".
BitwiseXOROperator
  = $("^" !"=")
// Left-nested chain of "|" operations over bitwise-XOR expressions.
BitwiseORExpression
  = first:BitwiseXORExpression
    rest:(__ BitwiseOROperator __ BitwiseXORExpression)*
    { return buildBinaryExpression(first, rest); }

// "|" not followed by "|" or "=".
BitwiseOROperator
  = $("|" ![|=])
// Left-nested chain of "&&" operations over bitwise-OR expressions.
// The nodes are built with buildLogicalExpression so that they carry the type
// "LogicalExpression", which is what the SpiderMonkey Parser API prescribes
// for "&&" and "||" (they are not "BinaryExpression" nodes).
LogicalANDExpression
  = head:BitwiseORExpression
    tail:(__ LogicalANDOperator __ BitwiseORExpression)*
    { return buildLogicalExpression(head, tail); }

LogicalANDOperator
  = "&&"
// Left-nested chain of "||" operations over logical-AND expressions.
// The nodes are built with buildLogicalExpression so that they carry the type
// "LogicalExpression", which is what the SpiderMonkey Parser API prescribes
// for "&&" and "||" (they are not "BinaryExpression" nodes).
LogicalORExpression
  = head:LogicalANDExpression
    tail:(__ LogicalOROperator __ LogicalANDExpression)*
    { return buildLogicalExpression(head, tail); }

LogicalOROperator
  = "||"
// "test ? consequent : alternate", or a plain logical-OR expression when no
// "?" follows. Both branches are themselves conditional expressions, so the
// operator nests to the right.
ConditionalExpression
  = condition:LogicalORExpression __
    "?" __ whenTrue:ConditionalExpression __
    ":" __ whenFalse:ConditionalExpression
    {
      var node = { type: "ConditionalExpression" };
      node.test = condition;
      node.consequent = whenTrue;
      node.alternate = whenFalse;
      return node;
    }
  / LogicalORExpression
// One or more comma-separated conditional expressions. A single expression is
// returned unchanged; several are wrapped in a "SequenceExpression" node.
Expression
  = first:ConditionalExpression rest:(__ "," __ ConditionalExpression)* {
      if (rest.length === 0) {
        return first;
      }

      return {
        type: "SequenceExpression",
        expressions: buildList(first, rest, 3)
      };
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment