Skip to content

Instantly share code, notes, and snippets.

@nberlette
Last active May 13, 2025 03:37
Show Gist options
  • Save nberlette/78639d19a6079f3c13c5722231dab4f1 to your computer and use it in GitHub Desktop.
Save nberlette/78639d19a6079f3c13c5722231dab4f1 to your computer and use it in GitHub Desktop.
TypeScript port of the dabeaz/ply tool (python)
// deno-lint-ignore-file no-explicit-any
/**
* Logger interface for parser/lexer.
*/
export interface Logger {
debug(message: string, ...args: unknown[]): void;
info(message: string, ...args: unknown[]): void;
warning(message: string, ...args: unknown[]): void;
error(message: string, ...args: unknown[]): void;
critical(message: string, ...args: unknown[]): void;
}
/**
* Console-based logger implementation.
*/
export class ConsoleLogger implements Logger {
debug(message: string, ...args: unknown[]): void {
console.debug(message, ...args);
}
info(message: string, ...args: unknown[]): void {
console.info(message, ...args);
}
warning(message: string, ...args: unknown[]): void {
console.warn("WARNING:", message, ...args);
}
error(message: string, ...args: unknown[]): void {
console.error("ERROR:", message, ...args);
}
critical(message: string, ...args: unknown[]): void {
console.error(message, ...args);
}
}
/**
* No-op logger.
*/
export class NullLogger implements Logger {
debug(): void {}
info(): void {}
warning(): void {}
error(): void {}
critical(): void {}
}
// -----------------------------------------------------------------------------
// Metadata infrastructure
// -----------------------------------------------------------------------------
/**
* Unique symbols for metadata keys.
*/
const productionsKey = Symbol("productions");
const tokensKey = Symbol("tokens");
const precedenceKey = Symbol("precedence");
const startKey = Symbol("start");
const errorHandlerKey = Symbol("errorHandler");
const lexTokenKey = Symbol("lexToken");
const lexIgnoreKey = Symbol("lexIgnore");
const lexLiteralKey = Symbol("lexLiteral");
const lexStateKey = Symbol("lexState");
export type Property = string | symbol;
export type PropertyLike = Property | undefined;
/**
* Typed record for all metadata entries.
*/
interface MetadataRecord {
[productionsKey]?: Array<{ method: PropertyLike; bnf: string }>;
[tokensKey]?: string[];
[precedenceKey]?: Array<
{ assoc: "left" | "right" | "nonassoc"; terms: string[] }
>;
[startKey]?: PropertyLike;
[errorHandlerKey]?: PropertyLike;
[lexTokenKey]?: Array<
{ method: PropertyLike; pattern: RegExp | string; states: string[] }
>;
[lexIgnoreKey]?: Array<{ chars: PropertyLike; states: string[] }>;
[lexLiteralKey]?: PropertyLike;
[lexStateKey]?: Array<{ name: string; type: "inclusive" | "exclusive" }>;
}
// Fallback storage when context.metadata is unavailable
const METADATA = new WeakMap<WeakKey, MetadataRecord>();
/**
* Retrieve or initialize the metadata record for a decorator context.
* Uses context.metadata (plain object) if provided; otherwise falls back to a WeakMap keyed by constructor.prototype.
*/
function getMetaRecord(
target: object,
context?: { metadata?: object },
): MetadataRecord {
let record = METADATA.get(context?.metadata ?? target);
if (!record) {
record = Object.create(null) as MetadataRecord;
METADATA.set(target, record);
}
return record;
}
/**
* Read metadata: prefer Stage 3 metadata; otherwise fallback.
*/
function readMeta<T>(
target: object,
key: symbol,
): T | undefined {
const ctor = typeof target === "function" ? target : target.constructor;
// const proto = ctor.prototype;
const metaKey =
(Symbol.metadata && Symbol.metadata in ctor && ctor[Symbol.metadata]) ||
ctor;
return METADATA.get(metaKey)?.[key as keyof MetadataRecord] as T | undefined;
}
// -----------------------------------------------------------------------------
// Grammar decorators
// -----------------------------------------------------------------------------
/**
* Register a grammar production on a class method.
*/
export function Production(
bnf: string,
): <T, V extends (...args: unknown[]) => unknown>(
method: V,
context: ClassMethodDecoratorContext<T, V>,
) => void {
return (method, context) => {
if (context.kind !== "method") {
throw new TypeError("@Production may only decorate class methods");
}
const meta = getMetaRecord(method, context);
const list = meta[productionsKey] ??= [];
list.push({ method: context.name, bnf });
};
}
/**
* Declare the token list for the parser.
*/
export function Tokens(
tokenList: string[],
): <C extends abstract new (...args: unknown[]) => unknown>(
ctor: C,
context: ClassDecoratorContext<C>,
) => C | void {
return (ctor, context) => {
const meta = getMetaRecord(ctor, context);
meta[tokensKey] = [...tokenList];
};
}
/**
* Set operator precedence rules for the grammar.
*/
export function Precedence(
assoc: "left" | "right" | "nonassoc",
...terms: string[]
): <C extends abstract new (...args: any) => any>(
ctor: C,
context: ClassDecoratorContext<C>,
) => void {
return (ctor, context) => {
const meta = getMetaRecord(ctor, context);
const arr = meta[precedenceKey] ??= [];
arr.push({ assoc, terms });
};
}
/**
* Mark the start symbol for the grammar.
*/
export function Start(
symbol: string,
): <C extends abstract new (...args: unknown[]) => unknown>(
ctor: C,
context: ClassDecoratorContext<C>,
) => void {
return (ctor, context) => {
const meta = getMetaRecord(ctor, context);
meta[startKey] = symbol;
};
}
/**
* Designate an error handler method.
*/
export function Error(): <T, V extends (tok: unknown) => unknown>(
method: V,
context: ClassMethodDecoratorContext<T, V>,
) => void {
return (method, context) => {
if (context.kind !== "method") {
throw new TypeError("@Error may only decorate class methods");
}
const meta = getMetaRecord(method, context);
meta[errorHandlerKey] = context.name;
};
}
// -----------------------------------------------------------------------------
// Lexer decorators
// -----------------------------------------------------------------------------
/**
* Register a lexer rule on a class method.
*/
export function LexToken(
pattern: RegExp | string,
states: string[] = ["INITIAL"],
): <T, V extends (tok: unknown) => unknown>(
method: V,
context: ClassMethodDecoratorContext<T, V>,
) => void {
return (method, context) => {
if (context.kind !== "method") {
throw new TypeError("@LexToken may only decorate class methods");
}
const meta = getMetaRecord(method, context);
const arr = meta[lexTokenKey] ??= [];
states = [...states];
arr.push({ method: context.name, pattern, states });
};
}
/**
* Ignore specified characters in given lexer states.
*/
export function Ignore(
chars: string,
states: string[] = ["INITIAL"],
): <C extends abstract new (...args: unknown[]) => unknown>(
ctor: C,
context: ClassDecoratorContext<C>,
) => void {
return (ctor, context) => {
const meta = getMetaRecord(ctor, context);
const arr = meta[lexIgnoreKey] ??= [];
states = [...states];
arr.push({ chars, states });
};
}
/**
* Pass through literal characters in the lexer.
*/
export function Literal(
chars: string,
): <C extends abstract new (...args: unknown[]) => unknown>(
ctor: C,
context: ClassDecoratorContext<C>,
) => C | void {
return (ctor, context) => {
const meta = getMetaRecord(ctor, context);
meta[lexLiteralKey] = chars;
return ctor;
};
}
/**
* Define a lexer state (inclusive or exclusive).
*/
export function State(
name: string,
type: "inclusive" | "exclusive" = "inclusive",
): <C extends abstract new (...args: unknown[]) => unknown>(
ctor: C,
context: ClassDecoratorContext<C>,
) => void {
return (ctor, context) => {
const meta = getMetaRecord(ctor, context);
const arr = meta[lexStateKey] ??= [];
arr.push({ name, type });
meta[lexStateKey] = arr;
};
}
// -----------------------------------------------------------------------------
// Metadata getters
// -----------------------------------------------------------------------------
export function getProductions(
target: object,
): { method: string; bnf: string }[] {
return readMeta(target, productionsKey) ?? [];
}
export function getTokens(target: object): string[] {
return readMeta(target, tokensKey) ?? [];
}
export function getPrecedence(
target: object,
): { assoc: "left" | "right" | "nonassoc"; terms: string[] }[] {
return readMeta(target, precedenceKey) ?? [];
}
export function getStartSymbol(target: object): string | undefined {
return readMeta(target, startKey);
}
export function getErrorHandler(target: object): string | undefined {
return readMeta(target, errorHandlerKey);
}
export function getLexTokens(
target: object,
): { method: string; pattern: RegExp | string; states: string[] }[] {
return readMeta(target, lexTokenKey) ?? [];
}
export function getLexIgnores(
target: object,
): { chars: string; states: string[] }[] {
return readMeta(target, lexIgnoreKey) ?? [];
}
export function getLiterals(target: object): string {
return readMeta(target, lexLiteralKey) ?? "";
}
export function getStates(
target: object,
): { name: string; type: "inclusive" | "exclusive" }[] {
return readMeta(target, lexStateKey) ??
[{ name: "INITIAL", type: "inclusive" }];
}
// lex.ts
// import {
// Logger,
// ConsoleLogger,
// LexToken,
// Ignore,
// Literal,
// State,
// getLexTokens,
// getLexIgnores,
// getLiterals,
// getStates,
// } from "./decorators.ts";
// ----------------------------------------------------------------------------
// LexError: thrown when no error rule handles illegal characters
// ----------------------------------------------------------------------------
export class LexError extends globalThis.Error {
public readonly text: string;
constructor(message: string, text: string) {
super(message);
this.text = text;
Object.setPrototypeOf(this, LexError.prototype);
}
}
// ----------------------------------------------------------------------------
// LexToken interface
// ----------------------------------------------------------------------------
export interface LexToken {
type: string;
value: string;
lineno: number;
lexpos: number;
lexer?: Lexer;
}
// ----------------------------------------------------------------------------
// Lexer runtime
// ----------------------------------------------------------------------------
export class Lexer {
protected lexre: Array<
[
RegExp,
Array<[((tok: LexToken) => LexToken | null) | null, string | null]>,
]
> = [];
protected lexstateignore: Record<string, string> = {};
protected lexstateerrorf: Record<string, (tok: LexToken) => LexToken | null> =
{};
protected lexstateeoff: Record<string, (tok: LexToken) => LexToken> = {};
protected lexliterals: string = "";
protected lexdata = "";
protected lexerrorf: ((tok: LexToken) => LexToken | null) | null = null;
protected lexeoff: ((tok: LexToken) => LexToken) | null = null;
protected lextokens!: Set<string>;
public lexpos = 0;
public lexlen = 0;
public lineno = 1;
constructor(protected readonly logger: Logger = new ConsoleLogger()) {}
/** Clone the lexer, rebinding any method-based error/EoF handlers. */
clone(contextObject?: object): Lexer {
const cloned = new Lexer(this.logger);
cloned.lexre = this.lexre;
cloned.lexstateignore = this.lexstateignore;
cloned.lexliterals = this.lexliterals;
cloned.lexdata = this.lexdata;
cloned.lexerrorf = this.lexerrorf;
cloned.lexeoff = this.lexeoff;
cloned.lextokens = this.lextokens;
cloned.lexpos = this.lexpos;
cloned.lexlen = this.lexlen;
cloned.lineno = this.lineno;
if (contextObject) {
for (const [state, handler] of Object.entries(this.lexstateerrorf)) {
cloned.lexstateerrorf[state] = (contextObject as any)[handler.name]
.bind(contextObject);
}
for (const [state, handler] of Object.entries(this.lexstateeoff)) {
cloned.lexstateeoff[state] = (contextObject as any)[handler.name].bind(
contextObject,
);
}
}
return cloned;
}
/** Provide input string to lexer. */
input(input: string): this {
this.lexdata = input;
this.lexpos = 0;
this.lexlen = input.length;
return this;
}
/** Switch lexing state. */
begin(state: string): this {
if (!(state in this.lexstateignore)) {
throw new TypeError(`Undefined state '${state}'`);
}
this.lexerrorf = this.lexstateerrorf[state] ?? null;
this.lexeoff = this.lexstateeoff[state] ?? null;
return this;
}
/** Skip n characters. */
skip(n: number): this {
this.lexpos += n;
return this;
}
/** Return next token or null at EOF. */
token(): LexToken | null {
const ignoreChars = this.lexstateignore["INITIAL"] ?? "";
while (this.lexpos < this.lexlen) {
const currentChar = this.lexdata[this.lexpos];
if (ignoreChars.includes(currentChar)) {
this.lexpos++;
continue;
}
for (const [regex, map] of this.lexre) {
regex.lastIndex = this.lexpos;
const match = regex.exec(this.lexdata);
if (!match) continue;
const text = match[0];
const tokenBase: LexToken = {
type: "",
value: text,
lineno: this.lineno,
lexpos: this.lexpos,
lexer: this,
};
this.lexpos += text.length;
const groupIndex = match.findIndex((grp, i) => i > 0 && grp != null);
if (groupIndex >= 0) {
const [handler, tokenType] = map[groupIndex] as [
((tok: LexToken) => LexToken | null) | null,
string | null,
];
if (handler) {
const result = handler(tokenBase);
if (result) return result;
break; // handler consumed token, continue scanning
}
if (tokenType) {
return { ...tokenBase, type: tokenType };
}
}
}
if (this.lexliterals.includes(currentChar)) {
const litToken: LexToken = {
type: currentChar,
value: currentChar,
lineno: this.lineno,
lexpos: this.lexpos,
lexer: this,
};
this.lexpos++;
return litToken;
}
if (this.lexerrorf) {
const errToken: LexToken = {
type: "error",
value: this.lexdata.slice(this.lexpos),
lineno: this.lineno,
lexpos: this.lexpos,
lexer: this,
};
const recovered = this.lexerrorf(errToken);
if (!recovered || this.lexpos === errToken.lexpos) {
throw new LexError(`Illegal character '${currentChar}'`, currentChar);
}
return recovered;
}
throw new LexError(`Illegal character '${currentChar}'`, currentChar);
}
if (this.lexeoff) {
const eofToken: LexToken = {
type: "eof",
value: "",
lineno: this.lineno,
lexpos: this.lexpos,
lexer: this,
};
return this.lexeoff(eofToken);
}
return null;
}
*[Symbol.iterator](): IterableIterator<LexToken> {
const token = this.token();
if (token) return yield token;
}
}
// deno-lint-ignore ban-types
type unknowns = {} | null | undefined;
export interface UserLexerModule {
[name: string]: ((tok: LexToken) => LexToken | null) | unknowns;
tokens: string[];
literals: string;
t_error?(tok: LexToken): LexToken | null;
t_eof?(tok: LexToken): LexToken;
}
// ----------------------------------------------------------------------------
// LexerReflect: builds a Lexer instance from decorators metadata
// ----------------------------------------------------------------------------
export class LexerReflect {
constructor(
private readonly mod: UserLexerModule,
private readonly logger: Logger = new ConsoleLogger(),
) {}
build(): Lexer {
const lexer = new Lexer(this.logger);
// Setup ignore and literals
for (const { chars, states } of getLexIgnores(this.mod)) {
for (const state of states) {
lexer["lexstateignore"][state] = chars;
}
}
lexer["lexliterals"] = getLiterals(this.mod);
// Setup error and eof handlers
const errorFn = this.mod.t_error;
const eofFn = this.mod.t_eof;
for (const { name } of getStates(this.mod)) {
if (errorFn) lexer["lexstateerrorf"][name] = errorFn.bind(this.mod);
if (eofFn) lexer["lexstateeoff"][name] = eofFn.bind(this.mod);
}
// Collect patterns
const patternEntries = getLexTokens(this.mod);
const groupMap: Array<
[((tok: LexToken) => LexToken | null) | null, string | null]
> = [];
const regexSource = patternEntries.map(({ method, pattern }, idx) => {
const src = typeof pattern === "string" ? pattern : pattern.source;
groupMap[idx + 1] = [
(this.mod[method] as (tok: LexToken) => LexToken | null)?.bind(
this.mod,
) ?? null,
method,
];
return `(?<G${idx}>${src})`;
});
const masterRegex = new RegExp(regexSource.join("|"), "gy");
lexer["lexre"] = [[masterRegex, groupMap]];
return lexer;
}
}
// ----------------------------------------------------------------------------
// Entry point similar to Python lex()
// ----------------------------------------------------------------------------
export function lex(mod: UserLexerModule, logger?: Logger): Lexer {
const reflect = new LexerReflect(mod, logger);
return reflect.build();
}
// // yacc.ts
// import {
// Logger,
// ConsoleLogger,
// } from "./decorators.ts";
// import {
// getProductions,
// getTokens,
// getPrecedence,
// getStartSymbol,
// getErrorHandler,
// } from "./decorators.ts";
// ----------------------------------------------------------------------------
// YaccError
// ----------------------------------------------------------------------------
/**
* Exception thrown for parser errors.
*/
export class YaccError extends globalThis.Error {
constructor(message: string) {
super(message);
Object.setPrototypeOf(this, YaccError.prototype);
}
}
// ----------------------------------------------------------------------------
// YaccSymbol
// ----------------------------------------------------------------------------
/**
* Symbol held on parser stack.
*/
export class YaccSymbol {
type!: string;
value: unknown;
lineno?: number;
lexpos?: number;
endlineno?: number;
endlexpos?: number;
toString(): string {
return this.type;
}
}
// ----------------------------------------------------------------------------
// YaccProduction
// ----------------------------------------------------------------------------
/**
* Wrapper passed to semantic action functions.
*/
export class YaccProduction {
slice!: YaccSymbol[];
stack!: YaccSymbol[];
parser!: LRParser;
lexer!: any;
/** Get the value of position i in production. */
get(i: number): any {
return this.slice[i].value;
}
/** Set the value of position i. */
set(i: number, v: any): void {
this.slice[i].value = v;
}
/** Number of symbols on RHS. */
length(): number {
return this.slice.length;
}
/** Signal a syntax error in action. */
error(): never {
throw new SyntaxError();
}
}
// ----------------------------------------------------------------------------
// ProductionRule
// ----------------------------------------------------------------------------
interface ProductionRule {
lhs: string;
rhs: string[];
fn?: (p: YaccProduction) => void;
}
// ----------------------------------------------------------------------------
// Grammar (SLR)
// ----------------------------------------------------------------------------
/**
* Grammar with FIRST/FOLLOW and productions.
*/
export class Grammar {
productions: ProductionRule[] = [];
terminals = new Set<string>();
nonterminals = new Set<string>();
prec: Array<{ term: string; assoc: string; level: number }> = [];
startSymbol = "";
first = new Map<string, Set<string>>();
follow = new Map<string, Set<string>>();
/** Add a production and optional action. */
addProduction(
lhs: string,
rhs: string[],
fn?: (p: YaccProduction) => void,
): void {
this.nonterminals.add(lhs);
for (const sym of rhs) {
if (!/^[A-Z_][A-Z0-9_]*$/.test(sym)) {
this.terminals.add(sym);
}
}
this.productions.push({ lhs, rhs, fn });
}
/** Declare token names. */
setTokens(tokens: string[]): void {
for (const t of tokens) this.terminals.add(t);
}
/** Declare precedence rules. */
setPrecedence(
arr: Array<{ assoc: "left" | "right" | "nonassoc"; terms: string[] }>,
): void {
let level = 1;
for (const entry of arr) {
for (const term of entry.terms) {
this.prec.push({ term, assoc: entry.assoc, level });
}
level++;
}
}
/** Set start symbol and add augmented production. */
setStart(start?: string): void {
this.startSymbol = start || this.productions[0].lhs;
// augmented production at index 0
this.productions.unshift({ lhs: "S'", rhs: [this.startSymbol] });
this.nonterminals.add("S'");
}
/** Compute FIRST sets for terminals and nonterminals. */
computeFirst(): void {
// init
for (const t of this.terminals) this.first.set(t, new Set([t]));
for (const A of this.nonterminals) this.first.set(A, new Set());
let changed: boolean;
do {
changed = false;
for (const prod of this.productions) {
const A = prod.lhs;
const firstA = this.first.get(A)!;
let nullable = true;
for (const X of prod.rhs) {
const firstX = this.first.get(X)!;
for (const a of firstX) {
if (a !== "<empty>" && !firstA.has(a)) {
firstA.add(a);
changed = true;
}
}
if (!firstX.has("<empty>")) {
nullable = false;
break;
}
}
if (nullable && !firstA.has("<empty>")) {
firstA.add("<empty>");
changed = true;
}
}
} while (changed);
}
/** Compute FOLLOW sets for nonterminals. */
computeFollow(): void {
for (const A of this.nonterminals) this.follow.set(A, new Set());
this.follow.get(this.startSymbol)!.add("$end");
let changed: boolean;
do {
changed = false;
for (const prod of this.productions) {
const A = prod.lhs;
for (let i = 0; i < prod.rhs.length; i++) {
const B = prod.rhs[i];
if (!this.nonterminals.has(B)) continue;
const followB = this.follow.get(B)!;
const beta = prod.rhs.slice(i + 1);
let firstBeta = new Set<string>();
if (beta.length === 0) {
firstBeta = new Set(["<empty>"]);
} else {
firstBeta = this.first.get(beta[0])!;
}
for (const b of firstBeta) {
if (b !== "<empty>" && !followB.has(b)) {
followB.add(b);
changed = true;
}
}
if (
beta.length === 0 ||
firstBeta.has("<empty>")
) {
for (const b of this.follow.get(A)!) {
if (!followB.has(b)) {
followB.add(b);
changed = true;
}
}
}
}
}
} while (changed);
}
}
// ----------------------------------------------------------------------------
// LRTable (SLR)
// ----------------------------------------------------------------------------
interface Item {
prod: number;
dot: number;
}
/**
* LR parse table (SLR(1)).
*/
export class LRTable {
public action: Record<number, Record<string, number>> = {};
public goto: Record<number, Record<string, number>> = {};
public productions: ProductionRule[];
private C: Item[][] = [];
constructor(
private grammar: Grammar,
private logger: Logger = new ConsoleLogger(),
) {
this.productions = grammar.productions;
grammar.computeFirst();
grammar.computeFollow();
this.C = this.lr0Items();
this.buildTable();
}
private lr0Closure(I: Item[]): Item[] {
const J = [...I];
let changed: boolean;
do {
changed = false;
for (const itm of J) {
const prod = this.productions[itm.prod];
if (itm.dot < prod.rhs.length) {
const B = prod.rhs[itm.dot];
if (this.grammar.nonterminals.has(B)) {
for (let p = 0; p < this.productions.length; p++) {
if (this.productions[p].lhs === B) {
const newItm = { prod: p, dot: 0 };
if (!J.some(x => x.prod === p && x.dot === 0)) {
J.push(newItm);
changed = true;
}
}
}
}
}
}
} while (changed);
return J;
}
private lr0Goto(I: Item[], X: string): Item[] {
const J: Item[] = [];
for (const itm of I) {
const prod = this.productions[itm.prod];
if (itm.dot < prod.rhs.length && prod.rhs[itm.dot] === X) {
J.push({ prod: itm.prod, dot: itm.dot + 1 });
}
}
return this.lr0Closure(J);
}
private lr0Items(): Item[][] {
const C: Item[][] = [];
const startSet = this.lr0Closure([{ prod: 0, dot: 0 }]);
C.push(startSet);
const symbols = [...this.grammar.terminals, ...this.grammar.nonterminals];
for (let i = 0; i < C.length; i++) {
for (const X of symbols) {
const gotoIX = this.lr0Goto(C[i], X);
if (gotoIX.length === 0) continue;
if (!C.some(s => this.sameItems(s, gotoIX))) {
C.push(gotoIX);
}
}
}
return C;
}
private sameItems(A: Item[], B: Item[]): boolean {
if (A.length !== B.length) return false;
return A.every(a => B.some(b => b.prod === a.prod && b.dot === a.dot));
}
private buildTable(): void {
const C = this.C;
for (let i = 0; i < C.length; i++) {
this.action[i] = {};
this.goto[i] = {};
for (const itm of C[i]) {
const prod = this.productions[itm.prod];
if (itm.dot < prod.rhs.length) {
const a = prod.rhs[itm.dot];
if (this.grammar.terminals.has(a)) {
const J = this.lr0Goto(C[i], a);
const j = C.findIndex(s => this.sameItems(s, J));
if (j >= 0) this.action[i][a] = j;
}
} else {
if (itm.prod === 0) {
this.action[i]["$end"] = 0;
} else {
const A = prod.lhs;
for (const a of this.grammar.follow.get(A)!) {
this.action[i][a] = -itm.prod;
}
}
}
}
for (const A of this.grammar.nonterminals) {
const J = this.lr0Goto(C[i], A);
const j = C.findIndex(s => this.sameItems(s, J));
if (j >= 0) this.goto[i][A] = j;
}
}
}
}
// ----------------------------------------------------------------------------
// LRParser
// ----------------------------------------------------------------------------
/**
* LR parser engine.
*/
export class LRParser {
constructor(
private table: LRTable,
private errorHandler?: (tok: any) => any,
private logger: Logger = new ConsoleLogger(),
) {}
/** Parse input using a lexer instance. */
parse(input: string, lexer: any): unknown {
lexer.input(input);
const stateStack: number[] = [0];
const symbolStack: YaccSymbol[] = [new YaccSymbol()];
symbolStack[0].type = "$end";
let lookahead: any = null;
const prodObj = new YaccProduction();
prodObj.parser = this;
prodObj.lexer = lexer;
while (true) {
if (!lookahead) {
const tk = lexer.token();
lookahead = tk || Symbol("end");
if (tk) lookahead.type = tk.type;
}
const state = stateStack[stateStack.length - 1];
const act = this.table.action[state][lookahead.type];
if (act === undefined) {
if (this.errorHandler) {
lookahead = this.errorHandler(lookahead);
continue;
}
throw new YaccError(`Syntax error: unexpected ${lookahead.type}`);
}
if (act > 0) {
stateStack.push(act);
symbolStack.push(lookahead as YaccSymbol);
lookahead = null;
} else if (act < 0) {
const prodIndex = -act;
const prod = this.table.productions[prodIndex];
const rhsLen = prod.rhs.length;
const slice = symbolStack.splice(-rhsLen);
stateStack.splice(-rhsLen);
prodObj.slice = slice;
prodObj.stack = symbolStack;
if (prod.fn) prod.fn(prodObj);
const newSym = new YaccSymbol();
newSym.type = prod.lhs;
newSym.value = prodObj.get(0);
symbolStack.push(newSym);
const gotoState = this.table.goto[stateStack[stateStack.length - 1]][prod.lhs];
stateStack.push(gotoState);
} else {
// accept
return symbolStack[symbolStack.length - 1].value;
}
}
}
}
// ----------------------------------------------------------------------------
// Public API: yacc()
// ----------------------------------------------------------------------------
/**
* Build an LR parser from a decorated class.
*/
export function yacc<C extends new (...args: any[]) => any>(
ParserClass: C,
logger: Logger = new ConsoleLogger(),
): LRParser {
const grammar = new Grammar();
grammar.setTokens(getTokens(ParserClass.prototype));
grammar.setPrecedence(getPrecedence(ParserClass.prototype));
grammar.setStart(getStartSymbol(ParserClass.prototype));
// bind productions
for (const { method, bnf } of getProductions(ParserClass.prototype)) {
const parts = bnf.split(/\s+/);
const lhs = parts[0];
const rhs = parts.slice(2);
const fn = (ParserClass.prototype as any)[method].bind(null);
grammar.addProduction(lhs, rhs, fn);
}
const table = new LRTable(grammar, logger);
const errorMethod = getErrorHandler(ParserClass.prototype);
const errorFn = errorMethod
? (ParserClass.prototype as any)[errorMethod].bind(null)
: undefined;
return new LRParser(table, errorFn, logger);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment