Last active
May 13, 2025 03:37
-
-
Save nberlette/78639d19a6079f3c13c5722231dab4f1 to your computer and use it in GitHub Desktop.
TypeScript port of the dabeaz/ply tool (python)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// deno-lint-ignore-file no-explicit-any | |
/** | |
* Logger interface for parser/lexer. | |
*/ | |
export interface Logger { | |
debug(message: string, ...args: unknown[]): void; | |
info(message: string, ...args: unknown[]): void; | |
warning(message: string, ...args: unknown[]): void; | |
error(message: string, ...args: unknown[]): void; | |
critical(message: string, ...args: unknown[]): void; | |
} | |
/** | |
* Console-based logger implementation. | |
*/ | |
export class ConsoleLogger implements Logger { | |
debug(message: string, ...args: unknown[]): void { | |
console.debug(message, ...args); | |
} | |
info(message: string, ...args: unknown[]): void { | |
console.info(message, ...args); | |
} | |
warning(message: string, ...args: unknown[]): void { | |
console.warn("WARNING:", message, ...args); | |
} | |
error(message: string, ...args: unknown[]): void { | |
console.error("ERROR:", message, ...args); | |
} | |
critical(message: string, ...args: unknown[]): void { | |
console.error(message, ...args); | |
} | |
} | |
/** | |
* No-op logger. | |
*/ | |
export class NullLogger implements Logger { | |
debug(): void {} | |
info(): void {} | |
warning(): void {} | |
error(): void {} | |
critical(): void {} | |
} | |
// ----------------------------------------------------------------------------- | |
// Metadata infrastructure | |
// ----------------------------------------------------------------------------- | |
/** | |
* Unique symbols for metadata keys. | |
*/ | |
const productionsKey = Symbol("productions"); | |
const tokensKey = Symbol("tokens"); | |
const precedenceKey = Symbol("precedence"); | |
const startKey = Symbol("start"); | |
const errorHandlerKey = Symbol("errorHandler"); | |
const lexTokenKey = Symbol("lexToken"); | |
const lexIgnoreKey = Symbol("lexIgnore"); | |
const lexLiteralKey = Symbol("lexLiteral"); | |
const lexStateKey = Symbol("lexState"); | |
export type Property = string | symbol; | |
export type PropertyLike = Property | undefined; | |
/** | |
* Typed record for all metadata entries. | |
*/ | |
interface MetadataRecord { | |
[productionsKey]?: Array<{ method: PropertyLike; bnf: string }>; | |
[tokensKey]?: string[]; | |
[precedenceKey]?: Array< | |
{ assoc: "left" | "right" | "nonassoc"; terms: string[] } | |
>; | |
[startKey]?: PropertyLike; | |
[errorHandlerKey]?: PropertyLike; | |
[lexTokenKey]?: Array< | |
{ method: PropertyLike; pattern: RegExp | string; states: string[] } | |
>; | |
[lexIgnoreKey]?: Array<{ chars: PropertyLike; states: string[] }>; | |
[lexLiteralKey]?: PropertyLike; | |
[lexStateKey]?: Array<{ name: string; type: "inclusive" | "exclusive" }>; | |
} | |
// Fallback storage when context.metadata is unavailable | |
const METADATA = new WeakMap<WeakKey, MetadataRecord>(); | |
/** | |
* Retrieve or initialize the metadata record for a decorator context. | |
* Uses context.metadata (plain object) if provided; otherwise falls back to a WeakMap keyed by constructor.prototype. | |
*/ | |
function getMetaRecord( | |
target: object, | |
context?: { metadata?: object }, | |
): MetadataRecord { | |
let record = METADATA.get(context?.metadata ?? target); | |
if (!record) { | |
record = Object.create(null) as MetadataRecord; | |
METADATA.set(target, record); | |
} | |
return record; | |
} | |
/** | |
* Read metadata: prefer Stage 3 metadata; otherwise fallback. | |
*/ | |
function readMeta<T>( | |
target: object, | |
key: symbol, | |
): T | undefined { | |
const ctor = typeof target === "function" ? target : target.constructor; | |
// const proto = ctor.prototype; | |
const metaKey = | |
(Symbol.metadata && Symbol.metadata in ctor && ctor[Symbol.metadata]) || | |
ctor; | |
return METADATA.get(metaKey)?.[key as keyof MetadataRecord] as T | undefined; | |
} | |
// ----------------------------------------------------------------------------- | |
// Grammar decorators | |
// ----------------------------------------------------------------------------- | |
/** | |
* Register a grammar production on a class method. | |
*/ | |
export function Production( | |
bnf: string, | |
): <T, V extends (...args: unknown[]) => unknown>( | |
method: V, | |
context: ClassMethodDecoratorContext<T, V>, | |
) => void { | |
return (method, context) => { | |
if (context.kind !== "method") { | |
throw new TypeError("@Production may only decorate class methods"); | |
} | |
const meta = getMetaRecord(method, context); | |
const list = meta[productionsKey] ??= []; | |
list.push({ method: context.name, bnf }); | |
}; | |
} | |
/** | |
* Declare the token list for the parser. | |
*/ | |
export function Tokens( | |
tokenList: string[], | |
): <C extends abstract new (...args: unknown[]) => unknown>( | |
ctor: C, | |
context: ClassDecoratorContext<C>, | |
) => C | void { | |
return (ctor, context) => { | |
const meta = getMetaRecord(ctor, context); | |
meta[tokensKey] = [...tokenList]; | |
}; | |
} | |
/** | |
* Set operator precedence rules for the grammar. | |
*/ | |
export function Precedence( | |
assoc: "left" | "right" | "nonassoc", | |
...terms: string[] | |
): <C extends abstract new (...args: any) => any>( | |
ctor: C, | |
context: ClassDecoratorContext<C>, | |
) => void { | |
return (ctor, context) => { | |
const meta = getMetaRecord(ctor, context); | |
const arr = meta[precedenceKey] ??= []; | |
arr.push({ assoc, terms }); | |
}; | |
} | |
/** | |
* Mark the start symbol for the grammar. | |
*/ | |
export function Start( | |
symbol: string, | |
): <C extends abstract new (...args: unknown[]) => unknown>( | |
ctor: C, | |
context: ClassDecoratorContext<C>, | |
) => void { | |
return (ctor, context) => { | |
const meta = getMetaRecord(ctor, context); | |
meta[startKey] = symbol; | |
}; | |
} | |
/** | |
* Designate an error handler method. | |
*/ | |
export function Error(): <T, V extends (tok: unknown) => unknown>( | |
method: V, | |
context: ClassMethodDecoratorContext<T, V>, | |
) => void { | |
return (method, context) => { | |
if (context.kind !== "method") { | |
throw new TypeError("@Error may only decorate class methods"); | |
} | |
const meta = getMetaRecord(method, context); | |
meta[errorHandlerKey] = context.name; | |
}; | |
} | |
// ----------------------------------------------------------------------------- | |
// Lexer decorators | |
// ----------------------------------------------------------------------------- | |
/** | |
* Register a lexer rule on a class method. | |
*/ | |
export function LexToken( | |
pattern: RegExp | string, | |
states: string[] = ["INITIAL"], | |
): <T, V extends (tok: unknown) => unknown>( | |
method: V, | |
context: ClassMethodDecoratorContext<T, V>, | |
) => void { | |
return (method, context) => { | |
if (context.kind !== "method") { | |
throw new TypeError("@LexToken may only decorate class methods"); | |
} | |
const meta = getMetaRecord(method, context); | |
const arr = meta[lexTokenKey] ??= []; | |
states = [...states]; | |
arr.push({ method: context.name, pattern, states }); | |
}; | |
} | |
/** | |
* Ignore specified characters in given lexer states. | |
*/ | |
export function Ignore( | |
chars: string, | |
states: string[] = ["INITIAL"], | |
): <C extends abstract new (...args: unknown[]) => unknown>( | |
ctor: C, | |
context: ClassDecoratorContext<C>, | |
) => void { | |
return (ctor, context) => { | |
const meta = getMetaRecord(ctor, context); | |
const arr = meta[lexIgnoreKey] ??= []; | |
states = [...states]; | |
arr.push({ chars, states }); | |
}; | |
} | |
/** | |
* Pass through literal characters in the lexer. | |
*/ | |
export function Literal( | |
chars: string, | |
): <C extends abstract new (...args: unknown[]) => unknown>( | |
ctor: C, | |
context: ClassDecoratorContext<C>, | |
) => C | void { | |
return (ctor, context) => { | |
const meta = getMetaRecord(ctor, context); | |
meta[lexLiteralKey] = chars; | |
return ctor; | |
}; | |
} | |
/** | |
* Define a lexer state (inclusive or exclusive). | |
*/ | |
export function State( | |
name: string, | |
type: "inclusive" | "exclusive" = "inclusive", | |
): <C extends abstract new (...args: unknown[]) => unknown>( | |
ctor: C, | |
context: ClassDecoratorContext<C>, | |
) => void { | |
return (ctor, context) => { | |
const meta = getMetaRecord(ctor, context); | |
const arr = meta[lexStateKey] ??= []; | |
arr.push({ name, type }); | |
meta[lexStateKey] = arr; | |
}; | |
} | |
// ----------------------------------------------------------------------------- | |
// Metadata getters | |
// ----------------------------------------------------------------------------- | |
export function getProductions( | |
target: object, | |
): { method: string; bnf: string }[] { | |
return readMeta(target, productionsKey) ?? []; | |
} | |
export function getTokens(target: object): string[] { | |
return readMeta(target, tokensKey) ?? []; | |
} | |
export function getPrecedence( | |
target: object, | |
): { assoc: "left" | "right" | "nonassoc"; terms: string[] }[] { | |
return readMeta(target, precedenceKey) ?? []; | |
} | |
export function getStartSymbol(target: object): string | undefined { | |
return readMeta(target, startKey); | |
} | |
export function getErrorHandler(target: object): string | undefined { | |
return readMeta(target, errorHandlerKey); | |
} | |
export function getLexTokens( | |
target: object, | |
): { method: string; pattern: RegExp | string; states: string[] }[] { | |
return readMeta(target, lexTokenKey) ?? []; | |
} | |
export function getLexIgnores( | |
target: object, | |
): { chars: string; states: string[] }[] { | |
return readMeta(target, lexIgnoreKey) ?? []; | |
} | |
export function getLiterals(target: object): string { | |
return readMeta(target, lexLiteralKey) ?? ""; | |
} | |
export function getStates( | |
target: object, | |
): { name: string; type: "inclusive" | "exclusive" }[] { | |
return readMeta(target, lexStateKey) ?? | |
[{ name: "INITIAL", type: "inclusive" }]; | |
} | |
// lex.ts | |
// import { | |
// Logger, | |
// ConsoleLogger, | |
// LexToken, | |
// Ignore, | |
// Literal, | |
// State, | |
// getLexTokens, | |
// getLexIgnores, | |
// getLiterals, | |
// getStates, | |
// } from "./decorators.ts"; | |
// ---------------------------------------------------------------------------- | |
// LexError: thrown when no error rule handles illegal characters | |
// ---------------------------------------------------------------------------- | |
export class LexError extends globalThis.Error { | |
public readonly text: string; | |
constructor(message: string, text: string) { | |
super(message); | |
this.text = text; | |
Object.setPrototypeOf(this, LexError.prototype); | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// LexToken interface | |
// ---------------------------------------------------------------------------- | |
export interface LexToken { | |
type: string; | |
value: string; | |
lineno: number; | |
lexpos: number; | |
lexer?: Lexer; | |
} | |
// ---------------------------------------------------------------------------- | |
// Lexer runtime | |
// ---------------------------------------------------------------------------- | |
export class Lexer { | |
protected lexre: Array< | |
[ | |
RegExp, | |
Array<[((tok: LexToken) => LexToken | null) | null, string | null]>, | |
] | |
> = []; | |
protected lexstateignore: Record<string, string> = {}; | |
protected lexstateerrorf: Record<string, (tok: LexToken) => LexToken | null> = | |
{}; | |
protected lexstateeoff: Record<string, (tok: LexToken) => LexToken> = {}; | |
protected lexliterals: string = ""; | |
protected lexdata = ""; | |
protected lexerrorf: ((tok: LexToken) => LexToken | null) | null = null; | |
protected lexeoff: ((tok: LexToken) => LexToken) | null = null; | |
protected lextokens!: Set<string>; | |
public lexpos = 0; | |
public lexlen = 0; | |
public lineno = 1; | |
constructor(protected readonly logger: Logger = new ConsoleLogger()) {} | |
/** Clone the lexer, rebinding any method-based error/EoF handlers. */ | |
clone(contextObject?: object): Lexer { | |
const cloned = new Lexer(this.logger); | |
cloned.lexre = this.lexre; | |
cloned.lexstateignore = this.lexstateignore; | |
cloned.lexliterals = this.lexliterals; | |
cloned.lexdata = this.lexdata; | |
cloned.lexerrorf = this.lexerrorf; | |
cloned.lexeoff = this.lexeoff; | |
cloned.lextokens = this.lextokens; | |
cloned.lexpos = this.lexpos; | |
cloned.lexlen = this.lexlen; | |
cloned.lineno = this.lineno; | |
if (contextObject) { | |
for (const [state, handler] of Object.entries(this.lexstateerrorf)) { | |
cloned.lexstateerrorf[state] = (contextObject as any)[handler.name] | |
.bind(contextObject); | |
} | |
for (const [state, handler] of Object.entries(this.lexstateeoff)) { | |
cloned.lexstateeoff[state] = (contextObject as any)[handler.name].bind( | |
contextObject, | |
); | |
} | |
} | |
return cloned; | |
} | |
/** Provide input string to lexer. */ | |
input(input: string): this { | |
this.lexdata = input; | |
this.lexpos = 0; | |
this.lexlen = input.length; | |
return this; | |
} | |
/** Switch lexing state. */ | |
begin(state: string): this { | |
if (!(state in this.lexstateignore)) { | |
throw new TypeError(`Undefined state '${state}'`); | |
} | |
this.lexerrorf = this.lexstateerrorf[state] ?? null; | |
this.lexeoff = this.lexstateeoff[state] ?? null; | |
return this; | |
} | |
/** Skip n characters. */ | |
skip(n: number): this { | |
this.lexpos += n; | |
return this; | |
} | |
/** Return next token or null at EOF. */ | |
token(): LexToken | null { | |
const ignoreChars = this.lexstateignore["INITIAL"] ?? ""; | |
while (this.lexpos < this.lexlen) { | |
const currentChar = this.lexdata[this.lexpos]; | |
if (ignoreChars.includes(currentChar)) { | |
this.lexpos++; | |
continue; | |
} | |
for (const [regex, map] of this.lexre) { | |
regex.lastIndex = this.lexpos; | |
const match = regex.exec(this.lexdata); | |
if (!match) continue; | |
const text = match[0]; | |
const tokenBase: LexToken = { | |
type: "", | |
value: text, | |
lineno: this.lineno, | |
lexpos: this.lexpos, | |
lexer: this, | |
}; | |
this.lexpos += text.length; | |
const groupIndex = match.findIndex((grp, i) => i > 0 && grp != null); | |
if (groupIndex >= 0) { | |
const [handler, tokenType] = map[groupIndex] as [ | |
((tok: LexToken) => LexToken | null) | null, | |
string | null, | |
]; | |
if (handler) { | |
const result = handler(tokenBase); | |
if (result) return result; | |
break; // handler consumed token, continue scanning | |
} | |
if (tokenType) { | |
return { ...tokenBase, type: tokenType }; | |
} | |
} | |
} | |
if (this.lexliterals.includes(currentChar)) { | |
const litToken: LexToken = { | |
type: currentChar, | |
value: currentChar, | |
lineno: this.lineno, | |
lexpos: this.lexpos, | |
lexer: this, | |
}; | |
this.lexpos++; | |
return litToken; | |
} | |
if (this.lexerrorf) { | |
const errToken: LexToken = { | |
type: "error", | |
value: this.lexdata.slice(this.lexpos), | |
lineno: this.lineno, | |
lexpos: this.lexpos, | |
lexer: this, | |
}; | |
const recovered = this.lexerrorf(errToken); | |
if (!recovered || this.lexpos === errToken.lexpos) { | |
throw new LexError(`Illegal character '${currentChar}'`, currentChar); | |
} | |
return recovered; | |
} | |
throw new LexError(`Illegal character '${currentChar}'`, currentChar); | |
} | |
if (this.lexeoff) { | |
const eofToken: LexToken = { | |
type: "eof", | |
value: "", | |
lineno: this.lineno, | |
lexpos: this.lexpos, | |
lexer: this, | |
}; | |
return this.lexeoff(eofToken); | |
} | |
return null; | |
} | |
*[Symbol.iterator](): IterableIterator<LexToken> { | |
const token = this.token(); | |
if (token) return yield token; | |
} | |
} | |
// deno-lint-ignore ban-types | |
type unknowns = {} | null | undefined; | |
export interface UserLexerModule { | |
[name: string]: ((tok: LexToken) => LexToken | null) | unknowns; | |
tokens: string[]; | |
literals: string; | |
t_error?(tok: LexToken): LexToken | null; | |
t_eof?(tok: LexToken): LexToken; | |
} | |
// ---------------------------------------------------------------------------- | |
// LexerReflect: builds a Lexer instance from decorators metadata | |
// ---------------------------------------------------------------------------- | |
export class LexerReflect { | |
constructor( | |
private readonly mod: UserLexerModule, | |
private readonly logger: Logger = new ConsoleLogger(), | |
) {} | |
build(): Lexer { | |
const lexer = new Lexer(this.logger); | |
// Setup ignore and literals | |
for (const { chars, states } of getLexIgnores(this.mod)) { | |
for (const state of states) { | |
lexer["lexstateignore"][state] = chars; | |
} | |
} | |
lexer["lexliterals"] = getLiterals(this.mod); | |
// Setup error and eof handlers | |
const errorFn = this.mod.t_error; | |
const eofFn = this.mod.t_eof; | |
for (const { name } of getStates(this.mod)) { | |
if (errorFn) lexer["lexstateerrorf"][name] = errorFn.bind(this.mod); | |
if (eofFn) lexer["lexstateeoff"][name] = eofFn.bind(this.mod); | |
} | |
// Collect patterns | |
const patternEntries = getLexTokens(this.mod); | |
const groupMap: Array< | |
[((tok: LexToken) => LexToken | null) | null, string | null] | |
> = []; | |
const regexSource = patternEntries.map(({ method, pattern }, idx) => { | |
const src = typeof pattern === "string" ? pattern : pattern.source; | |
groupMap[idx + 1] = [ | |
(this.mod[method] as (tok: LexToken) => LexToken | null)?.bind( | |
this.mod, | |
) ?? null, | |
method, | |
]; | |
return `(?<G${idx}>${src})`; | |
}); | |
const masterRegex = new RegExp(regexSource.join("|"), "gy"); | |
lexer["lexre"] = [[masterRegex, groupMap]]; | |
return lexer; | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// Entry point similar to Python lex() | |
// ---------------------------------------------------------------------------- | |
export function lex(mod: UserLexerModule, logger?: Logger): Lexer { | |
const reflect = new LexerReflect(mod, logger); | |
return reflect.build(); | |
} | |
// // yacc.ts | |
// import { | |
// Logger, | |
// ConsoleLogger, | |
// } from "./decorators.ts"; | |
// import { | |
// getProductions, | |
// getTokens, | |
// getPrecedence, | |
// getStartSymbol, | |
// getErrorHandler, | |
// } from "./decorators.ts"; | |
// ---------------------------------------------------------------------------- | |
// YaccError | |
// ---------------------------------------------------------------------------- | |
/** | |
* Exception thrown for parser errors. | |
*/ | |
export class YaccError extends globalThis.Error { | |
constructor(message: string) { | |
super(message); | |
Object.setPrototypeOf(this, YaccError.prototype); | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// YaccSymbol | |
// ---------------------------------------------------------------------------- | |
/** | |
* Symbol held on parser stack. | |
*/ | |
export class YaccSymbol { | |
type!: string; | |
value: unknown; | |
lineno?: number; | |
lexpos?: number; | |
endlineno?: number; | |
endlexpos?: number; | |
toString(): string { | |
return this.type; | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// YaccProduction | |
// ---------------------------------------------------------------------------- | |
/** | |
* Wrapper passed to semantic action functions. | |
*/ | |
export class YaccProduction { | |
slice!: YaccSymbol[]; | |
stack!: YaccSymbol[]; | |
parser!: LRParser; | |
lexer!: any; | |
/** Get the value of position i in production. */ | |
get(i: number): any { | |
return this.slice[i].value; | |
} | |
/** Set the value of position i. */ | |
set(i: number, v: any): void { | |
this.slice[i].value = v; | |
} | |
/** Number of symbols on RHS. */ | |
length(): number { | |
return this.slice.length; | |
} | |
/** Signal a syntax error in action. */ | |
error(): never { | |
throw new SyntaxError(); | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// ProductionRule | |
// ---------------------------------------------------------------------------- | |
interface ProductionRule { | |
lhs: string; | |
rhs: string[]; | |
fn?: (p: YaccProduction) => void; | |
} | |
// ---------------------------------------------------------------------------- | |
// Grammar (SLR) | |
// ---------------------------------------------------------------------------- | |
/** | |
* Grammar with FIRST/FOLLOW and productions. | |
*/ | |
export class Grammar { | |
productions: ProductionRule[] = []; | |
terminals = new Set<string>(); | |
nonterminals = new Set<string>(); | |
prec: Array<{ term: string; assoc: string; level: number }> = []; | |
startSymbol = ""; | |
first = new Map<string, Set<string>>(); | |
follow = new Map<string, Set<string>>(); | |
/** Add a production and optional action. */ | |
addProduction( | |
lhs: string, | |
rhs: string[], | |
fn?: (p: YaccProduction) => void, | |
): void { | |
this.nonterminals.add(lhs); | |
for (const sym of rhs) { | |
if (!/^[A-Z_][A-Z0-9_]*$/.test(sym)) { | |
this.terminals.add(sym); | |
} | |
} | |
this.productions.push({ lhs, rhs, fn }); | |
} | |
/** Declare token names. */ | |
setTokens(tokens: string[]): void { | |
for (const t of tokens) this.terminals.add(t); | |
} | |
/** Declare precedence rules. */ | |
setPrecedence( | |
arr: Array<{ assoc: "left" | "right" | "nonassoc"; terms: string[] }>, | |
): void { | |
let level = 1; | |
for (const entry of arr) { | |
for (const term of entry.terms) { | |
this.prec.push({ term, assoc: entry.assoc, level }); | |
} | |
level++; | |
} | |
} | |
/** Set start symbol and add augmented production. */ | |
setStart(start?: string): void { | |
this.startSymbol = start || this.productions[0].lhs; | |
// augmented production at index 0 | |
this.productions.unshift({ lhs: "S'", rhs: [this.startSymbol] }); | |
this.nonterminals.add("S'"); | |
} | |
/** Compute FIRST sets for terminals and nonterminals. */ | |
computeFirst(): void { | |
// init | |
for (const t of this.terminals) this.first.set(t, new Set([t])); | |
for (const A of this.nonterminals) this.first.set(A, new Set()); | |
let changed: boolean; | |
do { | |
changed = false; | |
for (const prod of this.productions) { | |
const A = prod.lhs; | |
const firstA = this.first.get(A)!; | |
let nullable = true; | |
for (const X of prod.rhs) { | |
const firstX = this.first.get(X)!; | |
for (const a of firstX) { | |
if (a !== "<empty>" && !firstA.has(a)) { | |
firstA.add(a); | |
changed = true; | |
} | |
} | |
if (!firstX.has("<empty>")) { | |
nullable = false; | |
break; | |
} | |
} | |
if (nullable && !firstA.has("<empty>")) { | |
firstA.add("<empty>"); | |
changed = true; | |
} | |
} | |
} while (changed); | |
} | |
/** Compute FOLLOW sets for nonterminals. */ | |
computeFollow(): void { | |
for (const A of this.nonterminals) this.follow.set(A, new Set()); | |
this.follow.get(this.startSymbol)!.add("$end"); | |
let changed: boolean; | |
do { | |
changed = false; | |
for (const prod of this.productions) { | |
const A = prod.lhs; | |
for (let i = 0; i < prod.rhs.length; i++) { | |
const B = prod.rhs[i]; | |
if (!this.nonterminals.has(B)) continue; | |
const followB = this.follow.get(B)!; | |
const beta = prod.rhs.slice(i + 1); | |
let firstBeta = new Set<string>(); | |
if (beta.length === 0) { | |
firstBeta = new Set(["<empty>"]); | |
} else { | |
firstBeta = this.first.get(beta[0])!; | |
} | |
for (const b of firstBeta) { | |
if (b !== "<empty>" && !followB.has(b)) { | |
followB.add(b); | |
changed = true; | |
} | |
} | |
if ( | |
beta.length === 0 || | |
firstBeta.has("<empty>") | |
) { | |
for (const b of this.follow.get(A)!) { | |
if (!followB.has(b)) { | |
followB.add(b); | |
changed = true; | |
} | |
} | |
} | |
} | |
} | |
} while (changed); | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// LRTable (SLR) | |
// ---------------------------------------------------------------------------- | |
interface Item { | |
prod: number; | |
dot: number; | |
} | |
/** | |
* LR parse table (SLR(1)). | |
*/ | |
export class LRTable { | |
public action: Record<number, Record<string, number>> = {}; | |
public goto: Record<number, Record<string, number>> = {}; | |
public productions: ProductionRule[]; | |
private C: Item[][] = []; | |
constructor( | |
private grammar: Grammar, | |
private logger: Logger = new ConsoleLogger(), | |
) { | |
this.productions = grammar.productions; | |
grammar.computeFirst(); | |
grammar.computeFollow(); | |
this.C = this.lr0Items(); | |
this.buildTable(); | |
} | |
private lr0Closure(I: Item[]): Item[] { | |
const J = [...I]; | |
let changed: boolean; | |
do { | |
changed = false; | |
for (const itm of J) { | |
const prod = this.productions[itm.prod]; | |
if (itm.dot < prod.rhs.length) { | |
const B = prod.rhs[itm.dot]; | |
if (this.grammar.nonterminals.has(B)) { | |
for (let p = 0; p < this.productions.length; p++) { | |
if (this.productions[p].lhs === B) { | |
const newItm = { prod: p, dot: 0 }; | |
if (!J.some(x => x.prod === p && x.dot === 0)) { | |
J.push(newItm); | |
changed = true; | |
} | |
} | |
} | |
} | |
} | |
} | |
} while (changed); | |
return J; | |
} | |
private lr0Goto(I: Item[], X: string): Item[] { | |
const J: Item[] = []; | |
for (const itm of I) { | |
const prod = this.productions[itm.prod]; | |
if (itm.dot < prod.rhs.length && prod.rhs[itm.dot] === X) { | |
J.push({ prod: itm.prod, dot: itm.dot + 1 }); | |
} | |
} | |
return this.lr0Closure(J); | |
} | |
private lr0Items(): Item[][] { | |
const C: Item[][] = []; | |
const startSet = this.lr0Closure([{ prod: 0, dot: 0 }]); | |
C.push(startSet); | |
const symbols = [...this.grammar.terminals, ...this.grammar.nonterminals]; | |
for (let i = 0; i < C.length; i++) { | |
for (const X of symbols) { | |
const gotoIX = this.lr0Goto(C[i], X); | |
if (gotoIX.length === 0) continue; | |
if (!C.some(s => this.sameItems(s, gotoIX))) { | |
C.push(gotoIX); | |
} | |
} | |
} | |
return C; | |
} | |
private sameItems(A: Item[], B: Item[]): boolean { | |
if (A.length !== B.length) return false; | |
return A.every(a => B.some(b => b.prod === a.prod && b.dot === a.dot)); | |
} | |
private buildTable(): void { | |
const C = this.C; | |
for (let i = 0; i < C.length; i++) { | |
this.action[i] = {}; | |
this.goto[i] = {}; | |
for (const itm of C[i]) { | |
const prod = this.productions[itm.prod]; | |
if (itm.dot < prod.rhs.length) { | |
const a = prod.rhs[itm.dot]; | |
if (this.grammar.terminals.has(a)) { | |
const J = this.lr0Goto(C[i], a); | |
const j = C.findIndex(s => this.sameItems(s, J)); | |
if (j >= 0) this.action[i][a] = j; | |
} | |
} else { | |
if (itm.prod === 0) { | |
this.action[i]["$end"] = 0; | |
} else { | |
const A = prod.lhs; | |
for (const a of this.grammar.follow.get(A)!) { | |
this.action[i][a] = -itm.prod; | |
} | |
} | |
} | |
} | |
for (const A of this.grammar.nonterminals) { | |
const J = this.lr0Goto(C[i], A); | |
const j = C.findIndex(s => this.sameItems(s, J)); | |
if (j >= 0) this.goto[i][A] = j; | |
} | |
} | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// LRParser | |
// ---------------------------------------------------------------------------- | |
/** | |
* LR parser engine. | |
*/ | |
export class LRParser { | |
constructor( | |
private table: LRTable, | |
private errorHandler?: (tok: any) => any, | |
private logger: Logger = new ConsoleLogger(), | |
) {} | |
/** Parse input using a lexer instance. */ | |
parse(input: string, lexer: any): unknown { | |
lexer.input(input); | |
const stateStack: number[] = [0]; | |
const symbolStack: YaccSymbol[] = [new YaccSymbol()]; | |
symbolStack[0].type = "$end"; | |
let lookahead: any = null; | |
const prodObj = new YaccProduction(); | |
prodObj.parser = this; | |
prodObj.lexer = lexer; | |
while (true) { | |
if (!lookahead) { | |
const tk = lexer.token(); | |
lookahead = tk || Symbol("end"); | |
if (tk) lookahead.type = tk.type; | |
} | |
const state = stateStack[stateStack.length - 1]; | |
const act = this.table.action[state][lookahead.type]; | |
if (act === undefined) { | |
if (this.errorHandler) { | |
lookahead = this.errorHandler(lookahead); | |
continue; | |
} | |
throw new YaccError(`Syntax error: unexpected ${lookahead.type}`); | |
} | |
if (act > 0) { | |
stateStack.push(act); | |
symbolStack.push(lookahead as YaccSymbol); | |
lookahead = null; | |
} else if (act < 0) { | |
const prodIndex = -act; | |
const prod = this.table.productions[prodIndex]; | |
const rhsLen = prod.rhs.length; | |
const slice = symbolStack.splice(-rhsLen); | |
stateStack.splice(-rhsLen); | |
prodObj.slice = slice; | |
prodObj.stack = symbolStack; | |
if (prod.fn) prod.fn(prodObj); | |
const newSym = new YaccSymbol(); | |
newSym.type = prod.lhs; | |
newSym.value = prodObj.get(0); | |
symbolStack.push(newSym); | |
const gotoState = this.table.goto[stateStack[stateStack.length - 1]][prod.lhs]; | |
stateStack.push(gotoState); | |
} else { | |
// accept | |
return symbolStack[symbolStack.length - 1].value; | |
} | |
} | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// Public API: yacc() | |
// ---------------------------------------------------------------------------- | |
/** | |
* Build an LR parser from a decorated class. | |
*/ | |
export function yacc<C extends new (...args: any[]) => any>( | |
ParserClass: C, | |
logger: Logger = new ConsoleLogger(), | |
): LRParser { | |
const grammar = new Grammar(); | |
grammar.setTokens(getTokens(ParserClass.prototype)); | |
grammar.setPrecedence(getPrecedence(ParserClass.prototype)); | |
grammar.setStart(getStartSymbol(ParserClass.prototype)); | |
// bind productions | |
for (const { method, bnf } of getProductions(ParserClass.prototype)) { | |
const parts = bnf.split(/\s+/); | |
const lhs = parts[0]; | |
const rhs = parts.slice(2); | |
const fn = (ParserClass.prototype as any)[method].bind(null); | |
grammar.addProduction(lhs, rhs, fn); | |
} | |
const table = new LRTable(grammar, logger); | |
const errorMethod = getErrorHandler(ParserClass.prototype); | |
const errorFn = errorMethod | |
? (ParserClass.prototype as any)[errorMethod].bind(null) | |
: undefined; | |
return new LRParser(table, errorFn, logger); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment