Skip to content

Instantly share code, notes, and snippets.

@Hugoberry
Created January 29, 2025 14:24
Show Gist options
  • Save Hugoberry/4905f6e091305c6a3ff3c1fe2c0a50b2 to your computer and use it in GitHub Desktop.
Save Hugoberry/4905f6e091305c6a3ff3c1fe2c0a50b2 to your computer and use it in GitHub Desktop.
@{%
const moo = require('moo');
// Consolidated token patterns for the moo lexer.
//
// Rule order is significant: when two rules can match at the same position the
// one listed first wins, so more specific patterns (empty_parens, number_range,
// empty_column_ref) are listed before the general ones they overlap with.
//
// NOTE(review): operator_type and keyword are plain literals listed before
// `identifier`, so an identifier that merely starts with one of them
// (e.g. "Integers") would presumably be split in two — confirm whether such
// names can occur in the input.
const patterns = {
  // Whitespace and basic tokens
  ws: /[ \t]+/,
  newline: { match: /\n/, lineBreaks: true },
  colon: ':',
  equals: '=',
  rparen: ')',
  comma: ',',
  hash: '#',

  // Special patterns
  angle_expr: {
    // "<...>" with at most one level of nested "<...>"; value drops the outer brackets.
    match: /<(?:[^<>]+|<[^<>]*>)*>/,
    value: x => x.slice(1, -1)
  },
  empty_parens: {
    match: /\(\)/,
    value: () => '()'
  },
  // Must be a RegExp listed AFTER empty_parens. As a string literal declared
  // before it, '(' claimed the first character of every "()" and the
  // empty_parens rule could never match.
  lparen: /\(/,
  number_range: {
    // e.g. "3-7"; listed before `number` so the range is not split at the dash.
    match: /\d+-\d+/,
    value: x => x
  },
  number: {
    match: /\d+(?:\.\d+)?/,
    value: Number
  },

  // Column references
  empty_column_ref: {
    // ''[Column] — a column reference with an empty table name.
    match: /''\[[^\]]*\]/,
    value: x => ({type: 'column_ref', table: '', column: x.slice(3, -1)})
  },
  column_ref: {
    // 'Table'[Column]
    match: /'[^']*'\[[^\]]*\]/,
    value: x => {
      // The table part cannot contain a quote, so the first quote after the
      // opening one closes it. Slicing there (rather than splitting on '[')
      // keeps any '[' inside the table or column name intact.
      const closingQuote = x.indexOf("'", 1);
      return {
        type: 'column_ref',
        table: x.slice(1, closingQuote),
        column: x.slice(closingQuote + 2, -1)
      };
    }
  },
  measure_ref: {
    // [Measure] — value drops the surrounding brackets.
    match: /\[[^\]]*\]/,
    value: x => x.slice(1, -1)
  },

  // Keywords and identifiers
  operator_type: {
    match: ['RelLogOp', 'ScaLogOp', 'LookupPhyOp', 'IterPhyOp', 'SpoolPhyOp'],
    type: 'operator_type'
  },
  keyword: {
    match: ['Boolean', 'Integer', 'String', 'Currency', 'Double', 'NONE', 'BLANK'],
    type: 'keyword'
  },
  identifier: {
    match: /[a-zA-Z_][a-zA-Z0-9_.]*/,
    type: 'identifier'
  }
};
// Compile the rule table into the moo lexer that the `@lexer lexer` directive hands to nearley.
const lexer = moo.compile(patterns);
%}
@lexer lexer
# Root rules
# Entry point: the parsed list of lines is handed to buildTree.
main -> lines {%
  (data) => buildTree(data[0])
%}
# One line followed by any number of (newline, line) pairs; yields a flat array of line objects.
lines -> line (%newline line):* {%
  (data) => {
    const head = data[0];
    const tail = data[1];
    // Each tail entry is a [newline, line] pair; only the line is kept.
    const others = tail?.map((pair) => pair[1]) || [];
    return [head, ...others];
  }
%}
# Basic line structure
# Leading whitespace followed by a statement; the whitespace length is recorded as `indent`.
line -> _ statement {%
  (data) => {
    const leading = data[0];
    const stmt = data[1];
    const width = leading?.length || 0;
    // Statement fields are copied after `indent`, as in a spread.
    return Object.assign({ indent: width }, stmt);
  }
%}
# operator, optional <...> parameters, a colon, then the property tokens.
statement -> operator angle_params:? %colon rest_of_line {%
  (data) => {
    const name = data[0];
    const angle = data[1];
    // data[2] is the colon token and carries no information.
    const parsed = data[3];
    return {
      operator: name,
      parameters: angle,
      properties: parsed || {}
    };
  }
%}
# Operator handling
# An operator is an identifier or a column reference; each alternative yields the token's value.
operator ->
    %identifier {% (d) => d[0].value %}
  | %column_ref {% (d) => d[0].value %}
  | %empty_column_ref {% (d) => d[0].value %}
# Yields the value of the angle_expr token.
angle_params -> %angle_expr {% (d) => d[0].value %}
# Property handling
# Everything after the colon: tokens with optional whitespace before each one.
rest_of_line -> (_ token):* {%
  (data) => {
    // Each entry is a [whitespace, token] pair; the whitespace is discarded.
    const items = data[0].map((pair) => pair[1]);
    return parseProperties(items);
  }
%}
# Any single lexer token that may appear after the colon.
# Each alternative uses the built-in `id` postprocessor so the rule yields the
# token object itself. Without a postprocessor nearley returns a one-element
# array, and the consumer's checks on `.type` / `.value` would never match.
token ->
    %operator_type {% id %}
  | %identifier {% id %}
  | %equals {% id %}
  | %lparen {% id %}
  | %rparen {% id %}
  | %angle_expr {% id %}
  | %number {% id %}
  | %hash {% id %}
  | %comma {% id %}
  | %column_ref {% id %}
  | %empty_column_ref {% id %}
  | %empty_parens {% id %}
  | %number_range {% id %}
  | %keyword {% id %}
  | %measure_ref {% id %}
# Optional whitespace; yields the matched whitespace text as one string ('' when absent).
_ -> %ws:* {%
  (data) => {
    const pieces = [];
    for (const tok of data[0]) {
      pieces.push(tok.value);
    }
    return pieces.join('');
  }
%}
@{%
/**
 * Helper function to parse properties from a token stream.
 *
 * Recognised shapes (anything else outside a paren group is ignored):
 *   - operator_type token      -> properties.type
 *   - keyword token            -> properties.value, or appended to the open paren group
 *   - name = value             -> properties[name] = value
 *   - # name = value           -> properties[name] = value
 *   - name ()  /  name () ()   -> properties[name] = '()' / '()()'
 *   - name ( a, b, ... )       -> properties[name] = [a, b, ...] (commas dropped)
 *
 * A `name =` with nothing after it yields `null` instead of throwing, and a
 * paren group that is still open at the end of the line is stored with
 * whatever it has collected so far.
 *
 * @param {Array<{type: string, value: *}>} tokens - Tokens following the colon, in order.
 * @returns {Object} Map of property name to parsed value.
 */
function parseProperties(tokens) {
  const properties = {};
  let current = null;      // name of the paren group being collected
  let inParens = false;
  let parenContent = [];

  // Value of the token at `index`, or null when the line ends before it.
  const valueAt = (index) => (index < tokens.length ? tokens[index].value : null);

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    const nextType = tokens[i + 1]?.type;

    // Handle operator types
    if (token.type === 'operator_type') {
      properties.type = token.value;
      continue;
    }

    // Handle keywords
    if (token.type === 'keyword') {
      if (current) parenContent.push(token.value);
      else properties.value = token.value;
      continue;
    }

    // Handle identifiers
    if (token.type === 'identifier') {
      // Property assignments: name = value
      if (nextType === 'equals') {
        properties[token.value] = valueAt(i + 2);
        i += 2;
        continue;
      }

      // Empty parentheses: name () or name () ()
      if (nextType === 'empty_parens') {
        const hasDoubleParens = tokens[i + 2]?.type === 'empty_parens';
        properties[token.value] = hasDoubleParens ? '()()' : '()';
        i += hasDoubleParens ? 2 : 1;
        continue;
      }

      // Start of parentheses group: name (
      if (nextType === 'lparen') {
        current = token.value;
        inParens = true;
        parenContent = [];
        i++; // skip the '(' itself
        continue;
      }
      // Any other identifier falls through and may be collected as paren content.
    }

    // Handle hash properties: # name = value
    if (token.type === 'hash' &&
        nextType === 'identifier' &&
        tokens[i + 2]?.type === 'equals') {
      properties[tokens[i + 1].value] = valueAt(i + 3);
      i += 3;
      continue;
    }

    // Handle content inside parentheses
    if (inParens) {
      if (token.type === 'rparen') {
        properties[current] = parenContent;
        current = null;
        inParens = false;
      } else if (token.type !== 'comma') {
        parenContent.push(token.value);
      }
    }
  }

  // Keep a group whose closing ')' never arrived instead of dropping it.
  if (inParens) {
    properties[current] = parenContent;
  }

  return properties;
}
/**
 * Helper function to build the tree structure.
 *
 * Each line becomes a node `{ operator, parameters, properties, children }`.
 * A line is attached as a child of the nearest preceding line whose indent is
 * strictly smaller; lines with no such predecessor become roots. Only the
 * relative indent widths matter, so one tab per level and several spaces per
 * level both work.
 *
 * @param {Array<{indent: number, operator: *, parameters: *, properties: Object}>} lines
 *   Parsed lines in source order.
 * @returns {Array<Object>} Root nodes in source order.
 */
function buildTree(lines) {
  const roots = [];
  // Chain of currently open ancestors, shallowest first. Each entry remembers
  // the indent of the line that produced it so deeper lines can find their parent.
  const open = [];

  for (const line of lines ?? []) {
    const node = {
      operator: line.operator,
      parameters: line.parameters,
      properties: line.properties,
      children: []
    };

    // Close every ancestor that is at the same depth or deeper than this line.
    while (open.length > 0 && open[open.length - 1].indent >= line.indent) {
      open.pop();
    }

    const siblings = open.length > 0 ? open[open.length - 1].node.children : roots;
    siblings.push(node);
    open.push({ indent: line.indent, node });
  }

  return roots;
}
%}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment