Design experiments for Panini's grammar description
fn main() {
    let mkv = grammar! {
        start ::= (id, size, hsize):resync;
        resync ::= resync_element_id:bytes(0x1F43B675 | 0x18538067 | 0x1654AE6B) ~ (seglen, x):matroska_number => {
            (resync_element_id, seglen, x + 4)
        };
        matroska_number ::= b:byte ~ (mut r):byte+(leading_zeros(b)) => {
            let n = leading_zeros(b) + 1;
            if r == (1 << (7 * n)) - 1 {
                r = -1;
            }
            (r, n)
        };
        #[lua]
        comptime leading_zeros(arg: byte) {
            -- Count the leading zero bits of the first VINT byte.
            local msb = 2 ^ 7
            local result = 0
            while bit.band(arg, msb) == 0 and msb ~= 0 do
                msb = math.floor(msb / 2)
                result = result + 1
            end
            return result
        }
    };
}
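For context, the matroska_number rule above describes standard EBML variable-size-integer (VINT) decoding: the count of leading zero bits in the first byte tells how many additional bytes belong to the number, and an all-ones value is the reserved "size unknown" marker, which the rule maps to -1. The three IDs accepted by resync (0x1F43B675, 0x18538067, 0x1654AE6B) are the Matroska Cluster, Segment, and Tracks element IDs. The following is a minimal plain-Rust sketch of that decoding under this reading of the rule; decode_vint and its exact treatment of the first byte's length-marker bit are illustrative assumptions, not part of the grammar DSL.

/// Decode an EBML variable-size integer (VINT), as sketched by the rule above.
/// Returns (value, total length in bytes). Illustrative only.
fn decode_vint(bytes: &[u8]) -> Option<(i64, usize)> {
    let first = *bytes.first()?;
    if first == 0 {
        // No length-marker bit within the first byte: not a valid VINT here.
        return None;
    }
    // Leading zero bits of the first byte determine the total length (1..=8).
    let n = first.leading_zeros() as usize + 1;
    if bytes.len() < n {
        return None;
    }
    // Mask off the length marker, then shift in the remaining bytes.
    let mut value = first as i64 & (0xFF >> n);
    for &b in &bytes[1..n] {
        value = (value << 8) | b as i64;
    }
    // All data bits set is the reserved "unknown size" value.
    if value == (1i64 << (7 * n)) - 1 {
        value = -1;
    }
    Some((value, n))
}

fn main() {
    // 0x81 = 0b1000_0001: a one-byte VINT with value 1.
    assert_eq!(decode_vint(&[0x81]), Some((1, 1)));
    // 0x1A has three leading zero bits, so this is a four-byte VINT.
    assert_eq!(decode_vint(&[0x1A, 0x45, 0xDF, 0xA3]), Some((0x0A45_DFA3, 4)));
}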
let mut parser = grammar! {
    start ::= attrs:inner_attr* ~ stmts:stmt* ~ lexer:lexer => {
        Stmts {
            attrs: attrs,
            stmts: stmts,
            lexer: lexer,
        }
    };
    inner_attr ::=
        span:Spanned(pound ~ not ~ l_bracket ~ value:meta_item ~ r_bracket) => {
            let attr = rs::Attribute_ {
                id: rs::mk_attr_id(),
                style: rs::ast::AttrStyle::Inner,
                value: value,
                is_sugared_doc: false,
            };
            let mut ret = rs::spanned(span, attr);
            ret.span.expn_id = span.expn_id;
            ret
        };
    meta_item: rs::P<rs::Spanned<_>> ::=
        name:ident => {
            rs::ast::MetaItemKind::Word(name.node.as_str()).into_with(ctx)
        }
        | span:Spanned(name:ident ~ l_paren ~ items:meta_item_list ~ r_paren ~ rightmost) => {
            let list = rs::ast::MetaItemKind::List(name.node.as_str(), items);
            rs::spanned(span, list).into()
        };
    // liberal separation?
    meta_item_list ::= meta_item*(%comma);
    stmt ::= lhs:name ~ ty:ty ~ defined_as ~ rhs:top_rhs ~ semi => {
        Stmt {
            lhs: lhs,
            rhs: rhs,
            ty: ty,
            span: rs::DUMMY_SP,
        }
    };
    defined_as ::= mod_sep eq;
    ty ::=
        (rarrow tt:tt => {
            Some(quote_ty!(cx, $tt))
        })?;
    action ::=
        fat_arrow l_brace tts:tts r_brace => {
            Some(quote_expr!(cx, { $tts }))
        }
        | () => {
            None
        };
    tt ::=
        t:any_token => {
            let (t, _): (&rs::Token, _) = t;
            rs::TokenTree::Token(rs::DUMMY_SP, (*t).clone())
        }
        | l_bracket tts:tts r_bracket => {
            delimit(tts, rs::Bracket)
        }
        | l_paren tts:tts r_paren => {
            delimit(tts, rs::Paren)
        }
        | brace_tt;
    brace_tt ::= l_brace tts:tts r_brace => {
        delimit(tts, rs::Brace)
    };
    tts ::= tt*;
    pattern ::=
        ident:ident colon => {
            let ident: rs::SpannedIdent = ident;
            Some(AstBuilder::new().span(ident.span).pat().id(ident.node))
        }
        | underscore colon => {
            Some(AstBuilder::new().pat().wild())
        }
        | () => {
            None
        };
    top_rhs ::=
        (elems:pat_elem* block:action)*[%pipe] => {
            vec![(Rhs(elems), Action { expr: block })]
        }
        | v:top_rhs pipe elems:pat_elem* block:action => {
            let mut v = v;
            v.push((Rhs(elems), Action { expr: block }));
            v
        };
    alt ::= rhs*(%pipe);
    rhs ::= elems:pat_elem* => { Rhs(elems) };
    pat_elem ::= pat:pattern ~ elem:elem => {
        RhsElement {
            bind: pat,
            elem: elem,
        }
    };
    elem ::=
        sym:name => {
            RhsAst::Symbol(sym)
        }
        | l_paren ~ alt:alt ~ r_paren => {
            RhsAst::Sum(alt)
        }
        | rhs:rhs_elem ~ star => {
            RhsAst::Sequence(Sequence {
                rhs: rhs,
                min: 0,
                max: None,
            })
        }
        | rhs:rhs_elem plus => {
            RhsAst::Sequence(Sequence {
                rhs: rhs,
                min: 1,
                max: None,
            })
        }
        | sp:Span![s:string] => {
            let t = if let &rs::token::Literal(rs::token::Str_(t), _) = s {
                t
            } else {
                panic!();
            };
            RhsAst::String(rs::respan(sp, t))
        };
    rhs_elem ::= elem:elem => {
        let mut v = Vec::new();
        v.push(RhsElement {
            bind: None,
            elem: elem,
        });
        Rhs(v)
    };
    lexer ::=
        "sub" name:name l_brace tts:tts r_brace => {
            Some(Lexer::new(name.node, tts))
        }
        | () => {
            None
        };
    ident: Spanned<_> ::= ident_tok_with_span;
    // Tokenization is performed by Rust's lexer; the enum adaptor below
    // reads its tokens.
    sub enum_stream! {
        not = (&rs::Token::Not, _);
        pound = (&rs::Token::Pound, _);
        comma = (&rs::Token::Comma, _);
        underscore = (&rs::Token::Underscore, _);
        colon = (&rs::Token::Colon, _);
        mod_sep = (&rs::Token::ModSep, _);
        eq = (&rs::Token::Eq, _);
        rarrow = (&rs::Token::RArrow, _);
        fat_arrow = (&rs::Token::FatArrow, _);
        semi = (&rs::Token::Semi, _);
        star = (&rs::Token::BinOp(rs::BinOpToken::Star), _);
        plus = (&rs::Token::BinOp(rs::BinOpToken::Plus), _);
        pipe = (&rs::Token::BinOp(rs::BinOpToken::Or), _);
        ident_tok_with_span = (&rs::Token::Ident(_), _);
        l_brace = (&rs::Token::OpenDelim(rs::DelimToken::Brace), _);
        r_brace = (&rs::Token::CloseDelim(rs::DelimToken::Brace), _);
        l_bracket = (&rs::Token::OpenDelim(rs::DelimToken::Bracket), _);
        r_bracket = (&rs::Token::CloseDelim(rs::DelimToken::Bracket), _);
        l_paren = (&rs::Token::OpenDelim(rs::DelimToken::Paren), _);
        r_paren = (&rs::Token::CloseDelim(rs::DelimToken::Paren), _);
        string = (&rs::Token::Literal(rs::token::Str_(_), _), _);
        sub = (&rs::Token::Ident(ident), _) if (ident.name.as_str() == "sub");
        any_token =
            ~&rs::Token::OpenDelim(_) &&
            ~&rs::Token::CloseDelim(_);
    }
};
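For comparison, the surface syntax this bootstrap grammar is written to accept would look roughly like the snippet below: a rule is `name -> Ty ::= alternatives ;`, alternatives are separated by `|`, elements may carry `binding:` prefixes, and `*`, `+`, parenthesized groups, and string terminals are allowed, with an optional `sub name { ... }` lexer block closing the grammar. The example is hypothetical; the nonterminal names, types, and actions are made up for illustration.

expr -> Expr ::=
      l:expr "+" r:term => { Expr::Add(Box::new(l), Box::new(r)) }
    | t:term => { t };
term ::= factors:factor+ => { Expr::Product(factors) };
factor ::=
      i:ident => { Expr::Var(i) }
    | "(" e:expr ")" => { e };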
fn main() {
    let grammar = grammar! {
        start ::= foo regexp("[a-zA-Z]"i);
        foo ::= pomsky("'xyz' 'foobar'") ~ regexp(r#"["'].*["']"#);
    };
}
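Assuming the regexp(...) and pomsky(...) terminals expand to ordinary regular expressions, the second rule combines a Pomsky pattern (two string literals in sequence, i.e. the literal text xyzfoobar) with a raw regex for a quoted span. The regex-crate sketch below only illustrates what those two terminals would match; it is not part of the proposed DSL.

use regex::Regex;

fn main() {
    // pomsky("'xyz' 'foobar'"): adjacent string literals concatenate.
    let pomsky_equiv = Regex::new("xyzfoobar").unwrap();
    // regexp(r#"["'].*["']"#): a span delimited by single or double quotes.
    let quoted = Regex::new(r#"["'].*["']"#).unwrap();

    assert!(pomsky_equiv.is_match("xyzfoobar"));
    assert!(quoted.is_match(r#"say "hello" here"#));
}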