quat1024 · September 10, 2023 05:47
diff --git a/ParserExploration2.java b/ParserExploration2.java
 package agency.highlysuspect.parsefunny;

 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;

 import java.util.ArrayDeque;
 import java.util.Deque;
 import java.util.stream.Collectors;

 //This is sort of a synthesis of the parsers described in these two articles:
 //
 //matklad - "Simple but Powerful Pratt Parsing"
 //  https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
 //  Very featureful Pratt parser that can recognize prefix, postfix, infix, and "around"fix operators.
 //  Uses a somewhat delicate numeric precedence scheme. Implemented in Rust.
 //  *He also has this nice article https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
 //
 //Jamie Brandon (scattered-thoughts) - "Better operator precedence"
 //  https://www.scattered-thoughts.net/writing/better-operator-precedence/
 //  Introduces a "comparePrecedence" function instead of using numeric precedence.
 //  But the given implementation (in Zig) doesn't cover anything other than infix operators.
 //
 //Wanted the best of both. I think I got it.
 //
 //There's also this nice article about pratt parsing by the Crafting Interpreters developer:
 //Bob Nystrom - Pratt Parsers: Expression Parsing Made Easy
 //  https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
 //
 //But it gets a little bogged down in all the Java, so much so that someone wrote a "pratt parsing
 //without virtual dispatch or Java" article about it (http://www.oilshell.org/blog/2016/11/03.html),
 //and I wanted to redeem Java a bit :) So here's a complete lexer and parser in ~200 lines of commented java 17.

 public class ParserExploration2 {
 	static void check(boolean arg, String msg) {
 		if(!arg) throw new IllegalStateException(msg);
 	}
 	
 	/// Lexing ///
 	
 	enum TokenType {
 		EOF,  //token after the end of the file (very common technique to avoid Optional<Token> everywhere)
 		ATOM, //stand-in for a "number" or "name literal" token you might have in a real parser
 		STAR, SLASH, SQUIGGLE, PLUS, MINUS, DOT, BANG, PERCENT, //grab bag of operators
 		LPAREN, RPAREN, //parenthesis
 	}
 	
 	record Token(TokenType type, String display) {
 		@Override public String toString() { return display; }
 	}
 	
 	static class Lexer {
 		//Toy-quality lexer ported from matklad's post, lexes 1-character long operations and literals only.
 		//Don't get bogged down with this, it's not the important part.
 		Lexer(String in) {
 			this.tokens = in.chars()
 				.filter(i -> !Character.isWhitespace(i))
 				.mapToObj(i -> switch(i) {
 					case '*' -> new Token(TokenType.STAR, "*");
 					case '/' -> new Token(TokenType.SLASH, "/");
 					case '~' -> new Token(TokenType.SQUIGGLE, "~");
 					case '+' -> new Token(TokenType.PLUS, "+");
 					case '-' -> new Token(TokenType.MINUS, "-");
 					case '.' -> new Token(TokenType.DOT, ".");
 					case '!' -> new Token(TokenType.BANG, "!");
 					case '%' -> new Token(TokenType.PERCENT, "%");
 					case '(' -> new Token(TokenType.LPAREN, "(");
 					case ')' -> new Token(TokenType.RPAREN, ")");
 					default -> new Token(TokenType.ATOM, Character.toString(i));
 				}).collect(Collectors.toCollection(ArrayDeque::new));
 		}
 		
 		//A "peeking iterator" API over the lexemes. Note that there's no way to see two tokens ahead.
 		final Deque<Token> tokens;
 		
 		Token peek() {
 			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.peekFirst();
 		}
 		
 		Token pop() {
 			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.pollFirst();
 		}
 	}
 	
 	/// AST sketch. Renders toString as an S-expression. ///
 	
 	interface Expr {}
 	record Atom(String s) implements Expr {
 		@Override public String toString() { return s; }
 	}
 	record UnaryOp(OperationType op, Expr operand) implements Expr {
 		@Override public String toString() { return "(%s %s)".formatted(op, operand); }
 	}
 	record BinaryOp(OperationType op, Expr left, Expr right) implements Expr {
 		@Override public String toString() { return "(%s %s %s)".formatted(op, left, right); }
 	}
 	
 	/// Parsing ///
 	
 	//Entrypoint for the recursive expression parser. In a real parsing application, this would be one of many little functions.
 	Expr expr(Lexer lex) {
 		return pratt(lex, null); //`null` means "there is no operation to my left to consider binding to".
 	}
 	
 	//Pratt parser bullshit magic:
 	Expr pratt(Lexer lex, @Nullable OperationType leftOp) {
 		Token lhsToken = lex.pop();
 		Expr lhs = switch(lhsToken.type) {
 			case ATOM -> new Atom(lhsToken.toString());
 			case LPAREN -> {
 				//When crossing into parenthesis, revert to the null operation type.
 				//Nothing should be able to bind leftwards *through* an opening paren.
 				Expr e = pratt(lex, null);
 				check(lex.pop().type == TokenType.RPAREN, "expected closing parenthesis");
 				yield e;
 			}
 			default -> {
 				//is it prefix?
 				OperationType asPrefix = OperationType.prefixOpFrom(lhsToken);
 				check(asPrefix != null, lhsToken + " is not a prefix operation!");
 				//we already consumed the prefix operator's token
 				Expr rhs = pratt(lex, asPrefix); //recurse to find the right-hand side of the expression
 				yield new UnaryOp(asPrefix, rhs); //apply the prefix operator to the *next* operand
 				//look for another infix or postfix operation
 			}
 		};
 		
 		while(true) {
 			Token opToken = lex.peek();
 			//(Special casing for EOF, RParen, etc is not actually needed; xxxOpFrom returning `null` takes care of it.)
 			
 			//is it infix?
 			OperationType infixOp = OperationType.infixOpFrom(opToken);
 			if(infixOp != null && bindsToRight(leftOp, infixOp)) {
 				lex.pop(); //consume the infix operator's token
 				Expr rhs = pratt(lex, infixOp); //recurse to find the right-hand side of the expression (!HARD to understand!)
 				lhs = new BinaryOp(infixOp, lhs, rhs); //apply the infix operator to *both* operands
 				continue; //look for another infix or postfix operation
 			}
 			
 			//is it postfix?
 			OperationType postfixOp = OperationType.postfixOpFrom(opToken);
 			if(postfixOp != null && bindsToRight(leftOp, postfixOp)) {
 				lex.pop(); //consume the postfix operator's token
 				lhs = new UnaryOp(postfixOp, lhs); //apply the postfix operator to the *previous* operand
 				continue; //look for another infix or postfix operation
 			}
 			
 			return lhs;
 		}
 	}
 	
 	//Whether the operation on the right is "stronger" than the one on the left.
 	static boolean bindsToRight(@Nullable OperationType left, @NotNull OperationType right) {
 		if(left == null) return true; //There isn't an operator on the left to even consider binding to!
 		
 		//Key observation: this is the only place precedence and rightAssociative are read.
 		//The precedence implementation can be swapped out for anything (such as a handwritten table),
 		//if manual precedence-number fiddling isn't up your alley. Exceptions for specific pairs of
 		//operators can also be added to this method.
 		if(left.precedence == right.precedence) return left.rightAssociative; //break ties like this
 		else return right.precedence > left.precedence;
 	}
 	
 	//A token corresponds to zero or more "logical operations" depending on the token's fixity.
 	//For example, the - token can be unary minus or subtraction, and unary minus binds tighter than
 	//multiplication, but multiplication binds tighter than subtraction. In other words, tokens don't
 	//really "have precedence", but (token, fixity) tuples do. The typical way of modelling this is
 	//maintaining separate precedence tables: one for prefix operators, one for infix operators, and
 	//one for postfix operators. This is a little fiddly, so in the spirit of reducing the amount of
 	//"magic numbers" in pratt parsing, I tried mapping each operator and fixity into a *single*
 	//OperationType table, giving a name to the operation at the same time (so I can say "factorial"
 	//instead of "postfix bang"). Maybe it makes more sense this way? It's probably a little easier to
 	//reason about when it comes time to add more operations? I think it's just as flexible? Idk.
 	enum OperationType {
 		ADD           (20, false, "+"),
 		SUBTRACT      (20, false, "-"),
 		SQUIGGLE      (30, false, "~"), //Arbitrary binary operator with precedence between + and *
 		PERCENT       (30,  true, "%"), //Same, but right-associative
 		MULTIPLY      (40, false, "*"),
 		DIVIDE        (40, false, "/"),
 		UNARY_PLUS    (50, false, "+"),
 		UNARY_MINUS   (50, false, "-"),
 		FACTORIAL     (60, false, "!"), //Postfix
 		UNARY_SQUIGGLE(70, false, "~"),
 		COMPOSITION   (100, true, "."); //Right-associative
 		
 		OperationType(int precedence, boolean rightAssociative, String display) {
 			this.precedence = precedence;
 			this.rightAssociative = rightAssociative;
 			this.display = display;
 		}
 		
 		final int precedence;
 		final boolean rightAssociative;
 		final String display;
 		@Override public String toString() { return this.display; }
 		
 		//Tables for converting tokens into logical operations, depending on the token's fixity.
 		//Note that if a token has infix *and* postfix forms, the main parser loop will just pick one.
 		//I think correctly disambiguating cases like that requires backtracking or infinite lookahead.
 		//Prefix and infix is okay though.
 		static @Nullable OperationType prefixOpFrom(Token tok) {
 			return switch(tok.type) {
 				case PLUS -> OperationType.UNARY_PLUS;
 				case MINUS -> OperationType.UNARY_MINUS;
 				case SQUIGGLE -> OperationType.UNARY_SQUIGGLE;
 				default -> null;
 			};
 		}
 		
 		static @Nullable OperationType infixOpFrom(Token tok) {
 			return switch(tok.type) {
 				case PLUS -> OperationType.ADD;
 				case MINUS -> OperationType.SUBTRACT;
 				case SQUIGGLE -> OperationType.SQUIGGLE;
 				case PERCENT -> OperationType.PERCENT;
 				case STAR -> OperationType.MULTIPLY;
 				case SLASH -> OperationType.DIVIDE;
 				case DOT -> OperationType.COMPOSITION;
 				default -> null;
 			};
 		}
 		
 		static @Nullable OperationType postfixOpFrom(Token tok) {
 			return switch(tok.type) {
 				case BANG -> OperationType.FACTORIAL;
 				default -> null;
 			};
 		}
 	}
 	
 	/// demonstration ///
 	
 	void checkParse(String in, String expect) {
 		String out = expr(new Lexer(in)).toString();
 		System.out.println("in:     " + in);
 		System.out.println("out:    " + out);
 		System.out.println("expect: " + expect);
 		check(out.equals(expect), "Expected " + expect + " but found " + out);
 	}
 	
 	void go() {
 		//left-associative operators
 		checkParse("1 + 2 + 3 + 4", "(+ (+ (+ 1 2) 3) 4)");
 		checkParse("1 ~ 2 ~ 3 ~ 4", "(~ (~ (~ 1 2) 3) 4)");
 		checkParse("1 * 2 * 3 * 4", "(* (* (* 1 2) 3) 4)");
 		checkParse("1 + 2 - 3 + 4", "(+ (- (+ 1 2) 3) 4)"); //operators with the same priority
 		checkParse("1 * 2 / 3 * 4", "(* (/ (* 1 2) 3) 4)");
 		
 		//right-associative operators
 		checkParse("1 . 2 . 3 . 4", "(. 1 (. 2 (. 3 4)))");
 		checkParse("1 % 2 % 3 % 4", "(% 1 (% 2 (% 3 4)))");
 		
 		//mixed associativity among operators with the same precedence
 		checkParse("1 ~ 2 ~ 3 % 4", "(% (~ (~ 1 2) 3) 4)");
 		checkParse("1 ~ 2 % 3 % 4", "(% (~ 1 2) (% 3 4))");
 		checkParse("1 % 2 ~ 3 ~ 4", "(% 1 (~ (~ 2 3) 4))");
 		checkParse("1 % 2 % 3 ~ 4", "(% 1 (% 2 (~ 3 4)))");
 		
 		//operator precedence
 		checkParse("1 + 2 ~ 3", "(+ 1 (~ 2 3))"); //plus is weaker than squiggle
 		checkParse("1 * 2 ~ 3", "(~ (* 1 2) 3)"); //star is stronger than squiggle
 		checkParse("1 + 2 * 3 + 4", "(+ (+ 1 (* 2 3)) 4)");
 		checkParse("1 * 2 + 3 * 4", "(+ (* 1 2) (* 3 4))");
 		
 		//prefix operators
 		checkParse("-1", "(- 1)");
 		checkParse("- - 1", "(- (- 1))");
 		checkParse("- + 1", "(- (+ 1))");
 		checkParse("- - 1 - 2", "(- (- (- 1)) 2)");
 		checkParse("- - 1 - - 2", "(- (- (- 1)) (- 2))");
 		checkParse("1--2", "(- 1 (- 2))");
 		checkParse("- 1 * - 2", "(* (- 1) (- 2))");
 		
 		//postfix operators
 		checkParse("5 !", "(! 5)");
 		checkParse("5 ! !", "(! (! 5))");
 		checkParse("- 5 !", "(- (! 5))"); //unary - is weaker than !
 		checkParse("~ 5 !", "(! (~ 5))"); //unary ~ is stronger than !
 		checkParse("---5!!!", "(- (- (- (! (! (! 5))))))");
 		checkParse("~~~5!!!", "(! (! (! (~ (~ (~ 5))))))");
 		checkParse("-~~5!!!", "(- (! (! (! (~ (~ 5))))))");
 		checkParse("~-~5!!!", "(~ (- (! (! (! (~ 5))))))");
 		checkParse("1 + 3!", "(+ 1 (! 3))");
 		checkParse("1 + 3 ! + 5", "(+ (+ 1 (! 3)) 5)");
 		
 		//parentheticals
 		checkParse("( 1 + 2 ) * 3", "(* (+ 1 2) 3)");
 		checkParse("1 + ( 2 * 3 )", "(+ 1 (* 2 3))");
 		checkParse("(1+2)*(3+4)", "(* (+ 1 2) (+ 3 4))");
 		checkParse("~(1)", "(~ 1)");
 		checkParse("~((((((1))))))", "(~ 1)");
 		checkParse("~(((~(((1))))))", "(~ (~ 1))");
 		checkParse("(4 + 5)!", "(! (+ 4 5))");
 		
 		//stupid viral math problems from the internet
 		checkParse("6 / 2 * (1 + 2)", "(* (/ 6 2) (+ 1 2))");
 		checkParse("2 + 2 * 4", "(+ 2 (* 2 4))");
 		checkParse("1 + 1 * 0 + 1", "(+ (+ 1 (* 1 0)) 1)");
 	}
 	
 	public static void main(String[] args) {
 		new ParserExploration2().go();
 	}
 }
diff --git a/ParserExploration2_InfixOnly.java b/ParserExploration2_InfixOnly.java
 package agency.highlysuspect.parsefunny;

 import java.util.ArrayDeque;
 import java.util.Deque;
 import java.util.stream.Collectors;

 //This shorter version strips away the pre/postfix parsing & parenthesis parsing, leaving only the
 //core of the algorithm. It's also implemented more traditionally, using a numeric "binding power".

 public class ParserExploration2_InfixOnly {
 	static void check(boolean arg, String msg) {
 		if(!arg) throw new IllegalStateException(msg);
 	}
 	
 	/// Lexing ///
 	
 	enum TokenType {
 		EOF,  //token after the end of the file (very common technique to avoid Optional<Token> everywhere)
 		ATOM, //stand-in for a "number" or "name literal" token you might have in a real parser
 		STAR, SLASH, SQUIGGLE, PLUS, MINUS, DOT, PERCENT, //grab bag of infix binary operators
 	}
 	
 	record Token(TokenType type, String display) {
 		@Override public String toString() { return display; }
 	}
 	
 	static class Lexer {
 		//Toy-quality lexer ported from matklad's post, lexes 1-character long operations and literals only.
 		//Don't get bogged down with this, it's not the important part.
 		Lexer(String in) {
 			this.tokens = in.chars()
 				.filter(i -> !Character.isWhitespace(i))
 				.mapToObj(i -> switch(i) {
 					case '*' -> new Token(TokenType.STAR, "*");
 					case '/' -> new Token(TokenType.SLASH, "/");
 					case '~' -> new Token(TokenType.SQUIGGLE, "~");
 					case '+' -> new Token(TokenType.PLUS, "+");
 					case '-' -> new Token(TokenType.MINUS, "-");
 					case '.' -> new Token(TokenType.DOT, ".");
 					case '%' -> new Token(TokenType.PERCENT, "%");
 					default -> new Token(TokenType.ATOM, Character.toString(i));
 				}).collect(Collectors.toCollection(ArrayDeque::new));
 		}
 		
 		//A "peeking iterator" API over the lexemes. Note that there's no way to see two tokens ahead.
 		final Deque<Token> tokens;
 		
 		Token peek() {
 			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.peekFirst();
 		}
 		
 		Token pop() {
 			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.pollFirst();
 		}
 	}
 	
 	/// AST sketch. Renders toString as an S-expression. ///
 	
 	interface Expr {}
 	record Atom(String s) implements Expr {
 		@Override public String toString() { return s; }
 	}
 	record BinaryOp(String op, Expr left, Expr right) implements Expr {
 		@Override public String toString() { return "(%s %s %s)".formatted(op, left, right); }
 	}
 	
 	/// Parsing ///
 	
 	static final int NOT_INFIX_OP = 0;
 	
 	Expr expr(Lexer lex) {
 		return pratt(lex, NOT_INFIX_OP); //"there is no operation to my left to consider binding to"
 	}
 	
 	Expr pratt(Lexer lex, int minBindingPower) {
 		Token lhsToken = lex.pop();
 		check(lhsToken.type == TokenType.ATOM, "expected atom");
 		Expr lhs = new Atom(lhsToken.toString());
 		
 		while(true) {
 			Token opToken = lex.peek();
 			if(leftPower(opToken) == NOT_INFIX_OP || leftPower(opToken) < minBindingPower) return lhs;
 			
 			lex.pop();
 			Expr rhs = pratt(lex, rightPower(opToken)); //recurse to find the right-hand side of the expression
 			lhs = new BinaryOp(opToken.toString(), lhs, rhs); //^ that is the HARD part to understand about this function
 		}
 	}
 	
 	static int leftPower(Token t) {
 		return switch(t.type) {
 			case PLUS, MINUS -> 20;
 			case SQUIGGLE, PERCENT -> 30;
 			case STAR, SLASH -> 40;
 			case DOT -> 100;
 			default -> NOT_INFIX_OP;
 		};
 	}
 	
 	static int rightPower(Token t) {
 		return switch(t.type) {
 			case PLUS, MINUS, SQUIGGLE, STAR, SLASH -> leftPower(t) + 1;
 			case DOT, PERCENT -> leftPower(t) - 1;
 			default -> NOT_INFIX_OP;
 		};
 	}
 	
 	/// demonstration ///
 	
 	void checkParse(String in, String expect) {
 		String out = expr(new Lexer(in)).toString();
 		System.out.println("in:     " + in);
 		System.out.println("out:    " + out);
 		System.out.println("expect: " + expect);
 		check(out.equals(expect), "Expected " + expect + " but found " + out);
 	}
 	
 	void go() {
 		//left-associative operators
 		checkParse("1 + 2 + 3 + 4", "(+ (+ (+ 1 2) 3) 4)");
 		checkParse("1 ~ 2 ~ 3 ~ 4", "(~ (~ (~ 1 2) 3) 4)");
 		checkParse("1 * 2 * 3 * 4", "(* (* (* 1 2) 3) 4)");
 		checkParse("1 + 2 - 3 + 4", "(+ (- (+ 1 2) 3) 4)"); //operators with the same priority
 		checkParse("1 * 2 / 3 * 4", "(* (/ (* 1 2) 3) 4)");
 		
 		//right-associative operators
 		checkParse("1 . 2 . 3 . 4", "(. 1 (. 2 (. 3 4)))");
 		checkParse("1 % 2 % 3 % 4", "(% 1 (% 2 (% 3 4)))");
 		
 		//mixed associativity among operators with the same precedence
 		checkParse("1 ~ 2 ~ 3 % 4", "(% (~ (~ 1 2) 3) 4)");
 		checkParse("1 ~ 2 % 3 % 4", "(% (~ 1 2) (% 3 4))");
 		checkParse("1 % 2 ~ 3 ~ 4", "(% 1 (~ (~ 2 3) 4))");
 		checkParse("1 % 2 % 3 ~ 4", "(% 1 (% 2 (~ 3 4)))");
 		
 		//operator precedence
 		checkParse("1 + 2 ~ 3", "(+ 1 (~ 2 3))"); //plus is weaker than squiggle
 		checkParse("1 * 2 ~ 3", "(~ (* 1 2) 3)"); //star is stronger than squiggle
 		checkParse("1 + 2 * 3 + 4", "(+ (+ 1 (* 2 3)) 4)");
 		checkParse("1 * 2 + 3 * 4", "(+ (* 1 2) (* 3 4))");
 	}
 	
 	public static void main(String[] args) {
 		new ParserExploration2_InfixOnly().go();
 	}
 }
	package agency.highlysuspect.parsefunny;

	import org.jetbrains.annotations.NotNull;
	import org.jetbrains.annotations.Nullable;

	import java.util.ArrayDeque;
	import java.util.Deque;
	import java.util.stream.Collectors;

	//This is sort of a synthesis of the parsers described in these two articles:
	//
	//matklad - "Simple but Powerful Pratt Parsing"
	// https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
	// Very featureful Pratt parser that can recognize prefix, postfix, infix, and "around"fix operators.
	// Uses a somewhat delicate numeric precedence scheme. Implemented in Rust.
	// *He also has this nice article https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
	//
	//Jamie Brandon (scattered-thoughts) - "Better operator precedence"
	// https://www.scattered-thoughts.net/writing/better-operator-precedence/
	// Introduces a "comparePrecedence" function instead of using numeric precedence.
	// But the given implementation (in Zig) doesn't cover anything other than infix operators.
	//
	//Wanted the best of both. I think I got it.
	//
	//There's also this nice article about pratt parsing by the Crafting Interpreters developer:
	//Bob Nystrom - Pratt Parsers: Expression Parsing Made Easy
	// https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
	//
	//But it gets a little bogged down in all the Java, so much so that someone wrote a "pratt parsing
	//without virtual dispatch or Java" article about it (http://www.oilshell.org/blog/2016/11/03.html),
	//and I wanted to redeem Java a bit :) So here's a complete lexer and parser in ~200 lines of commented java 17.

	public class ParserExploration2 {
	static void check(boolean arg, String msg) {
	if(!arg) throw new IllegalStateException(msg);
	}

	/// Lexing ///

	enum TokenType {
	EOF, //token after the end of the file (very common technique to avoid Optional<Token> everywhere)
	ATOM, //stand-in for a "number" or "name literal" token you might have in a real parser
	STAR, SLASH, SQUIGGLE, PLUS, MINUS, DOT, BANG, PERCENT, //grab bag of operators
	LPAREN, RPAREN, //parenthesis
	}

	record Token(TokenType type, String display) {
	@Override public String toString() { return display; }
	}

	static class Lexer {
	//Toy-quality lexer ported from matklad's post, lexes 1-character long operations and literals only.
	//Don't get bogged down with this, it's not the important part.
	Lexer(String in) {
	this.tokens = in.chars()
	.filter(i -> !Character.isWhitespace(i))
	.mapToObj(i -> switch(i) {
	case '' -> new Token(TokenType.STAR, "");
	case '/' -> new Token(TokenType.SLASH, "/");
	case '~' -> new Token(TokenType.SQUIGGLE, "~");
	case '+' -> new Token(TokenType.PLUS, "+");
	case '-' -> new Token(TokenType.MINUS, "-");
	case '.' -> new Token(TokenType.DOT, ".");
	case '!' -> new Token(TokenType.BANG, "!");
	case '%' -> new Token(TokenType.PERCENT, "%");
	case '(' -> new Token(TokenType.LPAREN, "(");
	case ')' -> new Token(TokenType.RPAREN, ")");
	default -> new Token(TokenType.ATOM, Character.toString(i));
	}).collect(Collectors.toCollection(ArrayDeque::new));
	}

	//A "peeking iterator" API over the lexemes. Note that there's no way to see two tokens ahead.
	final Deque<Token> tokens;

	Token peek() {
	return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.peekFirst();
	}

	Token pop() {
	return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.pollFirst();
	}
	}

	/// AST sketch. Renders toString as an S-expression. ///

	interface Expr {}
	record Atom(String s) implements Expr {
	@Override public String toString() { return s; }
	}
	record UnaryOp(OperationType op, Expr operand) implements Expr {
	@Override public String toString() { return "(%s %s)".formatted(op, operand); }
	}
	record BinaryOp(OperationType op, Expr left, Expr right) implements Expr {
	@Override public String toString() { return "(%s %s %s)".formatted(op, left, right); }
	}

	/// Parsing ///

	//Entrypoint for the recursive expression parser. In a real parsing application, this would be one of many little functions.
	Expr expr(Lexer lex) {
	return pratt(lex, null); //`null` means "there is no operation to my left to consider binding to".
	}

	//Pratt parser bullshit magic:
	Expr pratt(Lexer lex, @Nullable OperationType leftOp) {
	Token lhsToken = lex.pop();
	Expr lhs = switch(lhsToken.type) {
	case ATOM -> new Atom(lhsToken.toString());
	case LPAREN -> {
	//When crossing into parenthesis, revert to the null operation type.
	//Nothing should be able to bind leftwards through an opening paren.
	Expr e = pratt(lex, null);
	check(lex.pop().type == TokenType.RPAREN, "expected closing parenthesis");
	yield e;
	}
	default -> {
	//is it prefix?
	OperationType asPrefix = OperationType.prefixOpFrom(lhsToken);
	check(asPrefix != null, lhsToken + " is not a prefix operation!");
	//we already consumed the prefix operator's token
	Expr rhs = pratt(lex, asPrefix); //recurse to find the right-hand side of the expression
	yield new UnaryOp(asPrefix, rhs); //apply the prefix operator to the next operand
	//look for another infix or postfix operation
	}
	};

	while(true) {
	Token opToken = lex.peek();
	//(Special casing for EOF, RParen, etc is not actually needed; xxxOpFrom returning `null` takes care of it.)

	//is it infix?
	OperationType infixOp = OperationType.infixOpFrom(opToken);
	if(infixOp != null && bindsToRight(leftOp, infixOp)) {
	lex.pop(); //consume the infix operator's token
	Expr rhs = pratt(lex, infixOp); //recurse to find the right-hand side of the expression (!HARD to understand!)
	lhs = new BinaryOp(infixOp, lhs, rhs); //apply the infix operator to both operands
	continue; //look for another infix or postfix operation
	}

	//is it postfix?
	OperationType postfixOp = OperationType.postfixOpFrom(opToken);
	if(postfixOp != null && bindsToRight(leftOp, postfixOp)) {
	lex.pop(); //consume the postfix operator's token
	lhs = new UnaryOp(postfixOp, lhs); //apply the postfix operator to the previous operand
	continue; //look for another infix or postfix operation
	}

	return lhs;
	}
	}

	//Whether the operation on the right is "stronger" than the one on the left.
	static boolean bindsToRight(@Nullable OperationType left, @NotNull OperationType right) {
	if(left == null) return true; //There isn't an operator on the left to even consider binding to!

	//Key observation: this is the only place precedence and rightAssociative are read.
	//The precedence implementation can be swapped out for anything (such as a handwritten table),
	//if manual precedence-number fiddling isn't up your alley. Exceptions for specific pairs of
	//operators can also be added to this method.
	if(left.precedence == right.precedence) return left.rightAssociative; //break ties like this
	else return right.precedence > left.precedence;
	}

	//A token corresponds to zero or more "logical operations" depending on the token's fixity.
	//For example, the - token can be unary minus or subtraction, and unary minus binds tighter than
	//multiplication, but multiplication binds tighter than subtraction. In other words, tokens don't
	//really "have precedence", but (token, fixity) tuples do. The typical way of modelling this is
	//maintaining separate precedence tables: one for prefix operators, one for infix operators, and
	//one for postfix operators. This is a little fiddly, so in the spirit of reducing the amount of
	//"magic numbers" in pratt parsing, I tried mapping each operator and fixity into a single
	//OperationType table, giving a name to the operation at the same time (so I can say "factorial"
	//instead of "postfix bang"). Maybe it makes more sense this way? It's probably a little easier to
	//reason about when it comes time to add more operations? I think it's just as flexible? Idk.
	enum OperationType {
	ADD (20, false, "+"),
	SUBTRACT (20, false, "-"),
	SQUIGGLE (30, false, "~"), //Arbitrary binary operator with precedence between + and *
	PERCENT (30, true, "%"), //Same, but right-associative
	MULTIPLY (40, false, "*"),
	DIVIDE (40, false, "/"),
	UNARY_PLUS (50, false, "+"),
	UNARY_MINUS (50, false, "-"),
	FACTORIAL (60, false, "!"), //Postfix
	UNARY_SQUIGGLE(70, false, "~"),
	COMPOSITION (100, true, "."); //Right-associative

	OperationType(int precedence, boolean rightAssociative, String display) {
	this.precedence = precedence;
	this.rightAssociative = rightAssociative;
	this.display = display;
	}

	final int precedence;
	final boolean rightAssociative;
	final String display;
	@Override public String toString() { return this.display; }

	//Tables for converting tokens into logical operations, depending on the token's fixity.
	//Note that if a token has infix and postfix forms, the main parser loop will just pick one.
	//I think correctly disambiguating cases like that requires backtracking or infinite lookahead.
	//Prefix and infix is okay though.
	static @Nullable OperationType prefixOpFrom(Token tok) {
	return switch(tok.type) {
	case PLUS -> OperationType.UNARY_PLUS;
	case MINUS -> OperationType.UNARY_MINUS;
	case SQUIGGLE -> OperationType.UNARY_SQUIGGLE;
	default -> null;
	};
	}

	static @Nullable OperationType infixOpFrom(Token tok) {
	return switch(tok.type) {
	case PLUS -> OperationType.ADD;
	case MINUS -> OperationType.SUBTRACT;
	case SQUIGGLE -> OperationType.SQUIGGLE;
	case PERCENT -> OperationType.PERCENT;
	case STAR -> OperationType.MULTIPLY;
	case SLASH -> OperationType.DIVIDE;
	case DOT -> OperationType.COMPOSITION;
	default -> null;
	};
	}

	static @Nullable OperationType postfixOpFrom(Token tok) {
	return switch(tok.type) {
	case BANG -> OperationType.FACTORIAL;
	default -> null;
	};
	}
	}

	/// demonstration ///

	void checkParse(String in, String expect) {
	String out = expr(new Lexer(in)).toString();
	System.out.println("in: " + in);
	System.out.println("out: " + out);
	System.out.println("expect: " + expect);
	check(out.equals(expect), "Expected " + expect + " but found " + out);
	}

	void go() {
	//left-associative operators
	checkParse("1 + 2 + 3 + 4", "(+ (+ (+ 1 2) 3) 4)");
	checkParse("1 ~ 2 ~ 3 ~ 4", "(~ (~ (~ 1 2) 3) 4)");
	checkParse("1 * 2 * 3 * 4", "(* (* (* 1 2) 3) 4)");
	checkParse("1 + 2 - 3 + 4", "(+ (- (+ 1 2) 3) 4)"); //operators with the same priority
	checkParse("1 * 2 / 3 * 4", "(* (/ (* 1 2) 3) 4)");

	//right-associative operators
	checkParse("1 . 2 . 3 . 4", "(. 1 (. 2 (. 3 4)))");
	checkParse("1 % 2 % 3 % 4", "(% 1 (% 2 (% 3 4)))");

	//mixed associativity among operators with the same precedence
	checkParse("1 ~ 2 ~ 3 % 4", "(% (~ (~ 1 2) 3) 4)");
	checkParse("1 ~ 2 % 3 % 4", "(% (~ 1 2) (% 3 4))");
	checkParse("1 % 2 ~ 3 ~ 4", "(% 1 (~ (~ 2 3) 4))");
	checkParse("1 % 2 % 3 ~ 4", "(% 1 (% 2 (~ 3 4)))");

	//operator precedence
	checkParse("1 + 2 ~ 3", "(+ 1 (~ 2 3))"); //plus is weaker than squiggle
	checkParse("1 * 2 ~ 3", "(~ (* 1 2) 3)"); //star is stronger than squiggle
	checkParse("1 + 2 * 3 + 4", "(+ (+ 1 (* 2 3)) 4)");
	checkParse("1 * 2 + 3 * 4", "(+ (* 1 2) (* 3 4))");

	//prefix operators
	checkParse("-1", "(- 1)");
	checkParse("- - 1", "(- (- 1))");
	checkParse("- + 1", "(- (+ 1))");
	checkParse("- - 1 - 2", "(- (- (- 1)) 2)");
	checkParse("- - 1 - - 2", "(- (- (- 1)) (- 2))");
	checkParse("1--2", "(- 1 (- 2))");
	checkParse("- 1 * - 2", "(* (- 1) (- 2))");

	//postfix operators
	checkParse("5 !", "(! 5)");
	checkParse("5 ! !", "(! (! 5))");
	checkParse("- 5 !", "(- (! 5))"); //unary - is weaker than !
	checkParse("~ 5 !", "(! (~ 5))"); //unary ~ is stronger than !
	checkParse("---5!!!", "(- (- (- (! (! (! 5))))))");
	checkParse("~~~5!!!", "(! (! (! (~ (~ (~ 5))))))");
	checkParse("-~~5!!!", "(- (! (! (! (~ (~ 5))))))");
	checkParse("~-~5!!!", "(~ (- (! (! (! (~ 5))))))");
	checkParse("1 + 3!", "(+ 1 (! 3))");
	checkParse("1 + 3 ! + 5", "(+ (+ 1 (! 3)) 5)");

	//parentheticals
	checkParse("( 1 + 2 ) * 3", "(* (+ 1 2) 3)");
	checkParse("1 + ( 2 * 3 )", "(+ 1 (* 2 3))");
	checkParse("(1+2)(3+4)", "( (+ 1 2) (+ 3 4))");
	checkParse("~(1)", "(~ 1)");
	checkParse("~((((((1))))))", "(~ 1)");
	checkParse("~(((~(((1))))))", "(~ (~ 1))");
	checkParse("(4 + 5)!", "(! (+ 4 5))");

	//stupid viral math problems from the internet
	checkParse("6 / 2 * (1 + 2)", "(* (/ 6 2) (+ 1 2))");
	checkParse("2 + 2 * 4", "(+ 2 (* 2 4))");
	checkParse("1 + 1 * 0 + 1", "(+ (+ 1 (* 1 0)) 1)");
	}

	public static void main(String[] args) {
	new ParserExploration2().go();
	}
	}