PHP subset parser / lexer in Go

package main

import (
    "fmt"
    "strconv"
)

// TokenType represents the type of a token
type TokenType int

const (
    TOKEN_EOF TokenType = iota
    TOKEN_ILLEGAL // returned for characters the lexer does not recognize
    TOKEN_IDENTIFIER
    TOKEN_STRING
    TOKEN_NUMBER
    TOKEN_EQUALS
    TOKEN_SEMICOLON
    TOKEN_DOLLAR
    TOKEN_LPAREN
    TOKEN_RPAREN
    TOKEN_COMMA
    TOKEN_ECHO
    TOKEN_PHP_START
    TOKEN_PHP_END
    TOKEN_DOT
)

// Token represents a lexical token
type Token struct {
    Type    TokenType
    Literal string
}

// Lexer performs lexical analysis
type Lexer struct {
    input        string
    position     int
    readPosition int
    ch           byte
}

// NewLexer creates a new Lexer
func NewLexer(input string) *Lexer {
    l := &Lexer{input: input}
    l.readChar()
    return l
}

// readChar reads the next character
func (l *Lexer) readChar() {
    if l.readPosition >= len(l.input) {
        l.ch = 0
    } else {
        l.ch = l.input[l.readPosition]
    }
    l.position = l.readPosition
    l.readPosition++
}

// NextToken returns the next token
func (l *Lexer) NextToken() Token {
    var tok Token
    l.skipWhitespace()
    switch l.ch {
    case '=':
        tok = Token{Type: TOKEN_EQUALS, Literal: string(l.ch)}
    case ';':
        tok = Token{Type: TOKEN_SEMICOLON, Literal: string(l.ch)}
    case '$':
        tok = Token{Type: TOKEN_DOLLAR, Literal: string(l.ch)}
    case '(':
        tok = Token{Type: TOKEN_LPAREN, Literal: string(l.ch)}
    case ')':
        tok = Token{Type: TOKEN_RPAREN, Literal: string(l.ch)}
    case ',':
        tok = Token{Type: TOKEN_COMMA, Literal: string(l.ch)}
    case '.':
        tok = Token{Type: TOKEN_DOT, Literal: string(l.ch)}
    case '"':
        tok.Type = TOKEN_STRING
        tok.Literal = l.readString()
    case 0:
        tok.Literal = ""
        tok.Type = TOKEN_EOF
    default:
        if isLetter(l.ch) {
            tok.Literal = l.readIdentifier()
            tok.Type = l.lookupIdentifier(tok.Literal)
            return tok
        } else if isDigit(l.ch) {
            tok.Type = TOKEN_NUMBER
            tok.Literal = l.readNumber()
            return tok
        } else if l.ch == '<' && l.peekChar() == '?' {
            l.readChar() // consume '<'
            l.readChar() // consume '?'
            if l.ch == 'p' && l.peekChar() == 'h' {
                l.readChar() // consume 'p'
                l.readChar() // consume 'h'
                l.readChar() // consume the final 'p'
                // return here so the readChar below does not skip the
                // character that follows the "<?php" tag
                return Token{Type: TOKEN_PHP_START, Literal: "<?php"}
            }
            return Token{Type: TOKEN_ILLEGAL, Literal: "<?"}
        } else if l.ch == '?' && l.peekChar() == '>' {
            l.readChar() // consume '?'
            l.readChar() // consume '>'
            return Token{Type: TOKEN_PHP_END, Literal: "?>"}
        } else {
            tok = Token{Type: TOKEN_ILLEGAL, Literal: string(l.ch)}
        }
    }
    l.readChar()
    return tok
}

func (l *Lexer) skipWhitespace() {
    for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
        l.readChar()
    }
}

func (l *Lexer) readIdentifier() string {
    position := l.position
    for isLetter(l.ch) || isDigit(l.ch) {
        l.readChar()
    }
    return l.input[position:l.position]
}

func (l *Lexer) readNumber() string {
    position := l.position
    for isDigit(l.ch) {
        l.readChar()
    }
    return l.input[position:l.position]
}

func (l *Lexer) readString() string {
    position := l.position + 1
    for {
        l.readChar()
        if l.ch == '"' || l.ch == 0 {
            break
        }
    }
    return l.input[position:l.position]
}

func (l *Lexer) peekChar() byte {
    if l.readPosition >= len(l.input) {
        return 0
    }
    return l.input[l.readPosition]
}

func (l *Lexer) lookupIdentifier(ident string) TokenType {
    switch ident {
    case "echo":
        return TOKEN_ECHO
    default:
        return TOKEN_IDENTIFIER
    }
}

func isLetter(ch byte) bool {
    return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

func isDigit(ch byte) bool {
    return '0' <= ch && ch <= '9'
}
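
// dumpTokens is a minimal sketch, not from the original gist, of how the lexer
// alone could be exercised; it assumes it is added to this same file, and the
// name dumpTokens is made up for illustration.
func dumpTokens(src string) {
    l := NewLexer(src)
    for {
        tok := l.NextToken()
        // TokenType has no String method, so the type prints as its numeric value.
        fmt.Printf("%d %q\n", tok.Type, tok.Literal)
        if tok.Type == TOKEN_EOF {
            break
        }
    }
}

// For an input such as `$x = 1;`, the expected sequence is TOKEN_DOLLAR,
// TOKEN_IDENTIFIER ("x"), TOKEN_EQUALS, TOKEN_NUMBER ("1"), TOKEN_SEMICOLON,
// and finally TOKEN_EOF.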

// Node represents a node in the AST
type Node interface {
    TokenLiteral() string
}

// Statement represents a statement node
type Statement interface {
    Node
    statementNode()
}

// Expression represents an expression node
type Expression interface {
    Node
    expressionNode()
}

// Program represents the root node of the AST
type Program struct {
    Statements []Statement
}

func (p *Program) TokenLiteral() string {
    if len(p.Statements) > 0 {
        return p.Statements[0].TokenLiteral()
    }
    return ""
}

// ExpressionStatement represents an expression statement
type ExpressionStatement struct {
    Token      Token
    Expression Expression
}

func (es *ExpressionStatement) statementNode()       {}
func (es *ExpressionStatement) TokenLiteral() string { return es.Token.Literal }

// AssignmentStatement represents an assignment statement
type AssignmentStatement struct {
    Token Token // the '$' token that starts the assignment
    Name  *Identifier
    Value Expression
}

func (as *AssignmentStatement) statementNode()       {}
func (as *AssignmentStatement) TokenLiteral() string { return as.Token.Literal }

// EchoStatement represents an echo statement
type EchoStatement struct {
    Token Token
    Value Expression
}

func (es *EchoStatement) statementNode()       {}
func (es *EchoStatement) TokenLiteral() string { return es.Token.Literal }

// Identifier represents an identifier
type Identifier struct {
    Token Token
    Value string
}

func (i *Identifier) expressionNode()      {}
func (i *Identifier) TokenLiteral() string { return i.Token.Literal }

// StringLiteral represents a string literal
type StringLiteral struct {
    Token Token
    Value string
}

func (sl *StringLiteral) expressionNode()      {}
func (sl *StringLiteral) TokenLiteral() string { return sl.Token.Literal }

// NumberLiteral represents a number literal
type NumberLiteral struct {
    Token Token
    Value float64
}

func (nl *NumberLiteral) expressionNode()      {}
func (nl *NumberLiteral) TokenLiteral() string { return nl.Token.Literal }

// CallExpression represents a function call
type CallExpression struct {
    Token     Token
    Function  Expression
    Arguments []Expression
}

func (ce *CallExpression) expressionNode()      {}
func (ce *CallExpression) TokenLiteral() string { return ce.Token.Literal }

// Parser parses tokens into an AST
type Parser struct {
    l         *Lexer
    curToken  Token
    peekToken Token
    errors    []string
}

// NewParser creates a new Parser
func NewParser(l *Lexer) *Parser {
    p := &Parser{l: l, errors: []string{}}
    p.nextToken()
    p.nextToken()
    return p
}

func (p *Parser) nextToken() {
    p.curToken = p.peekToken
    p.peekToken = p.l.NextToken()
}

func (p *Parser) ParseProgram() *Program {
    program := &Program{}
    program.Statements = []Statement{}
    for p.curToken.Type != TOKEN_EOF {
        if p.curToken.Type == TOKEN_PHP_START {
            p.nextToken() // consume <?php
            for p.curToken.Type != TOKEN_PHP_END && p.curToken.Type != TOKEN_EOF {
                if p.curToken.Type == TOKEN_ECHO {
                    stmt := p.parseEchoStatement()
                    program.Statements = append(program.Statements, stmt)
                } else if p.curToken.Type == TOKEN_DOLLAR {
                    stmt := p.parseAssignmentStatement()
                    program.Statements = append(program.Statements, stmt)
                }
                p.nextToken()
            }
        } else {
            p.nextToken()
        }
    }
    return program
}

// parseStatement dispatches on the current token; note that ParseProgram above
// does not currently call it.
func (p *Parser) parseStatement() Statement {
    switch p.curToken.Type {
    case TOKEN_ECHO:
        return p.parseEchoStatement()
    case TOKEN_DOLLAR:
        return p.parseAssignmentStatement()
    case TOKEN_STRING:
        return p.parseEchoStatement()
    default:
        return p.parseExpressionStatement()
    }
}

func (p *Parser) parseEchoStatement() *EchoStatement {
    stmt := &EchoStatement{Token: p.curToken}
    p.nextToken()
    stmt.Value = p.parseExpression()
    // String concatenation with '.' is folded into a CallExpression: the
    // left-hand side goes in Function and the right-hand side in Arguments.
    for p.peekToken.Type == TOKEN_DOT {
        p.nextToken() // consume '.'
        p.nextToken() // move to the token after '.'
        stmt.Value = &CallExpression{Token: Token{Type: TOKEN_IDENTIFIER, Literal: "."}, Function: stmt.Value, Arguments: []Expression{p.parseExpression()}}
    }
    return stmt
}

func (p *Parser) parseAssignmentStatement() *AssignmentStatement {
    stmt := &AssignmentStatement{Token: p.curToken}
    p.nextToken() // consume '$'
    stmt.Name = &Identifier{Token: p.curToken, Value: p.curToken.Literal}
    p.nextToken() // consume identifier
    if p.curToken.Type != TOKEN_EQUALS {
        p.peekError(TOKEN_EQUALS)
        return nil
    }
    p.nextToken() // consume '='
    stmt.Value = p.parseExpression()
    return stmt
}

func (p *Parser) parseExpressionStatement() *ExpressionStatement {
    stmt := &ExpressionStatement{Token: p.curToken}
    stmt.Expression = p.parseExpression()
    return stmt
}

func (p *Parser) parseExpression() Expression {
    switch p.curToken.Type {
    case TOKEN_IDENTIFIER:
        return &Identifier{Token: p.curToken, Value: p.curToken.Literal}
    case TOKEN_STRING:
        return &StringLiteral{Token: p.curToken, Value: p.curToken.Literal}
    case TOKEN_NUMBER:
        return p.parseNumberLiteral()
    case TOKEN_DOLLAR:
        // A '$' introduces a variable: advance to the identifier that follows
        // so the resulting node carries the variable name rather than "$".
        p.nextToken()
        return &Identifier{Token: p.curToken, Value: p.curToken.Literal}
    default:
        return nil
    }
}

func (p *Parser) parseNumberLiteral() Expression {
    lit := &NumberLiteral{Token: p.curToken}
    value, err := strconv.ParseFloat(p.curToken.Literal, 64)
    if err != nil {
        msg := fmt.Sprintf("could not parse %q as float", p.curToken.Literal)
        p.errors = append(p.errors, msg)
        return nil
    }
    lit.Value = value
    return lit
}

func (p *Parser) expectPeek(t TokenType) bool {
    if p.peekToken.Type == t {
        p.nextToken()
        return true
    }
    p.peekError(t)
    return false
}

func (p *Parser) peekError(t TokenType) {
    // TokenType has no String method, so report the numeric token values.
    msg := fmt.Sprintf("expected next token to be %d, got %d instead",
        t, p.peekToken.Type)
    p.errors = append(p.errors, msg)
}

// For testing and demonstration
func main() {
    input := `<?php
$name = "World";
echo "Hello, " . $name;
$age = 30;
echo "Age: " . $age;
?>`
    l := NewLexer(input)
    p := NewParser(l)
    program := p.ParseProgram()
    if len(p.errors) != 0 {
        for _, err := range p.errors {
            fmt.Println("parser error:", err)
        }
        return
    }
    fmt.Println("AST:")
    for _, stmt := range program.Statements {
        fmt.Printf("%T\n", stmt)
    }
}
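
// The two helpers below are a minimal sketch, not from the original gist, of
// how the AST built above could be rendered back into PHP-like source. They
// assume they live in this same file; the names describeStmt and describeExpr
// are made up for illustration.
func describeStmt(s Statement) string {
    switch v := s.(type) {
    case *AssignmentStatement:
        return "$" + v.Name.Value + " = " + describeExpr(v.Value)
    case *EchoStatement:
        return "echo " + describeExpr(v.Value)
    default:
        return fmt.Sprintf("%T", s)
    }
}

func describeExpr(e Expression) string {
    switch v := e.(type) {
    case *Identifier:
        return "$" + v.Value
    case *StringLiteral:
        return `"` + v.Value + `"`
    case *NumberLiteral:
        return v.Token.Literal
    case *CallExpression:
        // Concatenation is stored with the left-hand side in Function and the
        // right-hand side in Arguments (see parseEchoStatement above).
        out := describeExpr(v.Function)
        for _, a := range v.Arguments {
            out += " . " + describeExpr(a)
        }
        return out
    default:
        return "?"
    }
}

// Calling fmt.Println(describeStmt(stmt)) inside main's loop should print
// lines along the lines of `$name = "World"` and `echo "Hello, " . $name`.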