From 94a9cd8cc7091b2a87712d10cf15d050640e2b43 Mon Sep 17 00:00:00 2001 From: Nareshkumar Rao Date: Thu, 29 May 2025 01:03:39 +0200 Subject: [PATCH] parsed ast of basic application --- src/ast/expression.rs | 23 ++++++++++++ src/{ast.rs => ast/mod.rs} | 14 +++----- src/lexer.rs | 4 +-- src/main.rs | 5 ++- src/parse/expression.rs | 74 ++++++++++++++++++++++++++++++++++++++ src/parse/macros.rs | 56 ++++++++++++++++++++++------- src/parse/mod.rs | 49 ++++++++++++++++--------- 7 files changed, 180 insertions(+), 45 deletions(-) create mode 100644 src/ast/expression.rs rename src/{ast.rs => ast/mod.rs} (69%) create mode 100644 src/parse/expression.rs diff --git a/src/ast/expression.rs b/src/ast/expression.rs new file mode 100644 index 0000000..2c07837 --- /dev/null +++ b/src/ast/expression.rs @@ -0,0 +1,23 @@ +use crate::token::{LiteralKind, Number, Token}; + +#[derive(Debug)] +pub enum Expression { + Literal { + literal: LiteralKind, + token: Token, + }, + Unary, + Binary, + Call { + callee: Box, + arguments: Vec, + }, + Grouping, + Variable { + name: String, + token: Token, + }, +} + +pub enum BinaryOperator {} +pub enum UnaryOperator {} diff --git a/src/ast.rs b/src/ast/mod.rs similarity index 69% rename from src/ast.rs rename to src/ast/mod.rs index 8fe9215..6d9795e 100644 --- a/src/ast.rs +++ b/src/ast/mod.rs @@ -1,5 +1,9 @@ +mod expression; +pub use expression::*; + use crate::token::Token; +#[derive(Debug)] pub struct Module { pub statements: Vec, } @@ -19,13 +23,3 @@ pub struct ParameterDeclaration { pub name: Token, pub typename: Token, } - -#[derive(Debug)] -pub enum Expression { - Identifier(Token), - FunctionCall { - function: Box, - arguments: Vec, - return_type: Option, - }, -} diff --git a/src/lexer.rs b/src/lexer.rs index 4e70ba0..c080dda 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -31,6 +31,7 @@ impl Lexer { self.next_token(); } self.clean_newlines(); + self.push(TokenKind::EndOfFile); self.tokens } @@ -118,9 +119,6 @@ impl Lexer { break; } } - if self.is_eof() { - todo!("Not sure if handling is necessary") - } let s = self.current_scan(0, 0); if let Ok(k) = TryInto::::try_into(s.as_str()) { self.push(TokenKind::Keyword(k)); diff --git a/src/main.rs b/src/main.rs index 77a7cfb..c9c68e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,6 @@ mod parse; mod token; fn main() { - use format::Formatter; use lexer::Lexer; const BASIC: &str = r#" function hello(name: string){ @@ -22,6 +21,6 @@ hello(); let lexer = Lexer::new(BASIC, Some("basic.file".to_string())); let tokens = lexer.lex(); let mut parser = Parser::new(tokens); - let statement = parser.statement(); - println!("{statement:?}"); + let module = parser.module(); + println!("{module:?}"); } diff --git a/src/parse/expression.rs b/src/parse/expression.rs new file mode 100644 index 0000000..829520d --- /dev/null +++ b/src/parse/expression.rs @@ -0,0 +1,74 @@ +use crate::{ast, expect_token, get_token, match_token, token::TokenKind}; + +use super::Parser; + +impl Parser { + pub fn expression(&mut self) -> ast::Expression { + self.call() + } + fn equality(&mut self) -> ast::Expression { + todo!() + } + fn comparison(&mut self) -> ast::Expression { + todo!() + } + fn term(&mut self) -> ast::Expression { + todo!() + } + fn factor(&mut self) -> ast::Expression { + todo!() + } + fn unary(&mut self) -> ast::Expression { + todo!() + } + fn call(&mut self) -> ast::Expression { + let mut e = self.primary(); + + if match_token!(self, TokenKind::LeftParen) { + let mut arguments = Vec::new(); + while !match_token!(self, TokenKind::RightParen) { + arguments.push(self.expression()); + if !match_token!(self, TokenKind::Comma) { + expect_token!(self, TokenKind::RightParen, "RightParen"); + break; + } + } + e = ast::Expression::Call { + callee: Box::new(e), + arguments, + } + } + + e + } + fn primary(&mut self) -> ast::Expression { + if let Some((token, TokenKind::Literal(literal))) = get_token!(self, TokenKind::Literal(_)) + { + return ast::Expression::Literal { literal, token }; + } + + if let Some((token, TokenKind::Identifier(name))) = + get_token!(self, TokenKind::Identifier(_)) + { + return ast::Expression::Variable { name, token }; + } + + todo!("Unknown expression {:?}", self.peek()) + } +} + +#[cfg(test)] +mod tests { + use crate::{ast::Expression, lexer::Lexer, parse::Parser, token::TokenKind}; + + #[test] + fn test_identifier() { + let tokens = Lexer::new("my_var", None).lex(); + let mut parser = Parser::new(tokens); + let expr = parser.expression(); + let Expression::Variable { name, token } = expr else { + panic!() + }; + assert_eq!("my_var", name) + } +} diff --git a/src/parse/macros.rs b/src/parse/macros.rs index 36c3e2a..6f572c4 100644 --- a/src/parse/macros.rs +++ b/src/parse/macros.rs @@ -1,9 +1,10 @@ #[macro_export] macro_rules! expect_token { - ($self:ident, $expect:pat) => { + ($self:ident, $expect:pat, $help:literal) => { let t = $self.consume(); - if !matches!(t.kind, $expect) { - todo!("Expected token, found {:?}", t.kind) + let kind = t.as_ref().map(|t| &t.kind); + if !matches!(kind, Some($expect)) { + todo!("Expected token ({}), found {:?}.", $help, t) } }; } @@ -12,10 +13,11 @@ macro_rules! expect_token { macro_rules! expect_identifier { ($self:ident) => {{ let t = $self.consume(); - if !matches!(t.kind, TokenKind::Identifier(_)) { - todo!("Expected token, found {:?}", t.kind) + let kind = t.as_ref().map(|t| &t.kind); + if !matches!(kind, Some(TokenKind::Identifier(_))) { + todo!("Expected identifier, found {:?}", t); } - t + t.unwrap() }}; } @@ -23,10 +25,11 @@ macro_rules! expect_identifier { macro_rules! expect_any_keyword { ($self:ident) => {{ let t = $self.consume(); - if !matches!(t.kind, TokenKind::Keyword(_)) { - todo!("Expected token, found {:?}", t.kind) + let kind = t.as_ref().map(|t| &t.kind); + if !matches!(kind, Some(TokenKind::Keyword(_))) { + todo!("Expected keyword, found {:?}", t) } - t + t.unwrap() }}; } @@ -34,8 +37,9 @@ macro_rules! expect_any_keyword { macro_rules! expect_keyword { ($self:ident, $keyword:pat) => { let t = $self.consume(); - if !matches!(t.kind, TokenKind::Keyword($keyword)) { - todo!("Expected token, found {:?}", t.kind) + let kind = t.as_ref().map(|t| &t.kind); + if !matches!(kind, Some(TokenKind::Keyword($keyword))) { + todo!("Expected keyword, found {:?}", t) } }; } @@ -43,13 +47,39 @@ macro_rules! expect_keyword { #[macro_export] macro_rules! peek_keyword { ($self:ident, $keyword:pat) => { - matches!($self.peek().kind, TokenKind::Keyword($keyword)) + matches!( + $self.peek().map(|t| &t.kind), + Some(TokenKind::Keyword($keyword)) + ) }; } #[macro_export] macro_rules! peek_match { ($self:ident, $p:pat) => { - matches!($self.peek().kind, $p) + matches!($self.peek().map(|t| &t.kind), Some($p)) }; } + +#[macro_export] +macro_rules! match_token { + ($self:ident, $p:pat) => {{ + let b = matches!($self.peek().map(|t| &t.kind), Some($p)); + if b { + $self.consume(); + } + b + }}; +} + +#[macro_export] +macro_rules! get_token { + ($self:ident, $p:pat) => {{ + let b = matches!($self.peek().map(|t| &t.kind), Some($p)); + if b { + $self.consume().map(|t| (t.clone(), t.kind)) + } else { + None + } + }}; +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 8c70495..3d2d1de 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,8 +1,9 @@ +mod expression; mod macros; use crate::{ - ast, expect_any_keyword, expect_identifier, expect_keyword, expect_token, peek_keyword, - peek_match, + ast, expect_any_keyword, expect_identifier, expect_keyword, expect_token, match_token, + peek_keyword, peek_match, token::{KeywordKind, Token, TokenKind}, }; @@ -13,26 +14,40 @@ pub struct Parser { impl Parser { pub fn new(tokens: Vec) -> Parser { + let tokens = tokens + .into_iter() + .filter(|t| !matches!(t.kind, TokenKind::NewLine)) + .collect::>(); + Self { tokens, current: 0 } } - pub fn statement(&mut self) -> ast::Statement { + pub fn module(&mut self) -> ast::Module { + let mut statements = Vec::new(); + while !match_token!(self, TokenKind::EndOfFile) { + let s = self.statement(); + println!("Parsed Statement {s:?}"); + statements.push(s); + } + ast::Module { statements } + } + + fn statement(&mut self) -> ast::Statement { if peek_keyword!(self, KeywordKind::function) { return self.function_declaration(); } - return self.expression_statement(); - todo!("No statement"); + self.expression_statement() } fn function_declaration(&mut self) -> ast::Statement { expect_keyword!(self, KeywordKind::function); let id = expect_identifier!(self); - expect_token!(self, TokenKind::LeftParen); + expect_token!(self, TokenKind::LeftParen, "LeftParen"); let mut parameters = Vec::new(); while peek_match!(self, TokenKind::Identifier(_)) { let name = expect_identifier!(self); - expect_token!(self, TokenKind::Colon); + expect_token!(self, TokenKind::Colon, "Colon"); let typename = expect_any_keyword!(self); let parameter = ast::ParameterDeclaration { name: name.clone(), @@ -41,9 +56,9 @@ impl Parser { parameters.push(parameter); } - expect_token!(self, TokenKind::RightParen); + expect_token!(self, TokenKind::RightParen, "RightParen"); - expect_token!(self, TokenKind::LeftCurly); + expect_token!(self, TokenKind::LeftCurly, "LeftCurly"); let mut statements = Vec::new(); while !peek_match!(self, TokenKind::RightCurly) { @@ -51,7 +66,7 @@ impl Parser { statements.push(statement); } - expect_token!(self, TokenKind::RightCurly); + expect_token!(self, TokenKind::RightCurly, "RightCurly"); ast::Statement::FunctionDeclaration { name: id.clone(), @@ -61,17 +76,19 @@ impl Parser { } fn expression_statement(&mut self) -> ast::Statement { - todo!() + let e = self.expression(); + expect_token!(self, TokenKind::Semicolon, "Semicolon"); + ast::Statement::Expression(e) } } impl Parser { - fn peek(&self) -> &Token { - &self.tokens[self.current] + fn peek(&self) -> Option<&Token> { + self.tokens.get(self.current) } - fn consume(&mut self) -> Token { - let token = &self.tokens[self.current]; + fn consume(&mut self) -> Option { + let token = self.peek().cloned(); self.current += 1; - token.clone() + token } }