This commit is contained in:
2025-05-27 23:46:52 +02:00
commit d8422261f6
9 changed files with 420 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

16
Cargo.lock generated Normal file
View File

@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "anyhow"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "ts-parser"
version = "0.1.0"
dependencies = [
"anyhow",
]

7
Cargo.toml Normal file
View File

@ -0,0 +1,7 @@
[package]
name = "ts-parser"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = "1.0.98"

23
src/ast.rs Normal file
View File

@ -0,0 +1,23 @@
use crate::token::Token;
/// Root of the AST: one whole parsed source file.
#[derive(Debug)]
pub struct Module {
    /// Top-level statements in source order.
    pub statements: Vec<Statement>,
}

/// A single statement.
#[derive(Debug)]
pub enum Statement {
    /// `function name(parameters…) { … }`
    FunctionDeclaration {
        name: Token,
        parameters: Vec<ParameterDeclaration>,
    },
    /// An expression used as a statement (placeholder; carries no data yet).
    Expression,
}

/// One `name: typename` entry in a function's parameter list.
#[derive(Debug)]
pub struct ParameterDeclaration {
    name: Token,
    typename: Token,
}

/// An expression node (work in progress).
#[derive(Debug)]
pub enum Expression {
    Identifier(Token),
    /// Placeholder: callee and arguments are not modeled yet.
    FunctionCall {},
}

51
src/format.rs Normal file
View File

@ -0,0 +1,51 @@
use crate::token::{CommentKind, KeywordKind, LiteralKind, Number, Token, TokenKind};
/// Renders a value back into source text.
pub trait Formatter {
    /// Formats `self` according to `options`, returning the rendered source.
    fn format(self, options: FormatterOptions) -> anyhow::Result<String>;
}
/// Formatting options. No options are defined yet; this exists so the
/// `Formatter` signature is stable when options are added.
pub struct FormatterOptions {}
impl Formatter for &[Token] {
    /// Renders a token slice back into source text.
    ///
    /// Spacing is baked into each token's rendering: `function ` keeps a
    /// trailing space, `, ` and `: ` include one after the punctuation, and
    /// `{` is preceded by one.
    fn format(self, _options: FormatterOptions) -> anyhow::Result<String> {
        let mut result = String::new();
        for t in self {
            let s = match &t.kind {
                TokenKind::Identifier(i) => i.clone(),
                TokenKind::Literal(kind) => match kind {
                    LiteralKind::String(s) => format!("\"{s}\""),
                    LiteralKind::Number(number) => match number {
                        Number::Integer(i) => i.to_string(),
                        Number::Float(f) => f.to_string(),
                    },
                },
                // was: format!("{}", match …) — the outer format! was a no-op.
                TokenKind::Keyword(kind) => match kind {
                    KeywordKind::function => "function ",
                    KeywordKind::string => "string",
                    KeywordKind::number => "number",
                }
                .to_string(),
                TokenKind::Comment(kind, s) => match kind {
                    CommentKind::Line => format!("// {s}"),
                    CommentKind::Block => format!("/* {s} */"),
                },
                TokenKind::LeftParen => "(".to_string(),
                TokenKind::RightParen => ")".to_string(),
                TokenKind::LeftCurly => " {".to_string(),
                TokenKind::RightCurly => "}".to_string(),
                TokenKind::Comma => ", ".to_string(),
                TokenKind::Colon => ": ".to_string(),
                TokenKind::Semicolon => ";".to_string(),
                TokenKind::Period => ".".to_string(),
                TokenKind::NewLine => "\n".to_string(),
                TokenKind::EndOfFile => String::new(),
            };
            result += &s;
        }
        Ok(result)
    }
}

198
src/lexer.rs Normal file
View File

@ -0,0 +1,198 @@
use anyhow::bail;
use crate::token::{CommentKind, KeywordKind, LiteralKind, Token, TokenKind, TokenLocation};
/// Hand-rolled scanner that turns raw source text into a `Vec<Token>`.
pub struct Lexer {
    // Optional file name, copied into each token's location.
    file: Option<String>,
    // Source decoded into chars so the lexer can index by scalar value.
    source: Vec<char>,
    // Tokens produced so far.
    tokens: Vec<Token>,
    // 1-based line number of the char at `current`.
    line: usize,
    // Index into `source` of the first char after the latest newline;
    // used by `push` to derive a column number.
    current_line_offset: usize,
    // Index where the token currently being scanned starts.
    start: usize,
    // Index of the next char to consume.
    current: usize,
}
impl Lexer {
    /// Creates a lexer over `source`; `file` (if any) is recorded in every
    /// token's location.
    pub fn new(source: &str, file: Option<String>) -> Lexer {
        Lexer {
            source: source.chars().collect::<Vec<_>>(),
            tokens: Vec::new(),
            line: 1,
            start: 0,
            current: 0,
            file,
            current_line_offset: 0,
        }
    }

    /// Consumes the lexer and returns the token stream. Leading newlines are
    /// stripped and long blank runs collapsed, and the stream is terminated
    /// with an `EndOfFile` token.
    pub fn lex(mut self) -> Vec<Token> {
        while self.current < self.source.len() {
            self.start = self.current;
            self.next_token();
        }
        self.clean_newlines();
        // Explicit terminator: the formatter renders it as "", and a parser
        // can rely on it instead of checking the vector length.
        self.start = self.current;
        self.push(TokenKind::EndOfFile);
        self.tokens
    }

    /// Scans one token starting at `self.start`. Characters that match no
    /// rule (e.g. spaces and tabs) produce no token and are skipped.
    fn next_token(&mut self) {
        let c = self.consume();
        // Single-character punctuation and newlines.
        let t = match c {
            '(' => Some(TokenKind::LeftParen),
            ')' => Some(TokenKind::RightParen),
            '{' => Some(TokenKind::LeftCurly),
            '}' => Some(TokenKind::RightCurly),
            ',' => Some(TokenKind::Comma),
            ':' => Some(TokenKind::Colon),
            ';' => Some(TokenKind::Semicolon),
            '.' => Some(TokenKind::Period),
            '\n' => {
                self.line += 1;
                self.current_line_offset = self.current;
                Some(TokenKind::NewLine)
            }
            _ => None,
        };
        if let Some(t) = t {
            self.push(t);
            return;
        }
        if c == '/' {
            // Checked peek: `/` may be the very last character of the input
            // (the unchecked `peek()` would panic there).
            let t = match self.peek_n(0) {
                Some('/') => {
                    // Line comment: consume up to and including the newline.
                    while !self.is_eof() {
                        let c = self.consume();
                        if c == '\n' {
                            self.line += 1;
                            self.current_line_offset = self.current;
                            break;
                        }
                    }
                    // The comment text keeps its trailing newline (if any),
                    // matching the previous behavior.
                    let s = self.current_scan(2, 0);
                    TokenKind::Comment(CommentKind::Line, s)
                }
                Some('*') => {
                    // Block comment: scan until the closing "*/".
                    // BUGFIX: the old loop only peeked and never advanced
                    // `current`, so any block comment hung the lexer forever.
                    while !self.is_eof() && !self.peek_match("*/") {
                        let c = self.consume();
                        if c == '\n' {
                            self.line += 1;
                            self.current_line_offset = self.current;
                        }
                    }
                    if self.is_eof() {
                        todo!("Expected */ before EOF");
                    }
                    // Step over the "*/" itself.
                    self.current += 2;
                    let s = self.current_scan(2, 2);
                    TokenKind::Comment(CommentKind::Block, s)
                }
                _ => todo!("forward slash"),
            };
            self.push(t);
            return;
        }
        if c == '"' {
            // String literal: scan to the closing quote on the same line.
            let mut terminated = false;
            while !self.is_eof() {
                let c = self.consume();
                match c {
                    '"' => {
                        terminated = true;
                        break;
                    }
                    '\n' => todo!("Expected closing string before new line"),
                    _ => (),
                }
            }
            // BUGFIX: the old code tested `is_eof()` here instead, which also
            // panicked when a correctly closed string ended the input.
            if !terminated {
                todo!("Expected closing string before EOF")
            }
            // Strip the surrounding quotes.
            let s = self.current_scan(1, 1);
            self.push(TokenKind::Literal(LiteralKind::String(s)));
            return;
        }
        if c.is_ascii_alphabetic() {
            // Identifier or keyword: [A-Za-z][A-Za-z0-9_]*.
            while !self.is_eof() {
                let p = self.peek();
                if p.is_alphanumeric() || p == '_' {
                    self.consume();
                } else {
                    break;
                }
            }
            // Reaching EOF mid-identifier is fine: the word simply ends with
            // the input (the old code hit a todo!() here and panicked).
            let s = self.current_scan(0, 0);
            if let Ok(k) = TryInto::<KeywordKind>::try_into(s.as_str()) {
                self.push(TokenKind::Keyword(k));
                return;
            }
            self.push(TokenKind::Identifier(s));
        }
    }

    /// Normalizes newline tokens: drops leading newlines and collapses any
    /// run of three or more consecutive newlines down to two.
    fn clean_newlines(&mut self) {
        while let Some(TokenKind::NewLine) = self.tokens.first().map(|t| &t.kind) {
            self.tokens.remove(0);
        }
        let mut i = 0;
        loop {
            let w = self
                .tokens
                .get(i..(i + 3))
                .map(|ts| ts.iter().map(|t| &t.kind).collect::<Vec<_>>());
            match w.as_deref() {
                Some([TokenKind::NewLine, TokenKind::NewLine, TokenKind::NewLine]) => {
                    self.tokens.remove(i + 2);
                }
                Some(_) => {
                    i += 1;
                }
                None => break,
            }
        }
    }

    /// Text of the token being scanned, trimmed by the given offsets
    /// (e.g. `(1, 1)` strips a string literal's quotes).
    fn current_scan(&self, start_offset: usize, end_offset: usize) -> String {
        self.source[(self.start + start_offset)..(self.current - end_offset)]
            .iter()
            .collect::<String>()
    }

    /// Appends a token of `kind` located at the current scan start.
    fn push(&mut self, kind: TokenKind) {
        self.tokens.push(Token {
            kind,
            location: TokenLocation {
                file: self.file.clone(),
                line: self.line,
                column: self.start.saturating_sub(self.current_line_offset),
            },
        });
    }

    /// Next char without consuming it. Panics at EOF; see `peek_n` for the
    /// checked variant.
    fn peek(&self) -> char {
        self.source[self.current]
    }

    /// Char `n` positions ahead of the cursor, or `None` past the end.
    fn peek_n(&self, n: usize) -> Option<char> {
        self.source.get(self.current + n).copied()
    }

    /// True if the input at the cursor starts with `m`; consumes nothing.
    fn peek_match(&self, m: &str) -> bool {
        let c = self.current;
        let s = self
            .source
            .get(c..(c + m.len()))
            .map(|s| s.iter().collect::<String>());
        if let Some(s) = s { s == m } else { false }
    }

    /// Consumes and returns the char at the cursor. Panics at EOF.
    fn consume(&mut self) -> char {
        let c = self.source[self.current];
        self.current += 1;
        c
    }

    /// True once every char has been consumed.
    fn is_eof(&self) -> bool {
        self.current == self.source.len()
    }
}

40
src/main.rs Normal file
View File

@ -0,0 +1,40 @@
use token::Token;
mod ast;
mod format;
mod lexer;
mod parse;
mod token;
/// Binary entry point; the interesting behavior currently lives in the tests.
fn main() {
    println!("Hello, world!");
}
#[cfg(test)]
mod tests {
    // was: `lexer::{self, Lexer}` — the `self` import was unused.
    use crate::{format::Formatter, lexer::Lexer};

    /// Small TypeScript-like fixture exercising keywords, identifiers,
    /// string literals, and punctuation.
    const BASIC: &str = r#"
function hello(name: string){
    console.log("Hey, ", name);
}
console.log("Starting!");
hello();
"#;

    #[test]
    fn lex() {
        let lexer = Lexer::new(BASIC, Some("basic.file".to_string()));
        let tokens = lexer.lex();
        let formatted = tokens
            .format(crate::format::FormatterOptions {})
            .expect("formatting lexed tokens should not fail");
        // The lex → format round trip must produce output; the previous test
        // only printed and asserted nothing.
        assert!(!formatted.is_empty());
        println!("{formatted}");
    }
}

13
src/parse.rs Normal file
View File

@ -0,0 +1,13 @@
use crate::token::Token;
/// Consumes a token stream and (eventually) produces an AST.
pub struct Parser {
    // Tokens to parse, as produced by the lexer.
    tokens: Vec<Token>,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Parser {
Self { tokens }
}
fn parse(&mut self) {}
}

71
src/token.rs Normal file
View File

@ -0,0 +1,71 @@
use anyhow::anyhow;
/// A single lexed token: its kind plus where it came from.
#[derive(Debug)]
pub struct Token {
    /// What kind of token this is (and any payload, e.g. identifier text).
    pub kind: TokenKind,
    /// Source position the token was scanned at.
    pub location: TokenLocation,
}
/// Discriminates every token the lexer can produce.
#[derive(Debug)]
pub enum TokenKind {
    /// A name that is not a keyword (e.g. `console`, `hello`).
    Identifier(String),
    /// A literal value (string or number).
    Literal(LiteralKind),
    /// A reserved word such as `function`.
    Keyword(KeywordKind),
    /// A line or block comment together with its text.
    Comment(CommentKind, String),
    LeftParen,
    RightParen,
    LeftCurly,
    RightCurly,
    Comma,
    Colon,
    Semicolon,
    Period,
    /// A literal newline; kept so formatting can preserve line structure.
    NewLine,
    EndOfFile,
}
/// Whether a comment is `// …` (Line) or `/* … */` (Block).
#[derive(Debug)]
pub enum CommentKind {
    Line,
    Block,
}
/// Reserved words. Variant names are lowercase to mirror the source
/// keywords exactly, hence the `non_camel_case_types` allowance.
#[allow(non_camel_case_types)]
#[derive(Debug)]
pub enum KeywordKind {
    function,
    string,
    number,
}
impl TryFrom<&str> for KeywordKind {
    type Error = anyhow::Error;

    /// Maps a raw word to its keyword variant.
    ///
    /// # Errors
    /// Returns an error naming the offending word when it is not a keyword
    /// (the previous message omitted the value, making failures opaque).
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        match value {
            "function" => Ok(Self::function),
            "string" => Ok(Self::string),
            "number" => Ok(Self::number),
            _ => Err(anyhow!("unknown keyword: {value}")),
        }
    }
}
/// Payload of a literal token.
#[derive(Debug)]
pub enum LiteralKind {
    /// A double-quoted string; the lexer stores it with the quotes stripped.
    String(String),
    /// A numeric literal.
    Number(Number),
}
/// A numeric literal's value, split by integer vs. floating point.
#[derive(Debug)]
pub enum Number {
    Integer(usize),
    Float(f64),
}
/// Where a token appeared in the input.
#[derive(Debug)]
pub struct TokenLocation {
    /// File name, if one was supplied to the lexer.
    pub file: Option<String>,
    /// 1-based line number.
    pub line: usize,
    /// 0-based column: offset of the token start from the start of its line.
    pub column: usize,
}