//! Forth script compiler: tokenizer and op-sequence compiler.
//! Path: Cagire/crates/forth/src/compiler.rs

use std::sync::Arc;
use super::ops::Op;
use super::types::{Dictionary, SourceSpan};
use super::words::compile_word;
/// A lexical token produced by `tokenize`, carrying the byte span
/// (`SourceSpan`) it was read from so later stages can attach source
/// locations to the ops they emit.
#[derive(Clone, Debug)]
enum Token {
    /// Integer literal (anything that parses as `i64`).
    Int(i64, SourceSpan),
    /// Float literal (parses as `f64` but not as `i64`).
    Float(f64, SourceSpan),
    /// Double-quoted string literal, with the quotes stripped.
    Str(String, SourceSpan),
    /// Any other whitespace-delimited word, including the single `;`.
    Word(String, SourceSpan),
}
/// Compile a full source string into a flat op sequence.
///
/// Tokenizes `input` and feeds the token stream to the compiler; any
/// `: name ... ;` definitions encountered along the way are inserted
/// into `dict`. Returns the first compile error as a `String`.
pub(super) fn compile_script(input: &str, dict: &Dictionary) -> Result<Vec<Op>, String> {
    compile(&tokenize(input), dict)
}
/// Split `input` into tokens, recording each token's byte span.
///
/// Lexical rules (as implemented below):
/// - whitespace separates tokens and is discarded;
/// - `(` and `)` are skipped wherever they start a token;
/// - `"..."` is a string literal — no escape syntax, and an unterminated
///   string silently runs to end of input;
/// - `;;` starts a comment running to end of line; a lone `;` becomes the
///   word token `;`;
/// - anything else is read up to the next whitespace and classified as an
///   integer, a float, or a plain word.
fn tokenize(input: &str) -> Vec<Token> {
    let mut tokens = Vec::new();
    let mut chars = input.char_indices().peekable();
    while let Some(&(pos, c)) = chars.peek() {
        // Skip whitespace between tokens.
        if c.is_whitespace() {
            chars.next();
            continue;
        }
        // Parentheses are dropped entirely; they never reach the compiler.
        if c == '(' || c == ')' {
            chars.next();
            continue;
        }
        if c == '"' {
            // String literal: collect characters up to the closing quote.
            // `end` tracks the byte offset one past the last consumed char
            // so the span covers both quotes when the string terminates.
            let start = pos;
            chars.next();
            let mut s = String::new();
            let mut end = start + 1;
            while let Some(&(i, ch)) = chars.peek() {
                end = i + ch.len_utf8();
                chars.next();
                if ch == '"' {
                    break;
                }
                s.push(ch);
            }
            tokens.push(Token::Str(s, SourceSpan { start, end }));
            continue;
        }
        if c == ';' {
            chars.next(); // consume first ;
            if let Some(&(_, ';')) = chars.peek() {
                // ;; starts a comment to end of line
                chars.next(); // consume second ;
                while let Some(&(_, ch)) = chars.peek() {
                    if ch == '\n' {
                        break;
                    }
                    chars.next();
                }
                continue;
            }
            // single ; is a word, create token
            tokens.push(Token::Word(
                ";".to_string(),
                SourceSpan {
                    start: pos,
                    end: pos + 1,
                },
            ));
            continue;
        }
        // General word: read until whitespace. Note the scan does NOT stop
        // at `"`, `(`, `)`, or `;`, so those characters are only special at
        // the start of a token.
        let start = pos;
        let mut word = String::new();
        let mut end = start;
        while let Some(&(i, ch)) = chars.peek() {
            if ch.is_whitespace() {
                break;
            }
            end = i + ch.len_utf8();
            word.push(ch);
            chars.next();
        }
        let span = SourceSpan { start, end };
        // Normalize shorthand float syntax: .25 -> 0.25, -.5 -> -0.5
        let word_to_parse = if word.starts_with('.')
            && word.len() > 1
            && word.as_bytes()[1].is_ascii_digit()
        {
            format!("0{word}")
        } else if word.starts_with("-.")
            && word.len() > 2
            && word.as_bytes()[2].is_ascii_digit()
        {
            format!("-0{}", &word[1..])
        } else {
            word.clone()
        };
        // Classify: integer first, then float, otherwise a plain word.
        // Word tokens keep the original (pre-normalization) spelling.
        if let Ok(i) = word_to_parse.parse::<i64>() {
            tokens.push(Token::Int(i, span));
        } else if let Ok(f) = word_to_parse.parse::<f64>() {
            tokens.push(Token::Float(f, span));
        } else {
            tokens.push(Token::Word(word, span));
        }
    }
    tokens
}
/// Compile a token slice into ops, consulting (and extending) `dict`.
///
/// Numeric literals may be shadowed by dictionary entries keyed on their
/// textual form; otherwise they compile to push ops. The structural words
/// `{`, `:`, and `if` are handled here; all other words are delegated to
/// `compile_word`, and an unrecognized word is a compile error.
fn compile(tokens: &[Token], dict: &Dictionary) -> Result<Vec<Op>, String> {
    let mut ops = Vec::new();
    let mut i = 0;
    while i < tokens.len() {
        match &tokens[i] {
            Token::Int(n, span) => {
                // A dictionary entry named e.g. "42" overrides the literal.
                let key = n.to_string();
                if let Some(body) = dict.lock().get(&key).cloned() {
                    ops.extend(body);
                } else {
                    ops.push(Op::PushInt(*n, Some(*span)));
                }
            }
            Token::Float(f, span) => {
                // Same override rule as Int, keyed on the f64's Display form.
                let key = f.to_string();
                if let Some(body) = dict.lock().get(&key).cloned() {
                    ops.extend(body);
                } else {
                    ops.push(Op::PushFloat(*f, Some(*span)));
                }
            }
            Token::Str(s, span) => ops.push(Op::PushStr(Arc::from(s.as_str()), Some(*span))),
            Token::Word(w, span) => {
                let word = w.as_str();
                if word == "{" {
                    // `consumed` covers the quotation body plus the closing
                    // `}`; the trailing `i += 1` below then steps past it.
                    let (quote_ops, consumed, end_span) =
                        compile_quotation(&tokens[i + 1..], dict)?;
                    i += consumed;
                    // Quotation span runs from `{` through `}` inclusive.
                    let body_span = SourceSpan {
                        start: span.start,
                        end: end_span.end,
                    };
                    ops.push(Op::Quotation(Arc::from(quote_ops), Some(body_span)));
                } else if word == "}" {
                    return Err("unexpected }".into());
                } else if word == ":" {
                    let (consumed, name, body) = compile_colon_def(&tokens[i + 1..], dict)?;
                    i += consumed;
                    dict.lock().insert(name, body);
                } else if word == ";" {
                    return Err("unexpected ;".into());
                } else if word == "if" {
                    let (then_ops, else_ops, consumed, then_span, else_span) =
                        compile_if(&tokens[i + 1..], dict)?;
                    i += consumed;
                    if else_ops.is_empty() {
                        // No else-branch: skip the then-branch on zero.
                        ops.push(Op::BranchIfZero(then_ops.len(), then_span, None));
                        ops.extend(then_ops);
                    } else {
                        // The +1 also skips the unconditional Branch that
                        // jumps the then-branch over the else-branch.
                        ops.push(Op::BranchIfZero(then_ops.len() + 1, then_span, else_span));
                        ops.extend(then_ops);
                        ops.push(Op::Branch(else_ops.len()));
                        ops.extend(else_ops);
                    }
                } else if !compile_word(word, Some(*span), &mut ops, dict) {
                    return Err(format!("unknown word: {word}"));
                }
            }
        }
        i += 1;
    }
    Ok(ops)
}
/// Compile the body of a `{ ... }` quotation.
///
/// `tokens` begins just after the opening `{`. Returns the compiled body,
/// the number of tokens consumed (body plus the closing `}`), and the span
/// of the closing `}` so the caller can compute the quotation's full span.
/// Errors if no matching `}` is found.
fn compile_quotation(
    tokens: &[Token],
    dict: &Dictionary,
) -> Result<(Vec<Op>, usize, SourceSpan), String> {
    // Scan forward for the matching `}`, tracking nested quotations, and
    // capture its index and span in one pass.
    let mut nesting: usize = 1;
    let mut close = None;
    for (idx, tok) in tokens.iter().enumerate() {
        if let Token::Word(word, span) = tok {
            if word == "{" {
                nesting += 1;
            } else if word == "}" {
                nesting -= 1;
                if nesting == 0 {
                    close = Some((idx, *span));
                    break;
                }
            }
        }
    }
    let (close_idx, close_span) = close.ok_or("missing }")?;
    let body = compile(&tokens[..close_idx], dict)?;
    Ok((body, close_idx + 1, close_span))
}
/// Return the source span attached to any token variant.
///
/// Always `Some` today; the `Option` return leaves room for span-less
/// tokens and composes with `Option`-returning callers like `tokens_span`.
fn token_span(tok: &Token) -> Option<SourceSpan> {
    let span = match tok {
        Token::Int(_, s) => s,
        Token::Float(_, s) => s,
        Token::Str(_, s) => s,
        Token::Word(_, s) => s,
    };
    Some(*span)
}
/// Compile a `: name ... ;` colon definition.
///
/// `tokens` begins just after the `:`. The first token names the word
/// (numeric and string tokens are coerced to their textual form); the
/// tokens up to the next `;` word form the body. Returns the token count
/// consumed (name, body, and terminating `;`), the name, and the compiled
/// body ops.
fn compile_colon_def(
    tokens: &[Token],
    dict: &Dictionary,
) -> Result<(usize, String, Vec<Op>), String> {
    let name = match tokens.first() {
        None => return Err("expected word name after ':'".into()),
        Some(Token::Word(w, _)) => w.clone(),
        Some(Token::Int(n, _)) => n.to_string(),
        Some(Token::Float(f, _)) => f.to_string(),
        Some(Token::Str(s, _)) => s.clone(),
    };
    // Locate the terminating `;` after the name. NOTE(review): the scan
    // does not account for nested `{ }` quotations — a `;` inside a
    // quotation would end the definition early; confirm this is intended.
    let semi_pos = tokens[1..]
        .iter()
        .position(|tok| matches!(tok, Token::Word(w, _) if w == ";"))
        .map(|rel| rel + 1)
        .ok_or("missing ';' in word definition")?;
    let body_ops = compile(&tokens[1..semi_pos], dict)?;
    Ok((semi_pos + 1, name, body_ops))
}
fn tokens_span(tokens: &[Token]) -> Option<SourceSpan> {
let first = tokens.first().and_then(token_span)?;
let last = tokens.last().and_then(token_span)?;
Some(SourceSpan {
start: first.start,
end: last.end,
})
}
/// Compile the branches of an `if ... [else ...] then` construct.
///
/// `tokens` begins just after the `if`. Returns the compiled then- and
/// else-branch ops (else empty when absent), the number of tokens consumed
/// (through the matching `then`), and the source span of each branch
/// (`None` when the branch is empty or absent).
///
/// Nested `if`s are matched by depth; `else` is only recognized at depth 1.
/// NOTE(review): a second `else` at depth 1 silently overwrites the first
/// (duplicate `else` is not reported) — confirm whether that is intended.
#[allow(clippy::type_complexity)]
fn compile_if(
    tokens: &[Token],
    dict: &Dictionary,
) -> Result<
    (
        Vec<Op>,
        Vec<Op>,
        usize,
        Option<SourceSpan>,
        Option<SourceSpan>,
    ),
    String,
> {
    let mut depth = 1;
    let mut else_pos = None;
    let mut then_pos = None;
    for (i, tok) in tokens.iter().enumerate() {
        if let Token::Word(w, _) = tok {
            match w.as_str() {
                "if" => depth += 1,
                "else" if depth == 1 => else_pos = Some(i),
                "then" => {
                    // `then` closes the innermost open `if`.
                    depth -= 1;
                    if depth == 0 {
                        then_pos = Some(i);
                        break;
                    }
                }
                _ => {}
            }
        }
    }
    let then_pos = then_pos.ok_or("missing 'then'")?;
    let (then_ops, else_ops, then_span, else_span) = if let Some(ep) = else_pos {
        // Split around `else`: [..ep] is the then-branch, (ep, then_pos)
        // is the else-branch; spans are computed before compiling.
        let then_slice = &tokens[..ep];
        let else_slice = &tokens[ep + 1..then_pos];
        let then_span = tokens_span(then_slice);
        let else_span = tokens_span(else_slice);
        let then_ops = compile(then_slice, dict)?;
        let else_ops = compile(else_slice, dict)?;
        (then_ops, else_ops, then_span, else_span)
    } else {
        let then_slice = &tokens[..then_pos];
        let then_span = tokens_span(then_slice);
        let then_ops = compile(then_slice, dict)?;
        (then_ops, Vec::new(), then_span, None)
    };
    Ok((then_ops, else_ops, then_pos + 1, then_span, else_span))
}