Reorganize repository
This commit is contained in:
282
crates/forth/src/compiler.rs
Normal file
282
crates/forth/src/compiler.rs
Normal file
@@ -0,0 +1,282 @@
|
||||
use super::ops::Op;
|
||||
use super::types::{Dictionary, SourceSpan};
|
||||
use super::words::{compile_word, simple_op};
|
||||
|
||||
/// One lexical unit of a script, as produced by `tokenize`.
///
/// Literal and word variants carry the byte-offset `SourceSpan` of the text
/// they were read from; the brace variants carry only the byte offset of the
/// brace character itself.
#[derive(Clone, Debug)]
enum Token {
    /// Integer literal (anything `i64`'s `FromStr` accepts).
    Int(i64, SourceSpan),
    /// Float literal (only tried after integer parsing fails).
    Float(f64, SourceSpan),
    /// Double-quoted string literal, surrounding quotes stripped.
    Str(String, SourceSpan),
    /// Any other whitespace-delimited word.
    Word(String, SourceSpan),
    /// `{` — opens a quotation, at this byte offset.
    QuoteStart(usize),
    /// `}` — closes a quotation, at this byte offset.
    QuoteEnd(usize),
}
|
||||
|
||||
pub(super) fn compile_script(input: &str, dict: &Dictionary) -> Result<Vec<Op>, String> {
|
||||
let tokens = tokenize(input);
|
||||
compile(&tokens, dict)
|
||||
}
|
||||
|
||||
fn tokenize(input: &str) -> Vec<Token> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut chars = input.char_indices().peekable();
|
||||
|
||||
while let Some(&(pos, c)) = chars.peek() {
|
||||
if c.is_whitespace() {
|
||||
chars.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
if c == '"' {
|
||||
let start = pos;
|
||||
chars.next();
|
||||
let mut s = String::new();
|
||||
let mut end = start + 1;
|
||||
while let Some(&(i, ch)) = chars.peek() {
|
||||
end = i + ch.len_utf8();
|
||||
chars.next();
|
||||
if ch == '"' {
|
||||
break;
|
||||
}
|
||||
s.push(ch);
|
||||
}
|
||||
tokens.push(Token::Str(s, SourceSpan { start, end }));
|
||||
continue;
|
||||
}
|
||||
|
||||
if c == '(' {
|
||||
while let Some(&(_, ch)) = chars.peek() {
|
||||
chars.next();
|
||||
if ch == ')' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if c == '{' {
|
||||
chars.next();
|
||||
tokens.push(Token::QuoteStart(pos));
|
||||
continue;
|
||||
}
|
||||
|
||||
if c == '}' {
|
||||
chars.next();
|
||||
tokens.push(Token::QuoteEnd(pos));
|
||||
continue;
|
||||
}
|
||||
|
||||
let start = pos;
|
||||
let mut word = String::new();
|
||||
let mut end = start;
|
||||
while let Some(&(i, ch)) = chars.peek() {
|
||||
if ch.is_whitespace() || ch == '{' || ch == '}' {
|
||||
break;
|
||||
}
|
||||
end = i + ch.len_utf8();
|
||||
word.push(ch);
|
||||
chars.next();
|
||||
}
|
||||
|
||||
let span = SourceSpan { start, end };
|
||||
if let Ok(i) = word.parse::<i64>() {
|
||||
tokens.push(Token::Int(i, span));
|
||||
} else if let Ok(f) = word.parse::<f64>() {
|
||||
tokens.push(Token::Float(f, span));
|
||||
} else {
|
||||
tokens.push(Token::Word(word, span));
|
||||
}
|
||||
}
|
||||
|
||||
tokens
|
||||
}
|
||||
|
||||
/// Compile a slice of tokens into a linear op list.
///
/// Handles literals, quotations (`{ … }`), colon definitions (`: name … ;`),
/// `if`/`else`/`then` conditionals, pipe-delimited sections, and bracketed
/// lists; any other word is resolved via `compile_word` against `dict`.
fn compile(tokens: &[Token], dict: &Dictionary) -> Result<Vec<Op>, String> {
    let mut ops = Vec::new();
    let mut i = 0;
    // `|` is its own open *and* close delimiter: occurrences alternate
    // between starting a list and ending a local cycle.
    let mut pipe_parity = false;
    // Depth of currently open list constructs. While > 0, plain words are
    // compiled as one-word quotations instead of inline ops.
    let mut list_depth: usize = 0;

    while i < tokens.len() {
        match &tokens[i] {
            // Literals push themselves, carrying their source span.
            Token::Int(n, span) => ops.push(Op::PushInt(*n, Some(*span))),
            Token::Float(f, span) => ops.push(Op::PushFloat(*f, Some(*span))),
            Token::Str(s, span) => ops.push(Op::PushStr(s.clone(), Some(*span))),
            Token::QuoteStart(start_pos) => {
                // Compile the body up to the matching `}`; `consumed`
                // includes the closing-brace token, `end_pos` is its byte
                // offset.
                let (quote_ops, consumed, end_pos) = compile_quotation(&tokens[i + 1..], dict)?;
                i += consumed;
                // +1 so the span covers the closing `}` byte itself.
                let body_span = SourceSpan { start: *start_pos, end: end_pos + 1 };
                ops.push(Op::Quotation(quote_ops, Some(body_span)));
            }
            Token::QuoteEnd(_) => {
                // A bare `}` has no matching `{` at this level.
                return Err("unexpected }".into());
            }
            Token::Word(w, span) => {
                let word = w.as_str();
                if word == ":" {
                    // Colon definition: compile the body and register it in
                    // the shared dictionary immediately, so later words in
                    // this same script can call it.
                    let (consumed, name, body) = compile_colon_def(&tokens[i + 1..], dict)?;
                    i += consumed;
                    dict.lock().unwrap().insert(name, body);
                } else if word == ";" {
                    // `;` is only valid inside a colon definition, which
                    // compile_colon_def consumes — seeing one here is an error.
                    return Err("unexpected ;".into());
                } else if word == "|" {
                    if pipe_parity {
                        // Closing `|` of a pair.
                        ops.push(Op::LocalCycleEnd);
                        list_depth = list_depth.saturating_sub(1);
                    } else {
                        // Opening `|` of a pair.
                        ops.push(Op::ListStart);
                        list_depth += 1;
                    }
                    pipe_parity = !pipe_parity;
                } else if word == "if" {
                    let (then_ops, else_ops, consumed, then_span, else_span) = compile_if(&tokens[i + 1..], dict)?;
                    i += consumed;
                    if else_ops.is_empty() {
                        // No else branch: jump over the then-ops when the
                        // condition is zero.
                        ops.push(Op::BranchIfZero(then_ops.len(), then_span, None));
                        ops.extend(then_ops);
                    } else {
                        // The then branch ends with an unconditional jump
                        // over the else-ops; the `+ 1` accounts for that
                        // extra Branch op in the skip distance.
                        ops.push(Op::BranchIfZero(then_ops.len() + 1, then_span, else_span));
                        ops.extend(then_ops);
                        ops.push(Op::Branch(else_ops.len()));
                        ops.extend(else_ops);
                    }
                } else if is_list_start(word) {
                    ops.push(Op::ListStart);
                    list_depth += 1;
                } else if is_list_end(word) {
                    // saturating_sub keeps a stray closer from underflowing.
                    list_depth = list_depth.saturating_sub(1);
                    if let Some(op) = simple_op(word) {
                        ops.push(op);
                    }
                } else if list_depth > 0 {
                    // Inside a list, a word compiles to a deferred one-word
                    // quotation instead of executing inline.
                    let mut word_ops = Vec::new();
                    if !compile_word(word, Some(*span), &mut word_ops, dict) {
                        return Err(format!("unknown word: {word}"));
                    }
                    ops.push(Op::Quotation(word_ops, Some(*span)));
                } else if !compile_word(word, Some(*span), &mut ops, dict) {
                    return Err(format!("unknown word: {word}"));
                }
            }
        }
        i += 1;
    }

    Ok(ops)
}
|
||||
|
||||
/// True for the words that open a list construct.
fn is_list_start(word: &str) -> bool {
    word == "[" || word == "<" || word == "<<"
}
|
||||
|
||||
/// True for the words that close a list construct.
fn is_list_end(word: &str) -> bool {
    word == "]" || word == ">" || word == ">>"
}
|
||||
|
||||
fn compile_quotation(tokens: &[Token], dict: &Dictionary) -> Result<(Vec<Op>, usize, usize), String> {
|
||||
let mut depth = 1;
|
||||
let mut end_idx = None;
|
||||
|
||||
for (i, tok) in tokens.iter().enumerate() {
|
||||
match tok {
|
||||
Token::QuoteStart(_) => depth += 1,
|
||||
Token::QuoteEnd(_) => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
end_idx = Some(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let end_idx = end_idx.ok_or("missing }")?;
|
||||
let byte_pos = match &tokens[end_idx] {
|
||||
Token::QuoteEnd(pos) => *pos,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let quote_ops = compile(&tokens[..end_idx], dict)?;
|
||||
Ok((quote_ops, end_idx + 1, byte_pos))
|
||||
}
|
||||
|
||||
fn token_span(tok: &Token) -> Option<SourceSpan> {
|
||||
match tok {
|
||||
Token::Int(_, s) | Token::Float(_, s) | Token::Str(_, s) | Token::Word(_, s) => Some(*s),
|
||||
Token::QuoteStart(p) => Some(SourceSpan { start: *p, end: *p + 1 }),
|
||||
Token::QuoteEnd(p) => Some(SourceSpan { start: *p, end: *p + 1 }),
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_colon_def(tokens: &[Token], dict: &Dictionary) -> Result<(usize, String, Vec<Op>), String> {
|
||||
if tokens.is_empty() {
|
||||
return Err("expected word name after ':'".into());
|
||||
}
|
||||
let name = match &tokens[0] {
|
||||
Token::Word(w, _) => w.clone(),
|
||||
_ => return Err("expected word name after ':'".into()),
|
||||
};
|
||||
let mut semi_pos = None;
|
||||
for (i, tok) in tokens[1..].iter().enumerate() {
|
||||
if let Token::Word(w, _) = tok {
|
||||
if w == ";" {
|
||||
semi_pos = Some(i + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
let semi_pos = semi_pos.ok_or("missing ';' in word definition")?;
|
||||
let body_tokens = &tokens[1..semi_pos];
|
||||
let body_ops = compile(body_tokens, dict)?;
|
||||
// consumed = name + body + semicolon
|
||||
Ok((semi_pos + 1, name, body_ops))
|
||||
}
|
||||
|
||||
fn tokens_span(tokens: &[Token]) -> Option<SourceSpan> {
|
||||
let first = tokens.first().and_then(token_span)?;
|
||||
let last = tokens.last().and_then(token_span)?;
|
||||
Some(SourceSpan { start: first.start, end: last.end })
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn compile_if(tokens: &[Token], dict: &Dictionary) -> Result<(Vec<Op>, Vec<Op>, usize, Option<SourceSpan>, Option<SourceSpan>), String> {
|
||||
let mut depth = 1;
|
||||
let mut else_pos = None;
|
||||
let mut then_pos = None;
|
||||
|
||||
for (i, tok) in tokens.iter().enumerate() {
|
||||
if let Token::Word(w, _) = tok {
|
||||
match w.as_str() {
|
||||
"if" => depth += 1,
|
||||
"else" if depth == 1 => else_pos = Some(i),
|
||||
"then" => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
then_pos = Some(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let then_pos = then_pos.ok_or("missing 'then'")?;
|
||||
|
||||
let (then_ops, else_ops, then_span, else_span) = if let Some(ep) = else_pos {
|
||||
let then_slice = &tokens[..ep];
|
||||
let else_slice = &tokens[ep + 1..then_pos];
|
||||
let then_span = tokens_span(then_slice);
|
||||
let else_span = tokens_span(else_slice);
|
||||
let then_ops = compile(then_slice, dict)?;
|
||||
let else_ops = compile(else_slice, dict)?;
|
||||
(then_ops, else_ops, then_span, else_span)
|
||||
} else {
|
||||
let then_slice = &tokens[..then_pos];
|
||||
let then_span = tokens_span(then_slice);
|
||||
let then_ops = compile(then_slice, dict)?;
|
||||
(then_ops, Vec::new(), then_span, None)
|
||||
};
|
||||
|
||||
Ok((then_ops, else_ops, then_pos + 1, then_span, else_span))
|
||||
}
|
||||
Reference in New Issue
Block a user