// (extraction metadata: 312 lines, 9.4 KiB, Rust)
use std::sync::Arc;
|
|
|
|
use super::ops::Op;
|
|
use super::types::{Dictionary, SourceSpan};
|
|
use super::words::compile_word;
|
|
|
|
/// A lexed token together with the byte range it occupies in the source text.
#[derive(Clone, Debug)]
enum Token {
    /// Integer literal (accepted by `i64` parsing).
    Int(i64, SourceSpan),
    /// Float literal, including normalized shorthand forms like `.5` / `-.5`.
    Float(f64, SourceSpan),
    /// Double-quoted string; the span includes both quote characters.
    Str(String, SourceSpan),
    /// Any other whitespace-delimited word.
    Word(String, SourceSpan),
}
|
|
|
|
pub(super) fn compile_script(input: &str, dict: &Dictionary) -> Result<Vec<Op>, String> {
|
|
let tokens = tokenize(input);
|
|
compile(&tokens, dict)
|
|
}
|
|
|
|
fn tokenize(input: &str) -> Vec<Token> {
|
|
let mut tokens = Vec::new();
|
|
let mut chars = input.char_indices().peekable();
|
|
|
|
while let Some(&(pos, c)) = chars.peek() {
|
|
if c.is_whitespace() {
|
|
chars.next();
|
|
continue;
|
|
}
|
|
|
|
if c == '(' || c == ')' {
|
|
chars.next();
|
|
continue;
|
|
}
|
|
|
|
if c == '"' {
|
|
let start = pos;
|
|
chars.next();
|
|
let mut s = String::new();
|
|
let mut end = start + 1;
|
|
while let Some(&(i, ch)) = chars.peek() {
|
|
end = i + ch.len_utf8();
|
|
chars.next();
|
|
if ch == '"' {
|
|
break;
|
|
}
|
|
s.push(ch);
|
|
}
|
|
tokens.push(Token::Str(s, SourceSpan { start, end }));
|
|
continue;
|
|
}
|
|
|
|
if c == ';' {
|
|
chars.next(); // consume first ;
|
|
if let Some(&(_, ';')) = chars.peek() {
|
|
// ;; starts a comment to end of line
|
|
chars.next(); // consume second ;
|
|
while let Some(&(_, ch)) = chars.peek() {
|
|
if ch == '\n' {
|
|
break;
|
|
}
|
|
chars.next();
|
|
}
|
|
continue;
|
|
}
|
|
// single ; is a word, create token
|
|
tokens.push(Token::Word(
|
|
";".to_string(),
|
|
SourceSpan {
|
|
start: pos,
|
|
end: pos + 1,
|
|
},
|
|
));
|
|
continue;
|
|
}
|
|
|
|
let start = pos;
|
|
let mut word = String::new();
|
|
let mut end = start;
|
|
while let Some(&(i, ch)) = chars.peek() {
|
|
if ch.is_whitespace() {
|
|
break;
|
|
}
|
|
end = i + ch.len_utf8();
|
|
word.push(ch);
|
|
chars.next();
|
|
}
|
|
|
|
let span = SourceSpan { start, end };
|
|
|
|
// Normalize shorthand float syntax: .25 -> 0.25, -.5 -> -0.5
|
|
let word_to_parse = if word.starts_with('.')
|
|
&& word.len() > 1
|
|
&& word.as_bytes()[1].is_ascii_digit()
|
|
{
|
|
format!("0{word}")
|
|
} else if word.starts_with("-.")
|
|
&& word.len() > 2
|
|
&& word.as_bytes()[2].is_ascii_digit()
|
|
{
|
|
format!("-0{}", &word[1..])
|
|
} else {
|
|
word.clone()
|
|
};
|
|
|
|
if let Ok(i) = word_to_parse.parse::<i64>() {
|
|
tokens.push(Token::Int(i, span));
|
|
} else if let Ok(f) = word_to_parse.parse::<f64>() {
|
|
tokens.push(Token::Float(f, span));
|
|
} else {
|
|
tokens.push(Token::Word(word, span));
|
|
}
|
|
}
|
|
|
|
tokens
|
|
}
|
|
|
|
/// Lowers a token stream to a flat `Vec<Op>`, resolving words via `dict`.
///
/// Control words are compiled structurally:
/// - `{ ... }` becomes a single `Op::Quotation` holding its compiled body.
/// - `: name body ;` compiles `body` into `dict` and emits no ops.
/// - `if ... [else ...] then` becomes `BranchIfZero`/`Branch` ops (see the
///   `if` arm for the exact layout).
///
/// Numeric literals first consult `dict`, keyed by their canonical string
/// form, so numbers themselves can be redefined as words.
fn compile(tokens: &[Token], dict: &Dictionary) -> Result<Vec<Op>, String> {
    let mut ops = Vec::new();
    let mut i = 0;

    while i < tokens.len() {
        match &tokens[i] {
            Token::Int(n, span) => {
                // A redefined number (`: 5 ... ;`) shadows the literal.
                let key = n.to_string();
                if let Some(body) = dict.lock().get(&key).cloned() {
                    ops.extend(body);
                } else {
                    ops.push(Op::PushInt(*n, Some(*span)));
                }
            }
            Token::Float(f, span) => {
                // Same shadowing rule, keyed by the f64's string form.
                let key = f.to_string();
                if let Some(body) = dict.lock().get(&key).cloned() {
                    ops.extend(body);
                } else {
                    ops.push(Op::PushFloat(*f, Some(*span)));
                }
            }
            Token::Str(s, span) => ops.push(Op::PushStr(Arc::from(s.as_str()), Some(*span))),
            Token::Word(w, span) => {
                let word = w.as_str();
                if word == "{" {
                    // `consumed` covers the quotation body plus the closing
                    // `}`; combined with the trailing `i += 1` below, `i`
                    // lands just past the quotation.
                    let (quote_ops, consumed, end_span) =
                        compile_quotation(&tokens[i + 1..], dict)?;
                    i += consumed;
                    // Span of the whole quotation, opening `{` through `}`.
                    let body_span = SourceSpan {
                        start: span.start,
                        end: end_span.end,
                    };
                    ops.push(Op::Quotation(Arc::from(quote_ops), Some(body_span)));
                } else if word == "}" {
                    // Closing braces are consumed by compile_quotation; a
                    // `}` seen here has no matching `{`.
                    return Err("unexpected }".into());
                } else if word == ":" {
                    // Definitions emit no ops; they only mutate `dict`.
                    let (consumed, name, body) = compile_colon_def(&tokens[i + 1..], dict)?;
                    i += consumed;
                    dict.lock().insert(name, body);
                } else if word == ";" {
                    // `;` is consumed by compile_colon_def; one seen here
                    // has no matching `:`.
                    return Err("unexpected ;".into());
                } else if word == "if" {
                    let (then_ops, else_ops, consumed, then_span, else_span) =
                        compile_if(&tokens[i + 1..], dict)?;
                    i += consumed;
                    if else_ops.is_empty() {
                        // No else clause: skip the then-branch when the
                        // condition is zero.
                        ops.push(Op::BranchIfZero(then_ops.len(), then_span, None));
                        ops.extend(then_ops);
                    } else {
                        // With else: the `+ 1` also skips the unconditional
                        // Branch that jumps the then-branch over the
                        // else-branch.
                        ops.push(Op::BranchIfZero(then_ops.len() + 1, then_span, else_span));
                        ops.extend(then_ops);
                        ops.push(Op::Branch(else_ops.len()));
                        ops.extend(else_ops);
                    }
                } else if !compile_word(word, Some(*span), &mut ops, dict) {
                    return Err(format!("unknown word: {word}"));
                }
            }
        }
        i += 1;
    }

    Ok(ops)
}
|
|
|
|
fn compile_quotation(
|
|
tokens: &[Token],
|
|
dict: &Dictionary,
|
|
) -> Result<(Vec<Op>, usize, SourceSpan), String> {
|
|
let mut depth = 1;
|
|
let mut end_idx = None;
|
|
|
|
for (i, tok) in tokens.iter().enumerate() {
|
|
if let Token::Word(w, _) = tok {
|
|
match w.as_str() {
|
|
"{" => depth += 1,
|
|
"}" => {
|
|
depth -= 1;
|
|
if depth == 0 {
|
|
end_idx = Some(i);
|
|
break;
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
let end_idx = end_idx.ok_or("missing }")?;
|
|
let end_span = match &tokens[end_idx] {
|
|
Token::Word(_, span) => *span,
|
|
_ => unreachable!(),
|
|
};
|
|
let quote_ops = compile(&tokens[..end_idx], dict)?;
|
|
Ok((quote_ops, end_idx + 1, end_span))
|
|
}
|
|
|
|
fn token_span(tok: &Token) -> Option<SourceSpan> {
|
|
match tok {
|
|
Token::Int(_, s) | Token::Float(_, s) | Token::Str(_, s) | Token::Word(_, s) => Some(*s),
|
|
}
|
|
}
|
|
|
|
fn compile_colon_def(
|
|
tokens: &[Token],
|
|
dict: &Dictionary,
|
|
) -> Result<(usize, String, Vec<Op>), String> {
|
|
if tokens.is_empty() {
|
|
return Err("expected word name after ':'".into());
|
|
}
|
|
let name = match &tokens[0] {
|
|
Token::Word(w, _) => w.clone(),
|
|
Token::Int(n, _) => n.to_string(),
|
|
Token::Float(f, _) => f.to_string(),
|
|
Token::Str(s, _) => s.clone(),
|
|
};
|
|
let mut semi_pos = None;
|
|
for (i, tok) in tokens[1..].iter().enumerate() {
|
|
if let Token::Word(w, _) = tok {
|
|
if w == ";" {
|
|
semi_pos = Some(i + 1);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
let semi_pos = semi_pos.ok_or("missing ';' in word definition")?;
|
|
let body_tokens = &tokens[1..semi_pos];
|
|
let body_ops = compile(body_tokens, dict)?;
|
|
Ok((semi_pos + 1, name, body_ops))
|
|
}
|
|
|
|
fn tokens_span(tokens: &[Token]) -> Option<SourceSpan> {
|
|
let first = tokens.first().and_then(token_span)?;
|
|
let last = tokens.last().and_then(token_span)?;
|
|
Some(SourceSpan {
|
|
start: first.start,
|
|
end: last.end,
|
|
})
|
|
}
|
|
|
|
/// Scans for the delimiters of an `if` construct and compiles its branches.
///
/// `tokens` starts just after the `if` word. Accepted forms are
/// `if THEN-BODY then` and `if THEN-BODY else ELSE-BODY then`.
/// Returns `(then_ops, else_ops, consumed, then_span, else_span)`, where
/// `consumed` counts every token up to and including the terminating
/// `then`, and `else_ops`/`else_span` are empty/`None` when no `else`
/// clause is present.
#[allow(clippy::type_complexity)]
fn compile_if(
    tokens: &[Token],
    dict: &Dictionary,
) -> Result<
    (
        Vec<Op>,
        Vec<Op>,
        usize,
        Option<SourceSpan>,
        Option<SourceSpan>,
    ),
    String,
> {
    // Depth of nested `if`s; `else`/`then` bind to the innermost open `if`.
    // NOTE(review): the scan does not track `{`/`}` nesting, so an
    // `else`/`then` word inside a quotation would be claimed by this `if`;
    // such words would fail to compile as quotation body anyway — confirm
    // this is the intended failure mode.
    let mut depth = 1;
    let mut else_pos = None;
    let mut then_pos = None;

    for (i, tok) in tokens.iter().enumerate() {
        if let Token::Word(w, _) = tok {
            match w.as_str() {
                "if" => depth += 1,
                // No break here: a later depth-1 `else` overwrites an
                // earlier one, leaving the earlier `else` inside the
                // then-branch slice.
                "else" if depth == 1 => else_pos = Some(i),
                "then" => {
                    depth -= 1;
                    if depth == 0 {
                        then_pos = Some(i);
                        break;
                    }
                }
                _ => {}
            }
        }
    }

    let then_pos = then_pos.ok_or("missing 'then'")?;

    // Split into branch slices, compile each, and capture their source
    // spans (threaded into the branch ops by the caller).
    let (then_ops, else_ops, then_span, else_span) = if let Some(ep) = else_pos {
        let then_slice = &tokens[..ep];
        let else_slice = &tokens[ep + 1..then_pos];
        let then_span = tokens_span(then_slice);
        let else_span = tokens_span(else_slice);
        let then_ops = compile(then_slice, dict)?;
        let else_ops = compile(else_slice, dict)?;
        (then_ops, else_ops, then_span, else_span)
    } else {
        let then_slice = &tokens[..then_pos];
        let then_span = tokens_span(then_slice);
        let then_ops = compile(then_slice, dict)?;
        (then_ops, Vec::new(), then_span, None)
    };

    // `+ 1` includes the terminating `then` in the consumed count.
    Ok((then_ops, else_ops, then_pos + 1, then_span, else_span))
}
|