diff --git a/Cargo.lock b/Cargo.lock index 80a849b..7fc9577 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -354,6 +354,27 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "dtor" version = "0.1.1" @@ -455,6 +476,7 @@ dependencies = [ "clap", "ctor", "cucumber", + "dirs-next", "env_logger", "eyre", "futures", @@ -462,7 +484,7 @@ dependencies = [ "nom", "regex", "rustyline", - "thiserror", + "thiserror 2.0.9", "tokio", ] @@ -555,6 +577,17 @@ dependencies = [ "slab", ] +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "gherkin" version = "0.15.0" @@ -568,7 +601,7 @@ dependencies = [ "serde_json", "syn", "textwrap", - "thiserror", + "thiserror 2.0.9", "typed-builder", ] @@ -693,6 +726,15 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "libredox" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +dependencies = [ + "libc", +] + [[package]] name = "linked-hash-map" version = "0.5.6" @@ -956,6 +998,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "ref-cast" version = "1.0.25" @@ -1266,13 +1319,33 @@ dependencies = [ "unicode-width 0.1.14", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.9", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1393,6 +1466,22 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -1402,6 +1491,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-link" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 38bbcff..28ef797 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ thiserror = "2.0.9" clap = { version = "4.5.23", features = ["cargo", "derive"] } rustyline = "17.0" eyre = "0.6.12" +dirs = { version = "2.0", package = "dirs-next" } [dev-dependencies] bigdecimal = "0.4" @@ -27,5 +28,9 @@ futures = "0.3.31" name = "features" harness = false +[[test]] +name = "repl" +harness = false + [workspace.lints.clippy] unwrap_used = "deny" diff --git a/docs/README.md b/docs/README.md index 2fe5095..c976905 100644 --- a/docs/README.md +++ b/docs/README.md @@ -5,9 +5,9 @@ This folder contains initial documentation for the `flt` language as it exists i ## Contents - [`quickstart.md`](quickstart.md) - Running `flt` and trying expressions in the REPL. -- [`syntax.md`](syntax.md) - Core expression forms, literals, comments, and identifiers. +- [`syntax.md`](syntax.md) - Core expression forms, literals, comments, identifiers, reserved keywords, and statements. - [`operators.md`](operators.md) - Unary/binary operators and precedence. -- [`functions-and-pipe.md`](functions-and-pipe.md) - Function call syntax and pipe chaining. +- [`functions-and-pipe.md`](functions-and-pipe.md) - Function call syntax (including keyword arguments) and pipe chaining. - [`runtime-and-limitations.md`](runtime-and-limitations.md) - Evaluation behavior, errors, and current limitations. ## Scope diff --git a/docs/functions-and-pipe.md b/docs/functions-and-pipe.md index abfc9a5..10cc1f7 100644 --- a/docs/functions-and-pipe.md +++ b/docs/functions-and-pipe.md @@ -12,6 +12,18 @@ bar(1) add(1, 2) ``` +### Keyword arguments + +In the parenthesized form, arguments may include trailing **keyword arguments**: key-value pairs with the form `key: value`. 
All positional arguments must come first; after the first keyword argument, no further positional arguments are allowed. + +```flt +foo(1, bar: true) +baz(a: 1, b: 2) +qux(1, 2, option: "value") +``` + +Keys follow the same rules as map keys (bare identifier or quoted string). The parser collects keyword arguments into a single map and passes that map as the final argument to the call. + ### Whitespace form In this form, at least one argument is required. @@ -34,5 +46,5 @@ The parser treats this as a left-associative binary operator chain. ## Current Semantics Note -- Parsing for function calls and pipe expressions is implemented. +- Parsing for function calls (including keyword arguments) and pipe expressions is implemented. - Runtime evaluation for function calls and pipe is not implemented yet in `flt-cli`. diff --git a/docs/operators.md b/docs/operators.md index 04d92bb..4bd2642 100644 --- a/docs/operators.md +++ b/docs/operators.md @@ -24,6 +24,12 @@ | `|` | bitwise or (parsed) | `a | b` | | `^` | bitwise xor (parsed) | `a ^ b` | | `|>` | pipe | `x |> f` | +| `==` | equality | `x == 1` | +| `!=` | inequality | `x != 0` | +| `<` | less than | `a < b` | +| `>` | greater than | `a > b` | +| `<=` | less than or equal | `a <= b` | +| `>=` | greater than or equal | `a >= b` | ## Precedence (Low to High) @@ -36,6 +42,7 @@ 7. `&` 8. `+`, `-`, `<>` 9. `*`, `/` +10. `==`, `!=`, `<`, `>`, `<=`, `>=` All binary levels are left-associative. diff --git a/docs/runtime-and-limitations.md b/docs/runtime-and-limitations.md index 84aa926..a164deb 100644 --- a/docs/runtime-and-limitations.md +++ b/docs/runtime-and-limitations.md @@ -27,9 +27,11 @@ Common errors include: These constructs parse successfully but are not fully supported by the current evaluator: -- Function calls +- Function calls (including calls with keyword arguments, e.g.
`foo(1, bar: true)`) - Pipe expressions (`|>`) - Bitwise operators (`&`, `|`, `^`) +- Let statements (`let x = expr`) — the REPL accepts expressions only; statement parsing is available in the library for other entry points +- Standalone keyword expressions (e.g. `return`, `while`, `fn`) — they parse as expressions but have no evaluation behavior yet; complete `if`/`else` expressions, by contrast, are evaluated ## Practical Guidance diff --git a/docs/syntax.md b/docs/syntax.md index d571edb..fbbe00d 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -10,8 +10,36 @@ This page covers the core surface syntax supported by the parser. - Identifiers - Unary expressions - Binary expressions -- Function calls +- Function calls (including [keyword arguments](./functions-and-pipe.md#keyword-arguments)) - Parenthesized expressions +- Reserved keywords as expressions (e.g. `if`, `return`) + +## Statements + +The parser supports **let bindings**: + +```flt +let x = 1 +let name = "flt" +let foo = 2 + 3 +``` + +- A statement may be followed by an optional `;`. +- If a statement ends on a newline, the semicolon is not required. +- Two statements on the same line require `;` after the first: `let x = 1; let y = 2`. + +The REPL currently accepts expressions only; statement parsing is available for use in other entry points (e.g. batch or file evaluation). + +## Reserved Keywords + +The following words are reserved and recognized with word boundaries (e.g. `if` is a keyword, but `iffy` is an identifier): + +| Keyword | Keyword | Keyword | +| --- | --- | --- | +| `if` | `else` | `return` | +| `and` | `or` | `not` | +| `for` | `in` | `let` | +| `while` | `do` | `fn` | ## Identifiers @@ -21,7 +49,7 @@ Identifiers are parsed as one or more of: - `_` - `-` -In practice, expression parsing prefers literals before identifiers. For example, `true` and `false` parse as booleans, and a leading numeric form is parsed as a number first. +In practice, expression parsing prefers literals before identifiers.
For example, `true` and `false` parse as booleans, and a leading numeric form is parsed as a number first. Reserved keywords (e.g. `if`, `let`) are parsed as keywords when they appear as whole words; identifiers like `iffy` or `input` do not match the `if` or `in` keyword. Examples: diff --git a/features/ast/binary.feature b/features/ast/binary.feature index efd595b..43c2bf2 100644 --- a/features/ast/binary.feature +++ b/features/ast/binary.feature @@ -1,7 +1,7 @@ Feature: Binary expressions Binary operators combine two expressions. Precedence (lowest to highest): - `|>`, `||`, `&&`, `^^`, `|`, `^`, `&`, `+`/`-`/`<>`, `*`, `/`. + `|>`, `||`, `&&`, `^^`, `|`, `^`, `&`, `+`/`-`/`<>`, `*`, `/`, `==`, `!=`, `<`, `>`, `<=`, `>=`. Scenario: Addition Given the input "1 + 2" @@ -57,3 +57,33 @@ Feature: Binary expressions Given the input "x |> f" When I parse the input Then the output should be 'BinaryExpr(Ident("x"), Pipe, Ident("f"))' + + Scenario: Equality + Given the input "x == 1" + When I parse the input + Then the output should be 'BinaryExpr(Ident("x"), Eq, Literal(Number(Numeric { value: BigDecimal(sign=Plus, scale=0, digits=[1]) })))' + + Scenario: Inequality + Given the input "x != 0" + When I parse the input + Then the output should be 'BinaryExpr(Ident("x"), Ne, Literal(Number(Numeric { value: BigDecimal(sign=NoSign, scale=0, digits=[]) })))' + + Scenario: Less than + Given the input "a < b" + When I parse the input + Then the output should be 'BinaryExpr(Ident("a"), Lt, Ident("b"))' + + Scenario: Greater than + Given the input "a > b" + When I parse the input + Then the output should be 'BinaryExpr(Ident("a"), Gt, Ident("b"))' + + Scenario: Less than or equal + Given the input "a <= b" + When I parse the input + Then the output should be 'BinaryExpr(Ident("a"), Lte, Ident("b"))' + + Scenario: Greater than or equal + Given the input "a >= b" + When I parse the input + Then the output should be 'BinaryExpr(Ident("a"), Gte, Ident("b"))' diff --git 
a/features/repl/basic.feature b/features/repl/basic.feature index 28a58c1..a7f418a 100644 --- a/features/repl/basic.feature +++ b/features/repl/basic.feature @@ -5,4 +5,5 @@ Feature: flt repl """ 1 + 1 """ - Then the output should contain "2" + Then the command should succeed + And the output should contain "2" diff --git a/features/syntax/assignment.feature b/features/syntax/assignment.feature new file mode 100644 index 0000000..649c0fe --- /dev/null +++ b/features/syntax/assignment.feature @@ -0,0 +1,11 @@ +Feature: Assignment + + Scenario: Assigning a value to a variable + Given the input "let x = 1;" + When I parse the input + Then the output should be a `Statement::Let(Identifier("x"), Expr::Literal(Literal::Number(1)))` + + Scenario: Assigning a value without a let keyword + Given the input "x = 1;" + When I parse the input + Then the output should be a `Statement::Let(Identifier("x"), Expr::Literal(Literal::Number(1)))` \ No newline at end of file diff --git a/features/syntax/conditionals.feature b/features/syntax/conditionals.feature new file mode 100644 index 0000000..f0130f2 --- /dev/null +++ b/features/syntax/conditionals.feature @@ -0,0 +1,35 @@ +Feature: Conditionals + + If expressions evaluate a condition (must be boolean), then either the then-branch + or the else-branch. The else branch is optional; when omitted and the condition + is false, the expression evaluates to unit. 
+ + Scenario: If with block branches and else + Given the input "if true { 1 } else { 2 }" + When I parse the input + Then the output should be 'IfExpr { condition: Literal(Boolean(true)), then_branch: Literal(Number(Numeric { value: BigDecimal(sign=Plus, scale=0, digits=[1]) })), else_branch: Some(Literal(Number(Numeric { value: BigDecimal(sign=Plus, scale=0, digits=[2]) }))) }' + + Scenario: If with block branches and else (condition false) + Given the input "if false { 1 } else { 2 }" + When I parse the input + Then the output should be 'IfExpr { condition: Literal(Boolean(false)), then_branch: Literal(Number(Numeric { value: BigDecimal(sign=Plus, scale=0, digits=[1]) })), else_branch: Some(Literal(Number(Numeric { value: BigDecimal(sign=Plus, scale=0, digits=[2]) }))) }' + + Scenario: If with block then-branch only (no else) + Given the input "if true { 1 }" + When I parse the input + Then the output should be 'IfExpr { condition: Literal(Boolean(true)), then_branch: Literal(Number(Numeric { value: BigDecimal(sign=Plus, scale=0, digits=[1]) })), else_branch: None }' + + Scenario: If with block then-branch only and function call (no else) + Given the input "if false { do() }" + When I parse the input + Then the output should be 'IfExpr { condition: Literal(Boolean(false)), then_branch: FunctionCall(Identifier("do"), []), else_branch: None }' + + Scenario: If with expression branches (no blocks) + Given the input 'if success "Ok" else ":("' + When I parse the input + Then the output should be 'IfExpr { condition: Ident("success"), then_branch: Literal(String("Ok")), else_branch: Some(Literal(String(":("))) }' + + Scenario: If with parenthesized condition + Given the input "if (true) { 1 } else { 2 }" + When I parse the input + Then the output should be 'IfExpr { condition: Parenthesized(Literal(Boolean(true))), then_branch: Literal(Number(Numeric { value: BigDecimal(sign=Plus, scale=0, digits=[1]) })), else_branch: Some(Literal(Number(Numeric { value: 
BigDecimal(sign=Plus, scale=0, digits=[2]) }))) }' diff --git a/features/syntax/expr_statement.feature b/features/syntax/expr_statement.feature new file mode 100644 index 0000000..aba2556 --- /dev/null +++ b/features/syntax/expr_statement.feature @@ -0,0 +1,7 @@ +Feature: Expression statements + + Scenario: Number expression as a statement + Given the input "42" + When I parse the input + Then the output should be a `Literal::Number(42)` + diff --git a/src/ast.rs b/src/ast.rs index 9ae8b8c..b21c861 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,15 +1,21 @@ //! The flt abstract syntax tree mod expr; +mod function; mod identifier; +mod keywords; mod literal; mod number; mod operands; +mod statement; pub use expr::Expr; pub use expr::KeyValue; +pub use function::FunctionCall; pub use identifier::Identifier; +pub use keywords::Keyword; pub use literal::Literal; pub use number::Numeric; pub use operands::BinaryOp; pub use operands::UnaryOp; +pub use statement::Statement; diff --git a/src/ast/expr.rs b/src/ast/expr.rs index 0b37239..065d4ab 100644 --- a/src/ast/expr.rs +++ b/src/ast/expr.rs @@ -3,18 +3,12 @@ use std::fmt::Display; use bigdecimal::BigDecimal; use super::identifier::Identifier; +use super::keywords::Keyword; use super::literal::Literal; use super::operands::BinaryOp; use super::operands::UnaryOp; use crate::utils::escape_string; -/// A key-value pair in a map literal. -#[derive(Clone, Debug, PartialEq)] -pub struct KeyValue { - pub key: String, - pub value: Expr, -} - /// An expression in the language. #[derive(Clone, Debug, PartialEq)] pub enum Expr { @@ -34,35 +28,49 @@ pub enum Expr { MapLiteral(Vec), /// An array literal: `[ expr, ... ]`. ArrayLiteral(Vec), + /// A reserved keyword (e.g. `if`, `else`, `return`). + Keyword(Keyword), + /// An if expression: `if condition then_branch else else_branch` (else optional). + /// + /// When `else_branch` is `None` and `condition` evaluates to `false`, the expression + /// evaluates to unit `()`. 
+ IfExpr { + condition: Box, + then_branch: Box, + else_branch: Option>, + }, } impl Display for Expr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Expr::Literal(literal) => write!(f, "{}", literal), - Expr::Ident(ident) => write!(f, "{}", ident), + Expr::Literal(literal) => literal.fmt(f), + Expr::Ident(ident) => ident.fmt(f), Expr::UnaryExpr(op, expr) => write!(f, "{op}{expr}"), Expr::BinaryExpr(left, op, right) => write!(f, "{left} {op} {right}"), Expr::FunctionCall(name, args) => { - let args = args - .iter() - .map(|arg| arg.to_string()) - .collect::>() - .join(", "); - write!(f, "{name}({args})") + name.fmt(f)?; + write!(f, "(")?; + for (i, arg) in args.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + arg.fmt(f)?; + } + write!(f, ")") + } + Expr::Parenthesized(expr) => { + write!(f, "(")?; + expr.fmt(f)?; + write!(f, ")") } - Expr::Parenthesized(expr) => write!(f, "({expr})"), Expr::MapLiteral(entries) => { write!(f, "{{ ")?; for (i, kv) in entries.iter().enumerate() { if i > 0 { write!(f, ", ")?; } - if kv.key.contains(|c: char| !c.is_alphanumeric() && c != '_') { - write!(f, "\"{}\": {}", escape_string(&kv.key), kv.value)?; - } else { - write!(f, "{}: {}", kv.key, kv.value)?; - } + kv.fmt(f)?; } write!(f, " }}") } @@ -72,10 +80,22 @@ impl Display for Expr { if i > 0 { write!(f, ", ")?; } - write!(f, "{e}")?; + e.fmt(f)?; } write!(f, " ]") } + Expr::Keyword(kw) => kw.fmt(f), + Expr::IfExpr { + condition, + then_branch, + else_branch, + } => { + write!(f, "if {condition} {then_branch}")?; + if let Some(else_branch) = else_branch { + write!(f, " else {else_branch}")?; + } + Ok(()) + } } } } @@ -146,6 +166,40 @@ impl Expr { pub fn array_literal(elems: Vec) -> Self { Expr::ArrayLiteral(elems) } + + /// Constructs a keyword expression. + pub fn keyword(kw: Keyword) -> Self { + Expr::Keyword(kw) + } + + /// Constructs an if expression with optional else branch. 
+ pub fn if_expr(condition: Expr, then_branch: Expr, else_branch: Option) -> Self { + Expr::IfExpr { + condition: Box::new(condition), + then_branch: Box::new(then_branch), + else_branch: else_branch.map(Box::new), + } + } +} + +/// A key-value pair in a map literal. +#[derive(Clone, Debug, PartialEq)] +pub struct KeyValue { + pub key: String, + pub value: Expr, +} + +impl Display for KeyValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self + .key + .contains(|c: char| !c.is_alphanumeric() && c != '_') + { + write!(f, "\"{}\": {}", escape_string(&self.key), self.value) + } else { + write!(f, "{}: {}", self.key, self.value) + } + } } #[cfg(test)] @@ -287,4 +341,21 @@ mod tests { ); assert_eq!(expr.to_string(), "(1 + 2) * 3"); } + + #[test] + fn test_display_if_expr() { + let expr = Expr::if_expr( + Expr::literal_boolean(true), + Expr::literal_number(n("1")), + None, + ); + assert_eq!(expr.to_string(), "if true 1"); + + let expr = Expr::if_expr( + Expr::literal_boolean(false), + Expr::literal_number(n("1")), + Some(Expr::literal_number(n("2"))), + ); + assert_eq!(expr.to_string(), "if false 1 else 2"); + } } diff --git a/src/ast/function.rs b/src/ast/function.rs new file mode 100644 index 0000000..1991294 --- /dev/null +++ b/src/ast/function.rs @@ -0,0 +1,28 @@ +//! AST types for function calls. + +use super::expr::Expr; +use super::expr::KeyValue; +use super::identifier::Identifier; + +/// A function call: name, positional arguments, then optional key-value pairs. +#[derive(Clone, Debug, PartialEq)] +pub struct FunctionCall { + pub name: Identifier, + /// Positional arguments (must come first). + pub positional_args: Vec, + /// Trailing key-value pairs (e.g. `foo(1, bar: true)`). + pub keyword_args: Vec, +} + +impl FunctionCall { + /// Converts the argument list to the form used in `Expr::FunctionCall`: positional + /// args first, with keyword args collected into a single `MapLiteral` as the + /// final argument if present. 
+ pub fn args_as_exprs(&self) -> Vec { + let mut exprs = self.positional_args.clone(); + if !self.keyword_args.is_empty() { + exprs.push(Expr::MapLiteral(self.keyword_args.clone())); + } + exprs + } +} diff --git a/src/ast/keywords.rs b/src/ast/keywords.rs new file mode 100644 index 0000000..f6bdf92 --- /dev/null +++ b/src/ast/keywords.rs @@ -0,0 +1,38 @@ +use std::fmt::Display; + +/// Reserved keywords in the language. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Keyword { + If, + Else, + Return, + And, + Or, + Not, + For, + In, + Let, + While, + Do, + Fn, +} + +impl Display for Keyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + Keyword::If => "if", + Keyword::Else => "else", + Keyword::Return => "return", + Keyword::And => "and", + Keyword::Or => "or", + Keyword::Not => "not", + Keyword::For => "for", + Keyword::In => "in", + Keyword::Let => "let", + Keyword::While => "while", + Keyword::Do => "do", + Keyword::Fn => "fn", + }; + write!(f, "{s}") + } +} diff --git a/src/ast/operands.rs b/src/ast/operands.rs index 668a280..b0a41a5 100644 --- a/src/ast/operands.rs +++ b/src/ast/operands.rs @@ -8,7 +8,7 @@ pub enum UnaryOp { Minus, } -/// Binary operand: `+`, `-`, `*`, `/`, `&`, `&&`, `|`, `||`, `^`, `^^`, `|>`, `<>`. +/// Binary operand: `+`, `-`, `*`, `/`, `&`, `&&`, `|`, `||`, `^`, `^^`, `|>`, `<>`, `==`, `!=`, `<`, `>`, `<=`, `>=`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum BinaryOp { Add, @@ -25,6 +25,18 @@ pub enum BinaryOp { Pipe, /// String concatenation: concatenates two strings (e.g. `"foo" <> "bar"` → `"foobar"`). Concat, + /// Equality: same type and value. + Eq, + /// Inequality: not equal (different type or value). + Ne, + /// Less than (numbers). + Lt, + /// Greater than (numbers). + Gt, + /// Less than or equal (numbers). + Lte, + /// Greater than or equal (numbers). 
+ Gte, } impl Display for UnaryOp { @@ -52,6 +64,12 @@ impl Display for BinaryOp { BinaryOp::Xor => write!(f, "^^"), BinaryOp::Pipe => write!(f, "|>"), BinaryOp::Concat => write!(f, "<>"), + BinaryOp::Eq => write!(f, "=="), + BinaryOp::Ne => write!(f, "!="), + BinaryOp::Lt => write!(f, "<"), + BinaryOp::Gt => write!(f, ">"), + BinaryOp::Lte => write!(f, "<="), + BinaryOp::Gte => write!(f, ">="), } } } diff --git a/src/ast/statement.rs b/src/ast/statement.rs new file mode 100644 index 0000000..2218421 --- /dev/null +++ b/src/ast/statement.rs @@ -0,0 +1,22 @@ +use std::fmt::Display; + +use super::expr::Expr; +use super::identifier::Identifier; + +/// A statement in the language. +#[derive(Clone, Debug, PartialEq)] +pub enum Statement { + /// A let binding: `let ident = expr`. + Let(Identifier, Expr), + /// A bare expression used as a statement. + Expr(Expr), +} + +impl Display for Statement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Statement::Let(ident, expr) => write!(f, "let {} = {}", ident, expr), + Statement::Expr(expr) => write!(f, "{expr}"), + } + } +} diff --git a/src/bin/flt.rs b/src/bin/flt.rs index cdc0249..4244b87 100644 --- a/src/bin/flt.rs +++ b/src/bin/flt.rs @@ -1,46 +1,13 @@ use std::process::ExitCode; -use flt::eval::eval; -use flt::parser::parse_expr; +use flt::repl::run_repl; use rustyline::error::ReadlineError; -use rustyline::DefaultEditor; // Returns the library version, which reflects the crate version pub fn version() -> String { clap::crate_version!().to_string() } -fn run_repl() -> Result<(), ReadlineError> { - let mut rl = DefaultEditor::new()?; - loop { - let line = rl.readline("> ")?; - let line = line.trim(); - if line.is_empty() { - continue; - } - match parse_expr(line) { - Ok((remainder, expr)) => { - let remainder = remainder.trim(); - if remainder.is_empty() { - match eval(&expr) { - Ok(val) => println!("{}", val), - Err(e) => eprintln!("eval error: {:?}", e), - } - } else { - 
eprintln!( - "parse error: unexpected input after expression: {:?}", - remainder - ); - } - } - Err(e) => { - eprintln!("parse error: {:?}", e); - } - } - println!(); - } -} - fn main() -> ExitCode { let args: Vec = std::env::args().collect(); if args.get(1).map(|s| s.as_str()) == Some("version") { diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 739ff92..58d482e 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -1,133 +1,13 @@ -use crate::ast::BinaryOp; use crate::ast::Expr; -use crate::ast::Literal; -use crate::ast::UnaryOp; +use crate::ast::Statement; use crate::errors::Error; -use crate::errors::RuntimeError; -use crate::utils::escape_string; -use bigdecimal::BigDecimal; +use crate::runtime::Runtime; +use crate::runtime::SimpleRuntime; pub fn eval(expr: &Expr) -> Result { - let lit = eval_to_literal(expr)?; - Ok(literal_to_string(&lit)) -} - -fn eval_to_literal(expr: &Expr) -> Result { - match expr { - Expr::Literal(lit) => eval_literal(lit), - Expr::Ident(s) => Err(Error::RuntimeError(RuntimeError::UnboundIdentifier( - s.clone(), - ))), - Expr::UnaryExpr(op, inner) => eval_unary_expr(*op, inner), - Expr::BinaryExpr(left, op, right) => eval_binary_expr(left, *op, right), - Expr::FunctionCall(_, _) => Err(Error::RuntimeError(RuntimeError::UnsupportedFunctionCall)), - Expr::Parenthesized(inner) => eval_to_literal(inner), - Expr::MapLiteral(_) => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), - Expr::ArrayLiteral(_) => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), - } -} - -fn literal_to_string(lit: &Literal) -> String { - match lit { - Literal::Number(n) => n.as_ref().to_string(), - Literal::String(s) => format!("\"{}\"", escape_string(s)), - Literal::Boolean(b) => b.to_string(), - Literal::Symbol(s) => format!(":{}", s), - } -} - -fn eval_literal(lit: &Literal) -> Result { - match lit { - Literal::Number(n) => Ok(Literal::number(n.as_ref().clone())), - Literal::String(s) => Ok(Literal::string(s.clone())), - Literal::Boolean(b) 
=> Ok(Literal::boolean(*b)), - Literal::Symbol(s) => Ok(Literal::symbol(s.clone())), - } -} - -fn eval_unary_expr(op: UnaryOp, inner: &Expr) -> Result { - let val = eval_to_literal(inner)?; - match op { - UnaryOp::Not => match &val { - Literal::Boolean(b) => Ok(Literal::boolean(!b)), - _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), - }, - UnaryOp::Plus => match &val { - Literal::Number(n) => Ok(Literal::number(n.as_ref().clone())), - _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), - }, - UnaryOp::Minus => match &val { - Literal::Number(n) => Ok(Literal::number(-n.as_ref().clone())), - _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), - }, - } -} - -fn eval_binary_expr(left: &Expr, op: BinaryOp, right: &Expr) -> Result { - let l = eval_to_literal(left)?; - let r = eval_to_literal(right)?; - match op { - BinaryOp::Add => binary_number(&l, &r, |a, b| a + b), - BinaryOp::Sub => binary_number(&l, &r, |a, b| a - b), - BinaryOp::Mul => binary_number(&l, &r, |a, b| a * b), - BinaryOp::Div => { - let (a, b) = (as_bigdecimal(&l)?, as_bigdecimal(&r)?); - if b == 0 { - Err(Error::RuntimeError(RuntimeError::DivisionByZero)) - } else { - Ok(Literal::number(a / b)) - } - } - BinaryOp::And => binary_bool(&l, &r, |a, b| a && b), - BinaryOp::Or => binary_bool(&l, &r, |a, b| a || b), - BinaryOp::Xor => binary_bool(&l, &r, |a, b| a ^ b), - BinaryOp::BitAnd | BinaryOp::BitOr | BinaryOp::BitXor => { - Err(Error::RuntimeError(RuntimeError::InvalidOperandType)) - } - BinaryOp::Concat => binary_string(&l, &r), - BinaryOp::Pipe => Err(Error::RuntimeError(RuntimeError::UnsupportedFunctionCall)), - } -} - -fn as_bigdecimal(lit: &Literal) -> Result { - match lit { - Literal::Number(n) => Ok(n.as_ref().clone()), - _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), - } -} - -fn binary_number(l: &Literal, r: &Literal, f: F) -> Result -where - F: FnOnce(BigDecimal, BigDecimal) -> BigDecimal, -{ - let a = as_bigdecimal(l)?; - let b = 
as_bigdecimal(r)?; - Ok(Literal::number(f(a, b))) -} - -fn binary_bool(l: &Literal, r: &Literal, f: F) -> Result -where - F: FnOnce(bool, bool) -> bool, -{ - match (l, r) { - (Literal::Boolean(a), Literal::Boolean(b)) => Ok(Literal::boolean(f(*a, *b))), - _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), - } -} - -fn literal_to_concat_str(lit: &Literal) -> String { - match lit { - Literal::Number(n) => n.as_ref().to_string(), - Literal::String(s) => s.clone(), - Literal::Boolean(b) => b.to_string(), - Literal::Symbol(s) => s.clone(), - } -} - -fn binary_string(l: &Literal, r: &Literal) -> Result { - let a = literal_to_concat_str(l); - let b = literal_to_concat_str(r); - Ok(Literal::string(format!("{}{}", a, b))) + let mut rt = SimpleRuntime::default(); + let value = rt.eval(&Statement::Expr(expr.clone()))?; + Ok(value.to_string()) } #[cfg(test)] @@ -338,6 +218,117 @@ mod tests { assert_eq!(eval(&expr).unwrap(), "\"foobar\""); } + #[test] + fn test_eval_binary_eq() { + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(1), + BinaryOp::Eq, + Expr::literal_number(1), + )) + .unwrap(), + "true" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(1), + BinaryOp::Eq, + Expr::literal_number(2), + )) + .unwrap(), + "false" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_string("a"), + BinaryOp::Eq, + Expr::literal_string("a"), + )) + .unwrap(), + "true" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_boolean(true), + BinaryOp::Eq, + Expr::literal_boolean(false), + )) + .unwrap(), + "false" + ); + } + + #[test] + fn test_eval_binary_ne() { + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(1), + BinaryOp::Ne, + Expr::literal_number(2), + )) + .unwrap(), + "true" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(1), + BinaryOp::Ne, + Expr::literal_number(1), + )) + .unwrap(), + "false" + ); + } + + #[test] + fn test_eval_binary_gt_lt_gte_lte() { + assert_eq!( + 
eval(&Expr::binary_expr( + Expr::literal_number(3), + BinaryOp::Gt, + Expr::literal_number(2), + )) + .unwrap(), + "true" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(1), + BinaryOp::Gt, + Expr::literal_number(2), + )) + .unwrap(), + "false" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(1), + BinaryOp::Lt, + Expr::literal_number(2), + )) + .unwrap(), + "true" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(2), + BinaryOp::Lte, + Expr::literal_number(2), + )) + .unwrap(), + "true" + ); + assert_eq!( + eval(&Expr::binary_expr( + Expr::literal_number(3), + BinaryOp::Gte, + Expr::literal_number(3), + )) + .unwrap(), + "true" + ); + } + #[test] fn test_eval_string_interpolation() { let expr = Expr::binary_expr( @@ -393,4 +384,44 @@ mod tests { Error::RuntimeError(RuntimeError::UnsupportedFunctionCall) )); } + + #[test] + fn test_eval_if_expr_with_else() { + let expr = Expr::if_expr( + Expr::literal_boolean(true), + Expr::literal_number(1), + Some(Expr::literal_number(2)), + ); + assert_eq!(eval(&expr).unwrap(), "1"); + + let expr = Expr::if_expr( + Expr::literal_boolean(false), + Expr::literal_number(1), + Some(Expr::literal_number(2)), + ); + assert_eq!(eval(&expr).unwrap(), "2"); + } + + #[test] + fn test_eval_if_expr_without_else_returns_unit() { + let expr = Expr::if_expr(Expr::literal_boolean(true), Expr::literal_number(1), None); + assert_eq!(eval(&expr).unwrap(), "1"); + + let expr = Expr::if_expr(Expr::literal_boolean(false), Expr::literal_number(1), None); + assert_eq!(eval(&expr).unwrap(), "()"); + } + + #[test] + fn test_eval_if_expr_condition_must_be_boolean() { + let expr = Expr::if_expr( + Expr::literal_number(1), + Expr::literal_number(10), + Some(Expr::literal_number(20)), + ); + let err = eval(&expr).unwrap_err(); + assert!(matches!( + err, + Error::RuntimeError(RuntimeError::InvalidOperandType) + )); + } } diff --git a/src/lib.rs b/src/lib.rs index 7ebf75b..dda4449 100644 --- 
a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,8 @@ pub mod ast; pub mod errors; pub mod eval; pub mod parser; +pub mod repl; +pub mod runtime; pub mod utils; pub use errors::Error; diff --git a/src/parser.rs b/src/parser.rs index 1f78d22..40816f9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6,10 +6,12 @@ mod comment; mod expr; mod function; mod identifier; +mod keyword; mod literal; mod map; mod number; mod operands; +mod statement; mod string; mod symbol; @@ -23,5 +25,6 @@ pub use literal::parse_literal; pub use number::parse_number; pub use operands::parse_binary_op; pub use operands::parse_unary_op; +pub use statement::parse_statement; pub use string::parse_string; pub use symbol::parse_symbol; diff --git a/src/parser/expr.rs b/src/parser/expr.rs index b7cbad1..8c33dfe 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -1,6 +1,8 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::combinator::cut; use nom::combinator::map; +use nom::combinator::opt; use nom::combinator::verify; use nom::multi::many0; use nom::sequence::delimited; @@ -10,7 +12,9 @@ use nom::Parser; use super::array::parse_array_literal; use super::comment::multispace0_or_comment; use super::function::parse_function_call; +use super::function::parse_function_call_parens_only; use super::identifier::parse_identifier; +use super::keyword::parse_keyword; use super::literal::parse_literal; use super::map::parse_map_literal; use super::operands::parse_binary_op; @@ -18,15 +22,76 @@ use super::operands::parse_unary_op; use super::string::parse_interpolated_string; use crate::ast::BinaryOp; use crate::ast::Expr; +use crate::ast::FunctionCall; +use crate::ast::Keyword; + +/// Parses a `{ expr }` block used by control-flow expressions. 
+fn parse_block_expr(input: &str) -> IResult<&str, Expr> { + delimited( + (multispace0_or_comment, tag("{"), multispace0_or_comment), + parse_or, + (multispace0_or_comment, tag("}"), multispace0_or_comment), + ) + .parse(input) +} + +fn parse_if_branch(input: &str) -> IResult<&str, Expr> { + alt((parse_block_expr, parse_or)).parse(input) +} + +fn parse_if_then_branch(input: &str) -> IResult<&str, Expr> { + verify(parse_if_branch, |e: &Expr| match e { + Expr::Keyword(Keyword::Else) => false, + Expr::FunctionCall(name, _) if *name == "else" => false, + _ => true, + }) + .parse(input) +} + +fn parse_if_else_clause(input: &str) -> IResult<&str, Expr> { + let (input, _) = verify(parse_keyword, |k: &Keyword| *k == Keyword::Else).parse(input)?; + let (input, _) = multispace0_or_comment(input)?; + parse_if_branch(input) +} + +fn parse_if_expr(input: &str) -> IResult<&str, Expr> { + let (input, _) = verify(parse_keyword, |k: &Keyword| *k == Keyword::If).parse(input)?; + let (input, _) = multispace0_or_comment(input)?; + + let (input, (condition, then_branch, else_branch)) = alt(( + |input| { + let (input, condition) = cut(parse_pipe).parse(input)?; + let (input, _) = multispace0_or_comment(input)?; + let (input, then_branch) = parse_if_then_branch.parse(input)?; + let (input, _) = multispace0_or_comment(input)?; + let (input, else_branch) = opt(parse_if_else_clause).parse(input)?; + Ok((input, (condition, then_branch, else_branch))) + }, + |input| { + let (input, condition) = cut(parse_if_condition_pipe).parse(input)?; + let (input, _) = multispace0_or_comment(input)?; + let (input, then_branch) = parse_if_then_branch.parse(input)?; + let (input, _) = multispace0_or_comment(input)?; + let (input, else_branch) = opt(parse_if_else_clause).parse(input)?; + Ok((input, (condition, then_branch, else_branch))) + }, + )) + .parse(input)?; + + Ok((input, Expr::if_expr(condition, then_branch, else_branch))) +} /// Parses a primary expression: literal, identifier, function call, or 
parenthesized expression. fn parse_primary(input: &str) -> IResult<&str, Expr> { alt(( parse_interpolated_string(parse_or), map(parse_literal, Expr::Literal), - map(parse_function_call(parse_or), |(name, args)| { - Expr::FunctionCall(name, args) + parse_if_expr, + map(parse_function_call(parse_or), |fc: FunctionCall| { + let args = fc.args_as_exprs(); + Expr::FunctionCall(fc.name, args) }), + map(parse_keyword, Expr::keyword), map(parse_identifier, Expr::ident), parse_array_literal(parse_or), parse_map_literal(parse_or), @@ -42,22 +107,152 @@ fn parse_primary(input: &str) -> IResult<&str, Expr> { .parse(input) } -/// Parses a unary expression: optionally prefixed with `!`, `+`, or `-`. -fn parse_unary(input: &str) -> IResult<&str, Expr> { +/// Parses a primary expression used specifically for `if` conditions. +/// +/// The main difference from `parse_primary` is that it disallows *parenless* +/// function calls (`Identifier args`), which would otherwise make +/// `if success "Ok" else ...` ambiguous. +fn parse_if_condition_primary(input: &str) -> IResult<&str, Expr> { + alt(( + parse_interpolated_string(parse_if_condition_or), + map(parse_literal, Expr::Literal), + map( + parse_function_call_parens_only(parse_if_condition_or), + |fc: FunctionCall| { + let args = fc.args_as_exprs(); + Expr::FunctionCall(fc.name, args) + }, + ), + parse_if_expr, + map(parse_keyword, Expr::keyword), + map(parse_identifier, Expr::ident), + parse_array_literal(parse_if_condition_or), + parse_map_literal(parse_if_condition_or), + map( + delimited( + (multispace0_or_comment, tag("("), multispace0_or_comment), + parse_if_condition_or, + (multispace0_or_comment, tag(")"), multispace0_or_comment), + ), + Expr::parenthesized, + ), + )) + .parse(input) +} + +/// Parses a unary expression for `if` conditions. 
+fn parse_if_condition_unary(input: &str) -> IResult<&str, Expr> { let (input, _) = multispace0_or_comment(input)?; alt(( map( - (parse_unary_op, multispace0_or_comment, parse_unary), - |(op, _, e)| Expr::unary_expr(op, e), + (parse_unary_op, parse_if_condition_unary_tight), + |(op, e)| Expr::unary_expr(op, e), + ), + parse_if_condition_primary, + )) + .parse(input) +} + +/// Parses a unary expression for `if` conditions without whitespace between unary +/// operators and the expression that follows. +fn parse_if_condition_unary_tight(input: &str) -> IResult<&str, Expr> { + alt(( + map( + (parse_unary_op, parse_if_condition_unary_tight), + |(op, e)| Expr::unary_expr(op, e), ), + parse_if_condition_primary, + )) + .parse(input) +} + +fn parse_if_condition_pipe(input: &str) -> IResult<&str, Expr> { + parse_binary_level(input, parse_if_condition_or, &[BinaryOp::Pipe]) +} + +fn parse_if_condition_or(input: &str) -> IResult<&str, Expr> { + parse_binary_level(input, parse_if_condition_and, &[BinaryOp::Or]) +} + +fn parse_if_condition_and(input: &str) -> IResult<&str, Expr> { + parse_binary_level(input, parse_if_condition_xor, &[BinaryOp::And]) +} + +fn parse_if_condition_xor(input: &str) -> IResult<&str, Expr> { + parse_binary_level(input, parse_if_condition_bit_or, &[BinaryOp::Xor]) +} + +fn parse_if_condition_bit_or(input: &str) -> IResult<&str, Expr> { + parse_binary_level(input, parse_if_condition_bit_xor, &[BinaryOp::BitOr]) +} + +fn parse_if_condition_bit_xor(input: &str) -> IResult<&str, Expr> { + parse_binary_level(input, parse_if_condition_bit_and, &[BinaryOp::BitXor]) +} + +fn parse_if_condition_bit_and(input: &str) -> IResult<&str, Expr> { + parse_binary_level( + input, + parse_if_condition_add_sub_concat, + &[BinaryOp::BitAnd], + ) +} + +fn parse_if_condition_add_sub_concat(input: &str) -> IResult<&str, Expr> { + parse_binary_level( + input, + parse_if_condition_mul_div, + &[BinaryOp::Add, BinaryOp::Sub, BinaryOp::Concat], + ) +} + +fn 
parse_if_condition_mul_div(input: &str) -> IResult<&str, Expr> { + parse_binary_level( + input, + parse_if_condition_unary, + &[ + BinaryOp::Mul, + BinaryOp::Div, + BinaryOp::Eq, + BinaryOp::Ne, + BinaryOp::Lt, + BinaryOp::Gt, + BinaryOp::Lte, + BinaryOp::Gte, + ], + ) +} + +/// Parses a unary expression: optionally prefixed with `!`, `+`, or `-`. +/// +/// Note: unary operators must be immediately adjacent to their operand. +/// e.g. `!x` and `+1` are valid, but `! x` / `+ 1` are not. +fn parse_unary(input: &str) -> IResult<&str, Expr> { + let (input, _) = multispace0_or_comment(input)?; + alt(( + map((parse_unary_op, parse_unary_tight), |(op, e)| { + Expr::unary_expr(op, e) + }), + parse_primary, + )) + .parse(input) +} + +/// Parses a unary expression without allowing whitespace/comments between a unary +/// operator and the expression that follows. +fn parse_unary_tight(input: &str) -> IResult<&str, Expr> { + alt(( + map((parse_unary_op, parse_unary_tight), |(op, e)| { + Expr::unary_expr(op, e) + }), parse_primary, )) .parse(input) } /// Parses binary expressions: `Expr` then `BinaryOp` then `Expr`, with left-associative folding. +/// Precedence (lowest to highest): ||, &&, ^^, |, ^, &, +/-/<> (add/sub/concat), *, /, ==, !=, <, >, <=, >= /// `next` parses the higher-precedence operand; `allowed` restricts which operators this level accepts. 
-/// Precedence (lowest to highest): ||, &&, ^^, |, ^, &, +/-/<> (add/sub/concat), *, / fn parse_binary_level<'a>( input: &'a str, next: fn(&str) -> IResult<&str, Expr>, @@ -120,7 +315,20 @@ fn parse_add_sub_concat(input: &str) -> IResult<&str, Expr> { } fn parse_mul_div(input: &str) -> IResult<&str, Expr> { - parse_binary_level(input, parse_unary, &[BinaryOp::Mul, BinaryOp::Div]) + parse_binary_level( + input, + parse_unary, + &[ + BinaryOp::Mul, + BinaryOp::Div, + BinaryOp::Eq, + BinaryOp::Ne, + BinaryOp::Lt, + BinaryOp::Gt, + BinaryOp::Lte, + BinaryOp::Gte, + ], + ) } /// Parses an expression: unary and binary with proper precedence. @@ -165,6 +373,71 @@ mod tests { assert!(parse_expr("_foo").is_err()); } + #[test] + fn test_parse_keyword() { + use crate::ast::Keyword; + + assert!(parse_expr("if").is_err()); + assert_eq!(parse_expr("else"), Ok(("", Expr::keyword(Keyword::Else)))); + assert_eq!( + parse_expr("return"), + Ok(("", Expr::keyword(Keyword::Return))) + ); + assert_eq!(parse_expr("and"), Ok(("", Expr::keyword(Keyword::And)))); + assert_eq!(parse_expr("or"), Ok(("", Expr::keyword(Keyword::Or)))); + assert_eq!(parse_expr("not"), Ok(("", Expr::keyword(Keyword::Not)))); + assert_eq!(parse_expr("for"), Ok(("", Expr::keyword(Keyword::For)))); + assert_eq!(parse_expr("in"), Ok(("", Expr::keyword(Keyword::In)))); + assert_eq!(parse_expr("while"), Ok(("", Expr::keyword(Keyword::While)))); + assert_eq!(parse_expr("do"), Ok(("", Expr::keyword(Keyword::Do)))); + assert_eq!(parse_expr("fn"), Ok(("", Expr::keyword(Keyword::Fn)))); + assert_eq!(parse_expr("let"), Ok(("", Expr::keyword(Keyword::Let)))); + // Keywords are not identifiers: "iffy" parses as ident, not "if" + "fy" + assert_eq!(parse_expr("iffy"), Ok(("", Expr::ident("iffy")))); + } + + #[test] + fn test_parse_if_expr_block_and_optional_else() { + assert_eq!( + parse_expr("if true { 1 } else { 2 }"), + Ok(( + "", + Expr::if_expr( + Expr::literal_boolean(true), + Expr::literal_number(1), + 
Some(Expr::literal_number(2)) + ) + )) + ); + + assert_eq!( + parse_expr("if false { do() }"), + Ok(( + "", + Expr::if_expr( + Expr::literal_boolean(false), + Expr::function_call("do", vec![]), + None + ) + )) + ); + } + + #[test] + fn test_parse_if_expr_expression_branches() { + assert_eq!( + parse_expr("if success \"Ok\" else \":(\""), + Ok(( + "", + Expr::if_expr( + Expr::ident("success"), + Expr::literal_string("Ok"), + Some(Expr::literal_string(":(")) + ) + )) + ); + } + #[test] fn test_parse_string() { assert_eq!( @@ -205,6 +478,13 @@ mod tests { parse_expr("!x"), Ok(("", Expr::unary_expr(UnaryOp::Not, Expr::ident("x")))) ); + assert_eq!( + parse_expr("+1"), + Ok(("", Expr::unary_expr(UnaryOp::Plus, Expr::literal_number(1)))) + ); + assert!(parse_expr("! x").is_err()); + assert!(parse_expr("+ 1").is_err()); + assert!(parse_expr("- 42").is_err()); assert_eq!( parse_expr("-42"), Ok(( @@ -240,6 +520,15 @@ mod tests { ); } + #[test] + fn test_parse_binary_add_spacing_variants() { + let expected = Expr::binary_expr(Expr::ident("x"), BinaryOp::Add, Expr::literal_number(1)); + assert_eq!(parse_expr("x + 1"), Ok(("", expected.clone()))); + assert_eq!(parse_expr("x+1"), Ok(("", expected.clone()))); + assert_eq!(parse_expr("x +1"), Ok(("", expected.clone()))); + assert_eq!(parse_expr("x+ 1"), Ok(("", expected))); + } + #[test] fn test_parse_precedence() { // * has higher precedence than + diff --git a/src/parser/function.rs b/src/parser/function.rs index 8ed5c44..1718432 100644 --- a/src/parser/function.rs +++ b/src/parser/function.rs @@ -2,6 +2,7 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::combinator::map; use nom::combinator::map_res; +use nom::error::ErrorKind; use nom::multi::separated_list0; use nom::multi::separated_list1; use nom::sequence::delimited; @@ -10,6 +11,7 @@ use nom::IResult; use nom::Parser; use crate::ast::Expr; +use crate::ast::FunctionCall; use crate::ast::Identifier; use crate::ast::KeyValue; @@ -17,48 +19,69 @@ use 
super::comment::{multispace0_or_comment, multispace1_or_comment}; use super::map::parse_kv_pair; use super::parse_identifier; -enum FnArg<'a> { +/// Internal enum used only during parsing to represent one arg (positional or key-value). +enum ParsedArg { Positional(Expr), - KeyValue(std::borrow::Cow<'a, str>, Expr), + KeyValue(KeyValue), } -fn parse_fn_arg<'a>( +fn parse_arg<'a>( expr_parser: fn(&'a str) -> IResult<&'a str, Expr>, -) -> impl FnMut(&'a str) -> IResult<&'a str, FnArg<'a>> { +) -> impl FnMut(&'a str) -> IResult<&'a str, ParsedArg> { move |input: &'a str| { alt(( - map(parse_kv_pair(expr_parser), |(k, v)| FnArg::KeyValue(k, v)), - map(expr_parser, FnArg::Positional), + map(parse_kv_pair(expr_parser), |(k, v)| { + ParsedArg::KeyValue(KeyValue { + key: k.into_owned(), + value: v, + }) + }), + map(expr_parser, ParsedArg::Positional), )) .parse(input) } } -/// Positional args must all come before key-value pairs. -fn collect_fn_args(items: Vec>) -> Result, &'static str> { - let mut args = Vec::new(); - let mut kv_pairs: Vec = Vec::new(); +/// Reject arguments that start with unary `+` in parenless calls like `f + 1`. +/// +/// Without this, expressions such as `x + 1` become ambiguous with the grammar +/// `Identifier args` and get parsed as a function call. +fn parse_arg_disallow_unary_plus<'a>( + expr_parser: fn(&'a str) -> IResult<&'a str, Expr>, +) -> impl FnMut(&'a str) -> IResult<&'a str, ParsedArg> { + let mut inner = parse_arg(expr_parser); + move |input: &'a str| { + if input.starts_with('+') { + return Err(nom::Err::Error(nom::error::Error::new( + input, + ErrorKind::Tag, + ))); + } + inner(input) + } +} + +/// Splits parsed args into positionals (all leading Positional) and keyword_args (the rest). 
+fn collect_args(items: Vec) -> Result<(Vec, Vec), &'static str> { + let mut positional_args = Vec::new(); + let mut keyword_args = Vec::new(); + let mut seen_kv = false; for item in items { match item { - FnArg::Positional(expr) => { - if !kv_pairs.is_empty() { + ParsedArg::Positional(expr) => { + if seen_kv { return Err("positional argument after key-value pair"); } - args.push(expr); + positional_args.push(expr); + } + ParsedArg::KeyValue(kv) => { + seen_kv = true; + keyword_args.push(kv); } - FnArg::KeyValue(key, value) => kv_pairs.push(KeyValue { - key: key.into_owned(), - value, - }), } } - - if !kv_pairs.is_empty() { - args.push(Expr::MapLiteral(kv_pairs)); - } - - Ok(args) + Ok((positional_args, keyword_args)) } /// Parses a function call: `Identifier` `(` args `)` or `Identifier` args. @@ -67,11 +90,11 @@ fn collect_fn_args(items: Vec>) -> Result, &'static str> { /// pairs that are collected into a `MapLiteral` as the final argument. pub fn parse_function_call( parse_expr: fn(&str) -> IResult<&str, Expr>, -) -> impl FnMut(&str) -> IResult<&str, (Identifier, Vec)> { +) -> impl FnMut(&str) -> IResult<&str, FunctionCall> { move |input: &str| { let (input, name) = map(parse_identifier, |s: &str| Identifier(s.to_string())).parse(input)?; - let (input, args) = alt(( + let (input, (positional_args, keyword_args)) = alt(( preceded( multispace0_or_comment, delimited( @@ -81,9 +104,9 @@ pub fn parse_function_call( map_res( separated_list0( (multispace0_or_comment, tag(","), multispace0_or_comment), - parse_fn_arg(parse_expr), + parse_arg(parse_expr), ), - collect_fn_args, + collect_args, ), multispace0_or_comment, ), @@ -95,14 +118,64 @@ pub fn parse_function_call( map_res( separated_list1( (multispace0_or_comment, tag(","), multispace0_or_comment), - parse_fn_arg(parse_expr), + parse_arg_disallow_unary_plus(parse_expr), ), - collect_fn_args, + collect_args, ), ), )) .parse(input)?; - Ok((input, (name, args))) + Ok(( + input, + FunctionCall { + name, + 
positional_args, + keyword_args, + }, + )) + } +} + +/// Parses a function call that *requires* parentheses: `Identifier` `(` args `)`. +/// +/// This is useful in contexts like `if else `, where the +/// parenless call form `Identifier args` would otherwise absorb +/// the `` expression and make parsing ambiguous. +pub fn parse_function_call_parens_only( + parse_expr: fn(&str) -> IResult<&str, Expr>, +) -> impl FnMut(&str) -> IResult<&str, FunctionCall> { + move |input: &str| { + let (input, name) = + map(parse_identifier, |s: &str| Identifier(s.to_string())).parse(input)?; + + let (input, (positional_args, keyword_args)) = preceded( + multispace0_or_comment, + delimited( + tag("("), + delimited( + multispace0_or_comment, + map_res( + separated_list0( + (multispace0_or_comment, tag(","), multispace0_or_comment), + parse_arg(parse_expr), + ), + collect_args, + ), + multispace0_or_comment, + ), + tag(")"), + ), + ) + .parse(input)?; + + Ok(( + input, + FunctionCall { + name, + positional_args, + keyword_args, + }, + )) } } @@ -114,9 +187,10 @@ mod tests { use super::parse_function_call; use crate::ast::Expr; - use crate::parser::expr::parse_expr; - + use crate::ast::FunctionCall; use crate::ast::Identifier; + use crate::ast::KeyValue; + use crate::parser::expr::parse_expr; #[test] fn test_parse_trim() { @@ -124,10 +198,11 @@ mod tests { parse_function_call(parse_expr)(r#"trim("string")"#), Ok(( "", - ( - Identifier::try_from("trim").expect("invalid identifier"), - vec![Expr::literal_string("string")] - ) + FunctionCall { + name: Identifier::try_from("trim").expect("invalid identifier"), + positional_args: vec![Expr::literal_string("string")], + keyword_args: vec![], + } )) ); } @@ -138,12 +213,13 @@ mod tests { parse_function_call(parse_expr)("floor(3.14)"), Ok(( "", - ( - Identifier::try_from("floor").expect("invalid identifier"), - vec![Expr::literal_number( + FunctionCall { + name: Identifier::try_from("floor").expect("invalid identifier"), + positional_args: 
vec![Expr::literal_number( BigDecimal::from_str("3.14").expect("unable to parse 3.14 into BigDecimal") - )] - ) + )], + keyword_args: vec![], + } )) ); } @@ -154,12 +230,13 @@ mod tests { parse_function_call(parse_expr)("ceil(3.14)"), Ok(( "", - ( - Identifier::try_from("ceil").expect("invalid identifier"), - vec![Expr::literal_number( + FunctionCall { + name: Identifier::try_from("ceil").expect("invalid identifier"), + positional_args: vec![Expr::literal_number( BigDecimal::from_str("3.14").expect("unable to parse 3.14 into BigDecimal") - )] - ) + )], + keyword_args: vec![], + } )) ); } @@ -170,16 +247,17 @@ mod tests { parse_function_call(parse_expr)("round(3.14, 2)"), Ok(( "", - ( - Identifier::try_from("round").expect("invalid identifier"), - vec![ + FunctionCall { + name: Identifier::try_from("round").expect("invalid identifier"), + positional_args: vec![ Expr::literal_number( BigDecimal::from_str("3.14") .expect("unable to parse 3.14 into BigDecimal") ), - Expr::literal_number(2) - ] - ) + Expr::literal_number(2), + ], + keyword_args: vec![], + } )) ); } @@ -190,10 +268,11 @@ mod tests { parse_function_call(parse_expr)("add 1"), Ok(( "", - ( - Identifier::try_from("add").expect("invalid identifier"), - vec![Expr::literal_number(1)] - ) + FunctionCall { + name: Identifier::try_from("add").expect("invalid identifier"), + positional_args: vec![Expr::literal_number(1)], + keyword_args: vec![], + } )) ); } @@ -204,10 +283,11 @@ mod tests { parse_function_call(parse_expr)("add 1, 2"), Ok(( "", - ( - Identifier::try_from("add").expect("invalid identifier"), - vec![Expr::literal_number(1), Expr::literal_number(2)] - ) + FunctionCall { + name: Identifier::try_from("add").expect("invalid identifier"), + positional_args: vec![Expr::literal_number(1), Expr::literal_number(2),], + keyword_args: vec![], + } )) ); } @@ -218,13 +298,14 @@ mod tests { parse_function_call(parse_expr)("foo(1, optional: true)"), Ok(( "", - ( - Identifier::try_from("foo").expect("invalid 
identifier"), - vec![ - Expr::literal_number(1), - Expr::map_literal(vec![("optional", Expr::literal_boolean(true))]), - ] - ) + FunctionCall { + name: Identifier::try_from("foo").expect("invalid identifier"), + positional_args: vec![Expr::literal_number(1)], + keyword_args: vec![KeyValue { + key: "optional".into(), + value: Expr::literal_boolean(true), + }], + } )) ); } @@ -235,13 +316,20 @@ mod tests { parse_function_call(parse_expr)(r#"foo(name: "Alice", age: 30)"#), Ok(( "", - ( - Identifier::try_from("foo").expect("invalid identifier"), - vec![Expr::map_literal(vec![ - ("name", Expr::literal_string("Alice")), - ("age", Expr::literal_number(30)), - ])] - ) + FunctionCall { + name: Identifier::try_from("foo").expect("invalid identifier"), + positional_args: vec![], + keyword_args: vec![ + KeyValue { + key: "name".into(), + value: Expr::literal_string("Alice"), + }, + KeyValue { + key: "age".into(), + value: Expr::literal_number(30), + }, + ], + } )) ); } @@ -252,13 +340,14 @@ mod tests { parse_function_call(parse_expr)("foo 1, optional: true"), Ok(( "", - ( - Identifier::try_from("foo").expect("invalid identifier"), - vec![ - Expr::literal_number(1), - Expr::map_literal(vec![("optional", Expr::literal_boolean(true))]), - ] - ) + FunctionCall { + name: Identifier::try_from("foo").expect("invalid identifier"), + positional_args: vec![Expr::literal_number(1)], + keyword_args: vec![KeyValue { + key: "optional".into(), + value: Expr::literal_boolean(true), + }], + } )) ); } @@ -274,13 +363,14 @@ mod tests { parse_function_call(parse_expr)(r#"foo(1, "output file": "out.csv")"#), Ok(( "", - ( - Identifier::try_from("foo").expect("invalid identifier"), - vec![ - Expr::literal_number(1), - Expr::map_literal(vec![("output file", Expr::literal_string("out.csv"),)]), - ] - ) + FunctionCall { + name: Identifier::try_from("foo").expect("invalid identifier"), + positional_args: vec![Expr::literal_number(1)], + keyword_args: vec![KeyValue { + key: "output file".into(), + value: 
Expr::literal_string("out.csv"), + }], + } )) ); } diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs new file mode 100644 index 0000000..4086a1d --- /dev/null +++ b/src/parser/keyword.rs @@ -0,0 +1,88 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::bytes::complete::take_while_m_n; +use nom::combinator::peek; +use nom::combinator::value; +use nom::sequence::terminated; +use nom::IResult; +use nom::Parser; + +use crate::ast::Keyword; + +fn is_identifier_continue(c: char) -> bool { + c.is_alphanumeric() || c == '-' || c == '_' +} + +/// Ensures the next character (if any) is not an identifier continuation, +/// so that e.g. "if" is recognized but "iffy" is not. +fn word_boundary(input: &str) -> IResult<&str, ()> { + peek(alt(( + value((), nom::combinator::eof), + value( + (), + take_while_m_n(1, 1, |c: char| !is_identifier_continue(c)), + ), + ))) + .parse(input) +} + +/// Parses a reserved keyword as an expression (word-boundary aware). +pub fn parse_keyword(input: &str) -> IResult<&str, Keyword> { + alt(( + value(Keyword::Return, terminated(tag("return"), word_boundary)), + value(Keyword::While, terminated(tag("while"), word_boundary)), + value(Keyword::Else, terminated(tag("else"), word_boundary)), + value(Keyword::For, terminated(tag("for"), word_boundary)), + value(Keyword::And, terminated(tag("and"), word_boundary)), + value(Keyword::Not, terminated(tag("not"), word_boundary)), + value(Keyword::If, terminated(tag("if"), word_boundary)), + value(Keyword::In, terminated(tag("in"), word_boundary)), + value(Keyword::Let, terminated(tag("let"), word_boundary)), + value(Keyword::Or, terminated(tag("or"), word_boundary)), + value(Keyword::Do, terminated(tag("do"), word_boundary)), + value(Keyword::Fn, terminated(tag("fn"), word_boundary)), + )) + .parse(input) +} + +#[cfg(test)] +mod tests { + use crate::ast::Keyword; + + use super::*; + + #[test] + fn test_parse_keywords() { + assert_eq!(parse_keyword("if"), Ok(("", Keyword::If))); + 
assert_eq!(parse_keyword("else"), Ok(("", Keyword::Else))); + assert_eq!(parse_keyword("return"), Ok(("", Keyword::Return))); + assert_eq!(parse_keyword("and"), Ok(("", Keyword::And))); + assert_eq!(parse_keyword("or"), Ok(("", Keyword::Or))); + assert_eq!(parse_keyword("not"), Ok(("", Keyword::Not))); + assert_eq!(parse_keyword("for"), Ok(("", Keyword::For))); + assert_eq!(parse_keyword("in"), Ok(("", Keyword::In))); + assert_eq!(parse_keyword("while"), Ok(("", Keyword::While))); + assert_eq!(parse_keyword("do"), Ok(("", Keyword::Do))); + assert_eq!(parse_keyword("fn"), Ok(("", Keyword::Fn))); + assert_eq!(parse_keyword("let"), Ok(("", Keyword::Let))); + } + + #[test] + fn test_parse_keyword_with_remainder() { + assert_eq!(parse_keyword("if "), Ok((" ", Keyword::If))); + assert_eq!(parse_keyword("return("), Ok(("(", Keyword::Return))); + } + + #[test] + fn test_keyword_word_boundary() { + // "if" alone is keyword + assert_eq!(parse_keyword("if"), Ok(("", Keyword::If))); + // "iffy" should not match "if" as keyword (identifier wins later in alt) + assert!(parse_keyword("iffy").is_err()); + // "in" alone is keyword + assert_eq!(parse_keyword("in"), Ok(("", Keyword::In))); + // "int" or "input" should not match "in" + assert!(parse_keyword("int").is_err()); + assert!(parse_keyword("input").is_err()); + } +} diff --git a/src/parser/operands.rs b/src/parser/operands.rs index aa1e9f9..a8825aa 100644 --- a/src/parser/operands.rs +++ b/src/parser/operands.rs @@ -17,11 +17,17 @@ pub fn parse_unary_op(input: &str) -> IResult<&str, UnaryOp> { .parse(input) } -/// Parses a binary operand. Longer tokens must be tried first (`&&` before `&`, `||` before `|`, `^^` before `^`, `|>` before `|`). +/// Parses a binary operand. Longer tokens must be tried first (`>=`/`<=` before `>`/`<`, `==`/`!=` before `=`/`!`, etc.). 
pub fn parse_binary_op(input: &str) -> IResult<&str, BinaryOp> { alt(( value(BinaryOp::Pipe, tag("|>")), value(BinaryOp::Concat, tag("<>")), + value(BinaryOp::Eq, tag("==")), + value(BinaryOp::Ne, tag("!=")), + value(BinaryOp::Gte, tag(">=")), + value(BinaryOp::Lte, tag("<=")), + value(BinaryOp::Gt, tag(">")), + value(BinaryOp::Lt, tag("<")), value(BinaryOp::And, tag("&&")), value(BinaryOp::Or, tag("||")), value(BinaryOp::Xor, tag("^^")), @@ -71,6 +77,12 @@ mod tests { assert_eq!(parse_binary_op("^^"), Ok(("", BinaryOp::Xor))); assert_eq!(parse_binary_op("|>"), Ok(("", BinaryOp::Pipe))); assert_eq!(parse_binary_op("<>"), Ok(("", BinaryOp::Concat))); + assert_eq!(parse_binary_op("=="), Ok(("", BinaryOp::Eq))); + assert_eq!(parse_binary_op("!="), Ok(("", BinaryOp::Ne))); + assert_eq!(parse_binary_op("<"), Ok(("", BinaryOp::Lt))); + assert_eq!(parse_binary_op(">"), Ok(("", BinaryOp::Gt))); + assert_eq!(parse_binary_op("<="), Ok(("", BinaryOp::Lte))); + assert_eq!(parse_binary_op(">="), Ok(("", BinaryOp::Gte))); } #[test] @@ -79,6 +91,9 @@ mod tests { assert_eq!(parse_binary_op("&&"), Ok(("", BinaryOp::And))); assert_eq!(parse_binary_op("||"), Ok(("", BinaryOp::Or))); assert_eq!(parse_binary_op("^^"), Ok(("", BinaryOp::Xor))); + // `>=` and `<=` should parse as Gte/Lte, not as Gt/Lt plus something + assert_eq!(parse_binary_op(">="), Ok(("", BinaryOp::Gte))); + assert_eq!(parse_binary_op("<="), Ok(("", BinaryOp::Lte))); } #[test] diff --git a/src/parser/statement.rs b/src/parser/statement.rs new file mode 100644 index 0000000..2521d43 --- /dev/null +++ b/src/parser/statement.rs @@ -0,0 +1,171 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::combinator::map; +use nom::combinator::opt; +use nom::combinator::verify; +use nom::IResult; +use nom::Parser; + +use crate::ast::Identifier; +use crate::ast::Keyword; +use crate::ast::Statement; + +use super::comment::multispace0_or_comment; +use super::expr::parse_expr; +use super::identifier::parse_identifier; 
+use super::keyword::parse_keyword; + +/// Parses a let/assignment statement: optional `let` keyword, then identifier, `=`, expression, +/// with optional whitespace (or comments) between each part. +/// So both `let x = 1` and `x = 1` are valid and equivalent. +/// A statement may be followed by an optional `;`. If it ends on a newline, +/// the `;` is not required. Two statements on the same line require `;` after the first. +pub fn parse_statement(input: &str) -> IResult<&str, Statement> { + let (input, _) = multispace0_or_comment(input)?; + let mut parse_let_or_assign = alt(( + map( + ( + verify(parse_keyword, |k: &Keyword| *k == Keyword::Let), + multispace0_or_comment, + parse_identifier, + multispace0_or_comment, + tag("="), + multispace0_or_comment, + parse_expr, + ), + |(_, _, name, _, _, _, expr)| Statement::Let(Identifier(name.to_string()), expr), + ), + map( + ( + parse_identifier, + multispace0_or_comment, + tag("="), + multispace0_or_comment, + parse_expr, + ), + |(name, _, _, _, expr)| Statement::Let(Identifier(name.to_string()), expr), + ), + )); + + let (input, stmt) = if input.starts_with("let") { + parse_let_or_assign.parse(input)? + } else { + alt((parse_let_or_assign, map(parse_expr, Statement::Expr))).parse(input)? 
+ }; + let (input, _) = multispace0_or_comment(input)?; + let (input, _) = opt(tag(";")).parse(input)?; + Ok((input, stmt)) +} + +#[cfg(test)] +mod tests { + use crate::ast::Expr; + use crate::ast::Identifier; + use crate::ast::Statement; + + use super::*; + + #[test] + fn test_parse_let_statement() { + let (rest, stmt) = parse_statement("let x = 1").unwrap(); + assert!(rest.is_empty()); + assert_eq!( + stmt, + Statement::Let(Identifier("x".to_string()), Expr::literal_number(1)) + ); + } + + #[test] + fn test_parse_let_statement_no_spaces() { + let (rest, stmt) = parse_statement("let x=1").unwrap(); + assert!(rest.is_empty()); + assert_eq!( + stmt, + Statement::Let(Identifier("x".to_string()), Expr::literal_number(1)) + ); + } + + #[test] + fn test_parse_let_statement_with_expr() { + let (rest, stmt) = parse_statement("let foo = 2 + 3").unwrap(); + assert!(rest.is_empty()); + match &stmt { + Statement::Let(ident, expr) => { + assert!(*ident == "foo"); + assert!(matches!(expr, Expr::BinaryExpr(_, _, _))); + } + _ => panic!("expected let statement"), + } + } + + #[test] + fn test_parse_let_statement_fails_without_equals() { + assert!(parse_statement("let x 1").is_err()); + } + + #[test] + fn test_parse_assignment_without_let() { + let (rest, stmt) = parse_statement("x = 1").unwrap(); + assert!(rest.is_empty()); + assert_eq!( + stmt, + Statement::Let(Identifier("x".to_string()), Expr::literal_number(1)) + ); + } + + #[test] + fn test_parse_let_statement_optional_semicolon() { + let (rest, stmt) = parse_statement("let x = 1;").unwrap(); + assert!(rest.is_empty()); + assert_eq!( + stmt, + Statement::Let(Identifier("x".to_string()), Expr::literal_number(1)) + ); + } + + #[test] + fn test_parse_two_statements_same_line() { + let (rest, stmt1) = parse_statement("let x = 1; let y = 2").unwrap(); + assert_eq!( + stmt1, + Statement::Let(Identifier("x".to_string()), Expr::literal_number(1)) + ); + let (rest, stmt2) = parse_statement(rest.trim()).unwrap(); + 
assert!(rest.is_empty()); + assert_eq!( + stmt2, + Statement::Let(Identifier("y".to_string()), Expr::literal_number(2)) + ); + } + + #[test] + fn test_parse_let_statement_newline_no_semicolon_required() { + let (rest, stmt) = parse_statement("let x = 1\n").unwrap(); + assert!(rest.is_empty()); + assert_eq!( + stmt, + Statement::Let(Identifier("x".to_string()), Expr::literal_number(1)) + ); + } + + #[test] + fn test_parse_expr_statement_number() { + let (rest, stmt) = parse_statement("42").unwrap(); + assert!(rest.is_empty()); + assert_eq!(stmt, Statement::Expr(Expr::literal_number(42))); + } + + #[test] + fn test_parse_expr_statement_binary_with_semicolon() { + let (rest, stmt) = parse_statement("1 + 1;").unwrap(); + assert!(rest.is_empty()); + assert_eq!( + stmt, + Statement::Expr(Expr::binary_expr( + Expr::literal_number(1), + crate::ast::BinaryOp::Add, + Expr::literal_number(1) + )) + ); + } +} diff --git a/src/repl.rs b/src/repl.rs new file mode 100644 index 0000000..ad45188 --- /dev/null +++ b/src/repl.rs @@ -0,0 +1,85 @@ +use std::path::PathBuf; + +use crate::parser::parse_statement; +use crate::runtime::Runtime; +use crate::runtime::SimpleRuntime; +use rustyline::error::ReadlineError; +use rustyline::DefaultEditor; + +/// Maximum number of inputs to keep in REPL history. 
+const HISTORY_DEPTH: usize = 1000; + +fn repl_history_path() -> Option { + dirs::data_local_dir().map(|dir| dir.join("flt").join("history")) +} + +fn load_repl_history(rl: &mut DefaultEditor) -> Result<(), ReadlineError> { + let Some(history_path) = repl_history_path() else { + return Ok(()); + }; + if history_path.exists() { + println!("Loading REPL history from: {:?}", history_path); + rl.load_history(&history_path)?; + } + Ok(()) +} + +fn save_repl_history(rl: &mut DefaultEditor) -> Result<(), ReadlineError> { + let Some(history_path) = repl_history_path() else { + return Ok(()); + }; + if let Some(parent) = history_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + rl.save_history(&history_path)?; + Ok(()) +} + +pub fn run_repl() -> Result<(), ReadlineError> { + let config = rustyline::Config::builder() + .max_history_size(HISTORY_DEPTH) + .expect("valid history size") + .auto_add_history(true) + .build(); + let mut rl = DefaultEditor::with_config(config)?; + let _ = load_repl_history(&mut rl); + let mut runtime = SimpleRuntime::default(); + let repl_result = repl_loop(&mut rl, &mut runtime); + let _ = save_repl_history(&mut rl); + repl_result +} + +fn repl_loop(rl: &mut DefaultEditor, runtime: &mut SimpleRuntime) -> Result<(), ReadlineError> { + loop { + let line = match rl.readline("> ") { + Ok(line) => line, + Err(ReadlineError::Eof) => break Ok(()), + Err(ReadlineError::Interrupted) => continue, + Err(e) => return Err(e), + }; + let line = line.trim(); + if line.is_empty() { + continue; + } + match parse_statement(line) { + Ok((remainder, statement)) => { + let remainder = remainder.trim(); + if remainder.is_empty() { + match runtime.eval(&statement) { + Ok(val) => println!("{}", val), + Err(e) => eprintln!("eval error: {:?}", e), + } + } else { + eprintln!( + "parse error: unexpected input after statement: {:?}", + remainder + ); + } + } + Err(e) => { + eprintln!("parse error: {:?}", e); + } + } + println!(); + } +} diff --git 
a/src/runtime.rs b/src/runtime.rs new file mode 100644 index 0000000..fa1d7a3 --- /dev/null +++ b/src/runtime.rs @@ -0,0 +1,292 @@ +//! Runtimes for the `flt` language + +pub mod functions; +pub mod types; + +use std::collections::HashMap; +use std::fmt; + +use bigdecimal::BigDecimal; +use bigdecimal::Zero; + +use crate::ast::BinaryOp; +use crate::ast::Expr; +use crate::ast::Literal; +use crate::ast::Statement; +use crate::ast::UnaryOp; +use crate::errors::RuntimeError; +use crate::utils::escape_string; +use crate::Error; + +/// A value in the runtime +#[derive(Clone, Debug, PartialEq)] +pub enum Value { + /// The unit value (like `()` in Rust/Elixir) + Unit, + /// A number value + Number(BigDecimal), + /// A string value + String(String), + /// A boolean value + Boolean(bool), + /// A symbol value + Symbol(String), + /// A map of string keys to values + Map(HashMap), + /// An array of values + Array(Vec), +} + +impl fmt::Display for Value { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Value::Unit => write!(f, "()"), + Value::Number(n) => write!(f, "{}", n), + Value::String(s) => write!(f, "\"{}\"", escape_string(s)), + Value::Boolean(b) => write!(f, "{}", b), + Value::Symbol(s) => write!(f, ":{}", s), + Value::Map(m) => { + write!(f, "{{")?; + for (i, (k, v)) in m.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "\"{}\": {}", escape_string(k), v)?; + } + write!(f, "}}") + } + Value::Array(arr) => { + write!(f, "[")?; + for (i, v) in arr.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", v)?; + } + write!(f, "]") + } + } + } +} + +pub trait Runtime { + fn eval(&mut self, statement: &Statement) -> Result; +} + +#[derive(Default)] +pub struct SimpleRuntime { + pub built_in_functions: HashMap>, + pub global_scope: GlobalScope, +} + +impl Runtime for SimpleRuntime { + fn eval(&mut self, statement: &Statement) -> Result { + match statement { + Statement::Expr(expr) => self.eval_expr(expr), + 
Statement::Let(ident, expr) => { + let value = self.eval_expr(expr)?; + self.global_scope + .set_variable(ident.0.as_str(), value.clone()); + Ok(value) + } + } + } +} + +impl SimpleRuntime { + fn eval_expr(&mut self, expr: &Expr) -> Result { + match expr { + Expr::Literal(lit) => Ok(Self::literal_to_value(lit)), + Expr::Ident(s) => self + .global_scope + .get_variable(s.as_str()) + .cloned() + .ok_or_else(|| Error::RuntimeError(RuntimeError::UnboundIdentifier(s.clone()))), + Expr::IfExpr { + condition, + then_branch, + else_branch, + } => { + let cond_val = self.eval_expr(condition)?; + let cond_bool = match cond_val { + Value::Boolean(b) => b, + _ => return Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + }; + + if cond_bool { + self.eval_expr(then_branch) + } else { + match else_branch { + Some(expr) => self.eval_expr(expr), + None => Ok(Value::Unit), + } + } + } + Expr::UnaryExpr(op, inner) => { + let val = self.eval_expr(inner)?; + Self::eval_unary(*op, &val) + } + Expr::BinaryExpr(left, op, right) => { + let l = self.eval_expr(left)?; + let r = self.eval_expr(right)?; + Self::eval_binary(&l, *op, &r) + } + Expr::FunctionCall(_, _) => { + Err(Error::RuntimeError(RuntimeError::UnsupportedFunctionCall)) + } + Expr::Parenthesized(inner) => self.eval_expr(inner), + Expr::MapLiteral(_) => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + Expr::ArrayLiteral(_) => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + Expr::Keyword(_) => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + } + } + + fn literal_to_value(lit: &Literal) -> Value { + match lit { + Literal::Number(n) => Value::Number(n.as_ref().clone()), + Literal::String(s) => Value::String(s.clone()), + Literal::Boolean(b) => Value::Boolean(*b), + Literal::Symbol(s) => Value::Symbol(s.clone()), + } + } + + fn eval_unary(op: UnaryOp, inner: &Value) -> Result { + match op { + UnaryOp::Not => match inner { + Value::Boolean(b) => Ok(Value::Boolean(!b)), + _ => 
Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + }, + UnaryOp::Plus => match inner { + Value::Number(n) => Ok(Value::Number(n.clone())), + _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + }, + UnaryOp::Minus => match inner { + Value::Number(n) => Ok(Value::Number(-n.clone())), + _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + }, + } + } + + fn eval_binary(l: &Value, op: BinaryOp, r: &Value) -> Result { + match op { + BinaryOp::Add => Self::binary_number(l, r, |a, b| a + b), + BinaryOp::Sub => Self::binary_number(l, r, |a, b| a - b), + BinaryOp::Mul => Self::binary_number(l, r, |a, b| a * b), + BinaryOp::Div => { + let (a, b) = (Self::as_bigdecimal(l)?, Self::as_bigdecimal(r)?); + if b.is_zero() { + Err(Error::RuntimeError(RuntimeError::DivisionByZero)) + } else { + Ok(Value::Number(a / b)) + } + } + BinaryOp::And => Self::binary_bool(l, r, |a, b| a && b), + BinaryOp::Or => Self::binary_bool(l, r, |a, b| a || b), + BinaryOp::Xor => Self::binary_bool(l, r, |a, b| a ^ b), + BinaryOp::BitAnd | BinaryOp::BitOr | BinaryOp::BitXor => { + Err(Error::RuntimeError(RuntimeError::InvalidOperandType)) + } + BinaryOp::Concat => Self::binary_string(l, r), + BinaryOp::Eq => Ok(Value::Boolean(l == r)), + BinaryOp::Ne => Ok(Value::Boolean(l != r)), + BinaryOp::Lt => Self::binary_compare(l, r, |a, b| a < b), + BinaryOp::Gt => Self::binary_compare(l, r, |a, b| a > b), + BinaryOp::Lte => Self::binary_compare(l, r, |a, b| a <= b), + BinaryOp::Gte => Self::binary_compare(l, r, |a, b| a >= b), + BinaryOp::Pipe => Err(Error::RuntimeError(RuntimeError::UnsupportedFunctionCall)), + } + } + + fn as_bigdecimal(v: &Value) -> Result { + match v { + Value::Number(n) => Ok(n.clone()), + _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + } + } + + fn binary_number(l: &Value, r: &Value, f: F) -> Result + where + F: FnOnce(BigDecimal, BigDecimal) -> BigDecimal, + { + let a = Self::as_bigdecimal(l)?; + let b = Self::as_bigdecimal(r)?; + 
Ok(Value::Number(f(a, b))) + } + + fn binary_bool(l: &Value, r: &Value, f: F) -> Result + where + F: FnOnce(bool, bool) -> bool, + { + match (l, r) { + (Value::Boolean(a), Value::Boolean(b)) => Ok(Value::Boolean(f(*a, *b))), + _ => Err(Error::RuntimeError(RuntimeError::InvalidOperandType)), + } + } + + fn binary_compare(l: &Value, r: &Value, f: F) -> Result + where + F: FnOnce(&BigDecimal, &BigDecimal) -> bool, + { + let a = Self::as_bigdecimal(l)?; + let b = Self::as_bigdecimal(r)?; + Ok(Value::Boolean(f(&a, &b))) + } + + fn value_to_concat_str(v: &Value) -> String { + match v { + Value::Number(n) => n.to_string(), + Value::String(s) => s.clone(), + Value::Boolean(b) => b.to_string(), + Value::Symbol(s) => s.clone(), + _ => String::new(), + } + } + + fn binary_string(l: &Value, r: &Value) -> Result { + let a = Self::value_to_concat_str(l); + let b = Self::value_to_concat_str(r); + Ok(Value::String(format!("{}{}", a, b))) + } +} + +/// The global scope is the scope that is available to all other scopes. +#[derive(Default)] +pub struct GlobalScope { + pub functions: HashMap>, + pub variables: HashMap, +} + +impl GlobalScope { + /// Check if the global scope has a function with the given name. + pub fn has_function(&self, name: &str) -> bool { + self.functions.contains_key(name) + } + + /// Get a function from the global scope by name. + pub fn get_function(&self, name: &str) -> Option<&dyn Function> { + self.functions.get(name).map(|f| f.as_ref()) + } + + /// Check if the global scope has a variable with the given name. + pub fn has_variable(&self, name: &str) -> bool { + self.variables.contains_key(name) + } + + /// Get a variable from the global scope by name. + pub fn get_variable(&self, name: &str) -> Option<&Value> { + self.variables.get(name) + } + + /// Set a variable in the global scope by name. 
+ pub fn set_variable(&mut self, name: &str, value: Value) { + self.variables.insert(name.to_string(), value); + } +} + +pub struct FunctionSignature { + pub name: String, +} + +pub trait Function { + fn signature(&self) -> FunctionSignature; +} diff --git a/src/runtime/functions.rs b/src/runtime/functions.rs new file mode 100644 index 0000000..5a87741 --- /dev/null +++ b/src/runtime/functions.rs @@ -0,0 +1,96 @@ +//! Functions in the runtime + +use crate::runtime::types::Type; + +/// A function definition is a collection of function signatures (overloads) +pub struct FunctionDefinition { + pub name: String, + pub overloads: Vec, +} + +impl FunctionDefinition { + /// Create a new function definition with a name, return type, and arguments + pub fn new>(name: S, return_type: Type, arguments: Vec) -> Self { + Self { + name: name.into(), + overloads: vec![FunctionSignature { + arguments, + return_type, + }], + } + } + + /// Add a new overload to the function definition + pub fn add_overload(mut self, return_type: Type, arguments: Vec) -> Self { + self.overloads.push(FunctionSignature { + arguments, + return_type, + }); + self + } + + /// Check if the function definition accepts the given arguments. + /// The arguments are matched by name and type, and according to the order they + /// were defined (inserted) into the function definition. 
+ pub fn accepts(&self, arguments: Vec) -> bool { + for overload in &self.overloads { + if overload.arguments == arguments { + return true; + } + } + false + } +} + +/// A function signature is a single function definition with a name, arguments, and return type +pub struct FunctionSignature { + pub arguments: Vec, + pub return_type: Type, +} + +/// An argument is a single argument to a function +#[derive(Debug, PartialEq)] +pub struct Argument { + pub name: String, + pub r#type: Type, +} + +impl Argument { + pub fn new>(name: S, r#type: Type) -> Self { + Self { + name: name.into(), + r#type, + } + } + + /// A convenience method for an argument with the built-in number type + pub fn number>(name: S) -> Self { + Self::new(name, Type::number()) + } + + /// A convenience method for an argument with the built-in string type + pub fn string>(name: S) -> Self { + Self::new(name, Type::string()) + } + + /// A convenience method for an argument with the built-in boolean type + pub fn boolean>(name: S) -> Self { + Self::new(name, Type::boolean()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_accepts() { + let function_definition = FunctionDefinition::new( + "add", + Type::number(), + vec![Argument::number("a"), Argument::number("b")], + ); + assert!(function_definition.accepts(vec![Argument::number("a"), Argument::number("b")])); + assert!(!function_definition.accepts(vec![Argument::number("b")])); + } +} diff --git a/src/runtime/types.rs b/src/runtime/types.rs new file mode 100644 index 0000000..58c3afd --- /dev/null +++ b/src/runtime/types.rs @@ -0,0 +1,47 @@ +//! 
Types for the flt runtime + +/// A type is either a builtin type or a custom type +#[derive(Debug, PartialEq)] +pub enum Type { + Builtin(BuiltinType), + Custom(CustomType), +} + +impl Type { + /// The built-in number type + pub fn number() -> Self { + Type::Builtin(BuiltinType::Number) + } + + /// The built-in string type + pub fn string() -> Self { + Type::Builtin(BuiltinType::String) + } + + /// The built-in boolean type + pub fn boolean() -> Self { + Type::Builtin(BuiltinType::Boolean) + } +} + +/// A builtin type is a type that is predefined in the runtime. +#[derive(Debug, PartialEq)] +pub enum BuiltinType { + String, + Number, + Boolean, + Array, + Map, +} + +/// A custom type is a type that is defined by the user. +#[derive(Debug, PartialEq)] +pub struct CustomType { + pub name: String, +} + +impl CustomType { + pub fn new(name: String) -> Self { + Self { name } + } +} diff --git a/tests/features.rs b/tests/features.rs index c845c6d..1310430 100644 --- a/tests/features.rs +++ b/tests/features.rs @@ -9,13 +9,18 @@ use cucumber::World; use flt::ast::BinaryOp; use flt::ast::Expr; +use flt::ast::Identifier; use flt::ast::Literal; +use flt::ast::Statement; use flt::parser::parse_expr; +use flt::parser::parse_statement; #[derive(Debug, Default, World)] pub struct AstWorld { pub input: Option, pub output: Option>, + /// Parsed as a statement when input is e.g. "let x = 1" or "x = 1". + pub output_statement: Option>, /// Set by array step so "first/second/third element" steps can inspect it. 
pub last_parsed_expr: Option, } @@ -39,6 +44,15 @@ fn given_the_multiline_input(world: &mut AstWorld, step: &Step) { #[when(expr = "I parse the input")] fn when_i_parse_the_input(world: &mut AstWorld) { let input = world.input.take().expect("input should be set"); + if let Ok((remainder, stmt)) = parse_statement(&input) { + if remainder.trim().is_empty() { + if let Statement::Expr(expr) = &stmt { + world.output = Some(Ok(expr.clone())); + } + world.output_statement = Some(Ok(stmt)); + return; + } + } world.output = Some(match parse_expr(&input) { Ok((remainder, expr)) => { if remainder.is_empty() { @@ -123,6 +137,29 @@ fn then_parsing_should_fail(world: &mut AstWorld) { ); } +#[then( + regex = r#"^the output should be a `Statement::Let\(Identifier\("([^"]*)"\), Expr::Literal\(Literal::Number\((\d+)\)\)\)`$"# +)] +fn then_output_should_be_let_statement_number( + world: &mut AstWorld, + ident: String, + expected_num: i64, +) { + let output = world + .output_statement + .take() + .expect("output_statement should be set (input was parsed as statement)"); + let stmt = output.expect("statement parse should succeed"); + let expected = Statement::Let( + Identifier(ident.clone()), + Expr::literal_number(expected_num), + ); + assert_eq!( + stmt, expected, + "expected Statement::Let(Identifier({ident:?}), Literal::Number({expected_num}))" + ); +} + #[then(expr = r"the output should parse to interpolated string {string} {word} {string}")] fn then_output_should_be_interpolated_string( world: &mut AstWorld, diff --git a/tests/repl.rs b/tests/repl.rs index eaf79a3..6f33768 100644 --- a/tests/repl.rs +++ b/tests/repl.rs @@ -14,19 +14,29 @@ use cucumber::World; #[derive(Debug, Default, World)] pub struct ReplWorld { pub output: Option, + pub last_output: Option, } #[when(regex = r#"^the REPL is run and the user types:$"#)] async fn the_repl_is_run_and_the_user_types(world: &mut ReplWorld, step: &Step) { let input = step.docstring.as_ref().expect("Step requires a docstring"); - let 
mut child = Command::new("cargo") - .current_dir(env!("CARGO_MANIFEST_DIR")) - .args(["run", "--"]) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .expect("Failed to spawn flt REPL"); + let mut child = if let Ok(flt_path) = std::env::var("CARGO_BIN_EXE_flt") { + Command::new(flt_path) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("Failed to spawn flt REPL") + } else { + Command::new("cargo") + .current_dir(env!("CARGO_MANIFEST_DIR")) + .args(["run", "--"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("Failed to spawn flt REPL") + }; { let stdin = child.stdin.as_mut().expect("Failed to open stdin"); @@ -38,13 +48,30 @@ async fn the_repl_is_run_and_the_user_types(world: &mut ReplWorld, step: &Step) drop(child.stdin.take()); let output = child.wait_with_output().expect("Failed waiting for flt"); + world.last_output = Some(output); + let last = world.last_output.as_ref().unwrap(); world.output = Some(format!( "{}{}", - String::from_utf8(output.stdout).unwrap(), - String::from_utf8(output.stderr).unwrap() + String::from_utf8(last.stdout.clone()).unwrap(), + String::from_utf8(last.stderr.clone()).unwrap() )); } +#[then(regex = r#"^the command should succeed$"#)] +async fn the_command_should_succeed(world: &mut ReplWorld) { + let output = world + .last_output + .as_ref() + .expect("No command output; use 'the REPL is run and the user types' first"); + assert!( + output.status.success(), + "Command failed with exit code {:?}:\nstdout: {}\nstderr: {}", + output.status.code(), + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); +} + #[then(expr = r"the output should contain {string}")] async fn the_output_should_contain(world: &mut ReplWorld, expected: String) { assert!(world.output.is_some(), "No output");