From 897353dcb038d5f0f7d6525ac5f8b4717a39da2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Sun, 8 Jun 2025 18:22:35 -0300 Subject: [PATCH 1/8] Resolve Symbols in Let and Case bindings --- src/ast.rs | 23 ++++++++++++++--- src/parser.rs | 12 +++++---- src/type_checker.rs | 60 ++++++++++++++++++++++++++------------------ src/util/fmt/tree.rs | 30 +++++++++++++++++++--- 4 files changed, 88 insertions(+), 37 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 0b11fc5..c8a43e4 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -59,6 +59,8 @@ pub trait Info { type Assignment: Clone + Debug; type Id: Clone + Debug; + type LetBinding: Clone + Debug; + type CaseArm: Clone + Debug; } /// Untyped AST. @@ -70,6 +72,8 @@ impl Info for Untyped { type Expr = (); type Assignment = (); type Id = (); + type LetBinding = (); + type CaseArm = (); } /// Typed AST. @@ -81,6 +85,8 @@ impl Info for Typed { type Expr = Type; type Assignment = Symbol; type Id = Symbol; + type LetBinding = Symbol; + type CaseArm = Symbol; } #[derive(Debug, Default)] @@ -101,12 +107,12 @@ pub struct Class { #[derive(Debug, Clone)] pub enum Feature { - Attribute(Binding), + Attribute(Attribute), Method(Method), } #[derive(Debug, Clone)] -pub struct Binding { +pub struct Attribute { pub name: Ident, pub ty: I::Ty, pub initializer: Option>, @@ -182,7 +188,7 @@ pub enum ExprKind { }, Let { /// Non empty list of bindings. 
- bindings: Vec>, + bindings: Vec>, body: Box>, }, Case { @@ -210,6 +216,14 @@ pub enum ExprKind { Dummy, } +#[derive(Debug, Clone)] +pub struct LetBinding { + pub name: Ident, + pub ty: I::Ty, + pub initializer: Option>, + pub info: I::LetBinding, +} + #[derive(Debug, Clone)] pub struct DispatchQualifier { pub expr: Box>, @@ -221,6 +235,7 @@ pub struct CaseArm { pub name: Ident, pub ty: I::Ty, pub body: Box>, + pub info: I::CaseArm, } #[derive(Copy, Clone, Debug)] @@ -299,7 +314,7 @@ pub mod desugar { use super::*; pub fn multi_binding_let( - bindings: Vec>, + bindings: Vec>, mut body: Box>, span: Span, info: &I::Expr, diff --git a/src/parser.rs b/src/parser.rs index 8560b61..3aadee2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ use crate::{ ast::{ - BinaryOperator, Binding, CaseArm, Class, DispatchQualifier, Expr, ExprKind, Feature, - Formal, Ident, Method, Program, TypeName, UnaryOperator, Untyped, + Attribute, BinaryOperator, CaseArm, Class, DispatchQualifier, Expr, ExprKind, Feature, + Formal, Ident, LetBinding, Method, Program, TypeName, UnaryOperator, Untyped, }, lexer::{self, extract}, token::{Span, Spanned, Token, TokenKind}, @@ -134,7 +134,7 @@ impl Parser<'_, '_, '_> { TokenKind::Colon => { let ty = self.parse_type()?; let initializer = self.parse_initializer()?; - Ok(Feature::Attribute(Binding { + Ok(Feature::Attribute(Attribute { name, ty, initializer, @@ -614,16 +614,17 @@ impl Parser<'_, '_, '_> { Some(bp) } - fn parse_let_binding(&mut self) -> Result> { + fn parse_let_binding(&mut self) -> Result> { let name = self.parse_ident()?; self.consume(TokenKind::Colon)?; let ty = self.parse_type()?; let initializer = self.parse_initializer()?; - Ok(Binding { + Ok(LetBinding { name, ty, initializer, + info: (), }) } @@ -640,6 +641,7 @@ impl Parser<'_, '_, '_> { name, ty, body: Box::new(body), + info: (), }; Ok((arm, span)) } diff --git a/src/type_checker.rs b/src/type_checker.rs index 2844267..425bbbf 100644 --- a/src/type_checker.rs +++ 
b/src/type_checker.rs @@ -83,7 +83,7 @@ impl Checker<'_> { let attributes: Vec<_> = this.scoped_formals(current_class.clone(), &[], |this| { attributes .into_iter() - .map(|binding| this.check_binding(binding)) + .map(|binding| this.check_attribute(binding)) .collect() }); @@ -104,14 +104,14 @@ impl Checker<'_> { }) } - fn check_binding(&mut self, binding: ast::Binding) -> ast::Binding { + fn check_attribute(&mut self, binding: ast::Attribute) -> ast::Attribute { let ty = self.get_type_allowing_self_type(binding.ty); let initializer = binding.initializer.map(|expr| { let expr = self.check_expr(expr); self.assert_is_subtype(expr.ty(), &ty, expr.span); expr }); - ast::Binding { + ast::Attribute { name: binding.name, ty, initializer, @@ -314,24 +314,29 @@ impl Checker<'_> { )); } - let binding = { - assert_eq!(bindings.len(), 1); - let binding = bindings.remove(0); // untyped - let ty = self.get_type_allowing_self_type(binding.ty); - ast::Binding { - name: binding.name, - initializer: binding.initializer.map(|i| { - let expr = self.check_expr(i); - self.assert_is_subtype(expr.ty(), &ty, expr.span); - expr - }), - ty, - } - }; - let body = self.scoped_local(binding.name.name, binding.ty.clone(), |this| { - this.check_expr(*body) + assert_eq!(bindings.len(), 1); + let binding = bindings.remove(0); + + let binding_name = binding.name; + let binding_ty = self.get_type_allowing_self_type(binding.ty); + let binding_initializer = binding.initializer.map(|i| { + let expr = self.check_expr(i); + self.assert_is_subtype(expr.ty(), &binding_ty, expr.span); + expr }); - let bindings = vec![binding]; + + let (symbol, body) = + self.scoped_local(binding_name.name, binding_ty.clone(), |this| { + this.check_expr(*body) + }); + + let bindings = vec![ast::LetBinding { + name: binding_name, + ty: binding_ty, + initializer: binding_initializer, + info: symbol, + }]; + let ty = body.ty().clone(); let body = Box::new(body); (ExprKind::Let { bindings, body }, ty) @@ -350,7 +355,7 @@ impl 
Checker<'_> { } seen.insert(ty.name()); - let body = self.scoped_local(arm.name.name, ty.clone(), |this| { + let (symbol, body) = self.scoped_local(arm.name.name, ty.clone(), |this| { this.check_expr(*arm.body) }); @@ -360,6 +365,7 @@ impl Checker<'_> { name: arm.name, ty, body: Box::new(body), + info: symbol, } }) .collect(); @@ -985,14 +991,20 @@ impl Checker<'_> { name: Interned, ty: Type, f: impl FnOnce(&mut Self) -> T, - ) -> T { + ) -> (Symbol, T) { let id = LocalId(self.symbol_table.locals); - self.symbol_table.scopes.push(Scope::Local(name, ty, id)); + self.symbol_table + .scopes + .push(Scope::Local(name, ty.clone(), id)); + let symbol = Symbol { + ty, + binding: Binding::Local(id), + }; self.symbol_table.locals += 1; let res = f(self); self.symbol_table.scopes.pop().expect("just pushed"); - res + (symbol, res) } fn scoped_formals( diff --git a/src/util/fmt/tree.rs b/src/util/fmt/tree.rs index 48c0c22..575c270 100644 --- a/src/util/fmt/tree.rs +++ b/src/util/fmt/tree.rs @@ -55,7 +55,7 @@ fn print_feature( Feature::Attribute(binding) => { sp(w, i)?; write!(w, "attribute ")?; - print_binding(w, idents, i, binding)?; + print_attribute(w, idents, i, binding)?; } Feature::Method(Method { name, @@ -84,11 +84,11 @@ fn print_feature( Ok(()) } -fn print_binding( +fn print_attribute( w: &mut impl Write, idents: &Interner, i: usize, - binding: &Binding, + binding: &Attribute, ) -> std::io::Result<()> { write!( w, @@ -180,7 +180,7 @@ pub fn print_expr( for binding in bindings { sp(w, i + 1)?; write!(w, "binding ")?; - print_binding(w, idents, i + 1, binding)?; + print_let_binding(w, idents, i + 1, binding)?; } sp(w, i + 1)?; writeln!(w, "in")?; @@ -233,6 +233,28 @@ pub fn print_expr( Ok(()) } +fn print_let_binding( + w: &mut impl Write, + idents: &Interner, + i: usize, + binding: &LetBinding, +) -> std::io::Result<()> { + write!( + w, + "{}: {}", + idents.get(binding.name), + binding.ty.write(idents), + )?; + if let Some(ref initializer) = binding.initializer { + 
write!(w, " (initialized)")?; + writeln!(w)?; + print_expr(w, idents, i + 1, initializer)?; + } else { + writeln!(w)?; + } + Ok(()) +} + fn print_case_arm( w: &mut impl Write, idents: &Interner, From e96749c817d1b03f63874205bd5172df169bfe05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Sun, 8 Jun 2025 19:27:30 -0300 Subject: [PATCH 2/8] Entry point check --- src/bin/main.rs | 3 ++- src/type_checker.rs | 39 ++++++++++++++++++++++++++++++++------- src/util/fmt/error.rs | 4 ++++ src/util/test_utils.rs | 8 +++++--- 4 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/bin/main.rs b/src/bin/main.rs index fd681c2..4cb70f3 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -95,7 +95,8 @@ fn pipeline(src: &str, tokens: &mut Vec, ident_interner: &mut Interner prog, Err((_prog, _registry, errors)) => { diff --git a/src/type_checker.rs b/src/type_checker.rs index 425bbbf..f9644b4 100644 --- a/src/type_checker.rs +++ b/src/type_checker.rs @@ -26,10 +26,25 @@ pub struct Checker<'ident> { current_class: Interned, errors: Vec>, ident_interner: &'ident mut Interner, + found_main: bool, +} + +pub mod flags { + /// Sane behavior for common user workloads. + pub const DEFAULT: u32 = 0; + + /// Disables entry point check. Useful for testing. 
+ pub const SKIP_ENTRY_POINT_CHECK: u32 = 1 << 0; } impl Checker<'_> { - pub fn with_capacity(ident_interner: &mut Interner, capacity: usize) -> Checker<'_> { + pub fn with_capacity( + ident_interner: &mut Interner, + capacity: usize, + checker_flags: u32, + ) -> Checker<'_> { + let skip_entrypoint_check = (checker_flags & flags::SKIP_ENTRY_POINT_CHECK) == 1; + Checker { registry: TypeRegistry::with_capacity(capacity), classes: HashMap::with_capacity(0), @@ -37,6 +52,7 @@ impl Checker<'_> { current_class: builtins::NO_TYPE, errors: Vec::with_capacity(8), ident_interner, + found_main: skip_entrypoint_check, } } @@ -56,6 +72,11 @@ impl Checker<'_> { .collect(); let program = Program { classes }; + if !self.found_main { + let error = Error::MissingEntryPoint; + self.errors.push(Span::new_of_length(0, 0).wrap(error)); + } + if self.errors.is_empty() { Ok((program, self.registry)) } else { @@ -119,6 +140,9 @@ impl Checker<'_> { } fn check_method(&mut self, method: ast::Method) -> ast::Method { + let name = (self.current_class, method.name.name); + self.found_main |= name == (well_known::MAIN, well_known::MAIN_METHOD); + let formals: Vec<_> = method .formals .into_iter() @@ -810,6 +834,7 @@ impl Checker<'_> { #[derive(Copy, Clone, Debug)] pub enum Error { + MissingEntryPoint, DuplicateTypeDefinition { name: Interned, other: Span, @@ -1108,7 +1133,7 @@ mod tests { use crate::{ parser::test_utils::parse_program, - type_checker::{Checker, ClassesEnv}, + type_checker::{flags, Checker, ClassesEnv}, util::{ intern::Interner, test_utils::{assert_errors, tree_tests}, @@ -1917,7 +1942,7 @@ mod tests { class Block inherits Entity {}; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); assert!(checker.errors.is_empty()); assert_eq!( @@ -1947,7 +1972,7 @@ mod tests { class Object {}; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let 
mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); assert_errors( checker.ident_interner, @@ -1966,7 +1991,7 @@ mod tests { class Entity inherits UndefinedClass {}; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); assert_errors( checker.ident_interner, @@ -1990,7 +2015,7 @@ mod tests { }; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); checker.build_classes_env(&prog); assert!(checker.errors.is_empty()); @@ -2049,7 +2074,7 @@ mod tests { }; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); checker.build_classes_env(&prog); assert!(checker.errors.is_empty()); diff --git a/src/util/fmt/error.rs b/src/util/fmt/error.rs index a46c5eb..0781c4d 100644 --- a/src/util/fmt/error.rs +++ b/src/util/fmt/error.rs @@ -18,6 +18,10 @@ impl Show for Spanned { use type_checker::Error::*; match error { + MissingEntryPoint => write!( + f, + "program has no entrypoint (`main` method in `Main` class)" + ), DuplicateTypeDefinition { name, other } => { let name = i.get(name); write!(f, "class {name} already defined at {other}") diff --git a/src/util/test_utils.rs b/src/util/test_utils.rs index 4929bce..44408f5 100644 --- a/src/util/test_utils.rs +++ b/src/util/test_utils.rs @@ -1,7 +1,7 @@ use crate::{ ast, parser, token::Spanned, - type_checker::Checker, + type_checker::{self, Checker}, util::{ self, fmt::{tree, Show}, @@ -71,7 +71,8 @@ pub fn run_pipeline(test: Test) -> (String, Vec) { }; let mut fmt_errors = format_errors(interner, &errors); - let checker = Checker::with_capacity(interner, 128); + let flags = 
type_checker::flags::SKIP_ENTRY_POINT_CHECK; + let checker = Checker::with_capacity(interner, 128, flags); let (prog, errors) = match checker.check(prog) { Ok((prog, _reg)) => (prog, vec![]), Err((prog, _reg, errors)) => (prog, errors), @@ -89,7 +90,8 @@ pub fn run_pipeline(test: Test) -> (String, Vec) { let prog = ast::test_utils::from_expr_to_main_program(expr); let mut fmt_errors = format_errors(interner, &errors); - let checker = Checker::with_capacity(interner, 128); + let flags = type_checker::flags::SKIP_ENTRY_POINT_CHECK; + let checker = Checker::with_capacity(interner, 128, flags); let (prog, errors) = match checker.check(prog) { Ok((prog, _reg)) => (prog, vec![]), Err((prog, _reg, errors)) => (prog, errors), From 2951c9998d99d4e17f7e6f49d879f3130ebebfe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Sun, 8 Jun 2025 23:05:59 -0300 Subject: [PATCH 3/8] Scaffold codegen --- Cargo.lock | 105 +++++++++++++++++- Cargo.toml | 9 ++ examples/small.cool | 7 +- examples/trivial.cool | 3 + src/bin/main.rs | 124 +++++++++++++++++++--- src/codegen/interface.rs | 55 ++++++++++ src/codegen/x86_64.rs | 218 ++++++++++++++++++++++++++++++++++++++ src/codegen/x86_64_env.rs | 36 +++++++ src/lib.rs | 9 ++ src/types.rs | 5 + 10 files changed, 553 insertions(+), 18 deletions(-) create mode 100644 examples/trivial.cool create mode 100644 src/codegen/interface.rs create mode 100644 src/codegen/x86_64.rs create mode 100644 src/codegen/x86_64_env.rs diff --git a/Cargo.lock b/Cargo.lock index cefea86..8e311b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,12 +17,56 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", 
+ "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -84,21 +128,36 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.29" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184" +checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] name = "clap_builder" -version = "4.5.29" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9" +checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -107,10 +166,18 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "cool" version = "0.0.1" dependencies = [ + "cfg-if", + "clap", "indoc", "phf", "pretty_assertions", @@ -205,6 +272,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.4.0" @@ -228,6 +301,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.10.5" @@ -286,6 +365,12 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + [[package]] name = "oorandom" version = "11.1.4" @@ -513,6 +598,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.98" @@ -540,6 +631,12 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 703ddfc..84335b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,8 @@ version = "0.0.1" [workspace.dependencies] cool.path = "." +cfg-if = "1.0.0" +clap = { version = "4.5.39", features = ["derive"] } criterion = "0.5.1" indoc = "2.0.6" phf = { version = "0.11.3", features = ["macros"] } @@ -29,8 +31,15 @@ result_unit_err = { level = "allow", priority = 2 } name = "cool" version.workspace = true edition.workspace = true +default-run = "coolc" + +[[bin]] +name = "coolc" +path = "./src/bin/main.rs" [dependencies] +cfg-if.workspace = true +clap.workspace = true phf.workspace = true [dev-dependencies] diff --git a/examples/small.cool b/examples/small.cool index 129f897..c331eeb 100644 --- a/examples/small.cool +++ b/examples/small.cool @@ -1,5 +1,10 @@ class Main { main() : Int { - 1 + 2 * 3 + let io : IO <- new IO in + let res : Int <- 1 + 2 * 3 in + let s : String <- io.string_of_int(res) in { + io.out_string(s); + 0 + } }; }; diff --git a/examples/trivial.cool b/examples/trivial.cool new file mode 100644 index 0000000..143023c --- /dev/null +++ b/examples/trivial.cool @@ -0,0 +1,3 @@ +class Main { + main() : Int { 0 }; +}; diff --git a/src/bin/main.rs b/src/bin/main.rs index 4cb70f3..4f23819 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -1,11 +1,15 @@ use std::{ env, error::Error, - fs, - io::{self, Write}, + fs::{self, File}, + io::{self, BufWriter, Write}, + 
path::PathBuf, + process::exit, }; +use clap::Parser; use cool::{ + codegen::{self, Target}, parser, token::{Spanned, Token}, type_checker, @@ -16,6 +20,32 @@ use cool::{ }, }; +#[derive(Parser)] +struct Args { + /// Emits the untyped AST. + #[arg(short, long)] + untyped_ast: bool, + + /// Emits the typed AST. + #[arg(long)] + typed_ast: bool, + + /// Emits the un-assembled machine code. + #[arg(long)] + assembly: bool, + + /// Compilation target. + #[arg(short, long, default_value_t = codegen::DEFAULT_TARGET)] + target: codegen::Target, + + /// Binary output. + #[arg(short, long)] + output: Option, + + /// Program input, Cool source code. + input: Option, +} + fn main() { if let Err(error) = run() { eprintln!("Error: {error}"); @@ -24,14 +54,22 @@ fn main() { } fn run() -> Result<(), Box> { - let mut args = env::args().skip(1); + let args = Args::parse(); + + if args.target == Target::none { + eprintln!( + "Couldn't infer compilation target. Check `--help` for available target options." 
+ ); + exit(1); + } + let mut tokens_buf = Vec::with_capacity(8 * 1024); let mut ident_interner = Interner::with_capacity(1024); // File mode - if let Some(prog_path) = args.next() { + if let Some(prog_path) = &args.input { let input = fs::read_to_string(prog_path)?; - pipeline(&input, &mut tokens_buf, &mut ident_interner); + pipeline(&input, &args, &mut tokens_buf, &mut ident_interner); return Ok(()); } @@ -57,7 +95,12 @@ fn run() -> Result<(), Box> { if accumulated_input.trim().is_empty() { println!("^D"); } else { - pipeline(&accumulated_input, &mut tokens_buf, &mut ident_interner); + pipeline( + &accumulated_input, + &args, + &mut tokens_buf, + &mut ident_interner, + ); } return Ok(()); } @@ -65,7 +108,12 @@ fn run() -> Result<(), Box> { // Empty line is another termination signal if current_line.trim().is_empty() { if !accumulated_input.trim().is_empty() { - pipeline(&accumulated_input, &mut tokens_buf, &mut ident_interner); + pipeline( + &accumulated_input, + &args, + &mut tokens_buf, + &mut ident_interner, + ); accumulated_input.clear(); // Clear for next input } } else { @@ -74,7 +122,7 @@ fn run() -> Result<(), Box> { } } -fn pipeline(src: &str, tokens: &mut Vec, ident_interner: &mut Interner) { +fn pipeline(src: &str, args: &Args, tokens: &mut Vec, ident_interner: &mut Interner) { tokens.clear(); let prog = match parser::parse_program(src, tokens, ident_interner) { @@ -92,8 +140,10 @@ fn pipeline(src: &str, tokens: &mut Vec, ident_interner: &mut Interner, ident_interner: &mut Interner(src: &str, error: &Spanned, ident_interner: &Interner) diff --git a/src/codegen/interface.rs b/src/codegen/interface.rs new file mode 100644 index 0000000..2d44887 --- /dev/null +++ b/src/codegen/interface.rs @@ -0,0 +1,55 @@ +use std::io; + +use crate::{ + ast::{Program, Typed}, + codegen::{x86_64::Generator, x86_64_env}, + util::intern::Interner, +}; + +pub fn generate( + writer: W, + ident_interner: &Interner, + target: Target, + program: &Program, +) where + W: 
io::Write, +{ + type DarwinGenerator<'a, W> = Generator<'a, W, x86_64_env::Darwin>; + type LinuxGenerator<'a, W> = Generator<'a, W, x86_64_env::Linux>; + + match target { + Target::x86_64_darwin => DarwinGenerator::new(writer, ident_interner).generate(program), + Target::x86_64_linux => LinuxGenerator::new(writer, ident_interner).generate(program), + Target::none => unreachable!("must be handled during args validation"), + } +} + +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum)] +#[clap(rename_all = "snake_case")] +pub enum Target { + x86_64_darwin, + x86_64_linux, + #[clap(skip)] + none, +} + +impl std::fmt::Display for Target { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Target::x86_64_darwin => f.write_str("x86_64_darwin"), + Target::x86_64_linux => f.write_str("x86_64_linux"), + Target::none => f.write_str("none"), + } + } +} + +cfg_if::cfg_if! { + if #[cfg(all(target_arch = "x86_64", target_os = "macos"))] { + pub const DEFAULT_TARGET: Target = Target::x86_64_darwin; + } else if #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + pub const DEFAULT_TARGET: Target = Target::x86_64_linux; + } else { + pub const DEFAULT_TARGET: Target = Target::none; + } +} diff --git a/src/codegen/x86_64.rs b/src/codegen/x86_64.rs new file mode 100644 index 0000000..f980aa1 --- /dev/null +++ b/src/codegen/x86_64.rs @@ -0,0 +1,218 @@ +use std::{format_args as f, io, marker::PhantomData}; + +use crate::{ + ast::{self, Expr, Typed}, + codegen::x86_64_env, + types::well_known, + util::intern::{Interned, Interner}, +}; + +pub struct Generator<'ident, W, E> { + writer: W, + ident_interner: &'ident Interner, + indent: bool, + _env: PhantomData, +} + +impl Generator<'_, W, E> +where + W: io::Write, + E: x86_64_env::Env, +{ + pub fn new(writer: W, ident_interner: &Interner) -> Generator<'_, W, E> { + Generator { + writer, + ident_interner, + indent: false, + _env: PhantomData, + } + } + + pub fn 
generate(mut self, program: &ast::Program) { + self.g_program_prologue(); + self.g_methods(program); + self.g_vtables(program); + self.g_data(program); + } +} + +/// Target-specific functions. +impl Generator<'_, W, E> +where + W: io::Write, + E: x86_64_env::Env, +{ + fn g_program_prologue(&mut self) { + self.out(E::GLOBAL_PROLOGUE); + } + + fn g_methods(&mut self, program: &ast::Program) { + self.out(f!(".section {}", E::SECTION_TEXT)); + + for class in &program.classes { + let class_name = self.ident(&class.name); + self.out(f!("# CLASS {class_name}\n")); + + let methods = class.features.iter().filter_map(|feature| { + if let ast::Feature::Method(method) = feature { + Some(method) + } else { + None + } + }); + + for method in methods { + self.g_method(method, class_name); + } + } + } + + fn g_method(&mut self, method: &ast::Method, class_name: ResolvedIdent) { + let method_name = self.ident(method.name); + let qualified = Self::qualified(class_name, method_name); + if qualified.is_main() { + self.out(f!(".global {qualified}")); + } + self.out(f!("{qualified}:")); + self.indented(|this| { + this.g_method_prologue(method); + this.g_expr(&method.body); + this.g_method_epilogue(method); + }); + } + + #[expect(unused_variables)] + fn g_expr(&mut self, e: &Expr) { + match &e.kind { + ast::ExprKind::Assignment { + target, + value, + info, + } => todo!(), + ast::ExprKind::Dispatch { + qualifier, + method, + args, + } => todo!(), + ast::ExprKind::Conditional { + predicate, + then_arm, + else_arm, + } => todo!(), + ast::ExprKind::While { predicate, body } => todo!(), + ast::ExprKind::Block { body } => todo!(), + ast::ExprKind::Let { bindings, body } => todo!(), + ast::ExprKind::Case { predicate, arms } => todo!(), + ast::ExprKind::New { ty } => todo!(), + ast::ExprKind::Unary { op, expr } => todo!(), + ast::ExprKind::Binary { op, lhs, rhs } => todo!(), + ast::ExprKind::Paren(expr) => todo!(), + ast::ExprKind::Id(ident, _) => todo!(), + ast::ExprKind::Int(int) => 
self.out(f!("mov rax, {int}")), + ast::ExprKind::String(_) => todo!(), + ast::ExprKind::Bool(_) => todo!(), + ast::ExprKind::Dummy => todo!(), + } + } + + fn g_method_prologue(&mut self, _method: &ast::Method) { + self.out("push rbp"); + self.out("mov rbp, rsp"); + } + + fn g_method_epilogue(&mut self, _method: &ast::Method) { + self.out("pop rbp"); + self.out("ret"); + } + + #[expect(clippy::unused_self)] + fn g_vtables(&mut self, _program: &ast::Program) {} + + #[expect(clippy::unused_self)] + fn g_data(&mut self, _program: &ast::Program) {} +} + +/// Utility functions. +impl<'ident, W, E> Generator<'ident, W, E> +where + W: io::Write, + E: x86_64_env::Env, +{ + /// Prints a line. + fn out(&mut self, f: impl std::fmt::Display) { + let indent = if self.indent { " " } else { "" }; + writeln!(self.writer, "{indent}{f}").expect("Failed to write to sink"); + } + + /// Prints an empty line. + fn out_line(&mut self) { + writeln!(self.writer).expect("Failed to write to sink"); + } + + /// Resolves an identifier, returning a resolved ident. + fn ident(&mut self, handle: impl Into>) -> ResolvedIdent<'ident> { + let ident = handle.into(); + let name = self.ident_interner.get(ident); + ResolvedIdent { ident, name } + } + + fn qualified<'i>( + class: ResolvedIdent<'i>, + method: ResolvedIdent<'i>, + ) -> QualifiedMethod<'i, E> { + QualifiedMethod { + class, + method, + _env: PhantomData, + } + } + + /// Writes in an indented block that is finished with an empty line. 
+ fn indented(&mut self, f: impl FnOnce(&mut Self) -> T) -> T { + self.indent = true; + let res = f(self); + self.indent = false; + self.out_line(); + res + } +} + +#[derive(Copy, Clone)] +struct QualifiedMethod<'i, E> { + class: ResolvedIdent<'i>, + method: ResolvedIdent<'i>, + _env: PhantomData, +} + +impl QualifiedMethod<'_, E> { + fn is_main(&self) -> bool { + self.class.ident == well_known::MAIN && self.method.ident == well_known::MAIN_METHOD + } +} + +impl std::fmt::Display for QualifiedMethod<'_, E> +where + E: x86_64_env::Env, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_main() { + write!(f, "{}", E::ENTRY_POINT) + } else { + let class = self.class.name; + let method = self.method.name; + write!(f, ".{class}__{method}") + } + } +} + +#[derive(Copy, Clone)] +struct ResolvedIdent<'i> { + ident: Interned, + name: &'i str, +} + +impl std::fmt::Display for ResolvedIdent<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name) + } +} diff --git a/src/codegen/x86_64_env.rs b/src/codegen/x86_64_env.rs new file mode 100644 index 0000000..adf4dd9 --- /dev/null +++ b/src/codegen/x86_64_env.rs @@ -0,0 +1,36 @@ +pub trait Env { + const ENTRY_POINT: &str; + + const GLOBAL_PROLOGUE: &str; + + const SECTION_TEXT: &str; + const SECTION_READ_ONLY_DATA: &str; + const SECTION_READ_ONLY_RELOCATABLE_DATA: &str; +} + +impl Env for Darwin { + const ENTRY_POINT: &str = "_main"; + + const GLOBAL_PROLOGUE: &str = ".intel_syntax noprefix\n\n"; + + const SECTION_TEXT: &str = "__TEXT,__text,regular,pure_instructions"; + const SECTION_READ_ONLY_DATA: &str = "__TEXT,__const"; + const SECTION_READ_ONLY_RELOCATABLE_DATA: &str = "__DATA,__const"; +} + +impl Env for Linux { + const ENTRY_POINT: &str = "main"; + + const GLOBAL_PROLOGUE: &str = concat!( + ".intel_syntax noprefix\n", + ".section .note.GNU-stack,\"\",@progbits\n\n", + ); + + const SECTION_TEXT: &str = ".text"; + const 
SECTION_READ_ONLY_DATA: &str = ".rodata"; + const SECTION_READ_ONLY_RELOCATABLE_DATA: &str = ".data.rel.ro"; +} + +pub struct Darwin; + +pub struct Linux; diff --git a/src/lib.rs b/src/lib.rs index 96787da..6064369 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,15 @@ pub mod parser; /// and maps it into a typed AST. pub mod type_checker; +/// The code generator takes a typed AST and generates target machine code. +pub mod codegen { + mod interface; + pub use interface::{generate, Target, DEFAULT_TARGET}; + + mod x86_64; + mod x86_64_env; +} + pub mod ast; pub mod token; pub mod types; diff --git a/src/types.rs b/src/types.rs index 795a346..61cdac5 100644 --- a/src/types.rs +++ b/src/types.rs @@ -222,8 +222,13 @@ pub mod builtins { class IO("IO", 6) inherits OBJECT { out_string(x: String) : SELF_TYPE; out_int(x: Int) : SELF_TYPE; + in_string(): String; in_int(): Int; + + string_of_int(i: Int) : String; + string_of_bool(b: Bool) : String; + exit(status: Int) : ""; }; } From aad6731f433c042be5b41946c78b03e8ccef51a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Sun, 8 Jun 2025 23:08:51 -0300 Subject: [PATCH 4/8] Check main return type --- src/type_checker.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/type_checker.rs b/src/type_checker.rs index f9644b4..89c52bd 100644 --- a/src/type_checker.rs +++ b/src/type_checker.rs @@ -140,9 +140,6 @@ impl Checker<'_> { } fn check_method(&mut self, method: ast::Method) -> ast::Method { - let name = (self.current_class, method.name.name); - self.found_main |= name == (well_known::MAIN, well_known::MAIN_METHOD); - let formals: Vec<_> = method .formals .into_iter() @@ -160,6 +157,13 @@ impl Checker<'_> { }); let return_ty = self.get_type_allowing_self_type(method.return_ty); self.assert_is_subtype(body.ty(), &return_ty, body.span); + + let name = (self.current_class, method.name.name); + if name == (well_known::MAIN, well_known::MAIN_METHOD) { + 
self.found_main = true; + self.assert_is_type(&return_ty, builtins::INT, return_ty.span()); + } + ast::Method { name: method.name, formals, From c1fdc4d85fb0211ee51cfe8db43e1c15fa81b495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Sun, 8 Jun 2025 23:17:38 -0300 Subject: [PATCH 5/8] Improve main error handling of command invocation --- src/bin/main.rs | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/bin/main.rs b/src/bin/main.rs index 4f23819..47c2415 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -183,20 +183,24 @@ fn pipeline(src: &str, args: &Args, tokens: &mut Vec, ident_interner: &mu out_assembly.flush().unwrap(); // Assemble - println!("assembling"); + eprintln!("assembling"); let as_out = std::process::Command::new("as") .arg("-o") .arg("target/_coolc/out.o") .arg("target/_coolc/out.s") .output() - .expect("failed to assemble program"); - assert!( - as_out.status.success(), - "failed to assemble program: non-zero status" - ); + .unwrap_or_else(|error| { + eprintln!("failed to assemble program with `as`: {error}"); + exit(1); + }); + if !as_out.status.success() { + eprintln!("failed to assemble program with `as`:"); + io::stderr().write_all(&as_out.stderr).unwrap(); + exit(1); + } // Link - println!("linking"); + eprintln!("linking"); let cc = env::var("CC"); let cc = cc.as_deref().unwrap_or("clang"); let link_out = std::process::Command::new(cc) @@ -204,11 +208,15 @@ fn pipeline(src: &str, args: &Args, tokens: &mut Vec, ident_interner: &mu .arg(out_file) .arg("target/_coolc/out.o") .output() - .expect("failed to link program"); - assert!( - link_out.status.success(), - "failed to link program: non-exit status" - ); + .unwrap_or_else(|error| { + eprintln!("failed to link program with `{cc}`: {error}"); + exit(1); + }); + if !link_out.status.success() { + eprintln!("failed to link program with `{cc}`:"); + io::stderr().write_all(&link_out.stderr).unwrap(); + 
exit(1); + } } fn report_error(src: &str, error: &Spanned, ident_interner: &Interner) From 094b653a8559722bf1703da507b7395d93716c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Mon, 9 Jun 2025 11:46:10 -0300 Subject: [PATCH 6/8] Integrate runtime with Rust toolchain --- Cargo.lock | 15 +++++++-- Cargo.toml | 10 ++---- TODO | 1 + coolc-bin/Cargo.toml | 15 +++++++++ coolc-bin/build.rs | 46 ++++++++++++++++++++++++++++ {src/bin => coolc-bin/src}/main.rs | 34 ++++++++++++++++----- coolc-bin/src/target.rs | 49 ++++++++++++++++++++++++++++++ examples/trivial2.cool | 9 ++++++ runtime/Cargo.toml | 12 ++++++++ runtime/main.s | 26 ++++++++++++++++ runtime/src/lib.rs | 4 +++ rust-toolchain.toml | 3 ++ src/codegen/interface.rs | 28 ++++++++--------- src/lib.rs | 2 +- 14 files changed, 220 insertions(+), 34 deletions(-) create mode 100644 TODO create mode 100644 coolc-bin/Cargo.toml create mode 100644 coolc-bin/build.rs rename {src/bin => coolc-bin/src}/main.rs (90%) create mode 100644 coolc-bin/src/target.rs create mode 100644 examples/trivial2.cool create mode 100644 runtime/Cargo.toml create mode 100644 runtime/main.s create mode 100644 runtime/src/lib.rs create mode 100644 rust-toolchain.toml diff --git a/Cargo.lock b/Cargo.lock index 8e311b9..ae8d711 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -176,13 +176,20 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" name = "cool" version = "0.0.1" dependencies = [ - "cfg-if", - "clap", "indoc", "phf", "pretty_assertions", ] +[[package]] +name = "coolc-bin" +version = "0.0.1" +dependencies = [ + "cfg-if", + "clap", + "cool", +] + [[package]] name = "criterion" version = "0.5.1" @@ -539,6 +546,10 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "runtime" +version = "0.0.1" + [[package]] name = "rustversion" version = "1.0.19" diff 
--git a/Cargo.toml b/Cargo.toml index 84335b1..c2cc326 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] -members = [".", "bench"] +members = [".", "coolc-bin", "runtime", "bench"] +default-members = [".", "coolc-bin"] resolver = "3" [workspace.package] @@ -31,15 +32,8 @@ result_unit_err = { level = "allow", priority = 2 } name = "cool" version.workspace = true edition.workspace = true -default-run = "coolc" - -[[bin]] -name = "coolc" -path = "./src/bin/main.rs" [dependencies] -cfg-if.workspace = true -clap.workspace = true phf.workspace = true [dev-dependencies] diff --git a/TODO b/TODO new file mode 100644 index 0000000..47642c6 --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +- Forbid (new T) where T is primitive diff --git a/coolc-bin/Cargo.toml b/coolc-bin/Cargo.toml new file mode 100644 index 0000000..8fff675 --- /dev/null +++ b/coolc-bin/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "coolc-bin" +edition.workspace = true +version.workspace = true + +[dependencies] +cool.workspace = true +cfg-if.workspace = true +clap.workspace = true + +[build-dependencies] +cool.workspace = true + +[lints] +workspace = true diff --git a/coolc-bin/build.rs b/coolc-bin/build.rs new file mode 100644 index 0000000..b0b6279 --- /dev/null +++ b/coolc-bin/build.rs @@ -0,0 +1,46 @@ +use std::{path::Path, process::Command}; + +use cool::codegen::Target; + +fn compile_target(target: Target) { + let cargo = env!("CARGO"); + + let manifest = Path::new(env!("CARGO_MANIFEST_PATH")); + let root = manifest + .parent() + .and_then(|p| p.parent()) + .unwrap() + .to_str() + .unwrap(); + + let target_dir = format!("{root}/target/_coolc/runtime/rt-{}", target); + + let out = Command::new(cargo) + .arg("build") + .arg("--release") + .arg("--package") + .arg("runtime") + .arg("--target-dir") + .arg(&target_dir) + .arg("--target") + .arg(target.triple()) + .output() + .expect("failed to run cargo"); + if !out.status.success() { + let error = String::from_utf8_lossy(&out.stderr); + 
panic!("failed to build {target}:\n{error}"); + } + + // Cargo creates a folder with the target inside the path specified in + // `--target-dir`. Hence `{target}` appears two times in the path. + let cargo_triple = target.triple(); + let runtime_archive_path = format!("{target_dir}/{cargo_triple}/release/libruntime.a"); + println!("cargo::rustc-env=COOL_RT_{target}={runtime_archive_path}"); +} + +fn main() { + println!("cargo::rerun-if-changed=../runtime"); + for target in Target::ALL { + compile_target(*target); + } +} diff --git a/src/bin/main.rs b/coolc-bin/src/main.rs similarity index 90% rename from src/bin/main.rs rename to coolc-bin/src/main.rs index 47c2415..0cff34f 100644 --- a/src/bin/main.rs +++ b/coolc-bin/src/main.rs @@ -9,8 +9,7 @@ use std::{ use clap::Parser; use cool::{ - codegen::{self, Target}, - parser, + codegen, parser, token::{Spanned, Token}, type_checker, util::{ @@ -20,6 +19,10 @@ use cool::{ }, }; +use crate::target::{Target, DEFAULT_TARGET}; + +mod target; + #[derive(Parser)] struct Args { /// Emits the untyped AST. @@ -35,8 +38,8 @@ struct Args { assembly: bool, /// Compilation target. - #[arg(short, long, default_value_t = codegen::DEFAULT_TARGET)] - target: codegen::Target, + #[arg(short, long, default_value_t = DEFAULT_TARGET)] + target: Target, /// Binary output. #[arg(short, long)] @@ -56,7 +59,7 @@ fn main() { fn run() -> Result<(), Box> { let args = Args::parse(); - if args.target == Target::none { + if args.target == Target::None { eprintln!( "Couldn't infer compilation target. Check `--help` for available target options." 
); @@ -169,17 +172,28 @@ fn pipeline(src: &str, args: &Args, tokens: &mut Vec, ident_interner: &mu exit(1); } println!("=== Assembly ==="); - codegen::generate(io::stdout(), ident_interner, args.target, &typed_prog); + codegen::generate( + io::stdout(), + ident_interner, + args.target.into(), + &typed_prog, + ); } let Some(out_file) = &args.output else { return; }; + let target = args.target; // Create assembly file std::fs::create_dir_all("target/_coolc").unwrap(); let mut out_assembly = BufWriter::new(File::create("target/_coolc/out.s").unwrap()); - codegen::generate(&mut out_assembly, ident_interner, args.target, &typed_prog); + codegen::generate( + &mut out_assembly, + ident_interner, + args.target.into(), + &typed_prog, + ); out_assembly.flush().unwrap(); // Assemble @@ -199,6 +213,11 @@ fn pipeline(src: &str, args: &Args, tokens: &mut Vec, ident_interner: &mu exit(1); } + // Create runtime archive file + let runtime_file = &format!("target/_coolc/libruntime-{target}.a"); + std::fs::write(runtime_file, target.get_runtime_bytes()) + .expect("Failed to create runtime file"); + // Link eprintln!("linking"); let cc = env::var("CC"); @@ -207,6 +226,7 @@ fn pipeline(src: &str, args: &Args, tokens: &mut Vec, ident_interner: &mu .arg("-o") .arg(out_file) .arg("target/_coolc/out.o") + .arg(runtime_file) .output() .unwrap_or_else(|error| { eprintln!("failed to link program with `{cc}`: {error}"); diff --git a/coolc-bin/src/target.rs b/coolc-bin/src/target.rs new file mode 100644 index 0000000..e44646d --- /dev/null +++ b/coolc-bin/src/target.rs @@ -0,0 +1,49 @@ +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum)] +#[clap(rename_all = "snake_case")] +pub enum Target { + x86_64_darwin, + x86_64_linux, + #[clap(skip)] + None, +} + +impl std::fmt::Display for Target { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if *self == Target::None { + f.write_str("none") + } else { + 
cool::codegen::Target::from(*self).fmt(f) + } + } +} + +impl Target { + pub const fn get_runtime_bytes(&self) -> &'static [u8] { + match self { + Target::x86_64_darwin => include_bytes!(env!("COOL_RT_x86_64_darwin")), + Target::x86_64_linux => include_bytes!(env!("COOL_RT_x86_64_linux")), + Target::None => panic!("invalid target none"), + } + } +} + +impl From<Target> for cool::codegen::Target { + fn from(value: Target) -> Self { + match value { + Target::x86_64_darwin => cool::codegen::Target::x86_64_darwin, + Target::x86_64_linux => cool::codegen::Target::x86_64_linux, + Target::None => panic!("can't convert target"), + } + } +} + +cfg_if::cfg_if! { + if #[cfg(all(target_arch = "x86_64", target_os = "macos"))] { + pub const DEFAULT_TARGET: Target = Target::x86_64_darwin; + } else if #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + pub const DEFAULT_TARGET: Target = Target::x86_64_linux; + } else { + pub const DEFAULT_TARGET: Target = Target::None; + } +} diff --git a/examples/trivial2.cool b/examples/trivial2.cool new file mode 100644 index 0000000..e37def9 --- /dev/null +++ b/examples/trivial2.cool @@ -0,0 +1,9 @@ +class Main { + main() : Int { + let io : IO <- new IO in + { + io.out_int(5); + 0; + } + }; +}; diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml new file mode 100644 index 0000000..4a45bd5 --- /dev/null +++ b/runtime/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "runtime" +edition.workspace = true +version.workspace = true + +[lib] +crate-type = ["staticlib"] + +[dependencies] + +[lints] +workspace = true diff --git a/runtime/main.s b/runtime/main.s new file mode 100644 index 0000000..3916cb3 --- /dev/null +++ b/runtime/main.s @@ -0,0 +1,26 @@ +.intel_syntax noprefix +.section .note.GNU-stack,"",@progbits + +.section .rodata +hello_world: .ascii "Hello, world!\n" +hello_world_len: .quad . 
- hello_world + +.section .text +.global main +main: + push rbp + mov rbp, rsp + + mov rax, 1 # write + mov rdi, 1 # fd 1 - stdout + lea rsi, [rip + hello_world] + mov rdx, [rip + hello_world_len] + syscall + + # Exit status (the return value of the main function). + mov rax, 0 + + pop rbp + ret + +.end diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs new file mode 100644 index 0000000..6ffd58f --- /dev/null +++ b/runtime/src/lib.rs @@ -0,0 +1,4 @@ +#[unsafe(no_mangle)] +pub fn lffg_add(left: isize, right: isize) -> isize { + left + right +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..51313f4 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "stable" +targets = ["x86_64-apple-darwin", "x86_64-unknown-linux-gnu"] diff --git a/src/codegen/interface.rs b/src/codegen/interface.rs index 2d44887..9f57a38 100644 --- a/src/codegen/interface.rs +++ b/src/codegen/interface.rs @@ -20,18 +20,25 @@ pub fn generate( match target { Target::x86_64_darwin => DarwinGenerator::new(writer, ident_interner).generate(program), Target::x86_64_linux => LinuxGenerator::new(writer, ident_interner).generate(program), - Target::none => unreachable!("must be handled during args validation"), } } #[allow(non_camel_case_types)] -#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum)] -#[clap(rename_all = "snake_case")] +#[derive(Copy, Clone, PartialEq, Eq)] pub enum Target { x86_64_darwin, x86_64_linux, - #[clap(skip)] - none, +} + +impl Target { + pub const ALL: &[Target] = &[Target::x86_64_darwin, Target::x86_64_linux]; + + pub const fn triple(&self) -> &'static str { + match self { + Target::x86_64_darwin => "x86_64-apple-darwin", + Target::x86_64_linux => "x86_64-unknown-linux-gnu", + } + } } impl std::fmt::Display for Target { @@ -39,17 +46,6 @@ impl std::fmt::Display for Target { match self { Target::x86_64_darwin => f.write_str("x86_64_darwin"), Target::x86_64_linux => f.write_str("x86_64_linux"), - 
Target::none => f.write_str("none"), } } } - -cfg_if::cfg_if! { - if #[cfg(all(target_arch = "x86_64", target_os = "macos"))] { - pub const DEFAULT_TARGET: Target = Target::x86_64_darwin; - } else if #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { - pub const DEFAULT_TARGET: Target = Target::x86_64_linux; - } else { - pub const DEFAULT_TARGET: Target = Target::none; - } -} diff --git a/src/lib.rs b/src/lib.rs index 6064369..bb301aa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,7 @@ pub mod type_checker; /// The code generator takes a typed AST and generates target machine code. pub mod codegen { mod interface; - pub use interface::{generate, Target, DEFAULT_TARGET}; + pub use interface::{generate, Target}; mod x86_64; mod x86_64_env; From 5287ff2a2e840e31eb88b36eade96592f7dc8bbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Mon, 9 Jun 2025 18:45:17 -0300 Subject: [PATCH 7/8] Add representation for objects in runtime --- coolc-bin/build.rs | 2 +- coolc-bin/src/target.rs | 2 +- runtime/src/lib.rs | 5 +- runtime/src/obj.rs | 405 ++++++++++++++++++++++++++++++++++++++++ src/lexer.rs | 2 +- src/types.rs | 2 +- 6 files changed, 410 insertions(+), 8 deletions(-) create mode 100644 runtime/src/obj.rs diff --git a/coolc-bin/build.rs b/coolc-bin/build.rs index b0b6279..8c177b1 100644 --- a/coolc-bin/build.rs +++ b/coolc-bin/build.rs @@ -13,7 +13,7 @@ fn compile_target(target: Target) { .to_str() .unwrap(); - let target_dir = format!("{root}/target/_coolc/runtime/rt-{}", target); + let target_dir = format!("{root}/target/_coolc/runtime/rt-{target}"); let out = Command::new(cargo) .arg("build") diff --git a/coolc-bin/src/target.rs b/coolc-bin/src/target.rs index e44646d..c865109 100644 --- a/coolc-bin/src/target.rs +++ b/coolc-bin/src/target.rs @@ -19,7 +19,7 @@ impl std::fmt::Display for Target { } impl Target { - pub const fn get_runtime_bytes(&self) -> &'static [u8] { + pub const fn get_runtime_bytes(self) -> &'static 
[u8] { match self { Target::x86_64_darwin => include_bytes!(env!("COOL_RT_x86_64_darwin")), Target::x86_64_linux => include_bytes!(env!("COOL_RT_x86_64_linux")), diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 6ffd58f..6c99717 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,4 +1 @@ -#[unsafe(no_mangle)] -pub fn lffg_add(left: isize, right: isize) -> isize { - left + right -} +pub mod obj; diff --git a/runtime/src/obj.rs b/runtime/src/obj.rs new file mode 100644 index 0000000..3924eb4 --- /dev/null +++ b/runtime/src/obj.rs @@ -0,0 +1,405 @@ +use std::alloc::{alloc, dealloc, Layout}; +use std::ffi::c_void; +use std::ptr::NonNull; + +// Type IDs +pub const OBJECT_TYPE_ID: u32 = 1; +pub const NO_TYPE_TYPE_ID: u32 = 2; +pub const STRING_TYPE_ID: u32 = 3; +pub const INT_TYPE_ID: u32 = 4; +pub const BOOL_TYPE_ID: u32 = 5; +pub const IO_TYPE_ID: u32 = 6; + +#[repr(C)] +pub struct VTable { + pub size: u32, + pub type_id: u32, + // ... functions (in the heap allocation) +} + +impl VTable { + #[cfg(test)] + pub unsafe fn dummy() -> *mut VTable { + Box::into_raw(Box::new(VTable { + size: 8, + type_id: NO_TYPE_TYPE_ID, + })) + } +} + +#[repr(C)] +pub struct Object { + pub size: u32, + pub type_id: u32, + pub vtable_ptr: NonNull, + // ... fields (in the heap allocation) +} + +impl Object { + pub fn fields(&self) -> u32 { + self.size.checked_sub(Self::HEADER_SIZE).unwrap() + } +} + +#[repr(C)] +pub struct String { + pub base: Object, + pub len: usize, + pub bytes: *mut u8, +} + +#[repr(C)] +pub struct Int { + pub base: Object, + pub value: i64, +} + +#[repr(C)] +pub struct Bool { + pub base: Object, + pub value: bool, +} + +#[repr(C)] +pub struct IO { + pub base: Object, +} + +impl Object { + /// size + type_id + vtable_ptr. + pub const HEADER_SIZE: u32 = 16; + + /// Allocates a new object with the given TOTAL size (includes all the + /// fields), type ID and vtable pointer. 
+ /// + /// # Safety + /// + /// Caller must ensure: + /// - `size` >= [`Self::HEADER_SIZE`] (16 bytes minimum) + /// - `size` is a multiple of 8 to maintain pointer alignment for trailing + /// fields + /// - `type_id` accurately represents the intended object type that will be + /// stored + /// - `vtable_ptr` points to a valid, properly initialized VTable that will + /// remain valid for the lifetime of this object + /// - `vtable_ptr` is properly aligned for VTable + /// - The caller will properly initialize any type-specific fields after + /// allocation + /// - The returned object will be deallocated using [`Object::deallocate`] + pub unsafe fn allocate( + size: u32, + type_id: u32, + vtable_ptr: NonNull, + ) -> NonNull { + assert!( + size >= Self::HEADER_SIZE, + "Size must be at least header size" + ); + assert!( + size % 8 == 0, + "Size must be 8-byte aligned for pointer fields" + ); + + let layout = Layout::from_size_align_unchecked(size as usize, 8); + let ptr = alloc(layout) as *mut Object; + + if ptr.is_null() { + panic!("Allocation failed"); + } + + // Initialize header + std::ptr::write( + ptr, + Object { + size, + type_id, + vtable_ptr, + }, + ); + + // Zero-initialize the trailing fields + let fields_size = size - Self::HEADER_SIZE; + if fields_size > 0 { + let fields_ptr = (ptr as *mut u8).add(Self::HEADER_SIZE as usize); + std::ptr::write_bytes(fields_ptr, 0, fields_size as usize); + } + + NonNull::new_unchecked(ptr) + } + + /// Deallocates an object. 
+ /// + /// # Safety + /// + /// Caller must ensure: + /// - `obj` was allocated using [`Object::allocate`] + /// - `obj` has not been deallocated before + /// - `obj.size` has not been corrupted since allocation + /// - No references to this object or its fields exist after this call + /// - The object is not accessed after deallocation + pub unsafe fn deallocate(obj: NonNull) { + let size = obj.as_ref().size; + let layout = Layout::from_size_align_unchecked(size as usize, 8); + dealloc(obj.as_ptr() as *mut u8, layout); + } + + /// Returns the trailing dynamic part as (field_count, fields_ptr). + /// + /// Assumes trailing data consists of pointer-sized fields. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was properly allocated using [`Object::allocate`] + /// - `self.size` has not been corrupted since allocation + /// - The object's memory layout has not been corrupted + /// - If `field_count > 0`, the trailing memory was intended as pointer + /// fields + /// - The returned pointer is only used while `self` remains valid + pub unsafe fn get_fields(&self) -> (usize, *mut *mut c_void) { + let trailing_fields = self.fields(); + let field_size = std::mem::size_of::<*mut c_void>() as u32; + let field_count = (trailing_fields / field_size) as usize; + + let fields_ptr = if field_count > 0 { + let base_ptr = (self as *const Object) as *const u8; + base_ptr.add(Self::HEADER_SIZE as usize) as *mut *mut c_void + } else { + std::ptr::null_mut() + }; + + (field_count, fields_ptr) + } + + /// Returns a specific trailing field by index. 
+ /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was properly allocated using [`Object::allocate`] + /// - `self.size` and memory layout have not been corrupted + /// - `index < field_count` (as returned by [`Object::get_fields`]) + /// - The trailing memory was allocated and initialized as pointer fields + /// - The object remains valid for the duration of use + pub unsafe fn get_field(&self, index: usize) -> *mut c_void { + let (field_count, fields_ptr) = self.get_fields(); + assert!(index < field_count, "Field index out of bounds"); + *fields_ptr.add(index) + } + + /// Sets a specific trailing field by index. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was properly allocated using [`Object::allocate`] + /// - `self.size` and memory layout have not been corrupted + /// - `index < field_count` (as returned by [`Object::get_fields`]) + /// - The trailing memory was allocated as pointer fields + /// - `value` is a valid pointer or null (must not be a dangling pointer) + /// - If `value` is non-null, it points to valid memory that will remain + /// valid + /// - The object remains valid and won't be deallocated while field is in + /// use + pub unsafe fn set_field(&mut self, index: usize, value: *mut c_void) { + let (field_count, fields_ptr) = self.get_fields(); + assert!(index < field_count, "Field index out of bounds"); + *fields_ptr.add(index) = value; + } + + /// Casts to String. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as a [`String`] object + /// - The memory actually contains a valid [`String`] layout + /// - `self.type_id` accurately reflects that this is a String object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_string(&self) -> *mut String { + self as *const Object as *mut String + } + + /// Casts to Int. 
+ /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as an [`Int`] object + /// - The memory actually contains a valid [`Int`] layout + /// - `self.type_id` accurately reflects that this is an Int object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_int(&self) -> *mut Int { + self as *const Object as *mut Int + } + + /// Casts to Bool + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as a [`Bool`] object + /// - The memory actually contains a valid [`Bool`] layout + /// - `self.type_id` accurately reflects that this is a Bool object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_bool(&self) -> *mut Bool { + self as *const Object as *mut Bool + } + + /// Casts to IO. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as an [`IO`] object + /// - The memory actually contains a valid [`IO`] layout + /// - `self.type_id` accurately reflects that this is an IO object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_io(&self) -> *mut IO { + self as *const Object as *mut IO + } +} + +impl String { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for strings that will remain + /// valid + /// - `bytes` is either null or points to at least `len` bytes of valid, + /// initialized memory + /// - If `bytes` is non-null, the memory will remain valid for the object's + /// lifetime + /// - `len` accurately represents the number of valid bytes at `bytes` + /// - The caller will properly manage the lifetime of both the object and + /// the `bytes` memory + pub unsafe fn new(vtable_ptr: NonNull, len: usize, bytes: *mut u8) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, STRING_TYPE_ID, vtable_ptr); + + let string_ptr = obj.as_ptr() as *mut String; + (*string_ptr).len = len; + 
(*string_ptr).bytes = bytes; + + NonNull::new_unchecked(string_ptr) + } +} + +impl Int { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for integers that will remain + /// valid + /// - The caller will properly manage the lifetime of the returned object + pub unsafe fn new(vtable_ptr: NonNull, value: i64) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, INT_TYPE_ID, vtable_ptr); + + let int_ptr = obj.as_ptr() as *mut Int; + (*int_ptr).value = value; + + NonNull::new_unchecked(int_ptr) + } +} + +impl Bool { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for booleans that will remain + /// valid + /// - The caller will properly manage the lifetime of the returned object + pub unsafe fn new(vtable_ptr: NonNull, value: bool) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, BOOL_TYPE_ID, vtable_ptr); + + let bool_ptr = obj.as_ptr() as *mut Bool; + (*bool_ptr).value = value; + + NonNull::new_unchecked(bool_ptr) + } +} + +impl IO { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for IO that will remain valid + /// - The caller will properly manage the lifetime of the returned object + pub unsafe fn new(vtable_ptr: NonNull) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, IO_TYPE_ID, vtable_ptr); + + NonNull::new_unchecked(obj.as_ptr() as *mut IO) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem::{align_of, size_of}; + + #[test] + fn test_object_sizes() { + assert_eq!(std::mem::size_of::(), 16); + assert_eq!(Object::HEADER_SIZE, 16); + + assert_eq!(size_of::(), 32); + assert_eq!(size_of::(), 24); + assert_eq!(size_of::(), 24); + } + + #[test] + fn test_type_alignments() { + const _: () = { + assert!(align_of::() == 8); + + assert!(align_of::() <= 8); + assert!(align_of::() <= 8); + 
assert!(align_of::() <= 8); + }; + + assert!(align_of::() == 8); + assert!(align_of::() <= 8); + assert!(align_of::() <= 8); + assert!(align_of::() <= 8); + } + + #[test] + fn test_trailing_fields() { + unsafe { + let dummy_vtable = VTable::dummy(); + let vtable_ptr = NonNull::new_unchecked(dummy_vtable); + + // Create object with 2 trailing pointer fields (16 + 16 = 32 bytes) + let obj = Object::allocate(32, OBJECT_TYPE_ID, vtable_ptr); + let obj_ref = obj.as_ref(); + + let (field_count, _) = obj_ref.get_fields(); + assert_eq!(field_count, 2); + + Object::deallocate(obj); + drop(Box::from_raw(dummy_vtable)); + } + } + + #[test] + fn test_type_casting() { + unsafe { + let dummy_vtable = VTable::dummy(); + let vtable_ptr = NonNull::new_unchecked(dummy_vtable); + + // Create an Int + let int_obj = Int::new(vtable_ptr, 42); + let obj_ref = &int_obj.as_ref().base; + + // Test casting - now returns raw pointers + let int_ptr = obj_ref.as_int(); + assert_eq!((*int_ptr).value, 42); + + Object::deallocate(int_obj.cast()); + drop(Box::from_raw(dummy_vtable)); + } + } +} diff --git a/src/lexer.rs b/src/lexer.rs index c14a710..86d13f4 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -194,7 +194,7 @@ impl Lexer<'_, '_> { match self.advance() { ')' => break, // finished closing comment '\0' => return TokenKind::ErrorUnclosedComment, - _ => continue, // sadly couldn't close it! keep scanning... + _ => (), // sadly couldn't close it! keep scanning... 
} } TokenKind::MultilineComment diff --git a/src/types.rs b/src/types.rs index 61cdac5..0cc0bf7 100644 --- a/src/types.rs +++ b/src/types.rs @@ -107,7 +107,7 @@ impl Type { pub fn is_subtype_of(&self, other: &Self) -> bool { if self.name() == builtins::NO_TYPE { return true; - }; + } let mut curr = self; loop { if curr == other { From b7d1f221dd4cb9c46d833069835b86ec41a80dc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luiz=20Felipe=20Gon=C3=A7alves?= Date: Mon, 9 Jun 2025 18:48:52 -0300 Subject: [PATCH 8/8] Fix test due to main return type check --- src/type_checker.rs | 8 +++++++- src/types.rs | 3 --- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/type_checker.rs b/src/type_checker.rs index 89c52bd..d378e81 100644 --- a/src/type_checker.rs +++ b/src/type_checker.rs @@ -159,7 +159,13 @@ impl Checker<'_> { self.assert_is_subtype(body.ty(), &return_ty, body.span); let name = (self.current_class, method.name.name); - if name == (well_known::MAIN, well_known::MAIN_METHOD) { + // Usually, checks such as `&& !self.found_main` aren't necessary. + // However, the type checker's caller may set a flag to disable the + // main check. In that case, `found_main` is set to true in the + // constructor. Hence, this guard is needed to skip the return type + // check if main was already found *or* if the main check was + // bypassed by such a flag. + if name == (well_known::MAIN, well_known::MAIN_METHOD) && !self.found_main { self.found_main = true; self.assert_is_type(&return_ty, builtins::INT, return_ty.span()); } diff --git a/src/types.rs b/src/types.rs index 0cc0bf7..b1e50c9 100644 --- a/src/types.rs +++ b/src/types.rs @@ -226,9 +226,6 @@ pub mod builtins { in_string(): String; in_int(): Int; - string_of_int(i: Int) : String; - string_of_bool(b: Bool) : String; - exit(status: Int) : ""; }; }