diff --git a/Cargo.lock b/Cargo.lock index cefea86..ae8d711 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,12 +17,56 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -84,21 +128,36 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.29" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184" +checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" dependencies = [ "clap_builder", + 
"clap_derive", ] [[package]] name = "clap_builder" -version = "4.5.29" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9" +checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -107,6 +166,12 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "cool" version = "0.0.1" @@ -116,6 +181,15 @@ dependencies = [ "pretty_assertions", ] +[[package]] +name = "coolc-bin" +version = "0.0.1" +dependencies = [ + "cfg-if", + "clap", + "cool", +] + [[package]] name = "criterion" version = "0.5.1" @@ -205,6 +279,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.4.0" @@ -228,6 +308,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.10.5" @@ -286,6 +372,12 @@ version = "1.20.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + [[package]] name = "oorandom" version = "11.1.4" @@ -454,6 +546,10 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "runtime" +version = "0.0.1" + [[package]] name = "rustversion" version = "1.0.19" @@ -513,6 +609,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.98" @@ -540,6 +642,12 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 703ddfc..c2cc326 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] -members = [".", "bench"] +members = [".", "coolc-bin", "runtime", "bench"] +default-members = [".", "coolc-bin"] resolver = "3" [workspace.package] @@ -8,6 +9,8 @@ version = "0.0.1" [workspace.dependencies] cool.path = "." 
+cfg-if = "1.0.0" +clap = { version = "4.5.39", features = ["derive"] } criterion = "0.5.1" indoc = "2.0.6" phf = { version = "0.11.3", features = ["macros"] } diff --git a/TODO b/TODO new file mode 100644 index 0000000..47642c6 --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +- Forbid (new T) where T is primitive diff --git a/coolc-bin/Cargo.toml b/coolc-bin/Cargo.toml new file mode 100644 index 0000000..8fff675 --- /dev/null +++ b/coolc-bin/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "coolc-bin" +edition.workspace = true +version.workspace = true + +[dependencies] +cool.workspace = true +cfg-if.workspace = true +clap.workspace = true + +[build-dependencies] +cool.workspace = true + +[lints] +workspace = true diff --git a/coolc-bin/build.rs b/coolc-bin/build.rs new file mode 100644 index 0000000..8c177b1 --- /dev/null +++ b/coolc-bin/build.rs @@ -0,0 +1,46 @@ +use std::{path::Path, process::Command}; + +use cool::codegen::Target; + +fn compile_target(target: Target) { + let cargo = env!("CARGO"); + + let manifest = Path::new(env!("CARGO_MANIFEST_PATH")); + let root = manifest + .parent() + .and_then(|p| p.parent()) + .unwrap() + .to_str() + .unwrap(); + + let target_dir = format!("{root}/target/_coolc/runtime/rt-{target}"); + + let out = Command::new(cargo) + .arg("build") + .arg("--release") + .arg("--package") + .arg("runtime") + .arg("--target-dir") + .arg(&target_dir) + .arg("--target") + .arg(target.triple()) + .output() + .expect("failed to run cargo"); + if !out.status.success() { + let error = String::from_utf8_lossy(&out.stderr); + panic!("failed to build {target}:\n{error}"); + } + + // Cargo creates a folder with the target inside the path specified in + // `--target-dir`. Hence `{target}` appears two times in the path. 
+ let cargo_triple = target.triple(); + let runtime_archive_path = format!("{target_dir}/{cargo_triple}/release/libruntime.a"); + println!("cargo::rustc-env=COOL_RT_{target}={runtime_archive_path}"); +} + +fn main() { + println!("cargo::rerun-if-changed=../runtime"); + for target in Target::ALL { + compile_target(*target); + } +} diff --git a/coolc-bin/src/main.rs b/coolc-bin/src/main.rs new file mode 100644 index 0000000..0cff34f --- /dev/null +++ b/coolc-bin/src/main.rs @@ -0,0 +1,287 @@ +use std::{ + env, + error::Error, + fs::{self, File}, + io::{self, BufWriter, Write}, + path::PathBuf, + process::exit, +}; + +use clap::Parser; +use cool::{ + codegen, parser, + token::{Spanned, Token}, + type_checker, + util::{ + self, + fmt::{tree::print_program, Show}, + intern::Interner, + }, +}; + +use crate::target::{Target, DEFAULT_TARGET}; + +mod target; + +#[derive(Parser)] +struct Args { + /// Emits the untyped AST. + #[arg(short, long)] + untyped_ast: bool, + + /// Emits the typed AST. + #[arg(long)] + typed_ast: bool, + + /// Emits the un-assembled machine code. + #[arg(long)] + assembly: bool, + + /// Compilation target. + #[arg(short, long, default_value_t = DEFAULT_TARGET)] + target: Target, + + /// Binary output. + #[arg(short, long)] + output: Option, + + /// Program input, Cool source code. + input: Option, +} + +fn main() { + if let Err(error) = run() { + eprintln!("Error: {error}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Box> { + let args = Args::parse(); + + if args.target == Target::None { + eprintln!( + "Couldn't infer compilation target. Check `--help` for available target options." 
+ ); + exit(1); + } + + let mut tokens_buf = Vec::with_capacity(8 * 1024); + let mut ident_interner = Interner::with_capacity(1024); + + // File mode + if let Some(prog_path) = &args.input { + let input = fs::read_to_string(prog_path)?; + pipeline(&input, &args, &mut tokens_buf, &mut ident_interner); + return Ok(()); + } + + // Interactive REPL mode + println!("Welcome to interactive coolc."); + println!("Enter code, finish with empty line, or send Ctrl+D to exit."); + + let mut accumulated_input = String::new(); + + loop { + if accumulated_input.is_empty() { + print!("> "); + } else { + print!("| "); + } + io::stdout().flush()?; + + let mut current_line = String::new(); + let n = io::stdin().read_line(&mut current_line)?; + + if n == 0 { + println!(); + if accumulated_input.trim().is_empty() { + println!("^D"); + } else { + pipeline( + &accumulated_input, + &args, + &mut tokens_buf, + &mut ident_interner, + ); + } + return Ok(()); + } + + // Empty line is another termination signal + if current_line.trim().is_empty() { + if !accumulated_input.trim().is_empty() { + pipeline( + &accumulated_input, + &args, + &mut tokens_buf, + &mut ident_interner, + ); + accumulated_input.clear(); // Clear for next input + } + } else { + accumulated_input.push_str(&current_line); + } + } +} + +fn pipeline(src: &str, args: &Args, tokens: &mut Vec, ident_interner: &mut Interner) { + tokens.clear(); + + let prog = match parser::parse_program(src, tokens, ident_interner) { + Ok(prog) => prog, + Err((prog, errors)) => { + eprintln!("Got {} errors", errors.len()); + eprintln!(); + eprintln!("Partial AST:"); + print_program(&mut io::stdout(), ident_interner, &prog).unwrap(); + eprintln!(); + for error in errors { + report_error(src, &error, ident_interner); + } + return; + } + }; + + if args.untyped_ast { + println!("=== Untyped AST ==="); + print_program(&mut io::stdout(), ident_interner, &prog).unwrap(); + } + + let checker = + type_checker::Checker::with_capacity(ident_interner, 512, 
type_checker::flags::DEFAULT); + let (typed_prog, _registry) = match checker.check(prog) { + Ok(prog) => prog, + Err((_prog, _registry, errors)) => { + eprintln!("Got {} type errors", errors.len()); + for error in errors { + report_error(src, &error, ident_interner); + } + return; + } + }; + + if args.typed_ast { + println!("=== Typed AST ==="); + print_program(&mut io::stdout(), ident_interner, &typed_prog).unwrap(); + } + + if args.assembly { + if args.output.is_some() { + eprintln!("`output` flag can't be used with `--assembly` in this version. Try again."); + exit(1); + } + println!("=== Assembly ==="); + codegen::generate( + io::stdout(), + ident_interner, + args.target.into(), + &typed_prog, + ); + } + + let Some(out_file) = &args.output else { + return; + }; + let target = args.target; + + // Create assembly file + std::fs::create_dir_all("target/_coolc").unwrap(); + let mut out_assembly = BufWriter::new(File::create("target/_coolc/out.s").unwrap()); + codegen::generate( + &mut out_assembly, + ident_interner, + args.target.into(), + &typed_prog, + ); + out_assembly.flush().unwrap(); + + // Assemble + eprintln!("assembling"); + let as_out = std::process::Command::new("as") + .arg("-o") + .arg("target/_coolc/out.o") + .arg("target/_coolc/out.s") + .output() + .unwrap_or_else(|error| { + eprintln!("failed to assemble program with `as`: {error}"); + exit(1); + }); + if !as_out.status.success() { + eprintln!("failed to assemble program with `as`:"); + io::stderr().write_all(&as_out.stderr).unwrap(); + exit(1); + } + + // Create runtime archive file + let runtime_file = &format!("target/_coolc/libruntime-{target}.a"); + std::fs::write(runtime_file, target.get_runtime_bytes()) + .expect("Failed to create runtime file"); + + // Link + eprintln!("linking"); + let cc = env::var("CC"); + let cc = cc.as_deref().unwrap_or("clang"); + let link_out = std::process::Command::new(cc) + .arg("-o") + .arg(out_file) + .arg("target/_coolc/out.o") + .arg(runtime_file) + .output() 
+ .unwrap_or_else(|error| { + eprintln!("failed to link program with `{cc}`: {error}"); + exit(1); + }); + if !link_out.status.success() { + eprintln!("failed to link program with `{cc}`:"); + io::stderr().write_all(&link_out.stderr).unwrap(); + exit(1); + } +} + +fn report_error(src: &str, error: &Spanned, ident_interner: &Interner) +where + Spanned: Show, +{ + let span = error.span; + + // Try to find line number and column + let mut line = 1; + let mut line_start = 0; + let mut column = 0; + + // Calculate the start position (line and column) + for (i, char) in src.char_indices() { + if i >= span.lo as usize { + column = i - line_start + 1; + break; + } + if char == '\n' { + line += 1; + line_start = i + 1; + } + } + // If span.lo is beyond the source length (e.g., EOF span at end) + if span.lo as usize >= src.len() && !src.is_empty() { + column = src.len() - line_start + 1; + } else if src.is_empty() { + column = 1; + } + + let ctx = util::fmt::Context { ident_interner }; + let error_display = error.display(&ctx); + eprintln!("Error (line {line}, col {column}): {error_display}"); + + if let Some(line_content) = src.lines().nth(line - 1) { + eprintln!("{line:>4} | {line_content}"); + // Add an indicator '^' under the approximate error location + let indicator_padding = column.saturating_sub(1); + let indicator_len = std::cmp::max(1, (span.hi - span.lo) as usize); + eprintln!( + "{:>4} | {}{}", + "", + " ".repeat(indicator_padding), + "^".repeat(indicator_len) + ); + } +} diff --git a/coolc-bin/src/target.rs b/coolc-bin/src/target.rs new file mode 100644 index 0000000..c865109 --- /dev/null +++ b/coolc-bin/src/target.rs @@ -0,0 +1,49 @@ +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum)] +#[clap(rename_all = "snake_case")] +pub enum Target { + x86_64_darwin, + x86_64_linux, + #[clap(skip)] + None, +} + +impl std::fmt::Display for Target { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if *self == 
Target::None { + f.write_str("none") + } else { + cool::codegen::Target::from(*self).fmt(f) + } + } +} + +impl Target { + pub const fn get_runtime_bytes(self) -> &'static [u8] { + match self { + Target::x86_64_darwin => include_bytes!(env!("COOL_RT_x86_64_darwin")), + Target::x86_64_linux => include_bytes!(env!("COOL_RT_x86_64_linux")), + Target::None => panic!("invalid target none"), + } + } +} + +impl From<Target> for cool::codegen::Target { + fn from(value: Target) -> Self { + match value { + Target::x86_64_darwin => cool::codegen::Target::x86_64_darwin, + Target::x86_64_linux => cool::codegen::Target::x86_64_linux, + Target::None => panic!("can't convert target"), + } + } +} + +cfg_if::cfg_if! { + if #[cfg(all(target_arch = "x86_64", target_os = "macos"))] { + pub const DEFAULT_TARGET: Target = Target::x86_64_darwin; + } else if #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + pub const DEFAULT_TARGET: Target = Target::x86_64_linux; + } else { + pub const DEFAULT_TARGET: Target = Target::None; + } +} diff --git a/examples/small.cool b/examples/small.cool index 129f897..c331eeb 100644 --- a/examples/small.cool +++ b/examples/small.cool @@ -1,5 +1,10 @@ class Main { main() : Int { - 1 + 2 * 3 + let io : IO <- new IO in + let res : Int <- 1 + 2 * 3 in + let s : String <- io.string_of_int(res) in { + io.out_string(s); + 0 + } }; }; diff --git a/examples/trivial.cool b/examples/trivial.cool new file mode 100644 index 0000000..143023c --- /dev/null +++ b/examples/trivial.cool @@ -0,0 +1,3 @@ +class Main { + main() : Int { 0 }; +}; diff --git a/examples/trivial2.cool b/examples/trivial2.cool new file mode 100644 index 0000000..e37def9 --- /dev/null +++ b/examples/trivial2.cool @@ -0,0 +1,9 @@ +class Main { + main() : Int { + let io : IO <- new IO in + { + io.out_int(5); + 0; + } + }; +}; diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml new file mode 100644 index 0000000..4a45bd5 --- /dev/null +++ b/runtime/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = 
"runtime" +edition.workspace = true +version.workspace = true + +[lib] +crate-type = ["staticlib"] + +[dependencies] + +[lints] +workspace = true diff --git a/runtime/main.s b/runtime/main.s new file mode 100644 index 0000000..3916cb3 --- /dev/null +++ b/runtime/main.s @@ -0,0 +1,26 @@ +.intel_syntax noprefix +.section .note.GNU-stack,"",@progbits + +.section .rodata +hello_world: .ascii "Hello, world!\n" +hello_world_len: .quad . - hello_world + +.section .text +.global main +main: + push rbp + mov rbp, rsp + + mov rax, 1 # write + mov rdi, 1 # fd 1 - stdout + lea rsi, [rip + hello_world] + mov rdx, [rip + hello_world_len] + syscall + + # Exit status (the return value of the main function). + mov rax, 0 + + pop rbp + ret + +.end diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs new file mode 100644 index 0000000..6c99717 --- /dev/null +++ b/runtime/src/lib.rs @@ -0,0 +1 @@ +pub mod obj; diff --git a/runtime/src/obj.rs b/runtime/src/obj.rs new file mode 100644 index 0000000..3924eb4 --- /dev/null +++ b/runtime/src/obj.rs @@ -0,0 +1,405 @@ +use std::alloc::{alloc, dealloc, Layout}; +use std::ffi::c_void; +use std::ptr::NonNull; + +// Type IDs +pub const OBJECT_TYPE_ID: u32 = 1; +pub const NO_TYPE_TYPE_ID: u32 = 2; +pub const STRING_TYPE_ID: u32 = 3; +pub const INT_TYPE_ID: u32 = 4; +pub const BOOL_TYPE_ID: u32 = 5; +pub const IO_TYPE_ID: u32 = 6; + +#[repr(C)] +pub struct VTable { + pub size: u32, + pub type_id: u32, + // ... functions (in the heap allocation) +} + +impl VTable { + #[cfg(test)] + pub unsafe fn dummy() -> *mut VTable { + Box::into_raw(Box::new(VTable { + size: 8, + type_id: NO_TYPE_TYPE_ID, + })) + } +} + +#[repr(C)] +pub struct Object { + pub size: u32, + pub type_id: u32, + pub vtable_ptr: NonNull<VTable>, + // ... 
fields (in the heap allocation) +} + +impl Object { + pub fn fields(&self) -> u32 { + self.size.checked_sub(Self::HEADER_SIZE).unwrap() + } +} + +#[repr(C)] +pub struct String { + pub base: Object, + pub len: usize, + pub bytes: *mut u8, +} + +#[repr(C)] +pub struct Int { + pub base: Object, + pub value: i64, +} + +#[repr(C)] +pub struct Bool { + pub base: Object, + pub value: bool, +} + +#[repr(C)] +pub struct IO { + pub base: Object, +} + +impl Object { + /// size + type_id + vtable_ptr. + pub const HEADER_SIZE: u32 = 16; + + /// Allocates a new object with the given TOTAL size (includes all the + /// fields), type ID and vtable pointer. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `size` >= [`Self::HEADER_SIZE`] (16 bytes minimum) + /// - `size` is a multiple of 8 to maintain pointer alignment for trailing + /// fields + /// - `type_id` accurately represents the intended object type that will be + /// stored + /// - `vtable_ptr` points to a valid, properly initialized VTable that will + /// remain valid for the lifetime of this object + /// - `vtable_ptr` is properly aligned for VTable + /// - The caller will properly initialize any type-specific fields after + /// allocation + /// - The returned object will be deallocated using [`Object::deallocate`] + pub unsafe fn allocate( + size: u32, + type_id: u32, + vtable_ptr: NonNull, + ) -> NonNull { + assert!( + size >= Self::HEADER_SIZE, + "Size must be at least header size" + ); + assert!( + size % 8 == 0, + "Size must be 8-byte aligned for pointer fields" + ); + + let layout = Layout::from_size_align_unchecked(size as usize, 8); + let ptr = alloc(layout) as *mut Object; + + if ptr.is_null() { + panic!("Allocation failed"); + } + + // Initialize header + std::ptr::write( + ptr, + Object { + size, + type_id, + vtable_ptr, + }, + ); + + // Zero-initialize the trailing fields + let fields_size = size - Self::HEADER_SIZE; + if fields_size > 0 { + let fields_ptr = (ptr as *mut u8).add(Self::HEADER_SIZE 
as usize); + std::ptr::write_bytes(fields_ptr, 0, fields_size as usize); + } + + NonNull::new_unchecked(ptr) + } + + /// Deallocates an object. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `obj` was allocated using [`Object::allocate`] + /// - `obj` has not been deallocated before + /// - `obj.size` has not been corrupted since allocation + /// - No references to this object or its fields exist after this call + /// - The object is not accessed after deallocation + pub unsafe fn deallocate(obj: NonNull) { + let size = obj.as_ref().size; + let layout = Layout::from_size_align_unchecked(size as usize, 8); + dealloc(obj.as_ptr() as *mut u8, layout); + } + + /// Returns the trailing dynamic part as (field_count, fields_ptr). + /// + /// Assumes trailing data consists of pointer-sized fields. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was properly allocated using [`Object::allocate`] + /// - `self.size` has not been corrupted since allocation + /// - The object's memory layout has not been corrupted + /// - If `field_count > 0`, the trailing memory was intended as pointer + /// fields + /// - The returned pointer is only used while `self` remains valid + pub unsafe fn get_fields(&self) -> (usize, *mut *mut c_void) { + let trailing_fields = self.fields(); + let field_size = std::mem::size_of::<*mut c_void>() as u32; + let field_count = (trailing_fields / field_size) as usize; + + let fields_ptr = if field_count > 0 { + let base_ptr = (self as *const Object) as *const u8; + base_ptr.add(Self::HEADER_SIZE as usize) as *mut *mut c_void + } else { + std::ptr::null_mut() + }; + + (field_count, fields_ptr) + } + + /// Returns a specific trailing field by index. 
+ /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was properly allocated using [`Object::allocate`] + /// - `self.size` and memory layout have not been corrupted + /// - `index < field_count` (as returned by [`Object::get_fields`]) + /// - The trailing memory was allocated and initialized as pointer fields + /// - The object remains valid for the duration of use + pub unsafe fn get_field(&self, index: usize) -> *mut c_void { + let (field_count, fields_ptr) = self.get_fields(); + assert!(index < field_count, "Field index out of bounds"); + *fields_ptr.add(index) + } + + /// Sets a specific trailing field by index. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was properly allocated using [`Object::allocate`] + /// - `self.size` and memory layout have not been corrupted + /// - `index < field_count` (as returned by [`Object::get_fields`]) + /// - The trailing memory was allocated as pointer fields + /// - `value` is a valid pointer or null (must not be a dangling pointer) + /// - If `value` is non-null, it points to valid memory that will remain + /// valid + /// - The object remains valid and won't be deallocated while field is in + /// use + pub unsafe fn set_field(&mut self, index: usize, value: *mut c_void) { + let (field_count, fields_ptr) = self.get_fields(); + assert!(index < field_count, "Field index out of bounds"); + *fields_ptr.add(index) = value; + } + + /// Casts to String. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as a [`String`] object + /// - The memory actually contains a valid [`String`] layout + /// - `self.type_id` accurately reflects that this is a String object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_string(&self) -> *mut String { + self as *const Object as *mut String + } + + /// Casts to Int. 
+ /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as an [`Int`] object + /// - The memory actually contains a valid [`Int`] layout + /// - `self.type_id` accurately reflects that this is an Int object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_int(&self) -> *mut Int { + self as *const Object as *mut Int + } + + /// Casts to Bool + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as a [`Bool`] object + /// - The memory actually contains a valid [`Bool`] layout + /// - `self.type_id` accurately reflects that this is a Bool object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_bool(&self) -> *mut Bool { + self as *const Object as *mut Bool + } + + /// Casts to IO. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `self` was originally allocated as an [`IO`] object + /// - The memory actually contains a valid [`IO`] layout + /// - `self.type_id` accurately reflects that this is an IO object + /// - The object remains valid for the lifetime of use + pub unsafe fn as_io(&self) -> *mut IO { + self as *const Object as *mut IO + } +} + +impl String { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for strings that will remain + /// valid + /// - `bytes` is either null or points to at least `len` bytes of valid, + /// initialized memory + /// - If `bytes` is non-null, the memory will remain valid for the object's + /// lifetime + /// - `len` accurately represents the number of valid bytes at `bytes` + /// - The caller will properly manage the lifetime of both the object and + /// the `bytes` memory + pub unsafe fn new(vtable_ptr: NonNull, len: usize, bytes: *mut u8) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, STRING_TYPE_ID, vtable_ptr); + + let string_ptr = obj.as_ptr() as *mut String; + (*string_ptr).len = len; + 
(*string_ptr).bytes = bytes; + + NonNull::new_unchecked(string_ptr) + } +} + +impl Int { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for integers that will remain + /// valid + /// - The caller will properly manage the lifetime of the returned object + pub unsafe fn new(vtable_ptr: NonNull, value: i64) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, INT_TYPE_ID, vtable_ptr); + + let int_ptr = obj.as_ptr() as *mut Int; + (*int_ptr).value = value; + + NonNull::new_unchecked(int_ptr) + } +} + +impl Bool { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for booleans that will remain + /// valid + /// - The caller will properly manage the lifetime of the returned object + pub unsafe fn new(vtable_ptr: NonNull, value: bool) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, BOOL_TYPE_ID, vtable_ptr); + + let bool_ptr = obj.as_ptr() as *mut Bool; + (*bool_ptr).value = value; + + NonNull::new_unchecked(bool_ptr) + } +} + +impl IO { + /// # Safety + /// + /// Caller must ensure: + /// - `vtable_ptr` points to a valid `VTable` for IO that will remain valid + /// - The caller will properly manage the lifetime of the returned object + pub unsafe fn new(vtable_ptr: NonNull) -> NonNull { + let size = std::mem::size_of::() as u32; + let obj = Object::allocate(size, IO_TYPE_ID, vtable_ptr); + + NonNull::new_unchecked(obj.as_ptr() as *mut IO) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem::{align_of, size_of}; + + #[test] + fn test_object_sizes() { + assert_eq!(std::mem::size_of::(), 16); + assert_eq!(Object::HEADER_SIZE, 16); + + assert_eq!(size_of::(), 32); + assert_eq!(size_of::(), 24); + assert_eq!(size_of::(), 24); + } + + #[test] + fn test_type_alignments() { + const _: () = { + assert!(align_of::() == 8); + + assert!(align_of::() <= 8); + assert!(align_of::() <= 8); + 
assert!(align_of::() <= 8); + }; + + assert!(align_of::() == 8); + assert!(align_of::() <= 8); + assert!(align_of::() <= 8); + assert!(align_of::() <= 8); + } + + #[test] + fn test_trailing_fields() { + unsafe { + let dummy_vtable = VTable::dummy(); + let vtable_ptr = NonNull::new_unchecked(dummy_vtable); + + // Create object with 2 trailing pointer fields (16 + 16 = 32 bytes) + let obj = Object::allocate(32, OBJECT_TYPE_ID, vtable_ptr); + let obj_ref = obj.as_ref(); + + let (field_count, _) = obj_ref.get_fields(); + assert_eq!(field_count, 2); + + Object::deallocate(obj); + drop(Box::from_raw(dummy_vtable)); + } + } + + #[test] + fn test_type_casting() { + unsafe { + let dummy_vtable = VTable::dummy(); + let vtable_ptr = NonNull::new_unchecked(dummy_vtable); + + // Create an Int + let int_obj = Int::new(vtable_ptr, 42); + let obj_ref = &int_obj.as_ref().base; + + // Test casting - now returns raw pointers + let int_ptr = obj_ref.as_int(); + assert_eq!((*int_ptr).value, 42); + + Object::deallocate(int_obj.cast()); + drop(Box::from_raw(dummy_vtable)); + } + } +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..51313f4 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "stable" +targets = ["x86_64-apple-darwin", "x86_64-unknown-linux-gnu"] diff --git a/src/ast.rs b/src/ast.rs index 0b11fc5..c8a43e4 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -59,6 +59,8 @@ pub trait Info { type Assignment: Clone + Debug; type Id: Clone + Debug; + type LetBinding: Clone + Debug; + type CaseArm: Clone + Debug; } /// Untyped AST. @@ -70,6 +72,8 @@ impl Info for Untyped { type Expr = (); type Assignment = (); type Id = (); + type LetBinding = (); + type CaseArm = (); } /// Typed AST. 
@@ -81,6 +85,8 @@ impl Info for Typed { type Expr = Type; type Assignment = Symbol; type Id = Symbol; + type LetBinding = Symbol; + type CaseArm = Symbol; } #[derive(Debug, Default)] @@ -101,12 +107,12 @@ pub struct Class { #[derive(Debug, Clone)] pub enum Feature { - Attribute(Binding), + Attribute(Attribute), Method(Method), } #[derive(Debug, Clone)] -pub struct Binding { +pub struct Attribute { pub name: Ident, pub ty: I::Ty, pub initializer: Option>, @@ -182,7 +188,7 @@ pub enum ExprKind { }, Let { /// Non empty list of bindings. - bindings: Vec>, + bindings: Vec>, body: Box>, }, Case { @@ -210,6 +216,14 @@ pub enum ExprKind { Dummy, } +#[derive(Debug, Clone)] +pub struct LetBinding { + pub name: Ident, + pub ty: I::Ty, + pub initializer: Option>, + pub info: I::LetBinding, +} + #[derive(Debug, Clone)] pub struct DispatchQualifier { pub expr: Box>, @@ -221,6 +235,7 @@ pub struct CaseArm { pub name: Ident, pub ty: I::Ty, pub body: Box>, + pub info: I::CaseArm, } #[derive(Copy, Clone, Debug)] @@ -299,7 +314,7 @@ pub mod desugar { use super::*; pub fn multi_binding_let( - bindings: Vec>, + bindings: Vec>, mut body: Box>, span: Span, info: &I::Expr, diff --git a/src/bin/main.rs b/src/bin/main.rs deleted file mode 100644 index fd681c2..0000000 --- a/src/bin/main.rs +++ /dev/null @@ -1,160 +0,0 @@ -use std::{ - env, - error::Error, - fs, - io::{self, Write}, -}; - -use cool::{ - parser, - token::{Spanned, Token}, - type_checker, - util::{ - self, - fmt::{tree::print_program, Show}, - intern::Interner, - }, -}; - -fn main() { - if let Err(error) = run() { - eprintln!("Error: {error}"); - std::process::exit(1); - } -} - -fn run() -> Result<(), Box> { - let mut args = env::args().skip(1); - let mut tokens_buf = Vec::with_capacity(8 * 1024); - let mut ident_interner = Interner::with_capacity(1024); - - // File mode - if let Some(prog_path) = args.next() { - let input = fs::read_to_string(prog_path)?; - pipeline(&input, &mut tokens_buf, &mut ident_interner); - return 
Ok(()); - } - - // Interactive REPL mode - println!("Welcome to interactive coolc."); - println!("Enter code, finish with empty line, or send Ctrl+D to exit."); - - let mut accumulated_input = String::new(); - - loop { - if accumulated_input.is_empty() { - print!("> "); - } else { - print!("| "); - } - io::stdout().flush()?; - - let mut current_line = String::new(); - let n = io::stdin().read_line(&mut current_line)?; - - if n == 0 { - println!(); - if accumulated_input.trim().is_empty() { - println!("^D"); - } else { - pipeline(&accumulated_input, &mut tokens_buf, &mut ident_interner); - } - return Ok(()); - } - - // Empty line is another termination signal - if current_line.trim().is_empty() { - if !accumulated_input.trim().is_empty() { - pipeline(&accumulated_input, &mut tokens_buf, &mut ident_interner); - accumulated_input.clear(); // Clear for next input - } - } else { - accumulated_input.push_str(¤t_line); - } - } -} - -fn pipeline(src: &str, tokens: &mut Vec, ident_interner: &mut Interner) { - tokens.clear(); - - let prog = match parser::parse_program(src, tokens, ident_interner) { - Ok(prog) => prog, - Err((prog, errors)) => { - eprintln!("Got {} errors", errors.len()); - eprintln!(); - eprintln!("Partial AST:"); - print_program(&mut io::stdout(), ident_interner, &prog).unwrap(); - eprintln!(); - for error in errors { - report_error(src, &error, ident_interner); - } - return; - } - }; - - println!("=== Untyped AST ==="); - print_program(&mut io::stdout(), ident_interner, &prog).unwrap(); - - let checker = type_checker::Checker::with_capacity(ident_interner, 512); - let (typed_prog, _registry) = match checker.check(prog) { - Ok(prog) => prog, - Err((_prog, _registry, errors)) => { - eprintln!("Got {} type errors", errors.len()); - for error in errors { - report_error(src, &error, ident_interner); - } - return; - } - }; - - println!(); - println!("=== Typed AST ==="); - print_program(&mut io::stdout(), ident_interner, &typed_prog).unwrap(); -} - -fn 
report_error(src: &str, error: &Spanned, ident_interner: &Interner) -where - Spanned: Show, -{ - let span = error.span; - - // Try to find line number and column - let mut line = 1; - let mut line_start = 0; - let mut column = 0; - - // Calculate the start position (line and column) - for (i, char) in src.char_indices() { - if i >= span.lo as usize { - column = i - line_start + 1; - break; - } - if char == '\n' { - line += 1; - line_start = i + 1; - } - } - // If span.lo is beyond the source length (e.g., EOF span at end) - if span.lo as usize >= src.len() && !src.is_empty() { - column = src.len() - line_start + 1; - } else if src.is_empty() { - column = 1; - } - - let ctx = util::fmt::Context { ident_interner }; - let error_display = error.display(&ctx); - eprintln!("Error (line {line}, col {column}): {error_display}"); - - if let Some(line_content) = src.lines().nth(line - 1) { - eprintln!("{line:>4} | {line_content}"); - // Add an indicator '^' under the approximate error location - let indicator_padding = column.saturating_sub(1); - let indicator_len = std::cmp::max(1, (span.hi - span.lo) as usize); - eprintln!( - "{:>4} | {}{}", - "", - " ".repeat(indicator_padding), - "^".repeat(indicator_len) - ); - } -} diff --git a/src/codegen/interface.rs b/src/codegen/interface.rs new file mode 100644 index 0000000..9f57a38 --- /dev/null +++ b/src/codegen/interface.rs @@ -0,0 +1,51 @@ +use std::io; + +use crate::{ + ast::{Program, Typed}, + codegen::{x86_64::Generator, x86_64_env}, + util::intern::Interner, +}; + +pub fn generate( + writer: W, + ident_interner: &Interner, + target: Target, + program: &Program, +) where + W: io::Write, +{ + type DarwinGenerator<'a, W> = Generator<'a, W, x86_64_env::Darwin>; + type LinuxGenerator<'a, W> = Generator<'a, W, x86_64_env::Linux>; + + match target { + Target::x86_64_darwin => DarwinGenerator::new(writer, ident_interner).generate(program), + Target::x86_64_linux => LinuxGenerator::new(writer, ident_interner).generate(program), + 
} +} + +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum Target { + x86_64_darwin, + x86_64_linux, +} + +impl Target { + pub const ALL: &[Target] = &[Target::x86_64_darwin, Target::x86_64_linux]; + + pub const fn triple(&self) -> &'static str { + match self { + Target::x86_64_darwin => "x86_64-apple-darwin", + Target::x86_64_linux => "x86_64-unknown-linux-gnu", + } + } +} + +impl std::fmt::Display for Target { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Target::x86_64_darwin => f.write_str("x86_64_darwin"), + Target::x86_64_linux => f.write_str("x86_64_linux"), + } + } +} diff --git a/src/codegen/x86_64.rs b/src/codegen/x86_64.rs new file mode 100644 index 0000000..f980aa1 --- /dev/null +++ b/src/codegen/x86_64.rs @@ -0,0 +1,218 @@ +use std::{format_args as f, io, marker::PhantomData}; + +use crate::{ + ast::{self, Expr, Typed}, + codegen::x86_64_env, + types::well_known, + util::intern::{Interned, Interner}, +}; + +pub struct Generator<'ident, W, E> { + writer: W, + ident_interner: &'ident Interner, + indent: bool, + _env: PhantomData, +} + +impl Generator<'_, W, E> +where + W: io::Write, + E: x86_64_env::Env, +{ + pub fn new(writer: W, ident_interner: &Interner) -> Generator<'_, W, E> { + Generator { + writer, + ident_interner, + indent: false, + _env: PhantomData, + } + } + + pub fn generate(mut self, program: &ast::Program) { + self.g_program_prologue(); + self.g_methods(program); + self.g_vtables(program); + self.g_data(program); + } +} + +/// Target-specific functions. 
+impl Generator<'_, W, E> +where + W: io::Write, + E: x86_64_env::Env, +{ + fn g_program_prologue(&mut self) { + self.out(E::GLOBAL_PROLOGUE); + } + + fn g_methods(&mut self, program: &ast::Program) { + self.out(f!(".section {}", E::SECTION_TEXT)); + + for class in &program.classes { + let class_name = self.ident(&class.name); + self.out(f!("# CLASS {class_name}\n")); + + let methods = class.features.iter().filter_map(|feature| { + if let ast::Feature::Method(method) = feature { + Some(method) + } else { + None + } + }); + + for method in methods { + self.g_method(method, class_name); + } + } + } + + fn g_method(&mut self, method: &ast::Method, class_name: ResolvedIdent) { + let method_name = self.ident(method.name); + let qualified = Self::qualified(class_name, method_name); + if qualified.is_main() { + self.out(f!(".global {qualified}")); + } + self.out(f!("{qualified}:")); + self.indented(|this| { + this.g_method_prologue(method); + this.g_expr(&method.body); + this.g_method_epilogue(method); + }); + } + + #[expect(unused_variables)] + fn g_expr(&mut self, e: &Expr) { + match &e.kind { + ast::ExprKind::Assignment { + target, + value, + info, + } => todo!(), + ast::ExprKind::Dispatch { + qualifier, + method, + args, + } => todo!(), + ast::ExprKind::Conditional { + predicate, + then_arm, + else_arm, + } => todo!(), + ast::ExprKind::While { predicate, body } => todo!(), + ast::ExprKind::Block { body } => todo!(), + ast::ExprKind::Let { bindings, body } => todo!(), + ast::ExprKind::Case { predicate, arms } => todo!(), + ast::ExprKind::New { ty } => todo!(), + ast::ExprKind::Unary { op, expr } => todo!(), + ast::ExprKind::Binary { op, lhs, rhs } => todo!(), + ast::ExprKind::Paren(expr) => todo!(), + ast::ExprKind::Id(ident, _) => todo!(), + ast::ExprKind::Int(int) => self.out(f!("mov rax, {int}")), + ast::ExprKind::String(_) => todo!(), + ast::ExprKind::Bool(_) => todo!(), + ast::ExprKind::Dummy => todo!(), + } + } + + fn g_method_prologue(&mut self, _method: 
&ast::Method) { + self.out("push rbp"); + self.out("mov rbp, rsp"); + } + + fn g_method_epilogue(&mut self, _method: &ast::Method) { + self.out("pop rbp"); + self.out("ret"); + } + + #[expect(clippy::unused_self)] + fn g_vtables(&mut self, _program: &ast::Program) {} + + #[expect(clippy::unused_self)] + fn g_data(&mut self, _program: &ast::Program) {} +} + +/// Utility functions. +impl<'ident, W, E> Generator<'ident, W, E> +where + W: io::Write, + E: x86_64_env::Env, +{ + /// Prints a line. + fn out(&mut self, f: impl std::fmt::Display) { + let indent = if self.indent { " " } else { "" }; + writeln!(self.writer, "{indent}{f}").expect("Failed to write to sink"); + } + + /// Prints an empty line. + fn out_line(&mut self) { + writeln!(self.writer).expect("Failed to write to sink"); + } + + /// Resolves an identifier, returning a resolved ident. + fn ident(&mut self, handle: impl Into>) -> ResolvedIdent<'ident> { + let ident = handle.into(); + let name = self.ident_interner.get(ident); + ResolvedIdent { ident, name } + } + + fn qualified<'i>( + class: ResolvedIdent<'i>, + method: ResolvedIdent<'i>, + ) -> QualifiedMethod<'i, E> { + QualifiedMethod { + class, + method, + _env: PhantomData, + } + } + + /// Writes in an indented block that is finished with an empty line. 
+ fn indented(&mut self, f: impl FnOnce(&mut Self) -> T) -> T { + self.indent = true; + let res = f(self); + self.indent = false; + self.out_line(); + res + } +} + +#[derive(Copy, Clone)] +struct QualifiedMethod<'i, E> { + class: ResolvedIdent<'i>, + method: ResolvedIdent<'i>, + _env: PhantomData, +} + +impl QualifiedMethod<'_, E> { + fn is_main(&self) -> bool { + self.class.ident == well_known::MAIN && self.method.ident == well_known::MAIN_METHOD + } +} + +impl std::fmt::Display for QualifiedMethod<'_, E> +where + E: x86_64_env::Env, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_main() { + write!(f, "{}", E::ENTRY_POINT) + } else { + let class = self.class.name; + let method = self.method.name; + write!(f, ".{class}__{method}") + } + } +} + +#[derive(Copy, Clone)] +struct ResolvedIdent<'i> { + ident: Interned, + name: &'i str, +} + +impl std::fmt::Display for ResolvedIdent<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name) + } +} diff --git a/src/codegen/x86_64_env.rs b/src/codegen/x86_64_env.rs new file mode 100644 index 0000000..adf4dd9 --- /dev/null +++ b/src/codegen/x86_64_env.rs @@ -0,0 +1,36 @@ +pub trait Env { + const ENTRY_POINT: &str; + + const GLOBAL_PROLOGUE: &str; + + const SECTION_TEXT: &str; + const SECTION_READ_ONLY_DATA: &str; + const SECTION_READ_ONLY_RELOCATABLE_DATA: &str; +} + +impl Env for Darwin { + const ENTRY_POINT: &str = "_main"; + + const GLOBAL_PROLOGUE: &str = ".intel_syntax noprefix\n\n"; + + const SECTION_TEXT: &str = "__TEXT,__text,regular,pure_instructions"; + const SECTION_READ_ONLY_DATA: &str = "__TEXT,__const"; + const SECTION_READ_ONLY_RELOCATABLE_DATA: &str = "__DATA,__const"; +} + +impl Env for Linux { + const ENTRY_POINT: &str = "main"; + + const GLOBAL_PROLOGUE: &str = concat!( + ".intel_syntax noprefix\n", + ".section .note.GNU-stack,\"\",@progbits\n\n", + ); + + const SECTION_TEXT: &str = ".text"; + const 
SECTION_READ_ONLY_DATA: &str = ".rodata"; + const SECTION_READ_ONLY_RELOCATABLE_DATA: &str = ".data.rel.ro"; +} + +pub struct Darwin; + +pub struct Linux; diff --git a/src/lexer.rs b/src/lexer.rs index c14a710..86d13f4 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -194,7 +194,7 @@ impl Lexer<'_, '_> { match self.advance() { ')' => break, // finished closing comment '\0' => return TokenKind::ErrorUnclosedComment, - _ => continue, // sadly couldn't close it! keep scanning... + _ => (), // sadly couldn't close it! keep scanning... } } TokenKind::MultilineComment diff --git a/src/lib.rs b/src/lib.rs index 96787da..bb301aa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,15 @@ pub mod parser; /// and maps it into a typed AST. pub mod type_checker; +/// The code generator takes a typed AST and generates target machine code. +pub mod codegen { + mod interface; + pub use interface::{generate, Target}; + + mod x86_64; + mod x86_64_env; +} + pub mod ast; pub mod token; pub mod types; diff --git a/src/parser.rs b/src/parser.rs index 8560b61..3aadee2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ use crate::{ ast::{ - BinaryOperator, Binding, CaseArm, Class, DispatchQualifier, Expr, ExprKind, Feature, - Formal, Ident, Method, Program, TypeName, UnaryOperator, Untyped, + Attribute, BinaryOperator, CaseArm, Class, DispatchQualifier, Expr, ExprKind, Feature, + Formal, Ident, LetBinding, Method, Program, TypeName, UnaryOperator, Untyped, }, lexer::{self, extract}, token::{Span, Spanned, Token, TokenKind}, @@ -134,7 +134,7 @@ impl Parser<'_, '_, '_> { TokenKind::Colon => { let ty = self.parse_type()?; let initializer = self.parse_initializer()?; - Ok(Feature::Attribute(Binding { + Ok(Feature::Attribute(Attribute { name, ty, initializer, @@ -614,16 +614,17 @@ impl Parser<'_, '_, '_> { Some(bp) } - fn parse_let_binding(&mut self) -> Result> { + fn parse_let_binding(&mut self) -> Result> { let name = self.parse_ident()?; self.consume(TokenKind::Colon)?; let ty = 
self.parse_type()?; let initializer = self.parse_initializer()?; - Ok(Binding { + Ok(LetBinding { name, ty, initializer, + info: (), }) } @@ -640,6 +641,7 @@ impl Parser<'_, '_, '_> { name, ty, body: Box::new(body), + info: (), }; Ok((arm, span)) } diff --git a/src/type_checker.rs b/src/type_checker.rs index 2844267..d378e81 100644 --- a/src/type_checker.rs +++ b/src/type_checker.rs @@ -26,10 +26,25 @@ pub struct Checker<'ident> { current_class: Interned, errors: Vec>, ident_interner: &'ident mut Interner, + found_main: bool, +} + +pub mod flags { + /// Sane behavior for common user workloads. + pub const DEFAULT: u32 = 0; + + /// Disables entry point check. Useful for testing. + pub const SKIP_ENTRY_POINT_CHECK: u32 = 1 << 0; } impl Checker<'_> { - pub fn with_capacity(ident_interner: &mut Interner, capacity: usize) -> Checker<'_> { + pub fn with_capacity( + ident_interner: &mut Interner, + capacity: usize, + checker_flags: u32, + ) -> Checker<'_> { + let skip_entrypoint_check = (checker_flags & flags::SKIP_ENTRY_POINT_CHECK) == 1; + Checker { registry: TypeRegistry::with_capacity(capacity), classes: HashMap::with_capacity(0), @@ -37,6 +52,7 @@ impl Checker<'_> { current_class: builtins::NO_TYPE, errors: Vec::with_capacity(8), ident_interner, + found_main: skip_entrypoint_check, } } @@ -56,6 +72,11 @@ impl Checker<'_> { .collect(); let program = Program { classes }; + if !self.found_main { + let error = Error::MissingEntryPoint; + self.errors.push(Span::new_of_length(0, 0).wrap(error)); + } + if self.errors.is_empty() { Ok((program, self.registry)) } else { @@ -83,7 +104,7 @@ impl Checker<'_> { let attributes: Vec<_> = this.scoped_formals(current_class.clone(), &[], |this| { attributes .into_iter() - .map(|binding| this.check_binding(binding)) + .map(|binding| this.check_attribute(binding)) .collect() }); @@ -104,14 +125,14 @@ impl Checker<'_> { }) } - fn check_binding(&mut self, binding: ast::Binding) -> ast::Binding { + fn check_attribute(&mut self, binding: 
ast::Attribute) -> ast::Attribute { let ty = self.get_type_allowing_self_type(binding.ty); let initializer = binding.initializer.map(|expr| { let expr = self.check_expr(expr); self.assert_is_subtype(expr.ty(), &ty, expr.span); expr }); - ast::Binding { + ast::Attribute { name: binding.name, ty, initializer, @@ -136,6 +157,19 @@ impl Checker<'_> { }); let return_ty = self.get_type_allowing_self_type(method.return_ty); self.assert_is_subtype(body.ty(), &return_ty, body.span); + + let name = (self.current_class, method.name.name); + // Usually, checks such as `and !self.found_main` aren't necessary. + // However, the type checker's caller may define a flag to disable the + // main check. In this case, we set `found_main` as true in the + // constructor. Hence, this check is necessary to avoid running the + // return type in the check if the main was found *or* if it was + // bypassed by such a flag. + if name == (well_known::MAIN, well_known::MAIN_METHOD) && !self.found_main { + self.found_main = true; + self.assert_is_type(&return_ty, builtins::INT, return_ty.span()); + } + ast::Method { name: method.name, formals, @@ -314,24 +348,29 @@ impl Checker<'_> { )); } - let binding = { - assert_eq!(bindings.len(), 1); - let binding = bindings.remove(0); // untyped - let ty = self.get_type_allowing_self_type(binding.ty); - ast::Binding { - name: binding.name, - initializer: binding.initializer.map(|i| { - let expr = self.check_expr(i); - self.assert_is_subtype(expr.ty(), &ty, expr.span); - expr - }), - ty, - } - }; - let body = self.scoped_local(binding.name.name, binding.ty.clone(), |this| { - this.check_expr(*body) + assert_eq!(bindings.len(), 1); + let binding = bindings.remove(0); + + let binding_name = binding.name; + let binding_ty = self.get_type_allowing_self_type(binding.ty); + let binding_initializer = binding.initializer.map(|i| { + let expr = self.check_expr(i); + self.assert_is_subtype(expr.ty(), &binding_ty, expr.span); + expr }); - let bindings = 
vec![binding]; + + let (symbol, body) = + self.scoped_local(binding_name.name, binding_ty.clone(), |this| { + this.check_expr(*body) + }); + + let bindings = vec![ast::LetBinding { + name: binding_name, + ty: binding_ty, + initializer: binding_initializer, + info: symbol, + }]; + let ty = body.ty().clone(); let body = Box::new(body); (ExprKind::Let { bindings, body }, ty) @@ -350,7 +389,7 @@ impl Checker<'_> { } seen.insert(ty.name()); - let body = self.scoped_local(arm.name.name, ty.clone(), |this| { + let (symbol, body) = self.scoped_local(arm.name.name, ty.clone(), |this| { this.check_expr(*arm.body) }); @@ -360,6 +399,7 @@ impl Checker<'_> { name: arm.name, ty, body: Box::new(body), + info: symbol, } }) .collect(); @@ -804,6 +844,7 @@ impl Checker<'_> { #[derive(Copy, Clone, Debug)] pub enum Error { + MissingEntryPoint, DuplicateTypeDefinition { name: Interned, other: Span, @@ -985,14 +1026,20 @@ impl Checker<'_> { name: Interned, ty: Type, f: impl FnOnce(&mut Self) -> T, - ) -> T { + ) -> (Symbol, T) { let id = LocalId(self.symbol_table.locals); - self.symbol_table.scopes.push(Scope::Local(name, ty, id)); + self.symbol_table + .scopes + .push(Scope::Local(name, ty.clone(), id)); + let symbol = Symbol { + ty, + binding: Binding::Local(id), + }; self.symbol_table.locals += 1; let res = f(self); self.symbol_table.scopes.pop().expect("just pushed"); - res + (symbol, res) } fn scoped_formals( @@ -1096,7 +1143,7 @@ mod tests { use crate::{ parser::test_utils::parse_program, - type_checker::{Checker, ClassesEnv}, + type_checker::{flags, Checker, ClassesEnv}, util::{ intern::Interner, test_utils::{assert_errors, tree_tests}, @@ -1905,7 +1952,7 @@ mod tests { class Block inherits Entity {}; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); assert!(checker.errors.is_empty()); assert_eq!( @@ -1935,7 +1982,7 @@ mod tests { class Object {}; 
", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); assert_errors( checker.ident_interner, @@ -1954,7 +2001,7 @@ mod tests { class Entity inherits UndefinedClass {}; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); assert_errors( checker.ident_interner, @@ -1978,7 +2025,7 @@ mod tests { }; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); checker.build_classes_env(&prog); assert!(checker.errors.is_empty()); @@ -2037,7 +2084,7 @@ mod tests { }; ", ); - let mut checker = Checker::with_capacity(&mut i, 16); + let mut checker = Checker::with_capacity(&mut i, 16, flags::SKIP_ENTRY_POINT_CHECK); checker.build_type_registry(&prog); checker.build_classes_env(&prog); assert!(checker.errors.is_empty()); diff --git a/src/types.rs b/src/types.rs index 795a346..b1e50c9 100644 --- a/src/types.rs +++ b/src/types.rs @@ -107,7 +107,7 @@ impl Type { pub fn is_subtype_of(&self, other: &Self) -> bool { if self.name() == builtins::NO_TYPE { return true; - }; + } let mut curr = self; loop { if curr == other { @@ -222,8 +222,10 @@ pub mod builtins { class IO("IO", 6) inherits OBJECT { out_string(x: String) : SELF_TYPE; out_int(x: Int) : SELF_TYPE; + in_string(): String; in_int(): Int; + exit(status: Int) : ""; }; } diff --git a/src/util/fmt/error.rs b/src/util/fmt/error.rs index a46c5eb..0781c4d 100644 --- a/src/util/fmt/error.rs +++ b/src/util/fmt/error.rs @@ -18,6 +18,10 @@ impl Show for Spanned { use type_checker::Error::*; match error { + MissingEntryPoint => write!( + f, + "program has no entrypoint (`main` method in `Main` class)" + ), DuplicateTypeDefinition { name, other } => { let 
name = i.get(name); write!(f, "class {name} already defined at {other}") diff --git a/src/util/fmt/tree.rs b/src/util/fmt/tree.rs index 48c0c22..575c270 100644 --- a/src/util/fmt/tree.rs +++ b/src/util/fmt/tree.rs @@ -55,7 +55,7 @@ fn print_feature( Feature::Attribute(binding) => { sp(w, i)?; write!(w, "attribute ")?; - print_binding(w, idents, i, binding)?; + print_attribute(w, idents, i, binding)?; } Feature::Method(Method { name, @@ -84,11 +84,11 @@ fn print_feature( Ok(()) } -fn print_binding( +fn print_attribute( w: &mut impl Write, idents: &Interner, i: usize, - binding: &Binding, + binding: &Attribute, ) -> std::io::Result<()> { write!( w, @@ -180,7 +180,7 @@ pub fn print_expr( for binding in bindings { sp(w, i + 1)?; write!(w, "binding ")?; - print_binding(w, idents, i + 1, binding)?; + print_let_binding(w, idents, i + 1, binding)?; } sp(w, i + 1)?; writeln!(w, "in")?; @@ -233,6 +233,28 @@ pub fn print_expr( Ok(()) } +fn print_let_binding( + w: &mut impl Write, + idents: &Interner, + i: usize, + binding: &LetBinding, +) -> std::io::Result<()> { + write!( + w, + "{}: {}", + idents.get(binding.name), + binding.ty.write(idents), + )?; + if let Some(ref initializer) = binding.initializer { + write!(w, " (initialized)")?; + writeln!(w)?; + print_expr(w, idents, i + 1, initializer)?; + } else { + writeln!(w)?; + } + Ok(()) +} + fn print_case_arm( w: &mut impl Write, idents: &Interner, diff --git a/src/util/test_utils.rs b/src/util/test_utils.rs index 4929bce..44408f5 100644 --- a/src/util/test_utils.rs +++ b/src/util/test_utils.rs @@ -1,7 +1,7 @@ use crate::{ ast, parser, token::Spanned, - type_checker::Checker, + type_checker::{self, Checker}, util::{ self, fmt::{tree, Show}, @@ -71,7 +71,8 @@ pub fn run_pipeline(test: Test) -> (String, Vec) { }; let mut fmt_errors = format_errors(interner, &errors); - let checker = Checker::with_capacity(interner, 128); + let flags = type_checker::flags::SKIP_ENTRY_POINT_CHECK; + let checker = Checker::with_capacity(interner, 
128, flags); let (prog, errors) = match checker.check(prog) { Ok((prog, _reg)) => (prog, vec![]), Err((prog, _reg, errors)) => (prog, errors), @@ -89,7 +90,8 @@ pub fn run_pipeline(test: Test) -> (String, Vec) { let prog = ast::test_utils::from_expr_to_main_program(expr); let mut fmt_errors = format_errors(interner, &errors); - let checker = Checker::with_capacity(interner, 128); + let flags = type_checker::flags::SKIP_ENTRY_POINT_CHECK; + let checker = Checker::with_capacity(interner, 128, flags); let (prog, errors) = match checker.check(prog) { Ok((prog, _reg)) => (prog, vec![]), Err((prog, _reg, errors)) => (prog, errors),