diff --git a/crates/wasmparser/src/readers/core/custom.rs b/crates/wasmparser/src/readers/core/custom.rs index b682f016f0..33410cfe16 100644 --- a/crates/wasmparser/src/readers/core/custom.rs +++ b/crates/wasmparser/src/readers/core/custom.rs @@ -103,7 +103,7 @@ impl<'a> CustomSectionReader<'a> { /// Return value of [`CustomSectionReader::as_known`]. /// /// Note that this is `#[non_exhaustive]` because depending on crate features -/// this enumeration will different entries. +/// this enumeration will contain different entries. #[allow(missing_docs)] #[non_exhaustive] pub enum KnownCustom<'a> { diff --git a/crates/wit-component/src/encoding.rs b/crates/wit-component/src/encoding.rs index 92f04bdad9..fcf5d06733 100644 --- a/crates/wit-component/src/encoding.rs +++ b/crates/wit-component/src/encoding.rs @@ -81,6 +81,8 @@ use std::collections::HashMap; use std::hash::Hash; use std::mem; use wasm_encoder::*; +use wasmparser::KnownCustom::Producers; +use wasmparser::Payload::CustomSection; use wasmparser::{Validator, WasmFeatures}; use wit_parser::{ Function, FunctionKind, InterfaceId, LiveTypes, Resolve, Stability, Type, TypeDefKind, TypeId, @@ -2670,11 +2672,34 @@ pub(super) struct Adapter { library_info: Option, } +/// Returns whether the module appears to have been generated by TinyGo. Absent +/// any evidence, returns false. +fn is_produced_by_tiny_go(module: &[u8]) -> Result { + for section in wasmparser::Parser::new(0).parse_all(module) { + if let CustomSection(custom_reader) = section? { + if let Producers(producers_reader) = custom_reader.as_known() { + for field in producers_reader { + let field = field?; + if field.name == "processed-by" { + for value in field.values.into_iter() { + if value?.name == "TinyGo" { + return Ok(true); + } + } + } + } + return Ok(false); + } + } + } + return Ok(false); +} /// An encoder of components based on `wit` interface definitions. #[derive(Default)] pub struct ComponentEncoder { module: Vec, module_import_map: Option, + module_is_produced_by_tiny_go: bool, pub(super) metadata: Bindgen, validate: bool, pub(super) main_module_exports: IndexSet, @@ -2693,6 +2718,7 @@ impl ComponentEncoder { /// core module. pub fn module(mut self, module: &[u8]) -> Result { let (wasm, metadata) = self.decode(module.as_ref())?; + self.module_is_produced_by_tiny_go = is_produced_by_tiny_go(&module)?; let (wasm, module_import_map) = ModuleImportMap::new(wasm)?; let exports = self .merge_metadata(metadata) diff --git a/crates/wit-component/src/encoding/world.rs b/crates/wit-component/src/encoding/world.rs index fbfe7df842..6c57f6e190 100644 --- a/crates/wit-component/src/encoding/world.rs +++ b/crates/wit-component/src/encoding/world.rs @@ -1,11 +1,13 @@ use super::{Adapter, ComponentEncoder, LibraryInfo, RequiredOptions}; +use crate::gc::ReallocScheme; use crate::validation::{ Import, ImportMap, ValidatedModule, validate_adapter_module, validate_module, }; -use anyhow::{Context, Result}; +use anyhow::{Context, Result, bail}; use indexmap::{IndexMap, IndexSet}; use std::borrow::Cow; use std::collections::{HashMap, HashSet}; +use wasmparser::{FuncType, ValType}; use wit_parser::{ Function, InterfaceId, LiveTypes, Resolve, TypeDefKind, TypeId, TypeOwner, WorldId, WorldItem, WorldKey, @@ -24,7 +26,7 @@ pub struct WorldAdapter<'a> { /// `EncodingState` as this information doesn't change throughout the encoding /// process. pub struct ComponentWorld<'a> { - /// Encoder configuration with modules, the document ,etc. + /// Encoder configuration with modules, the document, etc. pub encoder: &'a ComponentEncoder, /// Validation information of the input module, or `None` in `--types-only` /// mode. @@ -83,6 +85,34 @@ impl<'a> ComponentWorld<'a> { Ok(ret) } + /// Given that there is no realloc function exported from the main module, + /// returns the realloc scheme we should use. + /// + /// Return an error if we need to use an exported malloc() and it wasn't + /// there. + fn fallback_realloc_scheme(&self) -> Result> { + if self.encoder.module_is_produced_by_tiny_go { + // If it appears the module was emitted by TinyGo, we delegate to + // its `malloc()` function. (TinyGo assumes its GC has rein over the + // whole memory and quickly overwrites the adapter's + // `memory.grow`-allocated State struct, causing a crash. So we use + // `malloc()` to inform TinyGo's GC of the memory we use.) + let malloc_type = FuncType::new([ValType::I32], [ValType::I32]); + match self.info.exports.get_func_type("malloc") { + Some(func_type) if *func_type == malloc_type => Ok(ReallocScheme::Malloc("malloc")), + Some(_) => bail!( + "TinyGo-derived wasm had a malloc() export, but it lacked the expected type of malloc(i32) -> i32" + ), + None => bail!( + "TinyGo-derived wasm lacked a malloc() export; we don't know how else to reserve space for the adapter's state from its GC" + ), + } + } else { + // If it's not TinyGo, use `memory.grow` instead. + Ok(ReallocScheme::MemoryGrow) + } + } + /// Process adapters which are required here. Iterate over all /// adapters and figure out what functions are required from the /// adapter itself, either because the functions are imported by the @@ -121,8 +151,8 @@ impl<'a> ComponentWorld<'a> { } else { // Without `library_info` this means that this is an adapter. // The goal of the adapter is to provide a suite of symbols that - // can be imported, but not all symbols may be imported. Here - // the module is trimmed down to only what's needed by the + // can be imported, but perhaps not all symbols are imported. + // Here the module is trimmed down to only what's needed by the // original main module. // // The main module requires `required_by_import` above, but @@ -163,17 +193,19 @@ impl<'a> ComponentWorld<'a> { required.insert(name.to_string()); } + let realloc = if self.encoder.realloc_via_memory_grow { + // User explicitly requested memory-grow-based realloc. We + // give them that unless it would definitely crash. + self.fallback_realloc_scheme()? + } else { + match self.info.exports.realloc_to_import_into_adapter() { + Some(name) => ReallocScheme::Realloc(name), + None => self.fallback_realloc_scheme()?, + } + }; Cow::Owned( - crate::gc::run( - wasm, - &required, - if self.encoder.realloc_via_memory_grow { - None - } else { - self.info.exports.realloc_to_import_into_adapter() - }, - ) - .context("failed to reduce input adapter module to its minimal size")?, + crate::gc::run(wasm, &required, realloc) + .context("failed to reduce input adapter module to its minimal size")?, ) }; let info = validate_adapter_module( diff --git a/crates/wit-component/src/gc.rs b/crates/wit-component/src/gc.rs index 71a58e4d26..ba43375180 100644 --- a/crates/wit-component/src/gc.rs +++ b/crates/wit-component/src/gc.rs @@ -13,7 +13,22 @@ use wasmparser::*; const PAGE_SIZE: i32 = 64 * 1024; -/// This function will reduce the input core `wasm` module to only the set of +/// The primitive allocation method we use to implement a realloc function, +/// based on what the main module provides +pub enum ReallocScheme<'a> { + /// The main module exports a ready-made realloc function with the given + /// name. + Realloc(&'a str), + /// The main module exports a `malloc(size: u32) -> u32` function with the + /// given name. We can wrap this in a thin shell to make realloc. + Malloc(&'a str), + /// The main module exports no realloc function or anything to make one out + /// of, so we should implement realloc in terms of `memory.grow`. + MemoryGrow, +} +use ReallocScheme::*; + +/// This function will reduce the input core `wasm` module (the adapter, I believe) to only the set of /// exports `required`. /// /// This internally performs a "gc" pass after removing exports to ensure that @@ -21,7 +36,7 @@ const PAGE_SIZE: i32 = 64 * 1024; pub fn run( wasm: &[u8], required: &IndexSet, - main_module_realloc: Option<&str>, + realloc_scheme: ReallocScheme, ) -> Result> { assert!(!required.is_empty()); @@ -46,7 +61,7 @@ pub fn run( } assert!(!module.exports.is_empty()); module.liveness()?; - module.encode(main_module_realloc) + module.encode(realloc_scheme) } /// This function generates a Wasm function body which implements `cabi_realloc` in terms of `memory.grow`. It @@ -500,9 +515,21 @@ impl<'a> Module<'a> { live_iter(&self.live_tables, self.tables.iter()) } + /// Returns a new Wasm function body which implements `cabi_realloc` to call + /// a `malloc` that's exported from the main module. Pass in the index of + /// the `malloc` function within the adapter. + fn realloc_via_malloc(&self, malloc_index: u32) -> Result { + let mut func = wasm_encoder::Function::new([]); + func.instructions() + .local_get(3) // desired new size + .call(malloc_index) + .end(); + Ok(func) + } + /// Encodes this `Module` to a new wasm module which is gc'd and only /// contains the items that are live as calculated by the `liveness` pass. - fn encode(&mut self, main_module_realloc: Option<&str>) -> Result> { + fn encode(&mut self, realloc_scheme: ReallocScheme) -> Result> { // Data structure used to track the mapping of old index to new index // for all live items. let mut map = Encoder::default(); @@ -584,11 +611,38 @@ impl<'a> Module<'a> { let is_realloc = |m, n| m == "__main_module__" && matches!(n, "canonical_abi_realloc" | "cabi_realloc"); + let add_malloc_type = |types: &mut wasm_encoder::TypeSection| { + let type_index = types.len(); + types + .ty() + .function([wasm_encoder::ValType::I32], [wasm_encoder::ValType::I32]); + type_index + }; + + let mut malloc_index = None; + let mut func_names = Vec::new(); + + // Import malloc before we start keeping track of the realloc_index so + // we don't have to add num_func_imports every time we read it. + if let Malloc(malloc_name) = realloc_scheme { + imports.import( + "__main_module__", + malloc_name, + EntityType::Function(add_malloc_type(&mut types)), + ); + malloc_index = Some(num_func_imports); + map.funcs.reserve(); + func_names.push((num_func_imports, malloc_name)); + num_func_imports += 1; + } + let (imported, local) = self.live_funcs() .partition::, _>(|(_, func)| match &func.def { Definition::Import(m, n) => { - !is_realloc(*m, *n) || main_module_realloc.is_some() + // Keep realloc function around iff we're going to use + // it. Always keep other functions around. + !is_realloc(*m, *n) || matches!(realloc_scheme, Realloc(_)) } Definition::Local(_) => false, }); @@ -603,7 +657,10 @@ impl<'a> Module<'a> { // exports that function, but possibly using a different name // (e.g. `canonical_abi_realloc`). Update the name to match if necessary. realloc_index = Some(num_func_imports); - main_module_realloc.unwrap_or(n) + match realloc_scheme { + Realloc(name) => name, + _ => n, + } } else { n }; @@ -637,22 +694,17 @@ impl<'a> Module<'a> { let sp = self.find_mut_i32_global("__stack_pointer")?; let allocation_state = self.find_mut_i32_global("allocation_state")?; - let mut func_names = Vec::new(); - - if let (Some(realloc), Some(_), None) = (main_module_realloc, sp, realloc_index) { + if let (Realloc(realloc_name), Some(_), None) = (&realloc_scheme, sp, realloc_index) { // The main module exports a realloc function, and although the adapter doesn't import it, we're going // to add a function which calls it to allocate some stack space, so let's add an import now. - - // Tell the function remapper we're reserving a slot for our extra import: - map.funcs.next += 1; - + map.funcs.reserve(); realloc_index = Some(num_func_imports); imports.import( "__main_module__", - realloc, + realloc_name, EntityType::Function(add_realloc_type(&mut types)), ); - func_names.push((num_func_imports, realloc)); + func_names.push((num_func_imports, realloc_name)); num_func_imports += 1; } @@ -665,7 +717,16 @@ impl<'a> Module<'a> { // exporting it. In this case, we need to define a local function it can call instead. realloc_index = Some(num_func_imports + funcs.len()); funcs.function(ty); - code.function(&realloc_via_memory_grow()); + let realloc_func = match realloc_scheme { + Malloc(_) => self.realloc_via_malloc( + malloc_index.expect("this was set above when the enum was Malloc"), + )?, + MemoryGrow => realloc_via_memory_grow(), + Realloc(_) => bail!( + "shouldn't get here, as we already know the main module doesn't export a realloc function" + ), + }; + code.function(&realloc_func); } Definition::Local(_) => { funcs.function(ty); @@ -673,22 +734,18 @@ impl<'a> Module<'a> { } } - let lazy_stack_init_index = - if sp.is_some() && allocation_state.is_some() && main_module_realloc.is_some() { - // We have a stack pointer, a `cabi_realloc` function from the main module, and a global variable for + let lazy_stack_init_index = match (&realloc_scheme, sp, allocation_state) { + (Realloc(_), Some(_), Some(_)) => { + // We have a `cabi_realloc` function from the main module, a stack pointer, and a global variable for // keeping track of (and short-circuiting) reentrance. That means we can (and should) do lazy stack // allocation. let index = num_func_imports + funcs.len(); - - // Tell the function remapper we're reserving a slot for our extra function: - map.funcs.next += 1; - + map.funcs.reserve(); funcs.function(add_empty_type(&mut types)); - Some(index) - } else { - None - }; + } + _ => None, + }; let exported_funcs = self .exports @@ -733,10 +790,16 @@ impl<'a> Module<'a> { if sp.is_some() && (realloc_index.is_none() || allocation_state.is_none()) { // Either the main module does _not_ export a realloc function, or it is not safe to use for stack - // allocation because we have no way to short-circuit reentrance, so we'll use `memory.grow` instead. + // allocation because we have no way to short-circuit reentrance, so we'll provide our own realloc + // function instead. realloc_index = Some(num_func_imports + funcs.len()); funcs.function(add_realloc_type(&mut types)); - code.function(&realloc_via_memory_grow()); + let realloc_func = if let Some(index) = malloc_index { + self.realloc_via_malloc(index)? + } else { + realloc_via_memory_grow() + }; + code.function(&realloc_func); } // Inject a start function to initialize the stack pointer which will be local to this module. This only @@ -879,11 +942,14 @@ impl<'a> Module<'a> { section.push(code); subsection.encode(&mut section); }; - if let (Some(realloc_index), true) = ( - realloc_index, - main_module_realloc.is_none() || allocation_state.is_none(), - ) { - func_names.push((realloc_index, "realloc_via_memory_grow")); + if let Some(realloc_index) = realloc_index { + match (realloc_scheme, allocation_state) { + (MemoryGrow, _) | (_, None) => { + func_names.push((realloc_index, "realloc_via_memory_grow")) + } + (Malloc(_), _) => func_names.push((realloc_index, "realloc_via_malloc")), + (Realloc(_), _) => (), // The realloc routine is in another module. + } } if let Some(lazy_stack_init_index) = lazy_stack_init_index { func_names.push((lazy_stack_init_index, "allocate_stack")); @@ -1118,6 +1184,14 @@ impl Remap { self.next += 1; } + /// Reserves the next "new index" for an item you are adding. + /// + /// For example, call this when you add a new function to a module and need + /// to avoid other functions getting remapped to its index. + fn reserve(&mut self) { + self.next += 1; + } + /// Returns the new index corresponding to an old index. /// /// Panics if the `old` index was not added via `push` above. diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs index 435c5dbd2c..e64d646184 100644 --- a/crates/wit-component/src/validation.rs +++ b/crates/wit-component/src/validation.rs @@ -1103,6 +1103,12 @@ impl ExportMap { Ok(()) } + /// Returns the type of the given exported function, None if an exported + /// function of that name does not exist. + pub(crate) fn get_func_type<'a>(&'a self, name: &str) -> Option<&'a FuncType> { + self.raw_exports.get(name) + } + fn classify( &mut self, export: wasmparser::Export<'_>,