From 34bc8bae22bd8ac0615d765faa5bc054fd3c21b4 Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Wed, 13 Aug 2025 10:47:36 -0400 Subject: [PATCH 1/4] Port to target powerpc64. --- src/arch/mod.rs | 3 + src/arch/powerpc64.rs | 699 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/stack/unix.rs | 17 +- src/stack/valgrind.rs | 8 + src/tests/coroutine.rs | 26 +- src/tests/on_stack.rs | 10 +- 7 files changed, 760 insertions(+), 4 deletions(-) create mode 100644 src/arch/powerpc64.rs diff --git a/src/arch/mod.rs b/src/arch/mod.rs index a5c7d56b..750afc8e 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -164,6 +164,9 @@ cfg_if::cfg_if! { } else if #[cfg(all(target_arch = "loongarch64", not(windows)))] { mod loongarch64; pub use self::loongarch64::*; + } else if #[cfg(all(target_arch = "powerpc64", not(windows)))] { + mod powerpc64; + pub use self::powerpc64::*; } else { compile_error!("Unsupported target"); } diff --git a/src/arch/powerpc64.rs b/src/arch/powerpc64.rs new file mode 100644 index 00000000..57d5bd9e --- /dev/null +++ b/src/arch/powerpc64.rs @@ -0,0 +1,699 @@ +//! Low-level powerpc64le support. +//! +//! This file contains the low level operations that deal with switching between +//! stacks. +//! +//! The core operations are: +//! - `init_stack` to initialize a stack for the first resume. +//! - `switch_and_link` to switch control into a coroutine. +//! - `switch_yield` to return control from a coroutine back to its parent. +//! - `switch_and_reset` to return control for the last time. +//! +//! ## Linked stacks +//! +//! Stack linking allows a context switch to be automatically performed when the +//! initial function of a context returns or unwinds. This works by stashing a +//! copy of the parent (the routine resumes/invokes a coroutine) context stack +//! pointer near the stack base and updating it every time we switch into the +//! the child (the coroutine) context using `switch_and_link`. +//! +//! 
For unwinding and backtraces to work as expected (that is, to continue in +//! the parent after unwinding past the initial function of a child context), +//! we need to use special DWARF CFI instructions to tell the unwinder how to +//! find the parent frame. +//! +//! If you're curious a decent introduction to CFI things and unwinding is at +//! . +//! +//! ## Frame pointers +//! +//! Some tools or OSes do not use DWARF for stack unwinding, prefering to use +//! the older (but simpler) frame pointer chain to capture a backtrace. This is +//! particularly common in performance profiling tools such as Linux's perf +//! callgraph profiler. These work by following a linked list of frame records +//! starting from the FP register. Each record consists of 2 words: a pointer +//! to the previous frame (aka the previous FP value) and the return address +//! for this frame (aka the saved LR value). +//! +//! To support these tools, we also generate a valid stack frame record when +//! switching into a coroutine. This works by treating the parent link at the +//! root of the stack as a frame record which points to the top of the parent +//! stack. The top of the parent stack contains the saved FP and LR values in +//! the correct format for a frame record, which allows unwinding to continue on +//! the parent stack. +//! +//! The LR value associated with the parent link is invalid since it points to +//! the start of the initial function, but this shouldn't block the unwinding +//! process. +//! +//! ## Stack layout +//! +//! Note: Non-volatile registers R29, R30, and R31 need to be explicitly +//! saved/restored on our stack because they are used internally by LLVM and +//! cannot be used as operands for inline asm (e.g., `inlate()`). +//! +//! Here is what the layout of the stack looks like when a coroutine is +//! suspended. +//! +//! ```text +//! +//! +--------------+ <- Stack base +//! | Initial func | <- Only used once when resuming for the first time. +//! 
+--------------+ +//! | Parent link | <- The Yielder is a pointer to this address. When the +//! +--------------+ coroutine is running, it points to the top of the +//! | | parent stack which contains a saved R29, R30, R31, +//! ~ ... ~ and LR just like a suspened coroutine. +//! | | +//! +--------------+ +//! | Saved R31 | +//! +--------------+ +//! | Saved R30 | +//! +--------------+ +//! | Saved R29 | +//! +--------------+ +//! | Saved LR | +//! +--------------+ +//! | Link area | <- 32-byte link area +//! +--------------+ +//! ``` +//! +//! And this is the layout of the parent stack when a coroutine is running: +//! +//! ```text +//! +--------------+ +//! | ... | +//! | saved LR | <- 32-byte link area. +//! | ... | +//! +--------------+ <- Stack base. +//! | Saved R31 | +//! +--------------+ +//! | Saved R30 | +//! +--------------+ +//! | Saved R29 | <- frame pointer chain since FP points to it. +//! +--------------+ <- Parent link points here. +//! | Link area | <- 32-byte link area +//! +--------------+ +//! ``` +//! +//! And finally, this is the stack layout of a coroutine that has just been +//! initialized: +//! +//! ```text +//! +--------------+ <- Stack base +//! | Initial func | +//! +--------------+ +//! | Parent link | +//! +--------------+ +//! | | +//! ~ Initial obj ~ +//! | | +//! +--------------+ +//! | Initial PC | +//! +--------------+ +//! | Link area | <- 32-byte link area +//! +--------------+ <- stack top +//! 
``` + +use core::arch::{asm, global_asm}; + +use super::{allocate_obj_on_stack, push}; +use crate::coroutine::adjusted_stack_base; +use crate::stack::{Stack, StackPointer}; +use crate::unwind::{ + asm_may_unwind_root, asm_may_unwind_yield, cfi_reset_args_size_root, cfi_reset_args_size_yield, + InitialFunc, StackCallFunc, TrapHandler, +}; +use crate::util::EncodedValue; + +pub const STACK_ALIGNMENT: usize = 16; +pub const PARENT_STACK_OFFSET: usize = 0; +pub const PARENT_LINK_OFFSET: usize = 16; +pub type StackWord = u64; + +// This is a pretty special function that has no real signature. Its use is to +// be the "base" function of all coroutines. This entrypoint is used in +// init_stack() to bootstrap the execution of a new coroutine. +// +// We also use this function as a persistent frame on the stack to emit dwarf +// information to unwind into the caller. This allows us to unwind from the +// coroutines's stack back to the main stack that the coroutine was called from. +// We use special dwarf directives here to do so since this is a pretty +// nonstandard function. +global_asm!( + ".balign 4", + asm_function_begin!("stack_init_trampoline"), + ".cfi_startproc", + cfi_signal_frame!(), + // At this point our register state contains the following: + // - SP points to the top of the parent stack. + // - LR contains the return address in the parent context. + // - R29, R30, and R31 contain their values from the parent context. + // - R5 points to the top of the initial coroutine stack. + // - R4 points to the base of the initial coroutine stack. + // - R3 contains the argument passed from switch_and_link. + // + // Save the R29, R30, and R31 of the parent context to the parent stack. + // When combined with the return address this forms a valid frame record + // (R29, R30, R31 & LR) in the frame pointer chain. 
+ "mflr 0", + "std 31, -8(1)", + "std 30, -16(1)", + "std 29, -24(1)", + "std 0, 16(1)", + // Allocate a frame of 56 bytes (link area + space for R29, R30, and R31). + "stdu 1, -56(1)", + // Write the parent stack pointer to the parent link (back chain slot) of + // the coroutine stack and adjust R4 to point to the parent link. + "addi 4, 4, -16", + "std 1, 0(4)", + // Switch to the coroutine stack. + "mr 1, 5", + // Set up the frame pointer to point at the parent link. This is needed for + // the unwinding code below. + "mr 31, 4", + // The actual meanings of the magic bytes are: + // 0x0f: DW_CFA_def_cfa_expression + // 5: byte length of the following DWARF expression + // 0x8f 0x00: DW_OP_breg31 (31 + 0) + // 0x06: DW_OP_deref + // 0x23, 0x38: DW_OP_plus_uconst 56 + ".cfi_escape 0x0f, 5, 0x8f, 0x00, 0x06, 0x23, 0x38", + // Now we can tell the unwinder how to restore the 3 registers that were + // pushed on the parent stack. These are described as offsets from the CFA + // that we just calculated. + ".cfi_offset r31, -8", + ".cfi_offset r30, -16", + ".cfi_offset r29, -24", + ".cfi_offset lr, 16", + // Set up the 3rd argument to the initial function to point to the object + // that init_stack() set up on the stack. + "addi 5, 1, 40", + // As in the original x86_64 code, hand-write the call operation so that it + // doesn't push an entry into the CPU's return prediction stack. + + // Set the return address in LR. + // FIXME: Workaround for P9 and earlier that do not have pc-rel instructions. + "bl 0f", + "0:", + "mflr 6", + "addi 0, 6, 24", + "mtlr 0", + // load the initial function to R12 for function linkage. + "ld 12, 8(4)", + "mtctr 12", + "bctr", + asm_function_alt_entry!("stack_init_trampoline_return"), + // This 'trap' instruction is necessary because of our use of .cfi_signal_frame earlier.
+ "trap", + ".cfi_endproc", + asm_function_end!("stack_init_trampoline"), +); + +// This function calls a function pointer on a new stack and restores the +// original stack upon returning. It is used by on_stack() and is much simpler +// than the full coroutine logic, but also more limited since yielding is not +// possible. +global_asm!( + // See stack_init_trampoline for an explanation of the assembler directives + // used here. + ".balign 4", + asm_function_begin!("stack_call_trampoline"), + ".cfi_startproc", + cfi_signal_frame!(), + // At this point our register state contains the following: + // - SP points to the top of the parent stack. + // - R29, R30, and R31 hold their value from the parent context. + // - R5 is the function that should be called. + // - R4 points to the top of our stack. + // - R3 contains the argument to be passed to the function. + // + // Create a stack frame and point the frame pointer at it. + "mflr 0", + "stdu 1, -56(1)", + "std 31, 48(1)", + "std 30, 40(1)", + "std 29, 32(1)", + // Save LR in the link area of the parent frame. + "std 0, 72(1)", + ".cfi_def_cfa 1, 0", + ".cfi_offset r31, 48", + ".cfi_offset r30, 40", + ".cfi_offset r29, 32", + ".cfi_offset lr, 72", + // Switch to the new stack. + "mr 31, 1", + "mr 1, 4", + // FIXME: Workaround for P9 and earlier that do not have pc-rel instructions. + "bl 0f", + "0:", + "mflr 6", + "addi 0, 6, 24", + "mtlr 0", + // Call the function pointer. The argument is already in the correct + // register for the function. + "mr 12, 5", + "mtctr 12", + "bctr", + // Switch back to the original stack by restoring from the frame pointer, + // then return. + "addi 1, 31, 56", + "ld 31, -8(1)", + "ld 30, -16(1)", + "ld 29, -24(1)", + "ld 12, 16(1)", + "mtlr 12", + "blr", + ".cfi_endproc", + asm_function_end!("stack_call_trampoline"), +); + +// These trampolines use a custom calling convention and should only be called +// with inline assembly. 
+extern "C" { + fn stack_init_trampoline(arg: EncodedValue, stack_base: StackPointer, stack_ptr: StackPointer); + static stack_init_trampoline_return: [u8; 0]; + #[allow(dead_code)] + fn stack_call_trampoline(arg: *mut u8, sp: StackPointer, f: StackCallFunc); +} + +/// Sets up the initial state on a stack so that the given function is +/// executed on the first switch to this stack. +/// +/// The given object is written to the stack and its address on the stack is +/// passed as the 3rd argument to the initial function. +#[inline] +pub unsafe fn init_stack(stack: &impl Stack, func: InitialFunc, obj: T) -> StackPointer { + let mut sp = adjusted_stack_base(stack).get(); + + // Initial function. + push(&mut sp, Some(func as StackWord)); + + // Placeholder for parent link. + push(&mut sp, None); + + // Allocate space on the stack for the initial object, rounding to + // STACK_ALIGNMENT. + allocate_obj_on_stack(&mut sp, 16, obj); + + // Entry point called by switch_and_link(). + push(&mut sp, Some(stack_init_trampoline as StackWord)); + + // Space for 32 bytes link area. + push(&mut sp, None); + push(&mut sp, None); + push(&mut sp, None); + push(&mut sp, None); + + StackPointer::new_unchecked(sp) +} + +/// This function is used to transfer control to a coroutine along with an +/// argument. A pointer back to our context is stored at a fixed offset from +/// the base of the target stack. +/// +/// When another context switches back to us, we receive the argument they sent +/// as well as the stack pointer of the originating context. This can be `None` +/// if the caller used `switch_and_reset` and can't be returned to. 
+#[inline] +pub unsafe fn switch_and_link( + arg: EncodedValue, + sp: StackPointer, + stack_base: StackPointer, +) -> (EncodedValue, Option<StackPointer>) { + let (ret_val, ret_sp); + + asm_may_unwind_root!( + // DW_CFA_GNU_args_size 0 + // + // Indicate to the unwinder that this "call" does not take any arguments + // and no stack space needs to be popped before executing a landing pad. + // This is mainly here to undo the effect of any previous + // DW_CFA_GNU_args_size that may have been set in the current function. + cfi_reset_args_size_root!(), + + // Read the saved PC from the coroutine stack and call it. + "ld 12, 32(5)", + "mtctr 12", + "bctrl", + "nop", + + // Upon returning from switch_yield or switch_and_reset, our register + // state contains the following: + // - R5: parent stack pointer. + // - R4: The top of the coroutine stack, or 0 if coming from + // switch_and_reset. + // - R3: The return value from the coroutine. + + // Switch back to parent stack and free the saved registers. + "addi 1, 5, 56", + + // Pass the argument in R3. + inlateout("3") arg => ret_val, + + // We get the coroutine stack pointer back in R4. + lateout("4") ret_sp, + + // We pass the stack base in R4. + in("4") stack_base.get() as u64, + + // We pass the target stack pointer in R5. + in("5") sp.get() as u64, + + // Mark all registers as clobbered. + lateout("14") _, lateout("15") _, lateout("16") _, lateout("17") _, + lateout("18") _, lateout("19") _, lateout("20") _, lateout("21") _, + lateout("22") _, lateout("23") _, lateout("24") _, lateout("25") _, + lateout("26") _, lateout("27") _, lateout("28") _, + clobber_abi("C"), + ); + + (ret_val, StackPointer::new(ret_sp)) +} + +/// This function performs the inverse of `switch_and_link` by returning +/// control to the parent context. +/// +/// This function does not return a stack pointer value for the parent context +/// when it switches back to us.
Instead, the stack pointer value for the parent +/// context is available in the parent link on the stack. +// This function must always be inlined because it is very sensitive to the +// CPU's return address predictor. See stack_init_trampoline for more details. +//#[inline(always)] +pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> EncodedValue { + let ret_val; + + asm_may_unwind_yield!( + // Save R29, R30, and R31. Ideally this would be done by specifying them as + // clobbers but that is not possible since they are LLVM reserved + // registers. Also save our LR. + "std 31, -8(1)", // Save Back chain + "std 30, -16(1)", + "std 29, -24(1)", + "stdu 1, -32(1)", // The space for R29, R30, R31. + + // FIXME: Workaround for P9 and earlier that do not have pc-rel instructions. + "bl 1f", + "1:", + "mflr 6", + "addi 0, 6, 44", + // Save return address in the parent frame. + "std 0, 32(1)", + + // Get the parent stack pointer from the parent link. + "ld 5, 0(4)", + + // Save our stack pointer to R4. + "mr 4, 1", + + // Restore R29, R30, R31, and LR from the parent stack. + "ld 29, 32(5)", + "ld 30, 40(5)", + "ld 31, 48(5)", + "ld 12, 72(5)", // Get the LR. + "mtlr 12", + + // DW_CFA_GNU_args_size 0 + // + // Indicate to the unwinder that this "call" does not take any arguments + // and no stack space needs to be popped before executing a landing pad. + // This is mainly here to undo the effect of any previous + // DW_CFA_GNU_args_size that may have been set in the current function. + // + // This is needed here even though we don't call anything because + // switch_and_throw may inject a call which returns to this point. + cfi_reset_args_size_yield!(), + + // Return into the parent context + "blr", + "nop", + + // This gets called by switch_and_link(). At this point our register + // state contains the following: + // - SP points to the top of the parent stack. + // - LR contains the return address in the parent context.
+ // - R29, R30, and R31 contain their values from the parent context. + // - R5 points to the top of the coroutine stack. + // - R4 points to the base of our stack. + // - R3 contains the argument passed from switch_and_link. + "0:", + + // Push the R29, R30, R31, and LR values of the parent context onto the parent + // stack. + "mflr 0", + "addi 1, 1, -56", + "std 31, 48(1)", + "std 30, 40(1)", + "std 29, 32(1)", + "std 0, 72(1)", + + // Write the parent stack pointer to the parent link. + "std 1, -16(4)", + + // Switch to the coroutine stack while popping the saved registers. + "addi 1, 5, 32", + + // Load our R29, R30, and R31 values from the coroutine stack. + "ld 31, -8(1)", + "ld 30, -16(1)", + "ld 29, -24(1)", + + // Pass the argument in R3. + inlateout("3") arg => ret_val, + + // The parent link can be in any register, R5 is arbitrarily chosen + // here. + in("5") parent_link as u64, + + // See switch_and_link() for an explanation of the clobbers. + lateout("14") _, lateout("15") _, lateout("16") _, lateout("17") _, + lateout("18") _, lateout("19") _, lateout("20") _, lateout("21") _, + lateout("22") _, lateout("23") _, lateout("24") _, lateout("25") _, + lateout("26") _, lateout("27") _, lateout("28") _, + clobber_abi("C"), + ); + + ret_val +} + +/// Variant of `switch_yield` used when returning from the initial function in a +/// context. +/// +/// This works by returning a stack pointer value of 0 which prevents the +/// current context from being resumed. There must not be any object left on the +/// stack with pending destructors when this is called. +/// +/// Since the stack is still available at this point, `arg` can safely point to +/// memory on the stack until the parent context frees or reuses the stack. +// This function must always be inlined because it is very sensitive to the +// CPU's return address predictor. See stack_init_trampoline for more details.
+#[inline(always)] +pub unsafe fn switch_and_reset(arg: EncodedValue, parent_link: *mut StackPointer) -> ! { + // Most of this code is identical to switch_yield(), refer to the + // comments there. Only the differences are commented. + asm!( + // Load the parent context's stack pointer. + "ld 5, 0({parent_link})", + + // Restore R29, R30, R31, and LR from the parent stack. + "ld 29, 32(5)", + "ld 30, 40(5)", + "ld 31, 48(5)", + "ld 12, 72(5)", + "mtlr 12", + + // Return into the parent context + "blr", + + parent_link = in(reg) parent_link as u64, + + in("3") arg, + + // Hard-code the returned stack pointer value to 0 to indicate that this + // coroutine is done. + in("4") 0, + + options(noreturn), + ) +} + +/// Variant of `switch_and_link` which runs a function on the coroutine stack +/// instead of resuming the coroutine. This function will throw an exception +/// which will unwind the coroutine stack to its root. +#[inline] +#[cfg(feature = "asm-unwind")] +pub unsafe fn switch_and_throw( + forced_unwind: crate::unwind::ForcedUnwind, + sp: StackPointer, + stack_base: StackPointer, +) -> (EncodedValue, Option<StackPointer>) { + extern "C-unwind " fn throw(forced_unwind: crate::unwind::ForcedUnwind) -> ! { + extern crate std; + use std::boxed::Box; + std::panic::resume_unwind(Box::new(forced_unwind)); + } + + let (ret_val, ret_sp); + + asm_may_unwind_root!( + // Set up a return address. + // FIXME: Workaround for P9 and earlier that do not have pc-rel instructions. + "bl 0f", + "0:", + "mflr 6", + "addi 0, 6, 64", + + // Save the parent context onto the parent stack. + "mflr 0", + "stdu 1, -56(1)", + "std 31, 48(1)", + "std 30, 40(1)", + "std 29, 32(1)", + "std 0, 72(1)", + + // Write the parent stack pointer to the parent link. + "std 1, -16(4)", + + // Switch to the coroutine stack while popping the saved registers. + "addi 1, 5, 32", + + // Load the coroutine registers, with the saved LR value into LR.
+ "ld 31, -8(1)", + "ld 30, -16(1)", + "ld 29, -24(1)", + "ld 0, 224(1)", + "mtlr 0", + + // DW_CFA_GNU_args_size 0 + // + // Indicate to the unwinder that this "call" does not take any arguments + // and no stack space needs to be popped before executing a landing pad. + // This is mainly here to undo the effect of any previous + // DW_CFA_GNU_args_size that may have been set in the current function. + cfi_reset_args_size_root!(), + + // Simulate a call with an artificial return address so that the throw + // function will unwind straight into the switch_and_yield() call with + // the register state expected outside the asm! block. + "b throw", + + // Upon returning, our register state is just like a normal return into + // switch_and_link(). + "0:", + + // Switch back to our stack and free the saved registers. + "addi 1, 5, 56", + + // Helper function to trigger stack unwinding. + throw = throw, + + // Argument to pass to the throw function. + in("3") forced_unwind.0.get(), + + // Same output registers as switch_and_link(). + lateout("3") ret_val, + lateout("4") ret_sp, + + // Stack pointer and stack base inputs for stack switching. + in("4") stack_base.get() as u64, + in("5") sp.get() as u64, + + // See switch_and_link() for an explanation of the clobbers. + lateout("14") _, lateout("15") _, lateout("16") _, lateout("17") _, + lateout("18") _, lateout("19") _, lateout("20") _, lateout("21") _, + lateout("22") _, lateout("23") _, lateout("24") _, lateout("25") _, + lateout("26") _, lateout("27") _, lateout("28") _, + clobber_abi("C"), + ); + + (ret_val, StackPointer::new(ret_sp)) +} + +#[inline] +pub unsafe fn drop_initial_obj( + _stack_base: StackPointer, + stack_ptr: StackPointer, + drop_fn: unsafe fn(ptr: *mut u8), +) { + let ptr = (stack_ptr.get() as *mut u8).add(40); + drop_fn(ptr); +} + +/// Registers which must be updated upon return from a trap handler. +/// +/// The exact set of registers that need to be updated varies depending on the +/// target. 
Note that *all* registers must be updated to the specified values, +/// otherwise behavior is undefined. +/// +/// To catch any issues at compilation time, it is recommended to use Rust's +/// pattern matching syntax to extract the individual registers from this +/// struct. +/// +/// ``` +/// # use corosensei::trap::TrapHandlerRegs; +/// # let regs = TrapHandlerRegs { pc: 0, sp: 0, r3: 0, r4: 0, r31: 0, lr: 0 }; +/// let TrapHandlerRegs { pc, sp, r3, r4, r31, lr } = regs; +/// ``` +#[allow(missing_docs)] +#[derive(Clone, Copy, Debug)] +pub struct TrapHandlerRegs { + pub pc: u64, + pub sp: u64, + pub r3: u64, + pub r4: u64, + pub r31: u64, + pub lr: u64, +} + +pub unsafe fn setup_trap_trampoline( + stack_base: StackPointer, + val: T, + handler: TrapHandler, +) -> TrapHandlerRegs { + // Preserve the top 16 bytes of the stack since they contain the parent + // link. + let parent_link = stack_base.get() - PARENT_LINK_OFFSET; + + // Everything below this can be overwritten. Write the object to the stack. + let mut sp = parent_link; + allocate_obj_on_stack(&mut sp, 16, val); + + // Space for 32 bytes link area. + push(&mut sp, None); + push(&mut sp, None); + push(&mut sp, None); + push(&mut sp, None); + + let val_ptr = sp; + + // Set up registers for entry into the function. + TrapHandlerRegs { + pc: handler as u64, + sp: sp as u64, + r3: val_ptr as u64, + r4: parent_link as u64, + r31: parent_link as u64, + lr: stack_init_trampoline_return.as_ptr() as u64, + } +} + +/// This function executes a function on the given stack. The argument is passed +/// through to the called function. +#[inline] +pub unsafe fn on_stack(arg: *mut u8, stack: impl Stack, f: StackCallFunc) { + // This is a bit subtle: because we use .cfi_signal_frame in the trampoline, + // the unwinder will look for unwinding information at the instruction + // after the return address. 
Normal compiler code generation does not + // expect this and may generate incorrect entries in the exception handling + // table. We work around this by adding a NOP instruction after the call. + asm_may_unwind_root!( + // DW_CFA_GNU_args_size 0 + cfi_reset_args_size_root!(), + concat!("bl ", asm_mangle!("stack_call_trampoline")), + "nop", + in("3") arg, + in("4") adjusted_stack_base(&stack).get(), + in("5") f, + clobber_abi("C"), + ); +} diff --git a/src/lib.rs b/src/lib.rs index 89991460..d1cda0fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -236,6 +236,7 @@ #![no_std] #![cfg_attr(feature = "asm-unwind", feature(asm_unwind))] +#![cfg_attr(target_arch = "powerpc64", feature(asm_experimental_arch))] #![warn(missing_docs)] // Must come first because it defines macros used by other modules. diff --git a/src/stack/unix.rs b/src/stack/unix.rs index ff600905..7b5e3819 100644 --- a/src/stack/unix.rs +++ b/src/stack/unix.rs @@ -50,10 +50,17 @@ impl DefaultStack { return Err(Error::last_os_error()); } + // A 32-byte link area is reserved above the base, as defined by the + // 64-bit ELF PowerPC stack frame layout. + let link_area = if cfg!(target_arch = "powerpc64") { + 32 + } else { + 0 + }; // Create the result here. If the mprotect call fails then this will // be dropped and the memory will be unmapped. let out = Self { - base: StackPointer::new(mmap as usize + mmap_len).unwrap(), + base: StackPointer::new(mmap as usize + mmap_len - link_area).unwrap(), mmap_len, valgrind: ManuallyDrop::new(ValgrindStackRegistration::new( mmap as *mut u8, @@ -88,7 +95,13 @@ impl Drop for DefaultStack { // De-register the stack first. ManuallyDrop::drop(&mut self.valgrind); - let mmap = self.base.get() - self.mmap_len; + // A 32-byte link area is reserved above the base on PowerPC. 
+ let link_area = if cfg!(target_arch = "powerpc64") { + 32 + } else { + 0 + }; + let mmap = self.base.get() - self.mmap_len + link_area; let ret = libc::munmap(mmap as _, self.mmap_len); debug_assert_eq!(ret, 0); } diff --git a/src/stack/valgrind.rs b/src/stack/valgrind.rs index 3822f90d..2ce94ab6 100644 --- a/src/stack/valgrind.rs +++ b/src/stack/valgrind.rs @@ -80,6 +80,14 @@ cfg_if::cfg_if! { ); result } + } else if #[cfg(target_arch = "powerpc64")] { + type Value = u64; + + // FIXME: look at Valgrind later, use a no-op for now. + #[inline] + unsafe fn valgrind_request(default: Value, _args: &[Value; 6]) -> Value { + default + } } else if #[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))] { type Value = usize; diff --git a/src/tests/coroutine.rs b/src/tests/coroutine.rs index ee1192c5..d007da3e 100644 --- a/src/tests/coroutine.rs +++ b/src/tests/coroutine.rs @@ -54,7 +54,15 @@ fn suspend_and_resume() { } // Linked backtraces are not supported on x86 Windows. -#[cfg_attr(all(windows, target_arch = "x86"), ignore)] +// Linked backtraces are not supported on powerpc64 Linux yet due to the info +// generated by the (nightly) compiler. +#[cfg_attr( + any( + all(windows, target_arch = "x86"), + all(target_arch = "powerpc64", target_os = "linux") + ), + ignore +)] #[test] fn backtrace_traces_to_host() { #[inline(never)] // try to get this to show up in backtraces @@ -360,6 +368,8 @@ fn forward_stack_address() { std::panic::resume_unwind(result.unwrap_err()); } +// Trap handler is not supported on powerpc64 Linux. +#[cfg_attr(all(target_arch = "powerpc64", target_os = "linux"), ignore)] #[test] fn trap_handler() { trap_handler::setup_handler(); @@ -381,6 +391,8 @@ fn trap_handler() { } #[cfg(feature = "unwind")] +// Trap handler is not supported on powerpc64 Linux. 
+#[cfg_attr(all(target_arch = "powerpc64", target_os = "linux"), ignore)] #[test] #[should_panic = "foobar"] fn trap_handler_panic() { @@ -402,6 +414,8 @@ fn trap_handler_panic() { assert_eq!(coroutine.resume(()), CoroutineResult::Return(42)); } +// Trap handler is not supported on powerpc64 Linux. +#[cfg_attr(all(target_arch = "powerpc64", target_os = "linux"), ignore)] #[test] fn stack_overflow() { trap_handler::setup_handler(); @@ -516,6 +530,8 @@ mod trap_handler { sp = (*context.uc_mcontext).__ss.__sp as usize; } else if #[cfg(all(target_os = "linux", target_arch = "loongarch64"))] { sp = context.uc_mcontext.__gregs[3] as usize; + } else if #[cfg(all(target_os = "linux", target_arch = "powerpc64"))] { + sp = (*context.uc_mcontext.regs).gpr[1] as usize; } else { compile_error!("Unsupported platform"); } @@ -622,6 +638,14 @@ mod trap_handler { context.uc_mcontext.__gregs[4] = a0; context.uc_mcontext.__gregs[5] = a1; context.uc_mcontext.__gregs[22] = fp; + } else if #[cfg(all(target_os = "linux", target_arch = "powerpc64"))] { + let TrapHandlerRegs { pc, sp, r3, r4, r31, lr } = regs; + (*context.uc_mcontext.regs).nip = pc; + (*context.uc_mcontext.regs).gpr[1] = sp; + (*context.uc_mcontext.regs).gpr[3] = r3; + (*context.uc_mcontext.regs).gpr[4] = r4; + (*context.uc_mcontext.regs).gpr[31] = r31; + (*context.uc_mcontext.regs).link = lr; } else { compile_error!("Unsupported platform"); } diff --git a/src/tests/on_stack.rs b/src/tests/on_stack.rs index 15cd2f4d..5ac53881 100644 --- a/src/tests/on_stack.rs +++ b/src/tests/on_stack.rs @@ -21,7 +21,15 @@ fn smoke() { } // Linked backtraces are not supported on x86 Windows. -#[cfg_attr(all(windows, target_arch = "x86"), ignore)] +// Linked backtraces are not supported on powerpc64 Linux yet due to the info +// generated by the (nightly) compiler. 
+#[cfg_attr( + any( + all(windows, target_arch = "x86"), + all(target_arch = "powerpc64", target_os = "linux") + ), + ignore +)] #[test] fn backtrace_traces_to_host() { #[inline(never)] // try to get this to show up in backtraces From 576c081df21b406ee32e00ae67baf1c9c1338c9d Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Mon, 20 Oct 2025 10:19:32 -0400 Subject: [PATCH 2/4] Addressed comments: - change the stack layout as per suggestion, - save/restore the TOC register R2. --- src/arch/powerpc64.rs | 243 +++++++++++++++++++++--------------------- 1 file changed, 123 insertions(+), 120 deletions(-) diff --git a/src/arch/powerpc64.rs b/src/arch/powerpc64.rs index 57d5bd9e..e21d5c3b 100644 --- a/src/arch/powerpc64.rs +++ b/src/arch/powerpc64.rs @@ -1,99 +1,65 @@ -//! Low-level powerpc64le support. +//! Low-level PowerPC support. //! -//! This file contains the low level operations that deal with switching between -//! stacks. -//! -//! The core operations are: -//! - `init_stack` to initialize a stack for the first resume. -//! - `switch_and_link` to switch control into a coroutine. -//! - `switch_yield` to return control from a coroutine back to its parent. -//! - `switch_and_reset` to return control for the last time. -//! -//! ## Linked stacks -//! -//! Stack linking allows a context switch to be automatically performed when the -//! initial function of a context returns or unwinds. This works by stashing a -//! copy of the parent (the routine resumes/invokes a coroutine) context stack -//! pointer near the stack base and updating it every time we switch into the -//! the child (the coroutine) context using `switch_and_link`. -//! -//! For unwinding and backtraces to work as expected (that is, to continue in -//! the parent after unwinding past the initial function of a child context), -//! we need to use special DWARF CFI instructions to tell the unwinder how to -//! find the parent frame. -//! -//! 
If you're curious a decent introduction to CFI things and unwinding is at -//! . -//! -//! ## Frame pointers -//! -//! Some tools or OSes do not use DWARF for stack unwinding, prefering to use -//! the older (but simpler) frame pointer chain to capture a backtrace. This is -//! particularly common in performance profiling tools such as Linux's perf -//! callgraph profiler. These work by following a linked list of frame records -//! starting from the FP register. Each record consists of 2 words: a pointer -//! to the previous frame (aka the previous FP value) and the return address -//! for this frame (aka the saved LR value). -//! -//! To support these tools, we also generate a valid stack frame record when -//! switching into a coroutine. This works by treating the parent link at the -//! root of the stack as a frame record which points to the top of the parent -//! stack. The top of the parent stack contains the saved FP and LR values in -//! the correct format for a frame record, which allows unwinding to continue on -//! the parent stack. -//! -//! The LR value associated with the parent link is invalid since it points to -//! the start of the initial function, but this shouldn't block the unwinding -//! process. +//! This file is heavily based on the x86_64 implementation. +//! Relevant differences are highlighted in comments, but otherwise most +//! comments have been removed to avoid duplication. Refer to x86_64.rs for +//! detailed comments about what is happening in this file. //! //! ## Stack layout //! -//! Note: Non-volatile registers R29, R30, and R31 need to be explicitly -//! saved/restored on our stack because they are used internally by LLVM and -//! cannot be used as operands for inline asm (e.g., `inlate()`). -//! //! Here is what the layout of the stack looks like when a coroutine is //! suspended. //! -//! ```text +//! Note: "link area" refers to the minimal 32-byte stack frame header that is +//! required by the PowerPC ABI. //! +//! 
```text //! +--------------+ <- Stack base -//! | Initial func | <- Only used once when resuming for the first time. -//! +--------------+ -//! | Parent link | <- The Yielder is a pointer to this address. When the -//! +--------------+ coroutine is running, it points to the top of the -//! | | parent stack which contains a saved R29, R30, R31, -//! ~ ... ~ and LR just like a suspened coroutine. +//! | Padding | <- +//! +--------------+ | +//! | Initial func | <- +//! +--------------+ | This mirrors the 32-byte link area layout. +//! | Padding | <- +//! +--------------+ | +//! | Parent link | <- +//! +--------------+ <- The 4 fields above form a fake link area which +//! | | backchains back to the parent stack. +//! ~ ... ~ //! | | //! +--------------+ -//! | Saved R31 | -//! +--------------+ -//! | Saved R30 | +//! | | +//! | Link area | <- 32-byte link area of the last frame before the suspend. +//! | | The LR field holds the PC to resume execution at. +//! +--------------+ <- Saved stack pointer points here. +//! | Saved R31 | //! +--------------+ -//! | Saved R29 | +//! | Saved R30 | //! +--------------+ -//! | Saved LR | +//! | Saved R29 | //! +--------------+ -//! | Link area | <- 32-byte link area +//! | Saved R2 | //! +--------------+ //! ``` //! //! And this is the layout of the parent stack when a coroutine is running: //! //! ```text -//! +--------------+ -//! | ... | -//! | saved LR | <- 32-byte link area. -//! | ... | -//! +--------------+ <- Stack base. -//! | Saved R31 | -//! +--------------+ -//! | Saved R30 | -//! +--------------+ -//! | Saved R29 | <- frame pointer chain since FP points to it. -//! +--------------+ <- Parent link points here. -//! | Link area | <- 32-byte link area -//! +--------------+ +//! | | +//! ~ ... ~ +//! | | +//! +-----------+ +//! | | +//! | Link area | <- 32-byte link area of the last frame before the suspend. +//! | | The LR field holds the PC to resume execution at. +//! +-----------+ <- Parent link points here. +//! 
| Saved R31 | +//! +-----------+ +//! | Saved R30 | +//! +-----------+ +//! | Saved R29 | +//! +-----------+ +//! | Saved R2 | +//! +-----------+ //! ``` //! //! And finally, this is the stack layout of a coroutine that has just been @@ -101,17 +67,33 @@ //! //! ```text //! +--------------+ <- Stack base +//! | Padding | +//! +--------------+ //! | Initial func | //! +--------------+ +//! | Padding | +//! +--------------+ //! | Parent link | //! +--------------+ //! | | //! ~ Initial obj ~ //! | | //! +--------------+ -//! | Initial PC | +//! | Padding | <- +//! +--------------+ | +//! | Initial PC | <- +//! +--------------+ | This mirrors the 32-byte link area layout. +//! | Padding | <- +//! +--------------+ | +//! | Padding | <- +//! +--------------+ <- Initial SP points here +//! | Saved R31 | +//! +--------------+ +//! | Saved R30 | //! +--------------+ -//! | Link area | <- 32-byte link area +//! | Saved R29 | +//! +--------------+ +//! | Saved R2 | //! +--------------+ <- stack top //! ``` @@ -128,7 +110,7 @@ use crate::util::EncodedValue; pub const STACK_ALIGNMENT: usize = 16; pub const PARENT_STACK_OFFSET: usize = 0; -pub const PARENT_LINK_OFFSET: usize = 16; +pub const PARENT_LINK_OFFSET: usize = 32; pub type StackWord = u64; // This is a pretty special function that has no real signature. Its use is to @@ -144,7 +126,7 @@ global_asm!( ".balign 4", asm_function_begin!("stack_init_trampoline"), ".cfi_startproc", - cfi_signal_frame!(), + //cfi_signal_frame!(), // At this point our register state contains the following: // - SP points to the top of the parent stack. // - LR contains the return address in the parent context. @@ -153,19 +135,21 @@ global_asm!( // - R4 points to the base of the initial coroutine stack. // - R3 contains the argument passed from switch_and_link. // - // Save the R29, R30, and R31 of the parent context to the parent stack. + // Save the R2, R29, R30, and R31 of the parent context to the parent stack. 
// When combined with the return address this forms a valid frame record - // (R29, R30, R31 & LR) in the frame pointer chain. + // (R2, R29, R30, R31 & LR) in the frame pointer chain. "mflr 0", - "std 31, -8(1)", - "std 30, -16(1)", - "std 29, -24(1)", - "std 0, 16(1)", // Allocate a frame of 56 bytes (link area + space for R29, R30, and R31). "stdu 1, -56(1)", - // Write the parent stack pointer to the parent link (back chain slot) of - // the coroutine stack and adjust R4 to point to the parent link. - "addi 4, 4, -16", + "std 31, 48(1)", + "std 30, 40(1)", + "std 29, 32(1)", + // Save R2 and LR in the link area of the parent frame. + "std 2, 80(1)", + "std 0, 72(1)", + // Adjust R4 to point to the parent link and Write the parent stack pointer + // to the parent link (back chain slot) of the coroutine stack. + "addi 4, 4, -32", "std 1, 0(4)", // Switch to the coroutine stack. "mr 1, 5", @@ -177,7 +161,7 @@ global_asm!( // 5: byte length of the following DWARF expression // 0x8f 0x00: DW_OP_breg31 (31 + 0) // 0x06: DW_OP_deref - // 0x23, 0x20: DW_OP_plus_uconst 56 + // 0x23, 0x38: DW_OP_plus_uconst 56 ".cfi_escape 0x0f, 5, 0x8f, 0x00, 0x06, 0x23, 0x38", // Now we can tell the unwinder how to restore the 3 registers that were // pushed on the parent stack. These are described as offsets from the CFA @@ -185,10 +169,11 @@ global_asm!( ".cfi_offset r31, -8", ".cfi_offset r30, -16", ".cfi_offset r29, -24", + ".cfi_offset r2, 24", ".cfi_offset lr, 16", // Set up the 3rd argument to the initial function to point to the object // that init_stack() set up on the stack. - "addi 5, 1, 40", + "addi 5, 1, 32", // As in the original x86_64 code, hand-write the call operation so that it // doesn't push an entry into the CPU's return prediction stack. @@ -200,7 +185,7 @@ global_asm!( "addi 0, 6, 24", "mtlr 0", // load the initial function to R12 for function linkage. 
- "ld 12, 8(4)", + "ld 12, 16(4)", "mtctr 12", "bctr", asm_function_alt_entry!("stack_init_trampoline_return"), @@ -223,7 +208,7 @@ global_asm!( cfi_signal_frame!(), // At this point our register state contains the following: // - SP points to the top of the parent stack. - // - R29, R30, and R31 hold their value from the parent context. + // - R2, R29, R30, and R31 hold their value from the parent context. // - R5 is the function that should be called. // - R4 points to the top of our stack. // - R3 contains the argument to be passed to the function. @@ -234,12 +219,14 @@ global_asm!( "std 31, 48(1)", "std 30, 40(1)", "std 29, 32(1)", - // Save LR in the link area of the parent frame. + // Save R2 and LR in the link area of the parent frame. + "std 2, 80(1)", "std 0, 72(1)", ".cfi_def_cfa 1, 0", ".cfi_offset r31, 48", ".cfi_offset r30, 40", ".cfi_offset r29, 32", + ".cfi_offset r2, 80", ".cfi_offset lr, 72", // Switch to the new stack. "mr 31, 1", @@ -261,6 +248,7 @@ global_asm!( "ld 31, -8(1)", "ld 30, -16(1)", "ld 29, -24(1)", + "ld 2, 24(1)", "ld 12, 16(1)", "mtlr 12", "blr", @@ -286,9 +274,14 @@ extern "C" { pub unsafe fn init_stack(stack: &impl Stack, func: InitialFunc, obj: T) -> StackPointer { let mut sp = adjusted_stack_base(stack).get(); + // The following 4 slots form a fake 32-byte link area layout. + push(&mut sp, None); + // Initial function. push(&mut sp, Some(func as StackWord)); + push(&mut sp, None); + // Placeholder for parent link. push(&mut sp, None); @@ -296,12 +289,12 @@ pub unsafe fn init_stack(stack: &impl Stack, func: InitialFunc, obj: T) -> // STACK_ALIGNMENT. allocate_obj_on_stack(&mut sp, 16, obj); - // Entry point called by switch_and_link(). + // This mirrors the 32-byte link area layout. + push(&mut sp, None); + + // Set the LR slot with the entry point called by switch_and_link(). push(&mut sp, Some(stack_init_trampoline as StackWord)); - // Space for 32 bytes link area. 
- push(&mut sp, None); - push(&mut sp, None); push(&mut sp, None); push(&mut sp, None); @@ -332,8 +325,8 @@ pub unsafe fn switch_and_link( // DW_CFA_GNU_args_size that may have been set in the current function. cfi_reset_args_size_root!(), - // Read the saved PC from the coroutine stack and call it. - "ld 12, 32(5)", + // Read the saved PC from the link area of the coroutine stack and call it. + "ld 12, 16(5)", "mtctr 12", "bctrl", "nop", @@ -379,6 +372,8 @@ pub unsafe fn switch_and_link( /// context is available in the parent link on the stack. // This function must always be inlined because it is very sensitive to the // CPU's return address predictor. See stack_init_trampoline for more details. +// +// FIXME: Inlining this function fails tests. //#[inline(always)] pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> EncodedValue { let ret_val; @@ -386,19 +381,21 @@ pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> asm_may_unwind_yield!( // Save R29, R30, and R31. Ideally this would be done by specifying them as // clobbers but that is not possible since they are LLVM reserved - // registers. Also save our LR. + // registers. Also save our R2 and LR. "std 31, -8(1)", // Save Back chain "std 30, -16(1)", "std 29, -24(1)", - "stdu 1, -32(1)", // The space for R29, R30, R31. + "std 2, 24(1)", + "stdu 1, -64(1)", // The 32-byte link area and the space for R29, R30, R31. + // Get the return address. // FIXME: Workaroud for P9 and earlier that do not have pc-rel instructions. "bl 1f", "1:", "mflr 6", - "addi 0, 6, 44", + "addi 0, 6, 48", // Save return address in the parent frame. - "std 0, 32(1)", + "std 0, 16(1)", // Get the parent stack pointer from the parent link. "ld 5, 0(4)", @@ -406,10 +403,11 @@ pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> // Save our stack pointer to R4. "mr 4, 1", - // Restore R29, R30, R31, and LR from the parent stack. 
+ // Restore R2, R29, R30, R31, and LR from the parent stack. "ld 29, 32(5)", "ld 30, 40(5)", "ld 31, 48(5)", + "ld 2, 80(5)", "ld 12, 72(5)", // Get the LR. "mtlr 12", @@ -432,31 +430,33 @@ pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> // state contains the following: // - SP points to the top of the parent stack. // - LR contains the return address in the parent context. - // - R29 and R31 contain their value from the parent context. + // - R2, R29, R30, and R31 contain their value from the parent context. // - R5 points to the top of the coroutine stack. // - R4 points to the base of our stack. // - R3 contains the argument passed from switch_and_link. "0:", - // Push the R29, R30, R31, and LR values of the parent context onto the parent - // stack. + // Push the R2, R29, R30, R31, and LR values of the parent context onto + // the parent stack. "mflr 0", "addi 1, 1, -56", "std 31, 48(1)", "std 30, 40(1)", "std 29, 32(1)", + "std 2, 80(1)", "std 0, 72(1)", // Write the parent stack pointer to the parent link. - "std 1, -16(4)", + "std 1, -32(4)", // Switch to the coroutine stack while popping the saved registers. - "addi 1, 5, 32", + "addi 1, 5, 64", - // Load our R29, R30, and R31 values from the coroutine stack. + // Load our R2, R29, R30, and R31 values from the coroutine stack. "ld 31, -8(1)", "ld 30, -16(1)", "ld 29, -24(1)", + "ld 2, 24(1)", // Pass the argument in R3. inlateout("3") arg => ret_val, @@ -495,10 +495,11 @@ pub unsafe fn switch_and_reset(arg: EncodedValue, parent_link: *mut StackPointer // Load the parent context's stack pointer. "ld 5, 0({parent_link})", - // Restore R29, R30, R31, and LR from the parent stack. + // Restore R2, R29, R30, R31, and LR from the parent stack. 
"ld 29, 32(5)", "ld 30, 40(5)", "ld 31, 48(5)", + "ld 2, 80(5)", "ld 12, 72(5)", "mtlr 12", @@ -527,7 +528,7 @@ pub unsafe fn switch_and_throw( sp: StackPointer, stack_base: StackPointer, ) -> (EncodedValue, Option) { - extern "C-unwind " fn throw(forced_unwind: crate::unwind::ForcedUnwind) -> ! { + extern "C-unwind" fn throw(forced_unwind: crate::unwind::ForcedUnwind) -> ! { extern crate std; use std::boxed::Box; std::panic::resume_unwind(Box::new(forced_unwind)); @@ -541,7 +542,7 @@ pub unsafe fn switch_and_throw( "bl 0f", "0:", "mflr 6", - "addi 0, 6, 64", + "addi 0, 6, 72", // Save the parent context onto the parent stack. "mflr 0", @@ -549,19 +550,21 @@ pub unsafe fn switch_and_throw( "std 31, 48(1)", "std 30, 40(1)", "std 29, 32(1)", + "std 2, 80(1)", "std 0, 72(1)", // Write the parent stack pointer to the parent link. - "std 1, -16(4)", + "std 1, -32(4)", // Switch to the coroutine stack while popping the saved registers. - "addi 1, 5, 32", + "addi 1, 5, 64", // Load the coroutine registers, with the saved LR value into LR. "ld 31, -8(1)", "ld 30, -16(1)", "ld 29, -24(1)", - "ld 0, 224(1)", + "ld 2, 24(1)", + "ld 0, 16(1)", "mtlr 0", // DW_CFA_GNU_args_size 0 @@ -575,7 +578,7 @@ pub unsafe fn switch_and_throw( // Simulate a call with an artificial return address so that the throw // function will unwind straight into the switch_and_yield() call with // the register state expected outside the asm! block. - "b throw", + "b {throw}", // Upon returning, our register state is just like a normal return into // switch_and_link(). @@ -585,7 +588,7 @@ pub unsafe fn switch_and_throw( "addi 1, 5, 56", // Helper function to trigger stack unwinding. - throw = throw, + throw = sym throw, // Argument to pass to the throw function. 
in("3") forced_unwind.0.get(), @@ -615,7 +618,7 @@ pub unsafe fn drop_initial_obj( stack_ptr: StackPointer, drop_fn: unsafe fn(ptr: *mut u8), ) { - let ptr = (stack_ptr.get() as *mut u8).add(40); + let ptr = (stack_ptr.get() as *mut u8).add(32); drop_fn(ptr); } From ba617a3e9889933568b036174dd170dc6b4cc8f2 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 26 Nov 2025 15:22:33 +0000 Subject: [PATCH 3/4] Rework PowerPC implementation to use a new stack layout This also fixes various issues so that all the tests pass. --- src/arch/mod.rs | 2 +- src/arch/powerpc64.rs | 373 ++++++++++++++++------------------------- src/stack/unix.rs | 17 +- src/tests/coroutine.rs | 20 +-- src/tests/on_stack.rs | 10 +- 5 files changed, 148 insertions(+), 274 deletions(-) diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 750afc8e..73a3c240 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -164,7 +164,7 @@ cfg_if::cfg_if! { } else if #[cfg(all(target_arch = "loongarch64", not(windows)))] { mod loongarch64; pub use self::loongarch64::*; - } else if #[cfg(all(target_arch = "powerpc64", not(windows)))] { + } else if #[cfg(all(target_arch = "powerpc64", target_abi = "elfv2", not(windows)))] { mod powerpc64; pub use self::powerpc64::*; } else { diff --git a/src/arch/powerpc64.rs b/src/arch/powerpc64.rs index e21d5c3b..ad2733c8 100644 --- a/src/arch/powerpc64.rs +++ b/src/arch/powerpc64.rs @@ -7,12 +7,12 @@ //! //! ## Stack layout //! -//! Here is what the layout of the stack looks like when a coroutine is -//! suspended. -//! //! Note: "link area" refers to the minimal 32-byte stack frame header that is //! required by the PowerPC ABI. //! +//! Here is what the layout of the stack looks like when a coroutine is +//! suspended. +//! //! ```text //! +--------------+ <- Stack base //! | Padding | <- @@ -27,18 +27,14 @@ //! ~ ... ~ //! | | //! +--------------+ -//! | | -//! | Link area | <- 32-byte link area of the last frame before the suspend. -//! 
| | The LR field holds the PC to resume execution at. +//! | | <- 32-byte link area of the last frame before the suspend. +//! | Link area | | The LR field holds the PC to resume execution at. +//! | | <- The TOC field holds the R2 to resume execution with. //! +--------------+ <- Saved stack pointer points here. //! | Saved R31 | //! +--------------+ //! | Saved R30 | //! +--------------+ -//! | Saved R29 | -//! +--------------+ -//! | Saved R2 | -//! +--------------+ //! ``` //! //! And this is the layout of the parent stack when a coroutine is running: @@ -48,18 +44,14 @@ //! ~ ... ~ //! | | //! +-----------+ -//! | | -//! | Link area | <- 32-byte link area of the last frame before the suspend. -//! | | The LR field holds the PC to resume execution at. +//! | | <- 32-byte link area of the last frame before the suspend. +//! | Link area | | The LR field holds the PC to resume execution at. +//! | | <- The TOC field holds the R2 to resume execution with. //! +-----------+ <- Parent link points here. //! | Saved R31 | //! +-----------+ //! | Saved R30 | //! +-----------+ -//! | Saved R29 | -//! +-----------+ -//! | Saved R2 | -//! +-----------+ //! ``` //! //! And finally, this is the stack layout of a coroutine that has just been @@ -87,14 +79,6 @@ //! +--------------+ | //! | Padding | <- //! +--------------+ <- Initial SP points here -//! | Saved R31 | -//! +--------------+ -//! | Saved R30 | -//! +--------------+ -//! | Saved R29 | -//! +--------------+ -//! | Saved R2 | -//! +--------------+ <- stack top //! ``` use core::arch::{asm, global_asm}; @@ -109,82 +93,69 @@ use crate::unwind::{ use crate::util::EncodedValue; pub const STACK_ALIGNMENT: usize = 16; -pub const PARENT_STACK_OFFSET: usize = 0; +pub const PARENT_STACK_OFFSET: usize = 16; pub const PARENT_LINK_OFFSET: usize = 32; pub type StackWord = u64; -// This is a pretty special function that has no real signature. Its use is to -// be the "base" function of all coroutines. 
This entrypoint is used in -// init_stack() to bootstrap the execution of a new coroutine. -// -// We also use this function as a persistent frame on the stack to emit dwarf -// information to unwind into the caller. This allows us to unwind from the -// coroutines's stack back to the main stack that the coroutine was called from. -// We use special dwarf directives here to do so since this is a pretty -// nonstandard function. global_asm!( ".balign 4", asm_function_begin!("stack_init_trampoline"), ".cfi_startproc", - //cfi_signal_frame!(), + cfi_signal_frame!(), // At this point our register state contains the following: // - SP points to the top of the parent stack. // - LR contains the return address in the parent context. - // - R29, R30, and R31 contain their values from the parent context. + // - R30 and R31 contain their values from the parent context. // - R5 points to the top of the initial coroutine stack. // - R4 points to the base of the initial coroutine stack. // - R3 contains the argument passed from switch_and_link. // - // Save the R2, R29, R30, and R31 of the parent context to the parent stack. - // When combined with the return address this forms a valid frame record - // (R2, R29, R30, R31 & LR) in the frame pointer chain. + // Save the R2, R30, R31 and LR of the parent context to the parent stack. "mflr 0", - // Allocate a frame of 56 bytes (link area + space for R29, R30, and R31). - "stdu 1, -56(1)", - "std 31, 48(1)", - "std 30, 40(1)", - "std 29, 32(1)", - // Save R2 and LR in the link area of the parent frame. - "std 2, 80(1)", - "std 0, 72(1)", - // Adjust R4 to point to the parent link and Write the parent stack pointer - // to the parent link (back chain slot) of the coroutine stack. - "addi 4, 4, -32", - "std 1, 0(4)", + "std 0, 16(1)", + "std 31, -8(1)", + "std 30, -16(1)", + // Write the parent stack pointer to the parent link and adjust R4 to point + // to the parent link. + "stdu 1, -32(4)", // Switch to the coroutine stack. 
"mr 1, 5", - // Set up the frame pointer to point at the parent link. This is needed for - // the unwinding code below. - "mr 31, 4", + // Create a fake frame to complete the back-chain to the parent link frame. + "std 4, 0(1)", // The actual meanings of the magic bytes are: // 0x0f: DW_CFA_def_cfa_expression - // 5: byte length of the following DWARF expression - // 0x8f 0x00: DW_OP_breg31 (31 + 0) + // 4: byte length of the following DWARF expression + // 0x71 0x00: DW_OP_breg1 (1 + 0) + // 0x06: DW_OP_deref // 0x06: DW_OP_deref - // 0x23, 0x38: DW_OP_plus_uconst 56 - ".cfi_escape 0x0f, 5, 0x8f, 0x00, 0x06, 0x23, 0x38", - // Now we can tell the unwinder how to restore the 3 registers that were + // + // The double deref is used to first get the address of the parent link from + // the backchain field of our frame and then dereference the parent link to + // get the actual parent stack pointer value. + ".cfi_escape 0x0f, 4, 0x71, 0x00, 0x06, 0x06", + // Now we can tell the unwinder how to restore the registers that were // pushed on the parent stack. These are described as offsets from the CFA // that we just calculated. ".cfi_offset r31, -8", ".cfi_offset r30, -16", - ".cfi_offset r29, -24", - ".cfi_offset r2, 24", ".cfi_offset lr, 16", // Set up the 3rd argument to the initial function to point to the object // that init_stack() set up on the stack. "addi 5, 1, 32", + // Calculate the address of stack_init_trampoline_return in a way that works + // with position-independent code. Since POWER9 doesn't have PC-relative + // addressing, we use a call instruction instead. + "bl 2f", + "2:", + // We now have the address of "2" in LR. Ideally we would adjust it + // so that it points to stack_init_trampoline_return but this isn't actually + // necessary since the initial function never returns. The unwinding + // information at 2: and stack_init_trampoline_return is identical so we + // can just leave the return address as it is. 
+ // // As in the original x86_64 code, hand-write the call operation so that it - // doesn't push an entry into the CPU's return prediction stack. - - // Set the return address in LR. - // FIXME: Workaround for P9 and earlier that do not have pc-rel instructions. - "bl 0f", - "0:", - "mflr 6", - "addi 0, 6, 24", - "mtlr 0", - // load the initial function to R12 for function linkage. + // doesn't push an entry into the CPU's return prediction stack. The ABI + // requires that the function address be in R12. "ld 12, 16(4)", "mtctr 12", "bctr", @@ -195,10 +166,6 @@ global_asm!( asm_function_end!("stack_init_trampoline"), ); -// This function calls a function pointer on a new stack and restores the -// original stack upon returning. It is used by on_stack() and is much simpler -// than the full coroutine logic, but also more limited since yielding is not -// possible. global_asm!( // See stack_init_trampoline for an explanation of the assembler directives // used here. @@ -208,49 +175,34 @@ global_asm!( cfi_signal_frame!(), // At this point our register state contains the following: // - SP points to the top of the parent stack. - // - R2, R29, R30, and R31 hold their value from the parent context. - // - R5 is the function that should be called. + // - R12 is the function that should be called. // - R4 points to the top of our stack. // - R3 contains the argument to be passed to the function. // - // Create a stack frame and point the frame pointer at it. + // Save the LR of the parent context to the parent stack. "mflr 0", - "stdu 1, -56(1)", - "std 31, 48(1)", - "std 30, 40(1)", - "std 29, 32(1)", - // Save R2 and LR in the link area of the parent frame. - "std 2, 80(1)", - "std 0, 72(1)", - ".cfi_def_cfa 1, 0", - ".cfi_offset r31, 48", - ".cfi_offset r30, 40", - ".cfi_offset r29, 32", - ".cfi_offset r2, 80", - ".cfi_offset lr, 72", + "std 0, 16(1)", + // Allocate a stack frame on the new stack, saving the parent stack pointer + // in the back-chain slot. 
+ "stdu 1, -32(4)", // Switch to the new stack. - "mr 31, 1", "mr 1, 4", - // FIXME: Workaround for P9 and earlier that do not have pc-rel instructions. - "bl 0f", - "0:", - "mflr 6", - "addi 0, 6, 24", - "mtlr 0", + // The actual meanings of the magic bytes are: + // 0x0f: DW_CFA_def_cfa_expression + // 3: byte length of the following DWARF expression + // 0x71 0x00: DW_OP_breg1 (1 + 0) + // 0x06: DW_OP_deref + ".cfi_escape 0x0f, 3, 0x71, 0x00, 0x06", + ".cfi_offset lr, 16", // Call the function pointer. The argument is already in the correct - // register for the function. - "mr 12, 5", + // register for the ABI linkage. "mtctr 12", - "bctr", - // Switch back to the original stack by restoring from the frame pointer, - // then return. - "addi 1, 31, 56", - "ld 31, -8(1)", - "ld 30, -16(1)", - "ld 29, -24(1)", - "ld 2, 24(1)", - "ld 12, 16(1)", - "mtlr 12", + "bctrl", + // Switch back to the original stack. + "ld 1, 0(1)", + // Load the original LR and return. + "ld 0, 16(1)", + "mtlr 0", "blr", ".cfi_endproc", asm_function_end!("stack_call_trampoline"), @@ -265,11 +217,6 @@ extern "C" { fn stack_call_trampoline(arg: *mut u8, sp: StackPointer, f: StackCallFunc); } -/// Sets up the initial state on a stack so that the given function is -/// executed on the first switch to this stack. -/// -/// The given object is written to the stack and its address on the stack is -/// passed as the 3rd argument to the initial function. #[inline] pub unsafe fn init_stack(stack: &impl Stack, func: InitialFunc, obj: T) -> StackPointer { let mut sp = adjusted_stack_base(stack).get(); @@ -287,7 +234,7 @@ pub unsafe fn init_stack(stack: &impl Stack, func: InitialFunc, obj: T) -> // Allocate space on the stack for the initial object, rounding to // STACK_ALIGNMENT. - allocate_obj_on_stack(&mut sp, 16, obj); + allocate_obj_on_stack(&mut sp, 32, obj); // This mirrors the 32-byte link area layout. 
push(&mut sp, None); @@ -301,13 +248,6 @@ pub unsafe fn init_stack(stack: &impl Stack, func: InitialFunc, obj: T) -> StackPointer::new_unchecked(sp) } -/// This function is used to transfer control to a coroutine along with an -/// argument. A pointer back to our context is stored at a fixed offset from -/// the base of the target stack. -/// -/// When another context switches back to us, we receive the argument they sent -/// as well as the stack pointer of the originating context. This can be `None` -/// if the caller used `switch_and_reset` and can't be returned to. #[inline] pub unsafe fn switch_and_link( arg: EncodedValue, @@ -325,22 +265,26 @@ pub unsafe fn switch_and_link( // DW_CFA_GNU_args_size that may have been set in the current function. cfi_reset_args_size_root!(), + // Save R2 in the slot in the parent stack frame. This must be done here + // to properly support unwinding. + "std 2, 24(1)", + // Read the saved PC from the link area of the coroutine stack and call it. - "ld 12, 16(5)", - "mtctr 12", + "ld 0, 16(5)", + "mtctr 0", "bctrl", - "nop", + + // The unwinder sees this instruction immediately after the call and + // will restore R2 automatically. + "ld 2, 24(1)", // Upon returning from switch_yield or switch_and_reset, our register // state contains the following: - // - R5: parant stack pointer. + // - SP: parant stack pointer. // - R4: The top of the coroutine stack, or 0 if coming from // switch_and_reset. // - R3: The return value from the coroutine. - // Switch back to parent stack and free the saved registers. - "addi 1, 5, 56", - // Pass the argument in R3. 
inlateout("3") arg => ret_val, @@ -357,59 +301,45 @@ pub unsafe fn switch_and_link( lateout("14") _, lateout("15") _, lateout("16") _, lateout("17") _, lateout("18") _, lateout("19") _, lateout("20") _, lateout("21") _, lateout("22") _, lateout("23") _, lateout("24") _, lateout("25") _, - lateout("26") _, lateout("27") _, lateout("28") _, + lateout("26") _, lateout("27") _, lateout("28") _, lateout("29") _, clobber_abi("C"), ); (ret_val, StackPointer::new(ret_sp)) } -/// This function performs the inverse of `switch_and_link` by returning -/// control to the parent context. -/// -/// This function does not return a stack pointer value for the parent context -/// when it switches back to us. Instead, the stack pointer value for the parent -/// context is available in the parent link on the stack. -// This function must always be inlined because it is very sensitive to the -// CPU's return address predictor. See stack_init_trampoline for more details. -// -// FIXME: Inlining this function fails tests. -//#[inline(always)] +#[inline(always)] pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> EncodedValue { let ret_val; asm_may_unwind_yield!( - // Save R29, R30, and R31. Ideally this would be done by specifying them as - // clobbers but that is not possible since they are LLVM reserved - // registers. Also save our R2 and LR. - "std 31, -8(1)", // Save Back chain + // Save R2, R30 and R31. Ideally this would be done by specifying them + // as clobbers but that is not possible since they are LLVM reserved + // registers. + "std 31, -8(1)", "std 30, -16(1)", - "std 29, -24(1)", "std 2, 24(1)", - "stdu 1, -64(1)", // The 32-byte link area and the space for R29, R30, R31. // Get the return address. // FIXME: Workaroud for P9 and earlier that do not have pc-rel instructions. "bl 1f", "1:", "mflr 6", - "addi 0, 6, 48", + "addi 6, 6, 0f - 1b", // Save return address in the parent frame. 
- "std 0, 16(1)", - - // Get the parent stack pointer from the parent link. - "ld 5, 0(4)", + "std 6, 16(1)", // Save our stack pointer to R4. "mr 4, 1", - // Restore R2, R29, R30, R31, and LR from the parent stack. - "ld 29, 32(5)", - "ld 30, 40(5)", - "ld 31, 48(5)", - "ld 2, 80(5)", - "ld 12, 72(5)", // Get the LR. - "mtlr 12", + // Switch to the parent stack from the parent link. + "ld 1, 0(5)", + + // Restore R2, R30, R31, and PC from the parent stack. + "ld 31, -8(1)", + "ld 30, -16(1)", + "ld 0, 16(1)", + "mtlr 0", // DW_CFA_GNU_args_size 0 // @@ -424,39 +354,37 @@ pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> // Return into the parent context "blr", - "nop", // This gets called by switch_and_link(). At this point our register // state contains the following: // - SP points to the top of the parent stack. // - LR contains the return address in the parent context. - // - R2, R29, R30, and R31 contain their value from the parent context. + // - R2, R30, and R31 contain their value from the parent context. // - R5 points to the top of the coroutine stack. // - R4 points to the base of our stack. // - R3 contains the argument passed from switch_and_link. "0:", - // Push the R2, R29, R30, R31, and LR values of the parent context onto + // This must be the first instruction at the return address for the + // unwinder to properly restore R2. + "ld 2, 24(1)", + + // Save the R30, R31 and LR values of the parent context to // the parent stack. "mflr 0", - "addi 1, 1, -56", - "std 31, 48(1)", - "std 30, 40(1)", - "std 29, 32(1)", - "std 2, 80(1)", - "std 0, 72(1)", + "std 31, -8(1)", + "std 30, -16(1)", + "std 0, 16(1)", // Write the parent stack pointer to the parent link. "std 1, -32(4)", - // Switch to the coroutine stack while popping the saved registers. - "addi 1, 5, 64", + // Switch to the coroutine stack. + "mr 1, 5", - // Load our R2, R29, R30, and R31 values from the coroutine stack. 
+ // Load R30 and R31 values from the coroutine stack. "ld 31, -8(1)", "ld 30, -16(1)", - "ld 29, -24(1)", - "ld 2, 24(1)", // Pass the argument in R3. inlateout("3") arg => ret_val, @@ -469,39 +397,26 @@ pub unsafe fn switch_yield(arg: EncodedValue, parent_link: *mut StackPointer) -> lateout("14") _, lateout("15") _, lateout("16") _, lateout("17") _, lateout("18") _, lateout("19") _, lateout("20") _, lateout("21") _, lateout("22") _, lateout("23") _, lateout("24") _, lateout("25") _, - lateout("26") _, lateout("27") _, lateout("28") _, + lateout("26") _, lateout("27") _, lateout("28") _, lateout("29") _, clobber_abi("C"), ); ret_val } -/// Variant of `switch_yield` used when returning from the initial function in a -/// context. -/// -/// This works by returning a stack pointer value of 0 which prevents the -/// current context from being resumed. There must not be any object left on the -/// stack with pending destructors when this is called. -/// -/// Since the stack is still available at this point, `arg` can safely point to -/// memory on the stack until the parent context frees or reuses the stack. -// This function must always be inlined because it is very sensitive to the -// CPU's return address predictor. See stack_init_trampoline for more details. #[inline(always)] pub unsafe fn switch_and_reset(arg: EncodedValue, parent_link: *mut StackPointer) -> ! { // Most of this code is identical to switch_yield(), refer to the // comments there. Only the differences are commented. asm!( - // Load the parent context's stack pointer. - "ld 5, 0({parent_link})", + // Load the parent context's stack pointer and switch to it. + "ld 1, 0({parent_link})", - // Restore R2, R29, R30, R31, and LR from the parent stack. - "ld 29, 32(5)", - "ld 30, 40(5)", - "ld 31, 48(5)", - "ld 2, 80(5)", - "ld 12, 72(5)", - "mtlr 12", + // Restore R30, R31, and PC from the parent stack. 
+ "ld 31, -8(1)", + "ld 30, -16(1)", + "ld 0, 16(1)", + "mtlr 0", // Return into the parent context "blr", @@ -518,9 +433,6 @@ pub unsafe fn switch_and_reset(arg: EncodedValue, parent_link: *mut StackPointer ) } -/// Variant of `switch_and_link` which runs a function on the coroutine stack -/// instead of resuming the coroutine. This function will throw an exception -/// which will unwind the coroutine stack to its root. #[inline] #[cfg(feature = "asm-unwind")] pub unsafe fn switch_and_throw( @@ -539,33 +451,30 @@ pub unsafe fn switch_and_throw( asm_may_unwind_root!( // Set up a return address. // FIXME: Workaroud for P9 and earlier that do not have pc-rel instructions. - "bl 0f", - "0:", + "bl 1f", + "1:", "mflr 6", - "addi 0, 6, 72", + "addi 6, 6, 0f - 1b", - // Save the parent context onto the parent stack. - "mflr 0", - "stdu 1, -56(1)", - "std 31, 48(1)", - "std 30, 40(1)", - "std 29, 32(1)", - "std 2, 80(1)", - "std 0, 72(1)", + // Save the R2, R30, R31 and LR of the parent context to the parent stack. + "std 6, 16(1)", + "std 31, -8(1)", + "std 30, -16(1)", + "std 2, 24(1)", - // Write the parent stack pointer to the parent link. + // Update the parent link near the base of the coroutine stack. "std 1, -32(4)", - // Switch to the coroutine stack while popping the saved registers. - "addi 1, 5, 64", + // Switch to the coroutine stack. + "mr 1, 5", // Load the coroutine registers, with the saved LR value into LR. - "ld 31, -8(1)", - "ld 30, -16(1)", - "ld 29, -24(1)", - "ld 2, 24(1)", "ld 0, 16(1)", "mtlr 0", + "ld 31, -8(1)", + "ld 30, -16(1)", + // We don't need to load the TOC pointer here, either `throw` already + // uses our TOC or the linker will insert a stub to initialize R2. // DW_CFA_GNU_args_size 0 // @@ -583,9 +492,7 @@ pub unsafe fn switch_and_throw( // Upon returning, our register state is just like a normal return into // switch_and_link(). "0:", - - // Switch back to our stack and free the saved registers. 
- "addi 1, 5, 56", + "ld 2, 24(1)", // Helper function to trigger stack unwinding. throw = sym throw, @@ -605,7 +512,7 @@ pub unsafe fn switch_and_throw( lateout("14") _, lateout("15") _, lateout("16") _, lateout("17") _, lateout("18") _, lateout("19") _, lateout("20") _, lateout("21") _, lateout("22") _, lateout("23") _, lateout("24") _, lateout("25") _, - lateout("26") _, lateout("27") _, lateout("28") _, + lateout("26") _, lateout("27") _, lateout("28") _, lateout("29") _, clobber_abi("C"), ); @@ -634,8 +541,8 @@ pub unsafe fn drop_initial_obj( /// /// ``` /// # use corosensei::trap::TrapHandlerRegs; -/// # let regs = TrapHandlerRegs { pc: 0, sp: 0, r3: 0, r4: 0, r31: 0, lr: 0 }; -/// let TrapHandlerRegs { pc, sp, r3, r4, r31, lr } = regs; +/// # let regs = TrapHandlerRegs { pc: 0, sp: 0, r3: 0, r4: 0, r12: 0, lr: 0 }; +/// let TrapHandlerRegs { pc, sp, r3, r4, r12, lr } = regs; /// ``` #[allow(missing_docs)] #[derive(Clone, Copy, Debug)] @@ -644,7 +551,7 @@ pub struct TrapHandlerRegs { pub sp: u64, pub r3: u64, pub r4: u64, - pub r31: u64, + pub r12: u64, pub lr: u64, } @@ -659,15 +566,14 @@ pub unsafe fn setup_trap_trampoline( // Everything below this can be overwritten. Write the object to the stack. let mut sp = parent_link; - allocate_obj_on_stack(&mut sp, 16, val); + allocate_obj_on_stack(&mut sp, 32, val); + let val_ptr = sp; - // Space for 32 bytes link area. - push(&mut sp, None); + // Create a back-chain pointing to the parent link. push(&mut sp, None); push(&mut sp, None); push(&mut sp, None); - - let val_ptr = sp; + push(&mut sp, Some(parent_link as StackWord)); // Set up registers for entry into the function. 
TrapHandlerRegs { @@ -675,7 +581,7 @@ pub unsafe fn setup_trap_trampoline( sp: sp as u64, r3: val_ptr as u64, r4: parent_link as u64, - r31: parent_link as u64, + r12: handler as u64, lr: stack_init_trampoline_return.as_ptr() as u64, } } @@ -692,11 +598,14 @@ pub unsafe fn on_stack(arg: *mut u8, stack: impl Stack, f: StackCallFunc) { asm_may_unwind_root!( // DW_CFA_GNU_args_size 0 cfi_reset_args_size_root!(), + // R2 needs to be saved and restored here because the unwinder + // specifically looks for the "ld" instruction as a sign to restore R2. + "std 2, 24(1)", concat!("bl ", asm_mangle!("stack_call_trampoline")), - "nop", + "ld 2, 24(1)", in("3") arg, in("4") adjusted_stack_base(&stack).get(), - in("5") f, + in("12") f, clobber_abi("C"), ); } diff --git a/src/stack/unix.rs b/src/stack/unix.rs index 7b5e3819..ff600905 100644 --- a/src/stack/unix.rs +++ b/src/stack/unix.rs @@ -50,17 +50,10 @@ impl DefaultStack { return Err(Error::last_os_error()); } - // A 32-byte link area is reserved above the base, as defined by the - // 64-bit ELF PowerPC stack frame layout. - let link_area = if cfg!(target_arch = "powerpc64") { - 32 - } else { - 0 - }; // Create the result here. If the mprotect call fails then this will // be dropped and the memory will be unmapped. let out = Self { - base: StackPointer::new(mmap as usize + mmap_len - link_area).unwrap(), + base: StackPointer::new(mmap as usize + mmap_len).unwrap(), mmap_len, valgrind: ManuallyDrop::new(ValgrindStackRegistration::new( mmap as *mut u8, @@ -95,13 +88,7 @@ impl Drop for DefaultStack { // De-register the stack first. ManuallyDrop::drop(&mut self.valgrind); - // A 32-byte link area is reserved above the base on PowerPC. 
- let link_area = if cfg!(target_arch = "powerpc64") { - 32 - } else { - 0 - }; - let mmap = self.base.get() - self.mmap_len + link_area; + let mmap = self.base.get() - self.mmap_len; let ret = libc::munmap(mmap as _, self.mmap_len); debug_assert_eq!(ret, 0); } diff --git a/src/tests/coroutine.rs b/src/tests/coroutine.rs index d007da3e..7cd2f137 100644 --- a/src/tests/coroutine.rs +++ b/src/tests/coroutine.rs @@ -54,15 +54,7 @@ fn suspend_and_resume() { } // Linked backtraces are not supported on x86 Windows. -// Linked backtraces are not supported on powerpc64 Linux yet due to the info -// generated by the (nightly) compiler. -#[cfg_attr( - any( - all(windows, target_arch = "x86"), - all(target_arch = "powerpc64", target_os = "linux") - ), - ignore -)] +#[cfg_attr(all(windows, target_arch = "x86"), ignore)] #[test] fn backtrace_traces_to_host() { #[inline(never)] // try to get this to show up in backtraces @@ -368,8 +360,6 @@ fn forward_stack_address() { std::panic::resume_unwind(result.unwrap_err()); } -// Trap handler is not supported on powerpc64 Linux. -#[cfg_attr(all(target_arch = "powerpc64", target_os = "linux"), ignore)] #[test] fn trap_handler() { trap_handler::setup_handler(); @@ -391,8 +381,6 @@ fn trap_handler() { } #[cfg(feature = "unwind")] -// Trap handler is not supported on powerpc64 Linux. -#[cfg_attr(all(target_arch = "powerpc64", target_os = "linux"), ignore)] #[test] #[should_panic = "foobar"] fn trap_handler_panic() { @@ -414,8 +402,6 @@ fn trap_handler_panic() { assert_eq!(coroutine.resume(()), CoroutineResult::Return(42)); } -// Trap handler is not supported on powerpc64 Linux. 
-#[cfg_attr(all(target_arch = "powerpc64", target_os = "linux"), ignore)] #[test] fn stack_overflow() { trap_handler::setup_handler(); @@ -639,12 +625,12 @@ mod trap_handler { context.uc_mcontext.__gregs[5] = a1; context.uc_mcontext.__gregs[22] = fp; } else if #[cfg(all(target_os = "linux", target_arch = "powerpc64"))] { - let TrapHandlerRegs { pc, sp, r3, r4, r31, lr } = regs; + let TrapHandlerRegs { pc, sp, r3, r4, r12, lr } = regs; (*context.uc_mcontext.regs).nip = pc; (*context.uc_mcontext.regs).gpr[1] = sp; (*context.uc_mcontext.regs).gpr[3] = r3; (*context.uc_mcontext.regs).gpr[4] = r4; - (*context.uc_mcontext.regs).gpr[31] = r31; + (*context.uc_mcontext.regs).gpr[12] = r12; (*context.uc_mcontext.regs).link = lr; } else { compile_error!("Unsupported platform"); diff --git a/src/tests/on_stack.rs b/src/tests/on_stack.rs index 5ac53881..15cd2f4d 100644 --- a/src/tests/on_stack.rs +++ b/src/tests/on_stack.rs @@ -21,15 +21,7 @@ fn smoke() { } // Linked backtraces are not supported on x86 Windows. -// Linked backtraces are not supported on powerpc64 Linux yet due to the info -// generated by the (nightly) compiler. 
-#[cfg_attr( - any( - all(windows, target_arch = "x86"), - all(target_arch = "powerpc64", target_os = "linux") - ), - ignore -)] +#[cfg_attr(all(windows, target_arch = "x86"), ignore)] #[test] fn backtrace_traces_to_host() { #[inline(never)] // try to get this to show up in backtraces From be2b78535173f4b93c0bf7f81fb1cabcbfb13a48 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 3 Dec 2025 00:59:09 +0000 Subject: [PATCH 4/4] Add powerpc64le-unknown-linux-gnu to README and CI --- .github/workflows/ci.yml | 7 +++++++ README.md | 1 + src/lib.rs | 1 + 3 files changed, 9 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06e2bea3..89df8f56 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,7 @@ jobs: i686-pc-windows-gnu, i686-pc-windows-msvc, loongarch64-unknown-linux-gnu, + powerpc64le-unknown-linux-gnu, ] channel: [stable, nightly] include: @@ -79,3 +80,9 @@ jobs: os: windows-latest - target: loongarch64-unknown-linux-gnu os: ubuntu-latest + - target: powerpc64le-unknown-linux-gnu + os: ubuntu-latest + exclude: + # PowerPC inline asm is still unstable + - target: powerpc64le-unknown-linux-gnu + channel: stable diff --git a/README.md b/README.md index 45055cf5..2bac2ca8 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ This crate currently supports the following targets: | ARM | ✅ | ❌ | ❌ | | RISC-V | ✅ | ❌ | ❌ | | LoongArch64 | ✅ | ❌ | ❌ | +| PowerPC64 | ✅ | ❌ | ❌ | \* Linked backtraces are not supported on x86 Windows. diff --git a/src/lib.rs b/src/lib.rs index d1cda0fc..081dbf0d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,6 +79,7 @@ //! | ARM | ✅ | ❌ | ❌ | //! | RISC-V | ✅ | ❌ | ❌ | //! | LoongArch64 | ✅ | ❌ | ❌ | +//! | PowerPC64 | ✅ | ❌ | ❌ | //! //! *\* Linked backtraces are not supported on x86 Windows.* //!