From 5ca7464c00d5e44de8a1f3ad44c6bca4975c9ac2 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 00:49:13 +0200 Subject: [PATCH 01/27] wip --- examples/cpp/pyperf/CMakeLists.txt | 3 +-- examples/cpp/pyperf/PyPerfBPFProgram.cc | 27 +++++++++++++++---- examples/cpp/pyperf/PyPerfCollapsedPrinter.cc | 6 ++--- examples/cpp/pyperf/PyPerfType.h | 6 ++--- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt index 271ff80a6e99..a2c16123d4e4 100644 --- a/examples/cpp/pyperf/CMakeLists.txt +++ b/examples/cpp/pyperf/CMakeLists.txt @@ -17,9 +17,8 @@ add_executable(PyPerf PyPerfVersion.cc PyPerfProc.cc PyOffsets.cc - PyPerfNativeStackTrace.cc ) -target_link_libraries(PyPerf pthread libunwind-ptrace.a libunwind-x86_64.a libunwind.a lzma) +target_link_libraries(PyPerf pthread lzma) if(NOT CMAKE_USE_LIBBPF_PACKAGE) target_link_libraries(PyPerf bcc-static) else() diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index bd320922b678..04306bc0af65 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -215,13 +215,13 @@ get_task_thread_id(struct task_struct const *task, enum pthreads_impl pthreads_i // For glibc, corresponds to THREAD_SELF in "tls.h" in glibc source. // For musl, see definition of `__pthread_self`. -#ifdef __x86_64__ int ret; uint64_t fsbase; // HACK: Usually BCC would translate a deref of the field into `read_kernel` for us, but it // doesn't detect it due to the macro (because it transforms before preprocessing). bpf_probe_read_kernel(&fsbase, sizeof(fsbase), (u8*)task + FS_OFS); +#ifdef __x86_64__ switch (pthreads_impl) { case PTI_GLIBC: // 0x10 = offsetof(tcbhead_t, self) @@ -238,16 +238,33 @@ get_task_thread_id(struct task_struct const *task, enum pthreads_impl pthreads_i // driver passed bad value return ERROR_INVALID_PTHREADS_IMPL; } +#elif defined(__aarch64__) + switch (pthreads_impl) { + case PTI_GLIBC: + // TODO const bad + ret = fsbase - 0x6f0; + break; + + case PTI_MUSL: + // TODO ensure really same as x86 + // __pthread_self / __get_tp reads %fs:0x0 + // which corresponds to the field "self" in struct pthread + ret = bpf_probe_read_user(thread_id, sizeof(*thread_id), (void *)fsbase); + break; + + default: + // driver passed bad value + return ERROR_INVALID_PTHREADS_IMPL; + } +#else +#error "Unsupported platform" +#endif // __x86_64__ if (ret < 0) { return ERROR_BAD_FSBASE; } return ERROR_NONE; - -#else // __x86_64__ -#error "Unsupported platform" -#endif // __x86_64__ } // this function is trivial, but we need to do map lookup in separate function, diff --git a/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc b/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc index 12d3b1df6a03..e2b6090d18b4 100644 --- a/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc +++ b/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc @@ -132,14 +132,14 @@ void PyPerfCollapsedPrinter::processSamples( } } } - +/* nativeStackErrors += static_cast(sample.nativeStack.error_occured()); auto native_symbols = sample.nativeStack.get_stack_symbol(); for (auto it = native_symbols.crbegin(); it != native_symbols.crend(); ++it) { auto sym = *it; std::fprintf(output_file, ";%s_[pn]", sym.c_str()); } - +*/ if (sample.kernelStackId > 0) { auto symbols = kernelStacks.get_stack_symbol(sample.kernelStackId, -1); for (auto it = symbols.crbegin(); it != symbols.crend(); ++it) { @@ -161,7 +161,7 @@ void PyPerfCollapsedPrinter::processSamples( std::fprintf(stderr, "%d Python symbol errors\n", symbolErrors); std::fprintf(stderr, "%d times Python symbol lost\n", lostSymbols); std::fprintf(stderr, "%d kernel stack errors\n", kernelStackErrors); - std::fprintf(stderr, "%d native stack errors\n", nativeStackErrors); + // std::fprintf(stderr, "%d native stack errors\n", nativeStackErrors); std::fprintf(stderr, "%d errors\n", errors); if (!output_.empty()) { diff --git a/examples/cpp/pyperf/PyPerfType.h b/examples/cpp/pyperf/PyPerfType.h index 893f9eded730..63d2a2710219 100644 --- a/examples/cpp/pyperf/PyPerfType.h +++ b/examples/cpp/pyperf/PyPerfType.h @@ -231,7 +231,7 @@ struct PyPerfSample { uint8_t stackStatus; int32_t kernelStackId; std::vector pyStackIds; - NativeStackTrace nativeStack; + // NativeStackTrace nativeStack; explicit PyPerfSample(const Event* raw, int rawSize) : pid(raw->pid), @@ -240,9 +240,7 @@ struct PyPerfSample { errorCode(raw->error_code), stackStatus(raw->stack_status), kernelStackId(raw->kernel_stack_id), - pyStackIds(raw->stack, raw->stack + raw->stack_len), - nativeStack(raw->pid, raw->raw_user_stack, raw->user_stack_len, - raw->user_ip, raw->user_sp) {} + pyStackIds(raw->stack, raw->stack + raw->stack_len) {} }; } // namespace pyperf From 1d502e0ffb93bbfec6d77dddeb7668f0a90e6810 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 01:19:33 +0200 Subject: [PATCH 02/27] More work --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 68 ++++++++++++++++++------- src/cc/compat/linux/types.h | 52 ++++++++++++++++++- 2 files changed, 100 insertions(+), 20 deletions(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index 04306bc0af65..39bd1d74ae55 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -172,6 +172,7 @@ struct sample_state { uintptr_t constant_buffer_addr; uintptr_t interp_head; uintptr_t thread_state; + enum pthreads_impl pthreads_impl; struct struct_offsets offsets; uint32_t cur_cpu; uint32_t symbol_counter; @@ -267,6 +268,44 @@ get_task_thread_id(struct task_struct const *task, enum pthreads_impl pthreads_i return ERROR_NONE; } +static __always_inline int compare_task_thread_id(uint64_t a, uint64_t b, enum pthreads_impl pthreads_impl) { +#if defined(__x86_64__) + (void)pthreads_impl; + return a == b; +#elif defined(__aarch64__) + switch (pthreads_impl) { + case PTI_GLIBC: + return (int64_t)(a - b) < 0x500; + + case PTI_MUSL: + return a == b; + } +#endif +} + +static __always_inline int user_mode(struct pt_regs *regs) { +#if defined(__x86_64__) + // ebpf doesn't allow direct access to regs->cs, so we need to copy it + int cs; + bpf_probe_read_kernel(&cs, sizeof(cs), &(regs->cs)); + return cs & 3; +#elif defined(__aarch64__) + return ((regs)->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t; +#endif +} + +static __always_inline struct pt_regs *task_pt_regs_ptr(struct task_struct const *const task) { + unsigned long stack; + bpf_probe_read_kernel(&stack, sizeof(stack), (void*)((unsigned long)task + STACK_OFS)); +#if defined(__x86_64__) + // This is equivalent to `task_pt_regs(task)` for x86. Macros doesn't + // work properly on bcc, so we need to re-implement. + return (struct pt_regs *)(stack + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING) - 1; +#elif defined(__aarch64__) + return (struct pt_regs *)(stack + THREAD_SIZE) - 1; +#endif +} + // this function is trivial, but we need to do map lookup in separate function, // because BCC doesn't allow direct map calls (including lookups) from inside // a macro (which we want to do in GET_STATE() macro below) @@ -326,37 +365,29 @@ on_event(struct pt_regs* ctx) { // Get raw native user stack struct pt_regs user_regs; - // ebpf doesn't allow direct access to ctx->cs, so we need to copy it - int cs; - bpf_probe_read_kernel(&cs, sizeof(cs), &(ctx->cs)); - // Are we in user mode? - if (cs & 3) { + if (user_mode(ctx)) { // Yes - use the registers context given to the BPF program user_regs = *ctx; } else { // No - use the registers context of usermode, that is stored on the stack. - - // The third argument is equivalent to `task_pt_regs(task)` for x86. Macros doesn't - // work properly on bcc, so we need to re-implement. bpf_probe_read_kernel( &user_regs, sizeof(user_regs), - // Note - BCC emits an implicit bpf_probe_read_kernel() here (for the deref of 'task'). - // I don't like the implicitness (and it will be something we'll need to fix if we're ever - // to move from BCC). Meanwhile, I tried to change it to be explicit but the BPF assembly - // varies too much so I prefer to avoid this change now ;( - (struct pt_regs *)(*(unsigned long*)((unsigned long)task + STACK_OFS) + THREAD_SIZE - - TOP_OF_KERNEL_STACK_PADDING) - 1); + task_pt_regs_ptr(task)); } + event->user_stack_len = 0; +#if defined(__x86_64__) event->user_sp = user_regs.sp; event->user_ip = user_regs.ip; - event->user_stack_len = 0; - // Subtract 128 from sp for x86-ABI red zone uintptr_t top_of_stack = user_regs.sp - 128; - +#elif defined(__aarch64__) + event->user_sp = user_regs.sp; + event->user_ip = user_regs.pc; + uintptr_t top_of_stack = user_regs.sp; +#endif // Copy one page at the time - if one fails we don't want to lose the others int i; #pragma unroll @@ -417,6 +448,7 @@ on_event(struct pt_regs* ctx) { state->offsets = pid_data->offsets; state->interp_head = pid_data->interp; state->constant_buffer_addr = pid_data->globals.constant_buffer; + state->pthreads_impl = pid_data->pthreads_impl; // Read pointer to first PyThreadState in thread states list: bpf_probe_read_user( @@ -454,7 +486,7 @@ get_thread_state(struct pt_regs *ctx) { for (int i = 0; i < THREAD_STATES_PER_PROG; ++i) { // Read the PyThreadState::thread_id to which this PyThreadState belongs: thread_id = read_tstate_thread_id(state->thread_state, &state->offsets); - if (thread_id == state->current_thread_id) { + if (compare_task_thread_id(thread_id, state->current_thread_id, state->pthreads_impl)) { goto found; } else if (unlikely(thread_id == BAD_THREAD_ID)) { diff --git a/src/cc/compat/linux/types.h b/src/cc/compat/linux/types.h index 44bccb936042..1875a7ea4417 100644 --- a/src/cc/compat/linux/types.h +++ b/src/cc/compat/linux/types.h @@ -51,10 +51,14 @@ typedef int bool; #define NULL ((void*)0) #define ENOSPC 28 + +#if defined(__x86_64__) + #define PAGE_SIZE 4096 #define PAGE_MASK (~(PAGE_SIZE-1)) #define THREAD_SIZE_ORDER 2 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define TOP_OF_KERNEL_STACK_PADDING 0 struct pt_regs { /* @@ -91,6 +95,52 @@ struct pt_regs { /* top of stack page */ }; +#elif defined(__aarch64__) + +#define PSR_MODE_EL0t 0x00000000 +#define PSR_MODE_MASK 0x0000000f + +#define PAGE_SIZE 4096 // on all systems I saw, CONFIG_ARM64_PAGE_SHIFT=12 +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define KASAN_THREAD_SHIFT 0 +#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) +// if CONFIG_VMAP_STACK is enabled & PAGE_SHIFT is 12, then this path gets selected: +#define THREAD_SHIFT MIN_THREAD_SHIFT +#define THREAD_SIZE (1UL << THREAD_SHIFT) + +// this changes! +// this copy is from 5.16.0 +struct pt_regs { + union { + struct user_pt_regs user_regs; + struct { + u64 regs[31]; + u64 sp; + u64 pc; + u64 pstate; + }; + }; + u64 orig_x0; +#ifdef __AARCH64EB__ + u32 unused2; + s32 syscallno; +#else + s32 syscallno; + u32 unused2; +#endif + u64 sdei_ttbr1; + /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ + u64 pmr_save; + u64 stackframe[2]; + + /* Only valid for some EL1 exceptions. */ + u64 lockdep_hardirqs; + u64 exit_rcu; +}; + +#endif + # ifndef likely # define likely(x) __builtin_expect(x, 1) # endif @@ -312,8 +362,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n) __roundup_pow_of_two(n) \ ) -#define TOP_OF_KERNEL_STACK_PADDING 0 - // END COPIED FROM LINUX #endif // _UAPI__LINUX_LINUX_H__ )********" From e9632c3e9d7a1ad2d58e1abb6cf86ccfdaa81549 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:08:21 +0200 Subject: [PATCH 03/27] add missing struct --- src/cc/compat/linux/types.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/cc/compat/linux/types.h b/src/cc/compat/linux/types.h index 1875a7ea4417..ae372dc1234b 100644 --- a/src/cc/compat/linux/types.h +++ b/src/cc/compat/linux/types.h @@ -109,6 +109,13 @@ struct pt_regs { #define THREAD_SHIFT MIN_THREAD_SHIFT #define THREAD_SIZE (1UL << THREAD_SHIFT) +struct user_pt_regs { + __u64 regs[31]; + __u64 sp; + __u64 pc; + __u64 pstate; +}; + // this changes! // this copy is from 5.16.0 struct pt_regs { From cf3405b7f35f910b5495f4804cfe65630a2688b6 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:14:56 +0200 Subject: [PATCH 04/27] no direct access to ctx. sigh --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index 39bd1d74ae55..4c874af425dd 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -284,13 +284,15 @@ static __always_inline int compare_task_thread_id(uint64_t a, uint64_t b, enum p } static __always_inline int user_mode(struct pt_regs *regs) { + // ebpf doesn't allow direct access to regs (the ctx), so we need to copy it #if defined(__x86_64__) - // ebpf doesn't allow direct access to regs->cs, so we need to copy it int cs; bpf_probe_read_kernel(&cs, sizeof(cs), &(regs->cs)); return cs & 3; #elif defined(__aarch64__) - return ((regs)->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t; + u64 pstate; + bpf_probe_read_kernel(&pstate, sizeof(pstate), &(regs->pstate)); + return (pstate & PSR_MODE_MASK) == PSR_MODE_EL0t; #endif } From b5d903fb4bef472dfae4a36757752142b9094815 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:22:54 +0200 Subject: [PATCH 05/27] enable back native stacks --- examples/cpp/pyperf/CMakeLists.txt | 12 +++++++++++- examples/cpp/pyperf/PyPerfCollapsedPrinter.cc | 6 +++--- examples/cpp/pyperf/PyPerfType.h | 6 ++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt index a2c16123d4e4..004c5f461862 100644 --- a/examples/cpp/pyperf/CMakeLists.txt +++ b/examples/cpp/pyperf/CMakeLists.txt @@ -17,8 +17,18 @@ add_executable(PyPerf PyPerfVersion.cc PyPerfProc.cc PyOffsets.cc + PyPerfNativeStackTrace.cc ) -target_link_libraries(PyPerf pthread lzma) +target_link_libraries(PyPerf pthread libunwind-ptrace.a libunwind.a lzma) + +execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE) + +if(${ARCHITECTURE} STREQUAL "x86_64") + target_link_libraries(PyPerf libunwind-x86_64.a) +elseif(${ARCHITECTURE} STREQUAL "aarch64") + target_link_libraries(PyPerf libunwind-x86_64.a) +endif() + if(NOT CMAKE_USE_LIBBPF_PACKAGE) target_link_libraries(PyPerf bcc-static) else() diff --git a/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc b/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc index e2b6090d18b4..12d3b1df6a03 100644 --- a/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc +++ b/examples/cpp/pyperf/PyPerfCollapsedPrinter.cc @@ -132,14 +132,14 @@ void PyPerfCollapsedPrinter::processSamples( } } } -/* + nativeStackErrors += static_cast(sample.nativeStack.error_occured()); auto native_symbols = sample.nativeStack.get_stack_symbol(); for (auto it = native_symbols.crbegin(); it != native_symbols.crend(); ++it) { auto sym = *it; std::fprintf(output_file, ";%s_[pn]", sym.c_str()); } -*/ + if (sample.kernelStackId > 0) { auto symbols = kernelStacks.get_stack_symbol(sample.kernelStackId, -1); for (auto it = symbols.crbegin(); it != symbols.crend(); ++it) { @@ -161,7 +161,7 @@ void PyPerfCollapsedPrinter::processSamples( std::fprintf(stderr, "%d Python symbol errors\n", symbolErrors); std::fprintf(stderr, "%d times Python symbol lost\n", lostSymbols); std::fprintf(stderr, "%d kernel stack errors\n", kernelStackErrors); - // std::fprintf(stderr, "%d native stack errors\n", nativeStackErrors); + std::fprintf(stderr, "%d native stack errors\n", nativeStackErrors); std::fprintf(stderr, "%d errors\n", errors); if (!output_.empty()) { diff --git a/examples/cpp/pyperf/PyPerfType.h b/examples/cpp/pyperf/PyPerfType.h index 63d2a2710219..893f9eded730 100644 --- a/examples/cpp/pyperf/PyPerfType.h +++ b/examples/cpp/pyperf/PyPerfType.h @@ -231,7 +231,7 @@ struct PyPerfSample { uint8_t stackStatus; int32_t kernelStackId; std::vector pyStackIds; - // NativeStackTrace nativeStack; + NativeStackTrace nativeStack; explicit PyPerfSample(const Event* raw, int rawSize) : pid(raw->pid), @@ -240,7 +240,9 @@ struct PyPerfSample { errorCode(raw->error_code), stackStatus(raw->stack_status), kernelStackId(raw->kernel_stack_id), - pyStackIds(raw->stack, raw->stack + raw->stack_len) {} + pyStackIds(raw->stack, raw->stack + raw->stack_len), + nativeStack(raw->pid, raw->raw_user_stack, raw->user_stack_len, + raw->user_ip, raw->user_sp) {} }; } // namespace pyperf From 5cf535dd2c955c656a28ab2e0c31d1611ba441d2 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:24:40 +0200 Subject: [PATCH 06/27] fix lib name --- examples/cpp/pyperf/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt index 004c5f461862..8ab6a6ac5e62 100644 --- a/examples/cpp/pyperf/CMakeLists.txt +++ b/examples/cpp/pyperf/CMakeLists.txt @@ -26,7 +26,7 @@ execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE if(${ARCHITECTURE} STREQUAL "x86_64") target_link_libraries(PyPerf libunwind-x86_64.a) elseif(${ARCHITECTURE} STREQUAL "aarch64") - target_link_libraries(PyPerf libunwind-x86_64.a) + target_link_libraries(PyPerf libunwind-aarch64.a) endif() if(NOT CMAKE_USE_LIBBPF_PACKAGE) From 918fc3138beac21262f5f3b96f92b37b97837bf0 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:35:41 +0200 Subject: [PATCH 07/27] try --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index 4c874af425dd..518e807562d4 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -370,7 +370,8 @@ on_event(struct pt_regs* ctx) { // Are we in user mode? if (user_mode(ctx)) { // Yes - use the registers context given to the BPF program - user_regs = *ctx; + // user_regs = *ctx; + bpf_probe_read_kernel(&user_regs, sizeof(user_regs), ctx); } else { // No - use the registers context of usermode, that is stored on the stack. From 95fac2db6e51d3088e076b38eed1d072c8d2d43d Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:49:32 +0200 Subject: [PATCH 08/27] patch --- examples/cpp/pyperf/PyOffsets.cc | 58 +++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index cf813ea7b4ae..6fb2ea218871 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -29,6 +29,7 @@ There are a couple of exceptions: 3. PyThreadState.thread - this field's name is "thread_id" in some Python versions. */ +#if defined(__x86_64__) extern const struct struct_offsets kPy27OffsetConfig = { .PyObject = { .ob_type = 8 @@ -229,19 +230,68 @@ extern const struct struct_offsets kPy310OffsetConfig = { }, }; +#elif defined(__aarch64__) + +extern const struct struct_offsets kPy37OffsetConfig = { + .PyObject = { + .ob_type = 8 + }, + .String = { + .data = 48, // offsetof(PyStringObject, ob_sval) + .size = -1, // offsetof(PyVarObject, ob_size) + }, + .PyTypeObject = { + .tp_name = 24 + }, + .PyThreadState = { + .next = 8, + .interp = 16, + .frame = 24, + .thread = 176, + }, + .PyInterpreterState = { + .tstate_head = 8, + }, + .PyRuntimeState = { + .interp_main = 32, // N/A + }, + .PyFrameObject = { + .f_back = 24, + .f_code = 32, + .f_lineno = 108, + .f_localsplus = 360, + }, + .PyCodeObject = { + .co_nlocals = 24, + .co_filename = 96, + .co_name = 104, + .co_varnames = 64, + .co_firstlineno = 36, + }, + .PyTupleObject = { + .ob_item = 24 + }, + .PyCellObject = { + .ob_ref = 16 + } +}; + +#endif + // List of mappings from Python 3 minor versions to offsets. `get_offsets` depends on this list // being sorted in ascending order when it searches through it. const std::vector> python3Versions = { - {{3,6,0}, kPy36OffsetConfig}, + // {{3,6,0}, kPy36OffsetConfig}, {{3,7,0}, kPy37OffsetConfig}, - {{3,8,0}, kPy38OffsetConfig}, + // {{3,8,0}, kPy38OffsetConfig}, + // TODO check on aarch64 // 3.9 is same as 3.8 - {{3,10,0}, kPy310OffsetConfig}, + // {{3,10,0}, kPy310OffsetConfig}, }; const struct_offsets& get_offsets(version& version) { if (version.major == 2) { - return kPy27OffsetConfig; + return kPy37OffsetConfig; } else { // Find offsets for Python 3 version: From e4de0ff3ff8174377ea08ef049dfec60418608da Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:52:44 +0200 Subject: [PATCH 09/27] wip --- examples/cpp/pyperf/PyOffsets.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index 6fb2ea218871..6051cf43ce68 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -262,7 +262,6 @@ extern const struct struct_offsets kPy37OffsetConfig = { .f_localsplus = 360, }, .PyCodeObject = { - .co_nlocals = 24, .co_filename = 96, .co_name = 104, .co_varnames = 64, From 8347ece735029404c743806a2f1325bc35352673 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 02:55:19 +0200 Subject: [PATCH 10/27] wip --- examples/cpp/pyperf/PyOffsets.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index 6051cf43ce68..a10ab1ce2c01 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -237,8 +237,8 @@ extern const struct struct_offsets kPy37OffsetConfig = { .ob_type = 8 }, .String = { - .data = 48, // offsetof(PyStringObject, ob_sval) - .size = -1, // offsetof(PyVarObject, ob_size) + .data = 48, + .size = -1, }, .PyTypeObject = { .tp_name = 24 @@ -253,7 +253,7 @@ extern const struct struct_offsets kPy37OffsetConfig = { .tstate_head = 8, }, .PyRuntimeState = { - .interp_main = 32, // N/A + .interp_main = 32, }, .PyFrameObject = { .f_back = 24, @@ -270,9 +270,6 @@ extern const struct struct_offsets kPy37OffsetConfig = { .PyTupleObject = { .ob_item = 24 }, - .PyCellObject = { - .ob_ref = 16 - } }; #endif From fd3cd7510161a2b7b02a5aff234e4a71b8e8486c Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 03:05:51 +0200 Subject: [PATCH 11/27] wip --- examples/cpp/pyperf/PyOffsets.cc | 125 ++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 3 deletions(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index a10ab1ce2c01..ba2195e98cce 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -231,6 +231,45 @@ extern const struct struct_offsets kPy310OffsetConfig = { }; #elif defined(__aarch64__) +extern const struct struct_offsets kPy36OffsetConfig = { + .PyObject = { + .ob_type = 8 + }, + .String = { + .data = 48, + .size = 16, + }, + .PyTypeObject = { + .tp_name = 24 + }, + .PyThreadState = { + .next = 8, + .interp = 16, + .frame = 24, + .thread = 152, + }, + .PyInterpreterState = { + .tstate_head = 8, + }, + .PyRuntimeState = { + .interp_main = 32, + }, + .PyFrameObject = { + .f_back = 24, + .f_code = 32, + .f_lineno = 124, + .f_localsplus = 376, + }, + .PyCodeObject = { + .co_filename = 96, + .co_name = 104, + .co_varnames = 64, + .co_firstlineno = 36, + }, + .PyTupleObject = { + .ob_item = 24 + }, +}; extern const struct struct_offsets kPy37OffsetConfig = { .PyObject = { @@ -272,17 +311,97 @@ extern const struct struct_offsets kPy37OffsetConfig = { }, }; +extern const struct struct_offsets kPy38ffsetConfig = { + .PyObject = { + .ob_type = 8 + }, + .String = { + .data = 48, + .size = -1, + }, + .PyTypeObject = { + .tp_name = 24 + }, + .PyThreadState = { + .next = 8, + .interp = 16, + .frame = 24, + .thread = 176, + }, + .PyInterpreterState = { + .tstate_head = 8, + }, + .PyRuntimeState = { + .interp_main = 32, + }, + .PyFrameObject = { + .f_back = 24, + .f_code = 32, + .f_lineno = 108, + .f_localsplus = 360, + }, + .PyCodeObject = { + .co_filename = 96, + .co_name = 104, + .co_varnames = 64, + .co_firstlineno = 36, + }, + .PyTupleObject = { + .ob_item = 24 + }, +}; + +extern const struct struct_offsets kPy310OffsetConfig = { + .PyObject = { + .ob_type = 8 + }, + .String = { + .data = 48, + .size = -1, + }, + .PyTypeObject = { + .tp_name = 24 + }, + .PyThreadState = { + .next = 8, + .interp = 16, + .frame = 24, + .thread = 176, + }, + .PyInterpreterState = { + .tstate_head = 8, + }, + .PyRuntimeState = { + .interp_main = 32, + }, + .PyFrameObject = { + .f_back = 24, + .f_code = 32, + .f_lineno = 108, + .f_localsplus = 360, + }, + .PyCodeObject = { + .co_filename = 96, + .co_name = 104, + .co_varnames = 64, + .co_firstlineno = 36, + }, + .PyTupleObject = { + .ob_item = 24 + }, +}; + #endif // List of mappings from Python 3 minor versions to offsets. `get_offsets` depends on this list // being sorted in ascending order when it searches through it. const std::vector> python3Versions = { - // {{3,6,0}, kPy36OffsetConfig}, + {{3,6,0}, kPy36OffsetConfig}, {{3,7,0}, kPy37OffsetConfig}, - // {{3,8,0}, kPy38OffsetConfig}, + {{3,8,0}, kPy38OffsetConfig}, // TODO check on aarch64 // 3.9 is same as 3.8 - // {{3,10,0}, kPy310OffsetConfig}, + {{3,10,0}, kPy310OffsetConfig}, }; const struct_offsets& get_offsets(version& version) { From b33dd19493fc7fc3c5c76935f17b2a67c7b30c9c Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 03:09:44 +0200 Subject: [PATCH 12/27] wip --- examples/cpp/pyperf/PyOffsets.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index ba2195e98cce..aba9af5c48c9 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -311,7 +311,7 @@ extern const struct struct_offsets kPy37OffsetConfig = { }, }; -extern const struct struct_offsets kPy38ffsetConfig = { +extern const struct struct_offsets kPy38OffsetConfig = { .PyObject = { .ob_type = 8 }, From 91a4accb4c114a6b2185c1622b92437f16a09acc Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 1 Feb 2022 03:25:31 +0200 Subject: [PATCH 13/27] wip --- examples/cpp/pyperf/PyOffsets.cc | 43 +++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index aba9af5c48c9..898c8981a58d 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -231,6 +231,47 @@ extern const struct struct_offsets kPy310OffsetConfig = { }; #elif defined(__aarch64__) + +extern const struct struct_offsets kPy27OffsetConfig = { + .PyObject = { + .ob_type = 8 + }, + .String = { + .data = 36, // offsetof(PyStringObject, ob_sval) + .size = 16, // offsetof(PyVarObject, ob_size) + }, + .PyTypeObject = { + .tp_name = 24 + }, + .PyThreadState = { + .next = 0, + .interp = 8, + .frame = 16, + .thread = 144, + }, + .PyInterpreterState = { + .tstate_head = 8, + }, + .PyRuntimeState = { + .interp_main = -1, // N/A + }, + .PyFrameObject = { + .f_back = 24, + .f_code = 32, + .f_lineno = 124, + .f_localsplus = 376, + }, + .PyCodeObject = { + .co_filename = 80, + .co_name = 88, + .co_varnames = 56, + .co_firstlineno = 96, + }, + .PyTupleObject = { + .ob_item = 24 + } +}; + extern const struct struct_offsets kPy36OffsetConfig = { .PyObject = { .ob_type = 8 @@ -406,7 +447,7 @@ const std::vector> python3Versions = { const struct_offsets& get_offsets(version& version) { if (version.major == 2) { - return kPy37OffsetConfig; + return kPy27OffsetConfig; } else { // Find offsets for Python 3 version: From aba6bdc0dcf089128b5d74f4b30ee0d86b56567b Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Thu, 3 Feb 2022 01:25:14 +0200 Subject: [PATCH 14/27] wip --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index 518e807562d4..bb39fada60bf 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -370,8 +370,8 @@ on_event(struct pt_regs* ctx) { // Are we in user mode? if (user_mode(ctx)) { // Yes - use the registers context given to the BPF program - // user_regs = *ctx; bpf_probe_read_kernel(&user_regs, sizeof(user_regs), ctx); + // user_regs = *ctx; } else { // No - use the registers context of usermode, that is stored on the stack. @@ -419,9 +419,10 @@ on_event(struct pt_regs* ctx) { // Get PyThreadState of the thread that currently holds the GIL uintptr_t _PyThreadState_Current = 0; - bpf_probe_read_user( + int x = bpf_probe_read_user( &_PyThreadState_Current, sizeof(_PyThreadState_Current), (void*)pid_data->globals._PyThreadState_Current); + bpf_trace_printk("read addr %llx ret %d\n", pid_data->globals._PyThreadState_Current, ret); if (_PyThreadState_Current == 0) { // The GIL is released, we can only get native stacks // until it is held again. From bb466dfa4ab315e1dd457b708090cc84552e01e7 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Thu, 3 Feb 2022 01:34:15 +0200 Subject: [PATCH 15/27] wip --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index bb39fada60bf..35733054f5bd 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -422,7 +422,7 @@ on_event(struct pt_regs* ctx) { int x = bpf_probe_read_user( &_PyThreadState_Current, sizeof(_PyThreadState_Current), (void*)pid_data->globals._PyThreadState_Current); - bpf_trace_printk("read addr %llx ret %d\n", pid_data->globals._PyThreadState_Current, ret); + bpf_trace_printk("read addr %llx ret %d\n", pid_data->globals._PyThreadState_Current, x); if (_PyThreadState_Current == 0) { // The GIL is released, we can only get native stacks // until it is held again. From c7461900706a6cc710c8ba5aa47a9282c5ce9cfb Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Thu, 3 Feb 2022 08:55:11 +0200 Subject: [PATCH 16/27] wip --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index 35733054f5bd..5cc0d1960746 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -261,6 +261,8 @@ get_task_thread_id(struct task_struct const *task, enum pthreads_impl pthreads_i #error "Unsupported platform" #endif // __x86_64__ + bpf_trace_printk("fs: %llx libc: %llx: ret %llx\n", fsbase, pthread_impl, ret); + if (ret < 0) { return ERROR_BAD_FSBASE; } @@ -422,7 +424,7 @@ on_event(struct pt_regs* ctx) { int x = bpf_probe_read_user( &_PyThreadState_Current, sizeof(_PyThreadState_Current), (void*)pid_data->globals._PyThreadState_Current); - bpf_trace_printk("read addr %llx ret %d\n", pid_data->globals._PyThreadState_Current, x); + bpf_trace_printk("read addr %llx ret %d\n", (unsigned long)pid_data->globals._PyThreadState_Current, x); if (_PyThreadState_Current == 0) { // The GIL is released, we can only get native stacks // until it is held again. From 994959b6dd4f9e4f8fc8212110a1dd7d5be9b33d Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Fri, 4 Feb 2022 02:34:23 +0200 Subject: [PATCH 17/27] wip --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index 5cc0d1960746..0113bd36a56d 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -261,7 +261,7 @@ get_task_thread_id(struct task_struct const *task, enum pthreads_impl pthreads_i #error "Unsupported platform" #endif // __x86_64__ - bpf_trace_printk("fs: %llx libc: %llx: ret %llx\n", fsbase, pthread_impl, ret); + bpf_trace_printk("fs: %llx libc: %llx: ret %llx\n", fsbase, pthreads_impl, ret); if (ret < 0) { return ERROR_BAD_FSBASE; From e7f0b07f73221aedbbf0d63a745c51acdc7f4b13 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Fri, 4 Feb 2022 02:42:07 +0200 Subject: [PATCH 18/27] wip --- examples/cpp/pyperf/PyPerfBPFProgram.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyPerfBPFProgram.cc b/examples/cpp/pyperf/PyPerfBPFProgram.cc index 0113bd36a56d..94f5b83f4d16 100644 --- a/examples/cpp/pyperf/PyPerfBPFProgram.cc +++ b/examples/cpp/pyperf/PyPerfBPFProgram.cc @@ -243,7 +243,8 @@ get_task_thread_id(struct task_struct const *task, enum pthreads_impl pthreads_i switch (pthreads_impl) { case PTI_GLIBC: // TODO const bad - ret = fsbase - 0x6f0; + *thread_id = fsbase - 0x6f0; + ret = 0; break; case PTI_MUSL: From 586458e2f8703743cf5893079c9773867c1f497c Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 30 Nov 2022 01:28:46 +0200 Subject: [PATCH 19/27] swap link order --- examples/cpp/pyperf/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt index 8ab6a6ac5e62..324e7cfd4c1f 100644 --- a/examples/cpp/pyperf/CMakeLists.txt +++ b/examples/cpp/pyperf/CMakeLists.txt @@ -19,7 +19,7 @@ add_executable(PyPerf PyOffsets.cc PyPerfNativeStackTrace.cc ) -target_link_libraries(PyPerf pthread libunwind-ptrace.a libunwind.a lzma) +target_link_libraries(PyPerf pthread libunwind.a lzma) execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE) @@ -28,6 +28,7 @@ if(${ARCHITECTURE} STREQUAL "x86_64") elseif(${ARCHITECTURE} STREQUAL "aarch64") target_link_libraries(PyPerf libunwind-aarch64.a) endif() +target_link_libraries(PyPerf libunwind-ptrace.a) # after the x86_64/aarch64 link if(NOT CMAKE_USE_LIBBPF_PACKAGE) target_link_libraries(PyPerf bcc-static) From 6778520072d63134f23cf0141a57238140094e0e Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 30 Nov 2022 01:32:28 +0200 Subject: [PATCH 20/27] swap order --- examples/cpp/pyperf/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt index 324e7cfd4c1f..7b607d46e4c2 100644 --- a/examples/cpp/pyperf/CMakeLists.txt +++ b/examples/cpp/pyperf/CMakeLists.txt @@ -19,7 +19,7 @@ add_executable(PyPerf PyOffsets.cc PyPerfNativeStackTrace.cc ) -target_link_libraries(PyPerf pthread libunwind.a lzma) +target_link_libraries(PyPerf pthread lzma) execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE) @@ -28,7 +28,7 @@ if(${ARCHITECTURE} STREQUAL "x86_64") elseif(${ARCHITECTURE} STREQUAL "aarch64") target_link_libraries(PyPerf libunwind-aarch64.a) endif() -target_link_libraries(PyPerf libunwind-ptrace.a) # after the x86_64/aarch64 link +target_link_libraries(PyPerf libunwind.a libunwind-ptrace.a) # after the x86_64/aarch64 link if(NOT CMAKE_USE_LIBBPF_PACKAGE) target_link_libraries(PyPerf bcc-static) From b2c69412b3abaaf2bef02ff95459f4572574431d Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 30 Nov 2022 01:40:22 +0200 Subject: [PATCH 21/27] swap order again --- examples/cpp/pyperf/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt index 7b607d46e4c2..a44f5c10a175 100644 --- a/examples/cpp/pyperf/CMakeLists.txt +++ b/examples/cpp/pyperf/CMakeLists.txt @@ -19,7 +19,7 @@ add_executable(PyPerf PyOffsets.cc PyPerfNativeStackTrace.cc ) -target_link_libraries(PyPerf pthread lzma) +target_link_libraries(PyPerf pthread lzma libunwind-ptrace.a) execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE) @@ -28,7 +28,7 @@ if(${ARCHITECTURE} STREQUAL "x86_64") elseif(${ARCHITECTURE} STREQUAL "aarch64") target_link_libraries(PyPerf libunwind-aarch64.a) endif() -target_link_libraries(PyPerf libunwind.a libunwind-ptrace.a) # after the x86_64/aarch64 link +target_link_libraries(PyPerf libunwind.a) # this one is needed after the x86_64/aarch64 link if(NOT CMAKE_USE_LIBBPF_PACKAGE) target_link_libraries(PyPerf bcc-static) From 747cd10fc32514a221d89547e4ca8b1d2b10843d Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 30 Nov 2022 01:47:28 +0200 Subject: [PATCH 22/27] swap order --- examples/cpp/pyperf/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt index a44f5c10a175..8bdc7374a101 100644 --- a/examples/cpp/pyperf/CMakeLists.txt +++ b/examples/cpp/pyperf/CMakeLists.txt @@ -19,7 +19,7 @@ add_executable(PyPerf PyOffsets.cc PyPerfNativeStackTrace.cc ) -target_link_libraries(PyPerf pthread lzma libunwind-ptrace.a) +target_link_libraries(PyPerf pthread libunwind-ptrace.a) execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE) @@ -30,6 +30,8 @@ elseif(${ARCHITECTURE} STREQUAL "aarch64") endif() target_link_libraries(PyPerf libunwind.a) # this one is needed after the x86_64/aarch64 link +target_link_libraries(PyPerf lzma) + if(NOT CMAKE_USE_LIBBPF_PACKAGE) target_link_libraries(PyPerf bcc-static) else() From 704c7f4334b3562bc4bb8e93e139ad0b211005fe Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Sat, 17 Dec 2022 02:21:00 +0200 Subject: [PATCH 23/27] update offsets --- examples/cpp/pyperf/PyOffsets.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index 7b63baa0c3a1..dfecbc74e4c0 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -262,6 +262,7 @@ extern const struct struct_offsets kPy27OffsetConfig = { .f_localsplus = 376, }, .PyCodeObject = { + .co_nlocals = 20, .co_filename = 80, .co_name = 88, .co_varnames = 56, @@ -269,7 +270,7 @@ extern const struct struct_offsets kPy27OffsetConfig = { }, .PyTupleObject = { .ob_item = 24 - } + }, }; extern const struct struct_offsets kPy36OffsetConfig = { @@ -357,8 +358,8 @@ extern const struct struct_offsets kPy38OffsetConfig = { .ob_type = 8 }, .String = { - .data = 48, - .size = -1, + .data = 48, // offsetof(PyStringObject, ob_sval) + .size = 16, // offsetof(PyVarObject, ob_size) }, .PyTypeObject = { .tp_name = 24 @@ -373,7 +374,7 @@ extern const struct struct_offsets kPy38OffsetConfig = { .tstate_head = 8, }, .PyRuntimeState = { - .interp_main = 32, + .interp_main = 40, // N/A }, .PyFrameObject = { .f_back = 24, @@ -382,10 +383,11 @@ extern const struct struct_offsets kPy38OffsetConfig = { .f_localsplus = 360, }, .PyCodeObject = { - .co_filename = 96, - .co_name = 104, - .co_varnames = 64, - .co_firstlineno = 36, + .co_nlocals = 28, + .co_filename = 104, + .co_name = 112, + .co_varnames = 72, + .co_firstlineno = 40, }, .PyTupleObject = { .ob_item = 24 From c38c1bcf1fe9adf57eca6d4bb603428299b92f2f Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Sat, 17 Dec 2022 02:25:13 +0200 Subject: [PATCH 24/27] update --- examples/cpp/pyperf/PyOffsets.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index dfecbc74e4c0..a293b7d16881 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -262,7 +262,6 @@ extern const struct struct_offsets kPy27OffsetConfig = { .f_localsplus = 376, }, .PyCodeObject = { - .co_nlocals = 20, .co_filename = 80, .co_name = 88, .co_varnames = 56, @@ -383,7 +382,6 @@ extern const struct struct_offsets kPy38OffsetConfig = { .f_localsplus = 360, }, .PyCodeObject = { - .co_nlocals = 28, .co_filename = 104, .co_name = 112, .co_varnames = 72, From cfc7af42c2c3676a7465a41753785c5f474907db Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Sat, 17 Dec 2022 02:46:19 +0200 Subject: [PATCH 25/27] fix offsets --- examples/cpp/pyperf/PyOffsets.cc | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/examples/cpp/pyperf/PyOffsets.cc b/examples/cpp/pyperf/PyOffsets.cc index a293b7d16881..64deb3ca81f4 100644 --- a/examples/cpp/pyperf/PyOffsets.cc +++ b/examples/cpp/pyperf/PyOffsets.cc @@ -277,8 +277,8 @@ extern const struct struct_offsets kPy36OffsetConfig = { .ob_type = 8 }, .String = { - .data = 48, - .size = 16, + .data = 48, // offsetof(PyStringObject, ob_sval) + .size = 16, // offsetof(PyVarObject, ob_size) }, .PyTypeObject = { .tp_name = 24 @@ -317,8 +317,8 @@ extern const struct struct_offsets kPy37OffsetConfig = { .ob_type = 8 }, .String = { - .data = 48, - .size = -1, + .data = 48, // offsetof(PyStringObject, ob_sval) + .size = 16, // offsetof(PyVarObject, ob_size) }, .PyTypeObject = { .tp_name = 24 @@ -397,8 +397,8 @@ extern const struct struct_offsets kPy310OffsetConfig = { .ob_type = 8 }, .String = { - .data = 48, - .size = -1, + .data = 48, // offsetof(PyStringObject, ob_sval) + .size = 16, // offsetof(PyVarObject, ob_size) }, .PyTypeObject = { .tp_name = 24 @@ -413,25 +413,27 @@ extern const struct struct_offsets kPy310OffsetConfig = { .tstate_head = 8, }, .PyRuntimeState = { - .interp_main = 32, + .interp_main = 40, }, .PyFrameObject = { .f_back = 24, .f_code = 32, - .f_lineno = 108, - .f_localsplus = 360, + .f_lineno = 100, + .f_localsplus = 352, }, .PyCodeObject = { - .co_filename = 96, - .co_name = 104, - .co_varnames = 64, - .co_firstlineno = 36, + .co_filename = 104, + .co_name = 112, + .co_varnames = 72, + .co_firstlineno = 40, }, .PyTupleObject = { .ob_item = 24 }, }; +#else +#error unknown arch #endif // List of mappings from Python 3 minor versions to offsets. `get_offsets` depends on this list @@ -440,8 +442,7 @@ const std::vector> python3Versions = { {{3,6,0}, kPy36OffsetConfig}, {{3,7,0}, kPy37OffsetConfig}, {{3,8,0}, kPy38OffsetConfig}, - // TODO check on aarch64 - // 3.9 is same as 3.8 + // 3.9 is same as 3.8 (on both x86_64 and Aarch64) {{3,10,0}, kPy310OffsetConfig}, }; From abc9714e81eddbad6bcf0b5486820fd0ebcc528b Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Sat, 17 Dec 2022 02:46:28 +0200 Subject: [PATCH 26/27] redzone --- examples/cpp/pyperf/PyPerfNativeStackTrace.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/cpp/pyperf/PyPerfNativeStackTrace.cc b/examples/cpp/pyperf/PyPerfNativeStackTrace.cc index 834df113f4c2..df0b8bb1f793 100644 --- a/examples/cpp/pyperf/PyPerfNativeStackTrace.cc +++ b/examples/cpp/pyperf/PyPerfNativeStackTrace.cc @@ -180,8 +180,15 @@ int NativeStackTrace::access_mem(unw_addr_space_t as, unw_word_t addr, return -UNW_EINVAL; } +#if defined(__x86_64__) // Subtract 128 for x86-ABI red zone - const uintptr_t top_of_stack = NativeStackTrace::sp - 128; + const unsigned redzone = 128; +#elif defined(__aarch64__) + const unsigned redzone = 0; +#else +#error unknown arch +#endif + const uintptr_t top_of_stack = NativeStackTrace::sp - redzone; const uintptr_t stack_start = top_of_stack & ~(getpagesize() - 1); const uintptr_t stack_end = stack_start + NativeStackTrace::stack_len; From 846228b6e3f244732783540e3e0b868271902e2d Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Sat, 17 Dec 2022 03:18:49 +0200 Subject: [PATCH 27/27] Protect more libunwind accessors --- examples/cpp/pyperf/PyPerfNativeStackTrace.cc | 33 +++++++++++++++---- examples/cpp/pyperf/PyPerfNativeStackTrace.h | 11 ++++--- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/examples/cpp/pyperf/PyPerfNativeStackTrace.cc b/examples/cpp/pyperf/PyPerfNativeStackTrace.cc index df0b8bb1f793..5d30df773b35 100644 --- a/examples/cpp/pyperf/PyPerfNativeStackTrace.cc +++ b/examples/cpp/pyperf/PyPerfNativeStackTrace.cc @@ -53,9 +53,17 @@ NativeStackTrace::NativeStackTrace(uint32_t pid, const unsigned char *raw_stack, return; } - unw_accessors_t my_accessors = _UPT_accessors; - my_accessors.access_mem = NativeStackTrace::access_mem; - my_accessors.access_reg = NativeStackTrace::access_reg; + // We hook some of the accessors to control the level of access libunwind gets of the target processes. + unw_accessors_t my_accessors = { + .find_proc_info = _UPT_find_proc_info, + .put_unwind_info = _UPT_put_unwind_info, + .get_dyn_info_list_addr = _UPT_get_dyn_info_list_addr, + .access_mem = NativeStackTrace::UPT_access_mem, + .access_reg = NativeStackTrace::UPT_access_reg, + .access_fpreg = NativeStackTrace::UPT_access_fpreg, + .resume = NativeStackTrace::UPT_resume, + .get_proc_name = _UPT_get_proc_name, + }; ProcSyms* procSymbols = nullptr; // reserve memory for platform-defined path limit AND the symbol const size_t buf_size = SymbolMaxSize + PATH_MAX + sizeof("() "); @@ -147,8 +155,8 @@ NativeStackTrace::NativeStackTrace(uint32_t pid, const unsigned char *raw_stack, } } -int NativeStackTrace::access_reg(unw_addr_space_t as, unw_regnum_t regnum, - unw_word_t *valp, int write, void *arg) { +int NativeStackTrace::UPT_access_reg(unw_addr_space_t as, unw_regnum_t regnum, + unw_word_t *valp, int write, void *arg) { if (regnum == UNW_REG_SP) { if (write) { logInfo(2, "Libunwind attempts to write to SP\n"); @@ -173,8 +181,8 @@ int NativeStackTrace::access_reg(unw_addr_space_t as, unw_regnum_t regnum, } } -int NativeStackTrace::access_mem(unw_addr_space_t as, unw_word_t addr, - unw_word_t *valp, int write, void *arg) { +int NativeStackTrace::UPT_access_mem(unw_addr_space_t as, unw_word_t addr, + unw_word_t *valp, int write, void *arg) { if (write) { logInfo(3, "Libunwind unexpected mem write attempt\n"); return -UNW_EINVAL; @@ -230,6 +238,17 @@ int NativeStackTrace::access_mem(unw_addr_space_t as, unw_word_t addr, return -UNW_EINVAL; } +int NativeStackTrace::UPT_access_fpreg(unw_addr_space_t as, unw_regnum_t reg, unw_fpreg_t *val, + int write, void *arg) { + logInfo(3, "Libunwind unexpected UPT_access_fpreg() attempt\n"); + return -UNW_EINVAL; +} + +int NativeStackTrace::UPT_resume(unw_addr_space_t as, unw_cursor_t *c, void *arg) { + logInfo(3, "Libunwind unexpected UPT_resume() attempt\n"); + return -UNW_EINVAL; +} + std::vector NativeStackTrace::get_stack_symbol() const { return symbols; } diff --git a/examples/cpp/pyperf/PyPerfNativeStackTrace.h b/examples/cpp/pyperf/PyPerfNativeStackTrace.h index cc8dcd0641bc..d7ddccdfd4e8 100644 --- a/examples/cpp/pyperf/PyPerfNativeStackTrace.h +++ b/examples/cpp/pyperf/PyPerfNativeStackTrace.h @@ -43,11 +43,14 @@ class NativeStackTrace { static uintptr_t sp; static ProcSymbolsCache procSymbolsCache; - static int access_reg(unw_addr_space_t as, unw_regnum_t regnum, - unw_word_t *valp, int write, void *arg); + static int UPT_access_reg(unw_addr_space_t as, unw_regnum_t regnum, + unw_word_t *valp, int write, void *arg); + static int UPT_access_mem(unw_addr_space_t as, unw_word_t addr, + unw_word_t *valp, int write, void *arg); + static int UPT_access_fpreg(unw_addr_space_t as, unw_regnum_t reg, unw_fpreg_t *val, + int write, void *arg); + static int UPT_resume(unw_addr_space_t as, unw_cursor_t *c, void *arg); - static int access_mem(unw_addr_space_t as, unw_word_t addr, unw_word_t *valp, - int write, void *arg); static ProcSyms* get_proc_symbols(uint32_t pid); };