diff --git a/CMakeLists.txt b/CMakeLists.txt index 90fb9790..f63cb1ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,7 +93,7 @@ target_compile_options(${EXECUTABLE} PRIVATE $<$:-mfloat-abi=hard> $<$:-mthumb> $<$:-specs=nosys.specs> - + -g -ffunction-sections -fdata-sections -fno-exceptions diff --git a/Core/Src/Examples/ExamplesHardFault.cpp b/Core/Src/Examples/ExamplesHardFault.cpp new file mode 100644 index 00000000..f569b5c0 --- /dev/null +++ b/Core/Src/Examples/ExamplesHardFault.cpp @@ -0,0 +1,58 @@ +#ifdef EXAMPLE_HARDFAULT + +#include "main.h" +#include "ST-LIB.hpp" + +#ifdef TEST_MEMORY_FAULT +constexpr auto my_uint32_t = MPUDomain::Buffer(); + +int main(void) { + + Hard_fault_check(); + STLIB::start(); + + using myBoard = ST_LIB::Board; + myBoard::init(); + + [[maybe_unused]] auto my_buffer = myBoard::instance_of().template as(); + my_buffer[1000000000] = 5; + while (1) { + STLIB::update(); + } +} + +#endif + +#ifdef TEST_BUS_FAULT + +int main(void) { + Hard_fault_check(); + *(uint32_t*)0xdead0000 = 0x20; + STLIB::start(); + + using myBoard = ST_LIB::Board<>; + myBoard::init(); + + while (1) { + STLIB::update(); + } +} + +#endif + +#ifdef TEST_USAGE_FAULT + +int main(void) { + Hard_fault_check(); + __builtin_trap(); + STLIB::start(); + using myBoard = ST_LIB::Board<>; + myBoard::init(); + + while (1) { + STLIB::update(); + } +} + +#endif +#endif \ No newline at end of file diff --git a/Core/Src/config/leds_hard_fault.cpp b/Core/Src/config/leds_hard_fault.cpp new file mode 100644 index 00000000..3859a135 --- /dev/null +++ b/Core/Src/config/leds_hard_fault.cpp @@ -0,0 +1,20 @@ +#include "HALAL/HALAL.hpp" +extern "C"{ + +#ifdef NUCLEO +GPIO_TypeDef* ports_hard_fault[] = {GPIOB,GPIOB,GPIOE}; +uint16_t pins_hard_fault[] = {GPIO_PIN_0,GPIO_PIN_14,GPIO_PIN_1}; +// //don't touch the count +uint8_t hard_fault_leds_count = (sizeof(ports_hard_fault)/sizeof(GPIO_TypeDef*) == sizeof(pins_hard_fault)/sizeof(uint16_t)) + ? 
sizeof(pins_hard_fault)/sizeof(uint16_t) : 0; + +#endif + +#ifdef BOARD +GPIO_TypeDef* ports_hard_fault[] = {GPIOG,GPIOG,GPIOG,GPIOG}; +uint16_t pins_hard_fault[] = {GPIO_PIN_13,GPIO_PIN_12,GPIO_PIN_11,GPIO_PIN_10}; +// //don't touch the count +uint8_t hard_fault_leds_count = (sizeof(ports_hard_fault)/sizeof(GPIO_TypeDef*) == sizeof(pins_hard_fault)/sizeof(uint16_t)) + ? sizeof(pins_hard_fault)/sizeof(uint16_t) : 0; +#endif +} \ No newline at end of file diff --git a/Core/Src/main.cpp b/Core/Src/main.cpp index e20dab78..c367556e 100644 --- a/Core/Src/main.cpp +++ b/Core/Src/main.cpp @@ -1,34 +1,23 @@ -#define EXAMPLE_BASE -#define TEST_0 // Test to be run - -// Include all examples, run the one defined above #include "Examples/ExampleMPU.cpp" - -#ifdef EXAMPLE_BASE +#include "Examples/ExamplesHardFault.cpp" #include "main.h" #include "ST-LIB.hpp" -int main(void) { -#ifdef SIM_ON - SharedMemory::start(); -#endif - - DigitalOutput led_on(PB0); - STLIB::start(); +int main(void) { + Hard_fault_check(); + STLIB::start(); - Time::register_low_precision_alarm(100, [&]() { led_on.toggle(); - }); + using myBoard = ST_LIB::Board<>; + myBoard::init(); - while (1) { - STLIB::update(); - } + while (1) { + STLIB::update(); + } } - void Error_Handler(void) { ErrorHandler("HAL error handler triggered"); while (1) { } } -#endif diff --git a/Core/Src/stm32h7xx_it.c b/Core/Src/stm32h7xx_it.c index a453f463..2a1d6d41 100644 --- a/Core/Src/stm32h7xx_it.c +++ b/Core/Src/stm32h7xx_it.c @@ -20,6 +20,8 @@ /* Includes ------------------------------------------------------------------*/ #include "main.h" #include "stm32h7xx_it.h" +#include "stm32h7xx_hal.h" +#include "HALAL/HardFault/HardfaultTrace.h" /* Private includes ----------------------------------------------------------*/ /* USER CODE BEGIN Includes */ /* USER CODE END Includes */ @@ -73,6 +75,15 @@ extern DMA_HandleTypeDef hdma_spi3_rx; extern DMA_HandleTypeDef hdma_spi3_tx; extern SPI_HandleTypeDef hspi3; extern 
FDCAN_HandleTypeDef hfdcan1; +/* +Externs for calltrace +*/ +extern uint32_t _stext; +extern uint32_t _etext; +extern uint32_t _sstack; +extern uint32_t _estack; +extern uint32_t _hf_stack_start; +extern uint32_t _hf_stack_end; /* USER CODE BEGIN EV */ /* USER CODE END EV */ @@ -83,48 +94,200 @@ extern FDCAN_HandleTypeDef hfdcan1; /** * @brief This function handles Non maskable interrupt. */ -void NMI_Handler(void) -{ - /* USER CODE BEGIN NonMaskableInt_IRQn 0 */ - /* USER CODE END NonMaskableInt_IRQn 0 */ - /* USER CODE BEGIN NonMaskableInt_IRQn 1 */ - while (1) - { - } - /* USER CODE END NonMaskableInt_IRQn 1 */ +//calls my_fault_handler with the MSP(main stack pointer) +#define HARDFAULT_HANDLING_ASM() \ +__asm__ __volatile__( \ + /* Detect which stack was in use */ \ + "tst lr, #4 \n" \ + "ite eq \n" \ + "mrseq r0, msp \n" \ + "mrsne r0, psp \n" \ + \ + /* Switch to dedicated HardFault stack */ \ + "ldr r1, =_hf_stack_end \n" \ + "msr msp, r1 \n" \ + "isb \n" \ + \ + /* Call C handler with original frame */ \ + "b my_fault_handler_c \n" \ +) + + + //create the space for the hardfault section in the flash +__attribute__((section(".hardfault_log"))) +volatile uint32_t hard_fault[128]; + +void hardfault_flash_write( + uint32_t addr_hard_fault, const void *data_hard_fault, size_t len_hard_fault, + uint32_t addr_metadata, const void *data_metadata, size_t len_metadata) +{ + __disable_irq(); + HAL_FLASH_Unlock(); + + // Erase sector + FLASH_EraseInitTypeDef erase; + uint32_t sector_error = 0; + erase.TypeErase = FLASH_TYPEERASE_SECTORS; + erase.Banks = FLASH_BANK_1; + erase.Sector = FLASH_SECTOR_6; + erase.NbSectors = 1; + erase.VoltageRange = FLASH_VOLTAGE_RANGE_3; + + if(HAL_FLASHEx_Erase(&erase, §or_error) != HAL_OK){ + __BKPT(0); + } + + + size_t offset, copy_len; + uint8_t block[32]; + + offset = 0; + while(offset < len_hard_fault){ + memset(block, 0xFF, sizeof(block)); + copy_len = (len_hard_fault - offset) > 32 ? 
32 : (len_hard_fault - offset); + memcpy(block, (uint8_t*)data_hard_fault + offset, copy_len); + + if(HAL_FLASH_Program(FLASH_TYPEPROGRAM_FLASHWORD, addr_hard_fault + offset, (uint32_t*)block) != HAL_OK){ + __BKPT(0); + } + offset += 32; + } + + offset = 0; + while(offset < len_metadata){ + memset(block, 0xFF, sizeof(block)); + copy_len = (len_metadata - offset) > 32 ? 32 : (len_metadata - offset); + memcpy(block, (uint8_t*)data_metadata + offset, copy_len); + + if(HAL_FLASH_Program(FLASH_TYPEPROGRAM_FLASHWORD, addr_metadata + offset, (uint32_t*)block) != HAL_OK){ + __BKPT(0); + } + offset += 32; + } + + SCB_InvalidateICache(); + SCB_InvalidateDCache(); + + HAL_FLASH_Lock(); + __enable_irq(); +} +static uint8_t is_valid_pc(uint32_t pc) +{ + pc &= ~1U; // Thumb + return (pc >= (uint32_t)&_stext && + pc < (uint32_t)&_etext); +} +__attribute__((noreturn, optimize("O0"))) +static void scan_call_stack(sContextStateFrame *frame, HardFaultLog *log_hard_fault) +{ + uint32_t *stack_start = (uint32_t *)&_sstack; + uint32_t *stack_end = (uint32_t *)&_estack; + + log_hard_fault->CallTrace.depth = 0; + uint32_t *sp = (uint32_t *)(frame + 1); + while (sp < stack_end && sp >= stack_start) + { + uint32_t val = *sp++; + if (log_hard_fault->CallTrace.depth >= CALL_TRACE_MAX_DEPTH) break; + if ((val & 1U) == 0) continue; + if (!is_valid_pc(val)) continue; + log_hard_fault->CallTrace.pcs[log_hard_fault->CallTrace.depth++] = val & ~1U; + } } +__attribute__((noreturn,optimize("O0"))) +void my_fault_handler_c(sContextStateFrame *frame) { + volatile uint32_t real_fault_pc = frame->return_address & ~1; + volatile HardFaultLog log_hard_fault; + + volatile uint32_t *cfsr = (volatile uint32_t *)0xE000ED28; + //keep the log in the estructure + log_hard_fault.HF_flag = HF_FLAG_VALUE; + log_hard_fault.frame = *frame; + log_hard_fault.frame.return_address = real_fault_pc; + log_hard_fault.CfsrDecode.cfsr = *cfsr; + log_hard_fault.fault_address.Nothing_Valid = 0; + + const uint8_t memory_fault = 
*cfsr & 0x000000ff; + if(memory_fault){ + const uint8_t MMARVALID = memory_fault & 0b10000000; // We can find the exact place were occured the memory fault + const uint8_t MLSPERR = memory_fault & 0b00100000; // MemManage fault FPU stack + const uint8_t MSTKERR = memory_fault & 0b00010000; // Stack overflow while entring an exception + const uint8_t MUNSTKERR = memory_fault & 0b00001000; // Stack error while exiting from an exception (Corrupted stack) + const uint8_t DACCVIOL = memory_fault & 0b00000010; //Data access violation (acceded to pointer NULL, to a protected memory region, overflow in arrays ...) + const uint8_t IACCVIOL = memory_fault & 0b00000001; //Instruction access violation + if(MMARVALID){ + uint32_t memory_fault_address = *(volatile uint32_t *)0xE000ED34; + log_hard_fault.fault_address.MMAR_VALID = memory_fault_address; + } + } + const uint8_t bus_fault = (*cfsr & 0x0000ff00) >> 8; + if(bus_fault){ + const uint8_t BFARVALID = bus_fault & 0b10000000; // BFAR is valid we can know the address which triggered the fault + const uint8_t LSPERR = bus_fault & 0b00100000; //Fault stack FPU + const uint8_t STKERR = bus_fault & 0b00010000; // Fault stack while entring an exception + const uint8_t UNSTKERR = bus_fault & 0b00001000; // Stack error while exiting an exception + const uint8_t IMPRECISERR = bus_fault & 0b00000010; // Bus fault, but the instruction that caused the error can be uncertain + const uint8_t PRECISERR = bus_fault & 0b00000001; //You can read Bfar to find the eact direction of the instruction + if(BFARVALID){ + volatile uint32_t bus_fault_address = *(volatile uint32_t *)0xE000ED38; + log_hard_fault.fault_address.BFAR_VALID = bus_fault_address; + //Don't trust in case IMPRECISERR == 1; + } + } + const uint16_t usage_fault = (*cfsr & 0xffff0000) >> 16; + if(usage_fault){ + const uint16_t DIVBYZERO = usage_fault & 0x0200; // Div by ZERO hardfault; + const uint16_t UNALIGNED = usage_fault & 0x0100; // Unaligned access operation occured + 
const uint16_t NOCP = usage_fault & 0x0008; //Access to FPU when is not present + const uint16_t INVPC = usage_fault & 0x0004; //Invalid program counter load + const uint16_t INVSTATE = usage_fault & 0x0002; // Invalid processor state + const uint16_t UNDEFINSTR = usage_fault & 0x0001; //Undefined instruction. + } + if(usage_fault | bus_fault){ + scan_call_stack(frame,&log_hard_fault); + } + volatile uint8_t metadata_buffer[0x100]; + memcpy(metadata_buffer,(void*)METADATA_FLASH_ADDR,0x100); + //write log hard fault + hardfault_flash_write(HF_FLASH_ADDR,(uint8_t*)&log_hard_fault,sizeof(log_hard_fault),METADATA_FLASH_ADDR,&metadata_buffer,sizeof(metadata_buffer)); + //reboot the system + volatile uint32_t *aircr = (volatile uint32_t *)0xE000ED0C; + __asm volatile ("dsb"); + *aircr = (0x05FA << 16) | 0x1 << 2; + __asm volatile ("dsb"); + while (1) {} // should be unreachable + } -/** - * @brief This function handles Hard fault interrupt. - */ +__attribute__((naked)) void HardFault_Handler(void) { - /* USER CODE BEGIN HardFault_IRQn 0 */ + HARDFAULT_HANDLING_ASM(); + while (1){} +} - /* USER CODE END HardFault_IRQn 0 */ +void NMI_Handler(void) +{ + /* USER CODE BEGIN NonMaskableInt_IRQn 0 */ + /* USER CODE END NonMaskableInt_IRQn 0 */ + /* USER CODE BEGIN NonMaskableInt_IRQn 1 */ while (1) { - /* USER CODE BEGIN W1_HardFault_IRQn 0 */ - /* USER CODE END W1_HardFault_IRQn 0 */ } + /* USER CODE END NonMaskableInt_IRQn 1 */ } - /** * @brief This function handles Memory management fault. */ void MemManage_Handler(void) { - /* USER CODE BEGIN MemoryManagement_IRQn 0 */ - - /* USER CODE END MemoryManagement_IRQn 0 */ - while (1) - { - /* USER CODE BEGIN W1_MemoryManagement_IRQn 0 */ - /* USER CODE END W1_MemoryManagement_IRQn 0 */ + extern void my_fault_handler_c(sContextStateFrame *frame); + + __asm volatile( + "mrs r0, msp\n" // obtener stack frame + "b my_fault_handler_c\n" + ); } -} - /** * @brief This function handles Pre-fetch fault, memory access fault. 
# Patch metadata preceding this script in the change set:
#   deps/ST-LIB submodule: 332e04d7b84d18e36df36a9e7366c03d030b3a02
#                       -> e2a56178a9ed2cebb53a4840e00f862e5b9e3935
#   hard_faullt_analysis.py: new file
"""Read the hard-fault log from target flash and print a decoded report.

The log is read with STM32_Programmer_CLI over SWD and addresses are resolved
to source lines with arm-none-eabi-addr2line against ELF_FILE.
"""
import subprocess
import struct
import re
import os

HF_FLASH_ADDR = 0x080C0000
# Row label searched for in the programmer output; derived so it cannot drift
# from HF_FLASH_ADDR.
HF_FLASH_ADDR_STRING = f"0x{HF_FLASH_ADDR:08X}"
ELF_FILE = "out/build/latest.elf"

CALL_TRACE_MAX_DEPTH = 16
# Log layout in 32-bit words: flag, r0-r3, r12, lr, pc, psr, cfsr, fault address,
# call-trace depth, then CALL_TRACE_MAX_DEPTH program counters.
HF_LOG_WORDS = 12 + CALL_TRACE_MAX_DEPTH
HF_LOG_BYTES = 4 * HF_LOG_WORDS
HF_FLAG_VALUE = 0xFF00FF00


def read_flash():
    """Dump the log region with STM32_Programmer_CLI.

    Returns the tool's stdout as text, or None when the tool fails or is not
    installed (a message is printed in both cases).
    """
    cmd = [
        "STM32_Programmer_CLI",
        "-c", "port=SWD",
        "-r32", hex(HF_FLASH_ADDR), str(HF_LOG_BYTES),
    ]
    try:
        return subprocess.check_output(cmd, text=True)
    except subprocess.CalledProcessError as e:
        print("Stop debugging to check fault analysis!!!")
        print(f"Error: {e}")
        return None
    except FileNotFoundError:
        print("STM32_Programmer_CLI not found. Make sure it is installed and in PATH.")
        return None


def extract_memory_string(cli_output):
    """Return the dumped words as one hex string, or None if no data row is found.

    Parsing starts at the last occurrence of HF_FLASH_ADDR_STRING and consumes
    "address : words" rows until the first line without a colon.
    """
    start = cli_output.rfind(HF_FLASH_ADDR_STRING)
    if start == -1:
        return None
    rows = []
    for line in cli_output[start:].splitlines():
        _, sep, words = line.partition(":")
        if not sep:
            break
        rows.append(words)
    return "".join(rows).replace(" ", "")


# --------------------------
# Decode Memory Fault (MMFSR)
# --------------------------
def decode_cfsr_memory(cfsr, fault_addr):
    """Print the MemManage flags set in CFSR[7:0]. Returns 1 if any is set, else 0."""
    memory_fault = cfsr & 0xFF
    if memory_fault == 0:
        return 0
    print("\nMemory Fault (MMFSR):")
    if memory_fault & 0b10000000:
        print(f"  MMARVALID: Memory fault address valid -> 0x{fault_addr:08X}")
        if fault_addr in (0xFFFFFFFF, 0x00000000):
            print("  Fault address is invalid / unmapped memory")
        else:
            mem_info = addr2line(fault_addr)
            print_code_context(mem_info)
    if memory_fault & 0b00100000:
        print("  MLSPERR : Floating Point Unit lazy state preservation error")
    if memory_fault & 0b00010000:
        print("  MSTKERR : Stack error on entry to exception")
    if memory_fault & 0b00001000:
        print("  MUNSTKERR : Stack error on return from exception")
    if memory_fault & 0b00000010:
        print("  DACCVIOL : Data access violation (NULL pointer or invalid access)")
    if memory_fault & 0b00000001:
        print("  IACCVIOL : Instruction access violation")
    return 1


# --------------------------
# Decode Bus Fault (BFSR)
# --------------------------
def decode_cfsr_bus(cfsr, fault_addr):
    """Print the BusFault flags set in CFSR[15:8]. Returns 2 if any is set, else 0.

    BFSR bits: 0 IBUSERR, 1 PRECISERR, 2 IMPRECISERR, 3 UNSTKERR, 4 STKERR,
    5 LSPERR, 7 BFARVALID. The fault address is reported whenever BFARVALID is
    set (it used to be gated on bit 0, which is IBUSERR, so precise data bus
    faults never showed their address).
    """
    bus_fault = (cfsr & 0x0000FF00) >> 8
    if bus_fault == 0:
        return 0
    print("\nBus Fault (BFSR):")
    if bus_fault & 0b10000000:
        print(f"  BFARVALID : Bus fault address valid -> 0x{fault_addr:08X}")
    if bus_fault & 0b00000100:
        print(f"\033[91m  Bus fault address imprecise\033[0m (DON'T LOOK CALL STACK)")

    if bus_fault & 0b00100000:
        print("  LSPERR : Floating Point Unit lazy state preservation error")
    if bus_fault & 0b00010000:
        print("  STKERR : Stack error on entry to exception")
    if bus_fault & 0b00001000:
        print("  UNSTKERR : Stack error on return from exception")
    if bus_fault & 0b00000010:
        print("  PRECISERR : Precise data bus error")
    if bus_fault & 0b00000001:
        print("  IBUSERR : Instruction bus error")
    return 2


# --------------------------
# Decode Usage Fault (UFSR)
# --------------------------
def decode_cfsr_usage(cfsr):
    """Print the UsageFault flags set in CFSR[31:16]. Returns 4 if any is set, else 0."""
    usage_fault = (cfsr & 0xFFFF0000) >> 16
    if usage_fault == 0:
        return 0
    print("\nUsage Fault (UFSR):")
    if usage_fault & 0x0200:
        print("  DIVBYZERO : Division by zero")
    if usage_fault & 0x0100:
        print("  UNALIGNED : Unaligned memory access")
    if usage_fault & 0x0008:
        print("  NOCP : Accessed FPU when not present")
    if usage_fault & 0x0004:
        print("  INVPC : Invalid Program Counter(PC) load")
    if usage_fault & 0x0002:
        print("  INVSTATE : Invalid processor state")
    if usage_fault & 0x0001:
        print("  UNDEFINSTR : Undefined instruction")
    return 4


def decode_cfsr(cfsr, fault_addr):
    """Decode all three CFSR groups; returns the sum of the per-group codes (1, 2, 4)."""
    return (decode_cfsr_memory(cfsr, fault_addr)
            + decode_cfsr_bus(cfsr, fault_addr)
            + decode_cfsr_usage(cfsr))


def addr2line(addr):
    """Resolve `addr` with arm-none-eabi-addr2line (-f -C) against ELF_FILE.

    Returns the tool output (function name line, then file:line), or a string
    starting with "addr2line failed" when the tool cannot be run.
    """
    cmd = ["arm-none-eabi-addr2line", "-e", ELF_FILE, "-f", "-C", hex(addr)]
    try:
        return subprocess.check_output(cmd, text=True).strip()
    except Exception as e:
        return f"addr2line failed: {e}"


def analyze_call_stack(calltrace_depth, calltrace_pcs, context=0):
    """Print a source snippet for each recorded call-trace entry.

    Each program counter is looked up at `pc - 4` (Thumb bit cleared first).
    Frames without source information are skipped. `context` is accepted for
    compatibility; snippets show one line above and below the reported line.
    """
    print("\n==== Call Stack Trace ====")
    if calltrace_depth == 0:
        print("No call trace available.")
        return

    for pc in calltrace_pcs[:calltrace_depth]:
        pc_base = pc & ~1
        snippet = addr2line(pc_base - 4).strip()
        if not snippet or snippet.startswith("??:?"):
            continue  # no source information, skip
        print_code_context(snippet, 1)

    print("======================================================")


def print_code_context(lines, context=2):
    """Print the source line named by addr2line output plus surrounding lines.

    lines:   addr2line output (function name on the first line, file:line on the second)
    context: number of lines to show above and below the reported line
    """
    line_list = lines.splitlines()
    if len(line_list) < 2:
        print("Invalid addr2line output")
        return

    file_line = line_list[1].strip()
    split = file_line.rfind(':')
    file_path = file_line[:split]
    try:
        line_no = int(file_line[split+1:]) - 1  # zero-based index
    except ValueError:
        print("\33[91m Couldn't find exact line\33[0m")
        return
    if not os.path.exists(file_path):
        print("Source file not found")
        return

    with open(file_path, "r") as f:
        file_lines = f.readlines()

    start = max(0, line_no - context)
    end = min(len(file_lines), line_no + context + 1)

    print(f"\nSource snippet from {file_path}:")
    for i in range(start, end):
        code = file_lines[i].rstrip()
        if i == line_no:
            # The reported line is highlighted in red.
            print(f"\033[91m{i+1:>4}: {code}\033[0m")
        else:
            print(f"{i+1:>4}: {code}")


def hard_fault_analysis(memory_string):
    """Decode a hex dump of the log and print the report.

    memory_string: hex digits of the dumped words, in the order printed by the
    programmer. Prints a diagnostic and returns when the string is not valid hex
    or is shorter than one log record, instead of raising.
    """
    try:
        raw = bytes.fromhex(memory_string)
    except (ValueError, TypeError) as e:
        print(f"Could not parse the flash dump as hex: {e}")
        return
    if len(raw) < HF_LOG_BYTES:
        print(f"Flash dump too short: got {len(raw)} bytes, expected {HF_LOG_BYTES}")
        return

    # Each printed 8-digit group is one word, most significant digits first.
    words = struct.unpack(f">{HF_LOG_WORDS}I", raw[:HF_LOG_BYTES])
    names = ("HF_Flag", "r0", "r1", "r2", "r3", "r12", "lr", "pc", "psr",
             "cfsr", "fault_addr", "calltrace_depth")
    hf = dict(zip(names, words))
    hf["calltrace_pcs"] = words[len(names):HF_LOG_WORDS]

    if hf["HF_Flag"] != HF_FLAG_VALUE:
        print("There was no hardfault in your Microcontroller, Kudos for you, I hope...")
        return
    print("================HARDFAULT DETECTED ===========")
    print("Registers:")

    for r in ['r0', 'r1', 'r2', 'r3', 'r12', 'lr', 'pc', 'psr']:
        print(f"  {r.upper():<4}: 0x{hf[r]:08X}")

    print(f"  CFSR: 0x{hf['cfsr']:08X}")
    decode_cfsr(hf["cfsr"], hf["fault_addr"])
    print("\nSource Location:")
    pc_loc = addr2line(hf["pc"])
    lr_loc = addr2line(hf["lr"])
    print(f"  Linker Register : 0x{hf['lr']:08X} -> {lr_loc}")

    print(f"  Program Counter : 0x{hf['pc']:08X} -> {pc_loc}")
    print_code_context(pc_loc)

    analyze_call_stack(hf["calltrace_depth"], hf["calltrace_pcs"])

    print("======================================================")

    print("Note: In Release builds (-O2/-O3) the PC may not point exactly to the failing instruction.")
    print("      During interrupts, bus faults, or stack corruption, the PC can be imprecise.")
    print("\nIn case of Imprecise error is dificult to find due to is asynchronous fault")
    print("The error has to be before PC. But not possible to know exactly when.")
    print("Check this link to know more : https://interrupt.memfault.com/blog/cortex-m-hardfault-debug#fn:8")


if __name__ == '__main__':
    out = read_flash()
    if out is None:
        raise SystemExit(1)
    pos_memory_flash = out.rfind(HF_FLASH_ADDR_STRING)
    if pos_memory_flash == -1:
        print(out)
        print(f"Could not find {HF_FLASH_ADDR_STRING} in the programmer output.")
        raise SystemExit(1)
    print(out[0:pos_memory_flash])
    print(out[pos_memory_flash:])
    hard_fault_analysis(extract_memory_string(out))