ESPHome 2026.3.0
Loading...
Searching...
No Matches
crash_handler.cpp
Go to the documentation of this file.
1#ifdef USE_ESP32
2
4#ifdef USE_ESP32_CRASH_HANDLER
5
6#include "crash_handler.h"
7#include "esphome/core/log.h"
8
9#include <cinttypes>
10#include <cstring>
11#include <esp_attr.h>
12#include <esp_private/panic_internal.h>
13#include <soc/soc.h>
14
15#if CONFIG_IDF_TARGET_ARCH_XTENSA
16#include <esp_cpu_utils.h>
17#include <esp_debug_helpers.h>
18#include <xtensa_context.h>
19#elif CONFIG_IDF_TARGET_ARCH_RISCV
20#include <riscv/rvruntime-frames.h>
21#endif
22
// Marker stored in RawCrashData::magic by the panic wrapper. A stored record is
// only accepted at boot when this value (and the expected version) is present.
23static constexpr uint32_t CRASH_MAGIC = 0xDEADBEEF;
// Capacity of RawCrashData::backtrace, and the upper bound that a stored
// backtrace_count is clamped to when the record is read back.
24static constexpr size_t MAX_BACKTRACE = 16;
25
26// Check if an address looks like code (flash-mapped or IRAM).
27// Must be safe to call from panic context (no flash access needed).
28static inline bool IRAM_ATTR is_code_addr(uint32_t addr) {
29 return (addr >= SOC_IROM_LOW && addr < SOC_IROM_HIGH) || (addr >= SOC_IRAM_LOW && addr < SOC_IRAM_HIGH);
30}
31
32#if CONFIG_IDF_TARGET_ARCH_RISCV
33// Check if a code address is a real return address by verifying the preceding
34// instruction is a JAL or JALR with rd=ra (x1). Called at log time (not during
35// panic) so flash cache is available and both IRAM and IROM are safely readable.
36static inline bool is_return_addr(uint32_t addr) {
37 if (!is_code_addr(addr) || addr < 4)
38 return false;
39 // A return address on the stack points to the instruction after a call.
40 // Check for 4-byte JAL/JALR call instruction before this address.
41 // Use memcpy for alignment safety — RISC-V C extension means code addresses
42 // are only 2-byte aligned, so addr-4 may not be 4-byte aligned.
43 uint32_t inst;
44 memcpy(&inst, (const void *) (addr - 4), sizeof(inst));
45 // RISC-V instruction encoding: bits [6:0] = opcode, bits [11:7] = rd
46 uint32_t opcode = inst & 0x7f; // Extract 7-bit opcode
47 uint32_t rd = inst & 0xf80; // Extract rd field (bits 11:7)
48 // Match JAL (0x6f) or JALR (0x67) with rd=ra (x1, encoded as 0x80 = 1<<7)
49 if ((opcode == 0x6f || opcode == 0x67) && rd == 0x80)
50 return true;
51 // Check for 2-byte compressed c.jalr before this address (C extension).
52 // c.jalr saves to ra implicitly: funct4=1001, rs1!=0, rs2=0, op=10
53 if (addr >= 2) {
54 uint16_t c_inst = *(uint16_t *) (addr - 2);
55 if ((c_inst & 0xf07f) == 0x9002 && (c_inst & 0x0f80) != 0)
56 return true;
57 }
58 return false;
59}
60#endif
61
62// Raw crash data written by the panic handler wrapper.
63// Lives in .noinit so it survives software reset but contains garbage after power cycle.
64// Validated by magic marker. Static linkage since it's only used within this file.
65// Version field is first so future firmware can always identify the struct layout.
66// Magic is second to validate the data. Remaining fields can change between versions.
67// Version is uint32_t because it would be padded to 4 bytes anyway before the next
68// uint32_t field, so we use the full width rather than wasting 3 bytes of padding.
// Layout version written into RawCrashData::version by the panic wrapper.
// A stored record is only accepted when its version equals this value.
69static constexpr uint32_t CRASH_DATA_VERSION = 1;
// Crash record written during a panic and read back on a later boot.
// version and magic come first so the layout can be identified before any
// other field is interpreted.
70struct RawCrashData {
 // Layout identifier, compared against CRASH_DATA_VERSION when the record is read.
71 uint32_t version;
 // Validity marker, compared against CRASH_MAGIC and zeroed once the record has been read.
72 uint32_t magic;
 // Faulting address, copied from panic_info_t::addr.
73 uint32_t pc;
 // Number of valid entries in backtrace[]; clamped to MAX_BACKTRACE when read back.
74 uint8_t backtrace_count;
75 uint8_t reg_frame_count; // Number of entries from registers (not stack-scanned)
76 uint8_t exception; // panic_exception_t enum (FAULT/ABORT/IWDT/TWDT/DEBUG)
77 uint8_t pseudo_excause; // Whether cause is a pseudo exception (Xtensa SoC-level panic)
 // Captured code addresses; only the first backtrace_count entries are meaningful.
78 uint32_t backtrace[MAX_BACKTRACE];
79 uint32_t cause; // Architecture-specific: exccause (Xtensa) or mcause (RISC-V)
80};
// The single file-local record, placed in the .noinit section via the section attribute.
81static RawCrashData __attribute__((section(".noinit")))
82s_raw_crash_data; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
83
84// Whether crash data was found and validated this boot.
// Starts false; set to true only when the stored magic and version both match.
85static bool s_crash_data_valid = false; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
86
87namespace esphome::esp32 {
88
// Log tag passed to every ESP_LOGE call in this file.
89static const char *const TAG = "esp32.crash";
90
92 if (s_raw_crash_data.magic == CRASH_MAGIC && s_raw_crash_data.version == CRASH_DATA_VERSION) {
93 s_crash_data_valid = true;
94 // Clamp counts to prevent out-of-bounds reads from corrupt .noinit data
95 if (s_raw_crash_data.backtrace_count > MAX_BACKTRACE)
96 s_raw_crash_data.backtrace_count = MAX_BACKTRACE;
97 if (s_raw_crash_data.reg_frame_count > s_raw_crash_data.backtrace_count)
98 s_raw_crash_data.reg_frame_count = s_raw_crash_data.backtrace_count;
99 if (s_raw_crash_data.exception > 4) // panic_exception_t max value
100 s_raw_crash_data.exception = 4; // Default to PANIC_EXCEPTION_FAULT
101 if (s_raw_crash_data.pseudo_excause > 1)
102 s_raw_crash_data.pseudo_excause = 0;
103 }
104 // Clear magic regardless so we don't re-report on next normal reboot
105 s_raw_crash_data.magic = 0;
106}
107
108bool crash_handler_has_data() { return s_crash_data_valid; }
109
// Translate the stored cause code into a human-readable string.
// The tables mirror the ones ESP-IDF keeps as local statics inside
// panic_arch_fill_info(), which are not exposed via any public API.
//
// Returns:
//  - the matching table entry when the cause code has one,
//  - nullptr on RISC-V for pseudo exceptions (the caller then prints only the
//    exception type),
//  - "Unknown" (or "Unknown reason" for Xtensa pseudo exceptions) otherwise.
static const char *get_exception_reason() {
#if CONFIG_IDF_TARGET_ARCH_XTENSA
  const uint32_t code = s_raw_crash_data.cause;

  if (s_raw_crash_data.pseudo_excause) {
    // SoC-level panic: watchdog, cache error, etc.
    // Keep in sync with ESP-IDF's PANIC_RSN_* defines
    static const char *const PSEUDO_REASON[] = {
        "Unknown reason",                 // 0
        "Unhandled debug exception",      // 1
        "Double exception",               // 2
        "Unhandled kernel exception",     // 3
        "Coprocessor exception",          // 4
        "Interrupt wdt timeout on CPU0",  // 5
        "Interrupt wdt timeout on CPU1",  // 6
        "Cache error",                    // 7
    };
    constexpr size_t PSEUDO_COUNT = sizeof(PSEUDO_REASON) / sizeof(PSEUDO_REASON[0]);
    // Out-of-range codes fall back to entry 0
    return PSEUDO_REASON[code < PSEUDO_COUNT ? code : 0];
  }

  // Real Xtensa exception, indexed by exccause; nullptr marks codes without a name
  static const char *const REASON[] = {
      "IllegalInstruction",
      "Syscall",
      "InstructionFetchError",
      "LoadStoreError",
      "Level1Interrupt",
      "Alloca",
      "IntegerDivideByZero",
      "PCValue",
      "Privileged",
      "LoadStoreAlignment",
      nullptr,
      nullptr,
      "InstrPDAddrError",
      "LoadStorePIFDataError",
      "InstrPIFAddrError",
      "LoadStorePIFAddrError",
      "InstTLBMiss",
      "InstTLBMultiHit",
      "InstFetchPrivilege",
      nullptr,
      "InstrFetchProhibited",
      nullptr,
      nullptr,
      nullptr,
      "LoadStoreTLBMiss",
      "LoadStoreTLBMultihit",
      "LoadStorePrivilege",
      nullptr,
      "LoadProhibited",
      "StoreProhibited",
  };
  constexpr size_t REASON_COUNT = sizeof(REASON) / sizeof(REASON[0]);
  const char *name = code < REASON_COUNT ? REASON[code] : nullptr;
  if (name != nullptr)
    return name;
#elif CONFIG_IDF_TARGET_ARCH_RISCV
  // For SoC-level panics (watchdog, cache error), mcause holds IDF-internal
  // interrupt numbers, not standard RISC-V cause codes. The exception type
  // field already identifies these, so return null and let the type name stand.
  if (s_raw_crash_data.pseudo_excause)
    return nullptr;

  // Indexed by mcause; nullptr marks codes without a name
  static const char *const REASON[] = {
      "Instruction address misaligned",
      "Instruction access fault",
      "Illegal instruction",
      "Breakpoint",
      "Load address misaligned",
      "Load access fault",
      "Store address misaligned",
      "Store access fault",
      "Environment call from U-mode",
      "Environment call from S-mode",
      nullptr,
      "Environment call from M-mode",
      "Instruction page fault",
      "Load page fault",
      nullptr,
      "Store page fault",
  };
  constexpr size_t REASON_COUNT = sizeof(REASON) / sizeof(REASON[0]);
  const uint32_t code = s_raw_crash_data.cause;
  const char *name = code < REASON_COUNT ? REASON[code] : nullptr;
  if (name != nullptr)
    return name;
#endif
  return "Unknown";
}
199
200// Exception type names matching panic_exception_t enum
201static const char *get_exception_type() {
202 static const char *const TYPES[] = {
203 "Debug exception", // PANIC_EXCEPTION_DEBUG
204 "Interrupt wdt", // PANIC_EXCEPTION_IWDT
205 "Task wdt", // PANIC_EXCEPTION_TWDT
206 "Abort", // PANIC_EXCEPTION_ABORT
207 "Fault", // PANIC_EXCEPTION_FAULT
208 };
209 uint8_t exc = s_raw_crash_data.exception;
210 if (exc < sizeof(TYPES) / sizeof(TYPES[0]))
211 return TYPES[exc];
212 return "Unknown";
213}
214
215// Intentionally uses separate ESP_LOGE calls per line instead of combining into
216// one multi-line log message. This ensures each address appears as its own line
217// on the serial console, making it possible to see partial output if the device
218// crashes again during boot, and allowing the CLI's process_stacktrace to match
219// and decode each address individually.
221 if (!s_crash_data_valid)
222 return;
223
224 ESP_LOGE(TAG, "*** CRASH DETECTED ON PREVIOUS BOOT ***");
225 const char *reason = get_exception_reason();
226 if (reason != nullptr) {
227 ESP_LOGE(TAG, " Reason: %s - %s", get_exception_type(), reason);
228 } else {
229 ESP_LOGE(TAG, " Reason: %s", get_exception_type());
230 }
231 ESP_LOGE(TAG, " PC: 0x%08" PRIX32 " (fault location)", s_raw_crash_data.pc);
232 uint8_t bt_num = 0;
233 for (uint8_t i = 0; i < s_raw_crash_data.backtrace_count; i++) {
234 uint32_t addr = s_raw_crash_data.backtrace[i];
235#if CONFIG_IDF_TARGET_ARCH_RISCV
236 // Register-sourced entries (MEPC/RA) are trusted; only filter stack-scanned ones.
237 if (i >= s_raw_crash_data.reg_frame_count && !is_return_addr(addr))
238 continue;
239#endif
240#if CONFIG_IDF_TARGET_ARCH_RISCV
241 const char *source = (i < s_raw_crash_data.reg_frame_count) ? "backtrace" : "stack scan";
242#else
243 const char *source = "backtrace";
244#endif
245 ESP_LOGE(TAG, " BT%d: 0x%08" PRIX32 " (%s)", bt_num++, addr, source);
246 }
247 // Build addr2line hint with all captured addresses for easy copy-paste
248 char hint[256];
249 int pos = snprintf(hint, sizeof(hint), "Use: addr2line -pfiaC -e firmware.elf 0x%08" PRIX32, s_raw_crash_data.pc);
250 for (uint8_t i = 0; i < s_raw_crash_data.backtrace_count && pos < (int) sizeof(hint) - 12; i++) {
251 uint32_t addr = s_raw_crash_data.backtrace[i];
252#if CONFIG_IDF_TARGET_ARCH_RISCV
253 if (i >= s_raw_crash_data.reg_frame_count && !is_return_addr(addr))
254 continue;
255#endif
256 pos += snprintf(hint + pos, sizeof(hint) - pos, " 0x%08" PRIX32, addr);
257 }
258 ESP_LOGE(TAG, "%s", hint);
259}
260
261} // namespace esphome::esp32
262
263// --- Panic handler wrapper ---
264// Intercepts esp_panic_handler() via --wrap linker flag to capture crash data
265// into NOINIT memory before the normal panic handler runs.
266//
267extern "C" {
268// NOLINTBEGIN(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp,readability-identifier-naming)
269// Names are mandated by the --wrap linker mechanism
270extern void __real_esp_panic_handler(panic_info_t *info);
271
272void IRAM_ATTR __wrap_esp_panic_handler(panic_info_t *info) {
273 // Save the faulting PC and exception info
274 s_raw_crash_data.pc = (uint32_t) info->addr;
275 s_raw_crash_data.backtrace_count = 0;
276 s_raw_crash_data.reg_frame_count = 0;
277 s_raw_crash_data.exception = (uint8_t) info->exception;
278 s_raw_crash_data.pseudo_excause = info->pseudo_excause ? 1 : 0;
279
280#if CONFIG_IDF_TARGET_ARCH_XTENSA
281 // Xtensa: walk the backtrace using the public API
282 if (info->frame != nullptr) {
283 auto *xt_frame = (XtExcFrame *) info->frame;
284 s_raw_crash_data.cause = xt_frame->exccause;
285 esp_backtrace_frame_t bt_frame = {
286 .pc = (uint32_t) xt_frame->pc,
287 .sp = (uint32_t) xt_frame->a1,
288 .next_pc = (uint32_t) xt_frame->a0,
289 .exc_frame = xt_frame,
290 };
291
292 uint8_t count = 0;
293 // First frame PC
294 uint32_t first_pc = esp_cpu_process_stack_pc(bt_frame.pc);
295 if (is_code_addr(first_pc)) {
296 s_raw_crash_data.backtrace[count++] = first_pc;
297 }
298 // Walk remaining frames
299 while (count < MAX_BACKTRACE && bt_frame.next_pc != 0) {
300 if (!esp_backtrace_get_next_frame(&bt_frame)) {
301 break;
302 }
303 uint32_t pc = esp_cpu_process_stack_pc(bt_frame.pc);
304 if (is_code_addr(pc)) {
305 s_raw_crash_data.backtrace[count++] = pc;
306 }
307 }
308 s_raw_crash_data.backtrace_count = count;
309 }
310
311#elif CONFIG_IDF_TARGET_ARCH_RISCV
312 // RISC-V: capture MEPC + RA, then scan stack for code addresses
313 if (info->frame != nullptr) {
314 auto *rv_frame = (RvExcFrame *) info->frame;
315 s_raw_crash_data.cause = rv_frame->mcause;
316 uint8_t count = 0;
317
318 // Save MEPC (fault PC) and RA (return address)
319 if (is_code_addr(rv_frame->mepc)) {
320 s_raw_crash_data.backtrace[count++] = rv_frame->mepc;
321 }
322 if (is_code_addr(rv_frame->ra) && rv_frame->ra != rv_frame->mepc) {
323 s_raw_crash_data.backtrace[count++] = rv_frame->ra;
324 }
325
326 // Track how many entries came from registers (MEPC/RA) so we can
327 // skip return-address validation for them at log time.
328 s_raw_crash_data.reg_frame_count = count;
329
330 // Scan stack for code addresses — captures broadly during panic,
331 // filtered by is_return_addr() at log time when flash is accessible.
332 auto *scan_start = (uint32_t *) rv_frame->sp;
333 for (uint32_t i = 0; i < 64 && count < MAX_BACKTRACE; i++) {
335 if (is_code_addr(val) && val != rv_frame->mepc && val != rv_frame->ra) {
336 s_raw_crash_data.backtrace[count++] = val;
337 }
338 }
339 s_raw_crash_data.backtrace_count = count;
340 }
341#endif
342
343 // Write version and magic last — ensures all data is written before we mark it valid
344 s_raw_crash_data.version = CRASH_DATA_VERSION;
345 s_raw_crash_data.magic = CRASH_MAGIC;
346
347 // Call the real panic handler (prints to UART, does core dump, reboots, etc.)
349}
350
351// NOLINTEND(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp,readability-identifier-naming)
352} // extern "C"
353
354#endif // USE_ESP32_CRASH_HANDLER
355#endif // USE_ESP32
media_source::MediaSource * source
struct @65::@66 __attribute__
void __real_esp_panic_handler(panic_info_t *info)
void IRAM_ATTR __wrap_esp_panic_handler(panic_info_t *info)
mopeka_std_values val[3]
bool crash_handler_has_data()
Returns true if crash data was found this boot.
void crash_handler_log()
Log crash data if a crash was detected on previous boot.
void crash_handler_read_and_clear()
Read crash data from NOINIT memory and clear the magic marker.
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
size_t size_t pos
Definition helpers.h:929
uint32_t * scan_start
static void uint32_t
uint32_t pc