Joel Stanley | 2d7d343 | 2019-10-09 16:55:26 +1030 | [diff] [blame] | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | From: Hari Bathini <hbathini@linux.ibm.com> |
| 3 | Date: Wed, 9 Oct 2019 10:45:34 +0530 |
Joel Stanley | a9726d1 | 2019-10-15 13:24:18 +1030 | [diff] [blame] | 4 | Subject: [PATCH 4/7] powerpc/fadump: add support to preserve crash data on |
Joel Stanley | 2d7d343 | 2019-10-09 16:55:26 +1030 | [diff] [blame] | 5 | FADUMP disabled kernel |
| 6 | |
| 7 | Add a new kernel config option, CONFIG_PRESERVE_FA_DUMP that ensures |
| 8 | that crash data, from a previously crashed kernel, is preserved. This |
| 9 | helps in cases where FADump is not enabled but the subsequent memory |
| 10 | preserving kernel boot is likely to process this crash data. One |
| 11 | typical usecase for this config option is petitboot kernel. |
| 12 | |
| 13 | As OPAL allows registering an address with it in the first kernel and |
| 14 | retrieving it after MPIPL, use it to store the top of boot memory. |
| 15 | A kernel that intends to preserve crash data retrieves it and avoids |
| 16 | using memory beyond this address. |
| 17 | |
| 18 | Move arch_reserved_kernel_pages() function as it is needed for both |
| 19 | FA_DUMP and PRESERVE_FA_DUMP configurations. |
| 20 | |
| 21 | This is the backport of upstream commit bec53196adf4 ("powerpc/fadump: |
| 22 | add support to preserve crash data on FADUMP disabled kernel"). |
| 23 | |
| 24 | Also, OPAL FADump metadata definition from upstream commit 742a265accd3 |
| 25 | ("powerpc/fadump: register kernel metadata address with opal") is |
| 26 | backported here for completeness. |
| 27 | |
| 28 | Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> |
| 29 | Signed-off-by: Joel Stanley <joel@jms.id.au> |
| 30 | --- |
| 31 | arch/powerpc/Kconfig | 9 +++ |
| 32 | arch/powerpc/include/asm/fadump.h | 17 ++++- |
| 33 | arch/powerpc/kernel/Makefile | 4 +- |
| 34 | arch/powerpc/kernel/fadump.c | 123 ++++++++++++++++++++++++++++-- |
| 35 | arch/powerpc/kernel/prom.c | 4 +- |
| 36 | 5 files changed, 146 insertions(+), 11 deletions(-) |
| 37 | |
| 38 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig |
| 39 | index d8dcd8820369..a286c203e732 100644 |
| 40 | --- a/arch/powerpc/Kconfig |
| 41 | +++ b/arch/powerpc/Kconfig |
| 42 | @@ -579,6 +579,15 @@ config FA_DUMP |
| 43 | |
| 44 | If unsure, say "N" |
| 45 | |
| 46 | +config PRESERVE_FA_DUMP |
| 47 | + bool "Preserve Firmware-assisted dump" |
| 48 | + depends on PPC64 && PPC_POWERNV && !FA_DUMP |
| 49 | + help |
| 50 | + On a kernel with FA_DUMP disabled, this option helps to preserve |
| 51 | + crash data from a previously crashed kernel. Useful when the next |
| 52 | + memory preserving kernel boot would process this crash data. |
| 53 | + Petitboot kernel is the typical usecase for this option. |
| 54 | + |
| 55 | config IRQ_ALL_CPUS |
| 56 | bool "Distribute interrupts on all CPUs by default" |
| 57 | depends on SMP |
| 58 | diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h |
| 59 | index 17d9b6acaf63..0401a6ffde0c 100644 |
| 60 | --- a/arch/powerpc/include/asm/fadump.h |
| 61 | +++ b/arch/powerpc/include/asm/fadump.h |
| 62 | @@ -193,9 +193,6 @@ struct fad_crash_memory_ranges { |
| 63 | }; |
| 64 | |
| 65 | extern int is_fadump_memory_area(u64 addr, ulong size); |
| 66 | -extern int early_init_dt_scan_fw_dump(unsigned long node, |
| 67 | - const char *uname, int depth, void *data); |
| 68 | -extern int fadump_reserve_mem(void); |
| 69 | extern int setup_fadump(void); |
| 70 | extern int is_fadump_active(void); |
| 71 | extern int should_fadump_crash(void); |
| 72 | @@ -208,4 +205,18 @@ static inline int should_fadump_crash(void) { return 0; } |
| 73 | static inline void crash_fadump(struct pt_regs *regs, const char *str) { } |
| 74 | static inline void fadump_cleanup(void) { } |
| 75 | #endif |
| 76 | + |
| 77 | +#ifdef CONFIG_PRESERVE_FA_DUMP |
| 78 | +/* Firmware-assisted dump configuration details. */ |
| 79 | +struct fw_dump { |
| 80 | + u64 boot_mem_top; /* boot memory top read back from OPAL; memory above it gets reserved */ |
| 81 | + u64 dump_active; /* set to 1 by the DT scan when crash data has to be preserved */ |
| 82 | +}; |
| 83 | +#endif |
| 84 | + |
| 85 | +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) |
| 86 | +extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, |
| 87 | + int depth, void *data); |
| 88 | +extern int fadump_reserve_mem(void); |
| 89 | +#endif |
| 90 | #endif |
| 91 | diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile |
| 92 | index 56dfa7a2a6f2..ce70e7776cfd 100644 |
| 93 | --- a/arch/powerpc/kernel/Makefile |
| 94 | +++ b/arch/powerpc/kernel/Makefile |
| 95 | @@ -78,7 +78,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ |
| 96 | eeh_driver.o eeh_event.o eeh_sysfs.o |
| 97 | obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o |
| 98 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
| 99 | -obj-$(CONFIG_FA_DUMP) += fadump.o |
| 100 | +ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),) |
| 101 | +obj-y += fadump.o |
| 102 | +endif |
| 103 | ifdef CONFIG_PPC32 |
| 104 | obj-$(CONFIG_E500) += idle_e500.o |
| 105 | endif |
| 106 | diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c |
| 107 | index 4dacce3fac72..1202dadf1d58 100644 |
| 108 | --- a/arch/powerpc/kernel/fadump.c |
| 109 | +++ b/arch/powerpc/kernel/fadump.c |
| 110 | @@ -24,11 +24,13 @@ |
| 111 | #include <linux/slab.h> |
| 112 | #include <linux/cma.h> |
| 113 | #include <linux/hugetlb.h> |
| 114 | +#include <linux/libfdt.h> |
| 115 | |
| 116 | #include <asm/debugfs.h> |
| 117 | #include <asm/page.h> |
| 118 | #include <asm/prom.h> |
| 119 | #include <asm/rtas.h> |
| 120 | +#include <asm/opal.h> |
| 121 | #include <asm/fadump.h> |
| 122 | #include <asm/setup.h> |
| 123 | |
| 124 | @@ -36,6 +38,7 @@ static struct fw_dump fw_dump; |
| 125 | |
| 126 | static void __init fadump_reserve_crash_area(u64 base); |
| 127 | |
| 128 | +#ifndef CONFIG_PRESERVE_FA_DUMP |
| 129 | static struct fadump_mem_struct fdm; |
| 130 | static const struct fadump_mem_struct *fdm_active; |
| 131 | #ifdef CONFIG_CMA |
| 132 | @@ -542,11 +545,6 @@ int __init fadump_reserve_mem(void) |
| 133 | return 1; |
| 134 | } |
| 135 | |
| 136 | -unsigned long __init arch_reserved_kernel_pages(void) |
| 137 | -{ |
| 138 | - return memblock_reserved_size() / PAGE_SIZE; |
| 139 | -} |
| 140 | - |
| 141 | /* Look for fadump= cmdline option. */ |
| 142 | static int __init early_fadump_param(char *p) |
| 143 | { |
| 144 | @@ -1684,6 +1682,116 @@ int __init setup_fadump(void) |
| 145 | return 1; |
| 146 | } |
| 147 | subsys_initcall(setup_fadump); |
| 148 | +#else /* !CONFIG_PRESERVE_FA_DUMP */ |
| 149 | + |
| 150 | +/* Maximum number of memory regions kernel supports */ |
| 151 | +#define OPAL_FADUMP_MAX_MEM_REGS 128 |
| 152 | + |
| 153 | +/* |
| 154 | + * OPAL FADump kernel metadata |
| 155 | + * |
| 156 | + * The address of this structure will be registered with f/w for retrieving |
| 157 | + * and processing during crash dump. |
| 158 | + */ |
| 159 | +struct opal_fadump_mem_struct { |
| 160 | + u8 version; /* NOTE(review): presumably the metadata layout version - confirm */ |
| 161 | + u8 reserved[3]; /* not read by the code visible here; presumably padding - confirm */ |
| 162 | + u16 region_cnt; /* number of regions */ |
| 163 | + u16 registered_regions; /* Regions registered for MPIPL; 0 means nothing to preserve */ |
| 164 | + u64 fadumphdr_addr; /* not read by the code visible here; presumably the FADump header address - confirm */ |
| 165 | + struct opal_mpipl_region rgn[OPAL_FADUMP_MAX_MEM_REGS]; /* fixed capacity of OPAL_FADUMP_MAX_MEM_REGS entries */ |
| 166 | +} __packed; |
| 167 | + |
| 168 | + |
| 169 | +/* |
| 170 | + * Look under @node for a "dump" subnode carrying "mpipl-boot"; if the |
| 171 | + * previous kernel registered regions for MPIPL, record the boot memory |
| 172 | + * top in @fadump_conf and mark the dump active so it gets preserved. |
| 173 | + */ |
| 174 | +void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) |
| 175 | +{ |
| 176 | + const struct opal_fadump_mem_struct *opal_fdm_active; |
| 177 | + const __be32 *prop; |
| 178 | + unsigned long dn; |
| 179 | + u64 addr = 0; /* receives the raw (big-endian) tag value from OPAL */ |
| 180 | + s64 ret; |
| 181 | + |
| 182 | + dn = of_get_flat_dt_subnode_by_name(node, "dump"); |
| 183 | + if (dn == -FDT_ERR_NOTFOUND) /* no "dump" subnode: nothing to do */ |
| 184 | + return; |
| 185 | + |
| 186 | + /* |
| 187 | + * A dump was initiated on last reboot only if "mpipl-boot" is present. |
| 188 | + */ |
| 189 | + prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); |
| 190 | + if (!prop) |
| 191 | + return; |
| 192 | + |
| 193 | + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); |
| 194 | + if ((ret != OPAL_SUCCESS) || !addr) { |
| 195 | + pr_debug("Could not get Kernel metadata (%lld)\n", ret); |
| 196 | + return; |
| 197 | + } |
| 198 | + |
| 199 | + /* |
| 200 | + * The tag holds the big-endian address of the registered kernel metadata. |
| 201 | + * Preserve memory only if it shows regions registered with f/w for MPIPL. |
| 202 | + */ |
| 203 | + addr = be64_to_cpu(addr); |
| 204 | + pr_debug("Kernel metadata addr: %llx\n", addr); |
| 205 | + opal_fdm_active = (void *)addr; /* NOTE(review): used as a pointer as-is, no __va() - confirm */ |
| 206 | + if (opal_fdm_active->registered_regions == 0) /* a zero test needs no byte swap */ |
| 207 | + return; |
| 208 | + |
| 209 | + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); |
| 210 | + if ((ret != OPAL_SUCCESS) || !addr) { |
| 211 | + pr_err("Failed to get boot memory tag (%lld)\n", ret); |
| 212 | + return; |
| 213 | + } |
| 214 | + |
| 215 | + /* |
| 216 | + * Memory below this address can be used for booting a |
| 217 | + * capture kernel or petitboot kernel. Preserve everything |
| 218 | + * above this address for processing crashdump. |
| 219 | + */ |
| 220 | + fadump_conf->boot_mem_top = be64_to_cpu(addr); |
| 221 | + pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top); |
| 222 | + |
| 223 | + pr_info("Firmware-assisted dump is active.\n"); |
| 224 | + fadump_conf->dump_active = 1; /* checked later by fadump_reserve_mem() */ |
| 225 | +} |
| 226 | + |
| 227 | +/* Flat device-tree scan callback: handle only the depth-1 "ibm,opal" node. */ |
| 228 | +int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, |
| 229 | + int depth, void *data) |
| 230 | +{ |
| 231 | + if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0)) |
| 232 | + return 0; /* not the OPAL node */ |
| 233 | + |
| 234 | + opal_fadump_dt_scan(&fw_dump, node); /* fills the file-scope fw_dump */ |
| 235 | + return 1; /* OPAL node handled; @data is unused */ |
| 236 | +} |
| 237 | + |
| 238 | +/* |
| 239 | + * PRESERVE_FA_DUMP variant: if the DT scan flagged an active dump, reserve |
| 240 | + * everything above fw_dump.boot_mem_top so that a later memory preserving |
| 241 | + * kernel boot can process the crash data. Always returns 1. |
| 242 | + */ |
| 243 | +int __init fadump_reserve_mem(void) |
| 244 | +{ |
| 245 | + if (fw_dump.dump_active) { |
| 246 | + /* |
| 247 | + * If last boot has crashed then reserve all the memory |
| 248 | + * above boot memory to preserve crash data. |
| 249 | + */ |
| 250 | + pr_info("Preserving crash data for processing in next boot.\n"); |
| 251 | + fadump_reserve_crash_area(fw_dump.boot_mem_top); |
| 252 | + } else |
| 253 | + pr_debug("FADump-aware kernel..\n"); |
| 254 | + |
| 255 | + return 1; /* returned whether or not anything was reserved */ |
| 256 | +} |
| 257 | +#endif /* CONFIG_PRESERVE_FA_DUMP */ |
| 258 | |
| 259 | /* Preserve everything above the base address */ |
| 260 | static void __init fadump_reserve_crash_area(u64 base) |
| 261 | @@ -1708,3 +1816,8 @@ static void __init fadump_reserve_crash_area(u64 base) |
| 262 | memblock_reserve(mstart, msize); |
| 263 | } |
| 264 | } |
| 265 | + |
| 266 | +unsigned long __init arch_reserved_kernel_pages(void) |
| 267 | +{ |
| 268 | + return memblock_reserved_size() / PAGE_SIZE; /* memblock-reserved size in PAGE_SIZE units (integer division) */ |
| 269 | +} |
| 270 | diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c |
| 271 | index 7159e791a70d..9c3861bda216 100644 |
| 272 | --- a/arch/powerpc/kernel/prom.c |
| 273 | +++ b/arch/powerpc/kernel/prom.c |
| 274 | @@ -704,7 +704,7 @@ void __init early_init_devtree(void *params) |
| 275 | of_scan_flat_dt(early_init_dt_scan_opal, NULL); |
| 276 | #endif |
| 277 | |
| 278 | -#ifdef CONFIG_FA_DUMP |
| 279 | +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) |
| 280 | /* scan tree to see if dump is active during last boot */ |
| 281 | of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); |
| 282 | #endif |
| 283 | @@ -731,7 +731,7 @@ void __init early_init_devtree(void *params) |
| 284 | if (PHYSICAL_START > MEMORY_START) |
| 285 | memblock_reserve(MEMORY_START, 0x8000); |
| 286 | reserve_kdump_trampoline(); |
| 287 | -#ifdef CONFIG_FA_DUMP |
| 288 | +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) |
| 289 | /* |
| 290 | * If we fail to reserve memory for firmware-assisted dump then |
| 291 | * fallback to kexec based kdump. |