blob: 5ec6134db152644499f3b492d47ee70467f026d3 [file] [log] [blame]
Joel Stanley2d7d3432019-10-09 16:55:26 +10301From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2From: Hari Bathini <hbathini@linux.ibm.com>
3Date: Wed, 9 Oct 2019 10:45:34 +0530
Joel Stanleya9726d12019-10-15 13:24:18 +10304Subject: [PATCH 4/7] powerpc/fadump: add support to preserve crash data on
Joel Stanley2d7d3432019-10-09 16:55:26 +10305 FADUMP disabled kernel
6
7Add a new kernel config option, CONFIG_PRESERVE_FA_DUMP, that ensures
8that crash data from a previously crashed kernel is preserved. This
9helps in cases where FADump is not enabled but the subsequent memory
10preserving kernel boot is likely to process this crash data. One
11typical use case for this config option is the petitboot kernel.
12
13As OPAL allows registering an address with it in the first kernel and
14retrieving it after MPIPL, use it to store the top of boot memory.
15A kernel that intends to preserve crash data retrieves this address and
16avoids using memory beyond it.
17
18Move the arch_reserved_kernel_pages() function out of the FA_DUMP-only
19code, as it is needed for both FA_DUMP and PRESERVE_FA_DUMP configurations.
20
21This is a backport of upstream commit bec53196adf4 ("powerpc/fadump:
22add support to preserve crash data on FADUMP disabled kernel").
23
24Also, the OPAL FADump metadata definition from upstream commit 742a265accd3
25("powerpc/fadump: register kernel metadata address with opal") is
26backported here for completeness.
27
28Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
29Signed-off-by: Joel Stanley <joel@jms.id.au>
30---
31 arch/powerpc/Kconfig | 9 +++
32 arch/powerpc/include/asm/fadump.h | 17 ++++-
33 arch/powerpc/kernel/Makefile | 4 +-
34 arch/powerpc/kernel/fadump.c | 123 ++++++++++++++++++++++++++++--
35 arch/powerpc/kernel/prom.c | 4 +-
36 5 files changed, 146 insertions(+), 11 deletions(-)
37
38diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
39index d8dcd8820369..a286c203e732 100644
40--- a/arch/powerpc/Kconfig
41+++ b/arch/powerpc/Kconfig
42@@ -579,6 +579,15 @@ config FA_DUMP
43
44 If unsure, say "N"
45
46+config PRESERVE_FA_DUMP
47+ bool "Preserve Firmware-assisted dump"
48+ depends on PPC64 && PPC_POWERNV && !FA_DUMP
49+ help
50+ On a kernel with FA_DUMP disabled, this option helps to preserve
51+ crash data from a previously crashed kernel. Useful when the next
52+ memory preserving kernel boot would process this crash data.
53+ Petitboot kernel is the typical use case for this option.
54+
55 config IRQ_ALL_CPUS
56 bool "Distribute interrupts on all CPUs by default"
57 depends on SMP
58diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
59index 17d9b6acaf63..0401a6ffde0c 100644
60--- a/arch/powerpc/include/asm/fadump.h
61+++ b/arch/powerpc/include/asm/fadump.h
62@@ -193,9 +193,6 @@ struct fad_crash_memory_ranges {
63 };
64
65 extern int is_fadump_memory_area(u64 addr, ulong size);
66-extern int early_init_dt_scan_fw_dump(unsigned long node,
67- const char *uname, int depth, void *data);
68-extern int fadump_reserve_mem(void);
69 extern int setup_fadump(void);
70 extern int is_fadump_active(void);
71 extern int should_fadump_crash(void);
72@@ -208,4 +205,18 @@ static inline int should_fadump_crash(void) { return 0; }
73 static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
74 static inline void fadump_cleanup(void) { }
75 #endif
76+
77+#ifdef CONFIG_PRESERVE_FA_DUMP
78+/* Firmware-assisted dump configuration details. */
79+struct fw_dump {
80+ u64 boot_mem_top;
81+ u64 dump_active;
82+};
83+#endif
84+
85+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
86+extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
87+ int depth, void *data);
88+extern int fadump_reserve_mem(void);
89+#endif
90 #endif
91diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
92index 56dfa7a2a6f2..ce70e7776cfd 100644
93--- a/arch/powerpc/kernel/Makefile
94+++ b/arch/powerpc/kernel/Makefile
95@@ -78,7 +78,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
96 eeh_driver.o eeh_event.o eeh_sysfs.o
97 obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
98 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
99-obj-$(CONFIG_FA_DUMP) += fadump.o
100+ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
101+obj-y += fadump.o
102+endif
103 ifdef CONFIG_PPC32
104 obj-$(CONFIG_E500) += idle_e500.o
105 endif
106diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
107index 4dacce3fac72..1202dadf1d58 100644
108--- a/arch/powerpc/kernel/fadump.c
109+++ b/arch/powerpc/kernel/fadump.c
110@@ -24,11 +24,13 @@
111 #include <linux/slab.h>
112 #include <linux/cma.h>
113 #include <linux/hugetlb.h>
114+#include <linux/libfdt.h>
115
116 #include <asm/debugfs.h>
117 #include <asm/page.h>
118 #include <asm/prom.h>
119 #include <asm/rtas.h>
120+#include <asm/opal.h>
121 #include <asm/fadump.h>
122 #include <asm/setup.h>
123
124@@ -36,6 +38,7 @@ static struct fw_dump fw_dump;
125
126 static void __init fadump_reserve_crash_area(u64 base);
127
128+#ifndef CONFIG_PRESERVE_FA_DUMP
129 static struct fadump_mem_struct fdm;
130 static const struct fadump_mem_struct *fdm_active;
131 #ifdef CONFIG_CMA
132@@ -542,11 +545,6 @@ int __init fadump_reserve_mem(void)
133 return 1;
134 }
135
136-unsigned long __init arch_reserved_kernel_pages(void)
137-{
138- return memblock_reserved_size() / PAGE_SIZE;
139-}
140-
141 /* Look for fadump= cmdline option. */
142 static int __init early_fadump_param(char *p)
143 {
144@@ -1684,6 +1682,116 @@ int __init setup_fadump(void)
145 return 1;
146 }
147 subsys_initcall(setup_fadump);
148+#else /* !CONFIG_PRESERVE_FA_DUMP */
149+
150+/* Maximum number of memory regions kernel supports */
151+#define OPAL_FADUMP_MAX_MEM_REGS 128
152+
153+/*
154+ * OPAL FADump kernel metadata
155+ *
156+ * The address of this structure will be registered with f/w for retrieving
157+ * and processing during crash dump.
158+ */
159+struct opal_fadump_mem_struct {
160+ u8 version;
161+ u8 reserved[3];
162+ u16 region_cnt; /* number of regions */
163+ u16 registered_regions; /* Regions registered for MPIPL */
164+ u64 fadumphdr_addr;
165+ struct opal_mpipl_region rgn[OPAL_FADUMP_MAX_MEM_REGS];
166+} __packed;
167+
168+
169+/*
170+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
171+ * ensure crash data is preserved in hope that the subsequent memory
172+ * preserving kernel boot is going to process this crash data.
173+ */
174+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
175+{
176+ const struct opal_fadump_mem_struct *opal_fdm_active;
177+ const __be32 *prop;
178+ unsigned long dn;
179+ u64 addr = 0;
180+ s64 ret;
181+
182+ dn = of_get_flat_dt_subnode_by_name(node, "dump");
183+ if (dn == -FDT_ERR_NOTFOUND)
184+ return;
185+
186+ /*
187+ * Check if dump has been initiated on last reboot.
188+ */
189+ prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
190+ if (!prop)
191+ return;
192+
193+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
194+ if ((ret != OPAL_SUCCESS) || !addr) {
195+ pr_debug("Could not get Kernel metadata (%lld)\n", ret);
196+ return;
197+ }
198+
199+ /*
200+ * Preserve memory only if kernel memory regions are registered
201+ * with f/w for MPIPL.
202+ */
203+ addr = be64_to_cpu(addr);
204+ pr_debug("Kernel metadata addr: %llx\n", addr);
205+ opal_fdm_active = (void *)addr;
206+ if (opal_fdm_active->registered_regions == 0)
207+ return;
208+
209+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr);
210+ if ((ret != OPAL_SUCCESS) || !addr) {
211+ pr_err("Failed to get boot memory tag (%lld)\n", ret);
212+ return;
213+ }
214+
215+ /*
216+ * Memory below this address can be used for booting a
217+ * capture kernel or petitboot kernel. Preserve everything
218+ * above this address for processing crashdump.
219+ */
220+ fadump_conf->boot_mem_top = be64_to_cpu(addr);
221+ pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);
222+
223+ pr_info("Firmware-assisted dump is active.\n");
224+ fadump_conf->dump_active = 1;
225+}
226+
227+/* Scan the Firmware Assisted dump configuration details. */
228+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
229+ int depth, void *data)
230+{
231+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
232+ return 0;
233+
234+ opal_fadump_dt_scan(&fw_dump, node);
235+ return 1;
236+}
237+
238+/*
239+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
240+ * preserve crash data. The subsequent memory preserving kernel boot
241+ * is likely to process this crash data.
242+ */
243+int __init fadump_reserve_mem(void)
244+{
245+ if (fw_dump.dump_active) {
246+ /*
247+ * If last boot has crashed then reserve all the memory
248+ * above boot memory to preserve crash data.
249+ */
250+ pr_info("Preserving crash data for processing in next boot.\n");
251+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
252+ } else
253+ pr_debug("FADump-aware kernel..\n");
254+
255+ return 1;
256+}
257+#endif /* CONFIG_PRESERVE_FA_DUMP */
258
259 /* Preserve everything above the base address */
260 static void __init fadump_reserve_crash_area(u64 base)
261@@ -1708,3 +1816,8 @@ static void __init fadump_reserve_crash_area(u64 base)
262 memblock_reserve(mstart, msize);
263 }
264 }
265+
266+unsigned long __init arch_reserved_kernel_pages(void)
267+{
268+ return memblock_reserved_size() / PAGE_SIZE;
269+}
270diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
271index 7159e791a70d..9c3861bda216 100644
272--- a/arch/powerpc/kernel/prom.c
273+++ b/arch/powerpc/kernel/prom.c
274@@ -704,7 +704,7 @@ void __init early_init_devtree(void *params)
275 of_scan_flat_dt(early_init_dt_scan_opal, NULL);
276 #endif
277
278-#ifdef CONFIG_FA_DUMP
279+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
280 /* scan tree to see if dump is active during last boot */
281 of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
282 #endif
283@@ -731,7 +731,7 @@ void __init early_init_devtree(void *params)
284 if (PHYSICAL_START > MEMORY_START)
285 memblock_reserve(MEMORY_START, 0x8000);
286 reserve_kdump_trampoline();
287-#ifdef CONFIG_FA_DUMP
288+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
289 /*
290 * If we fail to reserve memory for firmware-assisted dump then
291 * fallback to kexec based kdump.