From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 15 Sep 2017 15:25:48 +1000
Subject: [PATCH 2/3] powerpc/64s: Add workaround for P9 vector CI load issue

POWER9 DD2.1 and earlier have an issue where some cache-inhibited
vector loads return bad data. The workaround has two parts: a
firmware/microcode part that triggers an HMI interrupt when such a
load is hit, and this patch, which then emulates the instruction in
Linux.

The affected instructions are limited to lxvd2x, lxvw4x, lxvb16x and
lxvh8x.

When an instruction triggers the HMI, all threads in the core are
sent to the HMI handler, not just the one running the vector load.

In general, these spurious HMIs are detected by the emulation code and
we just return back to the running process. Unfortunately, if a
spurious interrupt occurs on a vector load to normal memory, we have
no way to detect that it's spurious (unless we walk the page tables,
which is very expensive). In this case we emulate the load anyway, but
we need to do so using a vector load itself to ensure 128-bit
atomicity is preserved.

Some additional debugfs emulated-instruction counters are also added.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[mpe: Switch CONFIG_PPC_BOOK3S_64 to CONFIG_VSX to unbreak the build]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 5080332c2c893118dbc18755f35c8b0131cf0fc4)
Signed-off-by: Joel Stanley <joel@jms.id.au>
---
 arch/powerpc/include/asm/emulated_ops.h |   4 +
 arch/powerpc/include/asm/paca.h         |   1 +
 arch/powerpc/include/asm/uaccess.h      |  17 +++
 arch/powerpc/kernel/exceptions-64s.S    |  16 ++-
 arch/powerpc/kernel/mce.c               |  30 ++++-
 arch/powerpc/kernel/traps.c             | 201 ++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/smp.c    |   7 ++
 7 files changed, 271 insertions(+), 5 deletions(-)

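A note on the decode used by p9_hmi_special_emu() in the traps.c hunk
below: a single mask/match test covers all four instructions because
they share primary opcode 31 and their extended opcodes differ only in
the two "sel" bits recovered by (instr >> 6) & 3, plus the low TX bit
that selects VSR 0-31 versus 32-63. A minimal standalone check of that
claim (illustrative sketch, not part of the commit):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Instruction images with the T/RA/RB fields zeroed */
		const uint32_t op[4] = {
			0x7c000618,	/* lxvw4x,  sel = 0 */
			0x7c000658,	/* lxvh8x,  sel = 1 */
			0x7c000698,	/* lxvd2x,  sel = 2 */
			0x7c0006d8,	/* lxvb16x, sel = 3 */
		};

		for (int i = 0; i < 4; i++) {
			/* TX=0 and TX=1 encodings both pass the patch's test */
			assert((op[i] & 0xfc00073e) == 0x7c000618);
			assert(((op[i] | 1) & 0xfc00073e) == 0x7c000618);
			/* ...and "sel" recovers which instruction it was */
			assert(((op[i] >> 6) & 3) == (uint32_t)i);
		}
		return 0;
	}
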
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f00e10e2a335..651e1354498e 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -55,6 +55,10 @@ extern struct ppc_emulated {
 	struct ppc_emulated_entry mfdscr;
 	struct ppc_emulated_entry mtdscr;
 	struct ppc_emulated_entry lq_stq;
+	struct ppc_emulated_entry lxvw4x;
+	struct ppc_emulated_entry lxvh8x;
+	struct ppc_emulated_entry lxvd2x;
+	struct ppc_emulated_entry lxvb16x;
 #endif
 } ppc_emulated;
 
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b8366df50d19..c9fff7b45691 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -210,6 +210,7 @@ struct paca_struct {
 	 */
 	u16 in_mce;
 	u8 hmi_event_available;		/* HMI event is available */
+	u8 hmi_p9_special_emu;		/* HMI P9 special emulation */
 #endif
 
 	/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 11f4bd07cce0..1e6ead577459 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -174,6 +174,23 @@ do { \
 
 extern long __get_user_bad(void);
 
+/*
+ * This does an atomic 128-bit load from a 16-byte-aligned userspace
+ * address. It is up to the caller to do enable_kernel_altivec() before
+ * calling this!
+ */
+#define __get_user_atomic_128_aligned(kaddr, uaddr, err)	\
+	__asm__ __volatile__(				\
+		"1:	lvx  0,0,%1	# get user\n"	\
+		"	stvx 0,0,%2	# put kernel\n"	\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %0,%3\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err)				\
+		: "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
+
 #define __get_user_asm(x, addr, err, op)		\
 	__asm__ __volatile__(				\
 		"1:	"op" %1,0(%2)	# get_user\n"	\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e9f72abc52b7..2dcfe36ee247 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1060,6 +1060,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 	EXCEPTION_PROLOG_COMMON_3(0xe60)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
+	cmpdi	cr0,r3,0
+
 	/* Windup the stack. */
 	/* Move original HSRR0 and HSRR1 into the respective regs */
 	ld	r9,_MSR(r1)
@@ -1076,10 +1078,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 	REST_8GPRS(2, r1)
 	REST_GPR(10, r1)
 	ld	r11,_CCR(r1)
+	REST_2GPRS(12, r1)
+	bne	1f
 	mtcr	r11
 	REST_GPR(11, r1)
-	REST_2GPRS(12, r1)
-	/* restore original r1. */
+	ld	r1,GPR1(r1)
+	hrfid
+
+1:	mtcr	r11
+	REST_GPR(11, r1)
 	ld	r1,GPR1(r1)
 
 	/*
@@ -1092,8 +1099,9 @@ hmi_exception_after_realmode:
 	EXCEPTION_PROLOG_0(PACA_EXGEN)
 	b	tramp_real_hmi_exception
 
-EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
-
+EXC_COMMON_BEGIN(hmi_exception_common)
+EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
+	ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
 
 EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
 EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
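
The net effect of the assembly changes above: hmi_exception_realmode()'s
return value comes back in r3 (hence the cmpdi) and decides how the
early handler winds up. In rough C terms (hypothetical helper names,
not real functions):

	long ret = hmi_exception_realmode(regs);	/* runs in real mode */

	if (ret == 0) {
		/* Nothing further to do (e.g. a kernel-mode spurious
		 * hit): restore registers and hrfid straight back,
		 * replaying the interrupted instruction. */
		restore_and_hrfid();
	} else {
		/* The "bne 1f" path: wind up normally and redeliver the
		 * HMI in virtual mode, so that handle_hmi_exception(),
		 * and with it the vector CI load emulation, can run. */
		redeliver_as_virtual_mode_hmi();
	}
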
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9b2ea7e71c06..f588951b171d 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -470,6 +470,34 @@ long hmi_exception_realmode(struct pt_regs *regs)
 {
 	__this_cpu_inc(irq_stat.hmi_exceptions);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
+	if (pvr_version_is(PVR_POWER9)) {
+		unsigned long hmer = mfspr(SPRN_HMER);
+
+		/* Do we have the debug bit set */
+		if (hmer & PPC_BIT(17)) {
+			hmer &= ~PPC_BIT(17);
+			mtspr(SPRN_HMER, hmer);
+
+			/*
+			 * Now to avoid problems with soft-disable we
+			 * only do the emulation if we are coming from
+			 * user space
+			 */
+			if (user_mode(regs))
+				local_paca->hmi_p9_special_emu = 1;
+
+			/*
+			 * Don't bother going to OPAL if that's the
+			 * only relevant bit.
+			 */
+			if (!(hmer & mfspr(SPRN_HMEER)))
+				return local_paca->hmi_p9_special_emu;
+		}
+	}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 	wait_for_subcore_guest_exit();
 
 	if (ppc_md.hmi_exception_early)
@@ -477,5 +505,5 @@ long hmi_exception_realmode(struct pt_regs *regs)
 
 	wait_for_tb_resync();
 
-	return 0;
+	return 1;
 }
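
Condensing the logic added to hmi_exception_realmode() above: HMER
bit 17 is the firmware-armed trigger for this workaround; once it is
cleared, any bits still set in HMER that are also enabled in HMEER
mean a real HMI is pending, so the full OPAL/resync path must still
run (which now reports 1 so the virtual-mode handler gets invoked).
As a sketch (hypothetical helper, not in the patch):

	/* 1 = redeliver in virtual mode (emulate), 0 = hrfid back and
	 * replay the instruction, -1 = fall through to full handling */
	static int p9_early_hmi_decision(unsigned long hmer,
					 unsigned long hmeer, bool from_user)
	{
		if (!(hmer & PPC_BIT(17)))
			return -1;		/* not the workaround trigger */
		hmer &= ~PPC_BIT(17);		/* ack the debug bit */
		if (hmer & hmeer)
			return -1;		/* other enabled causes pending */
		return from_user ? 1 : 0;	/* only emulate user-space hits */
	}
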
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 13c9dcdcba69..9ae1924c7d1a 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,6 +37,7 @@
 #include <linux/kdebug.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
+#include <linux/smp.h>
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
@@ -699,6 +700,187 @@ void SMIException(struct pt_regs *regs)
 	die("System Management Interrupt", regs, SIGABRT);
 }
 
+#ifdef CONFIG_VSX
+static void p9_hmi_special_emu(struct pt_regs *regs)
+{
+	unsigned int ra, rb, t, i, sel, instr, rc;
+	const void __user *addr;
+	u8 vbuf[16], *vdst;
+	unsigned long ea, msr, msr_mask;
+	bool swap;
+
+	if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+		return;
+
+	/*
+	 * lxvb16x	opcode: 0x7c0006d8
+	 * lxvd2x	opcode: 0x7c000698
+	 * lxvh8x	opcode: 0x7c000658
+	 * lxvw4x	opcode: 0x7c000618
+	 */
+	if ((instr & 0xfc00073e) != 0x7c000618) {
+		pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
+			 " instr=%08x\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr);
+		return;
+	}
+
+	/* Grab vector registers into the task struct */
+	msr = regs->msr; /* Grab msr before we flush the bits */
+	flush_vsx_to_thread(current);
+	enable_kernel_altivec();
+
+	/*
+	 * Is userspace running with a different endian (this is rare but
+	 * not impossible)
+	 */
+	swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+
+	/* Decode the instruction */
+	ra = (instr >> 16) & 0x1f;
+	rb = (instr >> 11) & 0x1f;
+	t = (instr >> 21) & 0x1f;
+	if (instr & 1)
+		vdst = (u8 *)&current->thread.vr_state.vr[t];
+	else
+		vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
+
+	/* Grab the vector address */
+	ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
+	if (is_32bit_task())
+		ea &= 0xfffffffful;
+	addr = (__force const void __user *)ea;
+
+	/* Check it */
+	if (!access_ok(VERIFY_READ, addr, 16)) {
+		pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
+			 " instr=%08x addr=%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, (unsigned long)addr);
+		return;
+	}
+
+	/* Read the vector */
+	rc = 0;
+	if ((unsigned long)addr & 0xfUL)
+		/* unaligned case */
+		rc = __copy_from_user_inatomic(vbuf, addr, 16);
+	else
+		__get_user_atomic_128_aligned(vbuf, addr, rc);
+	if (rc) {
+		pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
+			 " instr=%08x addr=%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, (unsigned long)addr);
+		return;
+	}
+
+	pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
+		 " instr=%08x addr=%016lx\n",
+		 smp_processor_id(), current->comm, current->pid, regs->nip,
+		 instr, (unsigned long) addr);
+
+	/* Grab instruction "selector" */
+	sel = (instr >> 6) & 3;
+
+	/*
+	 * Check to make sure the facility is actually enabled. This
+	 * could happen if we get a false positive hit.
+	 *
+	 * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
+	 * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
+	 */
+	msr_mask = MSR_VSX;
+	if ((sel & 1) && (instr & 1))	/* lxvh8x & lxvb16x + VSR >= 32 */
+		msr_mask = MSR_VEC;
+	if (!(msr & msr_mask)) {
+		pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
+			 " instr=%08x msr:%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, msr);
+		return;
+	}
+
+	/* Do logging here before we modify sel based on endian */
+	switch (sel) {
+	case 0: /* lxvw4x */
+		PPC_WARN_EMULATED(lxvw4x, regs);
+		break;
+	case 1: /* lxvh8x */
+		PPC_WARN_EMULATED(lxvh8x, regs);
+		break;
+	case 2: /* lxvd2x */
+		PPC_WARN_EMULATED(lxvd2x, regs);
+		break;
+	case 3: /* lxvb16x */
+		PPC_WARN_EMULATED(lxvb16x, regs);
+		break;
+	}
+
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * An LE kernel stores the vector in the task struct as an LE
+	 * byte array (effectively swapping both the components and
+	 * the content of the components). Those instructions expect
+	 * the components to remain in ascending address order, so we
+	 * swap them back.
+	 *
+	 * If we are running a BE user space, the expectation is that
+	 * of a simple memcpy, so forcing the emulation to look like
+	 * a lxvb16x should do the trick.
+	 */
+	if (swap)
+		sel = 3;
+
+	switch (sel) {
+	case 0: /* lxvw4x */
+		for (i = 0; i < 4; i++)
+			((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
+		break;
+	case 1: /* lxvh8x */
+		for (i = 0; i < 8; i++)
+			((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
+		break;
+	case 2: /* lxvd2x */
+		for (i = 0; i < 2; i++)
+			((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
+		break;
+	case 3: /* lxvb16x */
+		for (i = 0; i < 16; i++)
+			vdst[i] = vbuf[15-i];
+		break;
+	}
+#else /* __LITTLE_ENDIAN__ */
+	/* On a big endian kernel, a BE userspace only needs a memcpy */
+	if (!swap)
+		sel = 3;
+
+	/* Otherwise, we need to swap the content of the components */
+	switch (sel) {
+	case 0: /* lxvw4x */
+		for (i = 0; i < 4; i++)
+			((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
+		break;
+	case 1: /* lxvh8x */
+		for (i = 0; i < 8; i++)
+			((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
+		break;
+	case 2: /* lxvd2x */
+		for (i = 0; i < 2; i++)
+			((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
+		break;
+	case 3: /* lxvb16x */
+		memcpy(vdst, vbuf, 16);
+		break;
+	}
+#endif /* !__LITTLE_ENDIAN__ */
+
+	/* Go to next instruction */
+	regs->nip += 4;
+}
+#endif /* CONFIG_VSX */
+
 void handle_hmi_exception(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
@@ -706,6 +888,21 @@ void handle_hmi_exception(struct pt_regs *regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
+#ifdef CONFIG_VSX
+	/* Real mode flagged P9 special emu is needed */
+	if (local_paca->hmi_p9_special_emu) {
+		local_paca->hmi_p9_special_emu = 0;
+
+		/*
+		 * We don't want to take page faults while doing the
+		 * emulation, we just replay the instruction if necessary.
+		 */
+		pagefault_disable();
+		p9_hmi_special_emu(regs);
+		pagefault_enable();
+	}
+#endif /* CONFIG_VSX */
+
 	if (ppc_md.handle_hmi_exception)
 		ppc_md.handle_hmi_exception(regs);
 
@@ -1924,6 +2121,10 @@ struct ppc_emulated ppc_emulated = {
 	WARN_EMULATED_SETUP(mfdscr),
 	WARN_EMULATED_SETUP(mtdscr),
 	WARN_EMULATED_SETUP(lq_stq),
+	WARN_EMULATED_SETUP(lxvw4x),
+	WARN_EMULATED_SETUP(lxvh8x),
+	WARN_EMULATED_SETUP(lxvd2x),
+	WARN_EMULATED_SETUP(lxvb16x),
 #endif
 };
 
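The little-endian fixup above is the subtle part of the emulation:
vbuf holds the 16 bytes of memory in ascending address order, while an
LE kernel's register image is byte-reversed, so the element order must
be flipped per element size. A standalone illustration of the sel == 0
(lxvw4x) case, with sample values (not from the patch):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t vbuf[16], vdst[16];
		int i;

		for (i = 0; i < 16; i++)
			vbuf[i] = i;			/* bytes 0x00..0x0f */

		for (i = 0; i < 4; i++)			/* the sel == 0 loop above */
			((uint32_t *)vdst)[i] = ((uint32_t *)vbuf)[3 - i];

		for (i = 0; i < 16; i++)
			printf("%02x ", vdst[i]);
		printf("\n");
		/* prints: 0c 0d 0e 0f 08 09 0a 0b 04 05 06 07 00 01 02 03 */
		return 0;
	}
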
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index c17f81e433f7..355d3f99cafb 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -49,6 +49,13 @@
 
 static void pnv_smp_setup_cpu(int cpu)
 {
+	/*
+	 * P9 workaround for CI vector load (see traps.c),
+	 * enable the corresponding HMI interrupt
+	 */
+	if (pvr_version_is(PVR_POWER9))
+		mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
+
 	if (xive_enabled())
 		xive_smp_setup_cpu();
 	else if (cpu != boot_cpuid)
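
Once a kernel with this patch is running, the new counters can be read
back to confirm the emulation is firing. The ppc_emulated entries
normally appear under /sys/kernel/debug/powerpc/emulated_instructions/
(path assumed from the usual powerpc debugfs layout; verify on the
target). A small reader:

	#include <stdio.h>

	int main(void)
	{
		const char *names[] = { "lxvw4x", "lxvh8x", "lxvd2x", "lxvb16x" };
		const char *dir = "/sys/kernel/debug/powerpc/emulated_instructions";
		char path[128];
		unsigned long long n;

		for (int i = 0; i < 4; i++) {
			snprintf(path, sizeof(path), "%s/%s", dir, names[i]);
			FILE *f = fopen(path, "r");	/* needs debugfs mounted, root */
			if (!f)
				continue;
			if (fscanf(f, "%llu", &n) == 1)
				printf("%-8s %llu\n", names[i], n);
			fclose(f);
		}
		return 0;
	}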