From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Wed, 4 Oct 2017 13:42:52 +1100
Subject: [PATCH 3/4] powerpc/64s: Add workaround for P9 vector CI load issue

POWER9 DD2.1 and earlier have an issue where some cache inhibited
vector loads will return bad data. The workaround has two parts: a
firmware/microcode part that triggers HMI interrupts when hitting such
loads, and this patch, which then emulates the instructions in Linux.

The affected instructions are limited to lxvd2x, lxvw4x, lxvb16x and
lxvh8x.

When an instruction triggers the HMI, all threads in the core will be
sent to the HMI handler, not just the one running the vector load.

In general, these spurious HMIs are detected by the emulation code and
we just return to the running process. Unfortunately, if a spurious
interrupt occurs on a vector load to normal memory, we have no way to
detect that it's spurious (unless we walk the page tables, which is
very expensive). In this case we emulate the load, but we need to do
so using a vector load itself to ensure that 128-bit atomicity is
preserved.

Some additional debugfs emulated instruction counters are also added.
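
For reference, the four affected opcodes differ only in the two "sel"
bits extracted by (instr >> 6) & 3 (selecting the word/half/double/byte
form) and in the low TX bit that picks VSRs 32-63, which is why
p9_hmi_special_emu() can recognise all of them with a single mask
compare. A standalone sketch of that decode (illustrative only, not
part of the kernel change):

    #include <stdio.h>

    int main(void)
    {
        /* lxvw4x, lxvh8x, lxvd2x, lxvb16x base opcode patterns */
        unsigned int ops[4] = { 0x7c000618, 0x7c000658,
                                0x7c000698, 0x7c0006d8 };
        int i;

        /* Each prints match=1 with sel = 0, 1, 2, 3 respectively */
        for (i = 0; i < 4; i++)
            printf("%08x: match=%d sel=%d\n", ops[i],
                   (ops[i] & 0xfc00073e) == 0x7c000618,
                   (ops[i] >> 6) & 3);
        return 0;
    }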

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[mpe: Switch CONFIG_PPC_BOOK3S_64 to CONFIG_VSX to unbreak the build]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 5080332c2c893118dbc18755f35c8b0131cf0fc4)
Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Signed-off-by: Joel Stanley <joel@jms.id.au>
---
 arch/powerpc/include/asm/emulated_ops.h |   4 +
 arch/powerpc/include/asm/paca.h         |   1 +
 arch/powerpc/include/asm/uaccess.h      |  17 +++
 arch/powerpc/kernel/exceptions-64s.S    |  16 ++-
 arch/powerpc/kernel/mce.c               |  30 ++++-
 arch/powerpc/kernel/traps.c             | 201 ++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/smp.c    |   7 ++
 7 files changed, 271 insertions(+), 5 deletions(-)

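A note on the endian handling in p9_hmi_special_emu(): on a
little-endian kernel the vector image in the task struct is an LE byte
array, so the lxvw4x case has to store the four 32-bit components back
in reversed element order. A hypothetical standalone sketch of just
that reversal (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors the LE-kernel "case 0" (lxvw4x) loop below: component i
     * of the destination takes component 3-i of the 16 bytes read. */
    static void emu_lxvw4x_le(uint8_t *vdst, const uint8_t *vbuf)
    {
        uint32_t w;
        int i;

        for (i = 0; i < 4; i++) {
            memcpy(&w, vbuf + 4 * (3 - i), 4);
            memcpy(vdst + 4 * i, &w, 4);
        }
    }

    int main(void)
    {
        uint8_t src[16], dst[16];
        int i;

        for (i = 0; i < 16; i++)
            src[i] = i;
        emu_lxvw4x_le(dst, src);
        for (i = 0; i < 16; i++)
            printf("%02x%c", dst[i], i == 15 ? '\n' : ' ');
        /* prints: 0c 0d 0e 0f 08 09 0a 0b 04 05 06 07 00 01 02 03 */
        return 0;
    }
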
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f00e10e2a335..651e1354498e 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -55,6 +55,10 @@ extern struct ppc_emulated {
 	struct ppc_emulated_entry mfdscr;
 	struct ppc_emulated_entry mtdscr;
 	struct ppc_emulated_entry lq_stq;
+	struct ppc_emulated_entry lxvw4x;
+	struct ppc_emulated_entry lxvh8x;
+	struct ppc_emulated_entry lxvd2x;
+	struct ppc_emulated_entry lxvb16x;
 #endif
 } ppc_emulated;
 
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index dc88a31cc79a..21061773149b 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -203,6 +203,7 @@ struct paca_struct {
 	 */
 	u16 in_mce;
 	u8 hmi_event_available;		/* HMI event is available */
+	u8 hmi_p9_special_emu;		/* HMI P9 special emulation */
 #endif
 
 	/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9c0e60ca1666..e34f15e727d9 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -173,6 +173,23 @@ do {								\
 
 extern long __get_user_bad(void);
 
+/*
+ * This does an atomic 128-bit load from a 16-byte-aligned userspace address.
+ * It's up to the caller to do enable_kernel_altivec() before calling!
+ */
+#define __get_user_atomic_128_aligned(kaddr, uaddr, err)	\
+	__asm__ __volatile__(				\
+		"1:	lvx  0,0,%1	# get user\n"	\
+		"	stvx 0,0,%2	# put kernel\n"	\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %0,%3\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err)				\
+		: "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
+
 #define __get_user_asm(x, addr, err, op)		\
 	__asm__ __volatile__(				\
 		"1:	"op" %1,0(%2)	# get_user\n"	\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d9dfdf7ede45..0256286d7153 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1032,6 +1032,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 	EXCEPTION_PROLOG_COMMON_3(0xe60)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
+	cmpdi	cr0,r3,0
+
 	/* Windup the stack. */
 	/* Move original HSRR0 and HSRR1 into the respective regs */
 	ld	r9,_MSR(r1)
@@ -1048,10 +1050,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 	REST_8GPRS(2, r1)
 	REST_GPR(10, r1)
 	ld	r11,_CCR(r1)
+	REST_2GPRS(12, r1)
+	bne	1f
 	mtcr	r11
 	REST_GPR(11, r1)
-	REST_2GPRS(12, r1)
-	/* restore original r1. */
+	ld	r1,GPR1(r1)
+	hrfid
+
+1:	mtcr	r11
+	REST_GPR(11, r1)
 	ld	r1,GPR1(r1)
 
 	/*
@@ -1064,8 +1071,9 @@ hmi_exception_after_realmode:
 	EXCEPTION_PROLOG_0(PACA_EXGEN)
 	b	tramp_real_hmi_exception
 
-EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
-
+EXC_COMMON_BEGIN(hmi_exception_common)
+EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
+        ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
 
 EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
 EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9b2ea7e71c06..f588951b171d 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -470,6 +470,34 @@ long hmi_exception_realmode(struct pt_regs *regs)
 {
 	__this_cpu_inc(irq_stat.hmi_exceptions);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
+	if (pvr_version_is(PVR_POWER9)) {
+		unsigned long hmer = mfspr(SPRN_HMER);
+
+		/* Do we have the debug bit set */
+		if (hmer & PPC_BIT(17)) {
+			hmer &= ~PPC_BIT(17);
+			mtspr(SPRN_HMER, hmer);
+
+			/*
+			 * Now to avoid problems with soft-disable we
+			 * only do the emulation if we are coming from
+			 * user space
+			 */
+			if (user_mode(regs))
+				local_paca->hmi_p9_special_emu = 1;
+
+			/*
+			 * Don't bother going to OPAL if that's the
+			 * only relevant bit.
+			 */
+			if (!(hmer & mfspr(SPRN_HMEER)))
+				return local_paca->hmi_p9_special_emu;
+		}
+	}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 	wait_for_subcore_guest_exit();
 
 	if (ppc_md.hmi_exception_early)
@@ -477,5 +505,5 @@ long hmi_exception_realmode(struct pt_regs *regs)
 
 	wait_for_tb_resync();
 
-	return 0;
+	return 1;
 }
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 559664e75fb5..5fbe81d4e648 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,6 +37,7 @@
 #include <linux/kdebug.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
+#include <linux/smp.h>
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
@@ -761,6 +762,187 @@ void SMIException(struct pt_regs *regs)
 	die("System Management Interrupt", regs, SIGABRT);
 }
 
+#ifdef CONFIG_VSX
+static void p9_hmi_special_emu(struct pt_regs *regs)
+{
+	unsigned int ra, rb, t, i, sel, instr, rc;
+	const void __user *addr;
+	u8 vbuf[16], *vdst;
+	unsigned long ea, msr, msr_mask;
+	bool swap;
+
+	if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+		return;
+
+	/*
+	 * lxvb16x	opcode: 0x7c0006d8
+	 * lxvd2x	opcode: 0x7c000698
+	 * lxvh8x	opcode: 0x7c000658
+	 * lxvw4x	opcode: 0x7c000618
+	 */
+	if ((instr & 0xfc00073e) != 0x7c000618) {
+		pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
+			 " instr=%08x\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr);
+		return;
+	}
+
+	/* Grab vector registers into the task struct */
+	msr = regs->msr; /* Grab msr before we flush the bits */
+	flush_vsx_to_thread(current);
+	enable_kernel_altivec();
+
+	/*
+	 * Is userspace running with a different endianness? (This is
+	 * rare but not impossible.)
+	 */
+	swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+
+	/* Decode the instruction */
+	ra = (instr >> 16) & 0x1f;
+	rb = (instr >> 11) & 0x1f;
+	t = (instr >> 21) & 0x1f;
+	if (instr & 1)
+		vdst = (u8 *)&current->thread.vr_state.vr[t];
+	else
+		vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
+
+	/* Grab the vector address */
+	ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
+	if (is_32bit_task())
+		ea &= 0xfffffffful;
+	addr = (__force const void __user *)ea;
+
+	/* Check it */
+	if (!access_ok(VERIFY_READ, addr, 16)) {
+		pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
+			 " instr=%08x addr=%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, (unsigned long)addr);
+		return;
+	}
+
+	/* Read the vector */
+	rc = 0;
+	if ((unsigned long)addr & 0xfUL)
+		/* unaligned case */
+		rc = __copy_from_user_inatomic(vbuf, addr, 16);
+	else
+		__get_user_atomic_128_aligned(vbuf, addr, rc);
+	if (rc) {
+		pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
+			 " instr=%08x addr=%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, (unsigned long)addr);
+		return;
+	}
+
+	pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
+		 " instr=%08x addr=%016lx\n",
+		 smp_processor_id(), current->comm, current->pid, regs->nip,
+		 instr, (unsigned long) addr);
+
+	/* Grab instruction "selector" */
+	sel = (instr >> 6) & 3;
+
+	/*
+	 * Check to make sure the facility is actually enabled. This
+	 * could happen if we get a false positive hit.
+	 *
+	 * lxvd2x/lxvw4x always check MSR VSX				sel = 0,2
+	 * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used	sel = 1,3
+	 */
+	msr_mask = MSR_VSX;
+	if ((sel & 1) && (instr & 1))	/* lxvh8x & lxvb16x + VSR >= 32 */
+		msr_mask = MSR_VEC;
+	if (!(msr & msr_mask)) {
+		pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
+			 " instr=%08x msr:%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, msr);
+		return;
+	}
+
+	/* Do logging here before we modify sel based on endian */
+	switch (sel) {
+	case 0:	/* lxvw4x */
+		PPC_WARN_EMULATED(lxvw4x, regs);
+		break;
+	case 1: /* lxvh8x */
+		PPC_WARN_EMULATED(lxvh8x, regs);
+		break;
+	case 2: /* lxvd2x */
+		PPC_WARN_EMULATED(lxvd2x, regs);
+		break;
+	case 3: /* lxvb16x */
+		PPC_WARN_EMULATED(lxvb16x, regs);
+		break;
+	}
+
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * An LE kernel stores the vector in the task struct as an LE
+	 * byte array (effectively swapping both the components and
+	 * the content of the components). Those instructions expect
+	 * the components to remain in ascending address order, so we
+	 * swap them back.
+	 *
+	 * If we are running a BE user space, the expectation is that
+	 * of a simple memcpy, so forcing the emulation to look like
+	 * a lxvb16x should do the trick.
+	 */
+	if (swap)
+		sel = 3;
+
+	switch (sel) {
+	case 0:	/* lxvw4x */
+		for (i = 0; i < 4; i++)
+			((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
+		break;
+	case 1: /* lxvh8x */
+		for (i = 0; i < 8; i++)
+			((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
+		break;
+	case 2: /* lxvd2x */
+		for (i = 0; i < 2; i++)
+			((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
+		break;
+	case 3: /* lxvb16x */
+		for (i = 0; i < 16; i++)
+			vdst[i] = vbuf[15-i];
+		break;
+	}
+#else /* __LITTLE_ENDIAN__ */
+	/* On a big endian kernel, a BE userspace only needs a memcpy */
+	if (!swap)
+		sel = 3;
+
+	/* Otherwise, we need to swap the content of the components */
+	switch (sel) {
+	case 0:	/* lxvw4x */
+		for (i = 0; i < 4; i++)
+			((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
+		break;
+	case 1: /* lxvh8x */
+		for (i = 0; i < 8; i++)
+			((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
+		break;
+	case 2: /* lxvd2x */
+		for (i = 0; i < 2; i++)
+			((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
+		break;
+	case 3: /* lxvb16x */
+		memcpy(vdst, vbuf, 16);
+		break;
+	}
+#endif /* !__LITTLE_ENDIAN__ */
+
+	/* Go to next instruction */
+	regs->nip += 4;
+}
+#endif /* CONFIG_VSX */
+
 void handle_hmi_exception(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
@@ -768,6 +950,21 @@ void handle_hmi_exception(struct pt_regs *regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
+#ifdef CONFIG_VSX
+	/* Real mode flagged P9 special emu is needed */
+	if (local_paca->hmi_p9_special_emu) {
+		local_paca->hmi_p9_special_emu = 0;
+
+		/*
+		 * We don't want to take page faults while doing the
+		 * emulation, we just replay the instruction if necessary.
+		 */
+		pagefault_disable();
+		p9_hmi_special_emu(regs);
+		pagefault_enable();
+	}
+#endif /* CONFIG_VSX */
+
 	if (ppc_md.handle_hmi_exception)
 		ppc_md.handle_hmi_exception(regs);
 
@@ -2004,6 +2201,10 @@ struct ppc_emulated ppc_emulated = {
 	WARN_EMULATED_SETUP(mfdscr),
 	WARN_EMULATED_SETUP(mtdscr),
 	WARN_EMULATED_SETUP(lq_stq),
+	WARN_EMULATED_SETUP(lxvw4x),
+	WARN_EMULATED_SETUP(lxvh8x),
+	WARN_EMULATED_SETUP(lxvd2x),
+	WARN_EMULATED_SETUP(lxvb16x),
 #endif
 };
 
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 40dae96f7e20..b9dd3cc1d217 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -49,6 +49,13 @@
 
 static void pnv_smp_setup_cpu(int cpu)
 {
+	/*
+	 * P9 workaround for CI vector load (see traps.c),
+	 * enable the corresponding HMI interrupt
+	 */
+	if (pvr_version_is(PVR_POWER9))
+		mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
+
 	if (xive_enabled())
 		xive_smp_setup_cpu();
 	else if (cpu != boot_cpuid)
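
For completeness, the HMER/HMEER handshake above relies on PPC_BIT(),
which uses IBM bit numbering (bit 0 is the MSB of the 64-bit register).
A tiny sketch, assuming the kernel's PPC_BIT() definition, of the
bit-17 mask the workaround sets in HMEER and tests in HMER:

    #include <stdio.h>

    /* Same shape as the kernel's PPC_BIT(): IBM bit numbering */
    #define PPC_BIT(bit) (1UL << (63 - (bit)))

    int main(void)
    {
        /* prints: PPC_BIT(17) = 0x0000400000000000 */
        printf("PPC_BIT(17) = 0x%016lx\n", PPC_BIT(17));
        return 0;
    }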