author    Michael Neuling <mikey@neuling.org>       2016-06-27 23:01:04 -0400
committer Michael Ellerman <mpe@ellerman.id.au>     2016-06-29 02:19:25 -0400
commit    190ce8693c23eae09ba5f303a83bf2fbeb6478b1
tree      b9673547107051b468d9b091b60cd60fb6409fba
parent    cca0e542e02e48cce541a49c4046ec094ec27c1e
powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0
Currently we have 2 segments that are bolted for the kernel linear
mapping (ie 0xc000... addresses): 0 to 1TB, and the kernel stacks.
Anything accessed outside of these regions may need to be faulted in.
(In practice machines with TM always have 1T segments.)

If a machine has < 2TB of memory we never fault on the kernel linear
mapping, as these two segments cover all physical memory. If a machine
has > 2TB of memory, there may be structures outside of these two
segments that need to be faulted in. This faulting can occur when
running as a guest, as the hypervisor may remove any SLB entry that's
not bolted.

When we treclaim and trecheckpoint we have a window where we need to
run with the userspace GPRs. This means that we no longer have a valid
stack pointer in r1. For this window we therefore clear MSR RI to
indicate that any exceptions taken at this point won't be able to be
handled. This means that we can't take segment misses in this RI=0
window.

In this RI=0 region, we currently access the thread_struct for the
process being context switched to or from. This thread_struct access
may cause a segment fault since it's not guaranteed to be covered by
the two bolted segment entries described above.

We've seen this with a crash when running as a guest with > 2TB of
memory on PowerVM:

  Unrecoverable exception 4100 at c00000000004f138
  Oops: Unrecoverable exception, sig: 6 [#1]
  SMP NR_CPUS=2048 NUMA pSeries
  CPU: 1280 PID: 7755 Comm: kworker/1280:1 Tainted: G X 4.4.13-46-default #1
  task: c000189001df4210 ti: c000189001d5c000 task.ti: c000189001d5c000
  NIP: c00000000004f138 LR: 0000000010003a24 CTR: 0000000010001b20
  REGS: c000189001d5f730 TRAP: 4100  Tainted: G X (4.4.13-46-default)
  MSR: 8000000100001031 <SF,ME,IR,DR,LE> CR: 24000048 XER: 00000000
  CFAR: c00000000004ed18 SOFTE: 0
  GPR00: ffffffffc58d7b60 c000189001d5f9b0 00000000100d7d00 000000003a738288
  GPR04: 0000000000002781 0000000000000006 0000000000000000 c0000d1f4d889620
  GPR08: 000000000000c350 00000000000008ab 00000000000008ab 00000000100d7af0
  GPR12: 00000000100d7ae8 00003ffe787e67a0 0000000000000000 0000000000000211
  GPR16: 0000000010001b20 0000000000000000 0000000000800000 00003ffe787df110
  GPR20: 0000000000000001 00000000100d1e10 0000000000000000 00003ffe787df050
  GPR24: 0000000000000003 0000000000010000 0000000000000000 00003fffe79e2e30
  GPR28: 00003fffe79e2e68 00000000003d0f00 00003ffe787e67a0 00003ffe787de680
  NIP [c00000000004f138] restore_gprs+0xd0/0x16c
  LR [0000000010003a24] 0x10003a24
  Call Trace:
  [c000189001d5f9b0] [c000189001d5f9f0] 0xc000189001d5f9f0 (unreliable)
  [c000189001d5fb90] [c00000000001583c] tm_recheckpoint+0x6c/0xa0
  [c000189001d5fbd0] [c000000000015c40] __switch_to+0x2c0/0x350
  [c000189001d5fc30] [c0000000007e647c] __schedule+0x32c/0x9c0
  [c000189001d5fcb0] [c0000000007e6b58] schedule+0x48/0xc0
  [c000189001d5fce0] [c0000000000deabc] worker_thread+0x22c/0x5b0
  [c000189001d5fd80] [c0000000000e7000] kthread+0x110/0x130
  [c000189001d5fe30] [c000000000009538] ret_from_kernel_thread+0x5c/0xa4
  Instruction dump:
  7cb103a6 7cc0e3a6 7ca222a6 78a58402 38c00800 7cc62838 08860000 7cc000a6
  38a00006 78c60022 7cc62838 0b060000 <e8c701a0> 7ccff120 e8270078 e8a70098
  ---[ end trace 602126d0a1dedd54 ]---

Fix this by copying the required data from the thread_struct to the
stack before we clear MSR RI. Then once we clear RI, we only access
the stack, guaranteeing there's no segment miss.

We also tighten the region over which we set RI=0 on the treclaim()
path. This may have a slight performance impact since we're adding an
mtmsr instruction.
Fixes: 090b9284d725 ("powerpc/tm: Clear MSR RI in non-recoverable TM code")
Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: Cyril Bur <cyrilbur@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
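A note on the approach, before the diff: the fix is a staging pattern.
Everything that will be needed is read from the possibly-unmapped
thread_struct while faults are still recoverable; inside the RI=0
window only the stack (which is SLB-bolted) is touched. A minimal C
sketch of the idea — illustrative only, not kernel code; thread_like
and set_ri() are hypothetical stand-ins for thread_struct and the
mtmsrd writes:

    #include <stdio.h>

    /* Hypothetical stand-ins: thread_like models thread_struct,
     * set_ri() models writing MSR[RI] via mtmsrd. */
    struct thread_like {
        unsigned long tm_ppr;
        unsigned long tm_dscr;
    };

    static void set_ri(int on)
    {
        /* In the real code: li rN, 0 (or MSR_RI); mtmsrd rN, 1 */
        (void)on;
    }

    static void critical_window(const struct thread_like *t)
    {
        /* Stage while faults are still recoverable: these loads
         * may take an SLB miss, which is fine at this point. */
        unsigned long ppr  = t->tm_ppr;
        unsigned long dscr = t->tm_dscr;

        set_ri(0);  /* analogous to clearing MSR RI in the patch */
        /* From here on, touch only stack locals -- never *t. */
        printf("ppr=%lx dscr=%lx\n", ppr, dscr);
        set_ri(1);  /* analogous to the RI re-enable in the patch */
    }

    int main(void)
    {
        struct thread_like t = { .tm_ppr = 4, .tm_dscr = 0 };
        critical_window(&t);
        return 0;
    }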
 arch/powerpc/kernel/tm.S | 61 ++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index bf8f34a58670..b7019b559ddb 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
 	std	r3, STK_PARAM(R3)(r1)
 	SAVE_NVGPRS(r1)
 
-	/* We need to setup MSR for VSX register save instructions.  Here we
-	 * also clear the MSR RI since when we do the treclaim, we won't have a
-	 * valid kernel pointer for a while. We clear RI here as it avoids
-	 * adding another mtmsr closer to the treclaim. This makes the region
-	 * maked as non-recoverable wider than it needs to be but it saves on
-	 * inserting another mtmsrd later.
-	 */
+	/* We need to setup MSR for VSX register save instructions. */
 	mfmsr	r14
 	mr	r15, r14
 	ori	r15, r15, MSR_FP
-	li	r16, MSR_RI
+	li	r16, 0
 	ori	r16, r16, MSR_EE /* IRQs hard off */
 	andc	r15, r15, r16
 	oris	r15, r15, MSR_VEC@h
@@ -176,7 +170,17 @@ dont_backup_fp:
 1:	tdeqi	r6, 0
 	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
 
-	/* The moment we treclaim, ALL of our GPRs will switch
+	/* Clear MSR RI since we are about to change r1, EE is already off. */
+	li	r4, 0
+	mtmsrd	r4, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 *
+	 * The moment we treclaim, ALL of our GPRs will switch
 	 * to user register state.  (FPRs, CCR etc. also!)
 	 * Use an sprg and a tm_scratch in the PACA to shuffle.
 	 */
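For context on the mtmsrd added above (a sketch under stated
assumptions, not part of the patch): with the L=1 operand, mtmsrd
updates only MSR[EE] and MSR[RI] and preserves every other MSR bit,
which is why "li r4, 0; mtmsrd r4, 1" can drop RI without rebuilding
the rest of the MSR. A small C model of that update rule, using
Linux's MSR bit positions (EE is bit 15, RI is bit 1):

    #include <stdint.h>
    #include <stdio.h>

    #define MSR_EE (1UL << 15)  /* external interrupt enable */
    #define MSR_RI (1UL << 1)   /* recoverable interrupt */

    /* Models "mtmsrd rS, 1": with L=1 only EE and RI are taken
     * from rS; every other MSR bit is preserved. */
    static uint64_t mtmsrd_l1(uint64_t msr, uint64_t rs)
    {
        const uint64_t mask = MSR_EE | MSR_RI;
        return (msr & ~mask) | (rs & mask);
    }

    int main(void)
    {
        uint64_t msr = 0x8000000000001033ULL;  /* example value */
        msr = mtmsrd_l1(msr, 0);        /* li r4,0; mtmsrd r4,1 */
        printf("after clear: RI=%d EE=%d\n",
               !!(msr & MSR_RI), !!(msr & MSR_EE));
        msr = mtmsrd_l1(msr, MSR_RI);   /* li r11,MSR_RI; mtmsrd r11,1 */
        printf("after set:   RI=%d EE=%d\n",
               !!(msr & MSR_RI), !!(msr & MSR_EE));
        return 0;
    }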
@@ -197,6 +201,11 @@ dont_backup_fp:
 
 	/* Store the PPR in r11 and reset to decent value */
 	std	r11, GPR11(r1)			/* Temporary stash */
+
+	/* Reset MSR RI so we can take SLB faults again */
+	li	r11, MSR_RI
+	mtmsrd	r11, 1
+
 	mfspr	r11, SPRN_PPR
 	HMT_MEDIUM
 
@@ -397,11 +406,6 @@ restore_gprs:
 	ld	r5, THREAD_TM_DSCR(r3)
 	ld	r6, THREAD_TM_PPR(r3)
 
-	/* Clear the MSR RI since we are about to change R1.  EE is already off
-	 */
-	li	r4, 0
-	mtmsrd	r4, 1
-
 	REST_GPR(0, r7)				/* GPR0 */
 	REST_2GPRS(2, r7)			/* GPR2-3 */
 	REST_GPR(4, r7)				/* GPR4 */
@@ -439,10 +443,33 @@ restore_gprs:
 	ld	r6, _CCR(r7)
 	mtcr	r6
 
-	REST_GPR(1, r7)				/* GPR1 */
-	REST_GPR(5, r7)				/* GPR5-7 */
 	REST_GPR(6, r7)
-	ld	r7, GPR7(r7)
+
+	/*
+	 * Store r1 and r5 on the stack so that we can access them
+	 * after we clear MSR RI.
+	 */
+
+	REST_GPR(5, r7)
+	std	r5, -8(r1)
+	ld	r5, GPR1(r7)
+	std	r5, -16(r1)
+
+	REST_GPR(7, r7)
+
+	/* Clear MSR RI since we are about to change r1. EE is already off */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 */
+
+	ld	r5, -8(r1)
+	ld	r1, -16(r1)
 
 	/* Commit register state as checkpointed state: */
 	TRECHKPT
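On the -8(r1)/-16(r1) slots used above: the stack is covered by a
bolted SLB entry, and with EE hard off and RI about to be cleared,
nothing should run that could clobber the doublewords just below r1.
So the checkpointed r5 and r1 are staged on the stack while RI is
still set, and reloaded once only bolted regions may be touched. A C
sketch of the shuffle's ordering — illustrative only; ckpt and its
field names are hypothetical stand-ins for the frame r7 points at:

    #include <stdint.h>
    #include <stdio.h>

    /* ckpt models the checkpointed register frame (r7); scratch
     * models the two doublewords at -8(r1) and -16(r1). */
    struct ckpt { uint64_t gpr1, gpr5, gpr7; };

    int main(void)
    {
        struct ckpt frame = { .gpr1 = 0x7fff0000, .gpr5 = 5, .gpr7 = 7 };
        uint64_t scratch[2];

        /* RI still on: the frame may fault, so pull from it now. */
        scratch[0] = frame.gpr5;     /* REST_GPR(5,r7); std r5,-8(r1) */
        scratch[1] = frame.gpr1;     /* ld r5,GPR1(r7); std r5,-16(r1) */
        uint64_t r7 = frame.gpr7;    /* REST_GPR(7, r7) */

        /* RI = 0 from here: stack accesses only. */
        uint64_t r5 = scratch[0];    /* ld r5, -8(r1)  */
        uint64_t r1 = scratch[1];    /* ld r1, -16(r1) */

        printf("r1=%#llx r5=%llu r7=%llu\n",
               (unsigned long long)r1, (unsigned long long)r5,
               (unsigned long long)r7);
        return 0;
    }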