author     Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>	2019-09-23 23:52:53 -0400
committer  Michael Ellerman <mpe@ellerman.id.au>	2019-09-24 06:58:55 -0400
commit     047e6575aec71d75b765c22111820c4776cd1c43 (patch)
tree       8742d25f78cbcf3add79662e241546d2d7e8846c
parent     09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 (diff)
powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
On POWER9, under some circumstances, a broadcast TLB invalidation will
fail to invalidate the ERAT cache on some threads when there are
parallel mtpidr/mtlpidr happening on other threads of the same core.
This can cause stores to continue to go to a page after it's unmapped.

The workaround is to force an ERAT flush using a PID=0 or LPID=0 tlbie.
This additional TLB flush causes the ERAT cache invalidation. Since we
are using PID=0 or LPID=0, we don't get filtered out by the TLB snoop
filtering logic.

We still need to follow this up with another tlbie to take care of the
store vs tlbie ordering issue explained in commit a5d4b5891c2f
("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9"). The
presence of an ERAT cache implies we can still get new stores and they
may miss the store queue marking flush.

Cc: stable@vger.kernel.org
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190924035254.24612-3-aneesh.kumar@linux.ibm.com
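Condensed, the flush sequence for a single process-scoped radix VA
invalidation after this patch looks as follows. This is a reader's
sketch stitched together from the _tlbie_va() and fixup_tlbie_va()
hunks below, not a separate code path in the patch:

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);		/* the real invalidation */

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		/* PID=0 tlbie: not snoop-filtered, forces the ERAT flush */
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		/* re-issue the real tlbie for the store vs tlbie ordering bug */
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");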
-rw-r--r--  arch/powerpc/include/asm/cputable.h      3
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c        2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c     42
-rw-r--r--  arch/powerpc/mm/book3s64/hash_native.c  29
-rw-r--r--  arch/powerpc/mm/book3s64/radix_tlb.c    80
5 files changed, 134 insertions(+), 22 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index f080fba48619..cf00ff0d121d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -211,6 +211,7 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_P9_TM_XER_SO_BUG	LONG_ASM_CONST(0x0000200000000000)
 #define CPU_FTR_P9_TLBIE_STQ_BUG	LONG_ASM_CONST(0x0000400000000000)
 #define CPU_FTR_P9_TIDR			LONG_ASM_CONST(0x0000800000000000)
+#define CPU_FTR_P9_TLBIE_ERAT_BUG	LONG_ASM_CONST(0x0001000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -457,7 +458,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
 	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
-	    CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR)
+	    CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
 #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index a86486390c70..180b3a5d1001 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -715,6 +715,8 @@ static __init void update_tlbie_feature_flag(unsigned long pvr)
 			WARN_ONCE(1, "Unknown PVR");
 			cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
 		}
+
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
 	}
 }
 
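From the shape of this hunk, the new assignment sits at the end of the
POWER9 branch of update_tlbie_feature_flag(), after the PVR-revision-
specific STQ-bug handling, so every POWER9 revision gets the ERAT flag.
A sketch of the assumed surrounding context (the PVR check and the STQ
logic are abbreviated; only the two "|=" lines are actually visible in
the hunk):

	static __init void update_tlbie_feature_flag(unsigned long pvr)
	{
		if (PVR_VER(pvr) == PVR_POWER9) {
			/* ... revision-specific CPU_FTR_P9_TLBIE_STQ_BUG logic,
			 * ending in the WARN_ONCE("Unknown PVR") fallback ... */

			/* applied unconditionally on POWER9 */
			cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
		}
	}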
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index cfdf232aac34..220305454c23 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
 		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
 }
 
+static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		/* Radix flush for a hash guest */
+		unsigned long rb, rs, prs, r, ric;
+
+		rb = PPC_BIT(52);	/* IS = 2 */
+		rs = 0;			/* lpid = 0 */
+		prs = 0;		/* partition scoped */
+		r = 1;			/* radix format */
+		ric = 0;		/* RIC_FLUSH_TLB */
+
+		/*
+		 * Need the extra ptesync to make sure we don't
+		 * re-order the tlbie
+		 */
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+			     : : "r"(rb), "i"(r), "i"(prs),
+			       "i"(ric), "r"(rs) : "memory");
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 0) : :
+			     "r" (rb_value), "r" (lpid));
+	}
+}
+
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		      long npages, int global, bool need_sync)
 {
@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
 		}
 
-		if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
-			/*
-			 * Need the extra ptesync to make sure we don't
-			 * re-order the tlbie
-			 */
-			asm volatile("ptesync": : :"memory");
-			asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 0) : :
-				     "r" (rbvalues[0]), "r" (kvm->arch.lpid));
-		}
-
+		fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
 		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 	} else {
 		if (need_sync)
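A reader's note on the RB operand built above (not part of the patch;
field meanings per my reading of ISA 3.0): PPC_BIT() uses IBM bit
numbering, with bit 0 as the most significant bit, and RB bits 52:53
form the tlbie IS field:

	unsigned long rb = PPC_BIT(52);	/* 1UL << (63 - 52) = 0x800 */
	/*
	 * RB[52:53] = 0b10 -> IS = 2: invalidate all entries matching
	 * the given LPID. Combined with rs = 0 (LPID 0) this is the
	 * tlbie the snoop filter cannot discard, so it reaches the
	 * ERAT on every thread of the core.
	 */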
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 02568dae4695..523e42eb11da 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -197,8 +197,31 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
 	return va;
 }
 
-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
+				   int apsize, int ssize)
 {
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		/* Radix flush for a hash guest */
+		unsigned long rb, rs, prs, r, ric;
+
+		rb = PPC_BIT(52);	/* IS = 2 */
+		rs = 0;			/* lpid = 0 */
+		prs = 0;		/* partition scoped */
+		r = 1;			/* radix format */
+		ric = 0;		/* RIC_FLUSH_TLB */
+
+		/*
+		 * Need the extra ptesync to make sure we don't
+		 * re-order the tlbie
+		 */
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+			     : : "r"(rb), "i"(r), "i"(prs),
+			       "i"(ric), "r"(rs) : "memory");
+	}
+
 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
 		/* Need the extra ptesync to ensure we don't reorder tlbie */
 		asm volatile("ptesync": : :"memory");
@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
 		asm volatile("ptesync": : :"memory");
 	} else {
 		__tlbie(vpn, psize, apsize, ssize);
-		fixup_tlbie(vpn, psize, apsize, ssize);
+		fixup_tlbie_vpn(vpn, psize, apsize, ssize);
 		asm volatile("eieio; tlbsync; ptesync": : :"memory");
 	}
 	if (lock_tlbie && !use_local)
@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		/*
 		 * Just do one more with the last used values.
 		 */
-		fixup_tlbie(vpn, psize, psize, ssize);
+		fixup_tlbie_vpn(vpn, psize, psize, ssize);
 		asm volatile("eieio; tlbsync; ptesync":::"memory");
 
 		if (lock_tlbie)
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 69fdc004d83f..67af871190c6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -196,21 +196,82 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
 }
 
-static inline void fixup_tlbie(void)
+
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+				  unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+					unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_pid(0, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+static inline void fixup_tlbie_pid(unsigned long pid)
 {
-	unsigned long pid = 0;
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
 	unsigned long va = ((1UL << 52) - 1);
 
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_pid(0, RIC_FLUSH_TLB);
+	}
+
 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
 		asm volatile("ptesync": : :"memory");
 		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
 	}
 }
 
+
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+				       unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+	}
+}
+
 static inline void fixup_tlbie_lpid(unsigned long lpid)
 {
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
 	unsigned long va = ((1UL << 52) - 1);
 
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid(0, RIC_FLUSH_TLB);
+	}
+
 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
 		asm volatile("ptesync": : :"memory");
 		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
@@ -258,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	switch (ric) {
 	case RIC_FLUSH_TLB:
 		__tlbie_pid(pid, RIC_FLUSH_TLB);
+		fixup_tlbie_pid(pid);
 		break;
 	case RIC_FLUSH_PWC:
 		__tlbie_pid(pid, RIC_FLUSH_PWC);
@@ -265,8 +327,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	case RIC_FLUSH_ALL:
 	default:
 		__tlbie_pid(pid, RIC_FLUSH_ALL);
+		fixup_tlbie_pid(pid);
 	}
-	fixup_tlbie();
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
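A usage-level consequence of issuing the fixup per case rather than
once after the switch (as the removed fixup_tlbie() call did): a
page-walk-cache-only flush no longer pays for the workaround tlbies,
presumably because a PWC-only flush does not leave stale ERAT entries
behind. In caller terms:

	_tlbie_pid(pid, RIC_FLUSH_PWC);	/* no fixup: just __tlbie_pid + completion */
	_tlbie_pid(pid, RIC_FLUSH_TLB);	/* fixup_tlbie_pid(pid) follows the tlbie */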
@@ -315,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
 	switch (ric) {
 	case RIC_FLUSH_TLB:
 		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+		fixup_tlbie_lpid(lpid);
 		break;
 	case RIC_FLUSH_PWC:
 		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
@@ -322,8 +385,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
 	case RIC_FLUSH_ALL:
 	default:
 		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+		fixup_tlbie_lpid(lpid);
 	}
-	fixup_tlbie_lpid(lpid);
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
 
 	for (addr = start; addr < end; addr += page_size)
 		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+	fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 
 	asm volatile("ptesync": : :"memory");
 	__tlbie_va(va, pid, ap, ric);
-	fixup_tlbie();
+	fixup_tlbie_va(va, pid, ap);
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
 
 	asm volatile("ptesync": : :"memory");
 	__tlbie_lpid_va(va, lpid, ap, ric);
-	fixup_tlbie_lpid(lpid);
+	fixup_tlbie_lpid_va(va, lpid, ap);
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 	if (also_pwc)
 		__tlbie_pid(pid, RIC_FLUSH_PWC);
 	__tlbie_va_range(start, end, pid, page_size, psize);
-	fixup_tlbie();
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -856,7 +920,7 @@ is_local:
 			if (gflush)
 				__tlbie_va_range(gstart, gend, pid,
 						PUD_SIZE, MMU_PAGE_1G);
-			fixup_tlbie();
+
 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
 		} else {
 			_tlbiel_va_range_multicast(mm,