author     Linus Torvalds <torvalds@linux-foundation.org>  2019-09-28 16:43:00 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-09-28 16:43:00 -0400
commit     a2953204b576ea3ba4afd07b917811d50fc49778 (patch)
tree       04eee2a327a8871ee991ece819cb8945728cc9ec
parent     f19e00ee84be14e840386cb4f3c0bda5b9cfb5ab (diff)
parent     253c892193ab58da6b1d94371285971b22c63260 (diff)
Merge tag 'powerpc-5.4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Michael Ellerman:
 "An assortment of fixes that were either missed by me, or didn't
  arrive quite in time for the first v5.4 pull.

   - Most notable is a fix for an issue with tlbie (broadcast TLB
     invalidation) on Power9, when using the Radix MMU. The tlbie can
     race with an mtpid (move to PID register, essentially MMU context
     switch) on another thread of the core, which can cause stores to
     continue to go to a page after it's unmapped.

   - A fix in our KVM code to add a missing barrier, the lack of which
     has been observed to cause missed IPIs and subsequently stuck CPUs
     in the host.

   - A change to the way we initialise PCR (Processor Compatibility
     Register) to make it forward compatible with future CPUs.

   - On some older PowerVM systems our H_BLOCK_REMOVE support could
     oops, fix it to detect such systems and fallback to the old
     invalidation method.

   - A fix for an oops seen on some machines when using KASAN on 32-bit.

   - A handful of other minor fixes, and two new selftests.

  Thanks to: Alistair Popple, Aneesh Kumar K.V, Christophe Leroy,
  Gustavo Romero, Joel Stanley, Jordan Niethe, Laurent Dufour, Michael
  Roth, Oliver O'Halloran"

* tag 'powerpc-5.4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/eeh: Fix eeh eeh_debugfs_break_device() with SRIOV devices
  powerpc/nvdimm: use H_SCM_QUERY hcall on H_OVERLAP error
  powerpc/nvdimm: Use HCALL error as the return value
  selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue
  powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
  powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag
  powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions
  powerpc/pseries: Call H_BLOCK_REMOVE when supported
  powerpc/pseries: Read TLB Block Invalidate Characteristics
  KVM: PPC: Book3S HV: use smp_mb() when setting/clearing host_ipi flag
  powerpc/mm: Fix an Oops in kasan_mmu_init()
  powerpc/mm: Add a helper to select PAGE_KERNEL_RO or PAGE_READONLY
  powerpc/64s: Set reserved PCR bits
  powerpc: Fix definition of PCR bits to work with old binutils
  powerpc/book3s64/radix: Remove WARN_ON in destroy_context()
  powerpc/tm: Add tm-poison test
-rw-r--r--  arch/powerpc/include/asm/cputable.h | 5
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 100
-rw-r--r--  arch/powerpc/include/asm/reg.h | 9
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 6
-rw-r--r--  arch/powerpc/kernel/dbell.c | 6
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 35
-rw-r--r--  arch/powerpc/kernel/eeh.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 11
-rw-r--r--  arch/powerpc/kvm/book3s_hv_nested.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 42
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10
-rw-r--r--  arch/powerpc/mm/book3s64/hash_native.c | 31
-rw-r--r--  arch/powerpc/mm/book3s64/mmu_context.c | 15
-rw-r--r--  arch/powerpc/mm/book3s64/radix_tlb.c | 84
-rw-r--r--  arch/powerpc/mm/kasan/kasan_init_32.c | 34
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 163
-rw-r--r--  arch/powerpc/platforms/pseries/papr_scm.c | 72
-rw-r--r--  arch/powerpc/platforms/pseries/pseries.h | 1
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 1
-rw-r--r--  arch/powerpc/sysdev/xics/icp-native.c | 6
-rw-r--r--  arch/powerpc/sysdev/xics/icp-opal.c | 6
-rw-r--r--  tools/testing/selftests/powerpc/mm/Makefile | 2
-rw-r--r--  tools/testing/selftests/powerpc/mm/tlbie_test.c | 734
-rw-r--r--  tools/testing/selftests/powerpc/tm/.gitignore | 1
-rw-r--r--  tools/testing/selftests/powerpc/tm/Makefile | 2
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-poison.c | 179
28 files changed, 1476 insertions(+), 93 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a1ebcbc3931f..cf00ff0d121d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -209,8 +209,9 @@ static inline void cpu_feature_keys_init(void) { }
209#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000) 209#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
210#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000) 210#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
211#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000) 211#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
212#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000) 212#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
213#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000) 213#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
214#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
214 215
215#ifndef __ASSEMBLY__ 216#ifndef __ASSEMBLY__
216 217
@@ -457,7 +458,7 @@ static inline void cpu_feature_keys_init(void) { }
457 CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ 458 CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
458 CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ 459 CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
459 CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ 460 CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
460 CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR) 461 CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
461#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 462#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
462#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1) 463#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
463#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ 464#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 8e8514efb124..ee62776e5433 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -452,9 +452,100 @@ static inline u32 kvmppc_get_xics_latch(void)
452 return xirr; 452 return xirr;
453} 453}
454 454
455static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) 455/*
456 * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
457 * a CPU thread that's running/napping inside of a guest is by default regarded
458 * as a request to wake the CPU (if needed) and continue execution within the
459 * guest, potentially to process new state like externally-generated
460 * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
461 *
462 * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
463 * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
464 * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
465 * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
466 * the receiving side prior to processing the IPI work.
467 *
468 * NOTE:
469 *
470 * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
471 * This is to guard against sequences such as the following:
472 *
473 * CPU
474 * X: smp_muxed_ipi_set_message():
475 * X: smp_mb()
476 * X: message[RESCHEDULE] = 1
477 * X: doorbell_global_ipi(42):
478 * X: kvmppc_set_host_ipi(42)
479 * X: ppc_msgsnd_sync()/smp_mb()
480 * X: ppc_msgsnd() -> 42
481 * 42: doorbell_exception(): // from CPU X
482 * 42: ppc_msgsync()
483 * 105: smp_muxed_ipi_set_message():
484 * 105: smb_mb()
485 * // STORE DEFERRED DUE TO RE-ORDERING
486 * --105: message[CALL_FUNCTION] = 1
487 * | 105: doorbell_global_ipi(42):
488 * | 105: kvmppc_set_host_ipi(42)
489 * | 42: kvmppc_clear_host_ipi(42)
490 * | 42: smp_ipi_demux_relaxed()
491 * | 42: // returns to executing guest
492 * | // RE-ORDERED STORE COMPLETES
493 * ->105: message[CALL_FUNCTION] = 1
494 * 105: ppc_msgsnd_sync()/smp_mb()
495 * 105: ppc_msgsnd() -> 42
496 * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
497 * 105: // hangs waiting on 42 to process messages/call_single_queue
498 *
499 * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
500 * to guard against sequences such as the following (as well as to create
501 * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
502 *
503 * CPU
504 * X: smp_muxed_ipi_set_message():
505 * X: smp_mb()
506 * X: message[RESCHEDULE] = 1
507 * X: doorbell_global_ipi(42):
508 * X: kvmppc_set_host_ipi(42)
509 * X: ppc_msgsnd_sync()/smp_mb()
510 * X: ppc_msgsnd() -> 42
511 * 42: doorbell_exception(): // from CPU X
512 * 42: ppc_msgsync()
513 * // STORE DEFERRED DUE TO RE-ORDERING
514 * -- 42: kvmppc_clear_host_ipi(42)
515 * | 42: smp_ipi_demux_relaxed()
516 * | 105: smp_muxed_ipi_set_message():
517 * | 105: smb_mb()
518 * | 105: message[CALL_FUNCTION] = 1
519 * | 105: doorbell_global_ipi(42):
520 * | 105: kvmppc_set_host_ipi(42)
521 * | // RE-ORDERED STORE COMPLETES
522 * -> 42: kvmppc_clear_host_ipi(42)
523 * 42: // returns to executing guest
524 * 105: ppc_msgsnd_sync()/smp_mb()
525 * 105: ppc_msgsnd() -> 42
526 * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
527 * 105: // hangs waiting on 42 to process messages/call_single_queue
528 */
529static inline void kvmppc_set_host_ipi(int cpu)
456{ 530{
457 paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi; 531 /*
532 * order stores of IPI messages vs. setting of host_ipi flag
533 *
534 * pairs with the barrier in kvmppc_clear_host_ipi()
535 */
536 smp_mb();
537 paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
538}
539
540static inline void kvmppc_clear_host_ipi(int cpu)
541{
542 paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
543 /*
544 * order clearing of host_ipi flag vs. processing of IPI messages
545 *
546 * pairs with the barrier in kvmppc_set_host_ipi()
547 */
548 smp_mb();
458} 549}
459 550
460static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) 551static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -486,7 +577,10 @@ static inline u32 kvmppc_get_xics_latch(void)
486 return 0; 577 return 0;
487} 578}
488 579
489static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) 580static inline void kvmppc_set_host_ipi(int cpu)
581{}
582
583static inline void kvmppc_clear_host_ipi(int cpu)
490{} 584{}
491 585
492static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) 586static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index ec3714cf0989..b3cbb1136bce 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -475,9 +475,10 @@
475#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ 475#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
476#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ 476#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
477#define SPRN_PCR 0x152 /* Processor compatibility register */ 477#define SPRN_PCR 0x152 /* Processor compatibility register */
478#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ 478#define PCR_VEC_DIS (__MASK(63-0)) /* Vec. disable (bit NA since POWER8) */
479#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ 479#define PCR_VSX_DIS (__MASK(63-1)) /* VSX disable (bit NA since POWER8) */
480#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */ 480#define PCR_TM_DIS (__MASK(63-2)) /* Trans. memory disable (POWER8) */
481#define PCR_HIGH_BITS (PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
481/* 482/*
482 * These bits are used in the function kvmppc_set_arch_compat() to specify and 483 * These bits are used in the function kvmppc_set_arch_compat() to specify and
483 * determine both the compatibility level which we want to emulate and the 484 * determine both the compatibility level which we want to emulate and the
@@ -486,6 +487,8 @@
486#define PCR_ARCH_207 0x8 /* Architecture 2.07 */ 487#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
487#define PCR_ARCH_206 0x4 /* Architecture 2.06 */ 488#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
488#define PCR_ARCH_205 0x2 /* Architecture 2.05 */ 489#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
490#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205)
491#define PCR_MASK ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */
489#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ 492#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
490#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ 493#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */
491#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */ 494#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3239a9fe6c1c..a460298c7ddb 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
23 beqlr 23 beqlr
24 li r0,0 24 li r0,0
25 mtspr SPRN_LPID,r0 25 mtspr SPRN_LPID,r0
26 LOAD_REG_IMMEDIATE(r0, PCR_MASK)
26 mtspr SPRN_PCR,r0 27 mtspr SPRN_PCR,r0
27 mfspr r3,SPRN_LPCR 28 mfspr r3,SPRN_LPCR
28 li r4,(LPCR_LPES1 >> LPCR_LPES_SH) 29 li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
37 beqlr 38 beqlr
38 li r0,0 39 li r0,0
39 mtspr SPRN_LPID,r0 40 mtspr SPRN_LPID,r0
41 LOAD_REG_IMMEDIATE(r0, PCR_MASK)
40 mtspr SPRN_PCR,r0 42 mtspr SPRN_PCR,r0
41 mfspr r3,SPRN_LPCR 43 mfspr r3,SPRN_LPCR
42 li r4,(LPCR_LPES1 >> LPCR_LPES_SH) 44 li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
54 beqlr 56 beqlr
55 li r0,0 57 li r0,0
56 mtspr SPRN_LPID,r0 58 mtspr SPRN_LPID,r0
59 LOAD_REG_IMMEDIATE(r0, PCR_MASK)
57 mtspr SPRN_PCR,r0 60 mtspr SPRN_PCR,r0
58 mfspr r3,SPRN_LPCR 61 mfspr r3,SPRN_LPCR
59 ori r3, r3, LPCR_PECEDH 62 ori r3, r3, LPCR_PECEDH
@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
76 beqlr 79 beqlr
77 li r0,0 80 li r0,0
78 mtspr SPRN_LPID,r0 81 mtspr SPRN_LPID,r0
82 LOAD_REG_IMMEDIATE(r0, PCR_MASK)
79 mtspr SPRN_PCR,r0 83 mtspr SPRN_PCR,r0
80 mfspr r3,SPRN_LPCR 84 mfspr r3,SPRN_LPCR
81 ori r3, r3, LPCR_PECEDH 85 ori r3, r3, LPCR_PECEDH
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
98 mtspr SPRN_PSSCR,r0 102 mtspr SPRN_PSSCR,r0
99 mtspr SPRN_LPID,r0 103 mtspr SPRN_LPID,r0
100 mtspr SPRN_PID,r0 104 mtspr SPRN_PID,r0
105 LOAD_REG_IMMEDIATE(r0, PCR_MASK)
101 mtspr SPRN_PCR,r0 106 mtspr SPRN_PCR,r0
102 mfspr r3,SPRN_LPCR 107 mfspr r3,SPRN_LPCR
103 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) 108 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
123 mtspr SPRN_PSSCR,r0 128 mtspr SPRN_PSSCR,r0
124 mtspr SPRN_LPID,r0 129 mtspr SPRN_LPID,r0
125 mtspr SPRN_PID,r0 130 mtspr SPRN_PID,r0
131 LOAD_REG_IMMEDIATE(r0, PCR_MASK)
126 mtspr SPRN_PCR,r0 132 mtspr SPRN_PCR,r0
127 mfspr r3,SPRN_LPCR 133 mfspr r3,SPRN_LPCR
128 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) 134 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 804b1a6196fa..f17ff1200eaa 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
33{ 33{
34 u32 tag = get_hard_smp_processor_id(cpu); 34 u32 tag = get_hard_smp_processor_id(cpu);
35 35
36 kvmppc_set_host_ipi(cpu, 1); 36 kvmppc_set_host_ipi(cpu);
37 /* Order previous accesses vs. msgsnd, which is treated as a store */ 37 /* Order previous accesses vs. msgsnd, which is treated as a store */
38 ppc_msgsnd_sync(); 38 ppc_msgsnd_sync();
39 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); 39 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
48{ 48{
49 u32 tag = cpu_thread_in_core(cpu); 49 u32 tag = cpu_thread_in_core(cpu);
50 50
51 kvmppc_set_host_ipi(cpu, 1); 51 kvmppc_set_host_ipi(cpu);
52 /* Order previous accesses vs. msgsnd, which is treated as a store */ 52 /* Order previous accesses vs. msgsnd, which is treated as a store */
53 ppc_msgsnd_sync(); 53 ppc_msgsnd_sync();
54 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); 54 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
84 84
85 may_hard_irq_enable(); 85 may_hard_irq_enable();
86 86
87 kvmppc_set_host_ipi(smp_processor_id(), 0); 87 kvmppc_clear_host_ipi(smp_processor_id());
88 __this_cpu_inc(irq_stat.doorbell_irqs); 88 __this_cpu_inc(irq_stat.doorbell_irqs);
89 89
90 smp_ipi_demux_relaxed(); /* already performed the barrier */ 90 smp_ipi_demux_relaxed(); /* already performed the barrier */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index bd95318d2202..180b3a5d1001 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
101 if (hv_mode) { 101 if (hv_mode) {
102 mtspr(SPRN_LPID, 0); 102 mtspr(SPRN_LPID, 0);
103 mtspr(SPRN_HFSCR, system_registers.hfscr); 103 mtspr(SPRN_HFSCR, system_registers.hfscr);
104 mtspr(SPRN_PCR, 0); 104 mtspr(SPRN_PCR, PCR_MASK);
105 } 105 }
106 mtspr(SPRN_FSCR, system_registers.fscr); 106 mtspr(SPRN_FSCR, system_registers.fscr);
107 107
@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
144 mtspr(SPRN_HFSCR, 0); 144 mtspr(SPRN_HFSCR, 0);
145 } 145 }
146 mtspr(SPRN_FSCR, 0); 146 mtspr(SPRN_FSCR, 0);
147 mtspr(SPRN_PCR, PCR_MASK);
147 148
148 /* 149 /*
149 * LPCR does not get cleared, to match behaviour with secondaries 150 * LPCR does not get cleared, to match behaviour with secondaries
@@ -691,9 +692,37 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
691 return true; 692 return true;
692} 693}
693 694
695/*
696 * Handle POWER9 broadcast tlbie invalidation issue using
697 * cpu feature flag.
698 */
699static __init void update_tlbie_feature_flag(unsigned long pvr)
700{
701 if (PVR_VER(pvr) == PVR_POWER9) {
702 /*
703 * Set the tlbie feature flag for anything below
704 * Nimbus DD 2.3 and Cumulus DD 1.3
705 */
706 if ((pvr & 0xe000) == 0) {
707 /* Nimbus */
708 if ((pvr & 0xfff) < 0x203)
709 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
710 } else if ((pvr & 0xc000) == 0) {
711 /* Cumulus */
712 if ((pvr & 0xfff) < 0x103)
713 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
714 } else {
715 WARN_ONCE(1, "Unknown PVR");
716 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
717 }
718
719 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
720 }
721}
722
694static __init void cpufeatures_cpu_quirks(void) 723static __init void cpufeatures_cpu_quirks(void)
695{ 724{
696 int version = mfspr(SPRN_PVR); 725 unsigned long version = mfspr(SPRN_PVR);
697 726
698 /* 727 /*
699 * Not all quirks can be derived from the cpufeatures device tree. 728 * Not all quirks can be derived from the cpufeatures device tree.
@@ -712,10 +741,10 @@ static __init void cpufeatures_cpu_quirks(void)
712 741
713 if ((version & 0xffff0000) == 0x004e0000) { 742 if ((version & 0xffff0000) == 0x004e0000) {
714 cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); 743 cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
715 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
716 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR; 744 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
717 } 745 }
718 746
747 update_tlbie_feature_flag(version);
719 /* 748 /*
720 * PKEY was not in the initial base or feature node 749 * PKEY was not in the initial base or feature node
721 * specification, but it should become optional in the next 750 * specification, but it should become optional in the next
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 0a91dee51245..bc8a551013be 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1960,7 +1960,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
1960 pci_err(pdev, "Going to break: %pR\n", bar); 1960 pci_err(pdev, "Going to break: %pR\n", bar);
1961 1961
1962 if (pdev->is_virtfn) { 1962 if (pdev->is_virtfn) {
1963#ifndef CONFIG_IOV 1963#ifndef CONFIG_PCI_IOV
1964 return -ENXIO; 1964 return -ENXIO;
1965#else 1965#else
1966 /* 1966 /*
@@ -1980,7 +1980,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
1980 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); 1980 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
1981 pos += PCI_SRIOV_CTRL; 1981 pos += PCI_SRIOV_CTRL;
1982 bit = PCI_SRIOV_CTRL_MSE; 1982 bit = PCI_SRIOV_CTRL_MSE;
1983#endif /* !CONFIG_IOV */ 1983#endif /* !CONFIG_PCI_IOV */
1984 } else { 1984 } else {
1985 bit = PCI_COMMAND_MEMORY; 1985 bit = PCI_COMMAND_MEMORY;
1986 pos = PCI_COMMAND; 1986 pos = PCI_COMMAND;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index efd8f93bc9dc..709cf1fd4cf4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -401,8 +401,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
401 401
402 spin_lock(&vc->lock); 402 spin_lock(&vc->lock);
403 vc->arch_compat = arch_compat; 403 vc->arch_compat = arch_compat;
404 /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */ 404 /*
405 vc->pcr = host_pcr_bit - guest_pcr_bit; 405 * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
406 * Also set all reserved PCR bits
407 */
408 vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
406 spin_unlock(&vc->lock); 409 spin_unlock(&vc->lock);
407 410
408 return 0; 411 return 0;
@@ -3410,7 +3413,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
3410 } 3413 }
3411 3414
3412 if (vc->pcr) 3415 if (vc->pcr)
3413 mtspr(SPRN_PCR, vc->pcr); 3416 mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
3414 mtspr(SPRN_DPDES, vc->dpdes); 3417 mtspr(SPRN_DPDES, vc->dpdes);
3415 mtspr(SPRN_VTB, vc->vtb); 3418 mtspr(SPRN_VTB, vc->vtb);
3416 3419
@@ -3490,7 +3493,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
3490 vc->vtb = mfspr(SPRN_VTB); 3493 vc->vtb = mfspr(SPRN_VTB);
3491 mtspr(SPRN_DPDES, 0); 3494 mtspr(SPRN_DPDES, 0);
3492 if (vc->pcr) 3495 if (vc->pcr)
3493 mtspr(SPRN_PCR, 0); 3496 mtspr(SPRN_PCR, PCR_MASK);
3494 3497
3495 if (vc->tb_offset_applied) { 3498 if (vc->tb_offset_applied) {
3496 u64 new_tb = mftb() - vc->tb_offset_applied; 3499 u64 new_tb = mftb() - vc->tb_offset_applied;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index fff90f2c3de2..cdf30c6eaf54 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -29,7 +29,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
29{ 29{
30 struct kvmppc_vcore *vc = vcpu->arch.vcore; 30 struct kvmppc_vcore *vc = vcpu->arch.vcore;
31 31
32 hr->pcr = vc->pcr; 32 hr->pcr = vc->pcr | PCR_MASK;
33 hr->dpdes = vc->dpdes; 33 hr->dpdes = vc->dpdes;
34 hr->hfscr = vcpu->arch.hfscr; 34 hr->hfscr = vcpu->arch.hfscr;
35 hr->tb_offset = vc->tb_offset; 35 hr->tb_offset = vc->tb_offset;
@@ -65,7 +65,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
65 hr->lpid = swab32(hr->lpid); 65 hr->lpid = swab32(hr->lpid);
66 hr->vcpu_token = swab32(hr->vcpu_token); 66 hr->vcpu_token = swab32(hr->vcpu_token);
67 hr->lpcr = swab64(hr->lpcr); 67 hr->lpcr = swab64(hr->lpcr);
68 hr->pcr = swab64(hr->pcr); 68 hr->pcr = swab64(hr->pcr) | PCR_MASK;
69 hr->amor = swab64(hr->amor); 69 hr->amor = swab64(hr->amor);
70 hr->dpdes = swab64(hr->dpdes); 70 hr->dpdes = swab64(hr->dpdes);
71 hr->hfscr = swab64(hr->hfscr); 71 hr->hfscr = swab64(hr->hfscr);
@@ -148,7 +148,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
148{ 148{
149 struct kvmppc_vcore *vc = vcpu->arch.vcore; 149 struct kvmppc_vcore *vc = vcpu->arch.vcore;
150 150
151 vc->pcr = hr->pcr; 151 vc->pcr = hr->pcr | PCR_MASK;
152 vc->dpdes = hr->dpdes; 152 vc->dpdes = hr->dpdes;
153 vcpu->arch.hfscr = hr->hfscr; 153 vcpu->arch.hfscr = hr->hfscr;
154 vcpu->arch.dawr = hr->dawr0; 154 vcpu->arch.dawr = hr->dawr0;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7186c65c61c9..220305454c23 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
433 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); 433 (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
434} 434}
435 435
436static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
437{
438
439 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
440 /* Radix flush for a hash guest */
441
442 unsigned long rb,rs,prs,r,ric;
443
444 rb = PPC_BIT(52); /* IS = 2 */
445 rs = 0; /* lpid = 0 */
446 prs = 0; /* partition scoped */
447 r = 1; /* radix format */
448 ric = 0; /* RIC_FLSUH_TLB */
449
450 /*
451 * Need the extra ptesync to make sure we don't
452 * re-order the tlbie
453 */
454 asm volatile("ptesync": : :"memory");
455 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
456 : : "r"(rb), "i"(r), "i"(prs),
457 "i"(ric), "r"(rs) : "memory");
458 }
459
460 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
461 asm volatile("ptesync": : :"memory");
462 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
463 "r" (rb_value), "r" (lpid));
464 }
465}
466
436static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, 467static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
437 long npages, int global, bool need_sync) 468 long npages, int global, bool need_sync)
438{ 469{
@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
451 "r" (rbvalues[i]), "r" (kvm->arch.lpid)); 482 "r" (rbvalues[i]), "r" (kvm->arch.lpid));
452 } 483 }
453 484
454 if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { 485 fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
455 /*
456 * Need the extra ptesync to make sure we don't
457 * re-order the tlbie
458 */
459 asm volatile("ptesync": : :"memory");
460 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
461 "r" (rbvalues[0]), "r" (kvm->arch.lpid));
462 }
463
464 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 486 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
465 } else { 487 } else {
466 if (need_sync) 488 if (need_sync)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 4d2ec77d806c..287d5911df0f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -58,7 +58,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
58 hcpu = hcore << threads_shift; 58 hcpu = hcore << threads_shift;
59 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; 59 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
60 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); 60 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
61 kvmppc_set_host_ipi(hcpu, 1); 61 kvmppc_set_host_ipi(hcpu);
62 smp_mb(); 62 smp_mb();
63 kvmhv_rm_send_ipi(hcpu); 63 kvmhv_rm_send_ipi(hcpu);
64} 64}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9a05b0d932ef..74a9cfe84aee 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -644,8 +644,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
644 644
645 /* Load guest PCR value to select appropriate compat mode */ 645 /* Load guest PCR value to select appropriate compat mode */
64637: ld r7, VCORE_PCR(r5) 64637: ld r7, VCORE_PCR(r5)
647 cmpdi r7, 0 647 LOAD_REG_IMMEDIATE(r6, PCR_MASK)
648 cmpld r7, r6
648 beq 38f 649 beq 38f
650 or r7, r7, r6
649 mtspr SPRN_PCR, r7 651 mtspr SPRN_PCR, r7
65038: 65238:
651 653
@@ -1913,10 +1915,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1913 1915
1914 /* Reset PCR */ 1916 /* Reset PCR */
1915 ld r0, VCORE_PCR(r5) 1917 ld r0, VCORE_PCR(r5)
1916 cmpdi r0, 0 1918 LOAD_REG_IMMEDIATE(r6, PCR_MASK)
1919 cmpld r0, r6
1917 beq 18f 1920 beq 18f
1918 li r0, 0 1921 mtspr SPRN_PCR, r6
1919 mtspr SPRN_PCR, r0
192018: 192218:
1921 /* Signal secondary CPUs to continue */ 1923 /* Signal secondary CPUs to continue */
1922 stb r0,VCORE_IN_GUEST(r5) 1924 stb r0,VCORE_IN_GUEST(r5)
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 90ab4f31e2b3..523e42eb11da 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -197,9 +197,32 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
197 return va; 197 return va;
198} 198}
199 199
200static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) 200static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
201 int apsize, int ssize)
201{ 202{
202 if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { 203 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
204 /* Radix flush for a hash guest */
205
206 unsigned long rb,rs,prs,r,ric;
207
208 rb = PPC_BIT(52); /* IS = 2 */
209 rs = 0; /* lpid = 0 */
210 prs = 0; /* partition scoped */
211 r = 1; /* radix format */
212 ric = 0; /* RIC_FLSUH_TLB */
213
214 /*
215 * Need the extra ptesync to make sure we don't
216 * re-order the tlbie
217 */
218 asm volatile("ptesync": : :"memory");
219 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
220 : : "r"(rb), "i"(r), "i"(prs),
221 "i"(ric), "r"(rs) : "memory");
222 }
223
224
225 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
203 /* Need the extra ptesync to ensure we don't reorder tlbie*/ 226 /* Need the extra ptesync to ensure we don't reorder tlbie*/
204 asm volatile("ptesync": : :"memory"); 227 asm volatile("ptesync": : :"memory");
205 ___tlbie(vpn, psize, apsize, ssize); 228 ___tlbie(vpn, psize, apsize, ssize);
@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
283 asm volatile("ptesync": : :"memory"); 306 asm volatile("ptesync": : :"memory");
284 } else { 307 } else {
285 __tlbie(vpn, psize, apsize, ssize); 308 __tlbie(vpn, psize, apsize, ssize);
286 fixup_tlbie(vpn, psize, apsize, ssize); 309 fixup_tlbie_vpn(vpn, psize, apsize, ssize);
287 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 310 asm volatile("eieio; tlbsync; ptesync": : :"memory");
288 } 311 }
289 if (lock_tlbie && !use_local) 312 if (lock_tlbie && !use_local)
@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
856 /* 879 /*
857 * Just do one more with the last used values. 880 * Just do one more with the last used values.
858 */ 881 */
859 fixup_tlbie(vpn, psize, psize, ssize); 882 fixup_tlbie_vpn(vpn, psize, psize, ssize);
860 asm volatile("eieio; tlbsync; ptesync":::"memory"); 883 asm volatile("eieio; tlbsync; ptesync":::"memory");
861 884
862 if (lock_tlbie) 885 if (lock_tlbie)
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 2d0cb5ba9a47..0ba30b8b935b 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -256,8 +256,21 @@ void destroy_context(struct mm_struct *mm)
256#ifdef CONFIG_SPAPR_TCE_IOMMU 256#ifdef CONFIG_SPAPR_TCE_IOMMU
257 WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); 257 WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
258#endif 258#endif
259 /*
260 * For tasks which were successfully initialized we end up calling
261 * arch_exit_mmap() which clears the process table entry. And
262 * arch_exit_mmap() is called before the required fullmm TLB flush
263 * which does a RIC=2 flush. Hence for an initialized task, we do clear
264 * any cached process table entries.
265 *
266 * The condition below handles the error case during task init. We have
267 * set the process table entry early and if we fail a task
268 * initialization, we need to ensure the process table entry is zeroed.
269 * We need not worry about process table entry caches because the task
270 * never ran with the PID value.
271 */
259 if (radix_enabled()) 272 if (radix_enabled())
260 WARN_ON(process_tb[mm->context.id].prtb0 != 0); 273 process_tb[mm->context.id].prtb0 = 0;
261 else 274 else
262 subpage_prot_free(mm); 275 subpage_prot_free(mm);
263 destroy_contexts(&mm->context); 276 destroy_contexts(&mm->context);
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 631be42abd33..67af871190c6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -196,22 +196,83 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
196 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 196 trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
197} 197}
198 198
199static inline void fixup_tlbie(void) 199
200static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
201 unsigned long ap)
202{
203 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
204 asm volatile("ptesync": : :"memory");
205 __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
206 }
207
208 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
209 asm volatile("ptesync": : :"memory");
210 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
211 }
212}
213
214static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
215 unsigned long ap)
216{
217 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
218 asm volatile("ptesync": : :"memory");
219 __tlbie_pid(0, RIC_FLUSH_TLB);
220 }
221
222 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
223 asm volatile("ptesync": : :"memory");
224 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
225 }
226}
227
228static inline void fixup_tlbie_pid(unsigned long pid)
200{ 229{
201 unsigned long pid = 0; 230 /*
231 * We can use any address for the invalidation, pick one which is
232 * probably unused as an optimisation.
233 */
202 unsigned long va = ((1UL << 52) - 1); 234 unsigned long va = ((1UL << 52) - 1);
203 235
204 if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { 236 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
237 asm volatile("ptesync": : :"memory");
238 __tlbie_pid(0, RIC_FLUSH_TLB);
239 }
240
241 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
205 asm volatile("ptesync": : :"memory"); 242 asm volatile("ptesync": : :"memory");
206 __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 243 __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
207 } 244 }
208} 245}
209 246
247
248static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
249 unsigned long ap)
250{
251 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
252 asm volatile("ptesync": : :"memory");
253 __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
254 }
255
256 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
257 asm volatile("ptesync": : :"memory");
258 __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
259 }
260}
261
210static inline void fixup_tlbie_lpid(unsigned long lpid) 262static inline void fixup_tlbie_lpid(unsigned long lpid)
211{ 263{
264 /*
265 * We can use any address for the invalidation, pick one which is
266 * probably unused as an optimisation.
267 */
212 unsigned long va = ((1UL << 52) - 1); 268 unsigned long va = ((1UL << 52) - 1);
213 269
214 if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { 270 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
271 asm volatile("ptesync": : :"memory");
272 __tlbie_lpid(0, RIC_FLUSH_TLB);
273 }
274
275 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
215 asm volatile("ptesync": : :"memory"); 276 asm volatile("ptesync": : :"memory");
216 __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 277 __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
217 } 278 }
@@ -258,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
258 switch (ric) { 319 switch (ric) {
259 case RIC_FLUSH_TLB: 320 case RIC_FLUSH_TLB:
260 __tlbie_pid(pid, RIC_FLUSH_TLB); 321 __tlbie_pid(pid, RIC_FLUSH_TLB);
322 fixup_tlbie_pid(pid);
261 break; 323 break;
262 case RIC_FLUSH_PWC: 324 case RIC_FLUSH_PWC:
263 __tlbie_pid(pid, RIC_FLUSH_PWC); 325 __tlbie_pid(pid, RIC_FLUSH_PWC);
@@ -265,8 +327,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
265 case RIC_FLUSH_ALL: 327 case RIC_FLUSH_ALL:
266 default: 328 default:
267 __tlbie_pid(pid, RIC_FLUSH_ALL); 329 __tlbie_pid(pid, RIC_FLUSH_ALL);
330 fixup_tlbie_pid(pid);
268 } 331 }
269 fixup_tlbie();
270 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 332 asm volatile("eieio; tlbsync; ptesync": : :"memory");
271} 333}
272 334
@@ -315,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
315 switch (ric) { 377 switch (ric) {
316 case RIC_FLUSH_TLB: 378 case RIC_FLUSH_TLB:
317 __tlbie_lpid(lpid, RIC_FLUSH_TLB); 379 __tlbie_lpid(lpid, RIC_FLUSH_TLB);
380 fixup_tlbie_lpid(lpid);
318 break; 381 break;
319 case RIC_FLUSH_PWC: 382 case RIC_FLUSH_PWC:
320 __tlbie_lpid(lpid, RIC_FLUSH_PWC); 383 __tlbie_lpid(lpid, RIC_FLUSH_PWC);
@@ -322,8 +385,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
322 case RIC_FLUSH_ALL: 385 case RIC_FLUSH_ALL:
323 default: 386 default:
324 __tlbie_lpid(lpid, RIC_FLUSH_ALL); 387 __tlbie_lpid(lpid, RIC_FLUSH_ALL);
388 fixup_tlbie_lpid(lpid);
325 } 389 }
326 fixup_tlbie_lpid(lpid);
327 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 390 asm volatile("eieio; tlbsync; ptesync": : :"memory");
328} 391}
329 392
@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
390 453
391 for (addr = start; addr < end; addr += page_size) 454 for (addr = start; addr < end; addr += page_size)
392 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 455 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
456
457 fixup_tlbie_va_range(addr - page_size, pid, ap);
393} 458}
394 459
395static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, 460static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
399 464
400 asm volatile("ptesync": : :"memory"); 465 asm volatile("ptesync": : :"memory");
401 __tlbie_va(va, pid, ap, ric); 466 __tlbie_va(va, pid, ap, ric);
402 fixup_tlbie(); 467 fixup_tlbie_va(va, pid, ap);
403 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 468 asm volatile("eieio; tlbsync; ptesync": : :"memory");
404} 469}
405 470
@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
457 522
458 asm volatile("ptesync": : :"memory"); 523 asm volatile("ptesync": : :"memory");
459 __tlbie_lpid_va(va, lpid, ap, ric); 524 __tlbie_lpid_va(va, lpid, ap, ric);
460 fixup_tlbie_lpid(lpid); 525 fixup_tlbie_lpid_va(va, lpid, ap);
461 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 526 asm volatile("eieio; tlbsync; ptesync": : :"memory");
462} 527}
463 528
@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
469 if (also_pwc) 534 if (also_pwc)
470 __tlbie_pid(pid, RIC_FLUSH_PWC); 535 __tlbie_pid(pid, RIC_FLUSH_PWC);
471 __tlbie_va_range(start, end, pid, page_size, psize); 536 __tlbie_va_range(start, end, pid, page_size, psize);
472 fixup_tlbie();
473 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 537 asm volatile("eieio; tlbsync; ptesync": : :"memory");
474} 538}
475 539
@@ -856,7 +920,7 @@ is_local:
856 if (gflush) 920 if (gflush)
857 __tlbie_va_range(gstart, gend, pid, 921 __tlbie_va_range(gstart, gend, pid,
858 PUD_SIZE, MMU_PAGE_1G); 922 PUD_SIZE, MMU_PAGE_1G);
859 fixup_tlbie(); 923
860 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 924 asm volatile("eieio; tlbsync; ptesync": : :"memory");
861 } else { 925 } else {
862 _tlbiel_va_range_multicast(mm, 926 _tlbiel_va_range_multicast(mm,
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 802387b231ad..0e6ed4413eea 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -12,6 +12,14 @@
12#include <asm/code-patching.h> 12#include <asm/code-patching.h>
13#include <mm/mmu_decl.h> 13#include <mm/mmu_decl.h>
14 14
15static pgprot_t kasan_prot_ro(void)
16{
17 if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
18 return PAGE_READONLY;
19
20 return PAGE_KERNEL_RO;
21}
22
15static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) 23static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
16{ 24{
17 unsigned long va = (unsigned long)kasan_early_shadow_page; 25 unsigned long va = (unsigned long)kasan_early_shadow_page;
@@ -26,6 +34,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
26{ 34{
27 pmd_t *pmd; 35 pmd_t *pmd;
28 unsigned long k_cur, k_next; 36 unsigned long k_cur, k_next;
37 pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL;
29 38
30 pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); 39 pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
31 40
@@ -43,10 +52,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
43 52
44 if (!new) 53 if (!new)
45 return -ENOMEM; 54 return -ENOMEM;
46 if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) 55 kasan_populate_pte(new, prot);
47 kasan_populate_pte(new, PAGE_READONLY);
48 else
49 kasan_populate_pte(new, PAGE_KERNEL_RO);
50 56
51 smp_wmb(); /* See comment in __pte_alloc */ 57 smp_wmb(); /* See comment in __pte_alloc */
52 58
@@ -103,11 +109,23 @@ static int __ref kasan_init_region(void *start, size_t size)
103 109
104static void __init kasan_remap_early_shadow_ro(void) 110static void __init kasan_remap_early_shadow_ro(void)
105{ 111{
106 if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) 112 pgprot_t prot = kasan_prot_ro();
107 kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY); 113 unsigned long k_start = KASAN_SHADOW_START;
108 else 114 unsigned long k_end = KASAN_SHADOW_END;
109 kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO); 115 unsigned long k_cur;
116 phys_addr_t pa = __pa(kasan_early_shadow_page);
117
118 kasan_populate_pte(kasan_early_shadow_pte, prot);
119
120 for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
121 pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
122 pte_t *ptep = pte_offset_kernel(pmd, k_cur);
123
124 if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
125 continue;
110 126
127 __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
128 }
111 flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); 129 flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
112} 130}
113 131
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 94cd96b9b7bb..fbd6e6b7bbf2 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -193,7 +193,7 @@ static void pnv_smp_cpu_kill_self(void)
193 * for coming online, which are handled via 193 * for coming online, which are handled via
194 * generic_check_cpu_restart() calls. 194 * generic_check_cpu_restart() calls.
195 */ 195 */
196 kvmppc_set_host_ipi(cpu, 0); 196 kvmppc_clear_host_ipi(cpu);
197 197
198 srr1 = pnv_cpu_offline(cpu); 198 srr1 = pnv_cpu_offline(cpu);
199 199
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 36b846f6e74e..b53359258d99 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -56,6 +56,22 @@ EXPORT_SYMBOL(plpar_hcall);
56EXPORT_SYMBOL(plpar_hcall9); 56EXPORT_SYMBOL(plpar_hcall9);
57EXPORT_SYMBOL(plpar_hcall_norets); 57EXPORT_SYMBOL(plpar_hcall_norets);
58 58
59/*
60 * H_BLOCK_REMOVE supported block size for this page size in segment who's base
61 * page size is that page size.
62 *
63 * The first index is the segment base page size, the second one is the actual
64 * page size.
65 */
66static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
67
68/*
69 * Due to the involved complexity, and that the current hypervisor is only
70 * returning this value or 0, we are limiting the support of the H_BLOCK_REMOVE
71 * buffer size to 8 size block.
72 */
73#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
74
59#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 75#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
60static u8 dtl_mask = DTL_LOG_PREEMPT; 76static u8 dtl_mask = DTL_LOG_PREEMPT;
61#else 77#else
@@ -984,6 +1000,17 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
984#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL 1000#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
985#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL 1001#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
986 1002
1003/*
1004 * Returned true if we are supporting this block size for the specified segment
1005 * base page size and actual page size.
1006 *
1007 * Currently, we only support 8 size block.
1008 */
1009static inline bool is_supported_hlbkrm(int bpsize, int psize)
1010{
1011 return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
1012}
1013
987/** 1014/**
988 * H_BLOCK_REMOVE caller. 1015 * H_BLOCK_REMOVE caller.
989 * @idx should point to the latest @param entry set with a PTEX. 1016 * @idx should point to the latest @param entry set with a PTEX.
@@ -1143,7 +1170,8 @@ static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
1143 if (lock_tlbie) 1170 if (lock_tlbie)
1144 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 1171 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
1145 1172
1146 if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) 1173 /* Assuming THP size is 16M */
1174 if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
1147 hugepage_block_invalidate(slot, vpn, count, psize, ssize); 1175 hugepage_block_invalidate(slot, vpn, count, psize, ssize);
1148 else 1176 else
1149 hugepage_bulk_invalidate(slot, vpn, count, psize, ssize); 1177 hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
@@ -1312,6 +1340,137 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
1312} 1340}
1313 1341
1314/* 1342/*
1343 * TLB Block Invalidate Characteristics
1344 *
1345 * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
1346 * is able to process for each couple segment base page size, actual page size.
1347 *
1348 * The ibm,get-system-parameter properties is returning a buffer with the
1349 * following layout:
1350 *
1351 * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
1352 * -----------------
1353 * TLB Block Invalidate Specifiers:
1354 * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
1355 * [ 1 byte Number of page sizes (N) that are supported for the specified
1356 * TLB invalidate block size ]
1357 * [ 1 byte Encoded segment base page size and actual page size
1358 * MSB=0 means 4k segment base page size and actual page size
1359 * MSB=1 the penc value in mmu_psize_def ]
1360 * ...
1361 * -----------------
1362 * Next TLB Block Invalidate Specifiers...
1363 * -----------------
1364 * [ 0 ]
1365 */
1366static inline void set_hblkrm_bloc_size(int bpsize, int psize,
1367 unsigned int block_size)
1368{
1369 if (block_size > hblkrm_size[bpsize][psize])
1370 hblkrm_size[bpsize][psize] = block_size;
1371}
1372
1373/*
1374 * Decode the Encoded segment base page size and actual page size.
1375 * PAPR specifies:
1376 * - bit 7 is the L bit
1377 * - bits 0-5 are the penc value
1378 * If the L bit is 0, this means 4K segment base page size and actual page size
1379 * otherwise the penc value should be read.
1380 */
1381#define HBLKRM_L_MASK 0x80
1382#define HBLKRM_PENC_MASK 0x3f
1383static inline void __init check_lp_set_hblkrm(unsigned int lp,
1384 unsigned int block_size)
1385{
1386 unsigned int bpsize, psize;
1387
1388 /* First, check the L bit, if not set, this means 4K */
1389 if ((lp & HBLKRM_L_MASK) == 0) {
1390 set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
1391 return;
1392 }
1393
1394 lp &= HBLKRM_PENC_MASK;
1395 for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
1396 struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
1397
1398 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
1399 if (def->penc[psize] == lp) {
1400 set_hblkrm_bloc_size(bpsize, psize, block_size);
1401 return;
1402 }
1403 }
1404 }
1405}
1406
1407#define SPLPAR_TLB_BIC_TOKEN 50
1408
1409/*
1410 * The size of the TLB Block Invalidate Characteristics is variable. But at the
1411 * maximum it will be the number of possible page sizes *2 + 10 bytes.
1412 * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
1413 * (128 bytes) for the buffer to get plenty of space.
1414 */
1415#define SPLPAR_TLB_BIC_MAXLENGTH 128
1416
1417void __init pseries_lpar_read_hblkrm_characteristics(void)
1418{
1419 unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
1420 int call_status, len, idx, bpsize;
1421
1422 spin_lock(&rtas_data_buf_lock);
1423 memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
1424 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
1425 NULL,
1426 SPLPAR_TLB_BIC_TOKEN,
1427 __pa(rtas_data_buf),
1428 RTAS_DATA_BUF_SIZE);
1429 memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
1430 local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
1431 spin_unlock(&rtas_data_buf_lock);
1432
1433 if (call_status != 0) {
1434 pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
1435 __FILE__, __func__, call_status);
1436 return;
1437 }
1438
1439 /*
1440 * The first two (2) bytes of the data in the buffer are the length of
1441 * the returned data, not counting these first two (2) bytes.
1442 */
1443 len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
1444 if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
1445 pr_warn("%s too large returned buffer %d", __func__, len);
1446 return;
1447 }
1448
1449 idx = 2;
1450 while (idx < len) {
1451 u8 block_shift = local_buffer[idx++];
1452 u32 block_size;
1453 unsigned int npsize;
1454
1455 if (!block_shift)
1456 break;
1457
1458 block_size = 1 << block_shift;
1459
1460 for (npsize = local_buffer[idx++];
1461 npsize > 0 && idx < len; npsize--)
1462 check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
1463 block_size);
1464 }
1465
1466 for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
1467 for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
1468 if (hblkrm_size[bpsize][idx])
1469 pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
1470 bpsize, idx, hblkrm_size[bpsize][idx]);
1471}
1472
1473/*
1315 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 1474 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
1316 * lock. 1475 * lock.
1317 */ 1476 */
@@ -1330,7 +1489,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
1330 if (lock_tlbie) 1489 if (lock_tlbie)
1331 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 1490 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
1332 1491
1333 if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) { 1492 if (is_supported_hlbkrm(batch->psize, batch->psize)) {
1334 do_block_remove(number, batch, param); 1493 do_block_remove(number, batch, param);
1335 goto out; 1494 goto out;
1336 } 1495 }
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..61883291defc 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -65,29 +65,21 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
65 cond_resched(); 65 cond_resched();
66 } while (rc == H_BUSY); 66 } while (rc == H_BUSY);
67 67
68 if (rc) { 68 if (rc)
69 /* H_OVERLAP needs a separate error path */ 69 return rc;
70 if (rc == H_OVERLAP)
71 return -EBUSY;
72
73 dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
74 return -ENXIO;
75 }
76 70
77 p->bound_addr = saved; 71 p->bound_addr = saved;
78 72 dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
79 dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res); 73 return rc;
80
81 return 0;
82} 74}
83 75
84static int drc_pmem_unbind(struct papr_scm_priv *p) 76static void drc_pmem_unbind(struct papr_scm_priv *p)
85{ 77{
86 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 78 unsigned long ret[PLPAR_HCALL_BUFSIZE];
87 uint64_t token = 0; 79 uint64_t token = 0;
88 int64_t rc; 80 int64_t rc;
89 81
90 dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index); 82 dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
91 83
92 /* NB: unbind has the same retry requirements as drc_pmem_bind() */ 84 /* NB: unbind has the same retry requirements as drc_pmem_bind() */
93 do { 85 do {
@@ -110,12 +102,48 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
110 if (rc) 102 if (rc)
111 dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); 103 dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
112 else 104 else
113 dev_dbg(&p->pdev->dev, "unbind drc %x complete\n", 105 dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
114 p->drc_index); 106 p->drc_index);
115 107
116 return rc == H_SUCCESS ? 0 : -ENXIO; 108 return;
117} 109}
118 110
111static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
112{
113 unsigned long start_addr;
114 unsigned long end_addr;
115 unsigned long ret[PLPAR_HCALL_BUFSIZE];
116 int64_t rc;
117
118
119 rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
120 p->drc_index, 0);
121 if (rc)
122 goto err_out;
123 start_addr = ret[0];
124
125 /* Make sure the full region is bound. */
126 rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
127 p->drc_index, p->blocks - 1);
128 if (rc)
129 goto err_out;
130 end_addr = ret[0];
131
132 if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
133 goto err_out;
134
135 p->bound_addr = start_addr;
136 dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
137 return rc;
138
139err_out:
140 dev_info(&p->pdev->dev,
141 "Failed to query, trying an unbind followed by bind");
142 drc_pmem_unbind(p);
143 return drc_pmem_bind(p);
144}
145
146
119static int papr_scm_meta_get(struct papr_scm_priv *p, 147static int papr_scm_meta_get(struct papr_scm_priv *p,
120 struct nd_cmd_get_config_data_hdr *hdr) 148 struct nd_cmd_get_config_data_hdr *hdr)
121{ 149{
@@ -436,14 +464,14 @@ static int papr_scm_probe(struct platform_device *pdev)
436 rc = drc_pmem_bind(p); 464 rc = drc_pmem_bind(p);
437 465
438 /* If phyp says drc memory still bound then force unbound and retry */ 466 /* If phyp says drc memory still bound then force unbound and retry */
439 if (rc == -EBUSY) { 467 if (rc == H_OVERLAP)
440 dev_warn(&pdev->dev, "Retrying bind after unbinding\n"); 468 rc = drc_pmem_query_n_bind(p);
441 drc_pmem_unbind(p);
442 rc = drc_pmem_bind(p);
443 }
444 469
445 if (rc) 470 if (rc != H_SUCCESS) {
471 dev_err(&p->pdev->dev, "bind err: %d\n", rc);
472 rc = -ENXIO;
446 goto err; 473 goto err;
474 }
447 475
448 /* setup the resource for the newly bound range */ 476 /* setup the resource for the newly bound range */
449 p->res.start = p->bound_addr; 477 p->res.start = p->bound_addr;
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index a6624d4bd9d0..13fa370a87e4 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -112,5 +112,6 @@ static inline unsigned long cmo_get_page_size(void)
112int dlpar_workqueue_init(void); 112int dlpar_workqueue_init(void);
113 113
114void pseries_setup_rfi_flush(void); 114void pseries_setup_rfi_flush(void);
115void pseries_lpar_read_hblkrm_characteristics(void);
115 116
116#endif /* _PSERIES_PSERIES_H */ 117#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f8adcd0e4589..0a40201f315f 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -744,6 +744,7 @@ static void __init pSeries_setup_arch(void)
744 744
745 pseries_setup_rfi_flush(); 745 pseries_setup_rfi_flush();
746 setup_stf_barrier(); 746 setup_stf_barrier();
747 pseries_lpar_read_hblkrm_characteristics();
747 748
748 /* By default, only probe PCI (can be overridden by rtas_pci) */ 749 /* By default, only probe PCI (can be overridden by rtas_pci) */
749 pci_add_flags(PCI_PROBE_ONLY); 750 pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 485569ff7ef1..7d13d2ef5a90 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -140,7 +140,7 @@ static unsigned int icp_native_get_irq(void)
140 140
141static void icp_native_cause_ipi(int cpu) 141static void icp_native_cause_ipi(int cpu)
142{ 142{
143 kvmppc_set_host_ipi(cpu, 1); 143 kvmppc_set_host_ipi(cpu);
144 icp_native_set_qirr(cpu, IPI_PRIORITY); 144 icp_native_set_qirr(cpu, IPI_PRIORITY);
145} 145}
146 146
@@ -179,7 +179,7 @@ void icp_native_flush_interrupt(void)
179 if (vec == XICS_IPI) { 179 if (vec == XICS_IPI) {
180 /* Clear pending IPI */ 180 /* Clear pending IPI */
181 int cpu = smp_processor_id(); 181 int cpu = smp_processor_id();
182 kvmppc_set_host_ipi(cpu, 0); 182 kvmppc_clear_host_ipi(cpu);
183 icp_native_set_qirr(cpu, 0xff); 183 icp_native_set_qirr(cpu, 0xff);
184 } else { 184 } else {
185 pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n", 185 pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
@@ -200,7 +200,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
200{ 200{
201 int cpu = smp_processor_id(); 201 int cpu = smp_processor_id();
202 202
203 kvmppc_set_host_ipi(cpu, 0); 203 kvmppc_clear_host_ipi(cpu);
204 icp_native_set_qirr(cpu, 0xff); 204 icp_native_set_qirr(cpu, 0xff);
205 205
206 return smp_ipi_demux(); 206 return smp_ipi_demux();
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 8bb8dd7dd6ad..68fd2540b093 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -126,7 +126,7 @@ static void icp_opal_cause_ipi(int cpu)
126{ 126{
127 int hw_cpu = get_hard_smp_processor_id(cpu); 127 int hw_cpu = get_hard_smp_processor_id(cpu);
128 128
129 kvmppc_set_host_ipi(cpu, 1); 129 kvmppc_set_host_ipi(cpu);
130 opal_int_set_mfrr(hw_cpu, IPI_PRIORITY); 130 opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
131} 131}
132 132
@@ -134,7 +134,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
134{ 134{
135 int cpu = smp_processor_id(); 135 int cpu = smp_processor_id();
136 136
137 kvmppc_set_host_ipi(cpu, 0); 137 kvmppc_clear_host_ipi(cpu);
138 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff); 138 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
139 139
140 return smp_ipi_demux(); 140 return smp_ipi_demux();
@@ -157,7 +157,7 @@ void icp_opal_flush_interrupt(void)
157 if (vec == XICS_IPI) { 157 if (vec == XICS_IPI) {
158 /* Clear pending IPI */ 158 /* Clear pending IPI */
159 int cpu = smp_processor_id(); 159 int cpu = smp_processor_id();
160 kvmppc_set_host_ipi(cpu, 0); 160 kvmppc_clear_host_ipi(cpu);
161 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff); 161 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
162 } else { 162 } else {
163 pr_err("XICS: hw interrupt 0x%x to offline cpu, " 163 pr_err("XICS: hw interrupt 0x%x to offline cpu, "
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index f1fbc15800c4..ed1565809d2b 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -4,6 +4,7 @@ noarg:
4 4
5TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ 5TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
6 large_vm_fork_separation 6 large_vm_fork_separation
7TEST_GEN_PROGS_EXTENDED := tlbie_test
7TEST_GEN_FILES := tempfile 8TEST_GEN_FILES := tempfile
8 9
9top_srcdir = ../../../../.. 10top_srcdir = ../../../../..
@@ -19,3 +20,4 @@ $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
19$(OUTPUT)/tempfile: 20$(OUTPUT)/tempfile:
20 dd if=/dev/zero of=$@ bs=64k count=1 21 dd if=/dev/zero of=$@ bs=64k count=1
21 22
23$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
new file mode 100644
index 000000000000..9868a5ddd847
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -0,0 +1,734 @@
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp.
5 */
6
7/*
8 *
 9 * Test the tlbie/mtpidr race. We have 4 threads doing a flush/load/compare/store
 10 * sequence in a loop. The same threads also run a context switch task
 11 * that does sched_yield() in a loop.
 12 *
 13 * The snapshot thread marks the mmap area PROT_READ in between, makes a copy
 14 * and copies it back to the original area. This helps us detect whether any
 15 * store continued to happen after we marked the memory PROT_READ.
16 */
17
18#define _GNU_SOURCE
19#include <stdio.h>
20#include <sys/mman.h>
21#include <sys/types.h>
22#include <sys/wait.h>
23#include <sys/ipc.h>
24#include <sys/shm.h>
25#include <sys/stat.h>
26#include <sys/time.h>
27#include <linux/futex.h>
28#include <unistd.h>
29#include <asm/unistd.h>
30#include <string.h>
31#include <stdlib.h>
32#include <fcntl.h>
33#include <sched.h>
34#include <time.h>
35#include <stdarg.h>
36#include <sched.h>
37#include <pthread.h>
38#include <signal.h>
39#include <sys/prctl.h>
40
41static inline void dcbf(volatile unsigned int *addr)
42{
43 __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
44}
45
46static void err_msg(char *msg)
47{
48
49 time_t now;
50 time(&now);
51 printf("=================================\n");
52 printf(" Error: %s\n", msg);
53 printf(" %s", ctime(&now));
54 printf("=================================\n");
55 exit(1);
56}
57
58static char *map1;
59static char *map2;
60static pid_t rim_process_pid;
61
62/*
63 * A "rim-sequence" is defined to be the sequence of the following
64 * operations performed on a memory word:
65 * 1) FLUSH the contents of that word.
66 * 2) LOAD the contents of that word.
67 * 3) COMPARE the contents of that word with the content that was
68 * previously stored at that word
69 * 4) STORE new content into that word.
70 *
71 * The threads in this test that perform the rim-sequence are termed
72 * as rim_threads.
73 */
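/*
 * Minimal sketch of one rim-sequence step on a word w (illustrative only;
 * the real loop in rim_fn() below adds sweep bookkeeping, verification
 * logging and corruption reporting):
 *
 *	dcbf(w);			// 1) FLUSH
 *	data = *w;			// 2) LOAD
 *	if (data != expected)		// 3) COMPARE
 *		corruption_found = 1;
 *	*w = new_pattern;		// 4) STORE
 */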
74
75/*
76 * A "corruption" is defined to be the failed COMPARE operation in a
77 * rim-sequence.
78 *
79 * A rim_thread that detects a corruption informs about it to all the
80 * other rim_threads, and the mem_snapshot thread.
81 */
82static volatile unsigned int corruption_found;
83
84/*
85 * This defines the maximum number of rim_threads in this test.
86 *
87 * The THREAD_ID_BITS denote the number of bits required
88 * to represent the thread_ids [0..MAX_THREADS - 1].
89 * We are being a bit paranoid here and set it to 8 bits,
90 * though 6 bits suffice.
91 *
92 */
93#define MAX_THREADS 64
94#define THREAD_ID_BITS 8
95#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1)
96static unsigned int rim_thread_ids[MAX_THREADS];
97static pthread_t rim_threads[MAX_THREADS];
98
99
100/*
101 * Each rim_thread works on an exclusive "chunk" of size
102 * RIM_CHUNK_SIZE.
103 *
104 * The ith rim_thread works on the ith chunk.
105 *
106 * The ith chunk begins at
107 * map1 + (i * RIM_CHUNK_SIZE)
108 */
109#define RIM_CHUNK_SIZE 1024
110#define BITS_PER_BYTE 8
111#define WORD_SIZE (sizeof(unsigned int))
112#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE)
113#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE)
114
115static inline char *compute_chunk_start_addr(unsigned int thread_id)
116{
117 char *chunk_start;
118
119 chunk_start = (char *)((unsigned long)map1 +
120 (thread_id * RIM_CHUNK_SIZE));
121
122 return chunk_start;
123}
124
125/*
126 * The "word-offset" of a word-aligned address inside a chunk, is
127 * defined to be the number of words that precede the address in that
128 * chunk.
129 *
130 * WORD_OFFSET_BITS denote the number of bits required to represent
131 * the word-offsets of all the word-aligned addresses of a chunk.
132 */
133#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK))
134#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1)
135
136static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
137{
138 unsigned int delta_bytes, ret;
139 delta_bytes = (unsigned long)addr - (unsigned long)start;
140
141 ret = delta_bytes/WORD_SIZE;
142
143 return ret;
144}
145
146/*
147 * A "sweep" is defined to be the sequential execution of the
148 * rim-sequence by a rim_thread on its chunk one word at a time,
149 * starting from the first word of its chunk and ending with the last
150 * word of its chunk.
151 *
152 * Each sweep of a rim_thread is uniquely identified by a sweep_id.
153 * SWEEP_ID_BITS denote the number of bits required to represent
154 * the sweep_ids of rim_threads.
155 *
156 * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS,
157 * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below.
158 */
159#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
160#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1)
161
162/*
163 * A "store-pattern" is the word-pattern that is stored into a word
164 * location in the 4)STORE step of the rim-sequence.
165 *
166 * In the store-pattern, we shall encode:
167 *
168 * - The thread-id of the rim_thread performing the store
169 * (The most significant THREAD_ID_BITS)
170 *
171 * - The word-offset of the address into which the store is being
172 * performed (The next WORD_OFFSET_BITS)
173 *
174 * - The sweep_id of the current sweep in which the store is
175 * being performed. (The lower SWEEP_ID_BITS)
176 *
177 * Store Pattern: 32 bits
178 * |------------------|--------------------|---------------------------------|
179 * | Thread id | Word offset | sweep_id |
180 * |------------------|--------------------|---------------------------------|
181 * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS
182 *
183 * In the store pattern, the (Thread-id + Word-offset) uniquely identify the
184 * address to which the store is being performed i.e,
185 * address == map1 +
186 * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE)
187 *
188 * And the sweep_id in the store pattern identifies the time when the
189 * store was performed by the rim_thread.
190 *
191 * We shall use this property in the 3)COMPARE step of the
192 * rim-sequence.
193 */
194#define SWEEP_ID_SHIFT 0
195#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS)
196#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS)
197
198/*
199 * Compute the store pattern for a given thread with id @tid, at
200 * location @addr in the sweep identified by @sweep_id
201 */
202static inline unsigned int compute_store_pattern(unsigned int tid,
203 unsigned int *addr,
204 unsigned int sweep_id)
205{
206 unsigned int ret = 0;
207 char *start = compute_chunk_start_addr(tid);
208 unsigned int word_offset = compute_word_offset(start, addr);
209
210 ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
211 ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
212 ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
213 return ret;
214}
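/*
 * Worked example (illustrative): with RIM_CHUNK_SIZE = 1024 and 4-byte words,
 * WORDS_PER_CHUNK = 256, so WORD_OFFSET_BITS = 8 and SWEEP_ID_BITS = 16.
 * For tid = 2, word_offset = 3 and sweep_id = 5 the store pattern is
 * (2 << 24) | (3 << 16) | 5 = 0x02030005, which the extract_tid(),
 * extract_word_offset() and extract_sweep_id() helpers below decode back
 * to 2, 3 and 5.
 */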
215
216/* Extract the thread-id from the given store-pattern */
217static inline unsigned int extract_tid(unsigned int pattern)
218{
219 unsigned int ret;
220
221 ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
222 return ret;
223}
224
225/* Extract the word-offset from the given store-pattern */
226static inline unsigned int extract_word_offset(unsigned int pattern)
227{
228 unsigned int ret;
229
230 ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
231
232 return ret;
233}
234
235/* Extract the sweep-id from the given store-pattern */
236static inline unsigned int extract_sweep_id(unsigned int pattern)
237
238{
239 unsigned int ret;
240
241 ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
242
243 return ret;
244}
245
246/************************************************************
247 * *
248 * Logging the output of the verification *
249 * *
250 ************************************************************/
251#define LOGDIR_NAME_SIZE 100
252static char logdir[LOGDIR_NAME_SIZE];
253
254static FILE *fp[MAX_THREADS];
255static const char logfilename[] ="Thread-%02d-Chunk";
256
257static inline void start_verification_log(unsigned int tid,
258 unsigned int *addr,
259 unsigned int cur_sweep_id,
260 unsigned int prev_sweep_id)
261{
262 FILE *f;
263 char logfile[30];
264 char path[LOGDIR_NAME_SIZE + 30];
265 char separator[2] = "/";
266 char *chunk_start = compute_chunk_start_addr(tid);
267 unsigned int size = RIM_CHUNK_SIZE;
268
269 sprintf(logfile, logfilename, tid);
270 strcpy(path, logdir);
271 strcat(path, separator);
272 strcat(path, logfile);
273 f = fopen(path, "w");
274
275 if (!f) {
276 err_msg("Unable to create logfile\n");
277 }
278
279 fp[tid] = f;
280
281 fprintf(f, "----------------------------------------------------------\n");
282 fprintf(f, "PID = %d\n", rim_process_pid);
283 fprintf(f, "Thread id = %02d\n", tid);
284 fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start);
285 fprintf(f, "Chunk Size = %d\n", size);
286 fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr);
287 fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id);
288 fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id);
289 fprintf(f, "----------------------------------------------------------\n");
290}
291
292static inline void log_anamoly(unsigned int tid, unsigned int *addr,
293 unsigned int expected, unsigned int observed)
294{
295 FILE *f = fp[tid];
296
297 fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
298 tid, (unsigned long)addr, expected, observed);
299 fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected));
300 fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed));
301 fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
302 fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
303 fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected));
304 fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed));
305 fprintf(f, "----------------------------------------------------------\n");
306}
307
308static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies)
309{
310 FILE *f = fp[tid];
311 char logfile[30];
312 char path[LOGDIR_NAME_SIZE + 30];
313 char separator[] = "/";
 314
 315	fclose(f);
 316	/* Build the path first: it is needed even when there are no anamolies. */
 317	sprintf(logfile, logfilename, tid);
 318	strcpy(path, logdir);
 319	strcat(path, separator);
 320	strcat(path, logfile);
 321
 322	if (nr_anamolies == 0) {
 323		remove(path);
 324		return;
 325	}
 326
327 printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
328 tid, nr_anamolies, path);
329}
330
331/*
332 * When a COMPARE step of a rim-sequence fails, the rim_thread informs
333 * everyone else via the shared_memory pointed to by
334 * corruption_found variable. On seeing this, every thread verifies the
335 * content of its chunk as follows.
336 *
337 * Suppose a thread identified with @tid was about to store (but not
338 * yet stored) to @next_store_addr in its current sweep identified
339 * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id.
340 *
341 * This implies that for all the addresses @addr < @next_store_addr,
342 * Thread @tid has already performed a store as part of its current
343 * sweep. Hence we expect the content of such @addr to be:
344 * |-------------------------------------------------|
345 * | tid | word_offset(addr) | cur_sweep_id |
346 * |-------------------------------------------------|
347 *
348 * Since Thread @tid is yet to perform stores on address
349 * @next_store_addr and above, we expect the content of such an
350 * address @addr to be:
351 * |-------------------------------------------------|
352 * | tid | word_offset(addr) | prev_sweep_id |
353 * |-------------------------------------------------|
354 *
355 * The verifier function @verify_chunk does this verification and logs
356 * any anamolies that it finds.
357 */
358static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
359 unsigned int cur_sweep_id,
360 unsigned int prev_sweep_id)
361{
362 unsigned int *iter_ptr;
363 unsigned int size = RIM_CHUNK_SIZE;
364 unsigned int expected;
365 unsigned int observed;
366 char *chunk_start = compute_chunk_start_addr(tid);
367
368 int nr_anamolies = 0;
369
370 start_verification_log(tid, next_store_addr,
371 cur_sweep_id, prev_sweep_id);
372
373 for (iter_ptr = (unsigned int *)chunk_start;
374 (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
375 iter_ptr++) {
376 unsigned int expected_sweep_id;
377
378 if (iter_ptr < next_store_addr) {
379 expected_sweep_id = cur_sweep_id;
380 } else {
381 expected_sweep_id = prev_sweep_id;
382 }
383
384 expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
385
386 dcbf((volatile unsigned int*)iter_ptr); //Flush before reading
387 observed = *iter_ptr;
388
389 if (observed != expected) {
390 nr_anamolies++;
391 log_anamoly(tid, iter_ptr, expected, observed);
392 }
393 }
394
395 end_verification_log(tid, nr_anamolies);
396}
397
398static void set_pthread_cpu(pthread_t th, int cpu)
399{
400 cpu_set_t run_cpu_mask;
401 struct sched_param param;
402
403 CPU_ZERO(&run_cpu_mask);
404 CPU_SET(cpu, &run_cpu_mask);
405 pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
406
407 param.sched_priority = 1;
408 if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
409 /* haven't reproduced with this setting, it kills random preemption which may be a factor */
410 fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
411 }
412}
413
414static void set_mycpu(int cpu)
415{
416 cpu_set_t run_cpu_mask;
417 struct sched_param param;
418
419 CPU_ZERO(&run_cpu_mask);
420 CPU_SET(cpu, &run_cpu_mask);
421 sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
422
423 param.sched_priority = 1;
424 if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
425 fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
426 }
427}
428
429static volatile int segv_wait;
430
431static void segv_handler(int signo, siginfo_t *info, void *extra)
432{
433 while (segv_wait) {
434 sched_yield();
435 }
436
437}
438
439static void set_segv_handler(void)
440{
441 struct sigaction sa;
442
443 sa.sa_flags = SA_SIGINFO;
444 sa.sa_sigaction = segv_handler;
445
446 if (sigaction(SIGSEGV, &sa, NULL) == -1) {
447 perror("sigaction");
448 exit(EXIT_FAILURE);
449 }
450}
451
452int timeout = 0;
453/*
454 * This function is executed by every rim_thread.
455 *
456 * This function performs sweeps over the exclusive chunks of the
457 * rim_threads executing the rim-sequence one word at a time.
458 */
459static void *rim_fn(void *arg)
460{
461 unsigned int tid = *((unsigned int *)arg);
462
463 int size = RIM_CHUNK_SIZE;
464 char *chunk_start = compute_chunk_start_addr(tid);
465
466 unsigned int prev_sweep_id;
467 unsigned int cur_sweep_id = 0;
468
469 /* word access */
470 unsigned int pattern = cur_sweep_id;
471 unsigned int *pattern_ptr = &pattern;
472 unsigned int *w_ptr, read_data;
473
474 set_segv_handler();
475
476 /*
477 * Let us initialize the chunk:
478 *
479 * Each word-aligned address addr in the chunk,
480 * is initialized to :
481 * |-------------------------------------------------|
482 * | tid | word_offset(addr) | 0 |
483 * |-------------------------------------------------|
484 */
485 for (w_ptr = (unsigned int *)chunk_start;
486 (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
487 w_ptr++) {
488
489 *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
490 *w_ptr = *pattern_ptr;
491 }
492
493 while (!corruption_found && !timeout) {
494 prev_sweep_id = cur_sweep_id;
495 cur_sweep_id = cur_sweep_id + 1;
496
497 for (w_ptr = (unsigned int *)chunk_start;
498 (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
499 w_ptr++) {
500 unsigned int old_pattern;
501
502 /*
503 * Compute the pattern that we would have
504 * stored at this location in the previous
505 * sweep.
506 */
507 old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
508
509 /*
510 * FLUSH:Ensure that we flush the contents of
511 * the cache before loading
512 */
513 dcbf((volatile unsigned int*)w_ptr); //Flush
514
515 /* LOAD: Read the value */
516 read_data = *w_ptr; //Load
517
518 /*
519 * COMPARE: Is it the same as what we had stored
520 * in the previous sweep ? It better be!
521 */
522 if (read_data != old_pattern) {
523 /* No it isn't! Tell everyone */
524 corruption_found = 1;
525 }
526
527 /*
528 * Before performing a store, let us check if
529 * any rim_thread has found a corruption.
530 */
531 if (corruption_found || timeout) {
532 /*
533 * Yes. Someone (including us!) has found
534 * a corruption :(
535 *
536 * Let us verify that our chunk is
537 * correct.
538 */
539 /* But first, let us allow the dust to settle down! */
540 verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
541
542 return 0;
543 }
544
545 /*
546 * Compute the new pattern that we are going
547 * to write to this location
548 */
549 *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
550
551 /*
552 * STORE: Now let us write this pattern into
553 * the location
554 */
555 *w_ptr = *pattern_ptr;
556 }
557 }
558
559 return NULL;
560}
561
562
563static unsigned long start_cpu = 0;
564static unsigned long nrthreads = 4;
565
566static pthread_t mem_snapshot_thread;
567
568static void *mem_snapshot_fn(void *arg)
569{
570 int page_size = getpagesize();
571 size_t size = page_size;
572 void *tmp = malloc(size);
573
574 while (!corruption_found && !timeout) {
575 /* Stop memory migration once corruption is found */
576 segv_wait = 1;
577
578 mprotect(map1, size, PROT_READ);
579
580 /*
581 * Load from the working alias (map1). Loading from map2
582 * also fails.
583 */
584 memcpy(tmp, map1, size);
585
586 /*
587 * Stores must go via map2 which has write permissions, but
588 * the corrupted data tends to be seen in the snapshot buffer,
589 * so corruption does not appear to be introduced at the
590 * copy-back via map2 alias here.
591 */
592 memcpy(map2, tmp, size);
593 /*
 594 * Before releasing the other threads, we must ensure the copy
 595 * back to map2 has completed (hence the sync below).
596 */
597 asm volatile("sync" ::: "memory");
598 mprotect(map1, size, PROT_READ|PROT_WRITE);
599 asm volatile("sync" ::: "memory");
600 segv_wait = 0;
601
602 usleep(1); /* This value makes a big difference */
603 }
604
605 return 0;
606}
607
608void alrm_sighandler(int sig)
609{
610 timeout = 1;
611}
612
613int main(int argc, char *argv[])
614{
615 int c;
616 int page_size = getpagesize();
617 time_t now;
618 int i, dir_error;
619 pthread_attr_t attr;
620 key_t shm_key = (key_t) getpid();
621 int shmid, run_time = 20 * 60;
622 struct sigaction sa_alrm;
623
624 snprintf(logdir, LOGDIR_NAME_SIZE,
625 "/tmp/logdir-%u", (unsigned int)getpid());
626 while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
627 switch(c) {
628 case 'r':
629 start_cpu = strtoul(optarg, NULL, 10);
630 break;
631 case 'h':
632 printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
633 exit(0);
634 break;
635 case 'n':
636 nrthreads = strtoul(optarg, NULL, 10);
637 break;
638 case 'l':
639 strncpy(logdir, optarg, LOGDIR_NAME_SIZE);
640 break;
641 case 't':
642 run_time = strtoul(optarg, NULL, 10);
643 break;
644 default:
645 printf("invalid option\n");
646 exit(0);
647 break;
648 }
649 }
650
651 if (nrthreads > MAX_THREADS)
652 nrthreads = MAX_THREADS;
653
654 shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
655 if (shmid < 0) {
656 err_msg("Failed shmget\n");
657 }
658
659 map1 = shmat(shmid, NULL, 0);
660 if (map1 == (void *) -1) {
661 err_msg("Failed shmat");
662 }
663
664 map2 = shmat(shmid, NULL, 0);
665 if (map2 == (void *) -1) {
666 err_msg("Failed shmat");
667 }
668
669 dir_error = mkdir(logdir, 0755);
670
671 if (dir_error) {
672 err_msg("Failed mkdir");
673 }
674
675 printf("start_cpu list:%lu\n", start_cpu);
676 printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
677 printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
678 printf("logdir at : %s\n", logdir);
679 printf("Timeout: %d seconds\n", run_time);
680
681 time(&now);
682 printf("=================================\n");
683 printf(" Starting Test\n");
684 printf(" %s", ctime(&now));
685 printf("=================================\n");
686
687 for (i = 0; i < nrthreads; i++) {
688 if (1 && !fork()) {
689 prctl(PR_SET_PDEATHSIG, SIGKILL);
690 set_mycpu(start_cpu + i);
691 for (;;)
692 sched_yield();
693 exit(0);
694 }
695 }
696
697
698 sa_alrm.sa_handler = &alrm_sighandler;
699 sigemptyset(&sa_alrm.sa_mask);
700 sa_alrm.sa_flags = 0;
701
702 if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
703 err_msg("Failed signal handler registration\n");
704 }
705
706 alarm(run_time);
707
708 pthread_attr_init(&attr);
709 for (i = 0; i < nrthreads; i++) {
710 rim_thread_ids[i] = i;
711 pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
712 set_pthread_cpu(rim_threads[i], start_cpu + i);
713 }
714
715 pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
716 set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
717
718
719 pthread_join(mem_snapshot_thread, NULL);
720 for (i = 0; i < nrthreads; i++) {
721 pthread_join(rim_threads[i], NULL);
722 }
723
724 if (!timeout) {
725 time(&now);
726 printf("=================================\n");
727 printf(" Data Corruption Detected\n");
728 printf(" %s", ctime(&now));
729 printf(" See logfiles in %s\n", logdir);
730 printf("=================================\n");
731 return 1;
732 }
733 return 0;
734}
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 951fe855f7cd..98f2708d86cc 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -17,3 +17,4 @@ tm-vmx-unavail
17tm-unavailable 17tm-unavailable
18tm-trap 18tm-trap
19tm-sigreturn 19tm-sigreturn
20tm-poison
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index c0734ed0ef56..b15a1a325bd0 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
5TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ 5TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
6 tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ 6 tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
7 $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ 7 $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \
8 tm-signal-context-force-tm 8 tm-signal-context-force-tm tm-poison
9 9
10top_srcdir = ../../../../.. 10top_srcdir = ../../../../..
11include ../../lib.mk 11include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
new file mode 100644
index 000000000000..977558497c16
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -0,0 +1,179 @@
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright 2019, Gustavo Romero, Michael Neuling, IBM Corp.
4 *
 5 * This test spawns two processes, both attached to the same CPU (CPU 0).
 6 * The child loops writing a known value, called poison, to FP register f31
 7 * and to VMX/VEC/Altivec register vr31, calling the sched_yield syscall
 8 * afterwards to let the parent run on the CPU. The parent sets f31 and
 9 * vr31 to 1 and, in a loop, checks that they remain 1 until a given
 10 * timeout (2m) expires. If the issue is present, the child's poison will
 11 * leak into the parent's f31 or vr31 registers; otherwise no poison will
 12 * ever leak into the parent's f31 and vr31 registers.
13 */
14
15#define _GNU_SOURCE
16#include <stdio.h>
17#include <stdlib.h>
18#include <unistd.h>
19#include <inttypes.h>
20#include <sched.h>
21#include <sys/types.h>
22#include <signal.h>
23#include <inttypes.h>
24
25#include "tm.h"
26
27int tm_poison_test(void)
28{
29 int pid;
30 cpu_set_t cpuset;
31 uint64_t poison = 0xdeadbeefc0dec0fe;
32 uint64_t unknown = 0;
33 bool fail_fp = false;
34 bool fail_vr = false;
35
36 SKIP_IF(!have_htm());
37
38 /* Attach both Child and Parent to CPU 0 */
39 CPU_ZERO(&cpuset);
40 CPU_SET(0, &cpuset);
41 sched_setaffinity(0, sizeof(cpuset), &cpuset);
42
43 pid = fork();
44 if (!pid) {
45 /**
46 * child
47 */
48 while (1) {
49 sched_yield();
50 asm (
51 "mtvsrd 31, %[poison];" // f31 = poison
52 "mtvsrd 63, %[poison];" // vr31 = poison
53
54 : : [poison] "r" (poison) : );
55 }
56 }
57
58 /**
59 * parent
60 */
61 asm (
62 /*
 63 * Set r3, r4, and f31 to the known value 1 before entering
 64 * the transaction. They won't be written to after that.
65 */
66 " li 3, 0x1 ;"
67 " li 4, 0x1 ;"
68 " mtvsrd 31, 4 ;"
69
70 /*
71 * The Time Base (TB) is a 64-bit counter register that is
72 * independent of the CPU clock and which is incremented
 73 * at a frequency of 512000000 Hz, i.e. one tick every 1.953125ns.
 74 * So 120s / 0.000000001953125s = 61440000000 increments are
 75 * needed to get a 2 minute timeout. Below we set that
76 * value in r5 and then use r6 to track initial TB value,
77 * updating TB values in r7 at every iteration and comparing it
78 * to r6. When r7 (current) - r6 (initial) > 61440000000 we bail
79 * out since for sure we spent already 2 minutes in the loop.
80 * SPR 268 is the TB register.
81 */
82 " lis 5, 14 ;"
83 " ori 5, 5, 19996 ;"
84 " sldi 5, 5, 16 ;" // r5 = 61440000000
85
86 " mfspr 6, 268 ;" // r6 (TB initial)
87 "1: mfspr 7, 268 ;" // r7 (TB current)
88 " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
89 " cmpd 7, 5 ;"
90 " bgt 3f ;" // yes, exit
91
92 /*
93 * Main loop to check f31
94 */
95 " tbegin. ;" // no, try again
96 " beq 1b ;" // restart if no timeout
97 " mfvsrd 3, 31 ;" // read f31
98 " cmpd 3, 4 ;" // f31 == 1 ?
99 " bne 2f ;" // broken :-(
100 " tabort. 3 ;" // try another transaction
101 "2: tend. ;" // commit transaction
102 "3: mr %[unknown], 3 ;" // record r3
103
104 : [unknown] "=r" (unknown)
105 :
106 : "cr0", "r3", "r4", "r5", "r6", "r7", "vs31"
107
108 );
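 /*
  * Illustrative summary: while f31 still reads 1 the parent deliberately
  * aborts the transaction (tabort.), so nothing is committed and the probe
  * repeats; only on a mismatch does it commit (tend.), letting the
  * unexpected value in r3 survive into 'unknown' below.
  */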
109
110 /*
 111 * On a leak, 'unknown' will contain the 'poison' value from the child;
 112 * otherwise (no leak) 'unknown' will contain the same value that
 113 * r3 held before entering transactional mode, i.e. 0x1.
114 */
115 fail_fp = unknown != 0x1;
116 if (fail_fp)
117 printf("Unknown value %#"PRIx64" leaked into f31!\n", unknown);
118 else
119 printf("Good, no poison or leaked value into FP registers\n");
120
121 asm (
122 /*
 123 * Set r3, r4, and vr31 to the known value 1 before entering
 124 * the transaction. They won't be written to after that.
125 */
126 " li 3, 0x1 ;"
127 " li 4, 0x1 ;"
128 " mtvsrd 63, 4 ;"
129
130 " lis 5, 14 ;"
131 " ori 5, 5, 19996 ;"
132 " sldi 5, 5, 16 ;" // r5 = 61440000000
133
134 " mfspr 6, 268 ;" // r6 (TB initial)
135 "1: mfspr 7, 268 ;" // r7 (TB current)
136 " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
137 " cmpd 7, 5 ;"
138 " bgt 3f ;" // yes, exit
139
140 /*
141 * Main loop to check vr31
142 */
143 " tbegin. ;" // no, try again
144 " beq 1b ;" // restart if no timeout
145 " mfvsrd 3, 63 ;" // read vr31
146 " cmpd 3, 4 ;" // vr31 == 1 ?
147 " bne 2f ;" // broken :-(
148 " tabort. 3 ;" // try another transaction
149 "2: tend. ;" // commit transaction
150 "3: mr %[unknown], 3 ;" // record r3
151
152 : [unknown] "=r" (unknown)
153 :
154 : "cr0", "r3", "r4", "r5", "r6", "r7", "vs63"
155
156 );
157
158 /*
 159 * On a leak, 'unknown' will contain the 'poison' value from the child;
 160 * otherwise (no leak) 'unknown' will contain the same value that
 161 * r3 held before entering transactional mode, i.e. 0x1.
162 */
163 fail_vr = unknown != 0x1;
164 if (fail_vr)
165 printf("Unknown value %#"PRIx64" leaked into vr31!\n", unknown);
166 else
167 printf("Good, no poison or leaked value into VEC registers\n");
168
169 kill(pid, SIGKILL);
170
171 return (fail_fp | fail_vr);
172}
173
174int main(int argc, char *argv[])
175{
176 /* Test completes in about 4m */
177 test_harness_set_timeout(250);
178 return test_harness(tm_poison_test, "tm_poison_test");
179}