author     Radim Krčmář <rkrcmar@redhat.com>    2016-11-29 08:26:55 -0500
committer  Radim Krčmář <rkrcmar@redhat.com>    2016-11-29 08:26:55 -0500
commit     ffcb09f27f46ea21305c7846de1fd3b76e4e6a6f (patch)
tree       7c0e2b94dfd54811714ad06705bf80df39a1f4e5
parent     bf65014d0b89b4e315f216f1f65b0d3410efcdc0 (diff)
parent     6ccad8cea5bcb0660f56677a5fdc52265f8ddf76 (diff)
Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
PPC KVM update for 4.10:

* Support for KVM guests on POWER9 using the hashed page table MMU.
* Updates and improvements to the halt-polling support on PPC, from
  Suraj Jitindar Singh.
* An optimization to speed up emulated MMIO, from Yongji Xie.
* Various other minor cleanups.
-rw-r--r--  Documentation/virtual/kvm/00-INDEX              |   2
-rw-r--r--  Documentation/virtual/kvm/api.txt               |   3
-rw-r--r--  Documentation/virtual/kvm/halt-polling.txt      | 127
-rw-r--r--  arch/powerpc/include/asm/asm-prototypes.h       |  44
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h   |  47
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h              |   1
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h             |  27
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h              |   7
-rw-r--r--  arch/powerpc/include/asm/mmu.h                  |   5
-rw-r--r--  arch/powerpc/include/asm/opal.h                 |   3
-rw-r--r--  arch/powerpc/include/asm/reg.h                  |  15
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h             |   5
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c               |   4
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S           |   2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c             |  63
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c             |   2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c                    | 254
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c            |  73
-rw-r--r--  arch/powerpc/kvm/book3s_hv_ras.c                |   1
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c             | 224
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c            |  24
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S         | 140
-rw-r--r--  arch/powerpc/kvm/powerpc.c                      |  16
-rw-r--r--  arch/powerpc/kvm/trace_hv.h                     |   2
-rw-r--r--  arch/powerpc/mm/hash_native_64.c                |  30
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c                 |  28
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c                 |  18
-rw-r--r--  arch/powerpc/mm/pgtable_64.c                    |  34
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S  |   3
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c           |   2
-rw-r--r--  arch/powerpc/platforms/ps3/htab.c               |   2
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c           |   2
-rw-r--r--  include/linux/kvm_host.h                        |   4
-rw-r--r--  include/uapi/linux/kvm.h                        |   5
-rw-r--r--  virt/kvm/kvm_main.c                             |   9
35 files changed, 977 insertions, 251 deletions
diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX
index fee9f2bf9c64..69fe1a8b7ad1 100644
--- a/Documentation/virtual/kvm/00-INDEX
+++ b/Documentation/virtual/kvm/00-INDEX
@@ -6,6 +6,8 @@ cpuid.txt
6 - KVM-specific cpuid leaves (x86). 6 - KVM-specific cpuid leaves (x86).
7devices/ 7devices/
8 - KVM_CAP_DEVICE_CTRL userspace API. 8 - KVM_CAP_DEVICE_CTRL userspace API.
9halt-polling.txt
10 - notes on halt-polling
9hypercalls.txt 11hypercalls.txt
10 - KVM hypercalls. 12 - KVM hypercalls.
11locking.txt 13locking.txt
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 739db9ab16b2..8a5ebd118313 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2023,6 +2023,8 @@ registers, find a list below:
2023 PPC | KVM_REG_PPC_WORT | 64 2023 PPC | KVM_REG_PPC_WORT | 64
2024 PPC | KVM_REG_PPC_SPRG9 | 64 2024 PPC | KVM_REG_PPC_SPRG9 | 64
2025 PPC | KVM_REG_PPC_DBSR | 32 2025 PPC | KVM_REG_PPC_DBSR | 32
2026 PPC | KVM_REG_PPC_TIDR | 64
2027 PPC | KVM_REG_PPC_PSSCR | 64
2026 PPC | KVM_REG_PPC_TM_GPR0 | 64 2028 PPC | KVM_REG_PPC_TM_GPR0 | 64
2027 ... 2029 ...
2028 PPC | KVM_REG_PPC_TM_GPR31 | 64 2030 PPC | KVM_REG_PPC_TM_GPR31 | 64
@@ -2039,6 +2041,7 @@ registers, find a list below:
2039 PPC | KVM_REG_PPC_TM_VSCR | 32 2041 PPC | KVM_REG_PPC_TM_VSCR | 32
2040 PPC | KVM_REG_PPC_TM_DSCR | 64 2042 PPC | KVM_REG_PPC_TM_DSCR | 64
2041 PPC | KVM_REG_PPC_TM_TAR | 64 2043 PPC | KVM_REG_PPC_TM_TAR | 64
2044 PPC | KVM_REG_PPC_TM_XER | 64
2042 | | 2045 | |
2043 MIPS | KVM_REG_MIPS_R0 | 64 2046 MIPS | KVM_REG_MIPS_R0 | 64
2044 ... 2047 ...
diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virtual/kvm/halt-polling.txt
new file mode 100644
index 000000000000..4a8418318769
--- /dev/null
+++ b/Documentation/virtual/kvm/halt-polling.txt
@@ -0,0 +1,127 @@
The KVM halt polling system
===========================

The KVM halt polling system provides a feature within KVM whereby the latency
of a guest can, under some circumstances, be reduced by polling in the host
for some time period after the guest has elected to no longer run by ceding.
That is, when a guest vcpu has ceded, or in the case of powerpc when all of the
vcpus of a single vcore have ceded, the host kernel polls for wakeup conditions
before giving up the cpu to the scheduler in order to let something else run.

Polling provides a latency advantage in cases where the guest can be run again
very quickly, by at least saving us a trip through the scheduler, normally on
the order of a few microseconds, although performance benefits are workload
dependent. In the event that no wakeup source arrives during the polling
interval, or some other task on the runqueue is runnable, the scheduler is
invoked. Thus halt polling is especially useful on workloads with very short
wakeup periods, where the time spent halt polling is minimised and the time
savings of not invoking the scheduler are noticeable.

The generic halt polling code is implemented in:

	virt/kvm/kvm_main.c: kvm_vcpu_block()

The powerpc kvm-hv specific case is implemented in:

	arch/powerpc/kvm/book3s_hv.c: kvmppc_vcore_blocked()

Halt Polling Interval
=====================

The maximum time for which to poll before invoking the scheduler, referred to
as the halt polling interval, is increased and decreased based on the perceived
effectiveness of the polling in an attempt to limit pointless polling.
This value is stored in either the vcpu struct:

	kvm_vcpu->halt_poll_ns

or, in the case of powerpc kvm-hv, in the vcore struct:

	kvmppc_vcore->halt_poll_ns

Thus this is a per-vcpu (or per-vcore) value.

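For orientation, a minimal sketch of where that value lives is shown below
(field placement only; the real struct definitions in include/linux/kvm_host.h
and arch/powerpc/include/asm/kvm_host.h contain many more members):

	/* Illustrative only -- abbreviated versions of the real structures. */
	struct kvm_vcpu {
		/* ... */
		unsigned int halt_poll_ns;	/* polling interval, generic case */
	};

	struct kvmppc_vcore {
		/* ... */
		unsigned int halt_poll_ns;	/* polling interval, powerpc kvm-hv */
	};
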
During polling, if a wakeup source is received within the halt polling interval,
the interval is left unchanged. In the event that a wakeup source isn't
received during the polling interval (and thus schedule is invoked) there are
two options: either the polling interval and total block time[0] were less than
the global max polling interval (see module params below), or the total block
time was greater than the global max polling interval.

In the event that both the polling interval and total block time were less than
the global max polling interval, the polling interval can be increased in the
hope that next time, during the longer polling interval, the wakeup source will
be received while the host is polling and the latency benefits will be
obtained. The polling interval is grown in the function grow_halt_poll_ns() and
is multiplied by the module parameter halt_poll_ns_grow.

In the event that the total block time was greater than the global max polling
interval, the host will never poll for long enough (limited by the global
max) to wake up during the polling interval, so the interval may as well be
shrunk in order to avoid pointless polling. The polling interval is shrunk in
the function shrink_halt_poll_ns() and is divided by the module parameter
halt_poll_ns_shrink, or set to 0 iff halt_poll_ns_shrink == 0.

It is worth noting that this adjustment process attempts to home in on some
steady-state polling interval, but will only really do a good job for wakeups
which come at an approximately constant rate; otherwise there will be constant
adjustment of the polling interval.

[0] total block time: the time between when the halt polling function is
    invoked and a wakeup source received (irrespective of
    whether the scheduler is invoked within that function).

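Expressed as code, the adjustment policy described above amounts to roughly the
following (a minimal sketch for illustration only; the clamping against the
global maximum and the starting values are assumptions, and the real
implementations live in virt/kvm/kvm_main.c and arch/powerpc/kvm/book3s_hv.c):

	static unsigned int halt_poll_ns = 10000;	/* global max (module parameter) */
	static unsigned int halt_poll_ns_grow = 2;	/* growth factor */
	static unsigned int halt_poll_ns_shrink;	/* shrink factor, 0 => reset */

	static unsigned int grow_halt_poll_ns(unsigned int val)
	{
		val *= halt_poll_ns_grow;
		if (val > halt_poll_ns)		/* never poll longer than the global max */
			val = halt_poll_ns;
		return val;
	}

	static unsigned int shrink_halt_poll_ns(unsigned int val)
	{
		if (halt_poll_ns_shrink == 0)	/* a shrink factor of 0 disables polling */
			return 0;
		return val / halt_poll_ns_shrink;
	}
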
Module Parameters
=================

The kvm module has 3 tuneable module parameters to adjust the global max
polling interval as well as the rate at which the polling interval is grown and
shrunk. These variables are defined in include/linux/kvm_host.h and as module
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
powerpc kvm-hv case.

Module Parameter     | Description                       | Default Value
--------------------------------------------------------------------------------
halt_poll_ns         | The global max polling interval   | KVM_HALT_POLL_NS_DEFAULT
                     | which defines the ceiling value   |
                     | of the polling interval for       | (per arch value)
                     | each vcpu.                        |
--------------------------------------------------------------------------------
halt_poll_ns_grow    | The value by which the halt       | 2
                     | polling interval is multiplied    |
                     | in the grow_halt_poll_ns()        |
                     | function.                         |
--------------------------------------------------------------------------------
halt_poll_ns_shrink  | The value by which the halt       | 0
                     | polling interval is divided in    |
                     | the shrink_halt_poll_ns()         |
                     | function.                         |
--------------------------------------------------------------------------------

These module parameters can be set through sysfs, via the files in:

	/sys/module/kvm/parameters/

Note that these module parameters are system-wide values and cannot be tuned
on a per-VM basis.

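For example, the global maximum could be adjusted at run time by writing to the
corresponding parameter file. The helper below is a hypothetical sketch (any
privileged write to the file has the same effect):

	#include <stdio.h>

	/* Set the global max halt-polling interval, in nanoseconds. */
	static int set_halt_poll_ns(unsigned long ns)
	{
		FILE *f = fopen("/sys/module/kvm/parameters/halt_poll_ns", "w");

		if (!f)
			return -1;
		fprintf(f, "%lu\n", ns);
		return fclose(f);
	}
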
Further Notes
=============

- Care should be taken when setting the halt_poll_ns module parameter, as a
large value has the potential to drive the cpu usage to 100% on a machine which
would otherwise be almost entirely idle. This is because even if a guest has
wakeups during which very little work is done and which are quite far apart, if
the period is shorter than the global max polling interval (halt_poll_ns) then
the host will always poll for the entire block time and thus cpu utilisation
will go to 100%.

- Halt polling essentially presents a trade-off between power usage and latency,
and the module parameters should be used to tune this trade-off. Idle cpu time
is essentially converted to host kernel time with the aim of decreasing latency
when entering the guest.

- Halt polling will only be conducted by the host when no other tasks are
runnable on that cpu; otherwise the polling will cease immediately and schedule
will be invoked to allow that other task to run. Thus halt polling does not
allow a guest to mount a denial of service attack on the cpu.
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d1492736d852..6c853bcd11fa 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -14,6 +14,9 @@
14 14
15#include <linux/threads.h> 15#include <linux/threads.h>
16#include <linux/kprobes.h> 16#include <linux/kprobes.h>
17#ifdef CONFIG_KVM
18#include <linux/kvm_host.h>
19#endif
17 20
18#include <uapi/asm/ucontext.h> 21#include <uapi/asm/ucontext.h>
19 22
@@ -109,4 +112,45 @@ void early_setup_secondary(void);
109/* time */ 112/* time */
110void accumulate_stolen_time(void); 113void accumulate_stolen_time(void);
111 114
115/* kvm */
116#ifdef CONFIG_KVM
117long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
118 unsigned long ioba, unsigned long tce);
119long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
120 unsigned long liobn, unsigned long ioba,
121 unsigned long tce_list, unsigned long npages);
122long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
123 unsigned long liobn, unsigned long ioba,
124 unsigned long tce_value, unsigned long npages);
125long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
126 unsigned int yield_count);
127long kvmppc_h_random(struct kvm_vcpu *vcpu);
128void kvmhv_commence_exit(int trap);
129long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
130void kvmppc_subcore_enter_guest(void);
131void kvmppc_subcore_exit_guest(void);
132long kvmppc_realmode_hmi_handler(void);
133long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
134 long pte_index, unsigned long pteh, unsigned long ptel);
135long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
136 unsigned long pte_index, unsigned long avpn);
137long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
138long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
139 unsigned long pte_index, unsigned long avpn,
140 unsigned long va);
141long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
142 unsigned long pte_index);
143long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
144 unsigned long pte_index);
145long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
146 unsigned long pte_index);
147long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
148 unsigned long slb_v, unsigned int status, bool data);
149unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
150int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
151 unsigned long mfrr);
152int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
153int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
154#endif
155
112#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ 156#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e407af2b7333..2e6a823fa502 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -70,7 +70,9 @@
70 70
71#define HPTE_V_SSIZE_SHIFT 62 71#define HPTE_V_SSIZE_SHIFT 62
72#define HPTE_V_AVPN_SHIFT 7 72#define HPTE_V_AVPN_SHIFT 7
73#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff)
73#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) 74#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
75#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80)
74#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) 76#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
75#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) 77#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
76#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) 78#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
@@ -80,14 +82,16 @@
80#define HPTE_V_VALID ASM_CONST(0x0000000000000001) 82#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
81 83
82/* 84/*
83 * ISA 3.0 have a different HPTE format. 85 * ISA 3.0 has a different HPTE format.
84 */ 86 */
85#define HPTE_R_3_0_SSIZE_SHIFT 58 87#define HPTE_R_3_0_SSIZE_SHIFT 58
88#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT)
86#define HPTE_R_PP0 ASM_CONST(0x8000000000000000) 89#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
87#define HPTE_R_TS ASM_CONST(0x4000000000000000) 90#define HPTE_R_TS ASM_CONST(0x4000000000000000)
88#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) 91#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
89#define HPTE_R_RPN_SHIFT 12 92#define HPTE_R_RPN_SHIFT 12
90#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) 93#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
94#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
91#define HPTE_R_PP ASM_CONST(0x0000000000000003) 95#define HPTE_R_PP ASM_CONST(0x0000000000000003)
92#define HPTE_R_PPP ASM_CONST(0x8000000000000003) 96#define HPTE_R_PPP ASM_CONST(0x8000000000000003)
93#define HPTE_R_N ASM_CONST(0x0000000000000004) 97#define HPTE_R_N ASM_CONST(0x0000000000000004)
@@ -316,12 +320,43 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
316 */ 320 */
317 v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); 321 v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
318 v <<= HPTE_V_AVPN_SHIFT; 322 v <<= HPTE_V_AVPN_SHIFT;
319 if (!cpu_has_feature(CPU_FTR_ARCH_300)) 323 v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
320 v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
321 return v; 324 return v;
322} 325}
323 326
324/* 327/*
328 * ISA v3.0 defines a new HPTE format, which differs from the old
329 * format in having smaller AVPN and ARPN fields, and the B field
330 * in the second dword instead of the first.
331 */
332static inline unsigned long hpte_old_to_new_v(unsigned long v)
333{
334 /* trim AVPN, drop B */
335 return v & HPTE_V_COMMON_BITS;
336}
337
338static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
339{
340 /* move B field from 1st to 2nd dword, trim ARPN */
341 return (r & ~HPTE_R_3_0_SSIZE_MASK) |
342 (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
343}
344
345static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
346{
347 /* insert B field */
348 return (v & HPTE_V_COMMON_BITS) |
349 ((r & HPTE_R_3_0_SSIZE_MASK) <<
350 (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
351}
352
353static inline unsigned long hpte_new_to_old_r(unsigned long r)
354{
355 /* clear out B field */
356 return r & ~HPTE_R_3_0_SSIZE_MASK;
357}
358
359/*
325 * This function sets the AVPN and L fields of the HPTE appropriately 360 * This function sets the AVPN and L fields of the HPTE appropriately
326 * using the base page size and actual page size. 361 * using the base page size and actual page size.
327 */ 362 */
@@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
341 * aligned for the requested page size 376 * aligned for the requested page size
342 */ 377 */
343static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, 378static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
344 int actual_psize, int ssize) 379 int actual_psize)
345{ 380{
346
347 if (cpu_has_feature(CPU_FTR_ARCH_300))
348 pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
349
350 /* A 4K page needs no special encoding */ 381 /* A 4K page needs no special encoding */
351 if (actual_psize == MMU_PAGE_4K) 382 if (actual_psize == MMU_PAGE_4K)
352 return pa & HPTE_R_RPN; 383 return pa & HPTE_R_RPN;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 05cabed3d1bd..09a802bb702f 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -99,6 +99,7 @@
99#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 99#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40
100#define BOOK3S_INTERRUPT_HMI 0xe60 100#define BOOK3S_INTERRUPT_HMI 0xe60
101#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 101#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80
102#define BOOK3S_INTERRUPT_H_VIRT 0xea0
102#define BOOK3S_INTERRUPT_PERFMON 0xf00 103#define BOOK3S_INTERRUPT_PERFMON 0xf00
103#define BOOK3S_INTERRUPT_ALTIVEC 0xf20 104#define BOOK3S_INTERRUPT_ALTIVEC 0xf20
104#define BOOK3S_INTERRUPT_VSX 0xf40 105#define BOOK3S_INTERRUPT_VSX 0xf40
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 28350a294b1e..e59b172666cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -48,7 +48,7 @@
48#ifdef CONFIG_KVM_MMIO 48#ifdef CONFIG_KVM_MMIO
49#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 49#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
50#endif 50#endif
51#define KVM_HALT_POLL_NS_DEFAULT 500000 51#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
52 52
53/* These values are internal and can be increased later */ 53/* These values are internal and can be increased later */
54#define KVM_NR_IRQCHIPS 1 54#define KVM_NR_IRQCHIPS 1
@@ -244,8 +244,10 @@ struct kvm_arch_memory_slot {
244struct kvm_arch { 244struct kvm_arch {
245 unsigned int lpid; 245 unsigned int lpid;
246#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 246#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
247 unsigned int tlb_sets;
247 unsigned long hpt_virt; 248 unsigned long hpt_virt;
248 struct revmap_entry *revmap; 249 struct revmap_entry *revmap;
250 atomic64_t mmio_update;
249 unsigned int host_lpid; 251 unsigned int host_lpid;
250 unsigned long host_lpcr; 252 unsigned long host_lpcr;
251 unsigned long sdr1; 253 unsigned long sdr1;
@@ -408,6 +410,24 @@ struct kvmppc_passthru_irqmap {
408#define KVMPPC_IRQ_MPIC 1 410#define KVMPPC_IRQ_MPIC 1
409#define KVMPPC_IRQ_XICS 2 411#define KVMPPC_IRQ_XICS 2
410 412
413#define MMIO_HPTE_CACHE_SIZE 4
414
415struct mmio_hpte_cache_entry {
416 unsigned long hpte_v;
417 unsigned long hpte_r;
418 unsigned long rpte;
419 unsigned long pte_index;
420 unsigned long eaddr;
421 unsigned long slb_v;
422 long mmio_update;
423 unsigned int slb_base_pshift;
424};
425
426struct mmio_hpte_cache {
427 struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
428 unsigned int index;
429};
430
411struct openpic; 431struct openpic;
412 432
413struct kvm_vcpu_arch { 433struct kvm_vcpu_arch {
@@ -498,6 +518,8 @@ struct kvm_vcpu_arch {
498 ulong tcscr; 518 ulong tcscr;
499 ulong acop; 519 ulong acop;
500 ulong wort; 520 ulong wort;
521 ulong tid;
522 ulong psscr;
501 ulong shadow_srr1; 523 ulong shadow_srr1;
502#endif 524#endif
503 u32 vrsave; /* also USPRG0 */ 525 u32 vrsave; /* also USPRG0 */
@@ -546,6 +568,7 @@ struct kvm_vcpu_arch {
546 u64 tfiar; 568 u64 tfiar;
547 569
548 u32 cr_tm; 570 u32 cr_tm;
571 u64 xer_tm;
549 u64 lr_tm; 572 u64 lr_tm;
550 u64 ctr_tm; 573 u64 ctr_tm;
551 u64 amr_tm; 574 u64 amr_tm;
@@ -655,9 +678,11 @@ struct kvm_vcpu_arch {
655#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 678#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
656 struct kvm_vcpu_arch_shared shregs; 679 struct kvm_vcpu_arch_shared shregs;
657 680
681 struct mmio_hpte_cache mmio_cache;
658 unsigned long pgfault_addr; 682 unsigned long pgfault_addr;
659 long pgfault_index; 683 long pgfault_index;
660 unsigned long pgfault_hpte[2]; 684 unsigned long pgfault_hpte[2];
685 struct mmio_hpte_cache_entry *pgfault_cache;
661 686
662 struct task_struct *run_task; 687 struct task_struct *run_task;
663 struct kvm_run *kvm_run; 688 struct kvm_run *kvm_run;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index f6e49640dbe1..a5b94bed1423 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
483 unsigned long host_irq); 483 unsigned long host_irq);
484extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq, 484extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
485 unsigned long host_irq); 485 unsigned long host_irq);
486extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, 486extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
487 struct kvmppc_irq_map *irq_map, 487 struct kvmppc_irq_map *irq_map,
488 struct kvmppc_passthru_irqmap *pimap); 488 struct kvmppc_passthru_irqmap *pimap,
489 bool *again);
489extern int h_ipi_redirect; 490extern int h_ipi_redirect;
490#else 491#else
491static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( 492static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e88368354e49..060b40b1bc3d 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -208,6 +208,11 @@ extern u64 ppc64_rma_size;
208/* Cleanup function used by kexec */ 208/* Cleanup function used by kexec */
209extern void mmu_cleanup_all(void); 209extern void mmu_cleanup_all(void);
210extern void radix__mmu_cleanup_all(void); 210extern void radix__mmu_cleanup_all(void);
211
212/* Functions for creating and updating partition table on POWER9 */
213extern void mmu_partition_table_init(void);
214extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
215 unsigned long dw1);
211#endif /* CONFIG_PPC64 */ 216#endif /* CONFIG_PPC64 */
212 217
213struct mm_struct; 218struct mm_struct;
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e958b7096f19..5c7db0f1a708 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
220int64_t opal_pci_poll2(uint64_t id, uint64_t data); 220int64_t opal_pci_poll2(uint64_t id, uint64_t data);
221 221
222int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); 222int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
223int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
223int64_t opal_int_set_cppr(uint8_t cppr); 224int64_t opal_int_set_cppr(uint8_t cppr);
224int64_t opal_int_eoi(uint32_t xirr); 225int64_t opal_int_eoi(uint32_t xirr);
226int64_t opal_rm_int_eoi(uint32_t xirr);
225int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); 227int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
228int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
226int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, 229int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
227 uint32_t pe_num, uint32_t tce_size, 230 uint32_t pe_num, uint32_t tce_size,
228 uint64_t dma_addr, uint32_t npages); 231 uint64_t dma_addr, uint32_t npages);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 9cd4e8cbc78c..04aa1ee8cdb6 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -153,6 +153,8 @@
153#define PSSCR_EC 0x00100000 /* Exit Criterion */ 153#define PSSCR_EC 0x00100000 /* Exit Criterion */
154#define PSSCR_ESL 0x00200000 /* Enable State Loss */ 154#define PSSCR_ESL 0x00200000 /* Enable State Loss */
155#define PSSCR_SD 0x00400000 /* Status Disable */ 155#define PSSCR_SD 0x00400000 /* Status Disable */
156#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
157#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
156 158
157/* Floating Point Status and Control Register (FPSCR) Fields */ 159/* Floating Point Status and Control Register (FPSCR) Fields */
158#define FPSCR_FX 0x80000000 /* FPU exception summary */ 160#define FPSCR_FX 0x80000000 /* FPU exception summary */
@@ -236,6 +238,7 @@
236#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ 238#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
237#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ 239#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
238#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ 240#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
241#define SPRN_TIDR 144 /* Thread ID register */
239#define SPRN_CTRLF 0x088 242#define SPRN_CTRLF 0x088
240#define SPRN_CTRLT 0x098 243#define SPRN_CTRLT 0x098
241#define CTRL_CT 0xc0000000 /* current thread */ 244#define CTRL_CT 0xc0000000 /* current thread */
@@ -294,6 +297,7 @@
294#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ 297#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
295#define SPRN_LMRR 0x32D /* Load Monitor Region Register */ 298#define SPRN_LMRR 0x32D /* Load Monitor Region Register */
296#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ 299#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */
300#define SPRN_ASDR 0x330 /* Access segment descriptor register */
297#define SPRN_IC 0x350 /* Virtual Instruction Count */ 301#define SPRN_IC 0x350 /* Virtual Instruction Count */
298#define SPRN_VTB 0x351 /* Virtual Time Base */ 302#define SPRN_VTB 0x351 /* Virtual Time Base */
299#define SPRN_LDBAR 0x352 /* LD Base Address Register */ 303#define SPRN_LDBAR 0x352 /* LD Base Address Register */
@@ -305,6 +309,7 @@
305 309
306/* HFSCR and FSCR bit numbers are the same */ 310/* HFSCR and FSCR bit numbers are the same */
307#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ 311#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */
312#define FSCR_MSGP_LG 10 /* Enable MSGP */
308#define FSCR_TAR_LG 8 /* Enable Target Address Register */ 313#define FSCR_TAR_LG 8 /* Enable Target Address Register */
309#define FSCR_EBB_LG 7 /* Enable Event Based Branching */ 314#define FSCR_EBB_LG 7 /* Enable Event Based Branching */
310#define FSCR_TM_LG 5 /* Enable Transactional Memory */ 315#define FSCR_TM_LG 5 /* Enable Transactional Memory */
@@ -320,6 +325,7 @@
320#define FSCR_DSCR __MASK(FSCR_DSCR_LG) 325#define FSCR_DSCR __MASK(FSCR_DSCR_LG)
321#define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ 326#define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */
322#define HFSCR_LM __MASK(FSCR_LM_LG) 327#define HFSCR_LM __MASK(FSCR_LM_LG)
328#define HFSCR_MSGP __MASK(FSCR_MSGP_LG)
323#define HFSCR_TAR __MASK(FSCR_TAR_LG) 329#define HFSCR_TAR __MASK(FSCR_TAR_LG)
324#define HFSCR_EBB __MASK(FSCR_EBB_LG) 330#define HFSCR_EBB __MASK(FSCR_EBB_LG)
325#define HFSCR_TM __MASK(FSCR_TM_LG) 331#define HFSCR_TM __MASK(FSCR_TM_LG)
@@ -355,8 +361,10 @@
355#define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */ 361#define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */
356#define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */ 362#define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */
357#define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */ 363#define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */
364#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */
358#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ 365#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */
359#define LPCR_MER_SH 11 366#define LPCR_MER_SH 11
367#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */
360#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ 368#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */
361#define LPCR_LPES 0x0000000c 369#define LPCR_LPES 0x0000000c
362#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */ 370#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */
@@ -377,6 +385,12 @@
377#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ 385#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
378#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ 386#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
379#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */ 387#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
388/*
389 * These bits are used in the function kvmppc_set_arch_compat() to specify and
390 * determine both the compatibility level which we want to emulate and the
391 * compatibility level which the host is capable of emulating.
392 */
393#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
380#define PCR_ARCH_206 0x4 /* Architecture 2.06 */ 394#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
381#define PCR_ARCH_205 0x2 /* Architecture 2.05 */ 395#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
382#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ 396#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
@@ -1218,6 +1232,7 @@
1218#define PVR_ARCH_206 0x0f000003 1232#define PVR_ARCH_206 0x0f000003
1219#define PVR_ARCH_206p 0x0f100003 1233#define PVR_ARCH_206p 0x0f100003
1220#define PVR_ARCH_207 0x0f000004 1234#define PVR_ARCH_207 0x0f000004
1235#define PVR_ARCH_300 0x0f000005
1221 1236
1222/* Macros for setting and retrieving special purpose registers */ 1237/* Macros for setting and retrieving special purpose registers */
1223#ifndef __ASSEMBLY__ 1238#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c93cf35ce379..3603b6f51b11 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -573,6 +573,10 @@ struct kvm_get_htab_header {
573#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) 573#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
574#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) 574#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
575 575
576/* POWER9 registers */
577#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
578#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
579
576/* Transactional Memory checkpointed state: 580/* Transactional Memory checkpointed state:
577 * This is all GPRs, all VSX regs and a subset of SPRs 581 * This is all GPRs, all VSX regs and a subset of SPRs
578 */ 582 */
@@ -596,6 +600,7 @@ struct kvm_get_htab_header {
596#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) 600#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
597#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) 601#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
598#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) 602#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
603#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
599 604
600/* PPC64 eXternal Interrupt Controller Specification */ 605/* PPC64 eXternal Interrupt Controller Specification */
601#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ 606#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index caec7bf3b99a..195a9fc8f81c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -487,6 +487,7 @@ int main(void)
487 487
488 /* book3s */ 488 /* book3s */
489#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 489#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
490 DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets));
490 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); 491 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
491 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); 492 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
492 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); 493 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -548,6 +549,8 @@ int main(void)
548 DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); 549 DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
549 DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); 550 DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
550 DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); 551 DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
552 DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid));
553 DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr));
551 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); 554 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
552 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); 555 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
553 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); 556 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
@@ -569,6 +572,7 @@ int main(void)
569 DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); 572 DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
570 DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); 573 DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
571 DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); 574 DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
575 DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm));
572 DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); 576 DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
573 DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); 577 DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
574 DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); 578 DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 52ff3f025437..bdfc1c67eb38 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -174,7 +174,7 @@ __init_FSCR:
174__init_HFSCR: 174__init_HFSCR:
175 mfspr r3,SPRN_HFSCR 175 mfspr r3,SPRN_HFSCR
176 ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ 176 ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
177 HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB 177 HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP
178 mtspr SPRN_HFSCR,r3 178 mtspr SPRN_HFSCR,r3
179 blr 179 blr
180 180
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 05f09ae82587..b795dd1ac2ef 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
88 /* 128 (2**7) bytes in each HPTEG */ 88 /* 128 (2**7) bytes in each HPTEG */
89 kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; 89 kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
90 90
91 atomic64_set(&kvm->arch.mmio_update, 0);
92
91 /* Allocate reverse map array */ 93 /* Allocate reverse map array */
92 rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); 94 rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
93 if (!rev) { 95 if (!rev) {
@@ -255,7 +257,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
255 kvmppc_set_msr(vcpu, msr); 257 kvmppc_set_msr(vcpu, msr);
256} 258}
257 259
258long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, 260static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
259 long pte_index, unsigned long pteh, 261 long pte_index, unsigned long pteh,
260 unsigned long ptel, unsigned long *pte_idx_ret) 262 unsigned long ptel, unsigned long *pte_idx_ret)
261{ 263{
@@ -312,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
312 struct kvmppc_slb *slbe; 314 struct kvmppc_slb *slbe;
313 unsigned long slb_v; 315 unsigned long slb_v;
314 unsigned long pp, key; 316 unsigned long pp, key;
315 unsigned long v, gr; 317 unsigned long v, orig_v, gr;
316 __be64 *hptep; 318 __be64 *hptep;
317 int index; 319 int index;
318 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); 320 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
@@ -337,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
337 return -ENOENT; 339 return -ENOENT;
338 } 340 }
339 hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); 341 hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
340 v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; 342 v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
343 if (cpu_has_feature(CPU_FTR_ARCH_300))
344 v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
341 gr = kvm->arch.revmap[index].guest_rpte; 345 gr = kvm->arch.revmap[index].guest_rpte;
342 346
343 unlock_hpte(hptep, v); 347 unlock_hpte(hptep, orig_v);
344 preempt_enable(); 348 preempt_enable();
345 349
346 gpte->eaddr = eaddr; 350 gpte->eaddr = eaddr;
@@ -438,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
438{ 442{
439 struct kvm *kvm = vcpu->kvm; 443 struct kvm *kvm = vcpu->kvm;
440 unsigned long hpte[3], r; 444 unsigned long hpte[3], r;
445 unsigned long hnow_v, hnow_r;
441 __be64 *hptep; 446 __be64 *hptep;
442 unsigned long mmu_seq, psize, pte_size; 447 unsigned long mmu_seq, psize, pte_size;
443 unsigned long gpa_base, gfn_base; 448 unsigned long gpa_base, gfn_base;
@@ -451,6 +456,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
451 unsigned int writing, write_ok; 456 unsigned int writing, write_ok;
452 struct vm_area_struct *vma; 457 struct vm_area_struct *vma;
453 unsigned long rcbits; 458 unsigned long rcbits;
459 long mmio_update;
454 460
455 /* 461 /*
456 * Real-mode code has already searched the HPT and found the 462 * Real-mode code has already searched the HPT and found the
@@ -460,6 +466,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
460 */ 466 */
461 if (ea != vcpu->arch.pgfault_addr) 467 if (ea != vcpu->arch.pgfault_addr)
462 return RESUME_GUEST; 468 return RESUME_GUEST;
469
470 if (vcpu->arch.pgfault_cache) {
471 mmio_update = atomic64_read(&kvm->arch.mmio_update);
472 if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
473 r = vcpu->arch.pgfault_cache->rpte;
474 psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r);
475 gpa_base = r & HPTE_R_RPN & ~(psize - 1);
476 gfn_base = gpa_base >> PAGE_SHIFT;
477 gpa = gpa_base | (ea & (psize - 1));
478 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
479 dsisr & DSISR_ISSTORE);
480 }
481 }
463 index = vcpu->arch.pgfault_index; 482 index = vcpu->arch.pgfault_index;
464 hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); 483 hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
465 rev = &kvm->arch.revmap[index]; 484 rev = &kvm->arch.revmap[index];
@@ -472,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
472 unlock_hpte(hptep, hpte[0]); 491 unlock_hpte(hptep, hpte[0]);
473 preempt_enable(); 492 preempt_enable();
474 493
494 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
495 hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
496 hpte[1] = hpte_new_to_old_r(hpte[1]);
497 }
475 if (hpte[0] != vcpu->arch.pgfault_hpte[0] || 498 if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
476 hpte[1] != vcpu->arch.pgfault_hpte[1]) 499 hpte[1] != vcpu->arch.pgfault_hpte[1])
477 return RESUME_GUEST; 500 return RESUME_GUEST;
@@ -575,16 +598,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
575 */ 598 */
576 if (psize < PAGE_SIZE) 599 if (psize < PAGE_SIZE)
577 psize = PAGE_SIZE; 600 psize = PAGE_SIZE;
578 r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1)); 601 r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
602 ((pfn << PAGE_SHIFT) & ~(psize - 1));
579 if (hpte_is_writable(r) && !write_ok) 603 if (hpte_is_writable(r) && !write_ok)
580 r = hpte_make_readonly(r); 604 r = hpte_make_readonly(r);
581 ret = RESUME_GUEST; 605 ret = RESUME_GUEST;
582 preempt_disable(); 606 preempt_disable();
583 while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 607 while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
584 cpu_relax(); 608 cpu_relax();
585 if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || 609 hnow_v = be64_to_cpu(hptep[0]);
586 be64_to_cpu(hptep[1]) != hpte[1] || 610 hnow_r = be64_to_cpu(hptep[1]);
587 rev->guest_rpte != hpte[2]) 611 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
612 hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
613 hnow_r = hpte_new_to_old_r(hnow_r);
614 }
615 if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
616 rev->guest_rpte != hpte[2])
588 /* HPTE has been changed under us; let the guest retry */ 617 /* HPTE has been changed under us; let the guest retry */
589 goto out_unlock; 618 goto out_unlock;
590 hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 619 hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
@@ -615,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
615 kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); 644 kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
616 } 645 }
617 646
647 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
648 r = hpte_old_to_new_r(hpte[0], r);
649 hpte[0] = hpte_old_to_new_v(hpte[0]);
650 }
618 hptep[1] = cpu_to_be64(r); 651 hptep[1] = cpu_to_be64(r);
619 eieio(); 652 eieio();
620 __unlock_hpte(hptep, hpte[0]); 653 __unlock_hpte(hptep, hpte[0]);
@@ -758,6 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
758 hpte_rpn(ptel, psize) == gfn) { 791 hpte_rpn(ptel, psize) == gfn) {
759 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); 792 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
760 kvmppc_invalidate_hpte(kvm, hptep, i); 793 kvmppc_invalidate_hpte(kvm, hptep, i);
794 hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
761 /* Harvest R and C */ 795 /* Harvest R and C */
762 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); 796 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
763 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 797 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
@@ -1165,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
1165 unsigned long *hpte, struct revmap_entry *revp, 1199 unsigned long *hpte, struct revmap_entry *revp,
1166 int want_valid, int first_pass) 1200 int want_valid, int first_pass)
1167{ 1201{
1168 unsigned long v, r; 1202 unsigned long v, r, hr;
1169 unsigned long rcbits_unset; 1203 unsigned long rcbits_unset;
1170 int ok = 1; 1204 int ok = 1;
1171 int valid, dirty; 1205 int valid, dirty;
@@ -1192,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
1192 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) 1226 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
1193 cpu_relax(); 1227 cpu_relax();
1194 v = be64_to_cpu(hptp[0]); 1228 v = be64_to_cpu(hptp[0]);
1229 hr = be64_to_cpu(hptp[1]);
1230 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1231 v = hpte_new_to_old_v(v, hr);
1232 hr = hpte_new_to_old_r(hr);
1233 }
1195 1234
1196 /* re-evaluate valid and dirty from synchronized HPTE value */ 1235 /* re-evaluate valid and dirty from synchronized HPTE value */
1197 valid = !!(v & HPTE_V_VALID); 1236 valid = !!(v & HPTE_V_VALID);
@@ -1199,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
1199 1238
1200 /* Harvest R and C into guest view if necessary */ 1239 /* Harvest R and C into guest view if necessary */
1201 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); 1240 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
1202 if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { 1241 if (valid && (rcbits_unset & hr)) {
1203 revp->guest_rpte |= (be64_to_cpu(hptp[1]) & 1242 revp->guest_rpte |= (hr &
1204 (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; 1243 (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
1205 dirty = 1; 1244 dirty = 1;
1206 } 1245 }
@@ -1608,7 +1647,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
1608 return ret; 1647 return ret;
1609} 1648}
1610 1649
1611ssize_t debugfs_htab_write(struct file *file, const char __user *buf, 1650static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
1612 size_t len, loff_t *ppos) 1651 size_t len, loff_t *ppos)
1613{ 1652{
1614 return -EACCES; 1653 return -EACCES;
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index d461c440889a..66e98276d93e 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -39,7 +39,7 @@
39#include <asm/udbg.h> 39#include <asm/udbg.h>
40#include <asm/iommu.h> 40#include <asm/iommu.h>
41#include <asm/tce.h> 41#include <asm/tce.h>
42#include <asm/iommu.h> 42#include <asm/asm-prototypes.h>
43 43
44#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 44#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
45 45
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3686471be32b..be8f83c999f3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -54,6 +54,9 @@
54#include <asm/dbell.h> 54#include <asm/dbell.h>
55#include <asm/hmi.h> 55#include <asm/hmi.h>
56#include <asm/pnv-pci.h> 56#include <asm/pnv-pci.h>
57#include <asm/mmu.h>
58#include <asm/opal.h>
59#include <asm/xics.h>
57#include <linux/gfp.h> 60#include <linux/gfp.h>
58#include <linux/vmalloc.h> 61#include <linux/vmalloc.h>
59#include <linux/highmem.h> 62#include <linux/highmem.h>
@@ -62,6 +65,7 @@
62#include <linux/irqbypass.h> 65#include <linux/irqbypass.h>
63#include <linux/module.h> 66#include <linux/module.h>
64#include <linux/compiler.h> 67#include <linux/compiler.h>
68#include <linux/of.h>
65 69
66#include "book3s.h" 70#include "book3s.h"
67 71
@@ -104,23 +108,6 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
104MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); 108MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
105#endif 109#endif
106 110
107/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
108static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
109module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
110MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
111
112/* Factor by which the vcore halt poll interval is grown, default is to double
113 */
114static unsigned int halt_poll_ns_grow = 2;
115module_param(halt_poll_ns_grow, int, S_IRUGO);
116MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
117
118/* Factor by which the vcore halt poll interval is shrunk, default is to reset
119 */
120static unsigned int halt_poll_ns_shrink;
121module_param(halt_poll_ns_shrink, int, S_IRUGO);
122MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
123
124static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 111static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
125static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 112static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
126 113
@@ -146,12 +133,21 @@ static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
146 133
147static bool kvmppc_ipi_thread(int cpu) 134static bool kvmppc_ipi_thread(int cpu)
148{ 135{
136 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
137
138 /* On POWER9 we can use msgsnd to IPI any cpu */
139 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
140 msg |= get_hard_smp_processor_id(cpu);
141 smp_mb();
142 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
143 return true;
144 }
145
149 /* On POWER8 for IPIs to threads in the same core, use msgsnd */ 146 /* On POWER8 for IPIs to threads in the same core, use msgsnd */
150 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 147 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
151 preempt_disable(); 148 preempt_disable();
152 if (cpu_first_thread_sibling(cpu) == 149 if (cpu_first_thread_sibling(cpu) ==
153 cpu_first_thread_sibling(smp_processor_id())) { 150 cpu_first_thread_sibling(smp_processor_id())) {
154 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
155 msg |= cpu_thread_in_core(cpu); 151 msg |= cpu_thread_in_core(cpu);
156 smp_mb(); 152 smp_mb();
157 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); 153 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
@@ -162,8 +158,12 @@ static bool kvmppc_ipi_thread(int cpu)
162 } 158 }
163 159
164#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 160#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
165 if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) { 161 if (cpu >= 0 && cpu < nr_cpu_ids) {
166 xics_wake_cpu(cpu); 162 if (paca[cpu].kvm_hstate.xics_phys) {
163 xics_wake_cpu(cpu);
164 return true;
165 }
166 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
167 return true; 167 return true;
168 } 168 }
169#endif 169#endif
@@ -299,41 +299,54 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
299 vcpu->arch.pvr = pvr; 299 vcpu->arch.pvr = pvr;
300} 300}
301 301
302/* Dummy value used in computing PCR value below */
303#define PCR_ARCH_300 (PCR_ARCH_207 << 1)
304
302static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) 305static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
303{ 306{
304 unsigned long pcr = 0; 307 unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
305 struct kvmppc_vcore *vc = vcpu->arch.vcore; 308 struct kvmppc_vcore *vc = vcpu->arch.vcore;
306 309
310 /* We can (emulate) our own architecture version and anything older */
311 if (cpu_has_feature(CPU_FTR_ARCH_300))
312 host_pcr_bit = PCR_ARCH_300;
313 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
314 host_pcr_bit = PCR_ARCH_207;
315 else if (cpu_has_feature(CPU_FTR_ARCH_206))
316 host_pcr_bit = PCR_ARCH_206;
317 else
318 host_pcr_bit = PCR_ARCH_205;
319
320 /* Determine lowest PCR bit needed to run guest in given PVR level */
321 guest_pcr_bit = host_pcr_bit;
307 if (arch_compat) { 322 if (arch_compat) {
308 switch (arch_compat) { 323 switch (arch_compat) {
309 case PVR_ARCH_205: 324 case PVR_ARCH_205:
310 /* 325 guest_pcr_bit = PCR_ARCH_205;
311 * If an arch bit is set in PCR, all the defined
312 * higher-order arch bits also have to be set.
313 */
314 pcr = PCR_ARCH_206 | PCR_ARCH_205;
315 break; 326 break;
316 case PVR_ARCH_206: 327 case PVR_ARCH_206:
317 case PVR_ARCH_206p: 328 case PVR_ARCH_206p:
318 pcr = PCR_ARCH_206; 329 guest_pcr_bit = PCR_ARCH_206;
319 break; 330 break;
320 case PVR_ARCH_207: 331 case PVR_ARCH_207:
332 guest_pcr_bit = PCR_ARCH_207;
333 break;
334 case PVR_ARCH_300:
335 guest_pcr_bit = PCR_ARCH_300;
321 break; 336 break;
322 default: 337 default:
323 return -EINVAL; 338 return -EINVAL;
324 } 339 }
325
326 if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
327 /* POWER7 can't emulate POWER8 */
328 if (!(pcr & PCR_ARCH_206))
329 return -EINVAL;
330 pcr &= ~PCR_ARCH_206;
331 }
332 } 340 }
333 341
342 /* Check requested PCR bits don't exceed our capabilities */
343 if (guest_pcr_bit > host_pcr_bit)
344 return -EINVAL;
345
334 spin_lock(&vc->lock); 346 spin_lock(&vc->lock);
335 vc->arch_compat = arch_compat; 347 vc->arch_compat = arch_compat;
336 vc->pcr = pcr; 348 /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
349 vc->pcr = host_pcr_bit - guest_pcr_bit;
337 spin_unlock(&vc->lock); 350 spin_unlock(&vc->lock);
338 351
339 return 0; 352 return 0;
@@ -945,6 +958,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
945 break; 958 break;
946 case BOOK3S_INTERRUPT_EXTERNAL: 959 case BOOK3S_INTERRUPT_EXTERNAL:
947 case BOOK3S_INTERRUPT_H_DOORBELL: 960 case BOOK3S_INTERRUPT_H_DOORBELL:
961 case BOOK3S_INTERRUPT_H_VIRT:
948 vcpu->stat.ext_intr_exits++; 962 vcpu->stat.ext_intr_exits++;
949 r = RESUME_GUEST; 963 r = RESUME_GUEST;
950 break; 964 break;
@@ -1229,6 +1243,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1229 case KVM_REG_PPC_WORT: 1243 case KVM_REG_PPC_WORT:
1230 *val = get_reg_val(id, vcpu->arch.wort); 1244 *val = get_reg_val(id, vcpu->arch.wort);
1231 break; 1245 break;
1246 case KVM_REG_PPC_TIDR:
1247 *val = get_reg_val(id, vcpu->arch.tid);
1248 break;
1249 case KVM_REG_PPC_PSSCR:
1250 *val = get_reg_val(id, vcpu->arch.psscr);
1251 break;
1232 case KVM_REG_PPC_VPA_ADDR: 1252 case KVM_REG_PPC_VPA_ADDR:
1233 spin_lock(&vcpu->arch.vpa_update_lock); 1253 spin_lock(&vcpu->arch.vpa_update_lock);
1234 *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); 1254 *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
@@ -1288,6 +1308,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1288 case KVM_REG_PPC_TM_CR: 1308 case KVM_REG_PPC_TM_CR:
1289 *val = get_reg_val(id, vcpu->arch.cr_tm); 1309 *val = get_reg_val(id, vcpu->arch.cr_tm);
1290 break; 1310 break;
1311 case KVM_REG_PPC_TM_XER:
1312 *val = get_reg_val(id, vcpu->arch.xer_tm);
1313 break;
1291 case KVM_REG_PPC_TM_LR: 1314 case KVM_REG_PPC_TM_LR:
1292 *val = get_reg_val(id, vcpu->arch.lr_tm); 1315 *val = get_reg_val(id, vcpu->arch.lr_tm);
1293 break; 1316 break;
@@ -1427,6 +1450,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1427 case KVM_REG_PPC_WORT: 1450 case KVM_REG_PPC_WORT:
1428 vcpu->arch.wort = set_reg_val(id, *val); 1451 vcpu->arch.wort = set_reg_val(id, *val);
1429 break; 1452 break;
1453 case KVM_REG_PPC_TIDR:
1454 vcpu->arch.tid = set_reg_val(id, *val);
1455 break;
1456 case KVM_REG_PPC_PSSCR:
1457 vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
1458 break;
1430 case KVM_REG_PPC_VPA_ADDR: 1459 case KVM_REG_PPC_VPA_ADDR:
1431 addr = set_reg_val(id, *val); 1460 addr = set_reg_val(id, *val);
1432 r = -EINVAL; 1461 r = -EINVAL;
@@ -1498,6 +1527,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1498 case KVM_REG_PPC_TM_CR: 1527 case KVM_REG_PPC_TM_CR:
1499 vcpu->arch.cr_tm = set_reg_val(id, *val); 1528 vcpu->arch.cr_tm = set_reg_val(id, *val);
1500 break; 1529 break;
1530 case KVM_REG_PPC_TM_XER:
1531 vcpu->arch.xer_tm = set_reg_val(id, *val);
1532 break;
1501 case KVM_REG_PPC_TM_LR: 1533 case KVM_REG_PPC_TM_LR:
1502 vcpu->arch.lr_tm = set_reg_val(id, *val); 1534 vcpu->arch.lr_tm = set_reg_val(id, *val);
1503 break; 1535 break;
@@ -1540,6 +1572,20 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1540 return r; 1572 return r;
1541} 1573}
1542 1574
1575/*
1576 * On POWER9, threads are independent and can be in different partitions.
1577 * Therefore we consider each thread to be a subcore.
1578 * There is a restriction that all threads have to be in the same
1579 * MMU mode (radix or HPT), unfortunately, but since we only support
1580 * HPT guests on a HPT host so far, that isn't an impediment yet.
1581 */
1582static int threads_per_vcore(void)
1583{
1584 if (cpu_has_feature(CPU_FTR_ARCH_300))
1585 return 1;
1586 return threads_per_subcore;
1587}
1588
1543static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) 1589static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
1544{ 1590{
1545 struct kvmppc_vcore *vcore; 1591 struct kvmppc_vcore *vcore;
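Since threads_per_vcore() now feeds both the vcpu-id to core mapping and first_vcpuid, here is a quick compilable sketch of that mapping; has_arch_300 and threads_per_subcore below are plain variables standing in for cpu_has_feature(CPU_FTR_ARCH_300) and the real SMT topology, so the numbers are illustrative only.

#include <stdio.h>

static int has_arch_300 = 1;            /* pretend this is a POWER9 host */
static int threads_per_subcore = 8;     /* typical POWER8 value */

static int threads_per_vcore(void)
{
        /* On POWER9 each hardware thread is treated as its own subcore. */
        return has_arch_300 ? 1 : threads_per_subcore;
}

int main(void)
{
        int id;

        for (id = 0; id < 4; id++) {
                int core = id / threads_per_vcore();
                int first_vcpuid = core * threads_per_vcore();

                printf("vcpu %d -> vcore %d (first vcpuid %d)\n",
                       id, core, first_vcpuid);
        }
        return 0;
}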
@@ -1554,7 +1600,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
1554 init_swait_queue_head(&vcore->wq); 1600 init_swait_queue_head(&vcore->wq);
1555 vcore->preempt_tb = TB_NIL; 1601 vcore->preempt_tb = TB_NIL;
1556 vcore->lpcr = kvm->arch.lpcr; 1602 vcore->lpcr = kvm->arch.lpcr;
1557 vcore->first_vcpuid = core * threads_per_subcore; 1603 vcore->first_vcpuid = core * threads_per_vcore();
1558 vcore->kvm = kvm; 1604 vcore->kvm = kvm;
1559 INIT_LIST_HEAD(&vcore->preempt_list); 1605 INIT_LIST_HEAD(&vcore->preempt_list);
1560 1606
@@ -1717,7 +1763,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
1717 int core; 1763 int core;
1718 struct kvmppc_vcore *vcore; 1764 struct kvmppc_vcore *vcore;
1719 1765
1720 core = id / threads_per_subcore; 1766 core = id / threads_per_vcore();
1721 if (core >= KVM_MAX_VCORES) 1767 if (core >= KVM_MAX_VCORES)
1722 goto out; 1768 goto out;
1723 1769
@@ -1935,7 +1981,10 @@ static void kvmppc_wait_for_nap(void)
1935{ 1981{
1936 int cpu = smp_processor_id(); 1982 int cpu = smp_processor_id();
1937 int i, loops; 1983 int i, loops;
1984 int n_threads = threads_per_vcore();
1938 1985
1986 if (n_threads <= 1)
1987 return;
1939 for (loops = 0; loops < 1000000; ++loops) { 1988 for (loops = 0; loops < 1000000; ++loops) {
1940 /* 1989 /*
1941 * Check if all threads are finished. 1990 * Check if all threads are finished.
@@ -1943,17 +1992,17 @@ static void kvmppc_wait_for_nap(void)
1943 * and the thread clears it when finished, so we look 1992 * and the thread clears it when finished, so we look
1944 * for any threads that still have a non-NULL vcore ptr. 1993 * for any threads that still have a non-NULL vcore ptr.
1945 */ 1994 */
1946 for (i = 1; i < threads_per_subcore; ++i) 1995 for (i = 1; i < n_threads; ++i)
1947 if (paca[cpu + i].kvm_hstate.kvm_vcore) 1996 if (paca[cpu + i].kvm_hstate.kvm_vcore)
1948 break; 1997 break;
1949 if (i == threads_per_subcore) { 1998 if (i == n_threads) {
1950 HMT_medium(); 1999 HMT_medium();
1951 return; 2000 return;
1952 } 2001 }
1953 HMT_low(); 2002 HMT_low();
1954 } 2003 }
1955 HMT_medium(); 2004 HMT_medium();
1956 for (i = 1; i < threads_per_subcore; ++i) 2005 for (i = 1; i < n_threads; ++i)
1957 if (paca[cpu + i].kvm_hstate.kvm_vcore) 2006 if (paca[cpu + i].kvm_hstate.kvm_vcore)
1958 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); 2007 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
1959} 2008}
@@ -2019,7 +2068,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
2019 2068
2020 vc->vcore_state = VCORE_PREEMPT; 2069 vc->vcore_state = VCORE_PREEMPT;
2021 vc->pcpu = smp_processor_id(); 2070 vc->pcpu = smp_processor_id();
2022 if (vc->num_threads < threads_per_subcore) { 2071 if (vc->num_threads < threads_per_vcore()) {
2023 spin_lock(&lp->lock); 2072 spin_lock(&lp->lock);
2024 list_add_tail(&vc->preempt_list, &lp->list); 2073 list_add_tail(&vc->preempt_list, &lp->list);
2025 spin_unlock(&lp->lock); 2074 spin_unlock(&lp->lock);
@@ -2123,8 +2172,7 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2123 cip->subcore_threads[sub] = vc->num_threads; 2172 cip->subcore_threads[sub] = vc->num_threads;
2124 cip->subcore_vm[sub] = vc->kvm; 2173 cip->subcore_vm[sub] = vc->kvm;
2125 init_master_vcore(vc); 2174 init_master_vcore(vc);
2126 list_del(&vc->preempt_list); 2175 list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
2127 list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
2128 2176
2129 return true; 2177 return true;
2130} 2178}
@@ -2307,6 +2355,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2307 unsigned long cmd_bit, stat_bit; 2355 unsigned long cmd_bit, stat_bit;
2308 int pcpu, thr; 2356 int pcpu, thr;
2309 int target_threads; 2357 int target_threads;
2358 int controlled_threads;
2310 2359
2311 /* 2360 /*
2312 * Remove from the list any threads that have a signal pending 2361 * Remove from the list any threads that have a signal pending
@@ -2325,11 +2374,18 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2325 vc->preempt_tb = TB_NIL; 2374 vc->preempt_tb = TB_NIL;
2326 2375
2327 /* 2376 /*
2377 * Number of threads that we will be controlling: the same as
2378 * the number of threads per subcore, except on POWER9,
2379 * where it's 1 because the threads are (mostly) independent.
2380 */
2381 controlled_threads = threads_per_vcore();
2382
2383 /*
2328 * Make sure we are running on primary threads, and that secondary 2384 * Make sure we are running on primary threads, and that secondary
2329 * threads are offline. Also check if the number of threads in this 2385 * threads are offline. Also check if the number of threads in this
2330 * guest are greater than the current system threads per guest. 2386 * guest are greater than the current system threads per guest.
2331 */ 2387 */
2332 if ((threads_per_core > 1) && 2388 if ((controlled_threads > 1) &&
2333 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { 2389 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
2334 for_each_runnable_thread(i, vcpu, vc) { 2390 for_each_runnable_thread(i, vcpu, vc) {
2335 vcpu->arch.ret = -EBUSY; 2391 vcpu->arch.ret = -EBUSY;
@@ -2345,7 +2401,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2345 */ 2401 */
2346 init_core_info(&core_info, vc); 2402 init_core_info(&core_info, vc);
2347 pcpu = smp_processor_id(); 2403 pcpu = smp_processor_id();
2348 target_threads = threads_per_subcore; 2404 target_threads = controlled_threads;
2349 if (target_smt_mode && target_smt_mode < target_threads) 2405 if (target_smt_mode && target_smt_mode < target_threads)
2350 target_threads = target_smt_mode; 2406 target_threads = target_smt_mode;
2351 if (vc->num_threads < target_threads) 2407 if (vc->num_threads < target_threads)
@@ -2381,7 +2437,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2381 smp_wmb(); 2437 smp_wmb();
2382 } 2438 }
2383 pcpu = smp_processor_id(); 2439 pcpu = smp_processor_id();
2384 for (thr = 0; thr < threads_per_subcore; ++thr) 2440 for (thr = 0; thr < controlled_threads; ++thr)
2385 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; 2441 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
2386 2442
2387 /* Initiate micro-threading (split-core) if required */ 2443 /* Initiate micro-threading (split-core) if required */
@@ -2491,7 +2547,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2491 } 2547 }
2492 2548
2493 /* Let secondaries go back to the offline loop */ 2549 /* Let secondaries go back to the offline loop */
2494 for (i = 0; i < threads_per_subcore; ++i) { 2550 for (i = 0; i < controlled_threads; ++i) {
2495 kvmppc_release_hwthread(pcpu + i); 2551 kvmppc_release_hwthread(pcpu + i);
2496 if (sip && sip->napped[i]) 2552 if (sip && sip->napped[i])
2497 kvmppc_ipi_thread(pcpu + i); 2553 kvmppc_ipi_thread(pcpu + i);
@@ -2543,9 +2599,6 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
2543 vc->halt_poll_ns = 10000; 2599 vc->halt_poll_ns = 10000;
2544 else 2600 else
2545 vc->halt_poll_ns *= halt_poll_ns_grow; 2601 vc->halt_poll_ns *= halt_poll_ns_grow;
2546
2547 if (vc->halt_poll_ns > halt_poll_max_ns)
2548 vc->halt_poll_ns = halt_poll_max_ns;
2549} 2602}
2550 2603
2551static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) 2604static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
@@ -2556,7 +2609,8 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
2556 vc->halt_poll_ns /= halt_poll_ns_shrink; 2609 vc->halt_poll_ns /= halt_poll_ns_shrink;
2557} 2610}
2558 2611
2559/* Check to see if any of the runnable vcpus on the vcore have pending 2612/*
2613 * Check to see if any of the runnable vcpus on the vcore have pending
2560 * exceptions or are no longer ceded 2614 * exceptions or are no longer ceded
2561 */ 2615 */
2562static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) 2616static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
@@ -2655,16 +2709,18 @@ out:
2655 } 2709 }
2656 2710
2657 /* Adjust poll time */ 2711 /* Adjust poll time */
2658 if (halt_poll_max_ns) { 2712 if (halt_poll_ns) {
2659 if (block_ns <= vc->halt_poll_ns) 2713 if (block_ns <= vc->halt_poll_ns)
2660 ; 2714 ;
2661 /* We slept and blocked for longer than the max halt time */ 2715 /* We slept and blocked for longer than the max halt time */
2662 else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) 2716 else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
2663 shrink_halt_poll_ns(vc); 2717 shrink_halt_poll_ns(vc);
2664 /* We slept and our poll time is too small */ 2718 /* We slept and our poll time is too small */
2665 else if (vc->halt_poll_ns < halt_poll_max_ns && 2719 else if (vc->halt_poll_ns < halt_poll_ns &&
2666 block_ns < halt_poll_max_ns) 2720 block_ns < halt_poll_ns)
2667 grow_halt_poll_ns(vc); 2721 grow_halt_poll_ns(vc);
2722 if (vc->halt_poll_ns > halt_poll_ns)
2723 vc->halt_poll_ns = halt_poll_ns;
2668 } else 2724 } else
2669 vc->halt_poll_ns = 0; 2725 vc->halt_poll_ns = 0;
2670 2726
@@ -2971,6 +3027,15 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
2971 struct kvm_memslots *slots; 3027 struct kvm_memslots *slots;
2972 struct kvm_memory_slot *memslot; 3028 struct kvm_memory_slot *memslot;
2973 3029
3030 /*
3031 * If we are making a new memslot, it might make
3032 * some address that was previously cached as emulated
3033 * MMIO be no longer emulated MMIO, so invalidate
3034 * all the caches of emulated MMIO translations.
3035 */
3036 if (npages)
3037 atomic64_inc(&kvm->arch.mmio_update);
3038
2974 if (npages && old->npages) { 3039 if (npages && old->npages) {
2975 /* 3040 /*
2976 * If modifying a memslot, reset all the rmap dirty bits. 3041 * If modifying a memslot, reset all the rmap dirty bits.
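kvm->arch.mmio_update is used as a generation counter: bumping it when a memslot is created makes every cached emulated-MMIO translation stale without walking or clearing the cache itself. A minimal standalone sketch of that pattern; the structure and names are illustrative, not the kernel's.

#include <stdint.h>
#include <stdio.h>

struct cache_entry {
        uint64_t generation;    /* counter value captured when cached */
        uint64_t data;
};

static uint64_t mmio_generation;        /* bumped to invalidate everything */

static int entry_valid(const struct cache_entry *e)
{
        return e->generation == mmio_generation;
}

int main(void)
{
        struct cache_entry e = { .generation = mmio_generation, .data = 42 };

        printf("valid before memslot change: %d\n", entry_valid(&e));
        mmio_generation++;              /* what the commit hook above does */
        printf("valid after memslot change:  %d\n", entry_valid(&e));
        return 0;
}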
@@ -3015,6 +3080,22 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
3015 return; 3080 return;
3016} 3081}
3017 3082
3083static void kvmppc_setup_partition_table(struct kvm *kvm)
3084{
3085 unsigned long dw0, dw1;
3086
3087 /* PS field - page size for VRMA */
3088 dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
3089 ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
3090 /* HTABSIZE and HTABORG fields */
3091 dw0 |= kvm->arch.sdr1;
3092
3093 /* Second dword has GR=0; other fields are unused since UPRT=0 */
3094 dw1 = 0;
3095
3096 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
3097}
3098
3018static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) 3099static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3019{ 3100{
3020 int err = 0; 3101 int err = 0;
@@ -3066,17 +3147,20 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3066 psize == 0x1000000)) 3147 psize == 0x1000000))
3067 goto out_srcu; 3148 goto out_srcu;
3068 3149
3069 /* Update VRMASD field in the LPCR */
3070 senc = slb_pgsize_encoding(psize); 3150 senc = slb_pgsize_encoding(psize);
3071 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 3151 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
3072 (VRMA_VSID << SLB_VSID_SHIFT_1T); 3152 (VRMA_VSID << SLB_VSID_SHIFT_1T);
3073 /* the -4 is to account for senc values starting at 0x10 */
3074 lpcr = senc << (LPCR_VRMASD_SH - 4);
3075
3076 /* Create HPTEs in the hash page table for the VRMA */ 3153 /* Create HPTEs in the hash page table for the VRMA */
3077 kvmppc_map_vrma(vcpu, memslot, porder); 3154 kvmppc_map_vrma(vcpu, memslot, porder);
3078 3155
3079 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); 3156 /* Update VRMASD field in the LPCR */
3157 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
3158 /* the -4 is to account for senc values starting at 0x10 */
3159 lpcr = senc << (LPCR_VRMASD_SH - 4);
3160 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
3161 } else {
3162 kvmppc_setup_partition_table(kvm);
3163 }
3080 3164
3081 /* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */ 3165 /* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
3082 smp_wmb(); 3166 smp_wmb();
@@ -3219,14 +3303,18 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3219 * Since we don't flush the TLB when tearing down a VM, 3303 * Since we don't flush the TLB when tearing down a VM,
3220 * and this lpid might have previously been used, 3304 * and this lpid might have previously been used,
3221 * make sure we flush on each core before running the new VM. 3305 * make sure we flush on each core before running the new VM.
3306 * On POWER9, the tlbie in mmu_partition_table_set_entry()
3307 * does this flush for us.
3222 */ 3308 */
3223 cpumask_setall(&kvm->arch.need_tlb_flush); 3309 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3310 cpumask_setall(&kvm->arch.need_tlb_flush);
3224 3311
3225 /* Start out with the default set of hcalls enabled */ 3312 /* Start out with the default set of hcalls enabled */
3226 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, 3313 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
3227 sizeof(kvm->arch.enabled_hcalls)); 3314 sizeof(kvm->arch.enabled_hcalls));
3228 3315
3229 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); 3316 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3317 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
3230 3318
3231 /* Init LPCR for virtual RMA mode */ 3319 /* Init LPCR for virtual RMA mode */
3232 kvm->arch.host_lpid = mfspr(SPRN_LPID); 3320 kvm->arch.host_lpid = mfspr(SPRN_LPID);
@@ -3239,9 +3327,29 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3239 /* On POWER8 turn on online bit to enable PURR/SPURR */ 3327 /* On POWER8 turn on online bit to enable PURR/SPURR */
3240 if (cpu_has_feature(CPU_FTR_ARCH_207S)) 3328 if (cpu_has_feature(CPU_FTR_ARCH_207S))
3241 lpcr |= LPCR_ONL; 3329 lpcr |= LPCR_ONL;
3330 /*
3331 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
3332 * Set HVICE bit to enable hypervisor virtualization interrupts.
3333 */
3334 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
3335 lpcr &= ~LPCR_VPM0;
3336 lpcr |= LPCR_HVICE;
3337 }
3338
3242 kvm->arch.lpcr = lpcr; 3339 kvm->arch.lpcr = lpcr;
3243 3340
3244 /* 3341 /*
3342 * Work out how many sets the TLB has, for the use of
3343 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
3344 */
3345 if (cpu_has_feature(CPU_FTR_ARCH_300))
3346 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
3347 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
3348 kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */
3349 else
3350 kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */
3351
3352 /*
3245 * Track that we now have a HV mode VM active. This blocks secondary 3353 * Track that we now have a HV mode VM active. This blocks secondary
3246 * CPU threads from coming online. 3354 * CPU threads from coming online.
3247 */ 3355 */
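Caching the set count in kvm->arch.tlb_sets lets the real-mode flush loop in book3s_hv_rmhandlers.S (changed further down in this patch) stop hard-coding 128 or 512. A rough C rendering of what that loop does; invalidate_tlb_set() is a purely hypothetical stand-in for the tlbiel sequence, not a real interface.

#include <stdio.h>

/* Hypothetical stand-in for one tlbiel aimed at congruence class "set". */
static void invalidate_tlb_set(int set)
{
        printf("tlbiel set %d\n", set);
}

static void flush_guest_tlb(int tlb_sets)
{
        int set;

        /* One invalidation per set covers the guest's TLB entries. */
        for (set = 0; set < tlb_sets; set++)
                invalidate_tlb_set(set);
}

int main(void)
{
        flush_guest_tlb(256);   /* POWER9_TLB_SETS_HASH from the hunk above */
        return 0;
}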
@@ -3305,9 +3413,9 @@ static int kvmppc_core_check_processor_compat_hv(void)
3305 !cpu_has_feature(CPU_FTR_ARCH_206)) 3413 !cpu_has_feature(CPU_FTR_ARCH_206))
3306 return -EIO; 3414 return -EIO;
3307 /* 3415 /*
3308 * Disable KVM for Power9, untill the required bits merged. 3416 * Disable KVM for Power9 in radix mode.
3309 */ 3417 */
3310 if (cpu_has_feature(CPU_FTR_ARCH_300)) 3418 if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
3311 return -EIO; 3419 return -EIO;
3312 3420
3313 return 0; 3421 return 0;
@@ -3661,6 +3769,23 @@ static int kvmppc_book3s_init_hv(void)
3661 if (r) 3769 if (r)
3662 return r; 3770 return r;
3663 3771
3772 /*
3773 * We need a way of accessing the XICS interrupt controller,
3774 * either directly, via paca[cpu].kvm_hstate.xics_phys, or
3775 * indirectly, via OPAL.
3776 */
3777#ifdef CONFIG_SMP
3778 if (!get_paca()->kvm_hstate.xics_phys) {
3779 struct device_node *np;
3780
3781 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
3782 if (!np) {
3783 pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
3784 return -ENODEV;
3785 }
3786 }
3787#endif
3788
3664 kvm_ops_hv.owner = THIS_MODULE; 3789 kvm_ops_hv.owner = THIS_MODULE;
3665 kvmppc_hv_ops = &kvm_ops_hv; 3790 kvmppc_hv_ops = &kvm_ops_hv;
3666 3791
@@ -3683,3 +3808,4 @@ module_exit(kvmppc_book3s_exit_hv);
3683MODULE_LICENSE("GPL"); 3808MODULE_LICENSE("GPL");
3684MODULE_ALIAS_MISCDEV(KVM_MINOR); 3809MODULE_ALIAS_MISCDEV(KVM_MINOR);
3685MODULE_ALIAS("devname:kvm"); 3810MODULE_ALIAS("devname:kvm");
3811
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 0c84d6bc8356..11561f0ef83a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -26,6 +26,9 @@
26#include <asm/dbell.h> 26#include <asm/dbell.h>
27#include <asm/cputhreads.h> 27#include <asm/cputhreads.h>
28#include <asm/io.h> 28#include <asm/io.h>
29#include <asm/asm-prototypes.h>
30#include <asm/opal.h>
31#include <asm/smp.h>
29 32
30#define KVM_CMA_CHUNK_ORDER 18 33#define KVM_CMA_CHUNK_ORDER 18
31 34
@@ -205,12 +208,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
205void kvmhv_rm_send_ipi(int cpu) 208void kvmhv_rm_send_ipi(int cpu)
206{ 209{
207 unsigned long xics_phys; 210 unsigned long xics_phys;
211 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
208 212
209 /* On POWER8 for IPIs to threads in the same core, use msgsnd */ 213 /* On POWER9 we can use msgsnd for any destination cpu. */
214 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
215 msg |= get_hard_smp_processor_id(cpu);
216 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
217 return;
218 }
219 /* On POWER8 for IPIs to threads in the same core, use msgsnd. */
210 if (cpu_has_feature(CPU_FTR_ARCH_207S) && 220 if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
211 cpu_first_thread_sibling(cpu) == 221 cpu_first_thread_sibling(cpu) ==
212 cpu_first_thread_sibling(raw_smp_processor_id())) { 222 cpu_first_thread_sibling(raw_smp_processor_id())) {
213 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
214 msg |= cpu_thread_in_core(cpu); 223 msg |= cpu_thread_in_core(cpu);
215 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); 224 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
216 return; 225 return;
@@ -218,7 +227,11 @@ void kvmhv_rm_send_ipi(int cpu)
218 227
219 /* Else poke the target with an IPI */ 228 /* Else poke the target with an IPI */
220 xics_phys = paca[cpu].kvm_hstate.xics_phys; 229 xics_phys = paca[cpu].kvm_hstate.xics_phys;
221 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); 230 if (xics_phys)
231 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
232 else
233 opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu),
234 IPI_PRIORITY);
222} 235}
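The IPI path now has three tiers: msgsnd to any cpu on POWER9, msgsnd only to threads of the same core on POWER8, and otherwise a poke of the target's XICS MFRR, directly if we hold its real-mode address or through OPAL if we do not. A condensed sketch of that decision ladder; the predicates and print statements are placeholders, not real kernel or OPAL calls.

#include <stdbool.h>
#include <stdio.h>

static bool is_power9, same_core, have_xics_mmio;       /* placeholders */

static void send_ipi(int cpu)
{
        if (is_power9)
                printf("msgsnd to cpu %d\n", cpu);              /* any target */
        else if (same_core)
                printf("msgsnd to sibling thread %d\n", cpu);   /* POWER8 */
        else if (have_xics_mmio)
                printf("store IPI priority to XICS MFRR of cpu %d\n", cpu);
        else
                printf("OPAL call to set MFRR of cpu %d\n", cpu);
}

int main(void)
{
        send_ipi(3);    /* with all flags false, takes the OPAL fallback */
        return 0;
}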
223 236
224/* 237/*
@@ -329,7 +342,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
329 * saved a copy of the XIRR in the PACA, it will be picked up by 342 * saved a copy of the XIRR in the PACA, it will be picked up by
330 * the host ICP driver. 343 * the host ICP driver.
331 */ 344 */
332static int kvmppc_check_passthru(u32 xisr, __be32 xirr) 345static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
333{ 346{
334 struct kvmppc_passthru_irqmap *pimap; 347 struct kvmppc_passthru_irqmap *pimap;
335 struct kvmppc_irq_map *irq_map; 348 struct kvmppc_irq_map *irq_map;
@@ -348,11 +361,11 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
348 /* We're handling this interrupt, generic code doesn't need to */ 361 /* We're handling this interrupt, generic code doesn't need to */
349 local_paca->kvm_hstate.saved_xirr = 0; 362 local_paca->kvm_hstate.saved_xirr = 0;
350 363
351 return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); 364 return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
352} 365}
353 366
354#else 367#else
355static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) 368static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
356{ 369{
357 return 1; 370 return 1;
358} 371}
@@ -367,14 +380,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
367 * -1 if there was a guest wakeup IPI (which has now been cleared) 380 * -1 if there was a guest wakeup IPI (which has now been cleared)
368 * -2 if there is PCI passthrough external interrupt that was handled 381 * -2 if there is PCI passthrough external interrupt that was handled
369 */ 382 */
383static long kvmppc_read_one_intr(bool *again);
370 384
371long kvmppc_read_intr(void) 385long kvmppc_read_intr(void)
372{ 386{
387 long ret = 0;
388 long rc;
389 bool again;
390
391 do {
392 again = false;
393 rc = kvmppc_read_one_intr(&again);
394 if (rc && (ret == 0 || rc > ret))
395 ret = rc;
396 } while (again);
397 return ret;
398}
399
400static long kvmppc_read_one_intr(bool *again)
401{
373 unsigned long xics_phys; 402 unsigned long xics_phys;
374 u32 h_xirr; 403 u32 h_xirr;
375 __be32 xirr; 404 __be32 xirr;
376 u32 xisr; 405 u32 xisr;
377 u8 host_ipi; 406 u8 host_ipi;
407 int64_t rc;
378 408
379 /* see if a host IPI is pending */ 409 /* see if a host IPI is pending */
380 host_ipi = local_paca->kvm_hstate.host_ipi; 410 host_ipi = local_paca->kvm_hstate.host_ipi;
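kvmppc_read_intr() becomes a retry loop because the OPAL EOI path (below) can report that another interrupt is already pending; the loop also keeps the numerically largest nonzero result across iterations. A standalone sketch of that accumulate-and-retry pattern, with read_one() returning canned values in place of kvmppc_read_one_intr().

#include <stdbool.h>
#include <stdio.h>

/* Canned results standing in for kvmppc_read_one_intr():
 * 1 = host interrupt, -1 = guest wakeup IPI, 0 = nothing pending. */
static long canned[] = { -1, 1, 0 };
static int pos;

static long read_one(bool *again)
{
        long rc = canned[pos++];

        *again = (pos < 2);     /* pretend OPAL said "more pending" once */
        return rc;
}

int main(void)
{
        long ret = 0, rc;
        bool again;

        do {
                again = false;
                rc = read_one(&again);
                if (rc && (ret == 0 || rc > ret))
                        ret = rc;       /* keep the largest nonzero result */
        } while (again);

        printf("combined result = %ld\n", ret); /* prints 1 */
        return 0;
}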
@@ -383,8 +413,14 @@ long kvmppc_read_intr(void)
383 413
384 /* Now read the interrupt from the ICP */ 414 /* Now read the interrupt from the ICP */
385 xics_phys = local_paca->kvm_hstate.xics_phys; 415 xics_phys = local_paca->kvm_hstate.xics_phys;
386 if (unlikely(!xics_phys)) 416 if (!xics_phys) {
387 return 1; 417 /* Use OPAL to read the XIRR */
418 rc = opal_rm_int_get_xirr(&xirr, false);
419 if (rc < 0)
420 return 1;
421 } else {
422 xirr = _lwzcix(xics_phys + XICS_XIRR);
423 }
388 424
389 /* 425 /*
390 * Save XIRR for later. Since we get control in reverse endian 426 * Save XIRR for later. Since we get control in reverse endian
@@ -392,7 +428,6 @@ long kvmppc_read_intr(void)
392 * host endian. Note that xirr is the value read from the 428 * host endian. Note that xirr is the value read from the
393 * XIRR register, while h_xirr is the host endian version. 429 * XIRR register, while h_xirr is the host endian version.
394 */ 430 */
395 xirr = _lwzcix(xics_phys + XICS_XIRR);
396 h_xirr = be32_to_cpu(xirr); 431 h_xirr = be32_to_cpu(xirr);
397 local_paca->kvm_hstate.saved_xirr = h_xirr; 432 local_paca->kvm_hstate.saved_xirr = h_xirr;
398 xisr = h_xirr & 0xffffff; 433 xisr = h_xirr & 0xffffff;
@@ -411,8 +446,16 @@ long kvmppc_read_intr(void)
411 * If it is an IPI, clear the MFRR and EOI it. 446 * If it is an IPI, clear the MFRR and EOI it.
412 */ 447 */
413 if (xisr == XICS_IPI) { 448 if (xisr == XICS_IPI) {
414 _stbcix(xics_phys + XICS_MFRR, 0xff); 449 if (xics_phys) {
415 _stwcix(xics_phys + XICS_XIRR, xirr); 450 _stbcix(xics_phys + XICS_MFRR, 0xff);
451 _stwcix(xics_phys + XICS_XIRR, xirr);
452 } else {
453 opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff);
454 rc = opal_rm_int_eoi(h_xirr);
455 /* If rc > 0, there is another interrupt pending */
456 *again = rc > 0;
457 }
458
416 /* 459 /*
417 * Need to ensure side effects of above stores 460 * Need to ensure side effects of above stores
418 * complete before proceeding. 461 * complete before proceeding.
@@ -429,7 +472,11 @@ long kvmppc_read_intr(void)
429 /* We raced with the host, 472 /* We raced with the host,
430 * we need to resend that IPI, bummer 473 * we need to resend that IPI, bummer
431 */ 474 */
432 _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); 475 if (xics_phys)
476 _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
477 else
478 opal_rm_int_set_mfrr(hard_smp_processor_id(),
479 IPI_PRIORITY);
433 /* Let side effects complete */ 480 /* Let side effects complete */
434 smp_mb(); 481 smp_mb();
435 return 1; 482 return 1;
@@ -440,5 +487,5 @@ long kvmppc_read_intr(void)
440 return -1; 487 return -1;
441 } 488 }
442 489
443 return kvmppc_check_passthru(xisr, xirr); 490 return kvmppc_check_passthru(xisr, xirr, again);
444} 491}
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 0fa70a9618d7..be1cee5dc032 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -16,6 +16,7 @@
16#include <asm/machdep.h> 16#include <asm/machdep.h>
17#include <asm/cputhreads.h> 17#include <asm/cputhreads.h>
18#include <asm/hmi.h> 18#include <asm/hmi.h>
19#include <asm/asm-prototypes.h>
19 20
20/* SRR1 bits for machine check on POWER7 */ 21/* SRR1 bits for machine check on POWER7 */
21#define SRR1_MC_LDSTERR (1ul << (63-42)) 22#define SRR1_MC_LDSTERR (1ul << (63-42))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 99b4e9d5dd23..378b962bcf2e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -21,6 +21,7 @@
21#include <asm/hvcall.h> 21#include <asm/hvcall.h>
22#include <asm/synch.h> 22#include <asm/synch.h>
23#include <asm/ppc-opcode.h> 23#include <asm/ppc-opcode.h>
24#include <asm/asm-prototypes.h>
24 25
25/* Translate address of a vmalloc'd thing to a linear map address */ 26/* Translate address of a vmalloc'd thing to a linear map address */
26static void *real_vmalloc_addr(void *x) 27static void *real_vmalloc_addr(void *x)
@@ -264,8 +265,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
264 265
265 if (pa) 266 if (pa)
266 pteh |= HPTE_V_VALID; 267 pteh |= HPTE_V_VALID;
267 else 268 else {
268 pteh |= HPTE_V_ABSENT; 269 pteh |= HPTE_V_ABSENT;
270 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
271 }
269 272
270 /*If we had host pte mapping then Check WIMG */ 273 /*If we had host pte mapping then Check WIMG */
271 if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { 274 if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
@@ -351,6 +354,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
351 /* inval in progress, write a non-present HPTE */ 354 /* inval in progress, write a non-present HPTE */
352 pteh |= HPTE_V_ABSENT; 355 pteh |= HPTE_V_ABSENT;
353 pteh &= ~HPTE_V_VALID; 356 pteh &= ~HPTE_V_VALID;
357 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
354 unlock_rmap(rmap); 358 unlock_rmap(rmap);
355 } else { 359 } else {
356 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, 360 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
@@ -361,6 +365,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
361 } 365 }
362 } 366 }
363 367
368 /* Convert to new format on P9 */
369 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
370 ptel = hpte_old_to_new_r(pteh, ptel);
371 pteh = hpte_old_to_new_v(pteh);
372 }
364 hpte[1] = cpu_to_be64(ptel); 373 hpte[1] = cpu_to_be64(ptel);
365 374
366 /* Write the first HPTE dword, unlocking the HPTE and making it valid */ 375 /* Write the first HPTE dword, unlocking the HPTE and making it valid */
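ISA 3.00 rearranges some HPTE fields, so the real-mode MMU code keeps operating on the old layout internally and converts only where an HPTE is actually read from or written to the hash table (the hpte_old_to_new_*/hpte_new_to_old_* calls above and in the later hunks). A schematic sketch of that boundary-conversion pattern; to_hw() and from_hw() are identity placeholders here, not the real conversion helpers.

#include <stdint.h>
#include <stdio.h>

struct hpte { uint64_t v, r; };

static int is_power9 = 1;  /* stand-in for cpu_has_feature(CPU_FTR_ARCH_300) */

/* Placeholders: the real helpers shuffle segment-size and key fields. */
static struct hpte to_hw(struct hpte old)  { return old; }
static struct hpte from_hw(struct hpte hw) { return hw; }

static void write_hpte(uint64_t *slot, struct hpte pte)
{
        if (is_power9)
                pte = to_hw(pte);       /* convert only at the memory boundary */
        slot[1] = pte.r;
        slot[0] = pte.v;                /* dword with the valid bit goes last */
}

static struct hpte read_hpte(const uint64_t *slot)
{
        struct hpte pte = { slot[0], slot[1] };

        return is_power9 ? from_hw(pte) : pte;
}

int main(void)
{
        uint64_t slot[2] = { 0, 0 };
        struct hpte pte = { 0x8000000000000001ull, 0x1234 };

        write_hpte(slot, pte);
        pte = read_hpte(slot);
        printf("v=%#llx r=%#llx\n",
               (unsigned long long)pte.v, (unsigned long long)pte.r);
        return 0;
}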
@@ -386,6 +395,13 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
386#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) 395#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
387#endif 396#endif
388 397
398static inline int is_mmio_hpte(unsigned long v, unsigned long r)
399{
400 return ((v & HPTE_V_ABSENT) &&
401 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
402 (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
403}
404
389static inline int try_lock_tlbie(unsigned int *lock) 405static inline int try_lock_tlbie(unsigned int *lock)
390{ 406{
391 unsigned int tmp, old; 407 unsigned int tmp, old;
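is_mmio_hpte() relies on the encoding that emulated-MMIO entries are absent HPTEs with both storage-key bits set; the h_enter changes above clear the key bits on ordinary absent entries precisely so they cannot be mistaken for MMIO. A standalone check of the predicate using hypothetical bit positions; the real HPTE_V_*/HPTE_R_* masks live in the book3s headers.

#include <assert.h>
#include <stdint.h>

/* Hypothetical bit assignments; only the encoding scheme matters here. */
#define V_VALID   (1ull << 0)
#define V_ABSENT  (1ull << 1)
#define R_KEY_HI  (1ull << 2)
#define R_KEY_LO  (1ull << 3)

static int is_mmio_hpte(uint64_t v, uint64_t r)
{
        return (v & V_ABSENT) &&
               (r & (R_KEY_HI | R_KEY_LO)) == (R_KEY_HI | R_KEY_LO);
}

int main(void)
{
        /* Emulated MMIO: absent with both key bits set. */
        assert(is_mmio_hpte(V_ABSENT, R_KEY_HI | R_KEY_LO));
        /* Ordinary absent entry: the key bits were cleared in h_enter. */
        assert(!is_mmio_hpte(V_ABSENT, 0));
        /* A valid entry using both keys is not treated as MMIO. */
        assert(!is_mmio_hpte(V_VALID, R_KEY_HI | R_KEY_LO));
        return 0;
}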
@@ -409,13 +425,18 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
409{ 425{
410 long i; 426 long i;
411 427
428 /*
429 * We use the POWER9 5-operand versions of tlbie and tlbiel here.
430 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
431 * the RS field, this is backwards-compatible with P7 and P8.
432 */
412 if (global) { 433 if (global) {
413 while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) 434 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
414 cpu_relax(); 435 cpu_relax();
415 if (need_sync) 436 if (need_sync)
416 asm volatile("ptesync" : : : "memory"); 437 asm volatile("ptesync" : : : "memory");
417 for (i = 0; i < npages; ++i) 438 for (i = 0; i < npages; ++i)
418 asm volatile(PPC_TLBIE(%1,%0) : : 439 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
419 "r" (rbvalues[i]), "r" (kvm->arch.lpid)); 440 "r" (rbvalues[i]), "r" (kvm->arch.lpid));
420 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 441 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
421 kvm->arch.tlbie_lock = 0; 442 kvm->arch.tlbie_lock = 0;
@@ -423,7 +444,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
423 if (need_sync) 444 if (need_sync)
424 asm volatile("ptesync" : : : "memory"); 445 asm volatile("ptesync" : : : "memory");
425 for (i = 0; i < npages; ++i) 446 for (i = 0; i < npages; ++i)
426 asm volatile("tlbiel %0" : : "r" (rbvalues[i])); 447 asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
448 "r" (rbvalues[i]), "r" (0));
427 asm volatile("ptesync" : : : "memory"); 449 asm volatile("ptesync" : : : "memory");
428 } 450 }
429} 451}
@@ -435,18 +457,23 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
435 __be64 *hpte; 457 __be64 *hpte;
436 unsigned long v, r, rb; 458 unsigned long v, r, rb;
437 struct revmap_entry *rev; 459 struct revmap_entry *rev;
438 u64 pte; 460 u64 pte, orig_pte, pte_r;
439 461
440 if (pte_index >= kvm->arch.hpt_npte) 462 if (pte_index >= kvm->arch.hpt_npte)
441 return H_PARAMETER; 463 return H_PARAMETER;
442 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); 464 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
443 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 465 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
444 cpu_relax(); 466 cpu_relax();
445 pte = be64_to_cpu(hpte[0]); 467 pte = orig_pte = be64_to_cpu(hpte[0]);
468 pte_r = be64_to_cpu(hpte[1]);
469 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
470 pte = hpte_new_to_old_v(pte, pte_r);
471 pte_r = hpte_new_to_old_r(pte_r);
472 }
446 if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 473 if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
447 ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || 474 ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
448 ((flags & H_ANDCOND) && (pte & avpn) != 0)) { 475 ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
449 __unlock_hpte(hpte, pte); 476 __unlock_hpte(hpte, orig_pte);
450 return H_NOT_FOUND; 477 return H_NOT_FOUND;
451 } 478 }
452 479
@@ -454,7 +481,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
454 v = pte & ~HPTE_V_HVLOCK; 481 v = pte & ~HPTE_V_HVLOCK;
455 if (v & HPTE_V_VALID) { 482 if (v & HPTE_V_VALID) {
456 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 483 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
457 rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); 484 rb = compute_tlbie_rb(v, pte_r, pte_index);
458 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); 485 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
459 /* 486 /*
460 * The reference (R) and change (C) bits in a HPT 487 * The reference (R) and change (C) bits in a HPT
@@ -472,6 +499,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
472 note_hpte_modification(kvm, rev); 499 note_hpte_modification(kvm, rev);
473 unlock_hpte(hpte, 0); 500 unlock_hpte(hpte, 0);
474 501
502 if (is_mmio_hpte(v, pte_r))
503 atomic64_inc(&kvm->arch.mmio_update);
504
475 if (v & HPTE_V_ABSENT) 505 if (v & HPTE_V_ABSENT)
476 v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; 506 v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
477 hpret[0] = v; 507 hpret[0] = v;
@@ -498,7 +528,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
498 int global; 528 int global;
499 long int ret = H_SUCCESS; 529 long int ret = H_SUCCESS;
500 struct revmap_entry *rev, *revs[4]; 530 struct revmap_entry *rev, *revs[4];
501 u64 hp0; 531 u64 hp0, hp1;
502 532
503 global = global_invalidates(kvm, 0); 533 global = global_invalidates(kvm, 0);
504 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 534 for (i = 0; i < 4 && ret == H_SUCCESS; ) {
@@ -531,6 +561,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
531 } 561 }
532 found = 0; 562 found = 0;
533 hp0 = be64_to_cpu(hp[0]); 563 hp0 = be64_to_cpu(hp[0]);
564 hp1 = be64_to_cpu(hp[1]);
565 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
566 hp0 = hpte_new_to_old_v(hp0, hp1);
567 hp1 = hpte_new_to_old_r(hp1);
568 }
534 if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { 569 if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
535 switch (flags & 3) { 570 switch (flags & 3) {
536 case 0: /* absolute */ 571 case 0: /* absolute */
@@ -561,13 +596,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
561 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 596 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
562 args[j] |= rcbits << (56 - 5); 597 args[j] |= rcbits << (56 - 5);
563 hp[0] = 0; 598 hp[0] = 0;
599 if (is_mmio_hpte(hp0, hp1))
600 atomic64_inc(&kvm->arch.mmio_update);
564 continue; 601 continue;
565 } 602 }
566 603
567 /* leave it locked */ 604 /* leave it locked */
568 hp[0] &= ~cpu_to_be64(HPTE_V_VALID); 605 hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
569 tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), 606 tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
570 be64_to_cpu(hp[1]), pte_index);
571 indexes[n] = j; 607 indexes[n] = j;
572 hptes[n] = hp; 608 hptes[n] = hp;
573 revs[n] = rev; 609 revs[n] = rev;
@@ -605,7 +641,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
605 __be64 *hpte; 641 __be64 *hpte;
606 struct revmap_entry *rev; 642 struct revmap_entry *rev;
607 unsigned long v, r, rb, mask, bits; 643 unsigned long v, r, rb, mask, bits;
608 u64 pte; 644 u64 pte_v, pte_r;
609 645
610 if (pte_index >= kvm->arch.hpt_npte) 646 if (pte_index >= kvm->arch.hpt_npte)
611 return H_PARAMETER; 647 return H_PARAMETER;
@@ -613,14 +649,16 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
613 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); 649 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
614 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 650 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
615 cpu_relax(); 651 cpu_relax();
616 pte = be64_to_cpu(hpte[0]); 652 v = pte_v = be64_to_cpu(hpte[0]);
617 if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 653 if (cpu_has_feature(CPU_FTR_ARCH_300))
618 ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { 654 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
619 __unlock_hpte(hpte, pte); 655 if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
656 ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
657 __unlock_hpte(hpte, pte_v);
620 return H_NOT_FOUND; 658 return H_NOT_FOUND;
621 } 659 }
622 660
623 v = pte; 661 pte_r = be64_to_cpu(hpte[1]);
624 bits = (flags << 55) & HPTE_R_PP0; 662 bits = (flags << 55) & HPTE_R_PP0;
625 bits |= (flags << 48) & HPTE_R_KEY_HI; 663 bits |= (flags << 48) & HPTE_R_KEY_HI;
626 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 664 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
@@ -642,22 +680,26 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
642 * readonly to writable. If it should be writable, we'll 680 * readonly to writable. If it should be writable, we'll
643 * take a trap and let the page fault code sort it out. 681 * take a trap and let the page fault code sort it out.
644 */ 682 */
645 pte = be64_to_cpu(hpte[1]); 683 r = (pte_r & ~mask) | bits;
646 r = (pte & ~mask) | bits; 684 if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
647 if (hpte_is_writable(r) && !hpte_is_writable(pte))
648 r = hpte_make_readonly(r); 685 r = hpte_make_readonly(r);
649 /* If the PTE is changing, invalidate it first */ 686 /* If the PTE is changing, invalidate it first */
650 if (r != pte) { 687 if (r != pte_r) {
651 rb = compute_tlbie_rb(v, r, pte_index); 688 rb = compute_tlbie_rb(v, r, pte_index);
652 hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | 689 hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
653 HPTE_V_ABSENT); 690 HPTE_V_ABSENT);
654 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), 691 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
655 true); 692 true);
693 /* Don't lose R/C bit updates done by hardware */
694 r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
656 hpte[1] = cpu_to_be64(r); 695 hpte[1] = cpu_to_be64(r);
657 } 696 }
658 } 697 }
659 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 698 unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
660 asm volatile("ptesync" : : : "memory"); 699 asm volatile("ptesync" : : : "memory");
700 if (is_mmio_hpte(v, pte_r))
701 atomic64_inc(&kvm->arch.mmio_update);
702
661 return H_SUCCESS; 703 return H_SUCCESS;
662} 704}
663 705
@@ -681,6 +723,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
681 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); 723 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
682 v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 724 v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
683 r = be64_to_cpu(hpte[1]); 725 r = be64_to_cpu(hpte[1]);
726 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
727 v = hpte_new_to_old_v(v, r);
728 r = hpte_new_to_old_r(r);
729 }
684 if (v & HPTE_V_ABSENT) { 730 if (v & HPTE_V_ABSENT) {
685 v &= ~HPTE_V_ABSENT; 731 v &= ~HPTE_V_ABSENT;
686 v |= HPTE_V_VALID; 732 v |= HPTE_V_VALID;
@@ -798,10 +844,16 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
798 unsigned long pte_index) 844 unsigned long pte_index)
799{ 845{
800 unsigned long rb; 846 unsigned long rb;
847 u64 hp0, hp1;
801 848
802 hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); 849 hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
803 rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), 850 hp0 = be64_to_cpu(hptep[0]);
804 pte_index); 851 hp1 = be64_to_cpu(hptep[1]);
852 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
853 hp0 = hpte_new_to_old_v(hp0, hp1);
854 hp1 = hpte_new_to_old_r(hp1);
855 }
856 rb = compute_tlbie_rb(hp0, hp1, pte_index);
805 do_tlbies(kvm, &rb, 1, 1, true); 857 do_tlbies(kvm, &rb, 1, 1, true);
806} 858}
807EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); 859EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
@@ -811,9 +863,15 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
811{ 863{
812 unsigned long rb; 864 unsigned long rb;
813 unsigned char rbyte; 865 unsigned char rbyte;
866 u64 hp0, hp1;
814 867
815 rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), 868 hp0 = be64_to_cpu(hptep[0]);
816 pte_index); 869 hp1 = be64_to_cpu(hptep[1]);
870 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
871 hp0 = hpte_new_to_old_v(hp0, hp1);
872 hp1 = hpte_new_to_old_r(hp1);
873 }
874 rb = compute_tlbie_rb(hp0, hp1, pte_index);
817 rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; 875 rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
818 /* modify only the second-last byte, which contains the ref bit */ 876 /* modify only the second-last byte, which contains the ref bit */
819 *((char *)hptep + 14) = rbyte; 877 *((char *)hptep + 14) = rbyte;
@@ -828,6 +886,37 @@ static int slb_base_page_shift[4] = {
828 20, /* 1M, unsupported */ 886 20, /* 1M, unsupported */
829}; 887};
830 888
889static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
890 unsigned long eaddr, unsigned long slb_v, long mmio_update)
891{
892 struct mmio_hpte_cache_entry *entry = NULL;
893 unsigned int pshift;
894 unsigned int i;
895
896 for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
897 entry = &vcpu->arch.mmio_cache.entry[i];
898 if (entry->mmio_update == mmio_update) {
899 pshift = entry->slb_base_pshift;
900 if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
901 entry->slb_v == slb_v)
902 return entry;
903 }
904 }
905 return NULL;
906}
907
908static struct mmio_hpte_cache_entry *
909 next_mmio_cache_entry(struct kvm_vcpu *vcpu)
910{
911 unsigned int index = vcpu->arch.mmio_cache.index;
912
913 vcpu->arch.mmio_cache.index++;
914 if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
915 vcpu->arch.mmio_cache.index = 0;
916
917 return &vcpu->arch.mmio_cache.entry[index];
918}
919
831/* When called from virtmode, this func should be protected by 920/* When called from virtmode, this func should be protected by
832 * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK 921 * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
833 * can trigger deadlock issue. 922 * can trigger deadlock issue.
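The per-vcpu MMIO HPTE cache added above is a small ring that is recycled round-robin and validated against the mmio_update generation, so a memslot change (which bumps the counter) retires every entry without any explicit flush. A condensed standalone sketch; MMIO_HPTE_CACHE_SIZE and the matching rule follow the hunk above, everything else is illustrative.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MMIO_HPTE_CACHE_SIZE 4

struct entry {
        uint64_t eaddr, slb_v, mmio_update;
        unsigned int pshift;
        int used;
};

struct cache {
        struct entry e[MMIO_HPTE_CACHE_SIZE];
        unsigned int index;             /* next slot to recycle */
};

static struct entry *lookup(struct cache *c, uint64_t eaddr, uint64_t slb_v,
                            uint64_t generation)
{
        unsigned int i;

        for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
                struct entry *e = &c->e[i];

                if (e->used && e->mmio_update == generation &&
                    (e->eaddr >> e->pshift) == (eaddr >> e->pshift) &&
                    e->slb_v == slb_v)
                        return e;
        }
        return NULL;
}

static struct entry *next_slot(struct cache *c)
{
        struct entry *e = &c->e[c->index];

        c->index = (c->index + 1) % MMIO_HPTE_CACHE_SIZE;
        return e;
}

int main(void)
{
        struct cache c;
        uint64_t generation = 7;

        memset(&c, 0, sizeof(c));
        *next_slot(&c) = (struct entry){ .eaddr = 0x1000, .slb_v = 1,
                                         .mmio_update = generation,
                                         .pshift = 12, .used = 1 };

        printf("hit:  %p\n", (void *)lookup(&c, 0x1abc, 1, generation));
        generation++;   /* a memslot change bumped the counter */
        printf("miss: %p\n", (void *)lookup(&c, 0x1abc, 1, generation));
        return 0;
}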
@@ -842,7 +931,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
842 unsigned long avpn; 931 unsigned long avpn;
843 __be64 *hpte; 932 __be64 *hpte;
844 unsigned long mask, val; 933 unsigned long mask, val;
845 unsigned long v, r; 934 unsigned long v, r, orig_v;
846 935
847 /* Get page shift, work out hash and AVPN etc. */ 936 /* Get page shift, work out hash and AVPN etc. */
848 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; 937 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
@@ -877,6 +966,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
877 for (i = 0; i < 16; i += 2) { 966 for (i = 0; i < 16; i += 2) {
878 /* Read the PTE racily */ 967 /* Read the PTE racily */
879 v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 968 v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
969 if (cpu_has_feature(CPU_FTR_ARCH_300))
970 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));
880 971
881 /* Check valid/absent, hash, segment size and AVPN */ 972 /* Check valid/absent, hash, segment size and AVPN */
882 if (!(v & valid) || (v & mask) != val) 973 if (!(v & valid) || (v & mask) != val)
@@ -885,8 +976,12 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
885 /* Lock the PTE and read it under the lock */ 976 /* Lock the PTE and read it under the lock */
886 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) 977 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
887 cpu_relax(); 978 cpu_relax();
888 v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 979 v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
889 r = be64_to_cpu(hpte[i+1]); 980 r = be64_to_cpu(hpte[i+1]);
981 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
982 v = hpte_new_to_old_v(v, r);
983 r = hpte_new_to_old_r(r);
984 }
890 985
891 /* 986 /*
892 * Check the HPTE again, including base page size 987 * Check the HPTE again, including base page size
@@ -896,7 +991,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
896 /* Return with the HPTE still locked */ 991 /* Return with the HPTE still locked */
897 return (hash << 3) + (i >> 1); 992 return (hash << 3) + (i >> 1);
898 993
899 __unlock_hpte(&hpte[i], v); 994 __unlock_hpte(&hpte[i], orig_v);
900 } 995 }
901 996
902 if (val & HPTE_V_SECONDARY) 997 if (val & HPTE_V_SECONDARY)
@@ -924,30 +1019,45 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
924{ 1019{
925 struct kvm *kvm = vcpu->kvm; 1020 struct kvm *kvm = vcpu->kvm;
926 long int index; 1021 long int index;
927 unsigned long v, r, gr; 1022 unsigned long v, r, gr, orig_v;
928 __be64 *hpte; 1023 __be64 *hpte;
929 unsigned long valid; 1024 unsigned long valid;
930 struct revmap_entry *rev; 1025 struct revmap_entry *rev;
931 unsigned long pp, key; 1026 unsigned long pp, key;
1027 struct mmio_hpte_cache_entry *cache_entry = NULL;
1028 long mmio_update = 0;
932 1029
933 /* For protection fault, expect to find a valid HPTE */ 1030 /* For protection fault, expect to find a valid HPTE */
934 valid = HPTE_V_VALID; 1031 valid = HPTE_V_VALID;
935 if (status & DSISR_NOHPTE) 1032 if (status & DSISR_NOHPTE) {
936 valid |= HPTE_V_ABSENT; 1033 valid |= HPTE_V_ABSENT;
937 1034 mmio_update = atomic64_read(&kvm->arch.mmio_update);
938 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); 1035 cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
939 if (index < 0) {
940 if (status & DSISR_NOHPTE)
941 return status; /* there really was no HPTE */
942 return 0; /* for prot fault, HPTE disappeared */
943 } 1036 }
944 hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); 1037 if (cache_entry) {
945 v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 1038 index = cache_entry->pte_index;
946 r = be64_to_cpu(hpte[1]); 1039 v = cache_entry->hpte_v;
947 rev = real_vmalloc_addr(&kvm->arch.revmap[index]); 1040 r = cache_entry->hpte_r;
948 gr = rev->guest_rpte; 1041 gr = cache_entry->rpte;
1042 } else {
1043 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
1044 if (index < 0) {
1045 if (status & DSISR_NOHPTE)
1046 return status; /* there really was no HPTE */
1047 return 0; /* for prot fault, HPTE disappeared */
1048 }
1049 hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
1050 v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
1051 r = be64_to_cpu(hpte[1]);
1052 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1053 v = hpte_new_to_old_v(v, r);
1054 r = hpte_new_to_old_r(r);
1055 }
1056 rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
1057 gr = rev->guest_rpte;
949 1058
950 unlock_hpte(hpte, v); 1059 unlock_hpte(hpte, orig_v);
1060 }
951 1061
952 /* For not found, if the HPTE is valid by now, retry the instruction */ 1062 /* For not found, if the HPTE is valid by now, retry the instruction */
953 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) 1063 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
@@ -985,12 +1095,32 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
985 vcpu->arch.pgfault_index = index; 1095 vcpu->arch.pgfault_index = index;
986 vcpu->arch.pgfault_hpte[0] = v; 1096 vcpu->arch.pgfault_hpte[0] = v;
987 vcpu->arch.pgfault_hpte[1] = r; 1097 vcpu->arch.pgfault_hpte[1] = r;
1098 vcpu->arch.pgfault_cache = cache_entry;
988 1099
989 /* Check the storage key to see if it is possibly emulated MMIO */ 1100 /* Check the storage key to see if it is possibly emulated MMIO */
990 if (data && (vcpu->arch.shregs.msr & MSR_IR) && 1101 if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
991 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 1102 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
992 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) 1103 if (!cache_entry) {
993 return -2; /* MMIO emulation - load instr word */ 1104 unsigned int pshift = 12;
1105 unsigned int pshift_index;
1106
1107 if (slb_v & SLB_VSID_L) {
1108 pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
1109 pshift = slb_base_page_shift[pshift_index];
1110 }
1111 cache_entry = next_mmio_cache_entry(vcpu);
1112 cache_entry->eaddr = addr;
1113 cache_entry->slb_base_pshift = pshift;
1114 cache_entry->pte_index = index;
1115 cache_entry->hpte_v = v;
1116 cache_entry->hpte_r = r;
1117 cache_entry->rpte = gr;
1118 cache_entry->slb_v = slb_v;
1119 cache_entry->mmio_update = mmio_update;
1120 }
1121 if (data && (vcpu->arch.shregs.msr & MSR_IR))
1122 return -2; /* MMIO emulation - load instr word */
1123 }
994 1124
995 return -1; /* send fault up to host kernel mode */ 1125 return -1; /* send fault up to host kernel mode */
996} 1126}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index a0ea63ac2b52..6a4c4d758a9e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -24,6 +24,7 @@
24#include <asm/pnv-pci.h> 24#include <asm/pnv-pci.h>
25#include <asm/opal.h> 25#include <asm/opal.h>
26#include <asm/smp.h> 26#include <asm/smp.h>
27#include <asm/asm-prototypes.h>
27 28
28#include "book3s_xics.h" 29#include "book3s_xics.h"
29 30
@@ -70,7 +71,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
70 hcpu = hcore << threads_shift; 71 hcpu = hcore << threads_shift;
71 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; 72 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
72 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); 73 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
73 icp_native_cause_ipi_rm(hcpu); 74 if (paca[hcpu].kvm_hstate.xics_phys)
75 icp_native_cause_ipi_rm(hcpu);
76 else
77 opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu),
78 IPI_PRIORITY);
74} 79}
75#else 80#else
76static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } 81static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
@@ -737,7 +742,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
737 742
738unsigned long eoi_rc; 743unsigned long eoi_rc;
739 744
740static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) 745static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
741{ 746{
742 unsigned long xics_phys; 747 unsigned long xics_phys;
743 int64_t rc; 748 int64_t rc;
@@ -751,7 +756,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
751 756
752 /* EOI it */ 757 /* EOI it */
753 xics_phys = local_paca->kvm_hstate.xics_phys; 758 xics_phys = local_paca->kvm_hstate.xics_phys;
754 _stwcix(xics_phys + XICS_XIRR, xirr); 759 if (xics_phys) {
760 _stwcix(xics_phys + XICS_XIRR, xirr);
761 } else {
762 rc = opal_rm_int_eoi(be32_to_cpu(xirr));
763 *again = rc > 0;
764 }
755} 765}
756 766
757static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) 767static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
@@ -809,9 +819,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
809} 819}
810 820
811long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, 821long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
812 u32 xirr, 822 __be32 xirr,
813 struct kvmppc_irq_map *irq_map, 823 struct kvmppc_irq_map *irq_map,
814 struct kvmppc_passthru_irqmap *pimap) 824 struct kvmppc_passthru_irqmap *pimap,
825 bool *again)
815{ 826{
816 struct kvmppc_xics *xics; 827 struct kvmppc_xics *xics;
817 struct kvmppc_icp *icp; 828 struct kvmppc_icp *icp;
@@ -825,7 +836,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
825 icp_rm_deliver_irq(xics, icp, irq); 836 icp_rm_deliver_irq(xics, icp, irq);
826 837
827 /* EOI the interrupt */ 838 /* EOI the interrupt */
828 icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); 839 icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
840 again);
829 841
830 if (check_too_hard(xics, icp) == H_TOO_HARD) 842 if (check_too_hard(xics, icp) == H_TOO_HARD)
831 return 2; 843 return 2;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c3c1d1bcfc67..9338a818e05c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
501 cmpwi r0, 0 501 cmpwi r0, 0
502 beq 57f 502 beq 57f
503 li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 503 li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
504 mfspr r4, SPRN_LPCR 504 mfspr r5, SPRN_LPCR
505 rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) 505 rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
506 mtspr SPRN_LPCR, r4 506 b kvm_nap_sequence
507 isync
508 std r0, HSTATE_SCRATCH0(r13)
509 ptesync
510 ld r0, HSTATE_SCRATCH0(r13)
5111: cmpd r0, r0
512 bne 1b
513 nap
514 b .
515 507
51657: li r0, 0 50857: li r0, 0
517 stbx r0, r3, r4 509 stbx r0, r3, r4
@@ -523,6 +515,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
523 * * 515 * *
524 *****************************************************************************/ 516 *****************************************************************************/
525 517
518/* Stack frame offsets */
519#define STACK_SLOT_TID (112-16)
520#define STACK_SLOT_PSSCR (112-24)
521
526.global kvmppc_hv_entry 522.global kvmppc_hv_entry
527kvmppc_hv_entry: 523kvmppc_hv_entry:
528 524
@@ -581,12 +577,14 @@ kvmppc_hv_entry:
581 ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ 577 ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
582 cmpwi r6,0 578 cmpwi r6,0
583 bne 10f 579 bne 10f
584 ld r6,KVM_SDR1(r9)
585 lwz r7,KVM_LPID(r9) 580 lwz r7,KVM_LPID(r9)
581BEGIN_FTR_SECTION
582 ld r6,KVM_SDR1(r9)
586 li r0,LPID_RSVD /* switch to reserved LPID */ 583 li r0,LPID_RSVD /* switch to reserved LPID */
587 mtspr SPRN_LPID,r0 584 mtspr SPRN_LPID,r0
588 ptesync 585 ptesync
589 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 586 mtspr SPRN_SDR1,r6 /* switch to partition page table */
587END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
590 mtspr SPRN_LPID,r7 588 mtspr SPRN_LPID,r7
591 isync 589 isync
592 590
@@ -607,12 +605,8 @@ kvmppc_hv_entry:
607 stdcx. r7,0,r6 605 stdcx. r7,0,r6
608 bne 23b 606 bne 23b
609 /* Flush the TLB of any entries for this LPID */ 607 /* Flush the TLB of any entries for this LPID */
610 /* use arch 2.07S as a proxy for POWER8 */ 608 lwz r6,KVM_TLB_SETS(r9)
611BEGIN_FTR_SECTION 609 li r0,0 /* RS for P9 version of tlbiel */
612 li r6,512 /* POWER8 has 512 sets */
613FTR_SECTION_ELSE
614 li r6,128 /* POWER7 has 128 sets */
615ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
616 mtctr r6 610 mtctr r6
617 li r7,0x800 /* IS field = 0b10 */ 611 li r7,0x800 /* IS field = 0b10 */
618 ptesync 612 ptesync
@@ -698,6 +692,14 @@ kvmppc_got_guest:
698 mtspr SPRN_PURR,r7 692 mtspr SPRN_PURR,r7
699 mtspr SPRN_SPURR,r8 693 mtspr SPRN_SPURR,r8
700 694
695 /* Save host values of some registers */
696BEGIN_FTR_SECTION
697 mfspr r5, SPRN_TIDR
698 mfspr r6, SPRN_PSSCR
699 std r5, STACK_SLOT_TID(r1)
700 std r6, STACK_SLOT_PSSCR(r1)
701END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
702
701BEGIN_FTR_SECTION 703BEGIN_FTR_SECTION
702 /* Set partition DABR */ 704 /* Set partition DABR */
703 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ 705 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
@@ -750,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
750BEGIN_FTR_SECTION 752BEGIN_FTR_SECTION
751 ld r5, VCPU_MMCR + 24(r4) 753 ld r5, VCPU_MMCR + 24(r4)
752 ld r6, VCPU_SIER(r4) 754 ld r6, VCPU_SIER(r4)
755 mtspr SPRN_MMCR2, r5
756 mtspr SPRN_SIER, r6
757BEGIN_FTR_SECTION_NESTED(96)
753 lwz r7, VCPU_PMC + 24(r4) 758 lwz r7, VCPU_PMC + 24(r4)
754 lwz r8, VCPU_PMC + 28(r4) 759 lwz r8, VCPU_PMC + 28(r4)
755 ld r9, VCPU_MMCR + 32(r4) 760 ld r9, VCPU_MMCR + 32(r4)
756 mtspr SPRN_MMCR2, r5
757 mtspr SPRN_SIER, r6
758 mtspr SPRN_SPMC1, r7 761 mtspr SPRN_SPMC1, r7
759 mtspr SPRN_SPMC2, r8 762 mtspr SPRN_SPMC2, r8
760 mtspr SPRN_MMCRS, r9 763 mtspr SPRN_MMCRS, r9
764END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
761END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 765END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
762 mtspr SPRN_MMCR0, r3 766 mtspr SPRN_MMCR0, r3
763 isync 767 isync
@@ -813,20 +817,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
813 mtspr SPRN_EBBHR, r8 817 mtspr SPRN_EBBHR, r8
814 ld r5, VCPU_EBBRR(r4) 818 ld r5, VCPU_EBBRR(r4)
815 ld r6, VCPU_BESCR(r4) 819 ld r6, VCPU_BESCR(r4)
816 ld r7, VCPU_CSIGR(r4) 820 lwz r7, VCPU_GUEST_PID(r4)
817 ld r8, VCPU_TACR(r4) 821 ld r8, VCPU_WORT(r4)
818 mtspr SPRN_EBBRR, r5 822 mtspr SPRN_EBBRR, r5
819 mtspr SPRN_BESCR, r6 823 mtspr SPRN_BESCR, r6
820 mtspr SPRN_CSIGR, r7 824 mtspr SPRN_PID, r7
821 mtspr SPRN_TACR, r8 825 mtspr SPRN_WORT, r8
826BEGIN_FTR_SECTION
827 /* POWER8-only registers */
822 ld r5, VCPU_TCSCR(r4) 828 ld r5, VCPU_TCSCR(r4)
823 ld r6, VCPU_ACOP(r4) 829 ld r6, VCPU_ACOP(r4)
824 lwz r7, VCPU_GUEST_PID(r4) 830 ld r7, VCPU_CSIGR(r4)
825 ld r8, VCPU_WORT(r4) 831 ld r8, VCPU_TACR(r4)
826 mtspr SPRN_TCSCR, r5 832 mtspr SPRN_TCSCR, r5
827 mtspr SPRN_ACOP, r6 833 mtspr SPRN_ACOP, r6
828 mtspr SPRN_PID, r7 834 mtspr SPRN_CSIGR, r7
829 mtspr SPRN_WORT, r8 835 mtspr SPRN_TACR, r8
836FTR_SECTION_ELSE
837 /* POWER9-only registers */
838 ld r5, VCPU_TID(r4)
839 ld r6, VCPU_PSSCR(r4)
840 oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
841 mtspr SPRN_TIDR, r5
842 mtspr SPRN_PSSCR, r6
843ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8308: 8448:
831 845
832 /* 846 /*
@@ -1341,20 +1355,29 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
1341 std r8, VCPU_EBBHR(r9) 1355 std r8, VCPU_EBBHR(r9)
1342 mfspr r5, SPRN_EBBRR 1356 mfspr r5, SPRN_EBBRR
1343 mfspr r6, SPRN_BESCR 1357 mfspr r6, SPRN_BESCR
1344 mfspr r7, SPRN_CSIGR 1358 mfspr r7, SPRN_PID
1345 mfspr r8, SPRN_TACR 1359 mfspr r8, SPRN_WORT
1346 std r5, VCPU_EBBRR(r9) 1360 std r5, VCPU_EBBRR(r9)
1347 std r6, VCPU_BESCR(r9) 1361 std r6, VCPU_BESCR(r9)
1348 std r7, VCPU_CSIGR(r9) 1362 stw r7, VCPU_GUEST_PID(r9)
1349 std r8, VCPU_TACR(r9) 1363 std r8, VCPU_WORT(r9)
1364BEGIN_FTR_SECTION
1350 mfspr r5, SPRN_TCSCR 1365 mfspr r5, SPRN_TCSCR
1351 mfspr r6, SPRN_ACOP 1366 mfspr r6, SPRN_ACOP
1352 mfspr r7, SPRN_PID 1367 mfspr r7, SPRN_CSIGR
1353 mfspr r8, SPRN_WORT 1368 mfspr r8, SPRN_TACR
1354 std r5, VCPU_TCSCR(r9) 1369 std r5, VCPU_TCSCR(r9)
1355 std r6, VCPU_ACOP(r9) 1370 std r6, VCPU_ACOP(r9)
1356 stw r7, VCPU_GUEST_PID(r9) 1371 std r7, VCPU_CSIGR(r9)
1357 std r8, VCPU_WORT(r9) 1372 std r8, VCPU_TACR(r9)
1373FTR_SECTION_ELSE
1374 mfspr r5, SPRN_TIDR
1375 mfspr r6, SPRN_PSSCR
1376 std r5, VCPU_TID(r9)
1377 rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
1378 rotldi r6, r6, 60
1379 std r6, VCPU_PSSCR(r9)
1380ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1358 /* 1381 /*
1359 * Restore various registers to 0, where non-zero values 1382 * Restore various registers to 0, where non-zero values
1360 * set by the guest could disrupt the host. 1383 * set by the guest could disrupt the host.
@@ -1363,12 +1386,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
1363 mtspr SPRN_IAMR, r0 1386 mtspr SPRN_IAMR, r0
1364 mtspr SPRN_CIABR, r0 1387 mtspr SPRN_CIABR, r0
1365 mtspr SPRN_DAWRX, r0 1388 mtspr SPRN_DAWRX, r0
1366 mtspr SPRN_TCSCR, r0
1367 mtspr SPRN_WORT, r0 1389 mtspr SPRN_WORT, r0
1390BEGIN_FTR_SECTION
1391 mtspr SPRN_TCSCR, r0
1368 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ 1392 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
1369 li r0, 1 1393 li r0, 1
1370 sldi r0, r0, 31 1394 sldi r0, r0, 31
1371 mtspr SPRN_MMCRS, r0 1395 mtspr SPRN_MMCRS, r0
1396END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
13728: 13978:
1373 1398
1374 /* Save and reset AMR and UAMOR before turning on the MMU */ 1399 /* Save and reset AMR and UAMOR before turning on the MMU */
@@ -1502,15 +1527,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1502 stw r8, VCPU_PMC + 20(r9) 1527 stw r8, VCPU_PMC + 20(r9)
1503BEGIN_FTR_SECTION 1528BEGIN_FTR_SECTION
1504 mfspr r5, SPRN_SIER 1529 mfspr r5, SPRN_SIER
1530 std r5, VCPU_SIER(r9)
1531BEGIN_FTR_SECTION_NESTED(96)
1505 mfspr r6, SPRN_SPMC1 1532 mfspr r6, SPRN_SPMC1
1506 mfspr r7, SPRN_SPMC2 1533 mfspr r7, SPRN_SPMC2
1507 mfspr r8, SPRN_MMCRS 1534 mfspr r8, SPRN_MMCRS
1508 std r5, VCPU_SIER(r9)
1509 stw r6, VCPU_PMC + 24(r9) 1535 stw r6, VCPU_PMC + 24(r9)
1510 stw r7, VCPU_PMC + 28(r9) 1536 stw r7, VCPU_PMC + 28(r9)
1511 std r8, VCPU_MMCR + 32(r9) 1537 std r8, VCPU_MMCR + 32(r9)
1512 lis r4, 0x8000 1538 lis r4, 0x8000
1513 mtspr SPRN_MMCRS, r4 1539 mtspr SPRN_MMCRS, r4
1540END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
1514END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1541END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
151522: 154222:
1516 /* Clear out SLB */ 1543 /* Clear out SLB */
@@ -1519,6 +1546,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1519 slbia 1546 slbia
1520 ptesync 1547 ptesync
1521 1548
1549 /* Restore host values of some registers */
1550BEGIN_FTR_SECTION
1551 ld r5, STACK_SLOT_TID(r1)
1552 ld r6, STACK_SLOT_PSSCR(r1)
1553 mtspr SPRN_TIDR, r5
1554 mtspr SPRN_PSSCR, r6
1555END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1556
1522 /* 1557 /*
1523 * POWER7/POWER8 guest -> host partition switch code. 1558 * POWER7/POWER8 guest -> host partition switch code.
1524 * We don't have to lock against tlbies but we do 1559 * We don't have to lock against tlbies but we do
@@ -1552,12 +1587,14 @@ kvmhv_switch_to_host:
1552 beq 19f 1587 beq 19f
1553 1588
1554 /* Primary thread switches back to host partition */ 1589 /* Primary thread switches back to host partition */
1555 ld r6,KVM_HOST_SDR1(r4)
1556 lwz r7,KVM_HOST_LPID(r4) 1590 lwz r7,KVM_HOST_LPID(r4)
1591BEGIN_FTR_SECTION
1592 ld r6,KVM_HOST_SDR1(r4)
1557 li r8,LPID_RSVD /* switch to reserved LPID */ 1593 li r8,LPID_RSVD /* switch to reserved LPID */
1558 mtspr SPRN_LPID,r8 1594 mtspr SPRN_LPID,r8
1559 ptesync 1595 ptesync
1560 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 1596 mtspr SPRN_SDR1,r6 /* switch to host page table */
1597END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1561 mtspr SPRN_LPID,r7 1598 mtspr SPRN_LPID,r7
1562 isync 1599 isync
1563 1600
@@ -2211,6 +2248,21 @@ BEGIN_FTR_SECTION
2211 ori r5, r5, LPCR_PECEDH 2248 ori r5, r5, LPCR_PECEDH
2212 rlwimi r5, r3, 0, LPCR_PECEDP 2249 rlwimi r5, r3, 0, LPCR_PECEDP
2213END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 2250END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2251
2252kvm_nap_sequence: /* desired LPCR value in r5 */
2253BEGIN_FTR_SECTION
2254 /*
2255 * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
2256 * enable state loss = 1 (allow SMT mode switch)
2257 * requested level = 0 (just stop dispatching)
2258 */
2259 lis r3, (PSSCR_EC | PSSCR_ESL)@h
2260 mtspr SPRN_PSSCR, r3
2261 /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
2262 li r4, LPCR_PECE_HVEE@higher
2263 sldi r4, r4, 32
2264 or r5, r5, r4
2265END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2214 mtspr SPRN_LPCR,r5 2266 mtspr SPRN_LPCR,r5
2215 isync 2267 isync
2216 li r0, 0 2268 li r0, 0
@@ -2219,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2219 ld r0, HSTATE_SCRATCH0(r13) 2271 ld r0, HSTATE_SCRATCH0(r13)
22201: cmpd r0, r0 22721: cmpd r0, r0
2221 bne 1b 2273 bne 1b
2274BEGIN_FTR_SECTION
2222 nap 2275 nap
2276FTR_SECTION_ELSE
2277 PPC_STOP
2278ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
2223 b . 2279 b .
2224 2280
222533: mr r4, r3 228133: mr r4, r3
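
kvm_nap_sequence now has two idle paths: POWER8 threads still execute nap, while POWER9 threads program PSSCR and execute stop (PPC_STOP), with LPCR_PECE_HVEE set so hypervisor-directed interrupts can wake the stopped thread. The stand-alone sketch below only shows how the constants in the feature section combine; the bit positions are my assumptions from reading asm/reg.h, not something this patch guarantees.

#include <stdio.h>

/* Assumed values of the PSSCR/LPCR bits used above (see asm/reg.h). */
#define PSSCR_EC        0x00100000ULL   /* Exit Criterion: wake per LPCR */
#define PSSCR_ESL       0x00200000ULL   /* Enable State Loss (SMT switch) */
#define PSSCR_RL_MASK   0x0000000fULL   /* Requested Level */
#define LPCR_PECE_HVEE  (1ULL << 46)    /* assumed: wake on HV interrupts */

int main(void)
{
        /* lis r3, (PSSCR_EC | PSSCR_ESL)@h builds this value: EC=1, ESL=1,
         * RL=0, i.e. "just stop dispatching", wakeup criteria from LPCR. */
        unsigned long long psscr = PSSCR_EC | PSSCR_ESL;

        /* li r4, LPCR_PECE_HVEE@higher ; sldi r4, r4, 32 reconstructs the
         * 64-bit LPCR bit from its bits 32-47. */
        unsigned long long pece_hvee = ((LPCR_PECE_HVEE >> 32) & 0xffffULL) << 32;

        printf("PSSCR = %#llx (RL = %llu), LPCR |= %#llx\n",
               psscr, psscr & PSSCR_RL_MASK, pece_hvee);
        return 0;
}
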
@@ -2600,11 +2656,13 @@ kvmppc_save_tm:
2600 mfctr r7 2656 mfctr r7
2601 mfspr r8, SPRN_AMR 2657 mfspr r8, SPRN_AMR
2602 mfspr r10, SPRN_TAR 2658 mfspr r10, SPRN_TAR
2659 mfxer r11
2603 std r5, VCPU_LR_TM(r9) 2660 std r5, VCPU_LR_TM(r9)
2604 stw r6, VCPU_CR_TM(r9) 2661 stw r6, VCPU_CR_TM(r9)
2605 std r7, VCPU_CTR_TM(r9) 2662 std r7, VCPU_CTR_TM(r9)
2606 std r8, VCPU_AMR_TM(r9) 2663 std r8, VCPU_AMR_TM(r9)
2607 std r10, VCPU_TAR_TM(r9) 2664 std r10, VCPU_TAR_TM(r9)
2665 std r11, VCPU_XER_TM(r9)
2608 2666
2609 /* Restore r12 as trap number. */ 2667 /* Restore r12 as trap number. */
2610 lwz r12, VCPU_TRAP(r9) 2668 lwz r12, VCPU_TRAP(r9)
@@ -2697,11 +2755,13 @@ kvmppc_restore_tm:
2697 ld r7, VCPU_CTR_TM(r4) 2755 ld r7, VCPU_CTR_TM(r4)
2698 ld r8, VCPU_AMR_TM(r4) 2756 ld r8, VCPU_AMR_TM(r4)
2699 ld r9, VCPU_TAR_TM(r4) 2757 ld r9, VCPU_TAR_TM(r4)
2758 ld r10, VCPU_XER_TM(r4)
2700 mtlr r5 2759 mtlr r5
2701 mtcr r6 2760 mtcr r6
2702 mtctr r7 2761 mtctr r7
2703 mtspr SPRN_AMR, r8 2762 mtspr SPRN_AMR, r8
2704 mtspr SPRN_TAR, r9 2763 mtspr SPRN_TAR, r9
2764 mtxer r10
2705 2765
2706 /* 2766 /*
2707 * Load up PPR and DSCR values but don't put them in the actual SPRs 2767 * Load up PPR and DSCR values but don't put them in the actual SPRs
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 70963c845e96..efd1183a6b16 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
536#ifdef CONFIG_PPC_BOOK3S_64 536#ifdef CONFIG_PPC_BOOK3S_64
537 case KVM_CAP_SPAPR_TCE: 537 case KVM_CAP_SPAPR_TCE:
538 case KVM_CAP_SPAPR_TCE_64: 538 case KVM_CAP_SPAPR_TCE_64:
539 case KVM_CAP_PPC_ALLOC_HTAB:
540 case KVM_CAP_PPC_RTAS: 539 case KVM_CAP_PPC_RTAS:
541 case KVM_CAP_PPC_FIXUP_HCALL: 540 case KVM_CAP_PPC_FIXUP_HCALL:
542 case KVM_CAP_PPC_ENABLE_HCALL: 541 case KVM_CAP_PPC_ENABLE_HCALL:
@@ -545,13 +544,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
545#endif 544#endif
546 r = 1; 545 r = 1;
547 break; 546 break;
547
548 case KVM_CAP_PPC_ALLOC_HTAB:
549 r = hv_enabled;
550 break;
548#endif /* CONFIG_PPC_BOOK3S_64 */ 551#endif /* CONFIG_PPC_BOOK3S_64 */
549#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 552#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
550 case KVM_CAP_PPC_SMT: 553 case KVM_CAP_PPC_SMT:
551 if (hv_enabled) 554 r = 0;
552 r = threads_per_subcore; 555 if (hv_enabled) {
553 else 556 if (cpu_has_feature(CPU_FTR_ARCH_300))
554 r = 0; 557 r = 1;
558 else
559 r = threads_per_subcore;
560 }
555 break; 561 break;
556 case KVM_CAP_PPC_RMA: 562 case KVM_CAP_PPC_RMA:
557 r = 0; 563 r = 0;
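
With this change KVM_CAP_PPC_ALLOC_HTAB is only advertised when HV KVM is usable, and KVM_CAP_PPC_SMT reports 1 on POWER9 (where guest threading no longer depends on the host's threads per subcore) instead of threads_per_subcore. User space can query both through the standard KVM_CHECK_EXTENSION ioctl on the /dev/kvm (or VM) file descriptor; a small stand-alone example, shown only to illustrate the API, follows.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
        if (kvm < 0) {
                perror("open /dev/kvm");
                return 1;
        }

        /* KVM_CHECK_EXTENSION returns the capability's value, 0 if absent */
        int htab = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_ALLOC_HTAB);
        int smt  = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);

        printf("KVM_CAP_PPC_ALLOC_HTAB: %d (non-zero only with HV KVM)\n", htab);
        printf("KVM_CAP_PPC_SMT: %d (1 on POWER9, threads/subcore on POWER8)\n", smt);

        close(kvm);
        return 0;
}
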
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index fb21990c0fb4..ebc6dd449556 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup,
449 __entry->tgid = current->tgid; 449 __entry->tgid = current->tgid;
450 ), 450 ),
451 451
452 TP_printk("%s time %lld ns, tgid=%d", 452 TP_printk("%s time %llu ns, tgid=%d",
453 __entry->waited ? "wait" : "poll", 453 __entry->waited ? "wait" : "poll",
454 __entry->ns, __entry->tgid) 454 __entry->ns, __entry->tgid)
455); 455);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 83ddc0e171b0..ad9fd5245be2 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
221 return -1; 221 return -1;
222 222
223 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; 223 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
224 hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; 224 hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
225 225
226 if (!(vflags & HPTE_V_BOLTED)) { 226 if (!(vflags & HPTE_V_BOLTED)) {
227 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", 227 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
228 i, hpte_v, hpte_r); 228 i, hpte_v, hpte_r);
229 } 229 }
230 230
231 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
232 hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
233 hpte_v = hpte_old_to_new_v(hpte_v);
234 }
235
231 hptep->r = cpu_to_be64(hpte_r); 236 hptep->r = cpu_to_be64(hpte_r);
232 /* Guarantee the second dword is visible before the valid bit */ 237 /* Guarantee the second dword is visible before the valid bit */
233 eieio(); 238 eieio();
@@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
295 vpn, want_v & HPTE_V_AVPN, slot, newpp); 300 vpn, want_v & HPTE_V_AVPN, slot, newpp);
296 301
297 hpte_v = be64_to_cpu(hptep->v); 302 hpte_v = be64_to_cpu(hptep->v);
303 if (cpu_has_feature(CPU_FTR_ARCH_300))
304 hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
298 /* 305 /*
299 * We need to invalidate the TLB always because hpte_remove doesn't do 306 * We need to invalidate the TLB always because hpte_remove doesn't do
300 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less 307 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
309 native_lock_hpte(hptep); 316 native_lock_hpte(hptep);
310 /* recheck with locks held */ 317 /* recheck with locks held */
311 hpte_v = be64_to_cpu(hptep->v); 318 hpte_v = be64_to_cpu(hptep->v);
319 if (cpu_has_feature(CPU_FTR_ARCH_300))
320 hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
312 if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || 321 if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
313 !(hpte_v & HPTE_V_VALID))) { 322 !(hpte_v & HPTE_V_VALID))) {
314 ret = -1; 323 ret = -1;
@@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
350 for (i = 0; i < HPTES_PER_GROUP; i++) { 359 for (i = 0; i < HPTES_PER_GROUP; i++) {
351 hptep = htab_address + slot; 360 hptep = htab_address + slot;
352 hpte_v = be64_to_cpu(hptep->v); 361 hpte_v = be64_to_cpu(hptep->v);
362 if (cpu_has_feature(CPU_FTR_ARCH_300))
363 hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
353 364
354 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) 365 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
355 /* HPTE matches */ 366 /* HPTE matches */
@@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
409 want_v = hpte_encode_avpn(vpn, bpsize, ssize); 420 want_v = hpte_encode_avpn(vpn, bpsize, ssize);
410 native_lock_hpte(hptep); 421 native_lock_hpte(hptep);
411 hpte_v = be64_to_cpu(hptep->v); 422 hpte_v = be64_to_cpu(hptep->v);
423 if (cpu_has_feature(CPU_FTR_ARCH_300))
424 hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
412 425
413 /* 426 /*
414 * We need to invalidate the TLB always because hpte_remove doesn't do 427 * We need to invalidate the TLB always because hpte_remove doesn't do
@@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
467 want_v = hpte_encode_avpn(vpn, psize, ssize); 480 want_v = hpte_encode_avpn(vpn, psize, ssize);
468 native_lock_hpte(hptep); 481 native_lock_hpte(hptep);
469 hpte_v = be64_to_cpu(hptep->v); 482 hpte_v = be64_to_cpu(hptep->v);
483 if (cpu_has_feature(CPU_FTR_ARCH_300))
484 hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
470 485
471 /* Even if we miss, we need to invalidate the TLB */ 486 /* Even if we miss, we need to invalidate the TLB */
472 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) 487 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
@@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
504 /* Look at the 8 bit LP value */ 519 /* Look at the 8 bit LP value */
505 unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1); 520 unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
506 521
522 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
523 hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
524 hpte_r = hpte_new_to_old_r(hpte_r);
525 }
507 if (!(hpte_v & HPTE_V_LARGE)) { 526 if (!(hpte_v & HPTE_V_LARGE)) {
508 size = MMU_PAGE_4K; 527 size = MMU_PAGE_4K;
509 a_size = MMU_PAGE_4K; 528 a_size = MMU_PAGE_4K;
@@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
512 a_size = hpte_page_sizes[lp] >> 4; 531 a_size = hpte_page_sizes[lp] >> 4;
513 } 532 }
514 /* This works for all page sizes, and for 256M and 1T segments */ 533 /* This works for all page sizes, and for 256M and 1T segments */
515 if (cpu_has_feature(CPU_FTR_ARCH_300)) 534 *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
516 *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
517 else
518 *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
519
520 shift = mmu_psize_defs[size].shift; 535 shift = mmu_psize_defs[size].shift;
521 536
522 avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); 537 avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
@@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local)
639 want_v = hpte_encode_avpn(vpn, psize, ssize); 654 want_v = hpte_encode_avpn(vpn, psize, ssize);
640 native_lock_hpte(hptep); 655 native_lock_hpte(hptep);
641 hpte_v = be64_to_cpu(hptep->v); 656 hpte_v = be64_to_cpu(hptep->v);
657 if (cpu_has_feature(CPU_FTR_ARCH_300))
658 hpte_v = hpte_new_to_old_v(hpte_v,
659 be64_to_cpu(hptep->r));
642 if (!HPTE_V_COMPARE(hpte_v, want_v) || 660 if (!HPTE_V_COMPARE(hpte_v, want_v) ||
643 !(hpte_v & HPTE_V_VALID)) 661 !(hpte_v & HPTE_V_VALID))
644 native_unlock_hpte(hptep); 662 native_unlock_hpte(hptep);
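
The native hash routines above now convert HPTEs between the ISA v3.00 (POWER9) in-memory layout and the pre-POWER9 layout before comparing or decoding them: on POWER9 the segment-size (B) field lives in the second doubleword, which is also why hpte_encode_r() loses its ssize argument in the hunks below. The stand-alone sketch that follows only illustrates the idea; the helpers reuse the names seen above but are simplified reimplementations, and the shift/mask constants are assumptions rather than values copied from mmu-hash.h.

#include <stdio.h>

/* Illustrative constants only; the real definitions live in
 * arch/powerpc/include/asm/book3s/64/mmu-hash.h. */
#define HPTE_V_SSIZE_SHIFT      62      /* B field in dword 0, pre-POWER9 */
#define HPTE_R_3_0_SSIZE_SHIFT  58      /* B field in dword 1, ISA v3.00 */
#define HPTE_R_3_0_SSIZE_MASK   (3ULL << HPTE_R_3_0_SSIZE_SHIFT)

/* Strip the B field out of the old-format V doubleword */
static unsigned long long hpte_old_to_new_v(unsigned long long v)
{
        return v & ~(3ULL << HPTE_V_SSIZE_SHIFT);
}

/* Move the B field from the old V doubleword into the new R doubleword */
static unsigned long long hpte_old_to_new_r(unsigned long long v,
                                            unsigned long long r)
{
        return r | ((v >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
}

/* Rebuild an old-format V doubleword so HPTE_V_COMPARE() keeps working */
static unsigned long long hpte_new_to_old_v(unsigned long long v,
                                            unsigned long long r)
{
        return v | (((r & HPTE_R_3_0_SSIZE_MASK) >> HPTE_R_3_0_SSIZE_SHIFT)
                    << HPTE_V_SSIZE_SHIFT);
}

int main(void)
{
        unsigned long long v = 1ULL << HPTE_V_SSIZE_SHIFT; /* B = 1 (1T seg) */
        unsigned long long r = 0;

        unsigned long long nv = hpte_old_to_new_v(v);
        unsigned long long nr = hpte_old_to_new_r(v, r);

        printf("old v=%#llx -> new v=%#llx r=%#llx -> back v=%#llx\n",
               v, nv, nr, hpte_new_to_old_v(nv, nr));
        return 0;
}
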
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 44d3c3a38e3e..b9a062f5805b 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -792,37 +792,17 @@ static void update_hid_for_hash(void)
792static void __init hash_init_partition_table(phys_addr_t hash_table, 792static void __init hash_init_partition_table(phys_addr_t hash_table,
793 unsigned long htab_size) 793 unsigned long htab_size)
794{ 794{
795 unsigned long ps_field; 795 mmu_partition_table_init();
796 unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
797 796
798 /* 797 /*
799 * slb llp encoding for the page size used in VPM real mode. 798 * PS field (VRMA page size) is not used for LPID 0, hence set to 0.
800 * We can ignore that for lpid 0 799 * For now, UPRT is 0 and we have no segment table.
801 */ 800 */
802 ps_field = 0;
803 htab_size = __ilog2(htab_size) - 18; 801 htab_size = __ilog2(htab_size) - 18;
804 802 mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
805 BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
806 partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
807 MEMBLOCK_ALLOC_ANYWHERE));
808
809 /* Initialize the Partition Table with no entries */
810 memset((void *)partition_tb, 0, patb_size);
811 partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
812 /*
813 * FIXME!! This should be done via update_partition table
814 * For now UPRT is 0 for us.
815 */
816 partition_tb->patb1 = 0;
817 pr_info("Partition table %p\n", partition_tb); 803 pr_info("Partition table %p\n", partition_tb);
818 if (cpu_has_feature(CPU_FTR_POWER9_DD1)) 804 if (cpu_has_feature(CPU_FTR_POWER9_DD1))
819 update_hid_for_hash(); 805 update_hid_for_hash();
820 /*
821 * update partition table control register,
822 * 64 K size.
823 */
824 mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
825
826} 806}
827 807
828static void __init htab_initialize(void) 808static void __init htab_initialize(void)
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index ed7bddc456b7..186f1adb04ec 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -177,23 +177,15 @@ redo:
177 177
178static void __init radix_init_partition_table(void) 178static void __init radix_init_partition_table(void)
179{ 179{
180 unsigned long rts_field; 180 unsigned long rts_field, dw0;
181 181
182 mmu_partition_table_init();
182 rts_field = radix__get_tree_size(); 183 rts_field = radix__get_tree_size();
184 dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
185 mmu_partition_table_set_entry(0, dw0, 0);
183 186
184 BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
185 partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
186 partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
187 RADIX_PGD_INDEX_SIZE | PATB_HR);
188 pr_info("Initializing Radix MMU\n"); 187 pr_info("Initializing Radix MMU\n");
189 pr_info("Partition table %p\n", partition_tb); 188 pr_info("Partition table %p\n", partition_tb);
190
191 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
192 /*
193 * update partition table control register,
194 * 64 K size.
195 */
196 mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
197} 189}
198 190
199void __init radix_init_native(void) 191void __init radix_init_native(void)
@@ -378,6 +370,8 @@ void __init radix__early_init_mmu(void)
378 radix_init_partition_table(); 370 radix_init_partition_table();
379 } 371 }
380 372
373 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
374
381 radix_init_pgtable(); 375 radix_init_pgtable();
382} 376}
383 377
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f5e8d4edb808..8bca7f58afc4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
431 } 431 }
432} 432}
433#endif 433#endif
434
435#ifdef CONFIG_PPC_BOOK3S_64
436void __init mmu_partition_table_init(void)
437{
438 unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
439
440 BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
441 partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
442 MEMBLOCK_ALLOC_ANYWHERE));
443
444 /* Initialize the Partition Table with no entries */
445 memset((void *)partition_tb, 0, patb_size);
446
447 /*
448 * update partition table control register,
449 * 64 K size.
450 */
451 mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
452}
453
454void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
455 unsigned long dw1)
456{
457 partition_tb[lpid].patb0 = cpu_to_be64(dw0);
458 partition_tb[lpid].patb1 = cpu_to_be64(dw1);
459
460 /* Global flush of TLBs and partition table caches for this lpid */
461 asm volatile("ptesync" : : : "memory");
462 asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
463 "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
464 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
465}
466EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
467#endif /* CONFIG_PPC_BOOK3S_64 */
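
mmu_partition_table_init() and mmu_partition_table_set_entry() factor the partition-table setup out of the hash and radix init paths so it can be reused (for example by KVM for guest LPIDs on POWER9); each entry write is followed by a ptesync/tlbie/tlbsync sequence to flush cached translations and partition-table entries for that LPID. As a rough guide, this is how the two __init callers in this patch compose dword 0 of entry 0 — a sketch assembled from the hunks above, not a complete function:

/* Hash (HPT) host, LPID 0: dword 0 = HTAB base | encoded HTAB size */
mmu_partition_table_init();
mmu_partition_table_set_entry(0, hash_table | (__ilog2(htab_size) - 18), 0);

/* Radix host, LPID 0: dword 0 = RTS | PGD base | PGD size | Host Radix */
mmu_partition_table_init();
mmu_partition_table_set_entry(0,
        radix__get_tree_size() | __pa(init_mm.pgd) |
        RADIX_PGD_INDEX_SIZE | PATB_HR,
        0 /* dword 1: no process table yet (UPRT = 0) */);
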
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 44d2d842cee7..3aa40f1b20f5 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
304OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); 304OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
305OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); 305OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
306OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); 306OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
307OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR);
307OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); 308OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
308OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); 309OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
310OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI);
309OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); 311OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
312OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR);
310OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); 313OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
311OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); 314OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 6c9a65b52e63..b3b8930ac52f 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -896,3 +896,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind);
896EXPORT_SYMBOL_GPL(opal_leds_set_ind); 896EXPORT_SYMBOL_GPL(opal_leds_set_ind);
897/* Export this symbol for PowerNV Operator Panel class driver */ 897/* Export this symbol for PowerNV Operator Panel class driver */
898EXPORT_SYMBOL_GPL(opal_write_oppanel_async); 898EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
899/* Export this for KVM */
900EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index cb3c50328de8..cc2b281a3766 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
63 vflags &= ~HPTE_V_SECONDARY; 63 vflags &= ~HPTE_V_SECONDARY;
64 64
65 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; 65 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
66 hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; 66 hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
67 67
68 spin_lock_irqsave(&ps3_htab_lock, flags); 68 spin_lock_irqsave(&ps3_htab_lock, flags);
69 69
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index aa35245d8d6d..f2c98f6c1c9c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
145 hpte_group, vpn, pa, rflags, vflags, psize); 145 hpte_group, vpn, pa, rflags, vflags, psize);
146 146
147 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; 147 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
148 hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; 148 hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
149 149
150 if (!(vflags & HPTE_V_BOLTED)) 150 if (!(vflags & HPTE_V_BOLTED))
151 pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); 151 pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 274bf343cbd0..913e4d77c0a8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1113,6 +1113,10 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
1113 1113
1114extern bool kvm_rebooting; 1114extern bool kvm_rebooting;
1115 1115
1116extern unsigned int halt_poll_ns;
1117extern unsigned int halt_poll_ns_grow;
1118extern unsigned int halt_poll_ns_shrink;
1119
1116struct kvm_device { 1120struct kvm_device {
1117 struct kvm_device_ops *ops; 1121 struct kvm_device_ops *ops;
1118 struct kvm *kvm; 1122 struct kvm *kvm;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 300ef255d1e0..e9f5ceffd741 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -651,6 +651,9 @@ struct kvm_enable_cap {
651}; 651};
652 652
653/* for KVM_PPC_GET_PVINFO */ 653/* for KVM_PPC_GET_PVINFO */
654
655#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
656
654struct kvm_ppc_pvinfo { 657struct kvm_ppc_pvinfo {
655 /* out */ 658 /* out */
656 __u32 flags; 659 __u32 flags;
@@ -682,8 +685,6 @@ struct kvm_ppc_smmu_info {
682 struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; 685 struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
683}; 686};
684 687
685#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
686
687#define KVMIO 0xAE 688#define KVMIO 0xAE
688 689
689/* machine type bits, to be used as argument to KVM_CREATE_VM */ 690/* machine type bits, to be used as argument to KVM_CREATE_VM */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fbf04c0c898c..9831cdf35436 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,16 +70,19 @@ MODULE_AUTHOR("Qumranet");
70MODULE_LICENSE("GPL"); 70MODULE_LICENSE("GPL");
71 71
72/* Architectures should define their poll value according to the halt latency */ 72/* Architectures should define their poll value according to the halt latency */
73static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; 73unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
74module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); 74module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
75EXPORT_SYMBOL_GPL(halt_poll_ns);
75 76
76/* Default doubles per-vcpu halt_poll_ns. */ 77/* Default doubles per-vcpu halt_poll_ns. */
77static unsigned int halt_poll_ns_grow = 2; 78unsigned int halt_poll_ns_grow = 2;
78module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); 79module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
80EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
79 81
80/* Default resets per-vcpu halt_poll_ns . */ 82/* Default resets per-vcpu halt_poll_ns . */
81static unsigned int halt_poll_ns_shrink; 83unsigned int halt_poll_ns_shrink;
82module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); 84module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
85EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
83 86
84/* 87/*
85 * Ordering of locks: 88 * Ordering of locks: