-rw-r--r--  Documentation/virtual/kvm/00-INDEX | 24
-rw-r--r--  Documentation/virtual/kvm/api.txt | 152
-rw-r--r--  Documentation/virtual/kvm/cpuid.txt | 7
-rw-r--r--  Documentation/virtual/kvm/devices/vfio.txt | 22
-rw-r--r--  Documentation/virtual/kvm/locking.txt | 19
-rw-r--r--  MAINTAINERS | 3
-rw-r--r--  arch/arm/include/asm/kvm_arm.h | 9
-rw-r--r--  arch/arm/include/asm/kvm_asm.h | 2
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 51
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 6
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 17
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h | 2
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 3
-rw-r--r--  arch/arm/kvm/Kconfig | 1
-rw-r--r--  arch/arm/kvm/Makefile | 2
-rw-r--r--  arch/arm/kvm/arm.c | 18
-rw-r--r--  arch/arm/kvm/coproc.c | 120
-rw-r--r--  arch/arm/kvm/coproc_a15.c | 117
-rw-r--r--  arch/arm/kvm/coproc_a7.c | 54
-rw-r--r--  arch/arm/kvm/emulate.c | 2
-rw-r--r--  arch/arm/kvm/guest.c | 24
-rw-r--r--  arch/arm/kvm/handle_exit.c | 20
-rw-r--r--  arch/arm/kvm/mmio.c | 86
-rw-r--r--  arch/arm/kvm/mmu.c | 223
-rw-r--r--  arch/arm/kvm/psci.c | 21
-rw-r--r--  arch/arm/kvm/reset.c | 15
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 8
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 61
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 6
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 12
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 2
-rw-r--r--  arch/arm64/kvm/Kconfig | 1
-rw-r--r--  arch/arm64/kvm/guest.c | 20
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 18
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 6
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 5
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 7
-rw-r--r--  arch/mips/kvm/kvm_mips.c | 5
-rw-r--r--  arch/powerpc/include/asm/disassemble.h | 4
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 21
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 4
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 232
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_32.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 8
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 9
-rw-r--r--  arch/powerpc/include/asm/kvm_booke.h | 7
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 57
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 107
-rw-r--r--  arch/powerpc/include/asm/paca.h | 2
-rw-r--r--  arch/powerpc/include/asm/processor.h | 2
-rw-r--r--  arch/powerpc/include/asm/pte-book3e.h | 2
-rw-r--r--  arch/powerpc/include/asm/reg.h | 15
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 86
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 21
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 30
-rw-r--r--  arch/powerpc/kernel/idle_power7.S | 2
-rw-r--r--  arch/powerpc/kernel/traps.c | 2
-rw-r--r--  arch/powerpc/kvm/44x.c | 58
-rw-r--r--  arch/powerpc/kvm/44x_emulate.c | 8
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c | 2
-rw-r--r--  arch/powerpc/kvm/Kconfig | 28
-rw-r--r--  arch/powerpc/kvm/Makefile | 29
-rw-r--r--  arch/powerpc/kvm/book3s.c | 257
-rw-r--r--  arch/powerpc/kvm/book3s.h | 34
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu.c | 73
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c | 16
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 181
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 106
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 24
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c | 18
-rw-r--r--  arch/powerpc/kvm/book3s_exports.c | 5
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 389
-rw-r--r--  arch/powerpc/kvm/book3s_hv_interrupts.S | 3
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 618
-rw-r--r--  arch/powerpc/kvm/book3s_interrupts.S | 32
-rw-r--r--  arch/powerpc/kvm/book3s_mmu_hpte.c | 66
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 498
-rw-r--r--  arch/powerpc/kvm/book3s_pr_papr.c | 52
-rw-r--r--  arch/powerpc/kvm/book3s_rmhandlers.S | 32
-rw-r--r--  arch/powerpc/kvm/book3s_rtas.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_segment.S | 4
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 7
-rw-r--r--  arch/powerpc/kvm/booke.c | 337
-rw-r--r--  arch/powerpc/kvm/booke.h | 29
-rw-r--r--  arch/powerpc/kvm/e500.c | 59
-rw-r--r--  arch/powerpc/kvm/e500.h | 2
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c | 34
-rw-r--r--  arch/powerpc/kvm/e500_mmu.c | 4
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c | 6
-rw-r--r--  arch/powerpc/kvm/e500mc.c | 58
-rw-r--r--  arch/powerpc/kvm/emulate.c | 12
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 171
-rw-r--r--  arch/powerpc/kvm/trace.h | 429
-rw-r--r--  arch/powerpc/kvm/trace_booke.h | 177
-rw-r--r--  arch/powerpc/kvm/trace_pr.h | 297
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 8
-rw-r--r--  arch/s390/kvm/diag.c | 4
-rw-r--r--  arch/s390/kvm/gaccess.h | 21
-rw-r--r--  arch/s390/kvm/intercept.c | 6
-rw-r--r--  arch/s390/kvm/interrupt.c | 3
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 96
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 9
-rw-r--r--  arch/s390/kvm/priv.c | 61
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h | 10
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 23
-rw-r--r--  arch/x86/include/asm/pvclock.h | 2
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 6
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 1
-rw-r--r--  arch/x86/kernel/kvmclock.c | 1
-rw-r--r--  arch/x86/kernel/pvclock.c | 13
-rw-r--r--  arch/x86/kvm/Kconfig | 1
-rw-r--r--  arch/x86/kvm/Makefile | 2
-rw-r--r--  arch/x86/kvm/cpuid.c | 115
-rw-r--r--  arch/x86/kvm/cpuid.h | 5
-rw-r--r--  arch/x86/kvm/emulate.c | 130
-rw-r--r--  arch/x86/kvm/mmu.c | 115
-rw-r--r--  arch/x86/kvm/mmu.h | 4
-rw-r--r--  arch/x86/kvm/svm.c | 8
-rw-r--r--  arch/x86/kvm/vmx.c | 158
-rw-r--r--  arch/x86/kvm/x86.c | 108
-rw-r--r--  arch/x86/kvm/x86.h | 1
-rw-r--r--  include/linux/kvm_host.h | 42
-rw-r--r--  include/linux/sched.h | 8
-rw-r--r--  include/linux/srcu.h | 14
-rw-r--r--  include/trace/events/kvm.h | 10
-rw-r--r--  include/uapi/linux/kvm.h | 11
-rw-r--r--  kernel/hung_task.c | 11
-rw-r--r--  virt/kvm/Kconfig | 3
-rw-r--r--  virt/kvm/async_pf.c | 22
-rw-r--r--  virt/kvm/iommu.c | 38
-rw-r--r--  virt/kvm/kvm_main.c | 134
-rw-r--r--  virt/kvm/vfio.c | 264
133 files changed, 5170 insertions, 2239 deletions
diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX
new file mode 100644
index 000000000000..641ec9220179
--- /dev/null
+++ b/Documentation/virtual/kvm/00-INDEX
@@ -0,0 +1,24 @@
100-INDEX
2 - this file.
3api.txt
4 - KVM userspace API.
5cpuid.txt
6 - KVM-specific cpuid leaves (x86).
7devices/
8 - KVM_CAP_DEVICE_CTRL userspace API.
9hypercalls.txt
10 - KVM hypercalls.
11locking.txt
12 - notes on KVM locks.
13mmu.txt
14 - the x86 kvm shadow mmu.
15msr.txt
16 - KVM-specific MSRs (x86).
17nested-vmx.txt
18 - notes on nested virtualization for Intel x86 processors.
19ppc-pv.txt
20 - the paravirtualization interface on PowerPC.
21review-checklist.txt
22 - review checklist for KVM patches.
23timekeeping.txt
24 - timekeeping virtualization for x86-based architectures.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 858aecf21db2..a30035dd4c26 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1122,9 +1122,9 @@ struct kvm_cpuid2 {
1122 struct kvm_cpuid_entry2 entries[0]; 1122 struct kvm_cpuid_entry2 entries[0];
1123}; 1123};
1124 1124
1125#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 1125#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
1126#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 1126#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
1127#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 1127#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
1128 1128
1129struct kvm_cpuid_entry2 { 1129struct kvm_cpuid_entry2 {
1130 __u32 function; 1130 __u32 function;
@@ -1810,6 +1810,50 @@ registers, find a list below:
1810 PPC | KVM_REG_PPC_TLB3PS | 32 1810 PPC | KVM_REG_PPC_TLB3PS | 32
1811 PPC | KVM_REG_PPC_EPTCFG | 32 1811 PPC | KVM_REG_PPC_EPTCFG | 32
1812 PPC | KVM_REG_PPC_ICP_STATE | 64 1812 PPC | KVM_REG_PPC_ICP_STATE | 64
1813 PPC | KVM_REG_PPC_TB_OFFSET | 64
1814 PPC | KVM_REG_PPC_SPMC1 | 32
1815 PPC | KVM_REG_PPC_SPMC2 | 32
1816 PPC | KVM_REG_PPC_IAMR | 64
1817 PPC | KVM_REG_PPC_TFHAR | 64
1818 PPC | KVM_REG_PPC_TFIAR | 64
1819 PPC | KVM_REG_PPC_TEXASR | 64
1820 PPC | KVM_REG_PPC_FSCR | 64
1821 PPC | KVM_REG_PPC_PSPB | 32
1822 PPC | KVM_REG_PPC_EBBHR | 64
1823 PPC | KVM_REG_PPC_EBBRR | 64
1824 PPC | KVM_REG_PPC_BESCR | 64
1825 PPC | KVM_REG_PPC_TAR | 64
1826 PPC | KVM_REG_PPC_DPDES | 64
1827 PPC | KVM_REG_PPC_DAWR | 64
1828 PPC | KVM_REG_PPC_DAWRX | 64
1829 PPC | KVM_REG_PPC_CIABR | 64
1830 PPC | KVM_REG_PPC_IC | 64
1831 PPC | KVM_REG_PPC_VTB | 64
1832 PPC | KVM_REG_PPC_CSIGR | 64
1833 PPC | KVM_REG_PPC_TACR | 64
1834 PPC | KVM_REG_PPC_TCSCR | 64
1835 PPC | KVM_REG_PPC_PID | 64
1836 PPC | KVM_REG_PPC_ACOP | 64
1837 PPC | KVM_REG_PPC_VRSAVE | 32
1838 PPC | KVM_REG_PPC_LPCR | 64
1839 PPC | KVM_REG_PPC_PPR | 64
 1840 PPC | KVM_REG_PPC_ARCH_COMPAT | 32
1841 PPC | KVM_REG_PPC_TM_GPR0 | 64
1842 ...
1843 PPC | KVM_REG_PPC_TM_GPR31 | 64
1844 PPC | KVM_REG_PPC_TM_VSR0 | 128
1845 ...
1846 PPC | KVM_REG_PPC_TM_VSR63 | 128
1847 PPC | KVM_REG_PPC_TM_CR | 64
1848 PPC | KVM_REG_PPC_TM_LR | 64
1849 PPC | KVM_REG_PPC_TM_CTR | 64
1850 PPC | KVM_REG_PPC_TM_FPSCR | 64
1851 PPC | KVM_REG_PPC_TM_AMR | 64
1852 PPC | KVM_REG_PPC_TM_PPR | 64
1853 PPC | KVM_REG_PPC_TM_VRSAVE | 64
1854 PPC | KVM_REG_PPC_TM_VSCR | 32
1855 PPC | KVM_REG_PPC_TM_DSCR | 64
1856 PPC | KVM_REG_PPC_TM_TAR | 64
1813 1857
1814ARM registers are mapped using the lower 32 bits. The upper 16 of that 1858ARM registers are mapped using the lower 32 bits. The upper 16 of that
1815is the register group type, or coprocessor number: 1859is the register group type, or coprocessor number:
@@ -2304,7 +2348,31 @@ Possible features:
2304 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 2348 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
2305 2349
2306 2350
23074.83 KVM_GET_REG_LIST 23514.83 KVM_ARM_PREFERRED_TARGET
2352
2353Capability: basic
2354Architectures: arm, arm64
2355Type: vm ioctl
 2356Parameters: struct kvm_vcpu_init (out)
2357Returns: 0 on success; -1 on error
2358Errors:
2359 ENODEV: no preferred target available for the host
2360
 2361This queries KVM for the preferred CPU target type which can be emulated
 2362by KVM on the underlying host.
2363
 2364The ioctl returns a struct kvm_vcpu_init instance containing information
 2365about the preferred CPU target type and recommended features for it. The
2366kvm_vcpu_init->features bitmap returned will have feature bits set if
2367the preferred target recommends setting these features, but this is
2368not mandatory.
2369
2370The information returned by this ioctl can be used to prepare an instance
 2371of struct kvm_vcpu_init for the KVM_ARM_VCPU_INIT ioctl, which will result
 2372in a VCPU matching the underlying host.
2373
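As an illustration of the intended flow (not part of the ABI description), a minimal
userspace sketch might look like the following; the helper name and the vmfd/vcpufd
parameters are assumptions, and error handling is reduced to err():

#include <err.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vmfd comes from KVM_CREATE_VM, vcpufd from KVM_CREATE_VCPU. */
static void vcpu_init_preferred(int vmfd, int vcpufd)
{
        struct kvm_vcpu_init init;

        if (ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
                err(1, "KVM_ARM_PREFERRED_TARGET");

        /* init.features is only a recommendation; userspace may still
         * set additional feature bits before initialising the VCPU. */
        if (ioctl(vcpufd, KVM_ARM_VCPU_INIT, &init) < 0)
                err(1, "KVM_ARM_VCPU_INIT");
}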
2374
23754.84 KVM_GET_REG_LIST
2308 2376
2309Capability: basic 2377Capability: basic
2310Architectures: arm, arm64 2378Architectures: arm, arm64
@@ -2323,8 +2391,7 @@ struct kvm_reg_list {
2323This ioctl returns the guest registers that are supported for the 2391This ioctl returns the guest registers that are supported for the
2324KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 2392KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
2325 2393
2326 23944.85 KVM_ARM_SET_DEVICE_ADDR
23274.84 KVM_ARM_SET_DEVICE_ADDR
2328 2395
2329Capability: KVM_CAP_ARM_SET_DEVICE_ADDR 2396Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
2330Architectures: arm, arm64 2397Architectures: arm, arm64
@@ -2362,7 +2429,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
2362KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the 2429KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
2363base addresses will return -EEXIST. 2430base addresses will return -EEXIST.
2364 2431
23654.85 KVM_PPC_RTAS_DEFINE_TOKEN 24324.86 KVM_PPC_RTAS_DEFINE_TOKEN
2366 2433
2367Capability: KVM_CAP_PPC_RTAS 2434Capability: KVM_CAP_PPC_RTAS
2368Architectures: ppc 2435Architectures: ppc
@@ -2661,6 +2728,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
2661}; 2728};
2662 2729
2663 2730
27314.81 KVM_GET_EMULATED_CPUID
2732
2733Capability: KVM_CAP_EXT_EMUL_CPUID
2734Architectures: x86
2735Type: system ioctl
2736Parameters: struct kvm_cpuid2 (in/out)
2737Returns: 0 on success, -1 on error
2738
2739struct kvm_cpuid2 {
2740 __u32 nent;
2741 __u32 flags;
2742 struct kvm_cpuid_entry2 entries[0];
2743};
2744
2745The member 'flags' is used for passing flags from userspace.
2746
2747#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
2748#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
2749#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
2750
2751struct kvm_cpuid_entry2 {
2752 __u32 function;
2753 __u32 index;
2754 __u32 flags;
2755 __u32 eax;
2756 __u32 ebx;
2757 __u32 ecx;
2758 __u32 edx;
2759 __u32 padding[3];
2760};
2761
2762This ioctl returns x86 cpuid features which are emulated by
 2763kvm. Userspace can use the information returned by this ioctl to query
2764which features are emulated by kvm instead of being present natively.
2765
2766Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
2767structure with the 'nent' field indicating the number of entries in
2768the variable-size array 'entries'. If the number of entries is too low
2769to describe the cpu capabilities, an error (E2BIG) is returned. If the
2770number is too high, the 'nent' field is adjusted and an error (ENOMEM)
2771is returned. If the number is just right, the 'nent' field is adjusted
2772to the number of valid entries in the 'entries' array, which is then
2773filled.
2774
2775The entries returned are the set CPUID bits of the respective features
2776which kvm emulates, as returned by the CPUID instruction, with unknown
2777or unsupported feature bits cleared.
2778
2779Features like x2apic, for example, may not be present in the host cpu
2780but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
 2781emulated efficiently; such features are therefore not included here.
2782
2783The fields in each entry are defined as follows:
2784
2785 function: the eax value used to obtain the entry
2786 index: the ecx value used to obtain the entry (for entries that are
2787 affected by ecx)
2788 flags: an OR of zero or more of the following:
2789 KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
2790 if the index field is valid
2791 KVM_CPUID_FLAG_STATEFUL_FUNC:
2792 if cpuid for this function returns different values for successive
2793 invocations; there will be several entries with the same function,
2794 all with this flag set
2795 KVM_CPUID_FLAG_STATE_READ_NEXT:
2796 for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
2797 the first entry to be read by a cpu
2798 eax, ebx, ecx, edx: the values returned by the cpuid instruction for
2799 this function/index combination
2800
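A userspace sketch of the 'nent' negotiation described above (the function name and
the starting size are illustrative only, not mandated by the ABI):

#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* kvm_fd is the /dev/kvm system fd. Returns NULL on failure. */
static struct kvm_cpuid2 *get_emulated_cpuid(int kvm_fd)
{
        int nent = 8;

        for (;;) {
                struct kvm_cpuid2 *cpuid;

                cpuid = calloc(1, sizeof(*cpuid) +
                                  nent * sizeof(struct kvm_cpuid_entry2));
                if (!cpuid)
                        return NULL;
                cpuid->nent = nent;

                if (ioctl(kvm_fd, KVM_GET_EMULATED_CPUID, cpuid) == 0)
                        return cpuid;           /* nent holds the valid count */

                if (errno == E2BIG)             /* too few entries: grow and retry */
                        nent *= 2;
                else if (errno == ENOMEM)       /* too many: kernel adjusted nent */
                        nent = cpuid->nent;
                else {
                        free(cpuid);
                        return NULL;
                }
                free(cpuid);
        }
}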
2801
26646. Capabilities that can be enabled 28026. Capabilities that can be enabled
2665----------------------------------- 2803-----------------------------------
2666 2804
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 22ff659bc0fb..3c65feb83010 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -43,6 +43,13 @@ KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
43KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by 43KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by
44 || || writing to msr 0x4b564d02 44 || || writing to msr 0x4b564d02
45------------------------------------------------------------------------------ 45------------------------------------------------------------------------------
46KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by
47 || || writing to msr 0x4b564d03.
48------------------------------------------------------------------------------
49KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt
50 || || handler can be enabled by writing
51 || || to msr 0x4b564d04.
52------------------------------------------------------------------------------
46KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit 53KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
47 || || before enabling paravirtualized 54 || || before enabling paravirtualized
48 || || spinlock support. 55 || || spinlock support.
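
To make the MSR-based enabling concrete, a guest-side sketch for steal time might look
as follows. It assumes the usual Linux paravirt helpers (kvm_para_has_feature(),
wrmsrl()) and is not a definitive implementation; in practice the area is per-VCPU.

#include <linux/kvm_para.h>
#include <asm/msr.h>

/* The steal time area must be a 64-byte aligned region of guest memory. */
static struct kvm_steal_time steal_time __aligned(64);

static void enable_steal_time_sketch(void)
{
        if (!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
                return;

        /* MSR 0x4b564d03: physical address of the area, bit 0 = enable */
        wrmsrl(MSR_KVM_STEAL_TIME, __pa(&steal_time) | KVM_MSR_ENABLED);
}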
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..ef51740c67ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
1VFIO virtual device
2===================
3
4Device types supported:
5 KVM_DEV_TYPE_VFIO
6
7Only one VFIO instance may be created per VM. The created device
8tracks VFIO groups in use by the VM and features of those groups
9important to the correctness and acceleration of the VM. As groups
10are enabled and disabled for use by the VM, KVM should be updated
11about their presence. When registered with KVM, a reference to the
12VFIO-group is held by KVM.
13
14Groups:
15 KVM_DEV_VFIO_GROUP
16
17KVM_DEV_VFIO_GROUP attributes:
18 KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
19 KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
20
21For each, kvm_device_attr.addr points to an int32_t file descriptor
22for the VFIO group.
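
A userspace sketch of the expected usage (the function name and parameters are
assumptions; error handling is omitted):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vmfd: the VM fd; group_fd: an open /dev/vfio/<group> fd.
 * Only one VFIO device may be created per VM, so the device fd would
 * normally be created once and cached rather than per group. */
static int kvm_vfio_add_group(int vmfd, int group_fd)
{
        struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
        struct kvm_device_attr attr = {
                .group = KVM_DEV_VFIO_GROUP,
                .attr  = KVM_DEV_VFIO_GROUP_ADD,
                .addr  = (__u64)(unsigned long)&group_fd,
        };

        if (ioctl(vmfd, KVM_CREATE_DEVICE, &cd) < 0)
                return -1;

        return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}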
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 41b7ac9884b5..f8869410d40c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
132------------ 132------------
133 133
134Name: kvm_lock 134Name: kvm_lock
135Type: raw_spinlock 135Type: spinlock_t
136Arch: any 136Arch: any
137Protects: - vm_list 137Protects: - vm_list
138 - hardware virtualization enable/disable 138
139Name: kvm_count_lock
140Type: raw_spinlock_t
141Arch: any
142Protects: - hardware virtualization enable/disable
139Comment: 'raw' because hardware enabling/disabling must be atomic /wrt 143Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
140 migration. 144 migration.
141 145
@@ -151,3 +155,14 @@ Type: spinlock_t
151Arch: any 155Arch: any
152Protects: -shadow page/shadow tlb entry 156Protects: -shadow page/shadow tlb entry
153Comment: it is a spinlock since it is used in mmu notifier. 157Comment: it is a spinlock since it is used in mmu notifier.
158
159Name: kvm->srcu
160Type: srcu lock
161Arch: any
162Protects: - kvm->memslots
163 - kvm->buses
164Comment: The srcu read lock must be held while accessing memslots (e.g.
165 when using gfn_to_* functions) and while accessing in-kernel
166 MMIO/PIO address->device structure mapping (kvm->buses).
167 The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
168 if it is needed by multiple functions.
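
For illustration, the expected read-side pattern looks roughly like this (the function
is a sketch, not an existing helper):

#include <linux/kvm_host.h>

static unsigned long lookup_hva_under_srcu(struct kvm *kvm, gfn_t gfn)
{
        unsigned long hva;
        int idx;

        idx = srcu_read_lock(&kvm->srcu);
        hva = gfn_to_hva(kvm, gfn);     /* memslot access requires kvm->srcu */
        srcu_read_unlock(&kvm->srcu, idx);

        return hva;
}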
diff --git a/MAINTAINERS b/MAINTAINERS
index f3ef1d1f6029..583af4b72ad0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4871,7 +4871,8 @@ KERNEL VIRTUAL MACHINE (KVM)
4871M: Gleb Natapov <gleb@redhat.com> 4871M: Gleb Natapov <gleb@redhat.com>
4872M: Paolo Bonzini <pbonzini@redhat.com> 4872M: Paolo Bonzini <pbonzini@redhat.com>
4873L: kvm@vger.kernel.org 4873L: kvm@vger.kernel.org
4874W: http://linux-kvm.org 4874W: http://www.linux-kvm.org
4875T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
4875S: Supported 4876S: Supported
4876F: Documentation/*/kvm*.txt 4877F: Documentation/*/kvm*.txt
4877F: Documentation/virtual/kvm/ 4878F: Documentation/virtual/kvm/
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 64e96960de29..1d3153c7eb41 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -57,6 +57,7 @@
57 * TSC: Trap SMC 57 * TSC: Trap SMC
58 * TSW: Trap cache operations by set/way 58 * TSW: Trap cache operations by set/way
59 * TWI: Trap WFI 59 * TWI: Trap WFI
60 * TWE: Trap WFE
60 * TIDCP: Trap L2CTLR/L2ECTLR 61 * TIDCP: Trap L2CTLR/L2ECTLR
61 * BSU_IS: Upgrade barriers to the inner shareable domain 62 * BSU_IS: Upgrade barriers to the inner shareable domain
 62 * FB: Force broadcast of all maintenance operations 63 * FB: Force broadcast of all maintenance operations
@@ -67,7 +68,7 @@
67 */ 68 */
68#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ 69#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
69 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ 70 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
70 HCR_SWIO | HCR_TIDCP) 71 HCR_TWE | HCR_SWIO | HCR_TIDCP)
71#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) 72#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
72 73
73/* System Control Register (SCTLR) bits */ 74/* System Control Register (SCTLR) bits */
@@ -95,12 +96,12 @@
95#define TTBCR_IRGN1 (3 << 24) 96#define TTBCR_IRGN1 (3 << 24)
96#define TTBCR_EPD1 (1 << 23) 97#define TTBCR_EPD1 (1 << 23)
97#define TTBCR_A1 (1 << 22) 98#define TTBCR_A1 (1 << 22)
98#define TTBCR_T1SZ (3 << 16) 99#define TTBCR_T1SZ (7 << 16)
99#define TTBCR_SH0 (3 << 12) 100#define TTBCR_SH0 (3 << 12)
100#define TTBCR_ORGN0 (3 << 10) 101#define TTBCR_ORGN0 (3 << 10)
101#define TTBCR_IRGN0 (3 << 8) 102#define TTBCR_IRGN0 (3 << 8)
102#define TTBCR_EPD0 (1 << 7) 103#define TTBCR_EPD0 (1 << 7)
103#define TTBCR_T0SZ 3 104#define TTBCR_T0SZ (7 << 0)
104#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) 105#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
105 106
106/* Hyp System Trap Register */ 107/* Hyp System Trap Register */
@@ -208,6 +209,8 @@
208#define HSR_EC_DABT (0x24) 209#define HSR_EC_DABT (0x24)
209#define HSR_EC_DABT_HYP (0x25) 210#define HSR_EC_DABT_HYP (0x25)
210 211
212#define HSR_WFI_IS_WFE (1U << 0)
213
211#define HSR_HVC_IMM_MASK ((1UL << 16) - 1) 214#define HSR_HVC_IMM_MASK ((1UL << 16) - 1)
212 215
213#define HSR_DABT_S1PTW (1U << 7) 216#define HSR_DABT_S1PTW (1U << 7)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index a2f43ddcc300..661da11f76f4 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -39,7 +39,7 @@
39#define c6_IFAR 17 /* Instruction Fault Address Register */ 39#define c6_IFAR 17 /* Instruction Fault Address Register */
40#define c7_PAR 18 /* Physical Address Register */ 40#define c7_PAR 18 /* Physical Address Register */
41#define c7_PAR_high 19 /* PAR top 32 bits */ 41#define c7_PAR_high 19 /* PAR top 32 bits */
42#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ 42#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
43#define c10_PRRR 21 /* Primary Region Remap Register */ 43#define c10_PRRR 21 /* Primary Region Remap Register */
44#define c10_NMRR 22 /* Normal Memory Remap Register */ 44#define c10_NMRR 22 /* Normal Memory Remap Register */
45#define c12_VBAR 23 /* Vector Base Address Register */ 45#define c12_VBAR 23 /* Vector Base Address Register */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index a464e8d7b6c5..0fa90c962ac8 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -157,4 +157,55 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
157 return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; 157 return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
158} 158}
159 159
160static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
161{
162 return vcpu->arch.cp15[c0_MPIDR];
163}
164
165static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
166{
167 *vcpu_cpsr(vcpu) |= PSR_E_BIT;
168}
169
170static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
171{
172 return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
173}
174
175static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
176 unsigned long data,
177 unsigned int len)
178{
179 if (kvm_vcpu_is_be(vcpu)) {
180 switch (len) {
181 case 1:
182 return data & 0xff;
183 case 2:
184 return be16_to_cpu(data & 0xffff);
185 default:
186 return be32_to_cpu(data);
187 }
188 }
189
190 return data; /* Leave LE untouched */
191}
192
193static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
194 unsigned long data,
195 unsigned int len)
196{
197 if (kvm_vcpu_is_be(vcpu)) {
198 switch (len) {
199 case 1:
200 return data & 0xff;
201 case 2:
202 return cpu_to_be16(data & 0xffff);
203 default:
204 return cpu_to_be32(data);
205 }
206 }
207
208 return data; /* Leave LE untouched */
209}
210
160#endif /* __ARM_KVM_EMULATE_H__ */ 211#endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7d22517d8071..8a6f6db14ee4 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -38,11 +38,6 @@
38 38
39#define KVM_VCPU_MAX_FEATURES 1 39#define KVM_VCPU_MAX_FEATURES 1
40 40
41/* We don't currently support large pages. */
42#define KVM_HPAGE_GFN_SHIFT(x) 0
43#define KVM_NR_PAGE_SIZES 1
44#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
45
46#include <kvm/arm_vgic.h> 41#include <kvm/arm_vgic.h>
47 42
48struct kvm_vcpu; 43struct kvm_vcpu;
@@ -154,6 +149,7 @@ struct kvm_vcpu_stat {
154struct kvm_vcpu_init; 149struct kvm_vcpu_init;
155int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 150int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
156 const struct kvm_vcpu_init *init); 151 const struct kvm_vcpu_init *init);
152int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
157unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 153unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
158int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 154int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
159struct kvm_one_reg; 155struct kvm_one_reg;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 9b28c41f4ba9..77de4a41cc50 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void);
62int kvm_mmu_init(void); 62int kvm_mmu_init(void);
63void kvm_clear_hyp_idmap(void); 63void kvm_clear_hyp_idmap(void);
64 64
65static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
66{
67 *pmd = new_pmd;
68 flush_pmd_entry(pmd);
69}
70
65static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) 71static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
66{ 72{
67 *pte = new_pte; 73 *pte = new_pte;
@@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
103 pte_val(*pte) |= L_PTE_S2_RDWR; 109 pte_val(*pte) |= L_PTE_S2_RDWR;
104} 110}
105 111
112static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
113{
114 pmd_val(*pmd) |= L_PMD_S2_RDWR;
115}
116
106struct kvm; 117struct kvm;
107 118
108static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) 119static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
120 unsigned long size)
109{ 121{
110 /* 122 /*
111 * If we are going to insert an instruction page and the icache is 123 * If we are going to insert an instruction page and the icache is
@@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
120 * need any kind of flushing (DDI 0406C.b - Page B3-1392). 132 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
121 */ 133 */
122 if (icache_is_pipt()) { 134 if (icache_is_pipt()) {
123 unsigned long hva = gfn_to_hva(kvm, gfn); 135 __cpuc_coherent_user_range(hva, hva + size);
124 __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
125 } else if (!icache_is_vivt_asid_tagged()) { 136 } else if (!icache_is_vivt_asid_tagged()) {
126 /* any kind of VIPT cache */ 137 /* any kind of VIPT cache */
127 __flush_icache_all(); 138 __flush_icache_all();
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 39c54cfa03e9..4f9503908dca 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -126,6 +126,8 @@
126#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ 126#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */
127#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 127#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
128 128
129#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
130
129/* 131/*
130 * Hyp-mode PL2 PTE definitions for LPAE. 132 * Hyp-mode PL2 PTE definitions for LPAE.
131 */ 133 */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007523d7..c498b60c0505 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -63,7 +63,8 @@ struct kvm_regs {
63 63
64/* Supported Processor Types */ 64/* Supported Processor Types */
65#define KVM_ARM_TARGET_CORTEX_A15 0 65#define KVM_ARM_TARGET_CORTEX_A15 0
66#define KVM_ARM_NUM_TARGETS 1 66#define KVM_ARM_TARGET_CORTEX_A7 1
67#define KVM_ARM_NUM_TARGETS 2
67 68
68/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 69/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
69#define KVM_ARM_DEVICE_TYPE_SHIFT 0 70#define KVM_ARM_DEVICE_TYPE_SHIFT 0
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index ebf5015508b5..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
20 bool "Kernel-based Virtual Machine (KVM) support" 20 bool "Kernel-based Virtual Machine (KVM) support"
21 select PREEMPT_NOTIFIERS 21 select PREEMPT_NOTIFIERS
22 select ANON_INODES 22 select ANON_INODES
23 select HAVE_KVM_CPU_RELAX_INTERCEPT
23 select KVM_MMIO 24 select KVM_MMIO
24 select KVM_ARM_HOST 25 select KVM_ARM_HOST
25 depends on ARM_VIRT_EXT && ARM_LPAE 26 depends on ARM_VIRT_EXT && ARM_LPAE
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index d99bee4950e5..789bca9e64a7 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
19 19
20obj-y += kvm-arm.o init.o interrupts.o 20obj-y += kvm-arm.o init.o interrupts.o
21obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o 21obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
22obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o 22obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
23obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o 23obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
24obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o 24obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index aea7ccb8d397..2a700e00528d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
152 return VM_FAULT_SIGBUS; 152 return VM_FAULT_SIGBUS;
153} 153}
154 154
155void kvm_arch_free_memslot(struct kvm_memory_slot *free, 155void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
156 struct kvm_memory_slot *dont) 156 struct kvm_memory_slot *dont)
157{ 157{
158} 158}
159 159
160int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 160int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
161 unsigned long npages)
161{ 162{
162 return 0; 163 return 0;
163} 164}
@@ -797,6 +798,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
797 return -EFAULT; 798 return -EFAULT;
798 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); 799 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
799 } 800 }
801 case KVM_ARM_PREFERRED_TARGET: {
802 int err;
803 struct kvm_vcpu_init init;
804
805 err = kvm_vcpu_preferred_target(&init);
806 if (err)
807 return err;
808
809 if (copy_to_user(argp, &init, sizeof(init)))
810 return -EFAULT;
811
812 return 0;
813 }
800 default: 814 default:
801 return -EINVAL; 815 return -EINVAL;
802 } 816 }
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index db9cf692d4dd..78c0885d6501 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -71,6 +71,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
71 return 1; 71 return 1;
72} 72}
73 73
74static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
75{
76 /*
77 * Compute guest MPIDR. We build a virtual cluster out of the
78 * vcpu_id, but we read the 'U' bit from the underlying
79 * hardware directly.
80 */
81 vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
82 ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
83 (vcpu->vcpu_id & 3));
84}
85
86/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
87static bool access_actlr(struct kvm_vcpu *vcpu,
88 const struct coproc_params *p,
89 const struct coproc_reg *r)
90{
91 if (p->is_write)
92 return ignore_write(vcpu, p);
93
94 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
95 return true;
96}
97
98/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
99static bool access_cbar(struct kvm_vcpu *vcpu,
100 const struct coproc_params *p,
101 const struct coproc_reg *r)
102{
103 if (p->is_write)
104 return write_to_read_only(vcpu, p);
105 return read_zero(vcpu, p);
106}
107
108/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
109static bool access_l2ctlr(struct kvm_vcpu *vcpu,
110 const struct coproc_params *p,
111 const struct coproc_reg *r)
112{
113 if (p->is_write)
114 return ignore_write(vcpu, p);
115
116 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
117 return true;
118}
119
120static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
121{
122 u32 l2ctlr, ncores;
123
124 asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
125 l2ctlr &= ~(3 << 24);
126 ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
127 /* How many cores in the current cluster and the next ones */
128 ncores -= (vcpu->vcpu_id & ~3);
129 /* Cap it to the maximum number of cores in a single cluster */
130 ncores = min(ncores, 3U);
131 l2ctlr |= (ncores & 3) << 24;
132
133 vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
134}
135
136static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
137{
138 u32 actlr;
139
140 /* ACTLR contains SMP bit: make sure you create all cpus first! */
141 asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
142 /* Make the SMP bit consistent with the guest configuration */
143 if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
144 actlr |= 1U << 6;
145 else
146 actlr &= ~(1U << 6);
147
148 vcpu->arch.cp15[c1_ACTLR] = actlr;
149}
150
151/*
152 * TRM entries: A7:4.3.50, A15:4.3.49
153 * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
154 */
155static bool access_l2ectlr(struct kvm_vcpu *vcpu,
156 const struct coproc_params *p,
157 const struct coproc_reg *r)
158{
159 if (p->is_write)
160 return ignore_write(vcpu, p);
161
162 *vcpu_reg(vcpu, p->Rt1) = 0;
163 return true;
164}
165
74/* See note at ARM ARM B1.14.4 */ 166/* See note at ARM ARM B1.14.4 */
75static bool access_dcsw(struct kvm_vcpu *vcpu, 167static bool access_dcsw(struct kvm_vcpu *vcpu,
76 const struct coproc_params *p, 168 const struct coproc_params *p,
@@ -153,10 +245,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
153 * registers preceding 32-bit ones. 245 * registers preceding 32-bit ones.
154 */ 246 */
155static const struct coproc_reg cp15_regs[] = { 247static const struct coproc_reg cp15_regs[] = {
248 /* MPIDR: we use VMPIDR for guest access. */
249 { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
250 NULL, reset_mpidr, c0_MPIDR },
251
156 /* CSSELR: swapped by interrupt.S. */ 252 /* CSSELR: swapped by interrupt.S. */
157 { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, 253 { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
158 NULL, reset_unknown, c0_CSSELR }, 254 NULL, reset_unknown, c0_CSSELR },
159 255
256 /* ACTLR: trapped by HCR.TAC bit. */
257 { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
258 access_actlr, reset_actlr, c1_ACTLR },
259
260 /* CPACR: swapped by interrupt.S. */
261 { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
262 NULL, reset_val, c1_CPACR, 0x00000000 },
263
160 /* TTBR0/TTBR1: swapped by interrupt.S. */ 264 /* TTBR0/TTBR1: swapped by interrupt.S. */
161 { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, 265 { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
162 { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, 266 { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
@@ -195,6 +299,13 @@ static const struct coproc_reg cp15_regs[] = {
195 { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, 299 { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
196 { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, 300 { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
197 /* 301 /*
302 * L2CTLR access (guest wants to know #CPUs).
303 */
304 { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
305 access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
306 { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
307
308 /*
198 * Dummy performance monitor implementation. 309 * Dummy performance monitor implementation.
199 */ 310 */
200 { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr}, 311 { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
@@ -234,6 +345,9 @@ static const struct coproc_reg cp15_regs[] = {
234 /* CNTKCTL: swapped by interrupt.S. */ 345 /* CNTKCTL: swapped by interrupt.S. */
235 { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, 346 { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
236 NULL, reset_val, c14_CNTKCTL, 0x00000000 }, 347 NULL, reset_val, c14_CNTKCTL, 0x00000000 },
348
349 /* The Configuration Base Address Register. */
350 { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
237}; 351};
238 352
239/* Target specific emulation tables */ 353/* Target specific emulation tables */
@@ -241,6 +355,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
241 355
242void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) 356void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
243{ 357{
358 unsigned int i;
359
360 for (i = 1; i < table->num; i++)
361 BUG_ON(cmp_reg(&table->table[i-1],
362 &table->table[i]) >= 0);
363
244 target_tables[table->target] = table; 364 target_tables[table->target] = table;
245} 365}
246 366
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index cf93472b9dd6..bb0cac1410cc 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,101 +17,12 @@
17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 */ 18 */
19#include <linux/kvm_host.h> 19#include <linux/kvm_host.h>
20#include <asm/cputype.h>
21#include <asm/kvm_arm.h>
22#include <asm/kvm_host.h>
23#include <asm/kvm_emulate.h>
24#include <asm/kvm_coproc.h> 20#include <asm/kvm_coproc.h>
21#include <asm/kvm_emulate.h>
25#include <linux/init.h> 22#include <linux/init.h>
26 23
27static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
28{
29 /*
30 * Compute guest MPIDR:
31 * (Even if we present only one VCPU to the guest on an SMP
32 * host we don't set the U bit in the MPIDR, or vice versa, as
33 * revealing the underlying hardware properties is likely to
34 * be the best choice).
35 */
36 vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
37 | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
38}
39
40#include "coproc.h" 24#include "coproc.h"
41 25
42/* A15 TRM 4.3.28: RO WI */
43static bool access_actlr(struct kvm_vcpu *vcpu,
44 const struct coproc_params *p,
45 const struct coproc_reg *r)
46{
47 if (p->is_write)
48 return ignore_write(vcpu, p);
49
50 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
51 return true;
52}
53
54/* A15 TRM 4.3.60: R/O. */
55static bool access_cbar(struct kvm_vcpu *vcpu,
56 const struct coproc_params *p,
57 const struct coproc_reg *r)
58{
59 if (p->is_write)
60 return write_to_read_only(vcpu, p);
61 return read_zero(vcpu, p);
62}
63
64/* A15 TRM 4.3.48: R/O WI. */
65static bool access_l2ctlr(struct kvm_vcpu *vcpu,
66 const struct coproc_params *p,
67 const struct coproc_reg *r)
68{
69 if (p->is_write)
70 return ignore_write(vcpu, p);
71
72 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
73 return true;
74}
75
76static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
77{
78 u32 l2ctlr, ncores;
79
80 asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
81 l2ctlr &= ~(3 << 24);
82 ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
83 l2ctlr |= (ncores & 3) << 24;
84
85 vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
86}
87
88static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
89{
90 u32 actlr;
91
92 /* ACTLR contains SMP bit: make sure you create all cpus first! */
93 asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
94 /* Make the SMP bit consistent with the guest configuration */
95 if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
96 actlr |= 1U << 6;
97 else
98 actlr &= ~(1U << 6);
99
100 vcpu->arch.cp15[c1_ACTLR] = actlr;
101}
102
103/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
104static bool access_l2ectlr(struct kvm_vcpu *vcpu,
105 const struct coproc_params *p,
106 const struct coproc_reg *r)
107{
108 if (p->is_write)
109 return ignore_write(vcpu, p);
110
111 *vcpu_reg(vcpu, p->Rt1) = 0;
112 return true;
113}
114
115/* 26/*
116 * A15-specific CP15 registers. 27 * A15-specific CP15 registers.
117 * CRn denotes the primary register number, but is copied to the CRm in the 28 * CRn denotes the primary register number, but is copied to the CRm in the
@@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
121 * registers preceding 32-bit ones. 32 * registers preceding 32-bit ones.
122 */ 33 */
123static const struct coproc_reg a15_regs[] = { 34static const struct coproc_reg a15_regs[] = {
124 /* MPIDR: we use VMPIDR for guest access. */
125 { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
126 NULL, reset_mpidr, c0_MPIDR },
127
128 /* SCTLR: swapped by interrupt.S. */ 35 /* SCTLR: swapped by interrupt.S. */
129 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 36 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
130 NULL, reset_val, c1_SCTLR, 0x00C50078 }, 37 NULL, reset_val, c1_SCTLR, 0x00C50078 },
131 /* ACTLR: trapped by HCR.TAC bit. */
132 { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
133 access_actlr, reset_actlr, c1_ACTLR },
134 /* CPACR: swapped by interrupt.S. */
135 { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
136 NULL, reset_val, c1_CPACR, 0x00000000 },
137
138 /*
139 * L2CTLR access (guest wants to know #CPUs).
140 */
141 { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
142 access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
143 { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
144
145 /* The Configuration Base Address Register. */
146 { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
147}; 38};
148 39
149static struct kvm_coproc_target_table a15_target_table = { 40static struct kvm_coproc_target_table a15_target_table = {
@@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
154 45
155static int __init coproc_a15_init(void) 46static int __init coproc_a15_init(void)
156{ 47{
157 unsigned int i;
158
159 for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
160 BUG_ON(cmp_reg(&a15_regs[i-1],
161 &a15_regs[i]) >= 0);
162
163 kvm_register_target_coproc_table(&a15_target_table); 48 kvm_register_target_coproc_table(&a15_target_table);
164 return 0; 49 return 0;
165} 50}
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 000000000000..1df767331588
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Copyright (C) 2013 - ARM Ltd
4 *
5 * Authors: Rusty Russell <rusty@rustcorp.au>
6 * Christoffer Dall <c.dall@virtualopensystems.com>
7 * Jonathan Austin <jonathan.austin@arm.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License, version 2, as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 */
22#include <linux/kvm_host.h>
23#include <asm/kvm_coproc.h>
24#include <asm/kvm_emulate.h>
25#include <linux/init.h>
26
27#include "coproc.h"
28
29/*
30 * Cortex-A7 specific CP15 registers.
31 * CRn denotes the primary register number, but is copied to the CRm in the
32 * user space API for 64-bit register access in line with the terminology used
33 * in the ARM ARM.
34 * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
35 * registers preceding 32-bit ones.
36 */
37static const struct coproc_reg a7_regs[] = {
38 /* SCTLR: swapped by interrupt.S. */
39 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
40 NULL, reset_val, c1_SCTLR, 0x00C50878 },
41};
42
43static struct kvm_coproc_target_table a7_target_table = {
44 .target = KVM_ARM_TARGET_CORTEX_A7,
45 .table = a7_regs,
46 .num = ARRAY_SIZE(a7_regs),
47};
48
49static int __init coproc_a7_init(void)
50{
51 kvm_register_target_coproc_table(&a7_target_table);
52 return 0;
53}
54late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da51..d6c005283678 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
354 *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; 354 *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
355 355
356 if (is_pabt) { 356 if (is_pabt) {
357 /* Set DFAR and DFSR */ 357 /* Set IFAR and IFSR */
358 vcpu->arch.cp15[c6_IFAR] = addr; 358 vcpu->arch.cp15[c6_IFAR] = addr;
359 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); 359 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
360 /* Always give debug fault for now - should give guest a clue */ 360 /* Always give debug fault for now - should give guest a clue */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d03612181..20f8d97904af 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void)
190 return -EINVAL; 190 return -EINVAL;
191 191
192 switch (part_number) { 192 switch (part_number) {
193 case ARM_CPU_PART_CORTEX_A7:
194 return KVM_ARM_TARGET_CORTEX_A7;
193 case ARM_CPU_PART_CORTEX_A15: 195 case ARM_CPU_PART_CORTEX_A15:
194 return KVM_ARM_TARGET_CORTEX_A15; 196 return KVM_ARM_TARGET_CORTEX_A15;
195 default: 197 default:
@@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
202{ 204{
203 unsigned int i; 205 unsigned int i;
204 206
205 /* We can only do a cortex A15 for now. */ 207 /* We can only cope with guest==host and only on A15/A7 (for now). */
206 if (init->target != kvm_target_cpu()) 208 if (init->target != kvm_target_cpu())
207 return -EINVAL; 209 return -EINVAL;
208 210
@@ -222,6 +224,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
222 return kvm_reset_vcpu(vcpu); 224 return kvm_reset_vcpu(vcpu);
223} 225}
224 226
227int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
228{
229 int target = kvm_target_cpu();
230
231 if (target < 0)
232 return -ENODEV;
233
234 memset(init, 0, sizeof(*init));
235
236 /*
237 * For now, we don't return any features.
238 * In future, we might use features to return target
239 * specific features available for the preferred
240 * target type.
241 */
242 init->target = (__u32)target;
243
244 return 0;
245}
246
225int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 247int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
226{ 248{
227 return -EINVAL; 249 return -EINVAL;
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index df4c82d47ad7..a92079011a83 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -73,23 +73,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
73} 73}
74 74
75/** 75/**
 76 * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest 76 * kvm_handle_wfx - handle a WFI or WFE instruction trapped in a guest
77 * @vcpu: the vcpu pointer 77 * @vcpu: the vcpu pointer
78 * @run: the kvm_run structure pointer 78 * @run: the kvm_run structure pointer
79 * 79 *
80 * Simply sets the wait_for_interrupts flag on the vcpu structure, which will 80 * WFE: Yield the CPU and come back to this vcpu when the scheduler
81 * halt execution of world-switches and schedule other host processes until 81 * decides to.
82 * there is an incoming IRQ or FIQ to the VM. 82 * WFI: Simply call kvm_vcpu_block(), which will halt execution of
83 * world-switches and schedule other host processes until there is an
84 * incoming IRQ or FIQ to the VM.
83 */ 85 */
84static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) 86static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
85{ 87{
86 trace_kvm_wfi(*vcpu_pc(vcpu)); 88 trace_kvm_wfi(*vcpu_pc(vcpu));
87 kvm_vcpu_block(vcpu); 89 if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
90 kvm_vcpu_on_spin(vcpu);
91 else
92 kvm_vcpu_block(vcpu);
93
88 return 1; 94 return 1;
89} 95}
90 96
91static exit_handle_fn arm_exit_handlers[] = { 97static exit_handle_fn arm_exit_handlers[] = {
92 [HSR_EC_WFI] = kvm_handle_wfi, 98 [HSR_EC_WFI] = kvm_handle_wfx,
93 [HSR_EC_CP15_32] = kvm_handle_cp15_32, 99 [HSR_EC_CP15_32] = kvm_handle_cp15_32,
94 [HSR_EC_CP15_64] = kvm_handle_cp15_64, 100 [HSR_EC_CP15_64] = kvm_handle_cp15_64,
95 [HSR_EC_CP14_MR] = kvm_handle_cp14_access, 101 [HSR_EC_CP14_MR] = kvm_handle_cp14_access,
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 0c25d9487d53..4cb5a93182e9 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,6 +23,68 @@
23 23
24#include "trace.h" 24#include "trace.h"
25 25
26static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
27{
28 void *datap = NULL;
29 union {
30 u8 byte;
31 u16 hword;
32 u32 word;
33 u64 dword;
34 } tmp;
35
36 switch (len) {
37 case 1:
38 tmp.byte = data;
39 datap = &tmp.byte;
40 break;
41 case 2:
42 tmp.hword = data;
43 datap = &tmp.hword;
44 break;
45 case 4:
46 tmp.word = data;
47 datap = &tmp.word;
48 break;
49 case 8:
50 tmp.dword = data;
51 datap = &tmp.dword;
52 break;
53 }
54
55 memcpy(buf, datap, len);
56}
57
58static unsigned long mmio_read_buf(char *buf, unsigned int len)
59{
60 unsigned long data = 0;
61 union {
62 u16 hword;
63 u32 word;
64 u64 dword;
65 } tmp;
66
67 switch (len) {
68 case 1:
69 data = buf[0];
70 break;
71 case 2:
72 memcpy(&tmp.hword, buf, len);
73 data = tmp.hword;
74 break;
75 case 4:
76 memcpy(&tmp.word, buf, len);
77 data = tmp.word;
78 break;
79 case 8:
80 memcpy(&tmp.dword, buf, len);
81 data = tmp.dword;
82 break;
83 }
84
85 return data;
86}
87
26/** 88/**
27 * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation 89 * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
28 * @vcpu: The VCPU pointer 90 * @vcpu: The VCPU pointer
@@ -33,28 +95,27 @@
33 */ 95 */
34int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) 96int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
35{ 97{
36 unsigned long *dest; 98 unsigned long data;
37 unsigned int len; 99 unsigned int len;
38 int mask; 100 int mask;
39 101
40 if (!run->mmio.is_write) { 102 if (!run->mmio.is_write) {
41 dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
42 *dest = 0;
43
44 len = run->mmio.len; 103 len = run->mmio.len;
45 if (len > sizeof(unsigned long)) 104 if (len > sizeof(unsigned long))
46 return -EINVAL; 105 return -EINVAL;
47 106
48 memcpy(dest, run->mmio.data, len); 107 data = mmio_read_buf(run->mmio.data, len);
49
50 trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
51 *((u64 *)run->mmio.data));
52 108
53 if (vcpu->arch.mmio_decode.sign_extend && 109 if (vcpu->arch.mmio_decode.sign_extend &&
54 len < sizeof(unsigned long)) { 110 len < sizeof(unsigned long)) {
55 mask = 1U << ((len * 8) - 1); 111 mask = 1U << ((len * 8) - 1);
56 *dest = (*dest ^ mask) - mask; 112 data = (data ^ mask) - mask;
57 } 113 }
114
115 trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
116 data);
117 data = vcpu_data_host_to_guest(vcpu, data, len);
118 *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
58 } 119 }
59 120
60 return 0; 121 return 0;
@@ -105,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
105 phys_addr_t fault_ipa) 166 phys_addr_t fault_ipa)
106{ 167{
107 struct kvm_exit_mmio mmio; 168 struct kvm_exit_mmio mmio;
169 unsigned long data;
108 unsigned long rt; 170 unsigned long rt;
109 int ret; 171 int ret;
110 172
@@ -125,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
125 } 187 }
126 188
127 rt = vcpu->arch.mmio_decode.rt; 189 rt = vcpu->arch.mmio_decode.rt;
190 data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
191
128 trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : 192 trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
129 KVM_TRACE_MMIO_READ_UNSATISFIED, 193 KVM_TRACE_MMIO_READ_UNSATISFIED,
130 mmio.len, fault_ipa, 194 mmio.len, fault_ipa,
131 (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); 195 (mmio.is_write) ? data : 0);
132 196
133 if (mmio.is_write) 197 if (mmio.is_write)
134 memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); 198 mmio_write_buf(mmio.data, mmio.len, data);
135 199
136 if (vgic_handle_mmio(vcpu, run, &mmio)) 200 if (vgic_handle_mmio(vcpu, run, &mmio))
137 return 1; 201 return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b0de86b56c13..371958370de4 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
19#include <linux/mman.h> 19#include <linux/mman.h>
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21#include <linux/io.h> 21#include <linux/io.h>
22#include <linux/hugetlb.h>
22#include <trace/events/kvm.h> 23#include <trace/events/kvm.h>
23#include <asm/pgalloc.h> 24#include <asm/pgalloc.h>
24#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
@@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start;
41static unsigned long hyp_idmap_end; 42static unsigned long hyp_idmap_end;
42static phys_addr_t hyp_idmap_vector; 43static phys_addr_t hyp_idmap_vector;
43 44
45#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
46
44static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 47static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
45{ 48{
46 /* 49 /*
@@ -93,19 +96,29 @@ static bool page_empty(void *ptr)
93 96
94static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 97static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
95{ 98{
96 pmd_t *pmd_table = pmd_offset(pud, 0); 99 if (pud_huge(*pud)) {
97 pud_clear(pud); 100 pud_clear(pud);
98 kvm_tlb_flush_vmid_ipa(kvm, addr); 101 kvm_tlb_flush_vmid_ipa(kvm, addr);
99 pmd_free(NULL, pmd_table); 102 } else {
103 pmd_t *pmd_table = pmd_offset(pud, 0);
104 pud_clear(pud);
105 kvm_tlb_flush_vmid_ipa(kvm, addr);
106 pmd_free(NULL, pmd_table);
107 }
100 put_page(virt_to_page(pud)); 108 put_page(virt_to_page(pud));
101} 109}
102 110
103static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 111static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
104{ 112{
105 pte_t *pte_table = pte_offset_kernel(pmd, 0); 113 if (kvm_pmd_huge(*pmd)) {
106 pmd_clear(pmd); 114 pmd_clear(pmd);
107 kvm_tlb_flush_vmid_ipa(kvm, addr); 115 kvm_tlb_flush_vmid_ipa(kvm, addr);
108 pte_free_kernel(NULL, pte_table); 116 } else {
117 pte_t *pte_table = pte_offset_kernel(pmd, 0);
118 pmd_clear(pmd);
119 kvm_tlb_flush_vmid_ipa(kvm, addr);
120 pte_free_kernel(NULL, pte_table);
121 }
109 put_page(virt_to_page(pmd)); 122 put_page(virt_to_page(pmd));
110} 123}
111 124
@@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
136 continue; 149 continue;
137 } 150 }
138 151
152 if (pud_huge(*pud)) {
153 /*
154 * If we are dealing with a huge pud, just clear it and
155 * move on.
156 */
157 clear_pud_entry(kvm, pud, addr);
158 addr = pud_addr_end(addr, end);
159 continue;
160 }
161
139 pmd = pmd_offset(pud, addr); 162 pmd = pmd_offset(pud, addr);
140 if (pmd_none(*pmd)) { 163 if (pmd_none(*pmd)) {
141 addr = pmd_addr_end(addr, end); 164 addr = pmd_addr_end(addr, end);
142 continue; 165 continue;
143 } 166 }
144 167
145 pte = pte_offset_kernel(pmd, addr); 168 if (!kvm_pmd_huge(*pmd)) {
146 clear_pte_entry(kvm, pte, addr); 169 pte = pte_offset_kernel(pmd, addr);
147 next = addr + PAGE_SIZE; 170 clear_pte_entry(kvm, pte, addr);
171 next = addr + PAGE_SIZE;
172 }
148 173
149 /* If we emptied the pte, walk back up the ladder */ 174 /*
150 if (page_empty(pte)) { 175 * If the pmd entry is to be cleared, walk back up the ladder
176 */
177 if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
151 clear_pmd_entry(kvm, pmd, addr); 178 clear_pmd_entry(kvm, pmd, addr);
152 next = pmd_addr_end(addr, end); 179 next = pmd_addr_end(addr, end);
153 if (page_empty(pmd) && !page_empty(pud)) { 180 if (page_empty(pmd) && !page_empty(pud)) {
@@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
420 kvm->arch.pgd = NULL; 447 kvm->arch.pgd = NULL;
421} 448}
422 449
423 450static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
424static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 451 phys_addr_t addr)
425 phys_addr_t addr, const pte_t *new_pte, bool iomap)
426{ 452{
427 pgd_t *pgd; 453 pgd_t *pgd;
428 pud_t *pud; 454 pud_t *pud;
429 pmd_t *pmd; 455 pmd_t *pmd;
430 pte_t *pte, old_pte;
431 456
432 /* Create 2nd stage page table mapping - Level 1 */
433 pgd = kvm->arch.pgd + pgd_index(addr); 457 pgd = kvm->arch.pgd + pgd_index(addr);
434 pud = pud_offset(pgd, addr); 458 pud = pud_offset(pgd, addr);
435 if (pud_none(*pud)) { 459 if (pud_none(*pud)) {
436 if (!cache) 460 if (!cache)
437 return 0; /* ignore calls from kvm_set_spte_hva */ 461 return NULL;
438 pmd = mmu_memory_cache_alloc(cache); 462 pmd = mmu_memory_cache_alloc(cache);
439 pud_populate(NULL, pud, pmd); 463 pud_populate(NULL, pud, pmd);
440 get_page(virt_to_page(pud)); 464 get_page(virt_to_page(pud));
441 } 465 }
442 466
443 pmd = pmd_offset(pud, addr); 467 return pmd_offset(pud, addr);
468}
469
470static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
471 *cache, phys_addr_t addr, const pmd_t *new_pmd)
472{
473 pmd_t *pmd, old_pmd;
474
475 pmd = stage2_get_pmd(kvm, cache, addr);
476 VM_BUG_ON(!pmd);
477
478 /*
479 * Mapping in huge pages should only happen through a fault. If a
480 * page is merged into a transparent huge page, the individual
481 * subpages of that huge page should be unmapped through MMU
482 * notifiers before we get here.
483 *
 484	 * Merging of CompoundPages is not supported; they should instead
 485	 * be split first, unmapped, merged, and mapped back in on demand.
486 */
487 VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
488
489 old_pmd = *pmd;
490 kvm_set_pmd(pmd, *new_pmd);
491 if (pmd_present(old_pmd))
492 kvm_tlb_flush_vmid_ipa(kvm, addr);
493 else
494 get_page(virt_to_page(pmd));
495 return 0;
496}
497
498static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
499 phys_addr_t addr, const pte_t *new_pte, bool iomap)
500{
501 pmd_t *pmd;
502 pte_t *pte, old_pte;
444 503
445 /* Create 2nd stage page table mapping - Level 2 */ 504 /* Create stage-2 page table mapping - Level 1 */
505 pmd = stage2_get_pmd(kvm, cache, addr);
506 if (!pmd) {
507 /*
508 * Ignore calls from kvm_set_spte_hva for unallocated
509 * address ranges.
510 */
511 return 0;
512 }
513
514 /* Create stage-2 page mappings - Level 2 */
446 if (pmd_none(*pmd)) { 515 if (pmd_none(*pmd)) {
447 if (!cache) 516 if (!cache)
448 return 0; /* ignore calls from kvm_set_spte_hva */ 517 return 0; /* ignore calls from kvm_set_spte_hva */
@@ -507,16 +576,60 @@ out:
507 return ret; 576 return ret;
508} 577}
509 578
579static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
580{
581 pfn_t pfn = *pfnp;
582 gfn_t gfn = *ipap >> PAGE_SHIFT;
583
584 if (PageTransCompound(pfn_to_page(pfn))) {
585 unsigned long mask;
586 /*
587 * The address we faulted on is backed by a transparent huge
588 * page. However, because we map the compound huge page and
589 * not the individual tail page, we need to transfer the
590 * refcount to the head page. We have to be careful that the
591 * THP doesn't start to split while we are adjusting the
592 * refcounts.
593 *
594 * We are sure this doesn't happen, because mmu_notifier_retry
595 * was successful and we are holding the mmu_lock, so if this
596 * THP is trying to split, it will be blocked in the mmu
597 * notifier before touching any of the pages, specifically
598 * before being able to call __split_huge_page_refcount().
599 *
600 * We can therefore safely transfer the refcount from PG_tail
601 * to PG_head and switch the pfn from a tail page to the head
602 * page accordingly.
603 */
604 mask = PTRS_PER_PMD - 1;
605 VM_BUG_ON((gfn & mask) != (pfn & mask));
606 if (pfn & mask) {
607 *ipap &= PMD_MASK;
608 kvm_release_pfn_clean(pfn);
609 pfn &= ~mask;
610 kvm_get_pfn(pfn);
611 *pfnp = pfn;
612 }
613
614 return true;
615 }
616
617 return false;
618}
619
510static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 620static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
511 gfn_t gfn, struct kvm_memory_slot *memslot, 621 struct kvm_memory_slot *memslot,
512 unsigned long fault_status) 622 unsigned long fault_status)
513{ 623{
514 pte_t new_pte;
515 pfn_t pfn;
516 int ret; 624 int ret;
517 bool write_fault, writable; 625 bool write_fault, writable, hugetlb = false, force_pte = false;
518 unsigned long mmu_seq; 626 unsigned long mmu_seq;
627 gfn_t gfn = fault_ipa >> PAGE_SHIFT;
628 unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
629 struct kvm *kvm = vcpu->kvm;
519 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 630 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
631 struct vm_area_struct *vma;
632 pfn_t pfn;
520 633
521 write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); 634 write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
522 if (fault_status == FSC_PERM && !write_fault) { 635 if (fault_status == FSC_PERM && !write_fault) {
@@ -524,6 +637,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
524 return -EFAULT; 637 return -EFAULT;
525 } 638 }
526 639
640 /* Let's check if we will get back a huge page backed by hugetlbfs */
641 down_read(&current->mm->mmap_sem);
642 vma = find_vma_intersection(current->mm, hva, hva + 1);
643 if (is_vm_hugetlb_page(vma)) {
644 hugetlb = true;
645 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
646 } else {
647 /*
648 * Pages belonging to VMAs not aligned to the PMD mapping
649 * granularity cannot be mapped using block descriptors even
650 * if the pages belong to a THP for the process, because the
651 * stage-2 block descriptor will cover more than a single THP
 652	 * and we lose atomicity for unmapping, updates, and splits
653 * of the THP or other pages in the stage-2 block range.
654 */
655 if (vma->vm_start & ~PMD_MASK)
656 force_pte = true;
657 }
658 up_read(&current->mm->mmap_sem);
659
527 /* We need minimum second+third level pages */ 660 /* We need minimum second+third level pages */
528 ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); 661 ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
529 if (ret) 662 if (ret)
@@ -541,26 +674,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
541 */ 674 */
542 smp_rmb(); 675 smp_rmb();
543 676
544 pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); 677 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
545 if (is_error_pfn(pfn)) 678 if (is_error_pfn(pfn))
546 return -EFAULT; 679 return -EFAULT;
547 680
548 new_pte = pfn_pte(pfn, PAGE_S2); 681 spin_lock(&kvm->mmu_lock);
549 coherent_icache_guest_page(vcpu->kvm, gfn); 682 if (mmu_notifier_retry(kvm, mmu_seq))
550
551 spin_lock(&vcpu->kvm->mmu_lock);
552 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
553 goto out_unlock; 683 goto out_unlock;
554 if (writable) { 684 if (!hugetlb && !force_pte)
555 kvm_set_s2pte_writable(&new_pte); 685 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
556 kvm_set_pfn_dirty(pfn); 686
687 if (hugetlb) {
688 pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
689 new_pmd = pmd_mkhuge(new_pmd);
690 if (writable) {
691 kvm_set_s2pmd_writable(&new_pmd);
692 kvm_set_pfn_dirty(pfn);
693 }
694 coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
695 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
696 } else {
697 pte_t new_pte = pfn_pte(pfn, PAGE_S2);
698 if (writable) {
699 kvm_set_s2pte_writable(&new_pte);
700 kvm_set_pfn_dirty(pfn);
701 }
702 coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
703 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
557 } 704 }
558 stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); 705
559 706
560out_unlock: 707out_unlock:
561 spin_unlock(&vcpu->kvm->mmu_lock); 708 spin_unlock(&kvm->mmu_lock);
562 kvm_release_pfn_clean(pfn); 709 kvm_release_pfn_clean(pfn);
563 return 0; 710 return ret;
564} 711}
565 712
566/** 713/**
@@ -629,7 +776,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
629 776
630 memslot = gfn_to_memslot(vcpu->kvm, gfn); 777 memslot = gfn_to_memslot(vcpu->kvm, gfn);
631 778
632 ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); 779 ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
633 if (ret == 0) 780 if (ret == 0)
634 ret = 1; 781 ret = 1;
635out_unlock: 782out_unlock:
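
The pfn/ipa adjustment performed by transparent_hugepage_adjust() above boils down to rounding the faulting IPA down to its 2MB block boundary and pointing the pfn back at the THP head page; the refcount is then moved from the tail page to the head page with kvm_release_pfn_clean()/kvm_get_pfn(). The arithmetic alone can be checked with a small standalone program. This is only an illustrative sketch assuming 4K pages and 512 PTEs per PMD; the helper name and the sample values are made up and it is not the kernel function itself:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT   12
#define PTRS_PER_PMD 512	/* one 2MB block = 512 x 4K pages */
#define PMD_MASK     (~(((uint64_t)PTRS_PER_PMD << PAGE_SHIFT) - 1))

/* Align a (pfn, ipa) pair that hit a THP tail page back onto the head page. */
static void thp_adjust(uint64_t *pfn, uint64_t *ipa)
{
	uint64_t mask = PTRS_PER_PMD - 1;

	if (*pfn & mask) {		/* faulted inside the huge page */
		*ipa &= PMD_MASK;	/* map the whole block at its base IPA */
		*pfn &= ~mask;		/* ...backed by the head page's pfn */
	}
}

int main(void)
{
	uint64_t pfn = 0x12345;		/* tail page, offset 0x145 into the THP */
	uint64_t ipa = (0x80ull << 20) + (0x145ull << PAGE_SHIFT);

	thp_adjust(&pfn, &ipa);
	printf("pfn=0x%llx ipa=0x%llx\n",	/* -> pfn=0x12200 ipa=0x8000000 */
	       (unsigned long long)pfn, (unsigned long long)ipa);
	return 0;
}

The VM_BUG_ON() in the patch additionally asserts that gfn and pfn share the same offset within the block, so rounding both sides down keeps the stage-2 mapping congruent.
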
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 86a693a02ba3..0881bf169fbc 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -18,6 +18,7 @@
18#include <linux/kvm_host.h> 18#include <linux/kvm_host.h>
19#include <linux/wait.h> 19#include <linux/wait.h>
20 20
21#include <asm/cputype.h>
21#include <asm/kvm_emulate.h> 22#include <asm/kvm_emulate.h>
22#include <asm/kvm_psci.h> 23#include <asm/kvm_psci.h>
23 24
@@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
34static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 35static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
35{ 36{
36 struct kvm *kvm = source_vcpu->kvm; 37 struct kvm *kvm = source_vcpu->kvm;
37 struct kvm_vcpu *vcpu; 38 struct kvm_vcpu *vcpu = NULL, *tmp;
38 wait_queue_head_t *wq; 39 wait_queue_head_t *wq;
39 unsigned long cpu_id; 40 unsigned long cpu_id;
41 unsigned long mpidr;
40 phys_addr_t target_pc; 42 phys_addr_t target_pc;
43 int i;
41 44
42 cpu_id = *vcpu_reg(source_vcpu, 1); 45 cpu_id = *vcpu_reg(source_vcpu, 1);
43 if (vcpu_mode_is_32bit(source_vcpu)) 46 if (vcpu_mode_is_32bit(source_vcpu))
44 cpu_id &= ~((u32) 0); 47 cpu_id &= ~((u32) 0);
45 48
46 if (cpu_id >= atomic_read(&kvm->online_vcpus)) 49 kvm_for_each_vcpu(i, tmp, kvm) {
50 mpidr = kvm_vcpu_get_mpidr(tmp);
51 if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
52 vcpu = tmp;
53 break;
54 }
55 }
56
57 if (!vcpu)
47 return KVM_PSCI_RET_INVAL; 58 return KVM_PSCI_RET_INVAL;
48 59
49 target_pc = *vcpu_reg(source_vcpu, 2); 60 target_pc = *vcpu_reg(source_vcpu, 2);
50 61
51 vcpu = kvm_get_vcpu(kvm, cpu_id);
52
53 wq = kvm_arch_vcpu_wq(vcpu); 62 wq = kvm_arch_vcpu_wq(vcpu);
54 if (!waitqueue_active(wq)) 63 if (!waitqueue_active(wq))
55 return KVM_PSCI_RET_INVAL; 64 return KVM_PSCI_RET_INVAL;
@@ -62,6 +71,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
62 vcpu_set_thumb(vcpu); 71 vcpu_set_thumb(vcpu);
63 } 72 }
64 73
74 /* Propagate caller endianness */
75 if (kvm_vcpu_is_be(source_vcpu))
76 kvm_vcpu_set_be(vcpu);
77
65 *vcpu_pc(vcpu) = target_pc; 78 *vcpu_pc(vcpu) = target_pc;
66 vcpu->arch.pause = false; 79 vcpu->arch.pause = false;
67 smp_mb(); /* Make sure the above is visible */ 80 smp_mb(); /* Make sure the above is visible */
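
With this change PSCI CPU_ON resolves its target by MPIDR affinity instead of treating the register value as a linear vcpu index, which matters once the guest names its CPUs by MPIDR in the device tree. The matching predicate can be sketched in isolation; the array and helper below are illustrative userspace stand-ins, not kernel structures:

#include <stdint.h>
#include <stdio.h>

#define MPIDR_HWID_BITMASK 0x00ffffffu	/* Aff2:Aff1:Aff0 on ARMv7 */

/* Return the index of the vcpu whose MPIDR affinity matches the target. */
static int find_vcpu_by_mpidr(const uint32_t *vcpu_mpidr, int nr_vcpus,
			      uint32_t target)
{
	for (int i = 0; i < nr_vcpus; i++)
		if ((vcpu_mpidr[i] & MPIDR_HWID_BITMASK) ==
		    (target & MPIDR_HWID_BITMASK))
			return i;
	return -1;	/* the real code returns KVM_PSCI_RET_INVAL */
}

int main(void)
{
	uint32_t mpidr[] = { 0x80000000, 0x80000001, 0x80000100 };

	printf("target 0x100 -> vcpu %d\n",
	       find_vcpu_by_mpidr(mpidr, 3, 0x100));	/* -> vcpu 2 */
	return 0;
}
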
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index c02ba4af599f..f558c073c023 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -30,16 +30,14 @@
30#include <kvm/arm_arch_timer.h> 30#include <kvm/arm_arch_timer.h>
31 31
32/****************************************************************************** 32/******************************************************************************
33 * Cortex-A15 Reset Values 33 * Cortex-A15 and Cortex-A7 Reset Values
34 */ 34 */
35 35
36static const int a15_max_cpu_idx = 3; 36static struct kvm_regs cortexa_regs_reset = {
37
38static struct kvm_regs a15_regs_reset = {
39 .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, 37 .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
40}; 38};
41 39
42static const struct kvm_irq_level a15_vtimer_irq = { 40static const struct kvm_irq_level cortexa_vtimer_irq = {
43 { .irq = 27 }, 41 { .irq = 27 },
44 .level = 1, 42 .level = 1,
45}; 43};
@@ -62,12 +60,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
62 const struct kvm_irq_level *cpu_vtimer_irq; 60 const struct kvm_irq_level *cpu_vtimer_irq;
63 61
64 switch (vcpu->arch.target) { 62 switch (vcpu->arch.target) {
63 case KVM_ARM_TARGET_CORTEX_A7:
65 case KVM_ARM_TARGET_CORTEX_A15: 64 case KVM_ARM_TARGET_CORTEX_A15:
66 if (vcpu->vcpu_id > a15_max_cpu_idx) 65 reset_regs = &cortexa_regs_reset;
67 return -EINVAL;
68 reset_regs = &a15_regs_reset;
69 vcpu->arch.midr = read_cpuid_id(); 66 vcpu->arch.midr = read_cpuid_id();
70 cpu_vtimer_irq = &a15_vtimer_irq; 67 cpu_vtimer_irq = &cortexa_vtimer_irq;
71 break; 68 break;
72 default: 69 default:
73 return -ENODEV; 70 return -ENODEV;
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index a5f28e2720c7..c98ef4771c73 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -63,6 +63,7 @@
63 * TAC: Trap ACTLR 63 * TAC: Trap ACTLR
64 * TSC: Trap SMC 64 * TSC: Trap SMC
65 * TSW: Trap cache operations by set/way 65 * TSW: Trap cache operations by set/way
66 * TWE: Trap WFE
66 * TWI: Trap WFI 67 * TWI: Trap WFI
67 * TIDCP: Trap L2CTLR/L2ECTLR 68 * TIDCP: Trap L2CTLR/L2ECTLR
68 * BSU_IS: Upgrade barriers to the inner shareable domain 69 * BSU_IS: Upgrade barriers to the inner shareable domain
@@ -72,8 +73,9 @@
72 * FMO: Override CPSR.F and enable signaling with VF 73 * FMO: Override CPSR.F and enable signaling with VF
73 * SWIO: Turn set/way invalidates into set/way clean+invalidate 74 * SWIO: Turn set/way invalidates into set/way clean+invalidate
74 */ 75 */
75#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ 76#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
76 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ 77 HCR_BSU_IS | HCR_FB | HCR_TAC | \
78 HCR_AMO | HCR_IMO | HCR_FMO | \
77 HCR_SWIO | HCR_TIDCP | HCR_RW) 79 HCR_SWIO | HCR_TIDCP | HCR_RW)
78#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) 80#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
79 81
@@ -242,4 +244,6 @@
242 244
243#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 245#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10
244 246
247#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0)
248
245#endif /* __ARM64_KVM_ARM_H__ */ 249#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index eec073875218..dd8ecfc3f995 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -177,4 +177,65 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
177 return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; 177 return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
178} 178}
179 179
180static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
181{
182 return vcpu_sys_reg(vcpu, MPIDR_EL1);
183}
184
185static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
186{
187 if (vcpu_mode_is_32bit(vcpu))
188 *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
189 else
190 vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
191}
192
193static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
194{
195 if (vcpu_mode_is_32bit(vcpu))
196 return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
197
198 return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
199}
200
201static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
202 unsigned long data,
203 unsigned int len)
204{
205 if (kvm_vcpu_is_be(vcpu)) {
206 switch (len) {
207 case 1:
208 return data & 0xff;
209 case 2:
210 return be16_to_cpu(data & 0xffff);
211 case 4:
212 return be32_to_cpu(data & 0xffffffff);
213 default:
214 return be64_to_cpu(data);
215 }
216 }
217
218 return data; /* Leave LE untouched */
219}
220
221static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
222 unsigned long data,
223 unsigned int len)
224{
225 if (kvm_vcpu_is_be(vcpu)) {
226 switch (len) {
227 case 1:
228 return data & 0xff;
229 case 2:
230 return cpu_to_be16(data & 0xffff);
231 case 4:
232 return cpu_to_be32(data & 0xffffffff);
233 default:
234 return cpu_to_be64(data);
235 }
236 }
237
238 return data; /* Leave LE untouched */
239}
240
180#endif /* __ARM64_KVM_EMULATE_H__ */ 241#endif /* __ARM64_KVM_EMULATE_H__ */
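
The two helpers added above byteswap MMIO data whenever the guest is currently big-endian (CPSR.E for a 32-bit guest, SCTLR_EL1.EE, bit 25, for a 64-bit guest) while the host-side device emulation works on little-endian values. A self-contained sketch of the same width-dependent swap, assuming a little-endian host (the function name here is illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Swap MMIO store data from guest byte order to host (LE) byte order. */
static uint64_t mmio_guest_to_host(bool guest_is_be, uint64_t data,
				   unsigned int len)
{
	if (!guest_is_be)
		return data;			/* LE guest: leave untouched */

	switch (len) {
	case 1:  return data & 0xff;		/* single bytes have no order */
	case 2:  return __builtin_bswap16(data & 0xffff);
	case 4:  return __builtin_bswap32(data & 0xffffffff);
	default: return __builtin_bswap64(data);
	}
}

int main(void)
{
	/* A 32-bit store of 0x11223344 by a BE guest reaches the device as: */
	printf("0x%08llx\n", (unsigned long long)
	       mmio_guest_to_host(true, 0x11223344, 4));	/* 0x44332211 */
	return 0;
}

On a big-endian host the be*_to_cpu()/cpu_to_be*() calls in the real helpers become no-ops, which this little-endian-only sketch deliberately does not model.
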
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0859a4ddd1e7..5d85a02d1231 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -36,11 +36,6 @@
36 36
37#define KVM_VCPU_MAX_FEATURES 2 37#define KVM_VCPU_MAX_FEATURES 2
38 38
39/* We don't currently support large pages. */
40#define KVM_HPAGE_GFN_SHIFT(x) 0
41#define KVM_NR_PAGE_SIZES 1
42#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
43
44struct kvm_vcpu; 39struct kvm_vcpu;
45int kvm_target_cpu(void); 40int kvm_target_cpu(void);
46int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 41int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -151,6 +146,7 @@ struct kvm_vcpu_stat {
151struct kvm_vcpu_init; 146struct kvm_vcpu_init;
152int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 147int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
153 const struct kvm_vcpu_init *init); 148 const struct kvm_vcpu_init *init);
149int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
154unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 150unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
155int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 151int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
156struct kvm_one_reg; 152struct kvm_one_reg;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index efe609c6a3c9..680f74e67497 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -91,6 +91,7 @@ int kvm_mmu_init(void);
91void kvm_clear_hyp_idmap(void); 91void kvm_clear_hyp_idmap(void);
92 92
93#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) 93#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
94#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
94 95
95static inline bool kvm_is_write_fault(unsigned long esr) 96static inline bool kvm_is_write_fault(unsigned long esr)
96{ 97{
@@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
116 pte_val(*pte) |= PTE_S2_RDWR; 117 pte_val(*pte) |= PTE_S2_RDWR;
117} 118}
118 119
120static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
121{
122 pmd_val(*pmd) |= PMD_S2_RDWR;
123}
124
119struct kvm; 125struct kvm;
120 126
121static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) 127static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
128 unsigned long size)
122{ 129{
123 if (!icache_is_aliasing()) { /* PIPT */ 130 if (!icache_is_aliasing()) { /* PIPT */
124 unsigned long hva = gfn_to_hva(kvm, gfn); 131 flush_icache_range(hva, hva + size);
125 flush_icache_range(hva, hva + PAGE_SIZE);
126 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ 132 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
127 /* any kind of VIPT cache */ 133 /* any kind of VIPT cache */
128 __flush_icache_all(); 134 __flush_icache_all();
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index d57e66845c86..755f86143320 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -85,6 +85,8 @@
85#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ 85#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
86#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 86#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
87 87
88#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
89
88/* 90/*
89 * Memory Attribute override for Stage-2 (MemAttr[3:0]) 91 * Memory Attribute override for Stage-2 (MemAttr[3:0])
90 */ 92 */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 21e90820bd23..4480ab339a00 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
21 select MMU_NOTIFIER 21 select MMU_NOTIFIER
22 select PREEMPT_NOTIFIERS 22 select PREEMPT_NOTIFIERS
23 select ANON_INODES 23 select ANON_INODES
24 select HAVE_KVM_CPU_RELAX_INTERCEPT
24 select KVM_MMIO 25 select KVM_MMIO
25 select KVM_ARM_HOST 26 select KVM_ARM_HOST
26 select KVM_ARM_VGIC 27 select KVM_ARM_VGIC
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2c3ff67a8ecb..3f0731e53274 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
248 return kvm_reset_vcpu(vcpu); 248 return kvm_reset_vcpu(vcpu);
249} 249}
250 250
251int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
252{
253 int target = kvm_target_cpu();
254
255 if (target < 0)
256 return -ENODEV;
257
258 memset(init, 0, sizeof(*init));
259
260 /*
261 * For now, we don't return any features.
262 * In future, we might use features to return target
263 * specific features available for the preferred
264 * target type.
265 */
266 init->target = (__u32)target;
267
268 return 0;
269}
270
251int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 271int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
252{ 272{
253 return -EINVAL; 273 return -EINVAL;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 9beaca033437..8da56067c304 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -47,21 +47,29 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
47} 47}
48 48
49/** 49/**
50 * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest 50 * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
51 * instruction executed by a guest
52 *
51 * @vcpu: the vcpu pointer 53 * @vcpu: the vcpu pointer
52 * 54 *
53 * Simply call kvm_vcpu_block(), which will halt execution of 55 * WFE: Yield the CPU and come back to this vcpu when the scheduler
56 * decides to.
57 * WFI: Simply call kvm_vcpu_block(), which will halt execution of
54 * world-switches and schedule other host processes until there is an 58 * world-switches and schedule other host processes until there is an
55 * incoming IRQ or FIQ to the VM. 59 * incoming IRQ or FIQ to the VM.
56 */ 60 */
57static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) 61static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
58{ 62{
59 kvm_vcpu_block(vcpu); 63 if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE)
64 kvm_vcpu_on_spin(vcpu);
65 else
66 kvm_vcpu_block(vcpu);
67
60 return 1; 68 return 1;
61} 69}
62 70
63static exit_handle_fn arm_exit_handlers[] = { 71static exit_handle_fn arm_exit_handlers[] = {
64 [ESR_EL2_EC_WFI] = kvm_handle_wfi, 72 [ESR_EL2_EC_WFI] = kvm_handle_wfx,
65 [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, 73 [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32,
66 [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, 74 [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64,
67 [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, 75 [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access,
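
Bit 0 of the ESR_EL2 ISS field distinguishes the two trapped instructions: WFE yields the physical CPU so another runnable vcpu can make progress (kvm_vcpu_on_spin(), complemented by the HAVE_KVM_CPU_RELAX_INTERCEPT selection in the Kconfig hunk above), while WFI blocks the vcpu until an interrupt is pending. A trivial standalone sketch of the dispatch test; the ESR values are hand-built encodings with EC=0x01 and only bit 0 is inspected here:

#include <stdint.h>
#include <stdio.h>

#define ESR_EL2_EC_WFI_ISS_WFE (1u << 0)	/* ISS bit 0: 0 = WFI, 1 = WFE */

static const char *wfx_action(uint32_t esr)
{
	if (esr & ESR_EL2_EC_WFI_ISS_WFE)
		return "WFE: yield the CPU (kvm_vcpu_on_spin)";
	return "WFI: block until an IRQ/FIQ is pending (kvm_vcpu_block)";
}

int main(void)
{
	printf("%s\n", wfx_action(0x06000000));	/* trapped WFI */
	printf("%s\n", wfx_action(0x06000001));	/* trapped WFE */
	return 0;
}
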
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 989dd3fe8de1..db95f570705f 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -234,10 +234,6 @@ struct kvm_vm_data {
234#define KVM_REQ_PTC_G 32 234#define KVM_REQ_PTC_G 32
235#define KVM_REQ_RESUME 33 235#define KVM_REQ_RESUME 33
236 236
237#define KVM_HPAGE_GFN_SHIFT(x) 0
238#define KVM_NR_PAGE_SIZES 1
239#define KVM_PAGES_PER_HPAGE(x) 1
240
241struct kvm; 237struct kvm;
242struct kvm_vcpu; 238struct kvm_vcpu;
243 239
@@ -480,7 +476,7 @@ struct kvm_arch {
480 476
481 struct list_head assigned_dev_head; 477 struct list_head assigned_dev_head;
482 struct iommu_domain *iommu_domain; 478 struct iommu_domain *iommu_domain;
483 int iommu_flags; 479 bool iommu_noncoherent;
484 480
485 unsigned long irq_sources_bitmap; 481 unsigned long irq_sources_bitmap;
486 unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; 482 unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bdfd8789b376..985bf80c622e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1550 return VM_FAULT_SIGBUS; 1550 return VM_FAULT_SIGBUS;
1551} 1551}
1552 1552
1553void kvm_arch_free_memslot(struct kvm_memory_slot *free, 1553void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1554 struct kvm_memory_slot *dont) 1554 struct kvm_memory_slot *dont)
1555{ 1555{
1556} 1556}
1557 1557
1558int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 1558int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1559 unsigned long npages)
1559{ 1560{
1560 return 0; 1561 return 0;
1561} 1562}
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4d6fa0bf1305..32966969f2f9 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -27,13 +27,6 @@
27 27
28#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 28#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
29 29
30/* Don't support huge pages */
31#define KVM_HPAGE_GFN_SHIFT(x) 0
32
33/* We don't currently support large pages. */
34#define KVM_NR_PAGE_SIZES 1
35#define KVM_PAGES_PER_HPAGE(x) 1
36
37 30
38 31
39/* Special address that contains the comm page, used for reducing # of traps */ 32/* Special address that contains the comm page, used for reducing # of traps */
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index a7b044536de4..73b34827826c 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
198 return -ENOIOCTLCMD; 198 return -ENOIOCTLCMD;
199} 199}
200 200
201void kvm_arch_free_memslot(struct kvm_memory_slot *free, 201void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
202 struct kvm_memory_slot *dont) 202 struct kvm_memory_slot *dont)
203{ 203{
204} 204}
205 205
206int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 206int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
207 unsigned long npages)
207{ 208{
208 return 0; 209 return 0;
209} 210}
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 9b198d1b3b2b..856f8deb557a 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst)
77 return inst & 0xffff; 77 return inst & 0xffff;
78} 78}
79 79
80static inline unsigned int get_oc(u32 inst)
81{
82 return (inst >> 11) & 0x7fff;
83}
80#endif /* __ASM_PPC_DISASSEMBLE_H__ */ 84#endif /* __ASM_PPC_DISASSEMBLE_H__ */
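
get_oc() extracts the 15-bit field starting at bit 11 of an instruction word; later in this series it is used to tell ehpriv variants apart, with OC = 1 reserved for debug emulation (see the EHPRIV_OC_* definitions added to kvm_booke.h further down). A standalone decode sketch, reusing the constants from the patch in an otherwise illustrative userspace program:

#include <stdint.h>
#include <stdio.h>

#define KVMPPC_INST_EHPRIV	0x7c00021c	/* base ehpriv opcode */
#define EHPRIV_OC_SHIFT		11
#define EHPRIV_OC_DEBUG		1		/* "ehpriv 1" => debug emulation */

/* Same extraction as get_oc() in asm/disassemble.h. */
static unsigned int get_oc(uint32_t inst)
{
	return (inst >> 11) & 0x7fff;
}

int main(void)
{
	uint32_t inst = KVMPPC_INST_EHPRIV | (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT);

	printf("OC = %u (%s)\n", get_oc(inst),
	       get_oc(inst) == EHPRIV_OC_DEBUG ? "debug trap" : "other");
	return 0;
}
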
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index cca12f084842..894662a5d4d5 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -198,12 +198,27 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
198 cmpwi r10,0; \ 198 cmpwi r10,0; \
199 bne do_kvm_##n 199 bne do_kvm_##n
200 200
201#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
202/*
 203 * If hv is possible, interrupts come in to the hv version
204 * of the kvmppc_interrupt code, which then jumps to the PR handler,
205 * kvmppc_interrupt_pr, if the guest is a PR guest.
206 */
207#define kvmppc_interrupt kvmppc_interrupt_hv
208#else
209#define kvmppc_interrupt kvmppc_interrupt_pr
210#endif
211
201#define __KVM_HANDLER(area, h, n) \ 212#define __KVM_HANDLER(area, h, n) \
202do_kvm_##n: \ 213do_kvm_##n: \
203 BEGIN_FTR_SECTION_NESTED(947) \ 214 BEGIN_FTR_SECTION_NESTED(947) \
204 ld r10,area+EX_CFAR(r13); \ 215 ld r10,area+EX_CFAR(r13); \
205 std r10,HSTATE_CFAR(r13); \ 216 std r10,HSTATE_CFAR(r13); \
206 END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \ 217 END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \
218 BEGIN_FTR_SECTION_NESTED(948) \
219 ld r10,area+EX_PPR(r13); \
220 std r10,HSTATE_PPR(r13); \
221 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
207 ld r10,area+EX_R10(r13); \ 222 ld r10,area+EX_R10(r13); \
208 stw r9,HSTATE_SCRATCH1(r13); \ 223 stw r9,HSTATE_SCRATCH1(r13); \
209 ld r9,area+EX_R9(r13); \ 224 ld r9,area+EX_R9(r13); \
@@ -217,6 +232,10 @@ do_kvm_##n: \
217 ld r10,area+EX_R10(r13); \ 232 ld r10,area+EX_R10(r13); \
218 beq 89f; \ 233 beq 89f; \
219 stw r9,HSTATE_SCRATCH1(r13); \ 234 stw r9,HSTATE_SCRATCH1(r13); \
235 BEGIN_FTR_SECTION_NESTED(948) \
236 ld r9,area+EX_PPR(r13); \
237 std r9,HSTATE_PPR(r13); \
238 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
220 ld r9,area+EX_R9(r13); \ 239 ld r9,area+EX_R9(r13); \
221 std r12,HSTATE_SCRATCH0(r13); \ 240 std r12,HSTATE_SCRATCH0(r13); \
222 li r12,n; \ 241 li r12,n; \
@@ -236,7 +255,7 @@ do_kvm_##n: \
236#define KVM_HANDLER_SKIP(area, h, n) 255#define KVM_HANDLER_SKIP(area, h, n)
237#endif 256#endif
238 257
239#ifdef CONFIG_KVM_BOOK3S_PR 258#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
240#define KVMTEST_PR(n) __KVMTEST(n) 259#define KVMTEST_PR(n) __KVMTEST(n)
241#define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n) 260#define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n)
242#define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) 261#define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n)
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 851bac7afa4b..1bd92fd43cfb 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -123,6 +123,8 @@
123#define BOOK3S_HFLAG_SLB 0x2 123#define BOOK3S_HFLAG_SLB 0x2
124#define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 124#define BOOK3S_HFLAG_PAIRED_SINGLE 0x4
125#define BOOK3S_HFLAG_NATIVE_PS 0x8 125#define BOOK3S_HFLAG_NATIVE_PS 0x8
126#define BOOK3S_HFLAG_MULTI_PGSIZE 0x10
127#define BOOK3S_HFLAG_NEW_TLBIE 0x20
126 128
127#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ 129#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
128#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ 130#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
@@ -136,6 +138,8 @@
136#define KVM_GUEST_MODE_NONE 0 138#define KVM_GUEST_MODE_NONE 0
137#define KVM_GUEST_MODE_GUEST 1 139#define KVM_GUEST_MODE_GUEST 1
138#define KVM_GUEST_MODE_SKIP 2 140#define KVM_GUEST_MODE_SKIP 2
141#define KVM_GUEST_MODE_GUEST_HV 3
142#define KVM_GUEST_MODE_HOST_HV 4
139 143
140#define KVM_INST_FETCH_FAILED -1 144#define KVM_INST_FETCH_FAILED -1
141 145
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index fa19e2f1a874..4a594b76674d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -58,16 +58,18 @@ struct hpte_cache {
58 struct hlist_node list_pte_long; 58 struct hlist_node list_pte_long;
59 struct hlist_node list_vpte; 59 struct hlist_node list_vpte;
60 struct hlist_node list_vpte_long; 60 struct hlist_node list_vpte_long;
61#ifdef CONFIG_PPC_BOOK3S_64
62 struct hlist_node list_vpte_64k;
63#endif
61 struct rcu_head rcu_head; 64 struct rcu_head rcu_head;
62 u64 host_vpn; 65 u64 host_vpn;
63 u64 pfn; 66 u64 pfn;
64 ulong slot; 67 ulong slot;
65 struct kvmppc_pte pte; 68 struct kvmppc_pte pte;
69 int pagesize;
66}; 70};
67 71
68struct kvmppc_vcpu_book3s { 72struct kvmppc_vcpu_book3s {
69 struct kvm_vcpu vcpu;
70 struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
71 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 73 struct kvmppc_sid_map sid_map[SID_MAP_NUM];
72 struct { 74 struct {
73 u64 esid; 75 u64 esid;
@@ -99,6 +101,9 @@ struct kvmppc_vcpu_book3s {
99 struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; 101 struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
100 struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; 102 struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
101 struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; 103 struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
104#ifdef CONFIG_PPC_BOOK3S_64
105 struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K];
106#endif
102 int hpte_cache_count; 107 int hpte_cache_count;
103 spinlock_t mmu_lock; 108 spinlock_t mmu_lock;
104}; 109};
@@ -107,8 +112,9 @@ struct kvmppc_vcpu_book3s {
107#define CONTEXT_GUEST 1 112#define CONTEXT_GUEST 1
108#define CONTEXT_GUEST_END 2 113#define CONTEXT_GUEST_END 2
109 114
110#define VSID_REAL 0x0fffffffffc00000ULL 115#define VSID_REAL 0x07ffffffffc00000ULL
111#define VSID_BAT 0x0fffffffffb00000ULL 116#define VSID_BAT 0x07ffffffffb00000ULL
117#define VSID_64K 0x0800000000000000ULL
112#define VSID_1T 0x1000000000000000ULL 118#define VSID_1T 0x1000000000000000ULL
113#define VSID_REAL_DR 0x2000000000000000ULL 119#define VSID_REAL_DR 0x2000000000000000ULL
114#define VSID_REAL_IR 0x4000000000000000ULL 120#define VSID_REAL_IR 0x4000000000000000ULL
@@ -118,11 +124,12 @@ extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask)
118extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); 124extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
119extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); 125extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end);
120extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); 126extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
121extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr);
122extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); 127extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
123extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); 128extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
124extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); 129extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
125extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); 130extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
131 bool iswrite);
132extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
126extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); 133extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
127extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); 134extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
128extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); 135extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
@@ -134,6 +141,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
134 141
135extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); 142extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
136extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); 143extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
144extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
137extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); 145extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
138extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); 146extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
139extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); 147extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
@@ -151,7 +159,8 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
151 bool upper, u32 val); 159 bool upper, u32 val);
152extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 160extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
153extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); 161extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
154extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); 162extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
163 bool *writable);
155extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 164extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
156 unsigned long *rmap, long pte_index, int realmode); 165 unsigned long *rmap, long pte_index, int realmode);
157extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, 166extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
@@ -172,6 +181,8 @@ extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
172 unsigned long *hpret); 181 unsigned long *hpret);
173extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, 182extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
174 struct kvm_memory_slot *memslot, unsigned long *map); 183 struct kvm_memory_slot *memslot, unsigned long *map);
184extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
185 unsigned long mask);
175 186
176extern void kvmppc_entry_trampoline(void); 187extern void kvmppc_entry_trampoline(void);
177extern void kvmppc_hv_entry_trampoline(void); 188extern void kvmppc_hv_entry_trampoline(void);
@@ -184,11 +195,9 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
184 195
185static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) 196static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
186{ 197{
187 return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu); 198 return vcpu->arch.book3s;
188} 199}
189 200
190extern void kvm_return_point(void);
191
192/* Also add subarch specific defines */ 201/* Also add subarch specific defines */
193 202
194#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 203#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -198,203 +207,6 @@ extern void kvm_return_point(void);
198#include <asm/kvm_book3s_64.h> 207#include <asm/kvm_book3s_64.h>
199#endif 208#endif
200 209
201#ifdef CONFIG_KVM_BOOK3S_PR
202
203static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
204{
205 return to_book3s(vcpu)->hior;
206}
207
208static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
209 unsigned long pending_now, unsigned long old_pending)
210{
211 if (pending_now)
212 vcpu->arch.shared->int_pending = 1;
213 else if (old_pending)
214 vcpu->arch.shared->int_pending = 0;
215}
216
217static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
218{
219 if ( num < 14 ) {
220 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
221 svcpu->gpr[num] = val;
222 svcpu_put(svcpu);
223 to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
224 } else
225 vcpu->arch.gpr[num] = val;
226}
227
228static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
229{
230 if ( num < 14 ) {
231 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
232 ulong r = svcpu->gpr[num];
233 svcpu_put(svcpu);
234 return r;
235 } else
236 return vcpu->arch.gpr[num];
237}
238
239static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
240{
241 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
242 svcpu->cr = val;
243 svcpu_put(svcpu);
244 to_book3s(vcpu)->shadow_vcpu->cr = val;
245}
246
247static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
248{
249 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
250 u32 r;
251 r = svcpu->cr;
252 svcpu_put(svcpu);
253 return r;
254}
255
256static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
257{
258 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
259 svcpu->xer = val;
260 to_book3s(vcpu)->shadow_vcpu->xer = val;
261 svcpu_put(svcpu);
262}
263
264static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
265{
266 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
267 u32 r;
268 r = svcpu->xer;
269 svcpu_put(svcpu);
270 return r;
271}
272
273static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
274{
275 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
276 svcpu->ctr = val;
277 svcpu_put(svcpu);
278}
279
280static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
281{
282 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
283 ulong r;
284 r = svcpu->ctr;
285 svcpu_put(svcpu);
286 return r;
287}
288
289static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
290{
291 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
292 svcpu->lr = val;
293 svcpu_put(svcpu);
294}
295
296static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
297{
298 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
299 ulong r;
300 r = svcpu->lr;
301 svcpu_put(svcpu);
302 return r;
303}
304
305static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
306{
307 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
308 svcpu->pc = val;
309 svcpu_put(svcpu);
310}
311
312static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
313{
314 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
315 ulong r;
316 r = svcpu->pc;
317 svcpu_put(svcpu);
318 return r;
319}
320
321static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
322{
323 ulong pc = kvmppc_get_pc(vcpu);
324 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
325 u32 r;
326
327 /* Load the instruction manually if it failed to do so in the
328 * exit path */
329 if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
330 kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
331
332 r = svcpu->last_inst;
333 svcpu_put(svcpu);
334 return r;
335}
336
337/*
338 * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
339 * Because the sc instruction sets SRR0 to point to the following
340 * instruction, we have to fetch from pc - 4.
341 */
342static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
343{
344 ulong pc = kvmppc_get_pc(vcpu) - 4;
345 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
346 u32 r;
347
348 /* Load the instruction manually if it failed to do so in the
349 * exit path */
350 if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
351 kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
352
353 r = svcpu->last_inst;
354 svcpu_put(svcpu);
355 return r;
356}
357
358static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
359{
360 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
361 ulong r;
362 r = svcpu->fault_dar;
363 svcpu_put(svcpu);
364 return r;
365}
366
367static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
368{
369 ulong crit_raw = vcpu->arch.shared->critical;
370 ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
371 bool crit;
372
373 /* Truncate crit indicators in 32 bit mode */
374 if (!(vcpu->arch.shared->msr & MSR_SF)) {
375 crit_raw &= 0xffffffff;
376 crit_r1 &= 0xffffffff;
377 }
378
379 /* Critical section when crit == r1 */
380 crit = (crit_raw == crit_r1);
381 /* ... and we're in supervisor mode */
382 crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
383
384 return crit;
385}
386#else /* CONFIG_KVM_BOOK3S_PR */
387
388static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
389{
390 return 0;
391}
392
393static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
394 unsigned long pending_now, unsigned long old_pending)
395{
396}
397
398static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 210static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
399{ 211{
400 vcpu->arch.gpr[num] = val; 212 vcpu->arch.gpr[num] = val;
@@ -489,12 +301,6 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
489 return vcpu->arch.fault_dar; 301 return vcpu->arch.fault_dar;
490} 302}
491 303
492static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
493{
494 return false;
495}
496#endif
497
498/* Magic register values loaded into r3 and r4 before the 'sc' assembly 304/* Magic register values loaded into r3 and r4 before the 'sc' assembly
499 * instruction for the OSI hypercalls */ 305 * instruction for the OSI hypercalls */
500#define OSI_SC_MAGIC_R3 0x113724FA 306#define OSI_SC_MAGIC_R3 0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h
index ce0ef6ce8f86..c720e0b3238d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_32.h
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -22,7 +22,7 @@
22 22
23static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) 23static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
24{ 24{
25 return to_book3s(vcpu)->shadow_vcpu; 25 return vcpu->arch.shadow_vcpu;
26} 26}
27 27
28static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) 28static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 86d638a3b359..bf0fa8b0a883 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -20,7 +20,7 @@
20#ifndef __ASM_KVM_BOOK3S_64_H__ 20#ifndef __ASM_KVM_BOOK3S_64_H__
21#define __ASM_KVM_BOOK3S_64_H__ 21#define __ASM_KVM_BOOK3S_64_H__
22 22
23#ifdef CONFIG_KVM_BOOK3S_PR 23#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
24static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) 24static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
25{ 25{
26 preempt_disable(); 26 preempt_disable();
@@ -35,7 +35,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
35 35
36#define SPAPR_TCE_SHIFT 12 36#define SPAPR_TCE_SHIFT 12
37 37
38#ifdef CONFIG_KVM_BOOK3S_64_HV 38#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
39#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ 39#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
40extern unsigned long kvm_rma_pages; 40extern unsigned long kvm_rma_pages;
41#endif 41#endif
@@ -278,7 +278,7 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
278 (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); 278 (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
279} 279}
280 280
281#ifdef CONFIG_KVM_BOOK3S_64_HV 281#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
282/* 282/*
283 * Note modification of an HPTE; set the HPTE modified bit 283 * Note modification of an HPTE; set the HPTE modified bit
284 * if anyone is interested. 284 * if anyone is interested.
@@ -289,6 +289,6 @@ static inline void note_hpte_modification(struct kvm *kvm,
289 if (atomic_read(&kvm->arch.hpte_mod_interest)) 289 if (atomic_read(&kvm->arch.hpte_mod_interest))
290 rev->guest_rpte |= HPTE_GR_MODIFIED; 290 rev->guest_rpte |= HPTE_GR_MODIFIED;
291} 291}
292#endif /* CONFIG_KVM_BOOK3S_64_HV */ 292#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
293 293
294#endif /* __ASM_KVM_BOOK3S_64_H__ */ 294#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 9039d3c97eec..0bd9348a4db9 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -83,7 +83,7 @@ struct kvmppc_host_state {
83 u8 restore_hid5; 83 u8 restore_hid5;
84 u8 napping; 84 u8 napping;
85 85
86#ifdef CONFIG_KVM_BOOK3S_64_HV 86#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
87 u8 hwthread_req; 87 u8 hwthread_req;
88 u8 hwthread_state; 88 u8 hwthread_state;
89 u8 host_ipi; 89 u8 host_ipi;
@@ -101,6 +101,7 @@ struct kvmppc_host_state {
101#endif 101#endif
102#ifdef CONFIG_PPC_BOOK3S_64 102#ifdef CONFIG_PPC_BOOK3S_64
103 u64 cfar; 103 u64 cfar;
104 u64 ppr;
104#endif 105#endif
105}; 106};
106 107
@@ -108,14 +109,14 @@ struct kvmppc_book3s_shadow_vcpu {
108 ulong gpr[14]; 109 ulong gpr[14];
109 u32 cr; 110 u32 cr;
110 u32 xer; 111 u32 xer;
111
112 u32 fault_dsisr;
113 u32 last_inst;
114 ulong ctr; 112 ulong ctr;
115 ulong lr; 113 ulong lr;
116 ulong pc; 114 ulong pc;
115
117 ulong shadow_srr1; 116 ulong shadow_srr1;
118 ulong fault_dar; 117 ulong fault_dar;
118 u32 fault_dsisr;
119 u32 last_inst;
119 120
120#ifdef CONFIG_PPC_BOOK3S_32 121#ifdef CONFIG_PPC_BOOK3S_32
121 u32 sr[16]; /* Guest SRs */ 122 u32 sr[16]; /* Guest SRs */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index d3c1eb34c986..dd8f61510dfd 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -26,7 +26,12 @@
26/* LPIDs we support with this build -- runtime limit may be lower */ 26/* LPIDs we support with this build -- runtime limit may be lower */
27#define KVMPPC_NR_LPIDS 64 27#define KVMPPC_NR_LPIDS 64
28 28
29#define KVMPPC_INST_EHPRIV 0x7c00021c 29#define KVMPPC_INST_EHPRIV 0x7c00021c
30#define EHPRIV_OC_SHIFT 11
31/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
32#define EHPRIV_OC_DEBUG 1
33#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \
34 (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT))
30 35
31static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 36static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
32{ 37{
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 33283532e9d8..237d1d25b448 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -63,20 +63,17 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
63 63
64#endif 64#endif
65 65
66/* We don't currently support large pages. */
67#define KVM_HPAGE_GFN_SHIFT(x) 0
68#define KVM_NR_PAGE_SIZES 1
69#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
70
71#define HPTEG_CACHE_NUM (1 << 15) 66#define HPTEG_CACHE_NUM (1 << 15)
72#define HPTEG_HASH_BITS_PTE 13 67#define HPTEG_HASH_BITS_PTE 13
73#define HPTEG_HASH_BITS_PTE_LONG 12 68#define HPTEG_HASH_BITS_PTE_LONG 12
74#define HPTEG_HASH_BITS_VPTE 13 69#define HPTEG_HASH_BITS_VPTE 13
75#define HPTEG_HASH_BITS_VPTE_LONG 5 70#define HPTEG_HASH_BITS_VPTE_LONG 5
71#define HPTEG_HASH_BITS_VPTE_64K 11
76#define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) 72#define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE)
77#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) 73#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG)
78#define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) 74#define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE)
79#define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) 75#define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG)
76#define HPTEG_HASH_NUM_VPTE_64K (1 << HPTEG_HASH_BITS_VPTE_64K)
80 77
81/* Physical Address Mask - allowed range of real mode RAM access */ 78/* Physical Address Mask - allowed range of real mode RAM access */
82#define KVM_PAM 0x0fffffffffffffffULL 79#define KVM_PAM 0x0fffffffffffffffULL
@@ -89,6 +86,9 @@ struct lppaca;
89struct slb_shadow; 86struct slb_shadow;
90struct dtl_entry; 87struct dtl_entry;
91 88
89struct kvmppc_vcpu_book3s;
90struct kvmppc_book3s_shadow_vcpu;
91
92struct kvm_vm_stat { 92struct kvm_vm_stat {
93 u32 remote_tlb_flush; 93 u32 remote_tlb_flush;
94}; 94};
@@ -224,15 +224,15 @@ struct revmap_entry {
224#define KVMPPC_GOT_PAGE 0x80 224#define KVMPPC_GOT_PAGE 0x80
225 225
226struct kvm_arch_memory_slot { 226struct kvm_arch_memory_slot {
227#ifdef CONFIG_KVM_BOOK3S_64_HV 227#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
228 unsigned long *rmap; 228 unsigned long *rmap;
229 unsigned long *slot_phys; 229 unsigned long *slot_phys;
230#endif /* CONFIG_KVM_BOOK3S_64_HV */ 230#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
231}; 231};
232 232
233struct kvm_arch { 233struct kvm_arch {
234 unsigned int lpid; 234 unsigned int lpid;
235#ifdef CONFIG_KVM_BOOK3S_64_HV 235#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
236 unsigned long hpt_virt; 236 unsigned long hpt_virt;
237 struct revmap_entry *revmap; 237 struct revmap_entry *revmap;
238 unsigned int host_lpid; 238 unsigned int host_lpid;
@@ -256,7 +256,10 @@ struct kvm_arch {
256 cpumask_t need_tlb_flush; 256 cpumask_t need_tlb_flush;
257 struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; 257 struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
258 int hpt_cma_alloc; 258 int hpt_cma_alloc;
259#endif /* CONFIG_KVM_BOOK3S_64_HV */ 259#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
260#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
261 struct mutex hpt_mutex;
262#endif
260#ifdef CONFIG_PPC_BOOK3S_64 263#ifdef CONFIG_PPC_BOOK3S_64
261 struct list_head spapr_tce_tables; 264 struct list_head spapr_tce_tables;
262 struct list_head rtas_tokens; 265 struct list_head rtas_tokens;
@@ -267,6 +270,7 @@ struct kvm_arch {
267#ifdef CONFIG_KVM_XICS 270#ifdef CONFIG_KVM_XICS
268 struct kvmppc_xics *xics; 271 struct kvmppc_xics *xics;
269#endif 272#endif
273 struct kvmppc_ops *kvm_ops;
270}; 274};
271 275
272/* 276/*
@@ -294,6 +298,10 @@ struct kvmppc_vcore {
294 u64 stolen_tb; 298 u64 stolen_tb;
295 u64 preempt_tb; 299 u64 preempt_tb;
296 struct kvm_vcpu *runner; 300 struct kvm_vcpu *runner;
301 u64 tb_offset; /* guest timebase - host timebase */
302 ulong lpcr;
303 u32 arch_compat;
304 ulong pcr;
297}; 305};
298 306
299#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) 307#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
@@ -328,6 +336,7 @@ struct kvmppc_pte {
328 bool may_read : 1; 336 bool may_read : 1;
329 bool may_write : 1; 337 bool may_write : 1;
330 bool may_execute : 1; 338 bool may_execute : 1;
339 u8 page_size; /* MMU_PAGE_xxx */
331}; 340};
332 341
333struct kvmppc_mmu { 342struct kvmppc_mmu {
@@ -340,7 +349,8 @@ struct kvmppc_mmu {
340 /* book3s */ 349 /* book3s */
341 void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value); 350 void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
342 u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum); 351 u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
343 int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); 352 int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
353 struct kvmppc_pte *pte, bool data, bool iswrite);
344 void (*reset_msr)(struct kvm_vcpu *vcpu); 354 void (*reset_msr)(struct kvm_vcpu *vcpu);
345 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); 355 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
346 int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); 356 int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
@@ -360,6 +370,7 @@ struct kvmppc_slb {
360 bool large : 1; /* PTEs are 16MB */ 370 bool large : 1; /* PTEs are 16MB */
361 bool tb : 1; /* 1TB segment */ 371 bool tb : 1; /* 1TB segment */
362 bool class : 1; 372 bool class : 1;
373 u8 base_page_size; /* MMU_PAGE_xxx */
363}; 374};
364 375
365# ifdef CONFIG_PPC_FSL_BOOK3E 376# ifdef CONFIG_PPC_FSL_BOOK3E
@@ -377,17 +388,6 @@ struct kvmppc_slb {
377#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ 388#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */
378#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ 389#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */
379 390
380struct kvmppc_booke_debug_reg {
381 u32 dbcr0;
382 u32 dbcr1;
383 u32 dbcr2;
384#ifdef CONFIG_KVM_E500MC
385 u32 dbcr4;
386#endif
387 u64 iac[KVMPPC_BOOKE_MAX_IAC];
388 u64 dac[KVMPPC_BOOKE_MAX_DAC];
389};
390
391#define KVMPPC_IRQ_DEFAULT 0 391#define KVMPPC_IRQ_DEFAULT 0
392#define KVMPPC_IRQ_MPIC 1 392#define KVMPPC_IRQ_MPIC 1
393#define KVMPPC_IRQ_XICS 2 393#define KVMPPC_IRQ_XICS 2
@@ -402,6 +402,10 @@ struct kvm_vcpu_arch {
402 int slb_max; /* 1 + index of last valid entry in slb[] */ 402 int slb_max; /* 1 + index of last valid entry in slb[] */
403 int slb_nr; /* total number of entries in SLB */ 403 int slb_nr; /* total number of entries in SLB */
404 struct kvmppc_mmu mmu; 404 struct kvmppc_mmu mmu;
405 struct kvmppc_vcpu_book3s *book3s;
406#endif
407#ifdef CONFIG_PPC_BOOK3S_32
408 struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
405#endif 409#endif
406 410
407 ulong gpr[32]; 411 ulong gpr[32];
@@ -463,6 +467,8 @@ struct kvm_vcpu_arch {
463 u32 ctrl; 467 u32 ctrl;
464 ulong dabr; 468 ulong dabr;
465 ulong cfar; 469 ulong cfar;
470 ulong ppr;
471 ulong shadow_srr1;
466#endif 472#endif
467 u32 vrsave; /* also USPRG0 */ 473 u32 vrsave; /* also USPRG0 */
468 u32 mmucr; 474 u32 mmucr;
@@ -498,6 +504,8 @@ struct kvm_vcpu_arch {
498 504
499 u64 mmcr[3]; 505 u64 mmcr[3];
500 u32 pmc[8]; 506 u32 pmc[8];
507 u64 siar;
508 u64 sdar;
501 509
502#ifdef CONFIG_KVM_EXIT_TIMING 510#ifdef CONFIG_KVM_EXIT_TIMING
503 struct mutex exit_timing_lock; 511 struct mutex exit_timing_lock;
@@ -531,7 +539,10 @@ struct kvm_vcpu_arch {
531 u32 eptcfg; 539 u32 eptcfg;
532 u32 epr; 540 u32 epr;
533 u32 crit_save; 541 u32 crit_save;
534 struct kvmppc_booke_debug_reg dbg_reg; 542 /* guest debug registers*/
543 struct debug_reg dbg_reg;
544 /* hardware visible debug registers when in guest state */
545 struct debug_reg shadow_dbg_reg;
535#endif 546#endif
536 gpa_t paddr_accessed; 547 gpa_t paddr_accessed;
537 gva_t vaddr_accessed; 548 gva_t vaddr_accessed;
@@ -582,7 +593,7 @@ struct kvm_vcpu_arch {
582 struct kvmppc_icp *icp; /* XICS presentation controller */ 593 struct kvmppc_icp *icp; /* XICS presentation controller */
583#endif 594#endif
584 595
585#ifdef CONFIG_KVM_BOOK3S_64_HV 596#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
586 struct kvm_vcpu_arch_shared shregs; 597 struct kvm_vcpu_arch_shared shregs;
587 598
588 unsigned long pgfault_addr; 599 unsigned long pgfault_addr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b15554a26c20..c8317fbf92c4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -106,13 +106,6 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
106 struct kvm_interrupt *irq); 106 struct kvm_interrupt *irq);
107extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); 107extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
108extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); 108extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
109
110extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
111 unsigned int op, int *advance);
112extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
113 ulong val);
114extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
115 ulong *val);
116extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); 109extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
117 110
118extern int kvmppc_booke_init(void); 111extern int kvmppc_booke_init(void);
@@ -135,17 +128,17 @@ extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
135 struct kvm_create_spapr_tce *args); 128 struct kvm_create_spapr_tce *args);
136extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 129extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
137 unsigned long ioba, unsigned long tce); 130 unsigned long ioba, unsigned long tce);
138extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
139 struct kvm_allocate_rma *rma);
140extern struct kvm_rma_info *kvm_alloc_rma(void); 131extern struct kvm_rma_info *kvm_alloc_rma(void);
141extern void kvm_release_rma(struct kvm_rma_info *ri); 132extern void kvm_release_rma(struct kvm_rma_info *ri);
142extern struct page *kvm_alloc_hpt(unsigned long nr_pages); 133extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
143extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); 134extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
144extern int kvmppc_core_init_vm(struct kvm *kvm); 135extern int kvmppc_core_init_vm(struct kvm *kvm);
145extern void kvmppc_core_destroy_vm(struct kvm *kvm); 136extern void kvmppc_core_destroy_vm(struct kvm *kvm);
146extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 137extern void kvmppc_core_free_memslot(struct kvm *kvm,
138 struct kvm_memory_slot *free,
147 struct kvm_memory_slot *dont); 139 struct kvm_memory_slot *dont);
148extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 140extern int kvmppc_core_create_memslot(struct kvm *kvm,
141 struct kvm_memory_slot *slot,
149 unsigned long npages); 142 unsigned long npages);
150extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, 143extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
151 struct kvm_memory_slot *memslot, 144 struct kvm_memory_slot *memslot,
@@ -177,6 +170,72 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
177extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); 170extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
178extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); 171extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
179 172
173union kvmppc_one_reg {
174 u32 wval;
175 u64 dval;
176 vector128 vval;
177 u64 vsxval[2];
178 struct {
179 u64 addr;
180 u64 length;
181 } vpaval;
182};
183
184struct kvmppc_ops {
185 struct module *owner;
186 int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
187 int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
188 int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
189 union kvmppc_one_reg *val);
190 int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 id,
191 union kvmppc_one_reg *val);
192 void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
193 void (*vcpu_put)(struct kvm_vcpu *vcpu);
194 void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
195 int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
196 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
197 void (*vcpu_free)(struct kvm_vcpu *vcpu);
198 int (*check_requests)(struct kvm_vcpu *vcpu);
199 int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
200 void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
201 int (*prepare_memory_region)(struct kvm *kvm,
202 struct kvm_memory_slot *memslot,
203 struct kvm_userspace_memory_region *mem);
204 void (*commit_memory_region)(struct kvm *kvm,
205 struct kvm_userspace_memory_region *mem,
206 const struct kvm_memory_slot *old);
207 int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
208 int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
209 unsigned long end);
210 int (*age_hva)(struct kvm *kvm, unsigned long hva);
211 int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
212 void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
213 void (*mmu_destroy)(struct kvm_vcpu *vcpu);
214 void (*free_memslot)(struct kvm_memory_slot *free,
215 struct kvm_memory_slot *dont);
216 int (*create_memslot)(struct kvm_memory_slot *slot,
217 unsigned long npages);
218 int (*init_vm)(struct kvm *kvm);
219 void (*destroy_vm)(struct kvm *kvm);
220 int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
221 int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu,
222 unsigned int inst, int *advance);
223 int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
224 int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
225 void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
226 long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
227 unsigned long arg);
228
229};
230
231extern struct kvmppc_ops *kvmppc_hv_ops;
232extern struct kvmppc_ops *kvmppc_pr_ops;
233
234static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
235{
236 return kvm->arch.kvm_ops == kvmppc_hv_ops;
237}
238
180/* 239/*
181 * Cuts out inst bits with ordering according to spec. 240 * Cuts out inst bits with ordering according to spec.
182 * That means the leftmost bit is zero. All given bits are included. 241 * That means the leftmost bit is zero. All given bits are included.
@@ -210,17 +269,6 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
210 return r; 269 return r;
211} 270}
212 271
213union kvmppc_one_reg {
214 u32 wval;
215 u64 dval;
216 vector128 vval;
217 u64 vsxval[2];
218 struct {
219 u64 addr;
220 u64 length;
221 } vpaval;
222};
223
224#define one_reg_size(id) \ 272#define one_reg_size(id) \
225 (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) 273 (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
226 274
@@ -245,10 +293,10 @@ union kvmppc_one_reg {
245 __v; \ 293 __v; \
246}) 294})
247 295
248void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 296int kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
249int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 297int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
250 298
251void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 299int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
252int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 300int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
253 301
254int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); 302int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
@@ -260,7 +308,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
260 308
261struct openpic; 309struct openpic;
262 310
263#ifdef CONFIG_KVM_BOOK3S_64_HV 311#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
264extern void kvm_cma_reserve(void) __init; 312extern void kvm_cma_reserve(void) __init;
265static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 313static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
266{ 314{
@@ -269,10 +317,10 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
269 317
270static inline u32 kvmppc_get_xics_latch(void) 318static inline u32 kvmppc_get_xics_latch(void)
271{ 319{
272 u32 xirr = get_paca()->kvm_hstate.saved_xirr; 320 u32 xirr;
273 321
322 xirr = get_paca()->kvm_hstate.saved_xirr;
274 get_paca()->kvm_hstate.saved_xirr = 0; 323 get_paca()->kvm_hstate.saved_xirr = 0;
275
276 return xirr; 324 return xirr;
277} 325}
278 326
@@ -281,7 +329,10 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
281 paca[cpu].kvm_hstate.host_ipi = host_ipi; 329 paca[cpu].kvm_hstate.host_ipi = host_ipi;
282} 330}
283 331
284extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); 332static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
333{
334 vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu);
335}
285 336
286#else 337#else
287static inline void __init kvm_cma_reserve(void) 338static inline void __init kvm_cma_reserve(void)
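
The kvm_ppc.h hunks above replace the old direct kvmppc_core_* entry points with a per-VM struct kvmppc_ops table, published through the kvmppc_hv_ops/kvmppc_pr_ops pointers and reached via kvm->arch.kvm_ops (see is_kvmppc_hv_enabled() and the new inline kvmppc_fast_vcpu_kick()). A minimal sketch of how a backend is expected to plug in, modelled on the 44x conversion further down in this diff; the my_* names are illustrative placeholders, not part of the patch:

/* Fill a kvmppc_ops table with this backend's callbacks and publish it
 * through kvmppc_pr_ops (a hypervisor-mode backend would use
 * kvmppc_hv_ops instead).  Only two callbacks are shown; a real backend
 * fills in the whole table, as kvm_ops_44x does below.
 */
static void my_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* ... */ }
static void my_vcpu_put(struct kvm_vcpu *vcpu) { /* ... */ }

static struct kvmppc_ops my_kvm_ops = {
	.vcpu_load = my_vcpu_load,
	.vcpu_put  = my_vcpu_put,
	/* ... remaining callbacks ... */
};

static int __init my_kvm_init(void)
{
	int r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

	if (r)
		return r;
	my_kvm_ops.owner = THIS_MODULE;
	kvmppc_pr_ops = &my_kvm_ops;	/* generic code now dispatches here */
	return 0;
}
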
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index a5954cebbc55..b6ea9e068c13 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -166,7 +166,7 @@ struct paca_struct {
166 struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ 166 struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */
167 167
168#ifdef CONFIG_KVM_BOOK3S_HANDLER 168#ifdef CONFIG_KVM_BOOK3S_HANDLER
169#ifdef CONFIG_KVM_BOOK3S_PR 169#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
170 /* We use this to store guest state in */ 170 /* We use this to store guest state in */
171 struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 171 struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
172#endif 172#endif
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 7794b2b04eb2..fc14a38c7ccf 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -208,6 +208,7 @@ struct debug_reg {
208 208
209struct thread_struct { 209struct thread_struct {
210 unsigned long ksp; /* Kernel stack pointer */ 210 unsigned long ksp; /* Kernel stack pointer */
211
211#ifdef CONFIG_PPC64 212#ifdef CONFIG_PPC64
212 unsigned long ksp_vsid; 213 unsigned long ksp_vsid;
213#endif 214#endif
@@ -221,6 +222,7 @@ struct thread_struct {
221 void *pgdir; /* root of page-table tree */ 222 void *pgdir; /* root of page-table tree */
222 unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ 223 unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
223#endif 224#endif
225 /* Debug Registers */
224 struct debug_reg debug; 226 struct debug_reg debug;
225 struct thread_fp_state fp_state; 227 struct thread_fp_state fp_state;
226 struct thread_fp_state *fp_save_area; 228 struct thread_fp_state *fp_save_area;
diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
index 0156702ba24e..576ad88104cb 100644
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ b/arch/powerpc/include/asm/pte-book3e.h
@@ -40,7 +40,7 @@
40#define _PAGE_U1 0x010000 40#define _PAGE_U1 0x010000
41#define _PAGE_U0 0x020000 41#define _PAGE_U0 0x020000
42#define _PAGE_ACCESSED 0x040000 42#define _PAGE_ACCESSED 0x040000
43#define _PAGE_LENDIAN 0x080000 43#define _PAGE_ENDIAN 0x080000
44#define _PAGE_GUARDED 0x100000 44#define _PAGE_GUARDED 0x100000
45#define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */ 45#define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */
46#define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */ 46#define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 126f6e98f84d..5c45787d551e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -248,6 +248,7 @@
248#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ 248#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
249#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ 249#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */
250#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ 250#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */
251#define SPRN_TBU40 0x11E /* Timebase upper 40 bits (hyper, R/W) */
251#define SPRN_SPURR 0x134 /* Scaled PURR */ 252#define SPRN_SPURR 0x134 /* Scaled PURR */
252#define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */ 253#define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */
253#define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */ 254#define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */
@@ -288,6 +289,7 @@
288#define LPCR_ISL (1ul << (63-2)) 289#define LPCR_ISL (1ul << (63-2))
289#define LPCR_VC_SH (63-2) 290#define LPCR_VC_SH (63-2)
290#define LPCR_DPFD_SH (63-11) 291#define LPCR_DPFD_SH (63-11)
292#define LPCR_DPFD (7ul << LPCR_DPFD_SH)
291#define LPCR_VRMASD (0x1ful << (63-16)) 293#define LPCR_VRMASD (0x1ful << (63-16))
292#define LPCR_VRMA_L (1ul << (63-12)) 294#define LPCR_VRMA_L (1ul << (63-12))
293#define LPCR_VRMA_LP0 (1ul << (63-15)) 295#define LPCR_VRMA_LP0 (1ul << (63-15))
@@ -304,6 +306,7 @@
304#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ 306#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */
305#define LPCR_MER 0x00000800 /* Mediated External Exception */ 307#define LPCR_MER 0x00000800 /* Mediated External Exception */
306#define LPCR_MER_SH 11 308#define LPCR_MER_SH 11
309#define LPCR_TC 0x00000200 /* Translation control */
307#define LPCR_LPES 0x0000000c 310#define LPCR_LPES 0x0000000c
308#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ 311#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */
309#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ 312#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */
@@ -316,6 +319,10 @@
316#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 319#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
317#define SPRN_HMER 0x150 /* Hardware m? error recovery */ 320#define SPRN_HMER 0x150 /* Hardware m? error recovery */
318#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ 321#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
322#define SPRN_PCR 0x152 /* Processor compatibility register */
323#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
324#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
325#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
319#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ 326#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
320#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ 327#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */
321#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */ 328#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */
@@ -425,6 +432,7 @@
425#define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ 432#define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */
426#define HID4_LPID5_SH (63 - 6) /* partition ID bottom 4 bits */ 433#define HID4_LPID5_SH (63 - 6) /* partition ID bottom 4 bits */
427#define HID4_RMOR_SH (63 - 22) /* real mode offset (16 bits) */ 434#define HID4_RMOR_SH (63 - 22) /* real mode offset (16 bits) */
435#define HID4_RMOR (0xFFFFul << HID4_RMOR_SH)
428#define HID4_LPES1 (1 << (63-57)) /* LPAR env. sel. bit 1 */ 436#define HID4_LPES1 (1 << (63-57)) /* LPAR env. sel. bit 1 */
429#define HID4_RMLS0_SH (63 - 58) /* Real mode limit top bit */ 437#define HID4_RMLS0_SH (63 - 58) /* Real mode limit top bit */
430#define HID4_LPID1_SH 0 /* partition ID top 2 bits */ 438#define HID4_LPID1_SH 0 /* partition ID top 2 bits */
@@ -1107,6 +1115,13 @@
1107#define PVR_BE 0x0070 1115#define PVR_BE 0x0070
1108#define PVR_PA6T 0x0090 1116#define PVR_PA6T 0x0090
1109 1117
1118/* "Logical" PVR values defined in PAPR, representing architecture levels */
1119#define PVR_ARCH_204 0x0f000001
1120#define PVR_ARCH_205 0x0f000002
1121#define PVR_ARCH_206 0x0f000003
1122#define PVR_ARCH_206p 0x0f100003
1123#define PVR_ARCH_207 0x0f000004
1124
1110/* Macros for setting and retrieving special purpose registers */ 1125/* Macros for setting and retrieving special purpose registers */
1111#ifndef __ASSEMBLY__ 1126#ifndef __ASSEMBLY__
1112#define mfmsr() ({unsigned long rval; \ 1127#define mfmsr() ({unsigned long rval; \
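
The reg.h additions above include the PAPR "logical" PVR values for architecture levels 2.04 through 2.07. A hedged userspace sketch of how such a value would be consumed, via the KVM_REG_PPC_ARCH_COMPAT one-reg added in the uapi hunk that follows (set_arch_compat() and the open vcpu_fd are assumptions for illustration; error handling omitted):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Cap the guest at an older architecture level by writing a logical PVR
 * (e.g. 0x0f000003 for ISA 2.06) into KVM_REG_PPC_ARCH_COMPAT. */
static int set_arch_compat(int vcpu_fd, uint32_t logical_pvr)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_ARCH_COMPAT,
		.addr = (uintptr_t)&logical_pvr,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
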
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 0fb1a6e9ff90..6836ec79a830 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
27#define __KVM_HAVE_PPC_SMT 27#define __KVM_HAVE_PPC_SMT
28#define __KVM_HAVE_IRQCHIP 28#define __KVM_HAVE_IRQCHIP
29#define __KVM_HAVE_IRQ_LINE 29#define __KVM_HAVE_IRQ_LINE
30#define __KVM_HAVE_GUEST_DEBUG
30 31
31struct kvm_regs { 32struct kvm_regs {
32 __u64 pc; 33 __u64 pc;
@@ -269,7 +270,24 @@ struct kvm_fpu {
269 __u64 fpr[32]; 270 __u64 fpr[32];
270}; 271};
271 272
273/*
274 * Defines for h/w breakpoint, watchpoint (read, write or both) and
275 * software breakpoint.
276 * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
277 * for KVM_DEBUG_EXIT.
278 */
279#define KVMPPC_DEBUG_NONE 0x0
280#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1)
281#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2)
282#define KVMPPC_DEBUG_WATCH_READ (1UL << 3)
272struct kvm_debug_exit_arch { 283struct kvm_debug_exit_arch {
284 __u64 address;
285 /*
286 * exiting to userspace because of h/w breakpoint, watchpoint
287 * (read, write or both) and software breakpoint.
288 */
289 __u32 status;
290 __u32 reserved;
273}; 291};
274 292
275/* for KVM_SET_GUEST_DEBUG */ 293/* for KVM_SET_GUEST_DEBUG */
@@ -281,10 +299,6 @@ struct kvm_guest_debug_arch {
281 * Type denotes h/w breakpoint, read watchpoint, write 299 * Type denotes h/w breakpoint, read watchpoint, write
282 * watchpoint or watchpoint (both read and write). 300 * watchpoint or watchpoint (both read and write).
283 */ 301 */
284#define KVMPPC_DEBUG_NONE 0x0
285#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1)
286#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2)
287#define KVMPPC_DEBUG_WATCH_READ (1UL << 3)
288 __u32 type; 302 __u32 type;
289 __u32 reserved; 303 __u32 reserved;
290 } bp[16]; 304 } bp[16];
@@ -429,6 +443,11 @@ struct kvm_get_htab_header {
429#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10) 443#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
430#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11) 444#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
431#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12) 445#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
446#define KVM_REG_PPC_MMCR2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13)
447#define KVM_REG_PPC_MMCRS (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14)
448#define KVM_REG_PPC_SIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15)
449#define KVM_REG_PPC_SDAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16)
450#define KVM_REG_PPC_SIER (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17)
432 451
433#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18) 452#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
434#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19) 453#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
@@ -499,6 +518,65 @@ struct kvm_get_htab_header {
499#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) 518#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
500#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) 519#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
501 520
521/* Timebase offset */
522#define KVM_REG_PPC_TB_OFFSET (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c)
523
524/* POWER8 registers */
525#define KVM_REG_PPC_SPMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d)
526#define KVM_REG_PPC_SPMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e)
527#define KVM_REG_PPC_IAMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f)
528#define KVM_REG_PPC_TFHAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0)
529#define KVM_REG_PPC_TFIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1)
530#define KVM_REG_PPC_TEXASR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2)
531#define KVM_REG_PPC_FSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3)
532#define KVM_REG_PPC_PSPB (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4)
533#define KVM_REG_PPC_EBBHR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa5)
534#define KVM_REG_PPC_EBBRR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6)
535#define KVM_REG_PPC_BESCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7)
536#define KVM_REG_PPC_TAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8)
537#define KVM_REG_PPC_DPDES (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9)
538#define KVM_REG_PPC_DAWR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa)
539#define KVM_REG_PPC_DAWRX (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab)
540#define KVM_REG_PPC_CIABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac)
541#define KVM_REG_PPC_IC (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad)
542#define KVM_REG_PPC_VTB (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae)
543#define KVM_REG_PPC_CSIGR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf)
544#define KVM_REG_PPC_TACR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0)
545#define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
546#define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
547#define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
548
549#define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
550#define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
551#define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6)
552
553/* Architecture compatibility level */
554#define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)
555
556/* Transactional Memory checkpointed state:
557 * This is all GPRs, all VSX regs and a subset of SPRs
558 */
559#define KVM_REG_PPC_TM (KVM_REG_PPC | 0x80000000)
560/* TM GPRs */
561#define KVM_REG_PPC_TM_GPR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0)
562#define KVM_REG_PPC_TM_GPR(n) (KVM_REG_PPC_TM_GPR0 + (n))
563#define KVM_REG_PPC_TM_GPR31 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f)
564/* TM VSX */
565#define KVM_REG_PPC_TM_VSR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20)
566#define KVM_REG_PPC_TM_VSR(n) (KVM_REG_PPC_TM_VSR0 + (n))
567#define KVM_REG_PPC_TM_VSR63 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x5f)
568/* TM SPRS */
569#define KVM_REG_PPC_TM_CR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60)
570#define KVM_REG_PPC_TM_LR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61)
571#define KVM_REG_PPC_TM_CTR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62)
572#define KVM_REG_PPC_TM_FPSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63)
573#define KVM_REG_PPC_TM_AMR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64)
574#define KVM_REG_PPC_TM_PPR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65)
575#define KVM_REG_PPC_TM_VRSAVE (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66)
576#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
577#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
578#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
579
502/* PPC64 eXternal Interrupt Controller Specification */ 580/* PPC64 eXternal Interrupt Controller Specification */
503#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ 581#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
504 582
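
The uapi changes above move the KVMPPC_DEBUG_* bits out of struct kvm_guest_debug_arch so they can double as the "status" of the new kvm_debug_exit_arch, and advertise __KVM_HAVE_GUEST_DEBUG. A rough userspace sketch of the intended flow, assuming an open vcpu fd and a mapped kvm_run area (arm_write_watchpoint() and report_debug_exit() are illustrative names, not part of the ABI):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <stdio.h>

/* Arm one hardware write watchpoint using the relocated type bits. */
static int arm_write_watchpoint(int vcpu_fd, uint64_t guest_addr)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
	};

	dbg.arch.bp[0].addr = guest_addr;
	dbg.arch.bp[0].type = KVMPPC_DEBUG_WATCH_WRITE;

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}

/* On KVM_EXIT_DEBUG, the arch part carries the faulting address and a
 * status built from the same KVMPPC_DEBUG_* bits. */
static void report_debug_exit(struct kvm_run *run)
{
	if (run->exit_reason == KVM_EXIT_DEBUG)
		printf("debug exit at 0x%llx, status 0x%x\n",
		       (unsigned long long)run->debug.arch.address,
		       run->debug.arch.status);
}
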
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e60a3697932c..2ea5cc033ec8 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -439,7 +439,7 @@ int main(void)
439 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 439 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
440 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 440 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
441 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 441 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
442#ifdef CONFIG_KVM_BOOK3S_64_HV 442#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
443 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); 443 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
444 DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); 444 DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
445 DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); 445 DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
@@ -470,7 +470,7 @@ int main(void)
470 DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); 470 DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
471 471
472 /* book3s */ 472 /* book3s */
473#ifdef CONFIG_KVM_BOOK3S_64_HV 473#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
474 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); 474 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
475 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); 475 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
476 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); 476 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -502,6 +502,8 @@ int main(void)
502 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); 502 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
503 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); 503 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
504 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); 504 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
505 DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
506 DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
505 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); 507 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
506 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); 508 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
507 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); 509 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
@@ -511,18 +513,22 @@ int main(void)
511 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); 513 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
512 DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); 514 DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
513 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); 515 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
516 DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
517 DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
514 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); 518 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
515 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); 519 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
516 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); 520 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
517 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); 521 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
518 DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - 522 DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
519 offsetof(struct kvmppc_vcpu_book3s, vcpu)); 523 DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
524 DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
520 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); 525 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
521 DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); 526 DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
522 DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); 527 DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
523 528
524#ifdef CONFIG_PPC_BOOK3S_64 529#ifdef CONFIG_PPC_BOOK3S_64
525#ifdef CONFIG_KVM_BOOK3S_PR 530#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
531 DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
526# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) 532# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
527#else 533#else
528# define SVCPU_FIELD(x, f) 534# define SVCPU_FIELD(x, f)
@@ -574,7 +580,7 @@ int main(void)
574 HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); 580 HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
575 HSTATE_FIELD(HSTATE_NAPPING, napping); 581 HSTATE_FIELD(HSTATE_NAPPING, napping);
576 582
577#ifdef CONFIG_KVM_BOOK3S_64_HV 583#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
578 HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); 584 HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
579 HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); 585 HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
580 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); 586 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
@@ -590,10 +596,11 @@ int main(void)
590 HSTATE_FIELD(HSTATE_DABR, dabr); 596 HSTATE_FIELD(HSTATE_DABR, dabr);
591 HSTATE_FIELD(HSTATE_DECEXP, dec_expires); 597 HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
592 DEFINE(IPI_PRIORITY, IPI_PRIORITY); 598 DEFINE(IPI_PRIORITY, IPI_PRIORITY);
593#endif /* CONFIG_KVM_BOOK3S_64_HV */ 599#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
594 600
595#ifdef CONFIG_PPC_BOOK3S_64 601#ifdef CONFIG_PPC_BOOK3S_64
596 HSTATE_FIELD(HSTATE_CFAR, cfar); 602 HSTATE_FIELD(HSTATE_CFAR, cfar);
603 HSTATE_FIELD(HSTATE_PPR, ppr);
597#endif /* CONFIG_PPC_BOOK3S_64 */ 604#endif /* CONFIG_PPC_BOOK3S_64 */
598 605
599#else /* CONFIG_PPC_BOOK3S */ 606#else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3a9ed6ac224b..9f905e40922e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -126,7 +126,7 @@ BEGIN_FTR_SECTION
126 bgt cr1,. 126 bgt cr1,.
127 GET_PACA(r13) 127 GET_PACA(r13)
128 128
129#ifdef CONFIG_KVM_BOOK3S_64_HV 129#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
130 li r0,KVM_HWTHREAD_IN_KERNEL 130 li r0,KVM_HWTHREAD_IN_KERNEL
131 stb r0,HSTATE_HWTHREAD_STATE(r13) 131 stb r0,HSTATE_HWTHREAD_STATE(r13)
132 /* Order setting hwthread_state vs. testing hwthread_req */ 132 /* Order setting hwthread_state vs. testing hwthread_req */
@@ -425,7 +425,7 @@ data_access_check_stab:
425 mfspr r9,SPRN_DSISR 425 mfspr r9,SPRN_DSISR
426 srdi r10,r10,60 426 srdi r10,r10,60
427 rlwimi r10,r9,16,0x20 427 rlwimi r10,r9,16,0x20
428#ifdef CONFIG_KVM_BOOK3S_PR 428#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
429 lbz r9,HSTATE_IN_GUEST(r13) 429 lbz r9,HSTATE_IN_GUEST(r13)
430 rlwimi r10,r9,8,0x300 430 rlwimi r10,r9,8,0x300
431#endif 431#endif
@@ -650,6 +650,32 @@ slb_miss_user_pseries:
650 b . /* prevent spec. execution */ 650 b . /* prevent spec. execution */
651#endif /* __DISABLED__ */ 651#endif /* __DISABLED__ */
652 652
653#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
654kvmppc_skip_interrupt:
655 /*
656 * Here all GPRs are unchanged from when the interrupt happened
657 * except for r13, which is saved in SPRG_SCRATCH0.
658 */
659 mfspr r13, SPRN_SRR0
660 addi r13, r13, 4
661 mtspr SPRN_SRR0, r13
662 GET_SCRATCH0(r13)
663 rfid
664 b .
665
666kvmppc_skip_Hinterrupt:
667 /*
668 * Here all GPRs are unchanged from when the interrupt happened
669 * except for r13, which is saved in SPRG_SCRATCH0.
670 */
671 mfspr r13, SPRN_HSRR0
672 addi r13, r13, 4
673 mtspr SPRN_HSRR0, r13
674 GET_SCRATCH0(r13)
675 hrfid
676 b .
677#endif
678
653/* 679/*
654 * Code from here down to __end_handlers is invoked from the 680 * Code from here down to __end_handlers is invoked from the
655 * exception prologs above. Because the prologs assemble the 681 * exception prologs above. Because the prologs assemble the
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index e11863f4e595..847e40e62fce 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -84,7 +84,7 @@ _GLOBAL(power7_nap)
84 std r9,_MSR(r1) 84 std r9,_MSR(r1)
85 std r1,PACAR1(r13) 85 std r1,PACAR1(r13)
86 86
87#ifdef CONFIG_KVM_BOOK3S_64_HV 87#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
88 /* Tell KVM we're napping */ 88 /* Tell KVM we're napping */
89 li r4,KVM_HWTHREAD_IN_NAP 89 li r4,KVM_HWTHREAD_IN_NAP
90 stb r4,HSTATE_HWTHREAD_STATE(r13) 90 stb r4,HSTATE_HWTHREAD_STATE(r13)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 62c3dd8c69f2..907a472f9a9e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1529,7 +1529,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
1529 * back on or not. 1529 * back on or not.
1530 */ 1530 */
1531 if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, 1531 if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
1532 current->thread.debug.dbcr1)) 1532 current->thread.debug.dbcr1))
1533 regs->msr |= MSR_DE; 1533 regs->msr |= MSR_DE;
1534 else 1534 else
1535 /* Make sure the IDM flag is off */ 1535 /* Make sure the IDM flag is off */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 2f5c6b6d6877..93221e87b911 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -31,13 +31,13 @@
31#include "44x_tlb.h" 31#include "44x_tlb.h"
32#include "booke.h" 32#include "booke.h"
33 33
34void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 34static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu)
35{ 35{
36 kvmppc_booke_vcpu_load(vcpu, cpu); 36 kvmppc_booke_vcpu_load(vcpu, cpu);
37 kvmppc_44x_tlb_load(vcpu); 37 kvmppc_44x_tlb_load(vcpu);
38} 38}
39 39
40void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 40static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu)
41{ 41{
42 kvmppc_44x_tlb_put(vcpu); 42 kvmppc_44x_tlb_put(vcpu);
43 kvmppc_booke_vcpu_put(vcpu); 43 kvmppc_booke_vcpu_put(vcpu);
@@ -114,29 +114,32 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
114 return 0; 114 return 0;
115} 115}
116 116
117void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 117static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu,
118 struct kvm_sregs *sregs)
118{ 119{
119 kvmppc_get_sregs_ivor(vcpu, sregs); 120 return kvmppc_get_sregs_ivor(vcpu, sregs);
120} 121}
121 122
122int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 123static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu,
124 struct kvm_sregs *sregs)
123{ 125{
124 return kvmppc_set_sregs_ivor(vcpu, sregs); 126 return kvmppc_set_sregs_ivor(vcpu, sregs);
125} 127}
126 128
127int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 129static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
128 union kvmppc_one_reg *val) 130 union kvmppc_one_reg *val)
129{ 131{
130 return -EINVAL; 132 return -EINVAL;
131} 133}
132 134
133int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 135static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
134 union kvmppc_one_reg *val) 136 union kvmppc_one_reg *val)
135{ 137{
136 return -EINVAL; 138 return -EINVAL;
137} 139}
138 140
139struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 141static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm,
142 unsigned int id)
140{ 143{
141 struct kvmppc_vcpu_44x *vcpu_44x; 144 struct kvmppc_vcpu_44x *vcpu_44x;
142 struct kvm_vcpu *vcpu; 145 struct kvm_vcpu *vcpu;
@@ -167,7 +170,7 @@ out:
167 return ERR_PTR(err); 170 return ERR_PTR(err);
168} 171}
169 172
170void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 173static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu)
171{ 174{
172 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 175 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
173 176
@@ -176,28 +179,53 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
176 kmem_cache_free(kvm_vcpu_cache, vcpu_44x); 179 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
177} 180}
178 181
179int kvmppc_core_init_vm(struct kvm *kvm) 182static int kvmppc_core_init_vm_44x(struct kvm *kvm)
180{ 183{
181 return 0; 184 return 0;
182} 185}
183 186
184void kvmppc_core_destroy_vm(struct kvm *kvm) 187static void kvmppc_core_destroy_vm_44x(struct kvm *kvm)
185{ 188{
186} 189}
187 190
191static struct kvmppc_ops kvm_ops_44x = {
192 .get_sregs = kvmppc_core_get_sregs_44x,
193 .set_sregs = kvmppc_core_set_sregs_44x,
194 .get_one_reg = kvmppc_get_one_reg_44x,
195 .set_one_reg = kvmppc_set_one_reg_44x,
196 .vcpu_load = kvmppc_core_vcpu_load_44x,
197 .vcpu_put = kvmppc_core_vcpu_put_44x,
198 .vcpu_create = kvmppc_core_vcpu_create_44x,
199 .vcpu_free = kvmppc_core_vcpu_free_44x,
200 .mmu_destroy = kvmppc_mmu_destroy_44x,
201 .init_vm = kvmppc_core_init_vm_44x,
202 .destroy_vm = kvmppc_core_destroy_vm_44x,
203 .emulate_op = kvmppc_core_emulate_op_44x,
204 .emulate_mtspr = kvmppc_core_emulate_mtspr_44x,
205 .emulate_mfspr = kvmppc_core_emulate_mfspr_44x,
206};
207
188static int __init kvmppc_44x_init(void) 208static int __init kvmppc_44x_init(void)
189{ 209{
190 int r; 210 int r;
191 211
192 r = kvmppc_booke_init(); 212 r = kvmppc_booke_init();
193 if (r) 213 if (r)
194 return r; 214 goto err_out;
215
216 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
217 if (r)
218 goto err_out;
219 kvm_ops_44x.owner = THIS_MODULE;
220 kvmppc_pr_ops = &kvm_ops_44x;
195 221
196 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); 222err_out:
223 return r;
197} 224}
198 225
199static void __exit kvmppc_44x_exit(void) 226static void __exit kvmppc_44x_exit(void)
200{ 227{
228 kvmppc_pr_ops = NULL;
201 kvmppc_booke_exit(); 229 kvmppc_booke_exit();
202} 230}
203 231
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 35ec0a8547da..92c9ab4bcfec 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -91,8 +91,8 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
91 return EMULATE_DONE; 91 return EMULATE_DONE;
92} 92}
93 93
94int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 94int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
95 unsigned int inst, int *advance) 95 unsigned int inst, int *advance)
96{ 96{
97 int emulated = EMULATE_DONE; 97 int emulated = EMULATE_DONE;
98 int dcrn = get_dcrn(inst); 98 int dcrn = get_dcrn(inst);
@@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
152 return emulated; 152 return emulated;
153} 153}
154 154
155int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 155int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
156{ 156{
157 int emulated = EMULATE_DONE; 157 int emulated = EMULATE_DONE;
158 158
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
172 return emulated; 172 return emulated;
173} 173}
174 174
175int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 175int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
176{ 176{
177 int emulated = EMULATE_DONE; 177 int emulated = EMULATE_DONE;
178 178
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ed0385448148..0deef1082e02 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -268,7 +268,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
268 trace_kvm_stlb_inval(stlb_index); 268 trace_kvm_stlb_inval(stlb_index);
269} 269}
270 270
271void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 271void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu)
272{ 272{
273 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 273 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
274 int i; 274 int i;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index e593ff257bd3..141b2027189a 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -35,17 +35,20 @@ config KVM_BOOK3S_64_HANDLER
35 bool 35 bool
36 select KVM_BOOK3S_HANDLER 36 select KVM_BOOK3S_HANDLER
37 37
38config KVM_BOOK3S_PR 38config KVM_BOOK3S_PR_POSSIBLE
39 bool 39 bool
40 select KVM_MMIO 40 select KVM_MMIO
41 select MMU_NOTIFIER 41 select MMU_NOTIFIER
42 42
43config KVM_BOOK3S_HV_POSSIBLE
44 bool
45
43config KVM_BOOK3S_32 46config KVM_BOOK3S_32
44 tristate "KVM support for PowerPC book3s_32 processors" 47 tristate "KVM support for PowerPC book3s_32 processors"
45 depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT 48 depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
46 select KVM 49 select KVM
47 select KVM_BOOK3S_32_HANDLER 50 select KVM_BOOK3S_32_HANDLER
48 select KVM_BOOK3S_PR 51 select KVM_BOOK3S_PR_POSSIBLE
49 ---help--- 52 ---help---
50 Support running unmodified book3s_32 guest kernels 53 Support running unmodified book3s_32 guest kernels
51 in virtual machines on book3s_32 host processors. 54 in virtual machines on book3s_32 host processors.
@@ -60,6 +63,7 @@ config KVM_BOOK3S_64
60 depends on PPC_BOOK3S_64 63 depends on PPC_BOOK3S_64
61 select KVM_BOOK3S_64_HANDLER 64 select KVM_BOOK3S_64_HANDLER
62 select KVM 65 select KVM
66 select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
63 ---help--- 67 ---help---
64 Support running unmodified book3s_64 and book3s_32 guest kernels 68 Support running unmodified book3s_64 and book3s_32 guest kernels
65 in virtual machines on book3s_64 host processors. 69 in virtual machines on book3s_64 host processors.
@@ -70,8 +74,9 @@ config KVM_BOOK3S_64
70 If unsure, say N. 74 If unsure, say N.
71 75
72config KVM_BOOK3S_64_HV 76config KVM_BOOK3S_64_HV
73 bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" 77 tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
74 depends on KVM_BOOK3S_64 78 depends on KVM_BOOK3S_64
79 select KVM_BOOK3S_HV_POSSIBLE
75 select MMU_NOTIFIER 80 select MMU_NOTIFIER
76 select CMA 81 select CMA
77 ---help--- 82 ---help---
@@ -90,9 +95,20 @@ config KVM_BOOK3S_64_HV
90 If unsure, say N. 95 If unsure, say N.
91 96
92config KVM_BOOK3S_64_PR 97config KVM_BOOK3S_64_PR
93 def_bool y 98 tristate "KVM support without using hypervisor mode in host"
94 depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV 99 depends on KVM_BOOK3S_64
95 select KVM_BOOK3S_PR 100 select KVM_BOOK3S_PR_POSSIBLE
101 ---help---
102 Support running guest kernels in virtual machines on processors
103 without using hypervisor mode in the host, by running the
104 guest in user mode (problem state) and emulating all
105 privileged instructions and registers.
106
107 This is not as fast as using hypervisor mode, but works on
108 machines where hypervisor mode is not available or not usable,
109 and can emulate processors that are different from the host
110 processor, including emulating 32-bit processors on a 64-bit
111 host.
96 112
97config KVM_BOOKE_HV 113config KVM_BOOKE_HV
98 bool 114 bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 6646c952c5e3..ce569b6bf4d8 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -53,41 +53,51 @@ kvm-e500mc-objs := \
53 e500_emulate.o 53 e500_emulate.o
54kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) 54kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
55 55
56kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 56kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
57 $(KVM)/coalesced_mmio.o \ 57 book3s_64_vio_hv.o
58
59kvm-pr-y := \
58 fpu.o \ 60 fpu.o \
59 book3s_paired_singles.o \ 61 book3s_paired_singles.o \
60 book3s_pr.o \ 62 book3s_pr.o \
61 book3s_pr_papr.o \ 63 book3s_pr_papr.o \
62 book3s_64_vio_hv.o \
63 book3s_emulate.o \ 64 book3s_emulate.o \
64 book3s_interrupts.o \ 65 book3s_interrupts.o \
65 book3s_mmu_hpte.o \ 66 book3s_mmu_hpte.o \
66 book3s_64_mmu_host.o \ 67 book3s_64_mmu_host.o \
67 book3s_64_mmu.o \ 68 book3s_64_mmu.o \
68 book3s_32_mmu.o 69 book3s_32_mmu.o
69kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 70
71ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
72kvm-book3s_64-module-objs := \
73 $(KVM)/coalesced_mmio.o
74
75kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
70 book3s_rmhandlers.o 76 book3s_rmhandlers.o
77endif
71 78
72kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 79kvm-hv-y += \
73 book3s_hv.o \ 80 book3s_hv.o \
74 book3s_hv_interrupts.o \ 81 book3s_hv_interrupts.o \
75 book3s_64_mmu_hv.o 82 book3s_64_mmu_hv.o
83
76kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 84kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
77 book3s_hv_rm_xics.o 85 book3s_hv_rm_xics.o
78kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 86
87ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
88kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
79 book3s_hv_rmhandlers.o \ 89 book3s_hv_rmhandlers.o \
80 book3s_hv_rm_mmu.o \ 90 book3s_hv_rm_mmu.o \
81 book3s_64_vio_hv.o \
82 book3s_hv_ras.o \ 91 book3s_hv_ras.o \
83 book3s_hv_builtin.o \ 92 book3s_hv_builtin.o \
84 book3s_hv_cma.o \ 93 book3s_hv_cma.o \
85 $(kvm-book3s_64-builtin-xics-objs-y) 94 $(kvm-book3s_64-builtin-xics-objs-y)
95endif
86 96
87kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ 97kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
88 book3s_xics.o 98 book3s_xics.o
89 99
90kvm-book3s_64-module-objs := \ 100kvm-book3s_64-module-objs += \
91 $(KVM)/kvm_main.o \ 101 $(KVM)/kvm_main.o \
92 $(KVM)/eventfd.o \ 102 $(KVM)/eventfd.o \
93 powerpc.o \ 103 powerpc.o \
@@ -123,4 +133,7 @@ obj-$(CONFIG_KVM_E500MC) += kvm.o
123obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 133obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
124obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o 134obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
125 135
136obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o
137obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o
138
126obj-y += $(kvm-book3s_64-builtin-objs-y) 139obj-y += $(kvm-book3s_64-builtin-objs-y)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 700df6f1d32c..8912608b7e1b 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -34,6 +34,7 @@
34#include <linux/vmalloc.h> 34#include <linux/vmalloc.h>
35#include <linux/highmem.h> 35#include <linux/highmem.h>
36 36
37#include "book3s.h"
37#include "trace.h" 38#include "trace.h"
38 39
39#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 40#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -69,6 +70,50 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
69{ 70{
70} 71}
71 72
73static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
74{
75 if (!is_kvmppc_hv_enabled(vcpu->kvm))
76 return to_book3s(vcpu)->hior;
77 return 0;
78}
79
80static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
81 unsigned long pending_now, unsigned long old_pending)
82{
83 if (is_kvmppc_hv_enabled(vcpu->kvm))
84 return;
85 if (pending_now)
86 vcpu->arch.shared->int_pending = 1;
87 else if (old_pending)
88 vcpu->arch.shared->int_pending = 0;
89}
90
91static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
92{
93 ulong crit_raw;
94 ulong crit_r1;
95 bool crit;
96
97 if (is_kvmppc_hv_enabled(vcpu->kvm))
98 return false;
99
100 crit_raw = vcpu->arch.shared->critical;
101 crit_r1 = kvmppc_get_gpr(vcpu, 1);
102
103 /* Truncate crit indicators in 32 bit mode */
104 if (!(vcpu->arch.shared->msr & MSR_SF)) {
105 crit_raw &= 0xffffffff;
106 crit_r1 &= 0xffffffff;
107 }
108
109 /* Critical section when crit == r1 */
110 crit = (crit_raw == crit_r1);
111 /* ... and we're in supervisor mode */
112 crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
113
114 return crit;
115}
116
72void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) 117void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
73{ 118{
74 vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); 119 vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
@@ -126,28 +171,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
126 printk(KERN_INFO "Queueing interrupt %x\n", vec); 171 printk(KERN_INFO "Queueing interrupt %x\n", vec);
127#endif 172#endif
128} 173}
129 174EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
130 175
131void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) 176void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
132{ 177{
133 /* might as well deliver this straight away */ 178 /* might as well deliver this straight away */
134 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags); 179 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
135} 180}
181EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
136 182
137void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 183void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
138{ 184{
139 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 185 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
140} 186}
187EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);
141 188
142int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 189int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
143{ 190{
144 return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 191 return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
145} 192}
193EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);
146 194
147void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 195void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
148{ 196{
149 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 197 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
150} 198}
199EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
151 200
152void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 201void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
153 struct kvm_interrupt *irq) 202 struct kvm_interrupt *irq)
@@ -285,8 +334,10 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
285 334
286 return 0; 335 return 0;
287} 336}
337EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
288 338
289pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) 339pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
340 bool *writable)
290{ 341{
291 ulong mp_pa = vcpu->arch.magic_page_pa; 342 ulong mp_pa = vcpu->arch.magic_page_pa;
292 343
@@ -302,20 +353,23 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
302 353
303 pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; 354 pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
304 get_page(pfn_to_page(pfn)); 355 get_page(pfn_to_page(pfn));
356 if (writable)
357 *writable = true;
305 return pfn; 358 return pfn;
306 } 359 }
307 360
308 return gfn_to_pfn(vcpu->kvm, gfn); 361 return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
309} 362}
363EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
310 364
311static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 365static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
312 struct kvmppc_pte *pte) 366 bool iswrite, struct kvmppc_pte *pte)
313{ 367{
314 int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); 368 int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
315 int r; 369 int r;
316 370
317 if (relocated) { 371 if (relocated) {
318 r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); 372 r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);
319 } else { 373 } else {
320 pte->eaddr = eaddr; 374 pte->eaddr = eaddr;
321 pte->raddr = eaddr & KVM_PAM; 375 pte->raddr = eaddr & KVM_PAM;
@@ -361,7 +415,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
361 415
362 vcpu->stat.st++; 416 vcpu->stat.st++;
363 417
364 if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 418 if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte))
365 return -ENOENT; 419 return -ENOENT;
366 420
367 *eaddr = pte.raddr; 421 *eaddr = pte.raddr;
@@ -374,6 +428,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
374 428
375 return EMULATE_DONE; 429 return EMULATE_DONE;
376} 430}
431EXPORT_SYMBOL_GPL(kvmppc_st);
377 432
378int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 433int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
379 bool data) 434 bool data)
@@ -383,7 +438,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
383 438
384 vcpu->stat.ld++; 439 vcpu->stat.ld++;
385 440
386 if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 441 if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte))
387 goto nopte; 442 goto nopte;
388 443
389 *eaddr = pte.raddr; 444 *eaddr = pte.raddr;
@@ -404,6 +459,7 @@ nopte:
404mmio: 459mmio:
405 return EMULATE_DO_MMIO; 460 return EMULATE_DO_MMIO;
406} 461}
462EXPORT_SYMBOL_GPL(kvmppc_ld);
407 463
408int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 464int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
409{ 465{
@@ -419,6 +475,18 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
419{ 475{
420} 476}
421 477
478int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
479 struct kvm_sregs *sregs)
480{
481 return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
482}
483
484int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
485 struct kvm_sregs *sregs)
486{
487 return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
488}
489
422int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 490int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
423{ 491{
424 int i; 492 int i;
@@ -495,8 +563,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
495 if (size > sizeof(val)) 563 if (size > sizeof(val))
496 return -EINVAL; 564 return -EINVAL;
497 565
498 r = kvmppc_get_one_reg(vcpu, reg->id, &val); 566 r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
499
500 if (r == -EINVAL) { 567 if (r == -EINVAL) {
501 r = 0; 568 r = 0;
502 switch (reg->id) { 569 switch (reg->id) {
@@ -528,6 +595,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
528 } 595 }
529 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); 596 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
530 break; 597 break;
598 case KVM_REG_PPC_VRSAVE:
599 val = get_reg_val(reg->id, vcpu->arch.vrsave);
600 break;
531#endif /* CONFIG_ALTIVEC */ 601#endif /* CONFIG_ALTIVEC */
532 case KVM_REG_PPC_DEBUG_INST: { 602 case KVM_REG_PPC_DEBUG_INST: {
533 u32 opcode = INS_TW; 603 u32 opcode = INS_TW;
@@ -572,8 +642,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
572 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) 642 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
573 return -EFAULT; 643 return -EFAULT;
574 644
575 r = kvmppc_set_one_reg(vcpu, reg->id, &val); 645 r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
576
577 if (r == -EINVAL) { 646 if (r == -EINVAL) {
578 r = 0; 647 r = 0;
579 switch (reg->id) { 648 switch (reg->id) {
@@ -605,6 +674,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
605 } 674 }
606 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); 675 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
607 break; 676 break;
677 case KVM_REG_PPC_VRSAVE:
678 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
679 r = -ENXIO;
680 break;
681 }
682 vcpu->arch.vrsave = set_reg_val(reg->id, val);
683 break;
608#endif /* CONFIG_ALTIVEC */ 684#endif /* CONFIG_ALTIVEC */
609#ifdef CONFIG_KVM_XICS 685#ifdef CONFIG_KVM_XICS
610 case KVM_REG_PPC_ICP_STATE: 686 case KVM_REG_PPC_ICP_STATE:
@@ -625,6 +701,27 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
625 return r; 701 return r;
626} 702}
627 703
704void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
705{
706 vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
707}
708
709void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
710{
711 vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
712}
713
714void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
715{
716 vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr);
717}
718EXPORT_SYMBOL_GPL(kvmppc_set_msr);
719
720int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
721{
722 return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
723}
724
628int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 725int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
629 struct kvm_translation *tr) 726 struct kvm_translation *tr)
630{ 727{
@@ -644,3 +741,141 @@ void kvmppc_decrementer_func(unsigned long data)
644 kvmppc_core_queue_dec(vcpu); 741 kvmppc_core_queue_dec(vcpu);
645 kvm_vcpu_kick(vcpu); 742 kvm_vcpu_kick(vcpu);
646} 743}
744
745struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
746{
747 return kvm->arch.kvm_ops->vcpu_create(kvm, id);
748}
749
750void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
751{
752 vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
753}
754
755int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
756{
757 return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
758}
759
760int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
761{
762 return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
763}
764
765void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
766 struct kvm_memory_slot *dont)
767{
768 kvm->arch.kvm_ops->free_memslot(free, dont);
769}
770
771int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
772 unsigned long npages)
773{
774 return kvm->arch.kvm_ops->create_memslot(slot, npages);
775}
776
777void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
778{
779 kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
780}
781
782int kvmppc_core_prepare_memory_region(struct kvm *kvm,
783 struct kvm_memory_slot *memslot,
784 struct kvm_userspace_memory_region *mem)
785{
786 return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
787}
788
789void kvmppc_core_commit_memory_region(struct kvm *kvm,
790 struct kvm_userspace_memory_region *mem,
791 const struct kvm_memory_slot *old)
792{
793 kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
794}
795
796int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
797{
798 return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
799}
800EXPORT_SYMBOL_GPL(kvm_unmap_hva);
801
802int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
803{
804 return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
805}
806
807int kvm_age_hva(struct kvm *kvm, unsigned long hva)
808{
809 return kvm->arch.kvm_ops->age_hva(kvm, hva);
810}
811
812int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
813{
814 return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
815}
816
817void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
818{
819 kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
820}
821
822void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
823{
824 vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
825}
826
827int kvmppc_core_init_vm(struct kvm *kvm)
828{
829
830#ifdef CONFIG_PPC64
831 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
832 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
833#endif
834
835 return kvm->arch.kvm_ops->init_vm(kvm);
836}
837
838void kvmppc_core_destroy_vm(struct kvm *kvm)
839{
840 kvm->arch.kvm_ops->destroy_vm(kvm);
841
842#ifdef CONFIG_PPC64
843 kvmppc_rtas_tokens_free(kvm);
844 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
845#endif
846}
847
848int kvmppc_core_check_processor_compat(void)
849{
850 /*
851 * We always return 0 for book3s. We check
 852 * for compatibility while loading the HV
 853 * or PR module.
854 */
855 return 0;
856}
857
858static int kvmppc_book3s_init(void)
859{
860 int r;
861
862 r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
863 if (r)
864 return r;
865#ifdef CONFIG_KVM_BOOK3S_32
866 r = kvmppc_book3s_init_pr();
867#endif
868 return r;
869
870}
871
872static void kvmppc_book3s_exit(void)
873{
874#ifdef CONFIG_KVM_BOOK3S_32
875 kvmppc_book3s_exit_pr();
876#endif
877 kvm_exit();
878}
879
880module_init(kvmppc_book3s_init);
881module_exit(kvmppc_book3s_exit);
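
The book3s.c hunks above turn each core entry point (vcpu create/free, MMU notifier hooks, memslot handling, VM init/destroy) into a thin wrapper that dispatches through the per-VM kvm_ops table, so the HV and PR backends can be chosen per VM and built as separate modules. A minimal, self-contained C sketch of that dispatch pattern follows; all names in it are illustrative, not the kernel's.

#include <stdio.h>

/* Illustrative ops table; the real struct kvmppc_ops carries many more hooks. */
struct demo_kvm_ops {
	int  (*vcpu_run)(int vcpu_id);
	void (*set_msr)(int vcpu_id, unsigned long msr);
};

static int hv_vcpu_run(int vcpu_id)
{
	printf("HV backend runs vcpu %d\n", vcpu_id);
	return 0;
}

static void hv_set_msr(int vcpu_id, unsigned long msr)
{
	printf("HV backend sets MSR of vcpu %d to %#lx\n", vcpu_id, msr);
}

static const struct demo_kvm_ops hv_demo_ops = {
	.vcpu_run = hv_vcpu_run,
	.set_msr  = hv_set_msr,
};

/* Each VM records whichever backend registered for it; generic code only
 * ever calls through the pointer, never a backend function directly. */
struct demo_vm {
	const struct demo_kvm_ops *ops;
};

int main(void)
{
	struct demo_vm vm = { .ops = &hv_demo_ops };

	vm.ops->set_msr(0, 0x8000UL);
	return vm.ops->vcpu_run(0);
}
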
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
new file mode 100644
index 000000000000..4bf956cf94d6
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright IBM Corporation, 2013
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
 8 * License or (at your option) any later version of the license.
9 *
10 */
11
12#ifndef __POWERPC_KVM_BOOK3S_H__
13#define __POWERPC_KVM_BOOK3S_H__
14
15extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
16 struct kvm_memory_slot *memslot);
17extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
18extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
19 unsigned long end);
20extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
21extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
22extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
23
24extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
25extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
26 unsigned int inst, int *advance);
27extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
28 int sprn, ulong spr_val);
29extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
30 int sprn, ulong *spr_val);
31extern int kvmppc_book3s_init_pr(void);
32extern void kvmppc_book3s_exit_pr(void);
33
34#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index c8cefdd15fd8..76a64ce6a5b6 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -84,7 +84,8 @@ static inline bool sr_nx(u32 sr_raw)
84} 84}
85 85
86static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 86static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
87 struct kvmppc_pte *pte, bool data); 87 struct kvmppc_pte *pte, bool data,
88 bool iswrite);
88static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 89static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
89 u64 *vsid); 90 u64 *vsid);
90 91
@@ -99,7 +100,7 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
99 u64 vsid; 100 u64 vsid;
100 struct kvmppc_pte pte; 101 struct kvmppc_pte pte;
101 102
102 if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) 103 if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false))
103 return pte.vpage; 104 return pte.vpage;
104 105
105 kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); 106 kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -111,10 +112,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
111 kvmppc_set_msr(vcpu, 0); 112 kvmppc_set_msr(vcpu, 0);
112} 113}
113 114
114static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, 115static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
115 u32 sre, gva_t eaddr, 116 u32 sre, gva_t eaddr,
116 bool primary) 117 bool primary)
117{ 118{
119 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
118 u32 page, hash, pteg, htabmask; 120 u32 page, hash, pteg, htabmask;
119 hva_t r; 121 hva_t r;
120 122
@@ -132,7 +134,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
132 kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, 134 kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
133 sr_vsid(sre)); 135 sr_vsid(sre));
134 136
135 r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); 137 r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
136 if (kvm_is_error_hva(r)) 138 if (kvm_is_error_hva(r))
137 return r; 139 return r;
138 return r | (pteg & ~PAGE_MASK); 140 return r | (pteg & ~PAGE_MASK);
@@ -145,7 +147,8 @@ static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
145} 147}
146 148
147static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 149static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
148 struct kvmppc_pte *pte, bool data) 150 struct kvmppc_pte *pte, bool data,
151 bool iswrite)
149{ 152{
150 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 153 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
151 struct kvmppc_bat *bat; 154 struct kvmppc_bat *bat;
@@ -186,8 +189,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
186 printk(KERN_INFO "BAT is not readable!\n"); 189 printk(KERN_INFO "BAT is not readable!\n");
187 continue; 190 continue;
188 } 191 }
189 if (!pte->may_write) { 192 if (iswrite && !pte->may_write) {
190 /* let's treat r/o BATs as not-readable for now */
191 dprintk_pte("BAT is read-only!\n"); 193 dprintk_pte("BAT is read-only!\n");
192 continue; 194 continue;
193 } 195 }
@@ -201,9 +203,8 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
201 203
202static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, 204static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
203 struct kvmppc_pte *pte, bool data, 205 struct kvmppc_pte *pte, bool data,
204 bool primary) 206 bool iswrite, bool primary)
205{ 207{
206 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
207 u32 sre; 208 u32 sre;
208 hva_t ptegp; 209 hva_t ptegp;
209 u32 pteg[16]; 210 u32 pteg[16];
@@ -218,7 +219,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
218 219
219 pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); 220 pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
220 221
221 ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary); 222 ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
222 if (kvm_is_error_hva(ptegp)) { 223 if (kvm_is_error_hva(ptegp)) {
223 printk(KERN_INFO "KVM: Invalid PTEG!\n"); 224 printk(KERN_INFO "KVM: Invalid PTEG!\n");
224 goto no_page_found; 225 goto no_page_found;
@@ -258,9 +259,6 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
258 break; 259 break;
259 } 260 }
260 261
261 if ( !pte->may_read )
262 continue;
263
264 dprintk_pte("MMU: Found PTE -> %x %x - %x\n", 262 dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
265 pteg[i], pteg[i+1], pp); 263 pteg[i], pteg[i+1], pp);
266 found = 1; 264 found = 1;
@@ -271,19 +269,23 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
271 /* Update PTE C and A bits, so the guest's swapper knows we used the 269 /* Update PTE C and A bits, so the guest's swapper knows we used the
272 page */ 270 page */
273 if (found) { 271 if (found) {
274 u32 oldpte = pteg[i+1]; 272 u32 pte_r = pteg[i+1];
275 273 char __user *addr = (char __user *) &pteg[i+1];
276 if (pte->may_read) 274
277 pteg[i+1] |= PTEG_FLAG_ACCESSED; 275 /*
 278 if (pte->may_write) 276 * Use single-byte writes to update the HPTE, to
 279 pteg[i+1] |= PTEG_FLAG_DIRTY; 277 * conform to what real hardware does.
280 else 278 */
281 dprintk_pte("KVM: Mapping read-only page!\n"); 279 if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
282 280 pte_r |= PTEG_FLAG_ACCESSED;
283 /* Write back into the PTEG */ 281 put_user(pte_r >> 8, addr + 2);
284 if (pteg[i+1] != oldpte) 282 }
285 copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); 283 if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
286 284 pte_r |= PTEG_FLAG_DIRTY;
285 put_user(pte_r, addr + 3);
286 }
287 if (!pte->may_read || (iswrite && !pte->may_write))
288 return -EPERM;
287 return 0; 289 return 0;
288 } 290 }
289 291
@@ -302,12 +304,14 @@ no_page_found:
302} 304}
303 305
304static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 306static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
305 struct kvmppc_pte *pte, bool data) 307 struct kvmppc_pte *pte, bool data,
308 bool iswrite)
306{ 309{
307 int r; 310 int r;
308 ulong mp_ea = vcpu->arch.magic_page_ea; 311 ulong mp_ea = vcpu->arch.magic_page_ea;
309 312
310 pte->eaddr = eaddr; 313 pte->eaddr = eaddr;
314 pte->page_size = MMU_PAGE_4K;
311 315
312 /* Magic page override */ 316 /* Magic page override */
313 if (unlikely(mp_ea) && 317 if (unlikely(mp_ea) &&
@@ -323,11 +327,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
323 return 0; 327 return 0;
324 } 328 }
325 329
326 r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); 330 r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite);
327 if (r < 0) 331 if (r < 0)
328 r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); 332 r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
333 data, iswrite, true);
329 if (r < 0) 334 if (r < 0)
330 r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false); 335 r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
336 data, iswrite, false);
331 337
332 return r; 338 return r;
333} 339}
@@ -347,7 +353,12 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
347 353
348static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) 354static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
349{ 355{
350 kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); 356 int i;
357 struct kvm_vcpu *v;
358
359 /* flush this VA on all cpus */
360 kvm_for_each_vcpu(i, v, vcpu->kvm)
361 kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);
351} 362}
352 363
353static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 364static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
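
The rewritten 32-bit xlate_pte path above sets the guest HPTE's accessed and dirty flags with single-byte put_user() stores instead of copying the whole PTEG back, so concurrent guest updates to the other bytes of the entry are never clobbered. A stand-alone sketch of the byte arithmetic on a big-endian PTE word follows; the flag values are assumptions for illustration, while the byte offsets mirror the hunk's put_user(pte_r >> 8, addr + 2) and put_user(pte_r, addr + 3).

#include <stdint.h>
#include <stdio.h>

#define FLAG_ACCESSED 0x00000100u	/* bit 8: lives in byte 2 of a BE word */
#define FLAG_DIRTY    0x00000080u	/* bit 7: lives in byte 3 of a BE word */

/* Store the second PTE word big-endian, as the guest hash table does. */
static void store_be32(uint8_t *p, uint32_t v)
{
	p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
}

static uint32_t load_be32(const uint8_t *p)
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8 | p[3];
}

int main(void)
{
	uint8_t pte_word[4];
	uint32_t r = 0x00001234;	/* arbitrary example contents */

	store_be32(pte_word, r);

	/* Accessed: rewrite only byte 2, the byte that holds bit 8. */
	r |= FLAG_ACCESSED;
	pte_word[2] = r >> 8;

	/* Dirty: rewrite only byte 3, the byte that holds bit 7. */
	r |= FLAG_DIRTY;
	pte_word[3] = r;

	printf("updated word: %#010x\n", (unsigned)load_be32(pte_word));
	return 0;
}
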
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 00e619bf608e..3a0abd2e5a15 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -138,7 +138,8 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
138 138
139extern char etext[]; 139extern char etext[];
140 140
141int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 141int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
142 bool iswrite)
142{ 143{
143 pfn_t hpaddr; 144 pfn_t hpaddr;
144 u64 vpn; 145 u64 vpn;
@@ -152,9 +153,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
152 bool evict = false; 153 bool evict = false;
153 struct hpte_cache *pte; 154 struct hpte_cache *pte;
154 int r = 0; 155 int r = 0;
156 bool writable;
155 157
156 /* Get host physical address for gpa */ 158 /* Get host physical address for gpa */
157 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 159 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
160 iswrite, &writable);
158 if (is_error_noslot_pfn(hpaddr)) { 161 if (is_error_noslot_pfn(hpaddr)) {
159 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", 162 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
160 orig_pte->eaddr); 163 orig_pte->eaddr);
@@ -204,7 +207,7 @@ next_pteg:
204 (primary ? 0 : PTE_SEC); 207 (primary ? 0 : PTE_SEC);
205 pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; 208 pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
206 209
207 if (orig_pte->may_write) { 210 if (orig_pte->may_write && writable) {
208 pteg1 |= PP_RWRW; 211 pteg1 |= PP_RWRW;
209 mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 212 mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
210 } else { 213 } else {
@@ -259,6 +262,11 @@ out:
259 return r; 262 return r;
260} 263}
261 264
265void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
266{
267 kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL);
268}
269
262static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) 270static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
263{ 271{
264 struct kvmppc_sid_map *map; 272 struct kvmppc_sid_map *map;
@@ -341,7 +349,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
341 svcpu_put(svcpu); 349 svcpu_put(svcpu);
342} 350}
343 351
344void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 352void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
345{ 353{
346 int i; 354 int i;
347 355
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 7e345e00661a..83da1f868fd5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
107 return kvmppc_slb_calc_vpn(slb, eaddr); 107 return kvmppc_slb_calc_vpn(slb, eaddr);
108} 108}
109 109
110static int mmu_pagesize(int mmu_pg)
111{
112 switch (mmu_pg) {
113 case MMU_PAGE_64K:
114 return 16;
115 case MMU_PAGE_16M:
116 return 24;
117 }
118 return 12;
119}
120
110static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) 121static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
111{ 122{
112 return slbe->large ? 24 : 12; 123 return mmu_pagesize(slbe->base_page_size);
113} 124}
114 125
115static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) 126static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
@@ -119,11 +130,11 @@ static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
119 return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p); 130 return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
120} 131}
121 132
122static hva_t kvmppc_mmu_book3s_64_get_pteg( 133static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,
123 struct kvmppc_vcpu_book3s *vcpu_book3s,
124 struct kvmppc_slb *slbe, gva_t eaddr, 134 struct kvmppc_slb *slbe, gva_t eaddr,
125 bool second) 135 bool second)
126{ 136{
137 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
127 u64 hash, pteg, htabsize; 138 u64 hash, pteg, htabsize;
128 u32 ssize; 139 u32 ssize;
129 hva_t r; 140 hva_t r;
@@ -148,10 +159,10 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
148 159
149 /* When running a PAPR guest, SDR1 contains a HVA address instead 160 /* When running a PAPR guest, SDR1 contains a HVA address instead
150 of a GPA */ 161 of a GPA */
151 if (vcpu_book3s->vcpu.arch.papr_enabled) 162 if (vcpu->arch.papr_enabled)
152 r = pteg; 163 r = pteg;
153 else 164 else
154 r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); 165 r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
155 166
156 if (kvm_is_error_hva(r)) 167 if (kvm_is_error_hva(r))
157 return r; 168 return r;
@@ -166,18 +177,38 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
166 avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); 177 avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
167 avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); 178 avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
168 179
169 if (p < 24) 180 if (p < 16)
170 avpn >>= ((80 - p) - 56) - 8; 181 avpn >>= ((80 - p) - 56) - 8; /* 16 - p */
171 else 182 else
172 avpn <<= 8; 183 avpn <<= p - 16;
173 184
174 return avpn; 185 return avpn;
175} 186}
176 187
188/*
 189 * Return the page size encoded in the second word of an HPTE, or
190 * -1 for an invalid encoding for the base page size indicated by
191 * the SLB entry. This doesn't handle mixed pagesize segments yet.
192 */
193static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
194{
195 switch (slbe->base_page_size) {
196 case MMU_PAGE_64K:
197 if ((r & 0xf000) == 0x1000)
198 return MMU_PAGE_64K;
199 break;
200 case MMU_PAGE_16M:
201 if ((r & 0xff000) == 0)
202 return MMU_PAGE_16M;
203 break;
204 }
205 return -1;
206}
207
177static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 208static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
178 struct kvmppc_pte *gpte, bool data) 209 struct kvmppc_pte *gpte, bool data,
210 bool iswrite)
179{ 211{
180 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
181 struct kvmppc_slb *slbe; 212 struct kvmppc_slb *slbe;
182 hva_t ptegp; 213 hva_t ptegp;
183 u64 pteg[16]; 214 u64 pteg[16];
@@ -189,6 +220,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
189 u8 pp, key = 0; 220 u8 pp, key = 0;
190 bool found = false; 221 bool found = false;
191 bool second = false; 222 bool second = false;
223 int pgsize;
192 ulong mp_ea = vcpu->arch.magic_page_ea; 224 ulong mp_ea = vcpu->arch.magic_page_ea;
193 225
194 /* Magic page override */ 226 /* Magic page override */
@@ -202,6 +234,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
202 gpte->may_execute = true; 234 gpte->may_execute = true;
203 gpte->may_read = true; 235 gpte->may_read = true;
204 gpte->may_write = true; 236 gpte->may_write = true;
237 gpte->page_size = MMU_PAGE_4K;
205 238
206 return 0; 239 return 0;
207 } 240 }
@@ -222,8 +255,12 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
222 v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | 255 v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
223 HPTE_V_SECONDARY; 256 HPTE_V_SECONDARY;
224 257
258 pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
259
260 mutex_lock(&vcpu->kvm->arch.hpt_mutex);
261
225do_second: 262do_second:
226 ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); 263 ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);
227 if (kvm_is_error_hva(ptegp)) 264 if (kvm_is_error_hva(ptegp))
228 goto no_page_found; 265 goto no_page_found;
229 266
@@ -240,6 +277,13 @@ do_second:
240 for (i=0; i<16; i+=2) { 277 for (i=0; i<16; i+=2) {
241 /* Check all relevant fields of 1st dword */ 278 /* Check all relevant fields of 1st dword */
242 if ((pteg[i] & v_mask) == v_val) { 279 if ((pteg[i] & v_mask) == v_val) {
280 /* If large page bit is set, check pgsize encoding */
281 if (slbe->large &&
282 (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
283 pgsize = decode_pagesize(slbe, pteg[i+1]);
284 if (pgsize < 0)
285 continue;
286 }
243 found = true; 287 found = true;
244 break; 288 break;
245 } 289 }
@@ -256,13 +300,15 @@ do_second:
256 v = pteg[i]; 300 v = pteg[i];
257 r = pteg[i+1]; 301 r = pteg[i+1];
258 pp = (r & HPTE_R_PP) | key; 302 pp = (r & HPTE_R_PP) | key;
259 eaddr_mask = 0xFFF; 303 if (r & HPTE_R_PP0)
304 pp |= 8;
260 305
261 gpte->eaddr = eaddr; 306 gpte->eaddr = eaddr;
262 gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); 307 gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
263 if (slbe->large) 308
264 eaddr_mask = 0xFFFFFF; 309 eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
265 gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); 310 gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
311 gpte->page_size = pgsize;
266 gpte->may_execute = ((r & HPTE_R_N) ? false : true); 312 gpte->may_execute = ((r & HPTE_R_N) ? false : true);
267 gpte->may_read = false; 313 gpte->may_read = false;
268 gpte->may_write = false; 314 gpte->may_write = false;
@@ -277,6 +323,7 @@ do_second:
277 case 3: 323 case 3:
278 case 5: 324 case 5:
279 case 7: 325 case 7:
326 case 10:
280 gpte->may_read = true; 327 gpte->may_read = true;
281 break; 328 break;
282 } 329 }
@@ -287,30 +334,37 @@ do_second:
287 334
288 /* Update PTE R and C bits, so the guest's swapper knows we used the 335 /* Update PTE R and C bits, so the guest's swapper knows we used the
289 * page */ 336 * page */
290 if (gpte->may_read) { 337 if (gpte->may_read && !(r & HPTE_R_R)) {
291 /* Set the accessed flag */ 338 /*
339 * Set the accessed flag.
 340 * We have to write this back with a single-byte write
 341 * because another vcpu may be accessing this HPTE
 342 * concurrently on non-PAPR platforms such as mac99,
 343 * and a single-byte store is what real hardware does.
344 */
345 char __user *addr = (char __user *) &pteg[i+1];
292 r |= HPTE_R_R; 346 r |= HPTE_R_R;
347 put_user(r >> 8, addr + 6);
293 } 348 }
294 if (data && gpte->may_write) { 349 if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
295 /* Set the dirty flag -- XXX even if not writing */ 350 /* Set the dirty flag */
351 /* Use a single byte write */
352 char __user *addr = (char __user *) &pteg[i+1];
296 r |= HPTE_R_C; 353 r |= HPTE_R_C;
354 put_user(r, addr + 7);
297 } 355 }
298 356
299 /* Write back into the PTEG */ 357 mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
300 if (pteg[i+1] != r) {
301 pteg[i+1] = r;
302 copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
303 }
304 358
305 if (!gpte->may_read) 359 if (!gpte->may_read || (iswrite && !gpte->may_write))
306 return -EPERM; 360 return -EPERM;
307 return 0; 361 return 0;
308 362
309no_page_found: 363no_page_found:
364 mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
310 return -ENOENT; 365 return -ENOENT;
311 366
312no_seg_found: 367no_seg_found:
313
314 dprintk("KVM MMU: Trigger segment fault\n"); 368 dprintk("KVM MMU: Trigger segment fault\n");
315 return -EINVAL; 369 return -EINVAL;
316} 370}
@@ -345,6 +399,21 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
345 slbe->nx = (rs & SLB_VSID_N) ? 1 : 0; 399 slbe->nx = (rs & SLB_VSID_N) ? 1 : 0;
346 slbe->class = (rs & SLB_VSID_C) ? 1 : 0; 400 slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
347 401
402 slbe->base_page_size = MMU_PAGE_4K;
403 if (slbe->large) {
404 if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
405 switch (rs & SLB_VSID_LP) {
406 case SLB_VSID_LP_00:
407 slbe->base_page_size = MMU_PAGE_16M;
408 break;
409 case SLB_VSID_LP_01:
410 slbe->base_page_size = MMU_PAGE_64K;
411 break;
412 }
413 } else
414 slbe->base_page_size = MMU_PAGE_16M;
415 }
416
348 slbe->orige = rb & (ESID_MASK | SLB_ESID_V); 417 slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
349 slbe->origv = rs; 418 slbe->origv = rs;
350 419
@@ -460,14 +529,45 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
460 bool large) 529 bool large)
461{ 530{
462 u64 mask = 0xFFFFFFFFFULL; 531 u64 mask = 0xFFFFFFFFFULL;
532 long i;
533 struct kvm_vcpu *v;
463 534
464 dprintk("KVM MMU: tlbie(0x%lx)\n", va); 535 dprintk("KVM MMU: tlbie(0x%lx)\n", va);
465 536
466 if (large) 537 /*
467 mask = 0xFFFFFF000ULL; 538 * The tlbie instruction changed behaviour starting with
468 kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); 539 * POWER6. POWER6 and later don't have the large page flag
540 * in the instruction but in the RB value, along with bits
541 * indicating page and segment sizes.
542 */
543 if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
544 /* POWER6 or later */
545 if (va & 1) { /* L bit */
546 if ((va & 0xf000) == 0x1000)
547 mask = 0xFFFFFFFF0ULL; /* 64k page */
548 else
549 mask = 0xFFFFFF000ULL; /* 16M page */
550 }
551 } else {
552 /* older processors, e.g. PPC970 */
553 if (large)
554 mask = 0xFFFFFF000ULL;
555 }
556 /* flush this VA on all vcpus */
557 kvm_for_each_vcpu(i, v, vcpu->kvm)
558 kvmppc_mmu_pte_vflush(v, va >> 12, mask);
469} 559}
470 560
561#ifdef CONFIG_PPC_64K_PAGES
562static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
563{
564 ulong mp_ea = vcpu->arch.magic_page_ea;
565
566 return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) &&
567 (mp_ea >> SID_SHIFT) == esid;
568}
569#endif
570
471static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 571static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
472 u64 *vsid) 572 u64 *vsid)
473{ 573{
@@ -475,11 +575,13 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
475 struct kvmppc_slb *slb; 575 struct kvmppc_slb *slb;
476 u64 gvsid = esid; 576 u64 gvsid = esid;
477 ulong mp_ea = vcpu->arch.magic_page_ea; 577 ulong mp_ea = vcpu->arch.magic_page_ea;
578 int pagesize = MMU_PAGE_64K;
478 579
479 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 580 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
480 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); 581 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
481 if (slb) { 582 if (slb) {
482 gvsid = slb->vsid; 583 gvsid = slb->vsid;
584 pagesize = slb->base_page_size;
483 if (slb->tb) { 585 if (slb->tb) {
484 gvsid <<= SID_SHIFT_1T - SID_SHIFT; 586 gvsid <<= SID_SHIFT_1T - SID_SHIFT;
485 gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); 587 gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
@@ -490,28 +592,41 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
490 592
491 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 593 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
492 case 0: 594 case 0:
493 *vsid = VSID_REAL | esid; 595 gvsid = VSID_REAL | esid;
494 break; 596 break;
495 case MSR_IR: 597 case MSR_IR:
496 *vsid = VSID_REAL_IR | gvsid; 598 gvsid |= VSID_REAL_IR;
497 break; 599 break;
498 case MSR_DR: 600 case MSR_DR:
499 *vsid = VSID_REAL_DR | gvsid; 601 gvsid |= VSID_REAL_DR;
500 break; 602 break;
501 case MSR_DR|MSR_IR: 603 case MSR_DR|MSR_IR:
502 if (!slb) 604 if (!slb)
503 goto no_slb; 605 goto no_slb;
504 606
505 *vsid = gvsid;
506 break; 607 break;
507 default: 608 default:
508 BUG(); 609 BUG();
509 break; 610 break;
510 } 611 }
511 612
613#ifdef CONFIG_PPC_64K_PAGES
614 /*
615 * Mark this as a 64k segment if the host is using
616 * 64k pages, the host MMU supports 64k pages and
617 * the guest segment page size is >= 64k,
618 * but not if this segment contains the magic page.
619 */
620 if (pagesize >= MMU_PAGE_64K &&
621 mmu_psize_defs[MMU_PAGE_64K].shift &&
622 !segment_contains_magic_page(vcpu, esid))
623 gvsid |= VSID_64K;
624#endif
625
512 if (vcpu->arch.shared->msr & MSR_PR) 626 if (vcpu->arch.shared->msr & MSR_PR)
513 *vsid |= VSID_PR; 627 gvsid |= VSID_PR;
514 628
629 *vsid = gvsid;
515 return 0; 630 return 0;
516 631
517no_slb: 632no_slb:
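
The 64-bit MMU hunks above make translation page-size aware: each SLB entry carries a base_page_size, mmu_pagesize() maps the MMU_PAGE_* constant to a shift (12, 16 or 24), and the low-bit mask applied to the real address is derived from that shift rather than the old hard-coded 0xFFF or 0xFFFFFF. A tiny sketch of that relationship, with constants local to the example:

#include <stdint.h>
#include <stdio.h>

enum demo_pagesize { DEMO_PAGE_4K, DEMO_PAGE_64K, DEMO_PAGE_16M };

/* Same mapping as mmu_pagesize() in the hunk: 4K->12, 64K->16, 16M->24. */
static int demo_shift(enum demo_pagesize pg)
{
	switch (pg) {
	case DEMO_PAGE_64K:
		return 16;
	case DEMO_PAGE_16M:
		return 24;
	default:
		return 12;
	}
}

int main(void)
{
	uint64_t eaddr = 0x123456789abcULL;
	enum demo_pagesize pg = DEMO_PAGE_64K;
	uint64_t mask = (1ULL << demo_shift(pg)) - 1;

	/* Low bits come from the effective address, high bits from the HPTE RPN. */
	printf("offset within page = %#llx\n",
	       (unsigned long long)(eaddr & mask));
	return 0;
}
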
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e5240524bf6c..0d513af62bba 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -27,14 +27,14 @@
27#include <asm/machdep.h> 27#include <asm/machdep.h>
28#include <asm/mmu_context.h> 28#include <asm/mmu_context.h>
29#include <asm/hw_irq.h> 29#include <asm/hw_irq.h>
30#include "trace.h" 30#include "trace_pr.h"
31 31
32#define PTE_SIZE 12 32#define PTE_SIZE 12
33 33
34void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 34void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
35{ 35{
36 ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, 36 ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
37 MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M, 37 pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M,
38 false); 38 false);
39} 39}
40 40
@@ -78,7 +78,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
78 return NULL; 78 return NULL;
79} 79}
80 80
81int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 81int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
82 bool iswrite)
82{ 83{
83 unsigned long vpn; 84 unsigned long vpn;
84 pfn_t hpaddr; 85 pfn_t hpaddr;
@@ -90,16 +91,26 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
90 int attempt = 0; 91 int attempt = 0;
91 struct kvmppc_sid_map *map; 92 struct kvmppc_sid_map *map;
92 int r = 0; 93 int r = 0;
94 int hpsize = MMU_PAGE_4K;
95 bool writable;
96 unsigned long mmu_seq;
97 struct kvm *kvm = vcpu->kvm;
98 struct hpte_cache *cpte;
99 unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT;
100 unsigned long pfn;
101
102 /* used to check for invalidations in progress */
103 mmu_seq = kvm->mmu_notifier_seq;
104 smp_rmb();
93 105
94 /* Get host physical address for gpa */ 106 /* Get host physical address for gpa */
95 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 107 pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable);
96 if (is_error_noslot_pfn(hpaddr)) { 108 if (is_error_noslot_pfn(pfn)) {
97 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); 109 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn);
98 r = -EINVAL; 110 r = -EINVAL;
99 goto out; 111 goto out;
100 } 112 }
101 hpaddr <<= PAGE_SHIFT; 113 hpaddr = pfn << PAGE_SHIFT;
102 hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
103 114
104 /* and write the mapping ea -> hpa into the pt */ 115 /* and write the mapping ea -> hpa into the pt */
105 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); 116 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
@@ -117,20 +128,39 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
117 goto out; 128 goto out;
118 } 129 }
119 130
120 vsid = map->host_vsid; 131 vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
121 vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
122 132
123 if (!orig_pte->may_write) 133 kvm_set_pfn_accessed(pfn);
124 rflags |= HPTE_R_PP; 134 if (!orig_pte->may_write || !writable)
125 else 135 rflags |= PP_RXRX;
126 mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 136 else {
137 mark_page_dirty(vcpu->kvm, gfn);
138 kvm_set_pfn_dirty(pfn);
139 }
127 140
128 if (!orig_pte->may_execute) 141 if (!orig_pte->may_execute)
129 rflags |= HPTE_R_N; 142 rflags |= HPTE_R_N;
130 else 143 else
131 kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); 144 kvmppc_mmu_flush_icache(pfn);
145
146 /*
147 * Use 64K pages if possible; otherwise, on 64K page kernels,
148 * we need to transfer 4 more bits from guest real to host real addr.
149 */
150 if (vsid & VSID_64K)
151 hpsize = MMU_PAGE_64K;
152 else
153 hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
154
155 hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
132 156
133 hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M); 157 cpte = kvmppc_mmu_hpte_cache_next(vcpu);
158
159 spin_lock(&kvm->mmu_lock);
160 if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
161 r = -EAGAIN;
162 goto out_unlock;
163 }
134 164
135map_again: 165map_again:
136 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 166 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -139,11 +169,11 @@ map_again:
139 if (attempt > 1) 169 if (attempt > 1)
140 if (ppc_md.hpte_remove(hpteg) < 0) { 170 if (ppc_md.hpte_remove(hpteg) < 0) {
141 r = -1; 171 r = -1;
142 goto out; 172 goto out_unlock;
143 } 173 }
144 174
145 ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, 175 ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
146 MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M); 176 hpsize, hpsize, MMU_SEGSIZE_256M);
147 177
148 if (ret < 0) { 178 if (ret < 0) {
149 /* If we couldn't map a primary PTE, try a secondary */ 179 /* If we couldn't map a primary PTE, try a secondary */
@@ -152,8 +182,6 @@ map_again:
152 attempt++; 182 attempt++;
153 goto map_again; 183 goto map_again;
154 } else { 184 } else {
155 struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
156
157 trace_kvm_book3s_64_mmu_map(rflags, hpteg, 185 trace_kvm_book3s_64_mmu_map(rflags, hpteg,
158 vpn, hpaddr, orig_pte); 186 vpn, hpaddr, orig_pte);
159 187
@@ -164,19 +192,37 @@ map_again:
164 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 192 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
165 } 193 }
166 194
167 pte->slot = hpteg + (ret & 7); 195 cpte->slot = hpteg + (ret & 7);
168 pte->host_vpn = vpn; 196 cpte->host_vpn = vpn;
169 pte->pte = *orig_pte; 197 cpte->pte = *orig_pte;
170 pte->pfn = hpaddr >> PAGE_SHIFT; 198 cpte->pfn = pfn;
199 cpte->pagesize = hpsize;
171 200
172 kvmppc_mmu_hpte_cache_map(vcpu, pte); 201 kvmppc_mmu_hpte_cache_map(vcpu, cpte);
202 cpte = NULL;
173 } 203 }
174 kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); 204
205out_unlock:
206 spin_unlock(&kvm->mmu_lock);
207 kvm_release_pfn_clean(pfn);
208 if (cpte)
209 kvmppc_mmu_hpte_cache_free(cpte);
175 210
176out: 211out:
177 return r; 212 return r;
178} 213}
179 214
215void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
216{
217 u64 mask = 0xfffffffffULL;
218 u64 vsid;
219
220 vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid);
221 if (vsid & VSID_64K)
222 mask = 0xffffffff0ULL;
223 kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask);
224}
225
180static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) 226static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
181{ 227{
182 struct kvmppc_sid_map *map; 228 struct kvmppc_sid_map *map;
@@ -291,6 +337,12 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
291 slb_vsid &= ~SLB_VSID_KP; 337 slb_vsid &= ~SLB_VSID_KP;
292 slb_esid |= slb_index; 338 slb_esid |= slb_index;
293 339
340#ifdef CONFIG_PPC_64K_PAGES
341 /* Set host segment base page size to 64K if possible */
342 if (gvsid & VSID_64K)
343 slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp;
344#endif
345
294 svcpu->slb[slb_index].esid = slb_esid; 346 svcpu->slb[slb_index].esid = slb_esid;
295 svcpu->slb[slb_index].vsid = slb_vsid; 347 svcpu->slb[slb_index].vsid = slb_vsid;
296 348
@@ -326,7 +378,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
326 svcpu_put(svcpu); 378 svcpu_put(svcpu);
327} 379}
328 380
329void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 381void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
330{ 382{
331 kvmppc_mmu_hpte_destroy(vcpu); 383 kvmppc_mmu_hpte_destroy(vcpu);
332 __destroy_context(to_book3s(vcpu)->context_id[0]); 384 __destroy_context(to_book3s(vcpu)->context_id[0]);
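
kvmppc_mmu_map_page() above now snapshots kvm->mmu_notifier_seq before the pfn lookup and, with kvm->mmu_lock held, backs out with -EAGAIN if mmu_notifier_retry() reports an invalidation in between. The shape of that retry, reduced to a single-threaded stand-alone sketch with made-up names:

#include <stdio.h>

/* Bumped by an "invalidation" path; in the kernel this role is played by
 * kvm->mmu_notifier_seq, updated under kvm->mmu_lock. */
static unsigned long notifier_seq;

static int invalidation_happened_since(unsigned long snapshot)
{
	return notifier_seq != snapshot;
}

static int map_one_page(unsigned long gfn)
{
	unsigned long seq = notifier_seq;	/* snapshot before the lookup */

	/* ... the slow translation / pfn lookup would go here ... */

	/* With the lock held, check nothing was invalidated meanwhile. */
	if (invalidation_happened_since(seq))
		return -1;	/* caller retries, like -EAGAIN in the hunk */

	printf("mapped gfn %#lx\n", gfn);
	return 0;
}

int main(void)
{
	while (map_one_page(0x1000) != 0)
		;	/* retry until no invalidation raced with the mapping */
	return 0;
}
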
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 043eec8461e7..f3ff587a8b7d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -260,10 +260,6 @@ int kvmppc_mmu_hv_init(void)
260 return 0; 260 return 0;
261} 261}
262 262
263void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
264{
265}
266
267static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) 263static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
268{ 264{
269 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); 265 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
@@ -451,7 +447,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
451} 447}
452 448
453static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 449static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
454 struct kvmppc_pte *gpte, bool data) 450 struct kvmppc_pte *gpte, bool data, bool iswrite)
455{ 451{
456 struct kvm *kvm = vcpu->kvm; 452 struct kvm *kvm = vcpu->kvm;
457 struct kvmppc_slb *slbe; 453 struct kvmppc_slb *slbe;
@@ -906,21 +902,22 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
906 return 0; 902 return 0;
907} 903}
908 904
909int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 905int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
910{ 906{
911 if (kvm->arch.using_mmu_notifiers) 907 if (kvm->arch.using_mmu_notifiers)
912 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 908 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
913 return 0; 909 return 0;
914} 910}
915 911
916int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 912int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
917{ 913{
918 if (kvm->arch.using_mmu_notifiers) 914 if (kvm->arch.using_mmu_notifiers)
919 kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); 915 kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
920 return 0; 916 return 0;
921} 917}
922 918
923void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 919void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
920 struct kvm_memory_slot *memslot)
924{ 921{
925 unsigned long *rmapp; 922 unsigned long *rmapp;
926 unsigned long gfn; 923 unsigned long gfn;
@@ -994,7 +991,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
994 return ret; 991 return ret;
995} 992}
996 993
997int kvm_age_hva(struct kvm *kvm, unsigned long hva) 994int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
998{ 995{
999 if (!kvm->arch.using_mmu_notifiers) 996 if (!kvm->arch.using_mmu_notifiers)
1000 return 0; 997 return 0;
@@ -1032,14 +1029,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1032 return ret; 1029 return ret;
1033} 1030}
1034 1031
1035int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 1032int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
1036{ 1033{
1037 if (!kvm->arch.using_mmu_notifiers) 1034 if (!kvm->arch.using_mmu_notifiers)
1038 return 0; 1035 return 0;
1039 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 1036 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
1040} 1037}
1041 1038
1042void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 1039void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
1043{ 1040{
1044 if (!kvm->arch.using_mmu_notifiers) 1041 if (!kvm->arch.using_mmu_notifiers)
1045 return; 1042 return;
@@ -1512,9 +1509,8 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1512 1509
1513 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1510 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1514 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1511 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1515 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1512 lpcr = senc << (LPCR_VRMASD_SH - 4);
1516 lpcr |= senc << (LPCR_VRMASD_SH - 4); 1513 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
1517 kvm->arch.lpcr = lpcr;
1518 rma_setup = 1; 1514 rma_setup = 1;
1519 } 1515 }
1520 ++i; 1516 ++i;
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 30c2f3b134c6..2c25f5412bdb 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -74,3 +74,4 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
74 /* Didn't find the liobn, punt it to userspace */ 74 /* Didn't find the liobn, punt it to userspace */
75 return H_TOO_HARD; 75 return H_TOO_HARD;
76} 76}
77EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 360ce68c9809..99d40f8977e8 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -86,8 +86,8 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
86 return true; 86 return true;
87} 87}
88 88
89int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 89int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
90 unsigned int inst, int *advance) 90 unsigned int inst, int *advance)
91{ 91{
92 int emulated = EMULATE_DONE; 92 int emulated = EMULATE_DONE;
93 int rt = get_rt(inst); 93 int rt = get_rt(inst);
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
172 vcpu->arch.mmu.tlbie(vcpu, addr, large); 172 vcpu->arch.mmu.tlbie(vcpu, addr, large);
173 break; 173 break;
174 } 174 }
175#ifdef CONFIG_KVM_BOOK3S_64_PR 175#ifdef CONFIG_PPC_BOOK3S_64
176 case OP_31_XOP_FAKE_SC1: 176 case OP_31_XOP_FAKE_SC1:
177 { 177 {
178 /* SC 1 papr hypercalls */ 178 /* SC 1 papr hypercalls */
@@ -267,12 +267,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
267 267
268 r = kvmppc_st(vcpu, &addr, 32, zeros, true); 268 r = kvmppc_st(vcpu, &addr, 32, zeros, true);
269 if ((r == -ENOENT) || (r == -EPERM)) { 269 if ((r == -ENOENT) || (r == -EPERM)) {
270 struct kvmppc_book3s_shadow_vcpu *svcpu;
271
272 svcpu = svcpu_get(vcpu);
273 *advance = 0; 270 *advance = 0;
274 vcpu->arch.shared->dar = vaddr; 271 vcpu->arch.shared->dar = vaddr;
275 svcpu->fault_dar = vaddr; 272 vcpu->arch.fault_dar = vaddr;
276 273
277 dsisr = DSISR_ISSTORE; 274 dsisr = DSISR_ISSTORE;
278 if (r == -ENOENT) 275 if (r == -ENOENT)
@@ -281,8 +278,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
281 dsisr |= DSISR_PROTFAULT; 278 dsisr |= DSISR_PROTFAULT;
282 279
283 vcpu->arch.shared->dsisr = dsisr; 280 vcpu->arch.shared->dsisr = dsisr;
284 svcpu->fault_dsisr = dsisr; 281 vcpu->arch.fault_dsisr = dsisr;
285 svcpu_put(svcpu);
286 282
287 kvmppc_book3s_queue_irqprio(vcpu, 283 kvmppc_book3s_queue_irqprio(vcpu,
288 BOOK3S_INTERRUPT_DATA_STORAGE); 284 BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -349,7 +345,7 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
349 return bat; 345 return bat;
350} 346}
351 347
352int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 348int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
353{ 349{
354 int emulated = EMULATE_DONE; 350 int emulated = EMULATE_DONE;
355 351
@@ -472,7 +468,7 @@ unprivileged:
472 return emulated; 468 return emulated;
473} 469}
474 470
475int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 471int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
476{ 472{
477 int emulated = EMULATE_DONE; 473 int emulated = EMULATE_DONE;
478 474
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 7057a02f0906..852989a9bad3 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -20,9 +20,10 @@
20#include <linux/export.h> 20#include <linux/export.h>
21#include <asm/kvm_book3s.h> 21#include <asm/kvm_book3s.h>
22 22
23#ifdef CONFIG_KVM_BOOK3S_64_HV 23#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
24EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); 24EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
25#else 25#endif
26#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
26EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); 27EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
27EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); 28EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
28#ifdef CONFIG_ALTIVEC 29#ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 62a2b5ab08ed..072287f1c3bc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -52,6 +52,9 @@
52#include <linux/vmalloc.h> 52#include <linux/vmalloc.h>
53#include <linux/highmem.h> 53#include <linux/highmem.h>
54#include <linux/hugetlb.h> 54#include <linux/hugetlb.h>
55#include <linux/module.h>
56
57#include "book3s.h"
55 58
56/* #define EXIT_DEBUG */ 59/* #define EXIT_DEBUG */
57/* #define EXIT_DEBUG_SIMPLE */ 60/* #define EXIT_DEBUG_SIMPLE */
@@ -66,7 +69,7 @@
66static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 69static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
67static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 70static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
68 71
69void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) 72static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
70{ 73{
71 int me; 74 int me;
72 int cpu = vcpu->cpu; 75 int cpu = vcpu->cpu;
@@ -125,7 +128,7 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
125 * purely defensive; they should never fail.) 128 * purely defensive; they should never fail.)
126 */ 129 */
127 130
128void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 131static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
129{ 132{
130 struct kvmppc_vcore *vc = vcpu->arch.vcore; 133 struct kvmppc_vcore *vc = vcpu->arch.vcore;
131 134
@@ -143,7 +146,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
143 spin_unlock(&vcpu->arch.tbacct_lock); 146 spin_unlock(&vcpu->arch.tbacct_lock);
144} 147}
145 148
146void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 149static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
147{ 150{
148 struct kvmppc_vcore *vc = vcpu->arch.vcore; 151 struct kvmppc_vcore *vc = vcpu->arch.vcore;
149 152
@@ -155,17 +158,46 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
155 spin_unlock(&vcpu->arch.tbacct_lock); 158 spin_unlock(&vcpu->arch.tbacct_lock);
156} 159}
157 160
158void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 161static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
159{ 162{
160 vcpu->arch.shregs.msr = msr; 163 vcpu->arch.shregs.msr = msr;
161 kvmppc_end_cede(vcpu); 164 kvmppc_end_cede(vcpu);
162} 165}
163 166
164void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 167void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
165{ 168{
166 vcpu->arch.pvr = pvr; 169 vcpu->arch.pvr = pvr;
167} 170}
168 171
172int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
173{
174 unsigned long pcr = 0;
175 struct kvmppc_vcore *vc = vcpu->arch.vcore;
176
177 if (arch_compat) {
178 if (!cpu_has_feature(CPU_FTR_ARCH_206))
179 return -EINVAL; /* 970 has no compat mode support */
180
181 switch (arch_compat) {
182 case PVR_ARCH_205:
183 pcr = PCR_ARCH_205;
184 break;
185 case PVR_ARCH_206:
186 case PVR_ARCH_206p:
187 break;
188 default:
189 return -EINVAL;
190 }
191 }
192
193 spin_lock(&vc->lock);
194 vc->arch_compat = arch_compat;
195 vc->pcr = pcr;
196 spin_unlock(&vc->lock);
197
198 return 0;
199}
200
169void kvmppc_dump_regs(struct kvm_vcpu *vcpu) 201void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
170{ 202{
171 int r; 203 int r;
@@ -195,7 +227,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
195 pr_err(" ESID = %.16llx VSID = %.16llx\n", 227 pr_err(" ESID = %.16llx VSID = %.16llx\n",
196 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); 228 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
197 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", 229 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
198 vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1, 230 vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
199 vcpu->arch.last_inst); 231 vcpu->arch.last_inst);
200} 232}
201 233
@@ -489,7 +521,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
489 memset(dt, 0, sizeof(struct dtl_entry)); 521 memset(dt, 0, sizeof(struct dtl_entry));
490 dt->dispatch_reason = 7; 522 dt->dispatch_reason = 7;
491 dt->processor_id = vc->pcpu + vcpu->arch.ptid; 523 dt->processor_id = vc->pcpu + vcpu->arch.ptid;
492 dt->timebase = now; 524 dt->timebase = now + vc->tb_offset;
493 dt->enqueue_to_dispatch_time = stolen; 525 dt->enqueue_to_dispatch_time = stolen;
494 dt->srr0 = kvmppc_get_pc(vcpu); 526 dt->srr0 = kvmppc_get_pc(vcpu);
495 dt->srr1 = vcpu->arch.shregs.msr; 527 dt->srr1 = vcpu->arch.shregs.msr;
@@ -538,6 +570,15 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
538 } 570 }
539 break; 571 break;
540 case H_CONFER: 572 case H_CONFER:
573 target = kvmppc_get_gpr(vcpu, 4);
574 if (target == -1)
575 break;
576 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
577 if (!tvcpu) {
578 ret = H_PARAMETER;
579 break;
580 }
581 kvm_vcpu_yield_to(tvcpu);
541 break; 582 break;
542 case H_REGISTER_VPA: 583 case H_REGISTER_VPA:
543 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), 584 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -576,8 +617,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
576 return RESUME_GUEST; 617 return RESUME_GUEST;
577} 618}
578 619
579static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 620static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
580 struct task_struct *tsk) 621 struct task_struct *tsk)
581{ 622{
582 int r = RESUME_HOST; 623 int r = RESUME_HOST;
583 624
@@ -671,16 +712,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
671 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", 712 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
672 vcpu->arch.trap, kvmppc_get_pc(vcpu), 713 vcpu->arch.trap, kvmppc_get_pc(vcpu),
673 vcpu->arch.shregs.msr); 714 vcpu->arch.shregs.msr);
715 run->hw.hardware_exit_reason = vcpu->arch.trap;
674 r = RESUME_HOST; 716 r = RESUME_HOST;
675 BUG();
676 break; 717 break;
677 } 718 }
678 719
679 return r; 720 return r;
680} 721}
681 722
682int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 723static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
683 struct kvm_sregs *sregs) 724 struct kvm_sregs *sregs)
684{ 725{
685 int i; 726 int i;
686 727
@@ -694,12 +735,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
694 return 0; 735 return 0;
695} 736}
696 737
697int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 738static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
698 struct kvm_sregs *sregs) 739 struct kvm_sregs *sregs)
699{ 740{
700 int i, j; 741 int i, j;
701 742
702 kvmppc_set_pvr(vcpu, sregs->pvr); 743 kvmppc_set_pvr_hv(vcpu, sregs->pvr);
703 744
704 j = 0; 745 j = 0;
705 for (i = 0; i < vcpu->arch.slb_nr; i++) { 746 for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -714,7 +755,23 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
714 return 0; 755 return 0;
715} 756}
716 757
717int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 758static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
759{
760 struct kvmppc_vcore *vc = vcpu->arch.vcore;
761 u64 mask;
762
763 spin_lock(&vc->lock);
764 /*
765 * Userspace can only modify DPFD (default prefetch depth),
766 * ILE (interrupt little-endian) and TC (translation control).
767 */
768 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
769 vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
770 spin_unlock(&vc->lock);
771}
772
773static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
774 union kvmppc_one_reg *val)
718{ 775{
719 int r = 0; 776 int r = 0;
720 long int i; 777 long int i;
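
kvmppc_set_lpcr() above lets userspace change only the DPFD, ILE and TC fields by doing a masked read-modify-write of the vcore's LPCR under the vcore lock. The masking on its own looks like the sketch below; the bit values are placeholders, not the real LPCR_* definitions from asm/reg.h.

#include <stdint.h>
#include <stdio.h>

/* Placeholder bit positions for illustration only. */
#define DEMO_DPFD 0x0000000000000007ULL
#define DEMO_ILE  0x0000000002000000ULL
#define DEMO_TC   0x0000000000000200ULL

static uint64_t apply_user_lpcr(uint64_t current, uint64_t requested)
{
	uint64_t mask = DEMO_DPFD | DEMO_ILE | DEMO_TC;

	/* Keep every bit userspace may not touch, take the rest from the request. */
	return (current & ~mask) | (requested & mask);
}

int main(void)
{
	uint64_t lpcr = 0x1234000000000000ULL;

	lpcr = apply_user_lpcr(lpcr, DEMO_ILE | DEMO_DPFD);
	printf("lpcr = %#018llx\n", (unsigned long long)lpcr);
	return 0;
}
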
@@ -749,6 +806,12 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
749 i = id - KVM_REG_PPC_PMC1; 806 i = id - KVM_REG_PPC_PMC1;
750 *val = get_reg_val(id, vcpu->arch.pmc[i]); 807 *val = get_reg_val(id, vcpu->arch.pmc[i]);
751 break; 808 break;
809 case KVM_REG_PPC_SIAR:
810 *val = get_reg_val(id, vcpu->arch.siar);
811 break;
812 case KVM_REG_PPC_SDAR:
813 *val = get_reg_val(id, vcpu->arch.sdar);
814 break;
752#ifdef CONFIG_VSX 815#ifdef CONFIG_VSX
753 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: 816 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
754 if (cpu_has_feature(CPU_FTR_VSX)) { 817 if (cpu_has_feature(CPU_FTR_VSX)) {
@@ -787,6 +850,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
787 val->vpaval.length = vcpu->arch.dtl.len; 850 val->vpaval.length = vcpu->arch.dtl.len;
788 spin_unlock(&vcpu->arch.vpa_update_lock); 851 spin_unlock(&vcpu->arch.vpa_update_lock);
789 break; 852 break;
853 case KVM_REG_PPC_TB_OFFSET:
854 *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
855 break;
856 case KVM_REG_PPC_LPCR:
857 *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
858 break;
859 case KVM_REG_PPC_PPR:
860 *val = get_reg_val(id, vcpu->arch.ppr);
861 break;
862 case KVM_REG_PPC_ARCH_COMPAT:
863 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
864 break;
790 default: 865 default:
791 r = -EINVAL; 866 r = -EINVAL;
792 break; 867 break;
@@ -795,7 +870,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
795 return r; 870 return r;
796} 871}
797 872
798int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 873static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
874 union kvmppc_one_reg *val)
799{ 875{
800 int r = 0; 876 int r = 0;
801 long int i; 877 long int i;
@@ -833,6 +909,12 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
833 i = id - KVM_REG_PPC_PMC1; 909 i = id - KVM_REG_PPC_PMC1;
834 vcpu->arch.pmc[i] = set_reg_val(id, *val); 910 vcpu->arch.pmc[i] = set_reg_val(id, *val);
835 break; 911 break;
912 case KVM_REG_PPC_SIAR:
913 vcpu->arch.siar = set_reg_val(id, *val);
914 break;
915 case KVM_REG_PPC_SDAR:
916 vcpu->arch.sdar = set_reg_val(id, *val);
917 break;
836#ifdef CONFIG_VSX 918#ifdef CONFIG_VSX
837 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: 919 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
838 if (cpu_has_feature(CPU_FTR_VSX)) { 920 if (cpu_has_feature(CPU_FTR_VSX)) {
@@ -880,6 +962,20 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
880 len -= len % sizeof(struct dtl_entry); 962 len -= len % sizeof(struct dtl_entry);
881 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); 963 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
882 break; 964 break;
965 case KVM_REG_PPC_TB_OFFSET:
966 /* round up to multiple of 2^24 */
967 vcpu->arch.vcore->tb_offset =
968 ALIGN(set_reg_val(id, *val), 1UL << 24);
969 break;
970 case KVM_REG_PPC_LPCR:
971 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val));
972 break;
973 case KVM_REG_PPC_PPR:
974 vcpu->arch.ppr = set_reg_val(id, *val);
975 break;
976 case KVM_REG_PPC_ARCH_COMPAT:
977 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
978 break;
883 default: 979 default:
884 r = -EINVAL; 980 r = -EINVAL;
885 break; 981 break;
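
The KVM_REG_PPC_TB_OFFSET case above rounds the userspace-supplied timebase offset up to a multiple of 2^24 with ALIGN(), presumably because the offset is applied through the upper bits of the timebase and the low 24 bits are left untouched. The rounding step on its own, with align_up() standing in for the kernel macro:

#include <stdint.h>
#include <stdio.h>

/* Round x up to a power-of-two boundary a, like the kernel's ALIGN() macro. */
static uint64_t align_up(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	uint64_t requested = 123456789;		/* arbitrary example offset */
	uint64_t tb_offset = align_up(requested, 1ULL << 24);

	printf("requested %llu -> stored tb_offset %llu\n",
	       (unsigned long long)requested,
	       (unsigned long long)tb_offset);
	return 0;
}
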
@@ -888,14 +984,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
888 return r; 984 return r;
889} 985}
890 986
891int kvmppc_core_check_processor_compat(void) 987static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
892{ 988 unsigned int id)
893 if (cpu_has_feature(CPU_FTR_HVMODE))
894 return 0;
895 return -EIO;
896}
897
898struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
899{ 989{
900 struct kvm_vcpu *vcpu; 990 struct kvm_vcpu *vcpu;
901 int err = -EINVAL; 991 int err = -EINVAL;
@@ -919,8 +1009,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
919 vcpu->arch.mmcr[0] = MMCR0_FC; 1009 vcpu->arch.mmcr[0] = MMCR0_FC;
920 vcpu->arch.ctrl = CTRL_RUNLATCH; 1010 vcpu->arch.ctrl = CTRL_RUNLATCH;
921 /* default to host PVR, since we can't spoof it */ 1011 /* default to host PVR, since we can't spoof it */
922 vcpu->arch.pvr = mfspr(SPRN_PVR); 1012 kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
923 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
924 spin_lock_init(&vcpu->arch.vpa_update_lock); 1013 spin_lock_init(&vcpu->arch.vpa_update_lock);
925 spin_lock_init(&vcpu->arch.tbacct_lock); 1014 spin_lock_init(&vcpu->arch.tbacct_lock);
926 vcpu->arch.busy_preempt = TB_NIL; 1015 vcpu->arch.busy_preempt = TB_NIL;
@@ -940,6 +1029,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
940 spin_lock_init(&vcore->lock); 1029 spin_lock_init(&vcore->lock);
941 init_waitqueue_head(&vcore->wq); 1030 init_waitqueue_head(&vcore->wq);
942 vcore->preempt_tb = TB_NIL; 1031 vcore->preempt_tb = TB_NIL;
1032 vcore->lpcr = kvm->arch.lpcr;
943 } 1033 }
944 kvm->arch.vcores[core] = vcore; 1034 kvm->arch.vcores[core] = vcore;
945 kvm->arch.online_vcores++; 1035 kvm->arch.online_vcores++;
@@ -972,7 +1062,7 @@ static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
972 vpa->dirty); 1062 vpa->dirty);
973} 1063}
974 1064
975void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1065static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
976{ 1066{
977 spin_lock(&vcpu->arch.vpa_update_lock); 1067 spin_lock(&vcpu->arch.vpa_update_lock);
978 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); 1068 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
@@ -983,6 +1073,12 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
983 kmem_cache_free(kvm_vcpu_cache, vcpu); 1073 kmem_cache_free(kvm_vcpu_cache, vcpu);
984} 1074}
985 1075
1076static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
1077{
1078 /* Indicate we want to get back into the guest */
1079 return 1;
1080}
1081
986static void kvmppc_set_timer(struct kvm_vcpu *vcpu) 1082static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
987{ 1083{
988 unsigned long dec_nsec, now; 1084 unsigned long dec_nsec, now;
@@ -1264,8 +1360,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1264 1360
1265 ret = RESUME_GUEST; 1361 ret = RESUME_GUEST;
1266 if (vcpu->arch.trap) 1362 if (vcpu->arch.trap)
1267 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, 1363 ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
1268 vcpu->arch.run_task); 1364 vcpu->arch.run_task);
1269 1365
1270 vcpu->arch.ret = ret; 1366 vcpu->arch.ret = ret;
1271 vcpu->arch.trap = 0; 1367 vcpu->arch.trap = 0;
@@ -1424,7 +1520,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1424 return vcpu->arch.ret; 1520 return vcpu->arch.ret;
1425} 1521}
1426 1522
1427int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) 1523static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
1428{ 1524{
1429 int r; 1525 int r;
1430 int srcu_idx; 1526 int srcu_idx;
@@ -1546,7 +1642,8 @@ static const struct file_operations kvm_rma_fops = {
1546 .release = kvm_rma_release, 1642 .release = kvm_rma_release,
1547}; 1643};
1548 1644
1549long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) 1645static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
1646 struct kvm_allocate_rma *ret)
1550{ 1647{
1551 long fd; 1648 long fd;
1552 struct kvm_rma_info *ri; 1649 struct kvm_rma_info *ri;
@@ -1592,7 +1689,8 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
1592 (*sps)++; 1689 (*sps)++;
1593} 1690}
1594 1691
1595int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) 1692static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
1693 struct kvm_ppc_smmu_info *info)
1596{ 1694{
1597 struct kvm_ppc_one_seg_page_size *sps; 1695 struct kvm_ppc_one_seg_page_size *sps;
1598 1696
@@ -1613,7 +1711,8 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1613/* 1711/*
1614 * Get (and clear) the dirty memory log for a memory slot. 1712 * Get (and clear) the dirty memory log for a memory slot.
1615 */ 1713 */
1616int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 1714static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
1715 struct kvm_dirty_log *log)
1617{ 1716{
1618 struct kvm_memory_slot *memslot; 1717 struct kvm_memory_slot *memslot;
1619 int r; 1718 int r;
@@ -1667,8 +1766,8 @@ static void unpin_slot(struct kvm_memory_slot *memslot)
1667 } 1766 }
1668} 1767}
1669 1768
1670void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1769static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
1671 struct kvm_memory_slot *dont) 1770 struct kvm_memory_slot *dont)
1672{ 1771{
1673 if (!dont || free->arch.rmap != dont->arch.rmap) { 1772 if (!dont || free->arch.rmap != dont->arch.rmap) {
1674 vfree(free->arch.rmap); 1773 vfree(free->arch.rmap);
@@ -1681,8 +1780,8 @@ void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1681 } 1780 }
1682} 1781}
1683 1782
1684int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1783static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
1685 unsigned long npages) 1784 unsigned long npages)
1686{ 1785{
1687 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 1786 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
1688 if (!slot->arch.rmap) 1787 if (!slot->arch.rmap)
@@ -1692,9 +1791,9 @@ int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1692 return 0; 1791 return 0;
1693} 1792}
1694 1793
1695int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1794static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
1696 struct kvm_memory_slot *memslot, 1795 struct kvm_memory_slot *memslot,
1697 struct kvm_userspace_memory_region *mem) 1796 struct kvm_userspace_memory_region *mem)
1698{ 1797{
1699 unsigned long *phys; 1798 unsigned long *phys;
1700 1799
@@ -1710,9 +1809,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1710 return 0; 1809 return 0;
1711} 1810}
1712 1811
1713void kvmppc_core_commit_memory_region(struct kvm *kvm, 1812static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
1714 struct kvm_userspace_memory_region *mem, 1813 struct kvm_userspace_memory_region *mem,
1715 const struct kvm_memory_slot *old) 1814 const struct kvm_memory_slot *old)
1716{ 1815{
1717 unsigned long npages = mem->memory_size >> PAGE_SHIFT; 1816 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
1718 struct kvm_memory_slot *memslot; 1817 struct kvm_memory_slot *memslot;
@@ -1729,6 +1828,37 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
1729 } 1828 }
1730} 1829}
1731 1830
1831/*
1832 * Update LPCR values in kvm->arch and in vcores.
1833 * Caller must hold kvm->lock.
1834 */
1835void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
1836{
1837 long int i;
1838 u32 cores_done = 0;
1839
1840 if ((kvm->arch.lpcr & mask) == lpcr)
1841 return;
1842
1843 kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
1844
1845 for (i = 0; i < KVM_MAX_VCORES; ++i) {
1846 struct kvmppc_vcore *vc = kvm->arch.vcores[i];
1847 if (!vc)
1848 continue;
1849 spin_lock(&vc->lock);
1850 vc->lpcr = (vc->lpcr & ~mask) | lpcr;
1851 spin_unlock(&vc->lock);
1852 if (++cores_done >= kvm->arch.online_vcores)
1853 break;
1854 }
1855}
1856
1857static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
1858{
1859 return;
1860}
1861
1732static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) 1862static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1733{ 1863{
1734 int err = 0; 1864 int err = 0;
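
kvmppc_update_lpcr() takes a value/mask pair: only the bits selected by mask are replaced, first in kvm->arch.lpcr and then in each existing vcore's copy under that vcore's lock, stopping once every online vcore has been visited. A hedged sketch of how a caller builds such a pair (field and constant names follow the hunk above; rmls is just an illustrative value, and per the comment the caller must already hold kvm->lock):

	/* Illustrative only: change the RMLS field and the VRMA bits,
	 * leaving every other LPCR bit untouched -- the same pattern
	 * kvmppc_hv_setup_htab_rma() uses later in this patch. */
	unsigned long mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
	unsigned long lpcr = rmls << LPCR_RMLS_SH;

	kvmppc_update_lpcr(kvm, lpcr, mask);
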
@@ -1737,7 +1867,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1737 unsigned long hva; 1867 unsigned long hva;
1738 struct kvm_memory_slot *memslot; 1868 struct kvm_memory_slot *memslot;
1739 struct vm_area_struct *vma; 1869 struct vm_area_struct *vma;
1740 unsigned long lpcr, senc; 1870 unsigned long lpcr = 0, senc;
1871 unsigned long lpcr_mask = 0;
1741 unsigned long psize, porder; 1872 unsigned long psize, porder;
1742 unsigned long rma_size; 1873 unsigned long rma_size;
1743 unsigned long rmls; 1874 unsigned long rmls;
@@ -1802,9 +1933,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1802 senc = slb_pgsize_encoding(psize); 1933 senc = slb_pgsize_encoding(psize);
1803 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1934 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1804 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1935 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1805 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1936 lpcr_mask = LPCR_VRMASD;
1806 lpcr |= senc << (LPCR_VRMASD_SH - 4); 1937 /* the -4 is to account for senc values starting at 0x10 */
1807 kvm->arch.lpcr = lpcr; 1938 lpcr = senc << (LPCR_VRMASD_SH - 4);
1808 1939
1809 /* Create HPTEs in the hash page table for the VRMA */ 1940 /* Create HPTEs in the hash page table for the VRMA */
1810 kvmppc_map_vrma(vcpu, memslot, porder); 1941 kvmppc_map_vrma(vcpu, memslot, porder);
@@ -1825,23 +1956,21 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1825 kvm->arch.rma = ri; 1956 kvm->arch.rma = ri;
1826 1957
1827 /* Update LPCR and RMOR */ 1958 /* Update LPCR and RMOR */
1828 lpcr = kvm->arch.lpcr;
1829 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 1959 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1830 /* PPC970; insert RMLS value (split field) in HID4 */ 1960 /* PPC970; insert RMLS value (split field) in HID4 */
1831 lpcr &= ~((1ul << HID4_RMLS0_SH) | 1961 lpcr_mask = (1ul << HID4_RMLS0_SH) |
1832 (3ul << HID4_RMLS2_SH)); 1962 (3ul << HID4_RMLS2_SH) | HID4_RMOR;
1833 lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | 1963 lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
1834 ((rmls & 3) << HID4_RMLS2_SH); 1964 ((rmls & 3) << HID4_RMLS2_SH);
1835 /* RMOR is also in HID4 */ 1965 /* RMOR is also in HID4 */
1836 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) 1966 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
1837 << HID4_RMOR_SH; 1967 << HID4_RMOR_SH;
1838 } else { 1968 } else {
1839 /* POWER7 */ 1969 /* POWER7 */
1840 lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); 1970 lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
1841 lpcr |= rmls << LPCR_RMLS_SH; 1971 lpcr = rmls << LPCR_RMLS_SH;
1842 kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; 1972 kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
1843 } 1973 }
1844 kvm->arch.lpcr = lpcr;
1845 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", 1974 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
1846 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); 1975 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
1847 1976
@@ -1860,6 +1989,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1860 } 1989 }
1861 } 1990 }
1862 1991
1992 kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
1993
1863 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ 1994 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
1864 smp_wmb(); 1995 smp_wmb();
1865 kvm->arch.rma_setup_done = 1; 1996 kvm->arch.rma_setup_done = 1;
@@ -1875,7 +2006,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1875 goto out_srcu; 2006 goto out_srcu;
1876} 2007}
1877 2008
1878int kvmppc_core_init_vm(struct kvm *kvm) 2009static int kvmppc_core_init_vm_hv(struct kvm *kvm)
1879{ 2010{
1880 unsigned long lpcr, lpid; 2011 unsigned long lpcr, lpid;
1881 2012
@@ -1893,9 +2024,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1893 */ 2024 */
1894 cpumask_setall(&kvm->arch.need_tlb_flush); 2025 cpumask_setall(&kvm->arch.need_tlb_flush);
1895 2026
1896 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1897 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
1898
1899 kvm->arch.rma = NULL; 2027 kvm->arch.rma = NULL;
1900 2028
1901 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); 2029 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -1931,61 +2059,162 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1931 return 0; 2059 return 0;
1932} 2060}
1933 2061
1934void kvmppc_core_destroy_vm(struct kvm *kvm) 2062static void kvmppc_free_vcores(struct kvm *kvm)
2063{
2064 long int i;
2065
2066 for (i = 0; i < KVM_MAX_VCORES; ++i)
2067 kfree(kvm->arch.vcores[i]);
2068 kvm->arch.online_vcores = 0;
2069}
2070
2071static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
1935{ 2072{
1936 uninhibit_secondary_onlining(); 2073 uninhibit_secondary_onlining();
1937 2074
2075 kvmppc_free_vcores(kvm);
1938 if (kvm->arch.rma) { 2076 if (kvm->arch.rma) {
1939 kvm_release_rma(kvm->arch.rma); 2077 kvm_release_rma(kvm->arch.rma);
1940 kvm->arch.rma = NULL; 2078 kvm->arch.rma = NULL;
1941 } 2079 }
1942 2080
1943 kvmppc_rtas_tokens_free(kvm);
1944
1945 kvmppc_free_hpt(kvm); 2081 kvmppc_free_hpt(kvm);
1946 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
1947} 2082}
1948 2083
1949/* These are stubs for now */ 2084/* We don't need to emulate any privileged instructions or dcbz */
1950void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 2085static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
2086 unsigned int inst, int *advance)
1951{ 2087{
2088 return EMULATE_FAIL;
1952} 2089}
1953 2090
1954/* We don't need to emulate any privileged instructions or dcbz */ 2091static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
1955int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 2092 ulong spr_val)
1956 unsigned int inst, int *advance)
1957{ 2093{
1958 return EMULATE_FAIL; 2094 return EMULATE_FAIL;
1959} 2095}
1960 2096
1961int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 2097static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
2098 ulong *spr_val)
1962{ 2099{
1963 return EMULATE_FAIL; 2100 return EMULATE_FAIL;
1964} 2101}
1965 2102
1966int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 2103static int kvmppc_core_check_processor_compat_hv(void)
1967{ 2104{
1968 return EMULATE_FAIL; 2105 if (!cpu_has_feature(CPU_FTR_HVMODE))
2106 return -EIO;
2107 return 0;
1969} 2108}
1970 2109
1971static int kvmppc_book3s_hv_init(void) 2110static long kvm_arch_vm_ioctl_hv(struct file *filp,
2111 unsigned int ioctl, unsigned long arg)
1972{ 2112{
1973 int r; 2113 struct kvm *kvm __maybe_unused = filp->private_data;
2114 void __user *argp = (void __user *)arg;
2115 long r;
1974 2116
1975 r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 2117 switch (ioctl) {
1976 2118
1977 if (r) 2119 case KVM_ALLOCATE_RMA: {
2120 struct kvm_allocate_rma rma;
2121 struct kvm *kvm = filp->private_data;
2122
2123 r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
2124 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
2125 r = -EFAULT;
2126 break;
2127 }
2128
2129 case KVM_PPC_ALLOCATE_HTAB: {
2130 u32 htab_order;
2131
2132 r = -EFAULT;
2133 if (get_user(htab_order, (u32 __user *)argp))
2134 break;
2135 r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
2136 if (r)
2137 break;
2138 r = -EFAULT;
2139 if (put_user(htab_order, (u32 __user *)argp))
2140 break;
2141 r = 0;
2142 break;
2143 }
2144
2145 case KVM_PPC_GET_HTAB_FD: {
2146 struct kvm_get_htab_fd ghf;
2147
2148 r = -EFAULT;
2149 if (copy_from_user(&ghf, argp, sizeof(ghf)))
2150 break;
2151 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
2152 break;
2153 }
2154
2155 default:
2156 r = -ENOTTY;
2157 }
2158
2159 return r;
2160}
2161
2162static struct kvmppc_ops kvm_ops_hv = {
2163 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
2164 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
2165 .get_one_reg = kvmppc_get_one_reg_hv,
2166 .set_one_reg = kvmppc_set_one_reg_hv,
2167 .vcpu_load = kvmppc_core_vcpu_load_hv,
2168 .vcpu_put = kvmppc_core_vcpu_put_hv,
2169 .set_msr = kvmppc_set_msr_hv,
2170 .vcpu_run = kvmppc_vcpu_run_hv,
2171 .vcpu_create = kvmppc_core_vcpu_create_hv,
2172 .vcpu_free = kvmppc_core_vcpu_free_hv,
2173 .check_requests = kvmppc_core_check_requests_hv,
2174 .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
2175 .flush_memslot = kvmppc_core_flush_memslot_hv,
2176 .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
2177 .commit_memory_region = kvmppc_core_commit_memory_region_hv,
2178 .unmap_hva = kvm_unmap_hva_hv,
2179 .unmap_hva_range = kvm_unmap_hva_range_hv,
2180 .age_hva = kvm_age_hva_hv,
2181 .test_age_hva = kvm_test_age_hva_hv,
2182 .set_spte_hva = kvm_set_spte_hva_hv,
2183 .mmu_destroy = kvmppc_mmu_destroy_hv,
2184 .free_memslot = kvmppc_core_free_memslot_hv,
2185 .create_memslot = kvmppc_core_create_memslot_hv,
2186 .init_vm = kvmppc_core_init_vm_hv,
2187 .destroy_vm = kvmppc_core_destroy_vm_hv,
2188 .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
2189 .emulate_op = kvmppc_core_emulate_op_hv,
2190 .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
2191 .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
2192 .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
2193 .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
2194};
2195
2196static int kvmppc_book3s_init_hv(void)
2197{
2198 int r;
2199 /*
2200 * FIXME!! Do we need to check on all cpus ?
2201 */
2202 r = kvmppc_core_check_processor_compat_hv();
2203 if (r < 0)
1978 return r; 2204 return r;
1979 2205
1980 r = kvmppc_mmu_hv_init(); 2206 kvm_ops_hv.owner = THIS_MODULE;
2207 kvmppc_hv_ops = &kvm_ops_hv;
1981 2208
2209 r = kvmppc_mmu_hv_init();
1982 return r; 2210 return r;
1983} 2211}
1984 2212
1985static void kvmppc_book3s_hv_exit(void) 2213static void kvmppc_book3s_exit_hv(void)
1986{ 2214{
1987 kvm_exit(); 2215 kvmppc_hv_ops = NULL;
1988} 2216}
1989 2217
1990module_init(kvmppc_book3s_hv_init); 2218module_init(kvmppc_book3s_init_hv);
1991module_exit(kvmppc_book3s_hv_exit); 2219module_exit(kvmppc_book3s_exit_hv);
2220MODULE_LICENSE("GPL");
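
The HV module no longer calls kvm_init()/kvm_exit() itself: kvmppc_book3s_init_hv() now just checks processor compatibility, fills in the kvm_ops_hv table and publishes it through the kvmppc_hv_ops pointer, leaving registration with the generic KVM core to the common book3s/powerpc code. A rough sketch of how a caller is expected to dispatch through that table once it is published (the dispatch helper itself is an assumption for illustration, not code from this patch):

/* Sketch: route a vcpu_run request through whichever ops table the
 * loaded module registered.  With this module, ops->vcpu_run resolves
 * to kvmppc_vcpu_run_hv(). */
static int dispatch_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	struct kvmppc_ops *ops = kvmppc_hv_ops;	/* set in kvmppc_book3s_init_hv() */

	if (!ops)
		return -EINVAL;
	return ops->vcpu_run(run, vcpu);
}
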
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 37f1cc417ca0..928142c64cb0 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -158,9 +158,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
158 * Interrupts are enabled again at this point. 158 * Interrupts are enabled again at this point.
159 */ 159 */
160 160
161.global kvmppc_handler_highmem
162kvmppc_handler_highmem:
163
164 /* 161 /*
165 * Register usage at this point: 162 * Register usage at this point:
166 * 163 *
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c71103b8a748..bc8de75b1925 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -33,30 +33,6 @@
33#error Need to fix lppaca and SLB shadow accesses in little endian mode 33#error Need to fix lppaca and SLB shadow accesses in little endian mode
34#endif 34#endif
35 35
36/*****************************************************************************
37 * *
38 * Real Mode handlers that need to be in the linear mapping *
39 * *
40 ****************************************************************************/
41
42 .globl kvmppc_skip_interrupt
43kvmppc_skip_interrupt:
44 mfspr r13,SPRN_SRR0
45 addi r13,r13,4
46 mtspr SPRN_SRR0,r13
47 GET_SCRATCH0(r13)
48 rfid
49 b .
50
51 .globl kvmppc_skip_Hinterrupt
52kvmppc_skip_Hinterrupt:
53 mfspr r13,SPRN_HSRR0
54 addi r13,r13,4
55 mtspr SPRN_HSRR0,r13
56 GET_SCRATCH0(r13)
57 hrfid
58 b .
59
60/* 36/*
61 * Call kvmppc_hv_entry in real mode. 37 * Call kvmppc_hv_entry in real mode.
62 * Must be called with interrupts hard-disabled. 38 * Must be called with interrupts hard-disabled.
@@ -66,8 +42,11 @@ kvmppc_skip_Hinterrupt:
66 * LR = return address to continue at after eventually re-enabling MMU 42 * LR = return address to continue at after eventually re-enabling MMU
67 */ 43 */
68_GLOBAL(kvmppc_hv_entry_trampoline) 44_GLOBAL(kvmppc_hv_entry_trampoline)
45 mflr r0
46 std r0, PPC_LR_STKOFF(r1)
47 stdu r1, -112(r1)
69 mfmsr r10 48 mfmsr r10
70 LOAD_REG_ADDR(r5, kvmppc_hv_entry) 49 LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
71 li r0,MSR_RI 50 li r0,MSR_RI
72 andc r0,r10,r0 51 andc r0,r10,r0
73 li r6,MSR_IR | MSR_DR 52 li r6,MSR_IR | MSR_DR
@@ -77,11 +56,103 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
77 mtsrr1 r6 56 mtsrr1 r6
78 RFI 57 RFI
79 58
80/****************************************************************************** 59kvmppc_call_hv_entry:
81 * * 60 bl kvmppc_hv_entry
82 * Entry code * 61
83 * * 62 /* Back from guest - restore host state and return to caller */
84 *****************************************************************************/ 63
64 /* Restore host DABR and DABRX */
65 ld r5,HSTATE_DABR(r13)
66 li r6,7
67 mtspr SPRN_DABR,r5
68 mtspr SPRN_DABRX,r6
69
70 /* Restore SPRG3 */
71 ld r3,PACA_SPRG3(r13)
72 mtspr SPRN_SPRG3,r3
73
74 /*
75 * Reload DEC. HDEC interrupts were disabled when
76 * we reloaded the host's LPCR value.
77 */
78 ld r3, HSTATE_DECEXP(r13)
79 mftb r4
80 subf r4, r4, r3
81 mtspr SPRN_DEC, r4
82
83 /* Reload the host's PMU registers */
84 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
85 lbz r4, LPPACA_PMCINUSE(r3)
86 cmpwi r4, 0
87 beq 23f /* skip if not */
88 lwz r3, HSTATE_PMC(r13)
89 lwz r4, HSTATE_PMC + 4(r13)
90 lwz r5, HSTATE_PMC + 8(r13)
91 lwz r6, HSTATE_PMC + 12(r13)
92 lwz r8, HSTATE_PMC + 16(r13)
93 lwz r9, HSTATE_PMC + 20(r13)
94BEGIN_FTR_SECTION
95 lwz r10, HSTATE_PMC + 24(r13)
96 lwz r11, HSTATE_PMC + 28(r13)
97END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
98 mtspr SPRN_PMC1, r3
99 mtspr SPRN_PMC2, r4
100 mtspr SPRN_PMC3, r5
101 mtspr SPRN_PMC4, r6
102 mtspr SPRN_PMC5, r8
103 mtspr SPRN_PMC6, r9
104BEGIN_FTR_SECTION
105 mtspr SPRN_PMC7, r10
106 mtspr SPRN_PMC8, r11
107END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
108 ld r3, HSTATE_MMCR(r13)
109 ld r4, HSTATE_MMCR + 8(r13)
110 ld r5, HSTATE_MMCR + 16(r13)
111 mtspr SPRN_MMCR1, r4
112 mtspr SPRN_MMCRA, r5
113 mtspr SPRN_MMCR0, r3
114 isync
11523:
116
117 /*
118 * For external and machine check interrupts, we need
119 * to call the Linux handler to process the interrupt.
120 * We do that by jumping to absolute address 0x500 for
121 * external interrupts, or the machine_check_fwnmi label
122 * for machine checks (since firmware might have patched
123 * the vector area at 0x200). The [h]rfid at the end of the
124 * handler will return to the book3s_hv_interrupts.S code.
125 * For other interrupts we do the rfid to get back
126 * to the book3s_hv_interrupts.S code here.
127 */
128 ld r8, 112+PPC_LR_STKOFF(r1)
129 addi r1, r1, 112
130 ld r7, HSTATE_HOST_MSR(r13)
131
132 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
133 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
134BEGIN_FTR_SECTION
135 beq 11f
136END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
137
138 /* RFI into the highmem handler, or branch to interrupt handler */
139 mfmsr r6
140 li r0, MSR_RI
141 andc r6, r6, r0
142 mtmsrd r6, 1 /* Clear RI in MSR */
143 mtsrr0 r8
144 mtsrr1 r7
145 beqa 0x500 /* external interrupt (PPC970) */
146 beq cr1, 13f /* machine check */
147 RFI
148
149 /* On POWER7, we have external interrupts set to use HSRR0/1 */
15011: mtspr SPRN_HSRR0, r8
151 mtspr SPRN_HSRR1, r7
152 ba 0x500
153
15413: b machine_check_fwnmi
155
85 156
86/* 157/*
87 * We come in here when wakened from nap mode on a secondary hw thread. 158 * We come in here when wakened from nap mode on a secondary hw thread.
@@ -137,7 +208,7 @@ kvm_start_guest:
137 cmpdi r4,0 208 cmpdi r4,0
138 /* if we have no vcpu to run, go back to sleep */ 209 /* if we have no vcpu to run, go back to sleep */
139 beq kvm_no_guest 210 beq kvm_no_guest
140 b kvmppc_hv_entry 211 b 30f
141 212
14227: /* XXX should handle hypervisor maintenance interrupts etc. here */ 21327: /* XXX should handle hypervisor maintenance interrupts etc. here */
143 b kvm_no_guest 214 b kvm_no_guest
@@ -147,6 +218,57 @@ kvm_start_guest:
147 stw r8,HSTATE_SAVED_XIRR(r13) 218 stw r8,HSTATE_SAVED_XIRR(r13)
148 b kvm_no_guest 219 b kvm_no_guest
149 220
22130: bl kvmppc_hv_entry
222
223 /* Back from the guest, go back to nap */
224 /* Clear our vcpu pointer so we don't come back in early */
225 li r0, 0
226 std r0, HSTATE_KVM_VCPU(r13)
227 lwsync
228 /* Clear any pending IPI - we're an offline thread */
229 ld r5, HSTATE_XICS_PHYS(r13)
230 li r7, XICS_XIRR
231 lwzcix r3, r5, r7 /* ack any pending interrupt */
232 rlwinm. r0, r3, 0, 0xffffff /* any pending? */
233 beq 37f
234 sync
235 li r0, 0xff
236 li r6, XICS_MFRR
237 stbcix r0, r5, r6 /* clear the IPI */
238 stwcix r3, r5, r7 /* EOI it */
23937: sync
240
241 /* increment the nap count and then go to nap mode */
242 ld r4, HSTATE_KVM_VCORE(r13)
243 addi r4, r4, VCORE_NAP_COUNT
244 lwsync /* make previous updates visible */
24551: lwarx r3, 0, r4
246 addi r3, r3, 1
247 stwcx. r3, 0, r4
248 bne 51b
249
250kvm_no_guest:
251 li r0, KVM_HWTHREAD_IN_NAP
252 stb r0, HSTATE_HWTHREAD_STATE(r13)
253 li r3, LPCR_PECE0
254 mfspr r4, SPRN_LPCR
255 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
256 mtspr SPRN_LPCR, r4
257 isync
258 std r0, HSTATE_SCRATCH0(r13)
259 ptesync
260 ld r0, HSTATE_SCRATCH0(r13)
2611: cmpd r0, r0
262 bne 1b
263 nap
264 b .
265
266/******************************************************************************
267 * *
268 * Entry code *
269 * *
270 *****************************************************************************/
271
150.global kvmppc_hv_entry 272.global kvmppc_hv_entry
151kvmppc_hv_entry: 273kvmppc_hv_entry:
152 274
@@ -159,7 +281,8 @@ kvmppc_hv_entry:
159 * all other volatile GPRS = free 281 * all other volatile GPRS = free
160 */ 282 */
161 mflr r0 283 mflr r0
162 std r0, HSTATE_VMHANDLER(r13) 284 std r0, PPC_LR_STKOFF(r1)
285 stdu r1, -112(r1)
163 286
164 /* Set partition DABR */ 287 /* Set partition DABR */
165 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ 288 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
@@ -200,8 +323,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
200 ld r3, VCPU_MMCR(r4) 323 ld r3, VCPU_MMCR(r4)
201 ld r5, VCPU_MMCR + 8(r4) 324 ld r5, VCPU_MMCR + 8(r4)
202 ld r6, VCPU_MMCR + 16(r4) 325 ld r6, VCPU_MMCR + 16(r4)
326 ld r7, VCPU_SIAR(r4)
327 ld r8, VCPU_SDAR(r4)
203 mtspr SPRN_MMCR1, r5 328 mtspr SPRN_MMCR1, r5
204 mtspr SPRN_MMCRA, r6 329 mtspr SPRN_MMCRA, r6
330 mtspr SPRN_SIAR, r7
331 mtspr SPRN_SDAR, r8
205 mtspr SPRN_MMCR0, r3 332 mtspr SPRN_MMCR0, r3
206 isync 333 isync
207 334
@@ -254,22 +381,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
254 /* Save R1 in the PACA */ 381 /* Save R1 in the PACA */
255 std r1, HSTATE_HOST_R1(r13) 382 std r1, HSTATE_HOST_R1(r13)
256 383
257 /* Increment yield count if they have a VPA */
258 ld r3, VCPU_VPA(r4)
259 cmpdi r3, 0
260 beq 25f
261 lwz r5, LPPACA_YIELDCOUNT(r3)
262 addi r5, r5, 1
263 stw r5, LPPACA_YIELDCOUNT(r3)
264 li r6, 1
265 stb r6, VCPU_VPA_DIRTY(r4)
26625:
267 /* Load up DAR and DSISR */ 384 /* Load up DAR and DSISR */
268 ld r5, VCPU_DAR(r4) 385 ld r5, VCPU_DAR(r4)
269 lwz r6, VCPU_DSISR(r4) 386 lwz r6, VCPU_DSISR(r4)
270 mtspr SPRN_DAR, r5 387 mtspr SPRN_DAR, r5
271 mtspr SPRN_DSISR, r6 388 mtspr SPRN_DSISR, r6
272 389
390 li r6, KVM_GUEST_MODE_HOST_HV
391 stb r6, HSTATE_IN_GUEST(r13)
392
273BEGIN_FTR_SECTION 393BEGIN_FTR_SECTION
274 /* Restore AMR and UAMOR, set AMOR to all 1s */ 394 /* Restore AMR and UAMOR, set AMOR to all 1s */
275 ld r5,VCPU_AMR(r4) 395 ld r5,VCPU_AMR(r4)
@@ -343,7 +463,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
343 bdnz 28b 463 bdnz 28b
344 ptesync 464 ptesync
345 465
34622: li r0,1 466 /* Add timebase offset onto timebase */
46722: ld r8,VCORE_TB_OFFSET(r5)
468 cmpdi r8,0
469 beq 37f
470 mftb r6 /* current host timebase */
471 add r8,r8,r6
472 mtspr SPRN_TBU40,r8 /* update upper 40 bits */
473 mftb r7 /* check if lower 24 bits overflowed */
474 clrldi r6,r6,40
475 clrldi r7,r7,40
476 cmpld r7,r6
477 bge 37f
478 addis r8,r8,0x100 /* if so, increment upper 40 bits */
479 mtspr SPRN_TBU40,r8
480
481 /* Load guest PCR value to select appropriate compat mode */
48237: ld r7, VCORE_PCR(r5)
483 cmpdi r7, 0
484 beq 38f
485 mtspr SPRN_PCR, r7
48638:
487 li r0,1
347 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ 488 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
348 b 10f 489 b 10f
349 490
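
The timebase-offset sequence only has a "write the upper 40 bits" primitive (SPRN_TBU40), so adding the guest offset can lose a carry if the low 24 bits of the timebase roll over between the mftb and the mtspr; the code detects that by comparing the low 24 bits before and after the write and, when they have wrapped, bumps the upper 40 bits by one. The same logic expressed as a C sketch (mftb() and mtspr() stand in for the individual instructions; the real code runs on thread 0 with interrupts hard-disabled):

/* Sketch of the TBU40 update done at guest entry (the exit path does
 * the same thing with the offset subtracted instead of added). */
static void apply_tb_offset(u64 tb_offset)
{
	u64 host_tb = mftb();
	u64 new_tb  = host_tb + tb_offset;

	mtspr(SPRN_TBU40, new_tb);		/* only bits 63..24 take effect */
	/* If the low 24 bits rolled over while we were writing, the carry
	 * into the upper 40 bits was lost; apply it by hand. */
	if ((mftb() & 0xffffff) < (host_tb & 0xffffff))
		mtspr(SPRN_TBU40, new_tb + (1ULL << 24));
}
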
@@ -353,12 +494,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
353 beq 20b 494 beq 20b
354 495
355 /* Set LPCR and RMOR. */ 496 /* Set LPCR and RMOR. */
35610: ld r8,KVM_LPCR(r9) 49710: ld r8,VCORE_LPCR(r5)
357 mtspr SPRN_LPCR,r8 498 mtspr SPRN_LPCR,r8
358 ld r8,KVM_RMOR(r9) 499 ld r8,KVM_RMOR(r9)
359 mtspr SPRN_RMOR,r8 500 mtspr SPRN_RMOR,r8
360 isync 501 isync
361 502
503 /* Increment yield count if they have a VPA */
504 ld r3, VCPU_VPA(r4)
505 cmpdi r3, 0
506 beq 25f
507 lwz r5, LPPACA_YIELDCOUNT(r3)
508 addi r5, r5, 1
509 stw r5, LPPACA_YIELDCOUNT(r3)
510 li r6, 1
511 stb r6, VCPU_VPA_DIRTY(r4)
51225:
362 /* Check if HDEC expires soon */ 513 /* Check if HDEC expires soon */
363 mfspr r3,SPRN_HDEC 514 mfspr r3,SPRN_HDEC
364 cmpwi r3,10 515 cmpwi r3,10
@@ -405,7 +556,8 @@ toc_tlbie_lock:
405 bne 24b 556 bne 24b
406 isync 557 isync
407 558
408 ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */ 559 ld r5,HSTATE_KVM_VCORE(r13)
560 ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */
409 li r0,0x18f 561 li r0,0x18f
410 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ 562 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
411 or r0,r7,r0 563 or r0,r7,r0
@@ -541,7 +693,7 @@ fast_guest_return:
541 mtspr SPRN_HSRR1,r11 693 mtspr SPRN_HSRR1,r11
542 694
543 /* Activate guest mode, so faults get handled by KVM */ 695 /* Activate guest mode, so faults get handled by KVM */
544 li r9, KVM_GUEST_MODE_GUEST 696 li r9, KVM_GUEST_MODE_GUEST_HV
545 stb r9, HSTATE_IN_GUEST(r13) 697 stb r9, HSTATE_IN_GUEST(r13)
546 698
547 /* Enter guest */ 699 /* Enter guest */
@@ -550,13 +702,15 @@ BEGIN_FTR_SECTION
550 ld r5, VCPU_CFAR(r4) 702 ld r5, VCPU_CFAR(r4)
551 mtspr SPRN_CFAR, r5 703 mtspr SPRN_CFAR, r5
552END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 704END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
705BEGIN_FTR_SECTION
706 ld r0, VCPU_PPR(r4)
707END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
553 708
554 ld r5, VCPU_LR(r4) 709 ld r5, VCPU_LR(r4)
555 lwz r6, VCPU_CR(r4) 710 lwz r6, VCPU_CR(r4)
556 mtlr r5 711 mtlr r5
557 mtcr r6 712 mtcr r6
558 713
559 ld r0, VCPU_GPR(R0)(r4)
560 ld r1, VCPU_GPR(R1)(r4) 714 ld r1, VCPU_GPR(R1)(r4)
561 ld r2, VCPU_GPR(R2)(r4) 715 ld r2, VCPU_GPR(R2)(r4)
562 ld r3, VCPU_GPR(R3)(r4) 716 ld r3, VCPU_GPR(R3)(r4)
@@ -570,6 +724,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
570 ld r12, VCPU_GPR(R12)(r4) 724 ld r12, VCPU_GPR(R12)(r4)
571 ld r13, VCPU_GPR(R13)(r4) 725 ld r13, VCPU_GPR(R13)(r4)
572 726
727BEGIN_FTR_SECTION
728 mtspr SPRN_PPR, r0
729END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
730 ld r0, VCPU_GPR(R0)(r4)
573 ld r4, VCPU_GPR(R4)(r4) 731 ld r4, VCPU_GPR(R4)(r4)
574 732
575 hrfid 733 hrfid
@@ -584,8 +742,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
584/* 742/*
585 * We come here from the first-level interrupt handlers. 743 * We come here from the first-level interrupt handlers.
586 */ 744 */
587 .globl kvmppc_interrupt 745 .globl kvmppc_interrupt_hv
588kvmppc_interrupt: 746kvmppc_interrupt_hv:
589 /* 747 /*
590 * Register contents: 748 * Register contents:
591 * R12 = interrupt vector 749 * R12 = interrupt vector
@@ -595,6 +753,19 @@ kvmppc_interrupt:
595 */ 753 */
596 /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ 754 /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
597 std r9, HSTATE_HOST_R2(r13) 755 std r9, HSTATE_HOST_R2(r13)
756
757 lbz r9, HSTATE_IN_GUEST(r13)
758 cmpwi r9, KVM_GUEST_MODE_HOST_HV
759 beq kvmppc_bad_host_intr
760#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
761 cmpwi r9, KVM_GUEST_MODE_GUEST
762 ld r9, HSTATE_HOST_R2(r13)
763 beq kvmppc_interrupt_pr
764#endif
765 /* We're now back in the host but in guest MMU context */
766 li r9, KVM_GUEST_MODE_HOST_HV
767 stb r9, HSTATE_IN_GUEST(r13)
768
598 ld r9, HSTATE_KVM_VCPU(r13) 769 ld r9, HSTATE_KVM_VCPU(r13)
599 770
600 /* Save registers */ 771 /* Save registers */
@@ -620,6 +791,10 @@ BEGIN_FTR_SECTION
620 ld r3, HSTATE_CFAR(r13) 791 ld r3, HSTATE_CFAR(r13)
621 std r3, VCPU_CFAR(r9) 792 std r3, VCPU_CFAR(r9)
622END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 793END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
794BEGIN_FTR_SECTION
795 ld r4, HSTATE_PPR(r13)
796 std r4, VCPU_PPR(r9)
797END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
623 798
624 /* Restore R1/R2 so we can handle faults */ 799 /* Restore R1/R2 so we can handle faults */
625 ld r1, HSTATE_HOST_R1(r13) 800 ld r1, HSTATE_HOST_R1(r13)
@@ -642,10 +817,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
642 std r3, VCPU_GPR(R13)(r9) 817 std r3, VCPU_GPR(R13)(r9)
643 std r4, VCPU_LR(r9) 818 std r4, VCPU_LR(r9)
644 819
645 /* Unset guest mode */
646 li r0, KVM_GUEST_MODE_NONE
647 stb r0, HSTATE_IN_GUEST(r13)
648
649 stw r12,VCPU_TRAP(r9) 820 stw r12,VCPU_TRAP(r9)
650 821
651 /* Save HEIR (HV emulation assist reg) in last_inst 822 /* Save HEIR (HV emulation assist reg) in last_inst
@@ -696,46 +867,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
696 * set, we know the host wants us out so let's do it now 867 * set, we know the host wants us out so let's do it now
697 */ 868 */
698do_ext_interrupt: 869do_ext_interrupt:
699 lbz r0, HSTATE_HOST_IPI(r13) 870 bl kvmppc_read_intr
700 cmpwi r0, 0 871 cmpdi r3, 0
701 bne ext_interrupt_to_host 872 bgt ext_interrupt_to_host
702
703 /* Now read the interrupt from the ICP */
704 ld r5, HSTATE_XICS_PHYS(r13)
705 li r7, XICS_XIRR
706 cmpdi r5, 0
707 beq- ext_interrupt_to_host
708 lwzcix r3, r5, r7
709 rlwinm. r0, r3, 0, 0xffffff
710 sync
711 beq 3f /* if nothing pending in the ICP */
712
713 /* We found something in the ICP...
714 *
715 * If it's not an IPI, stash it in the PACA and return to
716 * the host, we don't (yet) handle directing real external
717 * interrupts directly to the guest
718 */
719 cmpwi r0, XICS_IPI
720 bne ext_stash_for_host
721
722 /* It's an IPI, clear the MFRR and EOI it */
723 li r0, 0xff
724 li r6, XICS_MFRR
725 stbcix r0, r5, r6 /* clear the IPI */
726 stwcix r3, r5, r7 /* EOI it */
727 sync
728
729 /* We need to re-check host IPI now in case it got set in the
730 * meantime. If it's clear, we bounce the interrupt to the
731 * guest
732 */
733 lbz r0, HSTATE_HOST_IPI(r13)
734 cmpwi r0, 0
735 bne- 1f
736 873
737 /* Allright, looks like an IPI for the guest, we need to set MER */ 874 /* Allright, looks like an IPI for the guest, we need to set MER */
7383:
739 /* Check if any CPU is heading out to the host, if so head out too */ 875 /* Check if any CPU is heading out to the host, if so head out too */
740 ld r5, HSTATE_KVM_VCORE(r13) 876 ld r5, HSTATE_KVM_VCORE(r13)
741 lwz r0, VCORE_ENTRY_EXIT(r5) 877 lwz r0, VCORE_ENTRY_EXIT(r5)
@@ -764,27 +900,9 @@ do_ext_interrupt:
764 mtspr SPRN_LPCR, r8 900 mtspr SPRN_LPCR, r8
765 b fast_guest_return 901 b fast_guest_return
766 902
767 /* We raced with the host, we need to resend that IPI, bummer */
7681: li r0, IPI_PRIORITY
769 stbcix r0, r5, r6 /* set the IPI */
770 sync
771 b ext_interrupt_to_host
772
773ext_stash_for_host:
774 /* It's not an IPI and it's for the host, stash it in the PACA
775 * before exit, it will be picked up by the host ICP driver
776 */
777 stw r3, HSTATE_SAVED_XIRR(r13)
778ext_interrupt_to_host: 903ext_interrupt_to_host:
779 904
780guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 905guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
781 /* Save DEC */
782 mfspr r5,SPRN_DEC
783 mftb r6
784 extsw r5,r5
785 add r5,r5,r6
786 std r5,VCPU_DEC_EXPIRES(r9)
787
788 /* Save more register state */ 906 /* Save more register state */
789 mfdar r6 907 mfdar r6
790 mfdsisr r7 908 mfdsisr r7
@@ -954,7 +1072,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
954 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 1072 mtspr SPRN_SDR1,r6 /* switch to partition page table */
955 mtspr SPRN_LPID,r7 1073 mtspr SPRN_LPID,r7
956 isync 1074 isync
957 li r0,0 1075
1076 /* Subtract timebase offset from timebase */
1077 ld r8,VCORE_TB_OFFSET(r5)
1078 cmpdi r8,0
1079 beq 17f
1080 mftb r6 /* current host timebase */
1081 subf r8,r8,r6
1082 mtspr SPRN_TBU40,r8 /* update upper 40 bits */
1083 mftb r7 /* check if lower 24 bits overflowed */
1084 clrldi r6,r6,40
1085 clrldi r7,r7,40
1086 cmpld r7,r6
1087 bge 17f
1088 addis r8,r8,0x100 /* if so, increment upper 40 bits */
1089 mtspr SPRN_TBU40,r8
1090
1091 /* Reset PCR */
109217: ld r0, VCORE_PCR(r5)
1093 cmpdi r0, 0
1094 beq 18f
1095 li r0, 0
1096 mtspr SPRN_PCR, r0
109718:
1098 /* Signal secondary CPUs to continue */
958 stb r0,VCORE_IN_GUEST(r5) 1099 stb r0,VCORE_IN_GUEST(r5)
959 lis r8,0x7fff /* MAX_INT@h */ 1100 lis r8,0x7fff /* MAX_INT@h */
960 mtspr SPRN_HDEC,r8 1101 mtspr SPRN_HDEC,r8
@@ -1052,6 +1193,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
10521: addi r8,r8,16 11931: addi r8,r8,16
1053 .endr 1194 .endr
1054 1195
1196 /* Save DEC */
1197 mfspr r5,SPRN_DEC
1198 mftb r6
1199 extsw r5,r5
1200 add r5,r5,r6
1201 std r5,VCPU_DEC_EXPIRES(r9)
1202
1055 /* Save and reset AMR and UAMOR before turning on the MMU */ 1203 /* Save and reset AMR and UAMOR before turning on the MMU */
1056BEGIN_FTR_SECTION 1204BEGIN_FTR_SECTION
1057 mfspr r5,SPRN_AMR 1205 mfspr r5,SPRN_AMR
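
The relocated "Save DEC" block records when the guest decrementer will fire as an absolute timebase value rather than a remaining count. In C terms (a sketch only; mfspr()/mftb() stand in for the assembly, and the decrementer is a signed 32-bit down-counter):

	u64 now = mftb();
	s64 dec = (s32)mfspr(SPRN_DEC);		/* extsw: sign-extend to 64 bits */

	/* Absolute expiry time; already in the past if dec went negative. */
	vcpu->arch.dec_expires = dec + now;
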
@@ -1062,6 +1210,10 @@ BEGIN_FTR_SECTION
1062 mtspr SPRN_AMR,r6 1210 mtspr SPRN_AMR,r6
1063END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1211END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1064 1212
1213 /* Unset guest mode */
1214 li r0, KVM_GUEST_MODE_NONE
1215 stb r0, HSTATE_IN_GUEST(r13)
1216
1065 /* Switch DSCR back to host value */ 1217 /* Switch DSCR back to host value */
1066BEGIN_FTR_SECTION 1218BEGIN_FTR_SECTION
1067 mfspr r8, SPRN_DSCR 1219 mfspr r8, SPRN_DSCR
@@ -1134,9 +1286,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1134 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ 1286 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
1135 b 22f 1287 b 22f
113621: mfspr r5, SPRN_MMCR1 128821: mfspr r5, SPRN_MMCR1
1289 mfspr r7, SPRN_SIAR
1290 mfspr r8, SPRN_SDAR
1137 std r4, VCPU_MMCR(r9) 1291 std r4, VCPU_MMCR(r9)
1138 std r5, VCPU_MMCR + 8(r9) 1292 std r5, VCPU_MMCR + 8(r9)
1139 std r6, VCPU_MMCR + 16(r9) 1293 std r6, VCPU_MMCR + 16(r9)
1294 std r7, VCPU_SIAR(r9)
1295 std r8, VCPU_SDAR(r9)
1140 mfspr r3, SPRN_PMC1 1296 mfspr r3, SPRN_PMC1
1141 mfspr r4, SPRN_PMC2 1297 mfspr r4, SPRN_PMC2
1142 mfspr r5, SPRN_PMC3 1298 mfspr r5, SPRN_PMC3
@@ -1158,103 +1314,30 @@ BEGIN_FTR_SECTION
1158 stw r11, VCPU_PMC + 28(r9) 1314 stw r11, VCPU_PMC + 28(r9)
1159END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1315END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
116022: 131622:
1317 ld r0, 112+PPC_LR_STKOFF(r1)
1318 addi r1, r1, 112
1319 mtlr r0
1320 blr
1321secondary_too_late:
1322 ld r5,HSTATE_KVM_VCORE(r13)
1323 HMT_LOW
132413: lbz r3,VCORE_IN_GUEST(r5)
1325 cmpwi r3,0
1326 bne 13b
1327 HMT_MEDIUM
1328 li r0, KVM_GUEST_MODE_NONE
1329 stb r0, HSTATE_IN_GUEST(r13)
1330 ld r11,PACA_SLBSHADOWPTR(r13)
1161 1331
1162 /* Secondary threads go off to take a nap on POWER7 */ 1332 .rept SLB_NUM_BOLTED
1163BEGIN_FTR_SECTION 1333 ld r5,SLBSHADOW_SAVEAREA(r11)
1164 lwz r0,VCPU_PTID(r9) 1334 ld r6,SLBSHADOW_SAVEAREA+8(r11)
1165 cmpwi r0,0 1335 andis. r7,r5,SLB_ESID_V@h
1166 bne secondary_nap 1336 beq 1f
1167END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1337 slbmte r6,r5
1168 13381: addi r11,r11,16
1169 /* Restore host DABR and DABRX */ 1339 .endr
1170 ld r5,HSTATE_DABR(r13) 1340 b 22b
1171 li r6,7
1172 mtspr SPRN_DABR,r5
1173 mtspr SPRN_DABRX,r6
1174
1175 /* Restore SPRG3 */
1176 ld r3,PACA_SPRG3(r13)
1177 mtspr SPRN_SPRG3,r3
1178
1179 /*
1180 * Reload DEC. HDEC interrupts were disabled when
1181 * we reloaded the host's LPCR value.
1182 */
1183 ld r3, HSTATE_DECEXP(r13)
1184 mftb r4
1185 subf r4, r4, r3
1186 mtspr SPRN_DEC, r4
1187
1188 /* Reload the host's PMU registers */
1189 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
1190 lbz r4, LPPACA_PMCINUSE(r3)
1191 cmpwi r4, 0
1192 beq 23f /* skip if not */
1193 lwz r3, HSTATE_PMC(r13)
1194 lwz r4, HSTATE_PMC + 4(r13)
1195 lwz r5, HSTATE_PMC + 8(r13)
1196 lwz r6, HSTATE_PMC + 12(r13)
1197 lwz r8, HSTATE_PMC + 16(r13)
1198 lwz r9, HSTATE_PMC + 20(r13)
1199BEGIN_FTR_SECTION
1200 lwz r10, HSTATE_PMC + 24(r13)
1201 lwz r11, HSTATE_PMC + 28(r13)
1202END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1203 mtspr SPRN_PMC1, r3
1204 mtspr SPRN_PMC2, r4
1205 mtspr SPRN_PMC3, r5
1206 mtspr SPRN_PMC4, r6
1207 mtspr SPRN_PMC5, r8
1208 mtspr SPRN_PMC6, r9
1209BEGIN_FTR_SECTION
1210 mtspr SPRN_PMC7, r10
1211 mtspr SPRN_PMC8, r11
1212END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1213 ld r3, HSTATE_MMCR(r13)
1214 ld r4, HSTATE_MMCR + 8(r13)
1215 ld r5, HSTATE_MMCR + 16(r13)
1216 mtspr SPRN_MMCR1, r4
1217 mtspr SPRN_MMCRA, r5
1218 mtspr SPRN_MMCR0, r3
1219 isync
122023:
1221 /*
1222 * For external and machine check interrupts, we need
1223 * to call the Linux handler to process the interrupt.
1224 * We do that by jumping to absolute address 0x500 for
1225 * external interrupts, or the machine_check_fwnmi label
1226 * for machine checks (since firmware might have patched
1227 * the vector area at 0x200). The [h]rfid at the end of the
1228 * handler will return to the book3s_hv_interrupts.S code.
1229 * For other interrupts we do the rfid to get back
1230 * to the book3s_hv_interrupts.S code here.
1231 */
1232 ld r8, HSTATE_VMHANDLER(r13)
1233 ld r7, HSTATE_HOST_MSR(r13)
1234
1235 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1236 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1237BEGIN_FTR_SECTION
1238 beq 11f
1239END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1240
1241 /* RFI into the highmem handler, or branch to interrupt handler */
1242 mfmsr r6
1243 li r0, MSR_RI
1244 andc r6, r6, r0
1245 mtmsrd r6, 1 /* Clear RI in MSR */
1246 mtsrr0 r8
1247 mtsrr1 r7
1248 beqa 0x500 /* external interrupt (PPC970) */
1249 beq cr1, 13f /* machine check */
1250 RFI
1251
1252 /* On POWER7, we have external interrupts set to use HSRR0/1 */
125311: mtspr SPRN_HSRR0, r8
1254 mtspr SPRN_HSRR1, r7
1255 ba 0x500
1256
125713: b machine_check_fwnmi
1258 1341
1259/* 1342/*
1260 * Check whether an HDSI is an HPTE not found fault or something else. 1343 * Check whether an HDSI is an HPTE not found fault or something else.
@@ -1333,7 +1416,7 @@ fast_interrupt_c_return:
1333 stw r8, VCPU_LAST_INST(r9) 1416 stw r8, VCPU_LAST_INST(r9)
1334 1417
1335 /* Unset guest mode. */ 1418 /* Unset guest mode. */
1336 li r0, KVM_GUEST_MODE_NONE 1419 li r0, KVM_GUEST_MODE_HOST_HV
1337 stb r0, HSTATE_IN_GUEST(r13) 1420 stb r0, HSTATE_IN_GUEST(r13)
1338 b guest_exit_cont 1421 b guest_exit_cont
1339 1422
@@ -1701,67 +1784,70 @@ machine_check_realmode:
1701 rotldi r11, r11, 63 1784 rotldi r11, r11, 63
1702 b fast_interrupt_c_return 1785 b fast_interrupt_c_return
1703 1786
1704secondary_too_late: 1787/*
1705 ld r5,HSTATE_KVM_VCORE(r13) 1788 * Determine what sort of external interrupt is pending (if any).
1706 HMT_LOW 1789 * Returns:
170713: lbz r3,VCORE_IN_GUEST(r5) 1790 * 0 if no interrupt is pending
1708 cmpwi r3,0 1791 * 1 if an interrupt is pending that needs to be handled by the host
1709 bne 13b 1792 * -1 if there was a guest wakeup IPI (which has now been cleared)
1710 HMT_MEDIUM 1793 */
1711 ld r11,PACA_SLBSHADOWPTR(r13) 1794kvmppc_read_intr:
1712 1795 /* see if a host IPI is pending */
1713 .rept SLB_NUM_BOLTED 1796 li r3, 1
1714 ld r5,SLBSHADOW_SAVEAREA(r11) 1797 lbz r0, HSTATE_HOST_IPI(r13)
1715 ld r6,SLBSHADOW_SAVEAREA+8(r11) 1798 cmpwi r0, 0
1716 andis. r7,r5,SLB_ESID_V@h 1799 bne 1f
1717 beq 1f
1718 slbmte r6,r5
17191: addi r11,r11,16
1720 .endr
1721 1800
1722secondary_nap: 1801 /* Now read the interrupt from the ICP */
1723 /* Clear our vcpu pointer so we don't come back in early */ 1802 ld r6, HSTATE_XICS_PHYS(r13)
1724 li r0, 0
1725 std r0, HSTATE_KVM_VCPU(r13)
1726 lwsync
1727 /* Clear any pending IPI - assume we're a secondary thread */
1728 ld r5, HSTATE_XICS_PHYS(r13)
1729 li r7, XICS_XIRR 1803 li r7, XICS_XIRR
1730 lwzcix r3, r5, r7 /* ack any pending interrupt */ 1804 cmpdi r6, 0
1731 rlwinm. r0, r3, 0, 0xffffff /* any pending? */ 1805 beq- 1f
1732 beq 37f 1806 lwzcix r0, r6, r7
1807 rlwinm. r3, r0, 0, 0xffffff
1733 sync 1808 sync
1734 li r0, 0xff 1809 beq 1f /* if nothing pending in the ICP */
1735 li r6, XICS_MFRR
1736 stbcix r0, r5, r6 /* clear the IPI */
1737 stwcix r3, r5, r7 /* EOI it */
173837: sync
1739 1810
1740 /* increment the nap count and then go to nap mode */ 1811 /* We found something in the ICP...
1741 ld r4, HSTATE_KVM_VCORE(r13) 1812 *
1742 addi r4, r4, VCORE_NAP_COUNT 1813 * If it's not an IPI, stash it in the PACA and return to
1743 lwsync /* make previous updates visible */ 1814 * the host, we don't (yet) handle directing real external
174451: lwarx r3, 0, r4 1815 * interrupts directly to the guest
1745 addi r3, r3, 1 1816 */
1746 stwcx. r3, 0, r4 1817 cmpwi r3, XICS_IPI /* if there is, is it an IPI? */
1747 bne 51b 1818 li r3, 1
1819 bne 42f
1748 1820
1749kvm_no_guest: 1821 /* It's an IPI, clear the MFRR and EOI it */
1750 li r0, KVM_HWTHREAD_IN_NAP 1822 li r3, 0xff
1751 stb r0, HSTATE_HWTHREAD_STATE(r13) 1823 li r8, XICS_MFRR
1824 stbcix r3, r6, r8 /* clear the IPI */
1825 stwcix r0, r6, r7 /* EOI it */
1826 sync
1752 1827
1753 li r3, LPCR_PECE0 1828 /* We need to re-check host IPI now in case it got set in the
1754 mfspr r4, SPRN_LPCR 1829 * meantime. If it's clear, we bounce the interrupt to the
1755 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 1830 * guest
1756 mtspr SPRN_LPCR, r4 1831 */
1757 isync 1832 lbz r0, HSTATE_HOST_IPI(r13)
1758 std r0, HSTATE_SCRATCH0(r13) 1833 cmpwi r0, 0
1759 ptesync 1834 bne- 43f
1760 ld r0, HSTATE_SCRATCH0(r13) 1835
17611: cmpd r0, r0 1836 /* OK, it's an IPI for us */
1762 bne 1b 1837 li r3, -1
1763 nap 18381: blr
1764 b . 1839
184042: /* It's not an IPI and it's for the host, stash it in the PACA
1841 * before exit, it will be picked up by the host ICP driver
1842 */
1843 stw r0, HSTATE_SAVED_XIRR(r13)
1844 b 1b
1845
184643: /* We raced with the host, we need to resend that IPI, bummer */
1847 li r0, IPI_PRIORITY
1848 stbcix r0, r6, r8 /* set the IPI */
1849 sync
1850 b 1b
1765 1851
1766/* 1852/*
1767 * Save away FP, VMX and VSX registers. 1853 * Save away FP, VMX and VSX registers.
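
kvmppc_read_intr() centralises the XICS poll that used to sit inline in do_ext_interrupt. Its contract, per the comment above: 0 when nothing is pending, 1 when something (including a raced host IPI) must be handled by the host, and -1 when a guest-wakeup IPI was found and has already been cleared and EOId. A condensed C sketch of that decision logic (icp_read()/icp_write() are stand-ins for the cache-inhibited lwzcix/stbcix/stwcix accesses, and the no-XICS-mapping check is elided):

/* Sketch of the kvmppc_read_intr() return-value contract. */
static int read_intr_sketch(struct paca_struct *paca)
{
	u32 xirr;

	if (paca->kvm_hstate.host_ipi)
		return 1;				/* host wants us out */

	xirr = icp_read(XICS_XIRR);
	if ((xirr & 0xffffff) == 0)
		return 0;				/* nothing pending in the ICP */

	if ((xirr & 0xffffff) != XICS_IPI) {
		/* stash it for the host ICP driver to pick up */
		paca->kvm_hstate.saved_xirr = xirr;
		return 1;
	}

	icp_write(XICS_MFRR, 0xff);			/* clear the IPI */
	icp_write(XICS_XIRR, xirr);			/* EOI it */

	if (paca->kvm_hstate.host_ipi) {
		/* raced with the host: re-raise the IPI and bail to the host */
		icp_write(XICS_MFRR, IPI_PRIORITY);
		return 1;
	}
	return -1;					/* guest wakeup IPI, handled */
}
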
@@ -1879,3 +1965,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1879 lwz r7,VCPU_VRSAVE(r4) 1965 lwz r7,VCPU_VRSAVE(r4)
1880 mtspr SPRN_VRSAVE,r7 1966 mtspr SPRN_VRSAVE,r7
1881 blr 1967 blr
1968
1969/*
1970 * We come here if we get any exception or interrupt while we are
1971 * executing host real mode code while in guest MMU context.
1972 * For now just spin, but we should do something better.
1973 */
1974kvmppc_bad_host_intr:
1975 b .
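
The kvmppc_bad_host_intr catch-all closes the loop on the HSTATE_IN_GUEST byte used by the shared interrupt entry: kvmppc_interrupt_hv inspects it to tell whether the trap arrived from an HV guest, from a PR guest (when CONFIG_KVM_BOOK3S_PR_POSSIBLE), or from host real-mode code itself, which for now just spins. Roughly, in C (a sketch of the routing only; kvmppc_bad_host_intr and kvmppc_interrupt_pr are assembly labels, not C functions):

	switch (get_paca()->kvm_hstate.in_guest) {
	case KVM_GUEST_MODE_HOST_HV:
		kvmppc_bad_host_intr();		/* trap while in host real-mode code */
		break;
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
	case KVM_GUEST_MODE_GUEST:
		kvmppc_interrupt_pr();		/* PR guest: hand off to the PR handler */
		break;
#endif
	default:	/* KVM_GUEST_MODE_GUEST_HV: back in the host, guest MMU context */
		get_paca()->kvm_hstate.in_guest = KVM_GUEST_MODE_HOST_HV;
		/* ... continue with the HV exit path ... */
		break;
	}
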
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 17cfae5497a3..f4dd041c14ea 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -26,8 +26,12 @@
26 26
27#if defined(CONFIG_PPC_BOOK3S_64) 27#if defined(CONFIG_PPC_BOOK3S_64)
28#define FUNC(name) GLUE(.,name) 28#define FUNC(name) GLUE(.,name)
29#define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU
30
29#elif defined(CONFIG_PPC_BOOK3S_32) 31#elif defined(CONFIG_PPC_BOOK3S_32)
30#define FUNC(name) name 32#define FUNC(name) name
33#define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
34
31#endif /* CONFIG_PPC_BOOK3S_XX */ 35#endif /* CONFIG_PPC_BOOK3S_XX */
32 36
33#define VCPU_LOAD_NVGPRS(vcpu) \ 37#define VCPU_LOAD_NVGPRS(vcpu) \
@@ -87,8 +91,14 @@ kvm_start_entry:
87 VCPU_LOAD_NVGPRS(r4) 91 VCPU_LOAD_NVGPRS(r4)
88 92
89kvm_start_lightweight: 93kvm_start_lightweight:
94 /* Copy registers into shadow vcpu so we can access them in real mode */
95 GET_SHADOW_VCPU(r3)
96 bl FUNC(kvmppc_copy_to_svcpu)
97 nop
98 REST_GPR(4, r1)
90 99
91#ifdef CONFIG_PPC_BOOK3S_64 100#ifdef CONFIG_PPC_BOOK3S_64
101 /* Get the dcbz32 flag */
92 PPC_LL r3, VCPU_HFLAGS(r4) 102 PPC_LL r3, VCPU_HFLAGS(r4)
93 rldicl r3, r3, 0, 63 /* r3 &= 1 */ 103 rldicl r3, r3, 0, 63 /* r3 &= 1 */
94 stb r3, HSTATE_RESTORE_HID5(r13) 104 stb r3, HSTATE_RESTORE_HID5(r13)
@@ -111,9 +121,6 @@ kvm_start_lightweight:
111 * 121 *
112 */ 122 */
113 123
114.global kvmppc_handler_highmem
115kvmppc_handler_highmem:
116
117 /* 124 /*
118 * Register usage at this point: 125 * Register usage at this point:
119 * 126 *
@@ -125,18 +132,31 @@ kvmppc_handler_highmem:
125 * 132 *
126 */ 133 */
127 134
128 /* R7 = vcpu */ 135 /* Transfer reg values from shadow vcpu back to vcpu struct */
129 PPC_LL r7, GPR4(r1) 136 /* On 64-bit, interrupts are still off at this point */
137 PPC_LL r3, GPR4(r1) /* vcpu pointer */
138 GET_SHADOW_VCPU(r4)
139 bl FUNC(kvmppc_copy_from_svcpu)
140 nop
130 141
131#ifdef CONFIG_PPC_BOOK3S_64 142#ifdef CONFIG_PPC_BOOK3S_64
143 /* Re-enable interrupts */
144 ld r3, HSTATE_HOST_MSR(r13)
145 ori r3, r3, MSR_EE
146 MTMSR_EERI(r3)
147
132 /* 148 /*
133 * Reload kernel SPRG3 value. 149 * Reload kernel SPRG3 value.
134 * No need to save guest value as usermode can't modify SPRG3. 150 * No need to save guest value as usermode can't modify SPRG3.
135 */ 151 */
136 ld r3, PACA_SPRG3(r13) 152 ld r3, PACA_SPRG3(r13)
137 mtspr SPRN_SPRG3, r3 153 mtspr SPRN_SPRG3, r3
154
138#endif /* CONFIG_PPC_BOOK3S_64 */ 155#endif /* CONFIG_PPC_BOOK3S_64 */
139 156
157 /* R7 = vcpu */
158 PPC_LL r7, GPR4(r1)
159
140 PPC_STL r14, VCPU_GPR(R14)(r7) 160 PPC_STL r14, VCPU_GPR(R14)(r7)
141 PPC_STL r15, VCPU_GPR(R15)(r7) 161 PPC_STL r15, VCPU_GPR(R15)(r7)
142 PPC_STL r16, VCPU_GPR(R16)(r7) 162 PPC_STL r16, VCPU_GPR(R16)(r7)
@@ -161,7 +181,7 @@ kvmppc_handler_highmem:
161 181
162 /* Restore r3 (kvm_run) and r4 (vcpu) */ 182 /* Restore r3 (kvm_run) and r4 (vcpu) */
163 REST_2GPRS(3, r1) 183 REST_2GPRS(3, r1)
164 bl FUNC(kvmppc_handle_exit) 184 bl FUNC(kvmppc_handle_exit_pr)
165 185
166 /* If RESUME_GUEST, get back in the loop */ 186 /* If RESUME_GUEST, get back in the loop */
167 cmpwi r3, RESUME_GUEST 187 cmpwi r3, RESUME_GUEST
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index da8b13c4b776..5a1ab1250a05 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -28,7 +28,7 @@
28#include <asm/mmu_context.h> 28#include <asm/mmu_context.h>
29#include <asm/hw_irq.h> 29#include <asm/hw_irq.h>
30 30
31#include "trace.h" 31#include "trace_pr.h"
32 32
33#define PTE_SIZE 12 33#define PTE_SIZE 12
34 34
@@ -56,6 +56,14 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
56 HPTEG_HASH_BITS_VPTE_LONG); 56 HPTEG_HASH_BITS_VPTE_LONG);
57} 57}
58 58
59#ifdef CONFIG_PPC_BOOK3S_64
60static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
61{
62 return hash_64((vpage & 0xffffffff0ULL) >> 4,
63 HPTEG_HASH_BITS_VPTE_64K);
64}
65#endif
66
59void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 67void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
60{ 68{
61 u64 index; 69 u64 index;
@@ -83,6 +91,15 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
83 hlist_add_head_rcu(&pte->list_vpte_long, 91 hlist_add_head_rcu(&pte->list_vpte_long,
84 &vcpu3s->hpte_hash_vpte_long[index]); 92 &vcpu3s->hpte_hash_vpte_long[index]);
85 93
94#ifdef CONFIG_PPC_BOOK3S_64
95 /* Add to vPTE_64k list */
96 index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage);
97 hlist_add_head_rcu(&pte->list_vpte_64k,
98 &vcpu3s->hpte_hash_vpte_64k[index]);
99#endif
100
101 vcpu3s->hpte_cache_count++;
102
86 spin_unlock(&vcpu3s->mmu_lock); 103 spin_unlock(&vcpu3s->mmu_lock);
87} 104}
88 105
@@ -113,10 +130,13 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
113 hlist_del_init_rcu(&pte->list_pte_long); 130 hlist_del_init_rcu(&pte->list_pte_long);
114 hlist_del_init_rcu(&pte->list_vpte); 131 hlist_del_init_rcu(&pte->list_vpte);
115 hlist_del_init_rcu(&pte->list_vpte_long); 132 hlist_del_init_rcu(&pte->list_vpte_long);
133#ifdef CONFIG_PPC_BOOK3S_64
134 hlist_del_init_rcu(&pte->list_vpte_64k);
135#endif
136 vcpu3s->hpte_cache_count--;
116 137
117 spin_unlock(&vcpu3s->mmu_lock); 138 spin_unlock(&vcpu3s->mmu_lock);
118 139
119 vcpu3s->hpte_cache_count--;
120 call_rcu(&pte->rcu_head, free_pte_rcu); 140 call_rcu(&pte->rcu_head, free_pte_rcu);
121} 141}
122 142
@@ -219,6 +239,29 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
219 rcu_read_unlock(); 239 rcu_read_unlock();
220} 240}
221 241
242#ifdef CONFIG_PPC_BOOK3S_64
243/* Flush with mask 0xffffffff0 */
244static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
245{
246 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
247 struct hlist_head *list;
248 struct hpte_cache *pte;
249 u64 vp_mask = 0xffffffff0ULL;
250
251 list = &vcpu3s->hpte_hash_vpte_64k[
252 kvmppc_mmu_hash_vpte_64k(guest_vp)];
253
254 rcu_read_lock();
255
256 /* Check the list for matching entries and invalidate */
257 hlist_for_each_entry_rcu(pte, list, list_vpte_64k)
258 if ((pte->pte.vpage & vp_mask) == guest_vp)
259 invalidate_pte(vcpu, pte);
260
261 rcu_read_unlock();
262}
263#endif
264
222/* Flush with mask 0xffffff000 */ 265/* Flush with mask 0xffffff000 */
223static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) 266static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
224{ 267{
@@ -249,6 +292,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
249 case 0xfffffffffULL: 292 case 0xfffffffffULL:
250 kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); 293 kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
251 break; 294 break;
295#ifdef CONFIG_PPC_BOOK3S_64
296 case 0xffffffff0ULL:
297 kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
298 break;
299#endif
252 case 0xffffff000ULL: 300 case 0xffffff000ULL:
253 kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); 301 kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
254 break; 302 break;
@@ -285,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
285 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 333 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
286 struct hpte_cache *pte; 334 struct hpte_cache *pte;
287 335
288 pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
289 vcpu3s->hpte_cache_count++;
290
291 if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM) 336 if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
292 kvmppc_mmu_pte_flush_all(vcpu); 337 kvmppc_mmu_pte_flush_all(vcpu);
293 338
339 pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
340
294 return pte; 341 return pte;
295} 342}
296 343
344void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
345{
346 kmem_cache_free(hpte_cache, pte);
347}
348
297void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) 349void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
298{ 350{
299 kvmppc_mmu_pte_flush(vcpu, 0, 0); 351 kvmppc_mmu_pte_flush(vcpu, 0, 0);
@@ -320,6 +372,10 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
320 ARRAY_SIZE(vcpu3s->hpte_hash_vpte)); 372 ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
321 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long, 373 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
322 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long)); 374 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
375#ifdef CONFIG_PPC_BOOK3S_64
376 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
377 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
378#endif
323 379
324 spin_lock_init(&vcpu3s->mmu_lock); 380 spin_lock_init(&vcpu3s->mmu_lock);
325 381
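
Two accounting changes ride along in this file: hpte_cache_count is now only adjusted while mmu_lock is held (increment when the entry is hashed in, decrement when it is invalidated), and kvmppc_mmu_hpte_cache_next() checks for a full cache before allocating instead of after, so an entry is never counted before it is actually reachable through the hash lists. The resulting pattern for a caller, roughly (a sketch; error handling and the pte contents elided):

	pte = kvmppc_mmu_hpte_cache_next(vcpu);	/* flushes all if full, then allocates */
	/* ... fill in the translation ... */
	kvmppc_mmu_hpte_cache_map(vcpu, pte);	/* hashes it in; count++ under mmu_lock */

If the mapping attempt fails before the entry is hashed in, the new kvmppc_mmu_hpte_cache_free() gives the caller a way to hand the unused entry back to the slab cache.
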
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c0b48f96a91c..fe14ca3dd171 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -40,8 +40,12 @@
40#include <linux/sched.h> 40#include <linux/sched.h>
41#include <linux/vmalloc.h> 41#include <linux/vmalloc.h>
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/module.h>
43 44
44#include "trace.h" 45#include "book3s.h"
46
47#define CREATE_TRACE_POINTS
48#include "trace_pr.h"
45 49
46/* #define EXIT_DEBUG */ 50/* #define EXIT_DEBUG */
47/* #define DEBUG_EXT */ 51/* #define DEBUG_EXT */
@@ -56,29 +60,25 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
56#define HW_PAGE_SIZE PAGE_SIZE 60#define HW_PAGE_SIZE PAGE_SIZE
57#endif 61#endif
58 62
59void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 63static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
60{ 64{
61#ifdef CONFIG_PPC_BOOK3S_64 65#ifdef CONFIG_PPC_BOOK3S_64
62 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 66 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
63 memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); 67 memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
64 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
65 sizeof(get_paca()->shadow_vcpu));
66 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; 68 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
67 svcpu_put(svcpu); 69 svcpu_put(svcpu);
68#endif 70#endif
69 vcpu->cpu = smp_processor_id(); 71 vcpu->cpu = smp_processor_id();
70#ifdef CONFIG_PPC_BOOK3S_32 72#ifdef CONFIG_PPC_BOOK3S_32
71 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; 73 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
72#endif 74#endif
73} 75}
74 76
75void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 77static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
76{ 78{
77#ifdef CONFIG_PPC_BOOK3S_64 79#ifdef CONFIG_PPC_BOOK3S_64
78 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 80 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
79 memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); 81 memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
80 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
81 sizeof(get_paca()->shadow_vcpu));
82 to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; 82 to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
83 svcpu_put(svcpu); 83 svcpu_put(svcpu);
84#endif 84#endif
@@ -87,7 +87,61 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
87 vcpu->cpu = -1; 87 vcpu->cpu = -1;
88} 88}
89 89
90int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) 90/* Copy data needed by real-mode code from vcpu to shadow vcpu */
91void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
92 struct kvm_vcpu *vcpu)
93{
94 svcpu->gpr[0] = vcpu->arch.gpr[0];
95 svcpu->gpr[1] = vcpu->arch.gpr[1];
96 svcpu->gpr[2] = vcpu->arch.gpr[2];
97 svcpu->gpr[3] = vcpu->arch.gpr[3];
98 svcpu->gpr[4] = vcpu->arch.gpr[4];
99 svcpu->gpr[5] = vcpu->arch.gpr[5];
100 svcpu->gpr[6] = vcpu->arch.gpr[6];
101 svcpu->gpr[7] = vcpu->arch.gpr[7];
102 svcpu->gpr[8] = vcpu->arch.gpr[8];
103 svcpu->gpr[9] = vcpu->arch.gpr[9];
104 svcpu->gpr[10] = vcpu->arch.gpr[10];
105 svcpu->gpr[11] = vcpu->arch.gpr[11];
106 svcpu->gpr[12] = vcpu->arch.gpr[12];
107 svcpu->gpr[13] = vcpu->arch.gpr[13];
108 svcpu->cr = vcpu->arch.cr;
109 svcpu->xer = vcpu->arch.xer;
110 svcpu->ctr = vcpu->arch.ctr;
111 svcpu->lr = vcpu->arch.lr;
112 svcpu->pc = vcpu->arch.pc;
113}
114
115/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
116void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
117 struct kvmppc_book3s_shadow_vcpu *svcpu)
118{
119 vcpu->arch.gpr[0] = svcpu->gpr[0];
120 vcpu->arch.gpr[1] = svcpu->gpr[1];
121 vcpu->arch.gpr[2] = svcpu->gpr[2];
122 vcpu->arch.gpr[3] = svcpu->gpr[3];
123 vcpu->arch.gpr[4] = svcpu->gpr[4];
124 vcpu->arch.gpr[5] = svcpu->gpr[5];
125 vcpu->arch.gpr[6] = svcpu->gpr[6];
126 vcpu->arch.gpr[7] = svcpu->gpr[7];
127 vcpu->arch.gpr[8] = svcpu->gpr[8];
128 vcpu->arch.gpr[9] = svcpu->gpr[9];
129 vcpu->arch.gpr[10] = svcpu->gpr[10];
130 vcpu->arch.gpr[11] = svcpu->gpr[11];
131 vcpu->arch.gpr[12] = svcpu->gpr[12];
132 vcpu->arch.gpr[13] = svcpu->gpr[13];
133 vcpu->arch.cr = svcpu->cr;
134 vcpu->arch.xer = svcpu->xer;
135 vcpu->arch.ctr = svcpu->ctr;
136 vcpu->arch.lr = svcpu->lr;
137 vcpu->arch.pc = svcpu->pc;
138 vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
139 vcpu->arch.fault_dar = svcpu->fault_dar;
140 vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
141 vcpu->arch.last_inst = svcpu->last_inst;
142}
143
144static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
91{ 145{
92 int r = 1; /* Indicate we want to get back into the guest */ 146 int r = 1; /* Indicate we want to get back into the guest */
93 147
@@ -100,44 +154,69 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
100} 154}
101 155
102/************* MMU Notifiers *************/ 156/************* MMU Notifiers *************/
157static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
158 unsigned long end)
159{
160 long i;
161 struct kvm_vcpu *vcpu;
162 struct kvm_memslots *slots;
163 struct kvm_memory_slot *memslot;
164
165 slots = kvm_memslots(kvm);
166 kvm_for_each_memslot(memslot, slots) {
167 unsigned long hva_start, hva_end;
168 gfn_t gfn, gfn_end;
169
170 hva_start = max(start, memslot->userspace_addr);
171 hva_end = min(end, memslot->userspace_addr +
172 (memslot->npages << PAGE_SHIFT));
173 if (hva_start >= hva_end)
174 continue;
175 /*
176 * {gfn(page) | page intersects with [hva_start, hva_end)} =
177 * {gfn, gfn+1, ..., gfn_end-1}.
178 */
179 gfn = hva_to_gfn_memslot(hva_start, memslot);
180 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
181 kvm_for_each_vcpu(i, vcpu, kvm)
182 kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
183 gfn_end << PAGE_SHIFT);
184 }
185}
103 186
104int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 187static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
105{ 188{
106 trace_kvm_unmap_hva(hva); 189 trace_kvm_unmap_hva(hva);
107 190
108 /* 191 do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
109 * Flush all shadow tlb entries everywhere. This is slow, but
110 * we are 100% sure that we catch the to be unmapped page
111 */
112 kvm_flush_remote_tlbs(kvm);
113 192
114 return 0; 193 return 0;
115} 194}
116 195
117int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 196static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
197 unsigned long end)
118{ 198{
119 /* kvm_unmap_hva flushes everything anyways */ 199 do_kvm_unmap_hva(kvm, start, end);
120 kvm_unmap_hva(kvm, start);
121 200
122 return 0; 201 return 0;
123} 202}
124 203
125int kvm_age_hva(struct kvm *kvm, unsigned long hva) 204static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
126{ 205{
127 /* XXX could be more clever ;) */ 206 /* XXX could be more clever ;) */
128 return 0; 207 return 0;
129} 208}
130 209
131int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 210static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
132{ 211{
133 /* XXX could be more clever ;) */ 212 /* XXX could be more clever ;) */
134 return 0; 213 return 0;
135} 214}
136 215
137void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 216static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
138{ 217{
139 /* The page will get remapped properly on its next fault */ 218 /* The page will get remapped properly on its next fault */
140 kvm_unmap_hva(kvm, hva); 219 do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
141} 220}
142 221
143/*****************************************/ 222/*****************************************/
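
The do_kvm_unmap_hva() helper added above clamps the unmapped HVA range to each memslot and converts it into a half-open gfn range before flushing the shadow PTEs on every vcpu. A stand-alone sketch of that clamping arithmetic, with a simplified memslot layout assumed purely for illustration:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* simplified stand-in for struct kvm_memory_slot */
struct memslot {
	uint64_t base_gfn;        /* first guest frame number of the slot */
	uint64_t npages;          /* slot size in pages */
	uint64_t userspace_addr;  /* host virtual address the slot maps */
};

/* clamp [start, end) to one slot; returns 0 if they do not intersect */
static int hva_range_to_gfn_range(const struct memslot *slot,
                                  uint64_t start, uint64_t end,
                                  uint64_t *gfn, uint64_t *gfn_end)
{
	uint64_t slot_end = slot->userspace_addr + (slot->npages << PAGE_SHIFT);
	uint64_t hva_start = start > slot->userspace_addr ? start : slot->userspace_addr;
	uint64_t hva_end = end < slot_end ? end : slot_end;

	if (hva_start >= hva_end)
		return 0;

	/* first intersecting page, and the last one rounded up (half-open) */
	*gfn = slot->base_gfn + ((hva_start - slot->userspace_addr) >> PAGE_SHIFT);
	*gfn_end = slot->base_gfn +
	           ((hva_end + PAGE_SIZE - 1 - slot->userspace_addr) >> PAGE_SHIFT);
	return 1;
}

int main(void)
{
	struct memslot slot = { 0x100, 16, 0x7f0000000000UL };
	uint64_t gfn, gfn_end;

	if (hva_range_to_gfn_range(&slot, 0x7f0000001800UL, 0x7f0000003800UL,
	                           &gfn, &gfn_end))
		printf("flush gfn 0x%" PRIx64 " .. 0x%" PRIx64 "\n", gfn, gfn_end);
	return 0;
}
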
@@ -159,7 +238,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
159 vcpu->arch.shadow_msr = smsr; 238 vcpu->arch.shadow_msr = smsr;
160} 239}
161 240
162void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 241static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
163{ 242{
164 ulong old_msr = vcpu->arch.shared->msr; 243 ulong old_msr = vcpu->arch.shared->msr;
165 244
@@ -219,7 +298,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
219 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 298 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
220} 299}
221 300
222void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 301void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
223{ 302{
224 u32 host_pvr; 303 u32 host_pvr;
225 304
@@ -256,6 +335,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
256 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) 335 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
257 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); 336 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
258 337
338 /*
339 * If they're asking for POWER6 or later, set the flag
340 * indicating that we can do multiple large page sizes
341 * and 1TB segments.
342 * Also set the flag that indicates that tlbie has the large
343 * page bit in the RB operand instead of the instruction.
344 */
345 switch (PVR_VER(pvr)) {
346 case PVR_POWER6:
347 case PVR_POWER7:
348 case PVR_POWER7p:
349 case PVR_POWER8:
350 vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
351 BOOK3S_HFLAG_NEW_TLBIE;
352 break;
353 }
354
259#ifdef CONFIG_PPC_BOOK3S_32 355#ifdef CONFIG_PPC_BOOK3S_32
260 /* 32 bit Book3S always has 32 byte dcbz */ 356 /* 32 bit Book3S always has 32 byte dcbz */
261 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 357 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
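
The new PVR_VER() switch above gates the multi-page-size and new-style-tlbie behaviour on POWER6 and later parts. A rough illustration of the idea follows; the flag bits and version numbers are placeholders rather than the kernel's BOOK3S_HFLAG_* and PVR_* constants, and only the fact that the version field sits in the upper half of the PVR is relied on:

#include <stdint.h>

/* placeholder flag bits, standing in for BOOK3S_HFLAG_* */
#define HFLAG_MULTI_PGSIZE  (1u << 0)
#define HFLAG_NEW_TLBIE     (1u << 1)

/* placeholder version numbers, standing in for PVR_POWER6..PVR_POWER8 */
enum { PVR_P6 = 0x3e, PVR_P7 = 0x3f, PVR_P7P = 0x4a, PVR_P8 = 0x4d };

static inline unsigned int pvr_ver(uint32_t pvr)
{
	return pvr >> 16;	/* processor version, ignoring the revision */
}

static unsigned int hflags_for_pvr(uint32_t pvr)
{
	switch (pvr_ver(pvr)) {
	case PVR_P6:
	case PVR_P7:
	case PVR_P7P:
	case PVR_P8:
		/* 64k pages, 1TB segments, large-page bit in the tlbie RB */
		return HFLAG_MULTI_PGSIZE | HFLAG_NEW_TLBIE;
	default:
		return 0;	/* e.g. PPC970: 4k/16M only, old-style tlbie */
	}
}

int main(void)
{
	return hflags_for_pvr(0x003f0201) ? 0 : 1;	/* placeholder PVR value */
}
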
@@ -334,6 +430,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
334 ulong eaddr, int vec) 430 ulong eaddr, int vec)
335{ 431{
336 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); 432 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
433 bool iswrite = false;
337 int r = RESUME_GUEST; 434 int r = RESUME_GUEST;
338 int relocated; 435 int relocated;
339 int page_found = 0; 436 int page_found = 0;
@@ -344,10 +441,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
344 u64 vsid; 441 u64 vsid;
345 442
346 relocated = data ? dr : ir; 443 relocated = data ? dr : ir;
444 if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
445 iswrite = true;
347 446
348 /* Resolve real address if translation turned on */ 447 /* Resolve real address if translation turned on */
349 if (relocated) { 448 if (relocated) {
350 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); 449 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
351 } else { 450 } else {
352 pte.may_execute = true; 451 pte.may_execute = true;
353 pte.may_read = true; 452 pte.may_read = true;
@@ -355,6 +454,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
355 pte.raddr = eaddr & KVM_PAM; 454 pte.raddr = eaddr & KVM_PAM;
356 pte.eaddr = eaddr; 455 pte.eaddr = eaddr;
357 pte.vpage = eaddr >> 12; 456 pte.vpage = eaddr >> 12;
457 pte.page_size = MMU_PAGE_64K;
358 } 458 }
359 459
360 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 460 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
@@ -388,22 +488,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
388 488
389 if (page_found == -ENOENT) { 489 if (page_found == -ENOENT) {
390 /* Page not found in guest PTE entries */ 490 /* Page not found in guest PTE entries */
391 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
392 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 491 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
393 vcpu->arch.shared->dsisr = svcpu->fault_dsisr; 492 vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
394 vcpu->arch.shared->msr |= 493 vcpu->arch.shared->msr |=
395 (svcpu->shadow_srr1 & 0x00000000f8000000ULL); 494 vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
396 svcpu_put(svcpu);
397 kvmppc_book3s_queue_irqprio(vcpu, vec); 495 kvmppc_book3s_queue_irqprio(vcpu, vec);
398 } else if (page_found == -EPERM) { 496 } else if (page_found == -EPERM) {
399 /* Storage protection */ 497 /* Storage protection */
400 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
401 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 498 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
402 vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; 499 vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
403 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; 500 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
404 vcpu->arch.shared->msr |= 501 vcpu->arch.shared->msr |=
405 svcpu->shadow_srr1 & 0x00000000f8000000ULL; 502 vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
406 svcpu_put(svcpu);
407 kvmppc_book3s_queue_irqprio(vcpu, vec); 503 kvmppc_book3s_queue_irqprio(vcpu, vec);
408 } else if (page_found == -EINVAL) { 504 } else if (page_found == -EINVAL) {
409 /* Page not found in guest SLB */ 505 /* Page not found in guest SLB */
@@ -411,12 +507,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
411 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 507 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
412 } else if (!is_mmio && 508 } else if (!is_mmio &&
413 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { 509 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
510 if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
511 /*
512 * There is already a host HPTE there, presumably
513 * a read-only one for a page the guest thinks
514 * is writable, so get rid of it first.
515 */
516 kvmppc_mmu_unmap_page(vcpu, &pte);
517 }
414 /* The guest's PTE is not mapped yet. Map on the host */ 518 /* The guest's PTE is not mapped yet. Map on the host */
415 kvmppc_mmu_map_page(vcpu, &pte); 519 kvmppc_mmu_map_page(vcpu, &pte, iswrite);
416 if (data) 520 if (data)
417 vcpu->stat.sp_storage++; 521 vcpu->stat.sp_storage++;
418 else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 522 else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
419 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 523 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
420 kvmppc_patch_dcbz(vcpu, &pte); 524 kvmppc_patch_dcbz(vcpu, &pte);
421 } else { 525 } else {
422 /* MMIO */ 526 /* MMIO */
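
The fault path above now notes whether the access was a store (DSISR_ISSTORE) and, when a data fault arrives without DSISR_NOHPTE set, tears down the existing host HPTE, presumably a read-only mapping of a page the guest considers writable, before remapping it with write permission. A compile-only sketch of that decision, using placeholder DSISR masks and stub callbacks:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* placeholder DSISR bits; the real masks come from asm/reg.h */
#define F_ISSTORE  0x02000000u	/* fault was caused by a store */
#define F_NOHPTE   0x40000000u	/* no matching host HPTE was found */

static void unmap_page(void)       { puts("unmap stale read-only HPTE"); }
static void map_page(bool iswrite) { printf("map page, writable=%d\n", iswrite); }

/* model of the write-fault handling added to kvmppc_handle_pagefault() */
static void handle_data_fault(uint32_t dsisr)
{
	bool iswrite = dsisr & F_ISSTORE;

	if (!(dsisr & F_NOHPTE))
		unmap_page();	/* an HPTE exists but denied the access */

	map_page(iswrite);	/* insert the HPTE, writable if required */
}

int main(void)
{
	handle_data_fault(F_ISSTORE);	/* write protection fault */
	handle_data_fault(F_NOHPTE);	/* plain missing translation */
	return 0;
}
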
@@ -619,13 +723,15 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
619 723
620 if (lost_ext & MSR_FP) 724 if (lost_ext & MSR_FP)
621 kvmppc_load_up_fpu(); 725 kvmppc_load_up_fpu();
726#ifdef CONFIG_ALTIVEC
622 if (lost_ext & MSR_VEC) 727 if (lost_ext & MSR_VEC)
623 kvmppc_load_up_altivec(); 728 kvmppc_load_up_altivec();
729#endif
624 current->thread.regs->msr |= lost_ext; 730 current->thread.regs->msr |= lost_ext;
625} 731}
626 732
627int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 733int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
628 unsigned int exit_nr) 734 unsigned int exit_nr)
629{ 735{
630 int r = RESUME_HOST; 736 int r = RESUME_HOST;
631 int s; 737 int s;
@@ -643,25 +749,32 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
643 switch (exit_nr) { 749 switch (exit_nr) {
644 case BOOK3S_INTERRUPT_INST_STORAGE: 750 case BOOK3S_INTERRUPT_INST_STORAGE:
645 { 751 {
646 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 752 ulong shadow_srr1 = vcpu->arch.shadow_srr1;
647 ulong shadow_srr1 = svcpu->shadow_srr1;
648 vcpu->stat.pf_instruc++; 753 vcpu->stat.pf_instruc++;
649 754
650#ifdef CONFIG_PPC_BOOK3S_32 755#ifdef CONFIG_PPC_BOOK3S_32
651 /* We set segments as unused segments when invalidating them. So 756 /* We set segments as unused segments when invalidating them. So
652 * treat the respective fault as segment fault. */ 757 * treat the respective fault as segment fault. */
653 if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { 758 {
654 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 759 struct kvmppc_book3s_shadow_vcpu *svcpu;
655 r = RESUME_GUEST; 760 u32 sr;
761
762 svcpu = svcpu_get(vcpu);
763 sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
656 svcpu_put(svcpu); 764 svcpu_put(svcpu);
657 break; 765 if (sr == SR_INVALID) {
766 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
767 r = RESUME_GUEST;
768 break;
769 }
658 } 770 }
659#endif 771#endif
660 svcpu_put(svcpu);
661 772
662 /* only care about PTEG not found errors, but leave NX alone */ 773 /* only care about PTEG not found errors, but leave NX alone */
663 if (shadow_srr1 & 0x40000000) { 774 if (shadow_srr1 & 0x40000000) {
775 int idx = srcu_read_lock(&vcpu->kvm->srcu);
664 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 776 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
777 srcu_read_unlock(&vcpu->kvm->srcu, idx);
665 vcpu->stat.sp_instruc++; 778 vcpu->stat.sp_instruc++;
666 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 779 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
667 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 780 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -682,25 +795,36 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
682 case BOOK3S_INTERRUPT_DATA_STORAGE: 795 case BOOK3S_INTERRUPT_DATA_STORAGE:
683 { 796 {
684 ulong dar = kvmppc_get_fault_dar(vcpu); 797 ulong dar = kvmppc_get_fault_dar(vcpu);
685 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 798 u32 fault_dsisr = vcpu->arch.fault_dsisr;
686 u32 fault_dsisr = svcpu->fault_dsisr;
687 vcpu->stat.pf_storage++; 799 vcpu->stat.pf_storage++;
688 800
689#ifdef CONFIG_PPC_BOOK3S_32 801#ifdef CONFIG_PPC_BOOK3S_32
690 /* We set segments as unused segments when invalidating them. So 802 /* We set segments as unused segments when invalidating them. So
691 * treat the respective fault as segment fault. */ 803 * treat the respective fault as segment fault. */
692 if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { 804 {
693 kvmppc_mmu_map_segment(vcpu, dar); 805 struct kvmppc_book3s_shadow_vcpu *svcpu;
694 r = RESUME_GUEST; 806 u32 sr;
807
808 svcpu = svcpu_get(vcpu);
809 sr = svcpu->sr[dar >> SID_SHIFT];
695 svcpu_put(svcpu); 810 svcpu_put(svcpu);
696 break; 811 if (sr == SR_INVALID) {
812 kvmppc_mmu_map_segment(vcpu, dar);
813 r = RESUME_GUEST;
814 break;
815 }
697 } 816 }
698#endif 817#endif
699 svcpu_put(svcpu);
700 818
701 /* The only case we need to handle is missing shadow PTEs */ 819 /*
702 if (fault_dsisr & DSISR_NOHPTE) { 820 * We need to handle missing shadow PTEs, and
821 * protection faults due to us mapping a page read-only
822 * when the guest thinks it is writable.
823 */
824 if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
825 int idx = srcu_read_lock(&vcpu->kvm->srcu);
703 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 826 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
827 srcu_read_unlock(&vcpu->kvm->srcu, idx);
704 } else { 828 } else {
705 vcpu->arch.shared->dar = dar; 829 vcpu->arch.shared->dar = dar;
706 vcpu->arch.shared->dsisr = fault_dsisr; 830 vcpu->arch.shared->dsisr = fault_dsisr;
@@ -743,13 +867,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
743 case BOOK3S_INTERRUPT_H_EMUL_ASSIST: 867 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
744 { 868 {
745 enum emulation_result er; 869 enum emulation_result er;
746 struct kvmppc_book3s_shadow_vcpu *svcpu;
747 ulong flags; 870 ulong flags;
748 871
749program_interrupt: 872program_interrupt:
750 svcpu = svcpu_get(vcpu); 873 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
751 flags = svcpu->shadow_srr1 & 0x1f0000ull;
752 svcpu_put(svcpu);
753 874
754 if (vcpu->arch.shared->msr & MSR_PR) { 875 if (vcpu->arch.shared->msr & MSR_PR) {
755#ifdef EXIT_DEBUG 876#ifdef EXIT_DEBUG
@@ -798,7 +919,7 @@ program_interrupt:
798 ulong cmd = kvmppc_get_gpr(vcpu, 3); 919 ulong cmd = kvmppc_get_gpr(vcpu, 3);
799 int i; 920 int i;
800 921
801#ifdef CONFIG_KVM_BOOK3S_64_PR 922#ifdef CONFIG_PPC_BOOK3S_64
802 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { 923 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
803 r = RESUME_GUEST; 924 r = RESUME_GUEST;
804 break; 925 break;
@@ -881,9 +1002,7 @@ program_interrupt:
881 break; 1002 break;
882 default: 1003 default:
883 { 1004 {
884 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 1005 ulong shadow_srr1 = vcpu->arch.shadow_srr1;
885 ulong shadow_srr1 = svcpu->shadow_srr1;
886 svcpu_put(svcpu);
887 /* Ugh - bork here! What did we get? */ 1006 /* Ugh - bork here! What did we get? */
888 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 1007 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
889 exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); 1008 exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
@@ -920,8 +1039,8 @@ program_interrupt:
920 return r; 1039 return r;
921} 1040}
922 1041
923int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 1042static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
924 struct kvm_sregs *sregs) 1043 struct kvm_sregs *sregs)
925{ 1044{
926 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1045 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
927 int i; 1046 int i;
@@ -947,13 +1066,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
947 return 0; 1066 return 0;
948} 1067}
949 1068
950int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 1069static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
951 struct kvm_sregs *sregs) 1070 struct kvm_sregs *sregs)
952{ 1071{
953 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1072 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
954 int i; 1073 int i;
955 1074
956 kvmppc_set_pvr(vcpu, sregs->pvr); 1075 kvmppc_set_pvr_pr(vcpu, sregs->pvr);
957 1076
958 vcpu3s->sdr1 = sregs->u.s.sdr1; 1077 vcpu3s->sdr1 = sregs->u.s.sdr1;
959 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 1078 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
@@ -983,7 +1102,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
983 return 0; 1102 return 0;
984} 1103}
985 1104
986int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 1105static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1106 union kvmppc_one_reg *val)
987{ 1107{
988 int r = 0; 1108 int r = 0;
989 1109
@@ -1012,7 +1132,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
1012 return r; 1132 return r;
1013} 1133}
1014 1134
1015int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 1135static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1136 union kvmppc_one_reg *val)
1016{ 1137{
1017 int r = 0; 1138 int r = 0;
1018 1139
@@ -1042,28 +1163,30 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
1042 return r; 1163 return r;
1043} 1164}
1044 1165
1045int kvmppc_core_check_processor_compat(void) 1166static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
1046{ 1167 unsigned int id)
1047 return 0;
1048}
1049
1050struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1051{ 1168{
1052 struct kvmppc_vcpu_book3s *vcpu_book3s; 1169 struct kvmppc_vcpu_book3s *vcpu_book3s;
1053 struct kvm_vcpu *vcpu; 1170 struct kvm_vcpu *vcpu;
1054 int err = -ENOMEM; 1171 int err = -ENOMEM;
1055 unsigned long p; 1172 unsigned long p;
1056 1173
1057 vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); 1174 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1058 if (!vcpu_book3s) 1175 if (!vcpu)
1059 goto out; 1176 goto out;
1060 1177
1061 vcpu_book3s->shadow_vcpu = 1178 vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
1062 kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); 1179 if (!vcpu_book3s)
1063 if (!vcpu_book3s->shadow_vcpu)
1064 goto free_vcpu; 1180 goto free_vcpu;
1181 vcpu->arch.book3s = vcpu_book3s;
1182
1183#ifdef CONFIG_KVM_BOOK3S_32
1184 vcpu->arch.shadow_vcpu =
1185 kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
1186 if (!vcpu->arch.shadow_vcpu)
1187 goto free_vcpu3s;
1188#endif
1065 1189
1066 vcpu = &vcpu_book3s->vcpu;
1067 err = kvm_vcpu_init(vcpu, kvm, id); 1190 err = kvm_vcpu_init(vcpu, kvm, id);
1068 if (err) 1191 if (err)
1069 goto free_shadow_vcpu; 1192 goto free_shadow_vcpu;
@@ -1076,13 +1199,19 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1076 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); 1199 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
1077 1200
1078#ifdef CONFIG_PPC_BOOK3S_64 1201#ifdef CONFIG_PPC_BOOK3S_64
1079 /* default to book3s_64 (970fx) */ 1202 /*
1203 * Default to the same as the host if we're on sufficiently
1204 * recent machine that we have 1TB segments;
1205 * otherwise default to PPC970FX.
1206 */
1080 vcpu->arch.pvr = 0x3C0301; 1207 vcpu->arch.pvr = 0x3C0301;
1208 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1209 vcpu->arch.pvr = mfspr(SPRN_PVR);
1081#else 1210#else
1082 /* default to book3s_32 (750) */ 1211 /* default to book3s_32 (750) */
1083 vcpu->arch.pvr = 0x84202; 1212 vcpu->arch.pvr = 0x84202;
1084#endif 1213#endif
1085 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 1214 kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
1086 vcpu->arch.slb_nr = 64; 1215 vcpu->arch.slb_nr = 64;
1087 1216
1088 vcpu->arch.shadow_msr = MSR_USER64; 1217 vcpu->arch.shadow_msr = MSR_USER64;
@@ -1096,24 +1225,31 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1096uninit_vcpu: 1225uninit_vcpu:
1097 kvm_vcpu_uninit(vcpu); 1226 kvm_vcpu_uninit(vcpu);
1098free_shadow_vcpu: 1227free_shadow_vcpu:
1099 kfree(vcpu_book3s->shadow_vcpu); 1228#ifdef CONFIG_KVM_BOOK3S_32
1100free_vcpu: 1229 kfree(vcpu->arch.shadow_vcpu);
1230free_vcpu3s:
1231#endif
1101 vfree(vcpu_book3s); 1232 vfree(vcpu_book3s);
1233free_vcpu:
1234 kmem_cache_free(kvm_vcpu_cache, vcpu);
1102out: 1235out:
1103 return ERR_PTR(err); 1236 return ERR_PTR(err);
1104} 1237}
1105 1238
1106void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1239static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
1107{ 1240{
1108 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1241 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
1109 1242
1110 free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); 1243 free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
1111 kvm_vcpu_uninit(vcpu); 1244 kvm_vcpu_uninit(vcpu);
1112 kfree(vcpu_book3s->shadow_vcpu); 1245#ifdef CONFIG_KVM_BOOK3S_32
1246 kfree(vcpu->arch.shadow_vcpu);
1247#endif
1113 vfree(vcpu_book3s); 1248 vfree(vcpu_book3s);
1249 kmem_cache_free(kvm_vcpu_cache, vcpu);
1114} 1250}
1115 1251
1116int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1252static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1117{ 1253{
1118 int ret; 1254 int ret;
1119 struct thread_fp_state fp; 1255 struct thread_fp_state fp;
@@ -1216,8 +1352,8 @@ out:
1216/* 1352/*
1217 * Get (and clear) the dirty memory log for a memory slot. 1353 * Get (and clear) the dirty memory log for a memory slot.
1218 */ 1354 */
1219int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 1355static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
1220 struct kvm_dirty_log *log) 1356 struct kvm_dirty_log *log)
1221{ 1357{
1222 struct kvm_memory_slot *memslot; 1358 struct kvm_memory_slot *memslot;
1223 struct kvm_vcpu *vcpu; 1359 struct kvm_vcpu *vcpu;
@@ -1252,67 +1388,100 @@ out:
1252 return r; 1388 return r;
1253} 1389}
1254 1390
1255#ifdef CONFIG_PPC64 1391static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
1256int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) 1392 struct kvm_memory_slot *memslot)
1257{ 1393{
1258 info->flags = KVM_PPC_1T_SEGMENTS; 1394 return;
1259 1395}
1260 /* SLB is always 64 entries */
1261 info->slb_size = 64;
1262
1263 /* Standard 4k base page size segment */
1264 info->sps[0].page_shift = 12;
1265 info->sps[0].slb_enc = 0;
1266 info->sps[0].enc[0].page_shift = 12;
1267 info->sps[0].enc[0].pte_enc = 0;
1268
1269 /* Standard 16M large page size segment */
1270 info->sps[1].page_shift = 24;
1271 info->sps[1].slb_enc = SLB_VSID_L;
1272 info->sps[1].enc[0].page_shift = 24;
1273 info->sps[1].enc[0].pte_enc = 0;
1274 1396
1397static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
1398 struct kvm_memory_slot *memslot,
1399 struct kvm_userspace_memory_region *mem)
1400{
1275 return 0; 1401 return 0;
1276} 1402}
1277#endif /* CONFIG_PPC64 */
1278 1403
1279void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1404static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
1280 struct kvm_memory_slot *dont) 1405 struct kvm_userspace_memory_region *mem,
1406 const struct kvm_memory_slot *old)
1281{ 1407{
1408 return;
1282} 1409}
1283 1410
1284int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1411static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
1285 unsigned long npages) 1412 struct kvm_memory_slot *dont)
1286{ 1413{
1287 return 0; 1414 return;
1288} 1415}
1289 1416
1290int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1417static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
1291 struct kvm_memory_slot *memslot, 1418 unsigned long npages)
1292 struct kvm_userspace_memory_region *mem)
1293{ 1419{
1294 return 0; 1420 return 0;
1295} 1421}
1296 1422
1297void kvmppc_core_commit_memory_region(struct kvm *kvm, 1423
1298 struct kvm_userspace_memory_region *mem, 1424#ifdef CONFIG_PPC64
1299 const struct kvm_memory_slot *old) 1425static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
1426 struct kvm_ppc_smmu_info *info)
1300{ 1427{
1301} 1428 long int i;
1429 struct kvm_vcpu *vcpu;
1430
1431 info->flags = 0;
1432
1433 /* SLB is always 64 entries */
1434 info->slb_size = 64;
1435
1436 /* Standard 4k base page size segment */
1437 info->sps[0].page_shift = 12;
1438 info->sps[0].slb_enc = 0;
1439 info->sps[0].enc[0].page_shift = 12;
1440 info->sps[0].enc[0].pte_enc = 0;
1441
1442 /*
1443 * 64k large page size.
1444 * We only want to put this in if the CPUs we're emulating
1445 * support it, but unfortunately we don't have a vcpu easily
1446 * to hand here to test. Just pick the first vcpu, and if
1447 * that doesn't exist yet, report the minimum capability,
1448 * i.e., no 64k pages.
1449 * 1T segment support goes along with 64k pages.
1450 */
1451 i = 1;
1452 vcpu = kvm_get_vcpu(kvm, 0);
1453 if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
1454 info->flags = KVM_PPC_1T_SEGMENTS;
1455 info->sps[i].page_shift = 16;
1456 info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
1457 info->sps[i].enc[0].page_shift = 16;
1458 info->sps[i].enc[0].pte_enc = 1;
1459 ++i;
1460 }
1461
1462 /* Standard 16M large page size segment */
1463 info->sps[i].page_shift = 24;
1464 info->sps[i].slb_enc = SLB_VSID_L;
1465 info->sps[i].enc[0].page_shift = 24;
1466 info->sps[i].enc[0].pte_enc = 0;
1302 1467
1303void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 1468 return 0;
1469}
1470#else
1471static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
1472 struct kvm_ppc_smmu_info *info)
1304{ 1473{
1474 /* We should not get called */
1475 BUG();
1305} 1476}
1477#endif /* CONFIG_PPC64 */
1306 1478
1307static unsigned int kvm_global_user_count = 0; 1479static unsigned int kvm_global_user_count = 0;
1308static DEFINE_SPINLOCK(kvm_global_user_count_lock); 1480static DEFINE_SPINLOCK(kvm_global_user_count_lock);
1309 1481
1310int kvmppc_core_init_vm(struct kvm *kvm) 1482static int kvmppc_core_init_vm_pr(struct kvm *kvm)
1311{ 1483{
1312#ifdef CONFIG_PPC64 1484 mutex_init(&kvm->arch.hpt_mutex);
1313 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1314 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
1315#endif
1316 1485
1317 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 1486 if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
1318 spin_lock(&kvm_global_user_count_lock); 1487 spin_lock(&kvm_global_user_count_lock);
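
In the reworked kvm_vm_ioctl_get_smmu_info_pr() above, the 4k and 16M segment sizes are always reported, while the 64k entry and the KVM_PPC_1T_SEGMENTS flag are only advertised when vcpu 0 carries BOOK3S_HFLAG_MULTI_PGSIZE. A toy version of how such a table is filled; the struct below is a simplification, not the real kvm_ppc_smmu_info layout with its SLB and HPTE encodings:

#include <stdbool.h>
#include <stdio.h>

/* simplified segment/page-size entry */
struct seg_page_size {
	int seg_page_shift;	/* base page size selected by the segment */
	int pte_page_shift;	/* page size actually encoded in the HPTE */
};

static int fill_page_sizes(struct seg_page_size sps[3], bool have_64k)
{
	int i = 0;

	sps[i++] = (struct seg_page_size){ 12, 12 };	/* 4k, always present */
	if (have_64k)
		sps[i++] = (struct seg_page_size){ 16, 16 };	/* 64k pages */
	sps[i++] = (struct seg_page_size){ 24, 24 };	/* 16M large pages */

	return i;	/* number of entries filled */
}

int main(void)
{
	struct seg_page_size sps[3];
	int n = fill_page_sizes(sps, true);

	for (int i = 0; i < n; i++)
		printf("segment base 2^%d, HPTE page 2^%d\n",
		       sps[i].seg_page_shift, sps[i].pte_page_shift);
	return 0;
}
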
@@ -1323,7 +1492,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1323 return 0; 1492 return 0;
1324} 1493}
1325 1494
1326void kvmppc_core_destroy_vm(struct kvm *kvm) 1495static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
1327{ 1496{
1328#ifdef CONFIG_PPC64 1497#ifdef CONFIG_PPC64
1329 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 1498 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
@@ -1338,26 +1507,81 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
1338 } 1507 }
1339} 1508}
1340 1509
1341static int kvmppc_book3s_init(void) 1510static int kvmppc_core_check_processor_compat_pr(void)
1342{ 1511{
1343 int r; 1512 /* we are always compatible */
1513 return 0;
1514}
1344 1515
1345 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, 1516static long kvm_arch_vm_ioctl_pr(struct file *filp,
1346 THIS_MODULE); 1517 unsigned int ioctl, unsigned long arg)
1518{
1519 return -ENOTTY;
1520}
1347 1521
1348 if (r) 1522static struct kvmppc_ops kvm_ops_pr = {
1523 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
1524 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
1525 .get_one_reg = kvmppc_get_one_reg_pr,
1526 .set_one_reg = kvmppc_set_one_reg_pr,
1527 .vcpu_load = kvmppc_core_vcpu_load_pr,
1528 .vcpu_put = kvmppc_core_vcpu_put_pr,
1529 .set_msr = kvmppc_set_msr_pr,
1530 .vcpu_run = kvmppc_vcpu_run_pr,
1531 .vcpu_create = kvmppc_core_vcpu_create_pr,
1532 .vcpu_free = kvmppc_core_vcpu_free_pr,
1533 .check_requests = kvmppc_core_check_requests_pr,
1534 .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
1535 .flush_memslot = kvmppc_core_flush_memslot_pr,
1536 .prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
1537 .commit_memory_region = kvmppc_core_commit_memory_region_pr,
1538 .unmap_hva = kvm_unmap_hva_pr,
1539 .unmap_hva_range = kvm_unmap_hva_range_pr,
1540 .age_hva = kvm_age_hva_pr,
1541 .test_age_hva = kvm_test_age_hva_pr,
1542 .set_spte_hva = kvm_set_spte_hva_pr,
1543 .mmu_destroy = kvmppc_mmu_destroy_pr,
1544 .free_memslot = kvmppc_core_free_memslot_pr,
1545 .create_memslot = kvmppc_core_create_memslot_pr,
1546 .init_vm = kvmppc_core_init_vm_pr,
1547 .destroy_vm = kvmppc_core_destroy_vm_pr,
1548 .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
1549 .emulate_op = kvmppc_core_emulate_op_pr,
1550 .emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
1551 .emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
1552 .fast_vcpu_kick = kvm_vcpu_kick,
1553 .arch_vm_ioctl = kvm_arch_vm_ioctl_pr,
1554};
1555
1556
1557int kvmppc_book3s_init_pr(void)
1558{
1559 int r;
1560
1561 r = kvmppc_core_check_processor_compat_pr();
1562 if (r < 0)
1349 return r; 1563 return r;
1350 1564
1351 r = kvmppc_mmu_hpte_sysinit(); 1565 kvm_ops_pr.owner = THIS_MODULE;
1566 kvmppc_pr_ops = &kvm_ops_pr;
1352 1567
1568 r = kvmppc_mmu_hpte_sysinit();
1353 return r; 1569 return r;
1354} 1570}
1355 1571
1356static void kvmppc_book3s_exit(void) 1572void kvmppc_book3s_exit_pr(void)
1357{ 1573{
1574 kvmppc_pr_ops = NULL;
1358 kvmppc_mmu_hpte_sysexit(); 1575 kvmppc_mmu_hpte_sysexit();
1359 kvm_exit();
1360} 1576}
1361 1577
1362module_init(kvmppc_book3s_init); 1578/*
1363module_exit(kvmppc_book3s_exit); 1579 * We only support separate modules for book3s 64
1580 */
1581#ifdef CONFIG_PPC_BOOK3S_64
1582
1583module_init(kvmppc_book3s_init_pr);
1584module_exit(kvmppc_book3s_exit_pr);
1585
1586MODULE_LICENSE("GPL");
1587#endif
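
With the kvm_ops_pr table above, the PR backend no longer calls kvm_init()/kvm_exit() itself; it registers its callbacks through kvmppc_pr_ops, and common code reaches it via vcpu->kvm->arch.kvm_ops, as the booke.c hunks further down do for get/set_sregs and the one_reg interface. A cut-down model of that ops-table dispatch, with invented names standing in for the real structures:

#include <stdio.h>

struct vcpu;

/* a cut-down stand-in for struct kvmppc_ops */
struct kvm_ops {
	const char *name;
	int  (*vcpu_run)(struct vcpu *v);
	void (*set_msr)(struct vcpu *v, unsigned long msr);
};

struct vcpu {
	const struct kvm_ops *ops;	/* selected when the VM is created */
	unsigned long msr;
};

static int  pr_vcpu_run(struct vcpu *v) { (void)v; return 0; }
static void pr_set_msr(struct vcpu *v, unsigned long msr) { v->msr = msr; }

static const struct kvm_ops pr_ops = {
	.name     = "PR",
	.vcpu_run = pr_vcpu_run,
	.set_msr  = pr_set_msr,
};

int main(void)
{
	struct vcpu v = { .ops = &pr_ops };

	/* generic code dispatches through the table, never to PR/HV directly */
	v.ops->set_msr(&v, 0x8000UL);	/* arbitrary MSR value for the demo */
	printf("%s backend returned %d\n", v.ops->name, v.ops->vcpu_run(&v));
	return 0;
}
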
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index da0e0bc268bd..5efa97b993d8 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -21,6 +21,8 @@
21#include <asm/kvm_ppc.h> 21#include <asm/kvm_ppc.h>
22#include <asm/kvm_book3s.h> 22#include <asm/kvm_book3s.h>
23 23
24#define HPTE_SIZE 16 /* bytes per HPT entry */
25
24static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index) 26static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
25{ 27{
26 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 28 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
@@ -40,32 +42,41 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
40 long pte_index = kvmppc_get_gpr(vcpu, 5); 42 long pte_index = kvmppc_get_gpr(vcpu, 5);
41 unsigned long pteg[2 * 8]; 43 unsigned long pteg[2 * 8];
42 unsigned long pteg_addr, i, *hpte; 44 unsigned long pteg_addr, i, *hpte;
45 long int ret;
43 46
47 i = pte_index & 7;
44 pte_index &= ~7UL; 48 pte_index &= ~7UL;
45 pteg_addr = get_pteg_addr(vcpu, pte_index); 49 pteg_addr = get_pteg_addr(vcpu, pte_index);
46 50
51 mutex_lock(&vcpu->kvm->arch.hpt_mutex);
47 copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); 52 copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
48 hpte = pteg; 53 hpte = pteg;
49 54
55 ret = H_PTEG_FULL;
50 if (likely((flags & H_EXACT) == 0)) { 56 if (likely((flags & H_EXACT) == 0)) {
51 pte_index &= ~7UL;
52 for (i = 0; ; ++i) { 57 for (i = 0; ; ++i) {
53 if (i == 8) 58 if (i == 8)
54 return H_PTEG_FULL; 59 goto done;
55 if ((*hpte & HPTE_V_VALID) == 0) 60 if ((*hpte & HPTE_V_VALID) == 0)
56 break; 61 break;
57 hpte += 2; 62 hpte += 2;
58 } 63 }
59 } else { 64 } else {
60 i = kvmppc_get_gpr(vcpu, 5) & 7UL;
61 hpte += i * 2; 65 hpte += i * 2;
66 if (*hpte & HPTE_V_VALID)
67 goto done;
62 } 68 }
63 69
64 hpte[0] = kvmppc_get_gpr(vcpu, 6); 70 hpte[0] = kvmppc_get_gpr(vcpu, 6);
65 hpte[1] = kvmppc_get_gpr(vcpu, 7); 71 hpte[1] = kvmppc_get_gpr(vcpu, 7);
66 copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); 72 pteg_addr += i * HPTE_SIZE;
67 kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 73 copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
68 kvmppc_set_gpr(vcpu, 4, pte_index | i); 74 kvmppc_set_gpr(vcpu, 4, pte_index | i);
75 ret = H_SUCCESS;
76
77 done:
78 mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
79 kvmppc_set_gpr(vcpu, 3, ret);
69 80
70 return EMULATE_DONE; 81 return EMULATE_DONE;
71} 82}
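
kvmppc_h_pr_enter() above now takes hpt_mutex around the PTEG update, honours H_EXACT by refusing an already-valid slot, otherwise scans the eight entries of the group, and writes back only the chosen 16-byte HPTE rather than the whole group. The slot selection reduces to something like this sketch, where HPTE_VALID is a placeholder for HPTE_V_VALID:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HPTE_VALID	1ULL	/* placeholder valid bit */
#define SLOTS_PER_PTEG	8

/*
 * Pick a slot for H_ENTER: with H_EXACT the caller names the slot and it
 * must currently be invalid; otherwise take the first free entry.
 * Returns the slot index, or -1 for H_PTEG_FULL.
 */
static int pick_hpte_slot(const uint64_t pteg[SLOTS_PER_PTEG][2],
			  bool exact, unsigned int wanted)
{
	if (exact)
		return (pteg[wanted][0] & HPTE_VALID) ? -1 : (int)wanted;

	for (unsigned int i = 0; i < SLOTS_PER_PTEG; i++)
		if (!(pteg[i][0] & HPTE_VALID))
			return (int)i;

	return -1;
}

int main(void)
{
	uint64_t pteg[SLOTS_PER_PTEG][2] = { { HPTE_VALID, 0 } };	/* slot 0 in use */

	printf("free slot: %d\n", pick_hpte_slot(pteg, false, 0));	/* -> 1 */
	printf("exact slot 0: %d\n", pick_hpte_slot(pteg, true, 0));	/* -> -1 */
	return 0;
}
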
@@ -77,26 +88,31 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
77 unsigned long avpn = kvmppc_get_gpr(vcpu, 6); 88 unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
78 unsigned long v = 0, pteg, rb; 89 unsigned long v = 0, pteg, rb;
79 unsigned long pte[2]; 90 unsigned long pte[2];
91 long int ret;
80 92
81 pteg = get_pteg_addr(vcpu, pte_index); 93 pteg = get_pteg_addr(vcpu, pte_index);
94 mutex_lock(&vcpu->kvm->arch.hpt_mutex);
82 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 95 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
83 96
97 ret = H_NOT_FOUND;
84 if ((pte[0] & HPTE_V_VALID) == 0 || 98 if ((pte[0] & HPTE_V_VALID) == 0 ||
85 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || 99 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
86 ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) { 100 ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
87 kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); 101 goto done;
88 return EMULATE_DONE;
89 }
90 102
91 copy_to_user((void __user *)pteg, &v, sizeof(v)); 103 copy_to_user((void __user *)pteg, &v, sizeof(v));
92 104
93 rb = compute_tlbie_rb(pte[0], pte[1], pte_index); 105 rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
94 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); 106 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
95 107
96 kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 108 ret = H_SUCCESS;
97 kvmppc_set_gpr(vcpu, 4, pte[0]); 109 kvmppc_set_gpr(vcpu, 4, pte[0]);
98 kvmppc_set_gpr(vcpu, 5, pte[1]); 110 kvmppc_set_gpr(vcpu, 5, pte[1]);
99 111
112 done:
113 mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
114 kvmppc_set_gpr(vcpu, 3, ret);
115
100 return EMULATE_DONE; 116 return EMULATE_DONE;
101} 117}
102 118
@@ -124,6 +140,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
124 int paramnr = 4; 140 int paramnr = 4;
125 int ret = H_SUCCESS; 141 int ret = H_SUCCESS;
126 142
143 mutex_lock(&vcpu->kvm->arch.hpt_mutex);
127 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { 144 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
128 unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i)); 145 unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
129 unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1); 146 unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
@@ -172,6 +189,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
172 } 189 }
173 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); 190 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
174 } 191 }
192 mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
175 kvmppc_set_gpr(vcpu, 3, ret); 193 kvmppc_set_gpr(vcpu, 3, ret);
176 194
177 return EMULATE_DONE; 195 return EMULATE_DONE;
@@ -184,15 +202,16 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
184 unsigned long avpn = kvmppc_get_gpr(vcpu, 6); 202 unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
185 unsigned long rb, pteg, r, v; 203 unsigned long rb, pteg, r, v;
186 unsigned long pte[2]; 204 unsigned long pte[2];
205 long int ret;
187 206
188 pteg = get_pteg_addr(vcpu, pte_index); 207 pteg = get_pteg_addr(vcpu, pte_index);
208 mutex_lock(&vcpu->kvm->arch.hpt_mutex);
189 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 209 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
190 210
211 ret = H_NOT_FOUND;
191 if ((pte[0] & HPTE_V_VALID) == 0 || 212 if ((pte[0] & HPTE_V_VALID) == 0 ||
192 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { 213 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
193 kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); 214 goto done;
194 return EMULATE_DONE;
195 }
196 215
197 v = pte[0]; 216 v = pte[0];
198 r = pte[1]; 217 r = pte[1];
@@ -207,8 +226,11 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
207 rb = compute_tlbie_rb(v, r, pte_index); 226 rb = compute_tlbie_rb(v, r, pte_index);
208 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); 227 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
209 copy_to_user((void __user *)pteg, pte, sizeof(pte)); 228 copy_to_user((void __user *)pteg, pte, sizeof(pte));
229 ret = H_SUCCESS;
210 230
211 kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 231 done:
232 mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
233 kvmppc_set_gpr(vcpu, 3, ret);
212 234
213 return EMULATE_DONE; 235 return EMULATE_DONE;
214} 236}
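
The enter/remove/protect handlers in this file now share one shape: take kvm->arch.hpt_mutex, default the result to the failure code, jump to a common done: label on any failed check, and only there drop the mutex and report the result through GPR3 while still returning EMULATE_DONE. A small sketch of that locking and error-path pattern, with a pthread mutex and a placeholder error code standing in for the kernel primitives:

#include <pthread.h>
#include <stdio.h>

#define H_SUCCESS    0
#define H_NOT_FOUND  (-2)	/* placeholder hcall error code */

static pthread_mutex_t hpt_mutex = PTHREAD_MUTEX_INITIALIZER;
static long gpr3;		/* stands in for kvmppc_set_gpr(vcpu, 3, ...) */

static int hcall_template(int entry_is_valid)
{
	long ret;

	pthread_mutex_lock(&hpt_mutex);
	ret = H_NOT_FOUND;		/* assume failure until proven otherwise */

	if (!entry_is_valid)
		goto done;		/* every early exit still unlocks below */

	/* ... read, modify and write back the guest hash table here ... */
	ret = H_SUCCESS;

done:
	pthread_mutex_unlock(&hpt_mutex);
	gpr3 = ret;			/* the guest sees the result in r3 */
	return 0;			/* EMULATE_DONE either way */
}

int main(void)
{
	hcall_template(1);
	printf("hcall result: %ld\n", gpr3);
	return 0;
}
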
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 8f7633e3afb8..a38c4c9edab8 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -38,32 +38,6 @@
38 38
39#define FUNC(name) GLUE(.,name) 39#define FUNC(name) GLUE(.,name)
40 40
41 .globl kvmppc_skip_interrupt
42kvmppc_skip_interrupt:
43 /*
44 * Here all GPRs are unchanged from when the interrupt happened
45 * except for r13, which is saved in SPRG_SCRATCH0.
46 */
47 mfspr r13, SPRN_SRR0
48 addi r13, r13, 4
49 mtspr SPRN_SRR0, r13
50 GET_SCRATCH0(r13)
51 rfid
52 b .
53
54 .globl kvmppc_skip_Hinterrupt
55kvmppc_skip_Hinterrupt:
56 /*
57 * Here all GPRs are unchanged from when the interrupt happened
58 * except for r13, which is saved in SPRG_SCRATCH0.
59 */
60 mfspr r13, SPRN_HSRR0
61 addi r13, r13, 4
62 mtspr SPRN_HSRR0, r13
63 GET_SCRATCH0(r13)
64 hrfid
65 b .
66
67#elif defined(CONFIG_PPC_BOOK3S_32) 41#elif defined(CONFIG_PPC_BOOK3S_32)
68 42
69#define FUNC(name) name 43#define FUNC(name) name
@@ -179,11 +153,15 @@ _GLOBAL(kvmppc_entry_trampoline)
179 153
180 li r6, MSR_IR | MSR_DR 154 li r6, MSR_IR | MSR_DR
181 andc r6, r5, r6 /* Clear DR and IR in MSR value */ 155 andc r6, r5, r6 /* Clear DR and IR in MSR value */
156#ifdef CONFIG_PPC_BOOK3S_32
182 /* 157 /*
183 * Set EE in HOST_MSR so that it's enabled when we get into our 158 * Set EE in HOST_MSR so that it's enabled when we get into our
184 * C exit handler function 159 * C exit handler function. On 64-bit we delay enabling
160 * interrupts until we have finished transferring stuff
161 * to or from the PACA.
185 */ 162 */
186 ori r5, r5, MSR_EE 163 ori r5, r5, MSR_EE
164#endif
187 mtsrr0 r7 165 mtsrr0 r7
188 mtsrr1 r6 166 mtsrr1 r6
189 RFI 167 RFI
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 3219ba895246..cf95cdef73c9 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -260,6 +260,7 @@ fail:
260 */ 260 */
261 return rc; 261 return rc;
262} 262}
263EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);
263 264
264void kvmppc_rtas_tokens_free(struct kvm *kvm) 265void kvmppc_rtas_tokens_free(struct kvm *kvm)
265{ 266{
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1abe4788191a..bc50c97751d3 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -161,8 +161,8 @@ kvmppc_handler_trampoline_enter_end:
161.global kvmppc_handler_trampoline_exit 161.global kvmppc_handler_trampoline_exit
162kvmppc_handler_trampoline_exit: 162kvmppc_handler_trampoline_exit:
163 163
164.global kvmppc_interrupt 164.global kvmppc_interrupt_pr
165kvmppc_interrupt: 165kvmppc_interrupt_pr:
166 166
167 /* Register usage at this point: 167 /* Register usage at this point:
168 * 168 *
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index a3a5cb8ee7ea..02a17dcf1610 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
818 } 818 }
819 819
820 /* Check for real mode returning too hard */ 820 /* Check for real mode returning too hard */
821 if (xics->real_mode) 821 if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
822 return kvmppc_xics_rm_complete(vcpu, req); 822 return kvmppc_xics_rm_complete(vcpu, req);
823 823
824 switch (req) { 824 switch (req) {
@@ -840,6 +840,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
840 840
841 return rc; 841 return rc;
842} 842}
843EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
843 844
844 845
845/* -- Initialisation code etc. -- */ 846/* -- Initialisation code etc. -- */
@@ -1250,13 +1251,13 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
1250 1251
1251 xics_debugfs_init(xics); 1252 xics_debugfs_init(xics);
1252 1253
1253#ifdef CONFIG_KVM_BOOK3S_64_HV 1254#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1254 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 1255 if (cpu_has_feature(CPU_FTR_ARCH_206)) {
1255 /* Enable real mode support */ 1256 /* Enable real mode support */
1256 xics->real_mode = ENABLE_REALMODE; 1257 xics->real_mode = ENABLE_REALMODE;
1257 xics->real_mode_dbg = DEBUG_REALMODE; 1258 xics->real_mode_dbg = DEBUG_REALMODE;
1258 } 1259 }
1259#endif /* CONFIG_KVM_BOOK3S_64_HV */ 1260#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
1260 1261
1261 return 0; 1262 return 0;
1262} 1263}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 5133199f6cb7..53e65a210b9a 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -40,7 +40,9 @@
40 40
41#include "timing.h" 41#include "timing.h"
42#include "booke.h" 42#include "booke.h"
43#include "trace.h" 43
44#define CREATE_TRACE_POINTS
45#include "trace_booke.h"
44 46
45unsigned long kvmppc_booke_handlers; 47unsigned long kvmppc_booke_handlers;
46 48
@@ -133,6 +135,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
133#endif 135#endif
134} 136}
135 137
138static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
139{
140 /* Synchronize guest's desire to get debug interrupts into shadow MSR */
141#ifndef CONFIG_KVM_BOOKE_HV
142 vcpu->arch.shadow_msr &= ~MSR_DE;
143 vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
144#endif
145
146 /* Force enable debug interrupts when user space wants to debug */
147 if (vcpu->guest_debug) {
148#ifdef CONFIG_KVM_BOOKE_HV
149 /*
150 * Since there is no shadow MSR, sync MSR_DE into the guest
151 * visible MSR.
152 */
153 vcpu->arch.shared->msr |= MSR_DE;
154#else
155 vcpu->arch.shadow_msr |= MSR_DE;
156 vcpu->arch.shared->msr &= ~MSR_DE;
157#endif
158 }
159}
160
136/* 161/*
137 * Helper function for "full" MSR writes. No need to call this if only 162 * Helper function for "full" MSR writes. No need to call this if only
138 * EE/CE/ME/DE/RI are changing. 163 * EE/CE/ME/DE/RI are changing.
@@ -150,6 +175,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
150 kvmppc_mmu_msr_notify(vcpu, old_msr); 175 kvmppc_mmu_msr_notify(vcpu, old_msr);
151 kvmppc_vcpu_sync_spe(vcpu); 176 kvmppc_vcpu_sync_spe(vcpu);
152 kvmppc_vcpu_sync_fpu(vcpu); 177 kvmppc_vcpu_sync_fpu(vcpu);
178 kvmppc_vcpu_sync_debug(vcpu);
153} 179}
154 180
155static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, 181static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -655,6 +681,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
655int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 681int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
656{ 682{
657 int ret, s; 683 int ret, s;
684 struct thread_struct thread;
658#ifdef CONFIG_PPC_FPU 685#ifdef CONFIG_PPC_FPU
659 struct thread_fp_state fp; 686 struct thread_fp_state fp;
660 int fpexc_mode; 687 int fpexc_mode;
@@ -695,6 +722,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
695 kvmppc_load_guest_fp(vcpu); 722 kvmppc_load_guest_fp(vcpu);
696#endif 723#endif
697 724
725 /* Switch to guest debug context */
726 thread.debug = vcpu->arch.shadow_dbg_reg;
727 switch_booke_debug_regs(&thread);
728 thread.debug = current->thread.debug;
729 current->thread.debug = vcpu->arch.shadow_dbg_reg;
730
698 kvmppc_fix_ee_before_entry(); 731 kvmppc_fix_ee_before_entry();
699 732
700 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 733 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -702,6 +735,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
702 /* No need for kvm_guest_exit. It's done in handle_exit. 735 /* No need for kvm_guest_exit. It's done in handle_exit.
703 We also get here with interrupts enabled. */ 736 We also get here with interrupts enabled. */
704 737
738 /* Switch back to user space debug context */
739 switch_booke_debug_regs(&thread);
740 current->thread.debug = thread.debug;
741
705#ifdef CONFIG_PPC_FPU 742#ifdef CONFIG_PPC_FPU
706 kvmppc_save_guest_fp(vcpu); 743 kvmppc_save_guest_fp(vcpu);
707 744
@@ -757,6 +794,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
757 } 794 }
758} 795}
759 796
797static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
798{
799 struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
800 u32 dbsr = vcpu->arch.dbsr;
801
802 run->debug.arch.status = 0;
803 run->debug.arch.address = vcpu->arch.pc;
804
805 if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
806 run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
807 } else {
808 if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
809 run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
810 else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
811 run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
812 if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
813 run->debug.arch.address = dbg_reg->dac1;
814 else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
815 run->debug.arch.address = dbg_reg->dac2;
816 }
817
818 return RESUME_HOST;
819}
820
760static void kvmppc_fill_pt_regs(struct pt_regs *regs) 821static void kvmppc_fill_pt_regs(struct pt_regs *regs)
761{ 822{
762 ulong r1, ip, msr, lr; 823 ulong r1, ip, msr, lr;
@@ -817,6 +878,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
817 case BOOKE_INTERRUPT_CRITICAL: 878 case BOOKE_INTERRUPT_CRITICAL:
818 unknown_exception(&regs); 879 unknown_exception(&regs);
819 break; 880 break;
881 case BOOKE_INTERRUPT_DEBUG:
882 /* Save DBSR before preemption is enabled */
883 vcpu->arch.dbsr = mfspr(SPRN_DBSR);
884 kvmppc_clear_dbsr();
885 break;
820 } 886 }
821} 887}
822 888
@@ -1134,18 +1200,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
1134 } 1200 }
1135 1201
1136 case BOOKE_INTERRUPT_DEBUG: { 1202 case BOOKE_INTERRUPT_DEBUG: {
1137 u32 dbsr; 1203 r = kvmppc_handle_debug(run, vcpu);
1138 1204 if (r == RESUME_HOST)
1139 vcpu->arch.pc = mfspr(SPRN_CSRR0); 1205 run->exit_reason = KVM_EXIT_DEBUG;
1140
1141 /* clear IAC events in DBSR register */
1142 dbsr = mfspr(SPRN_DBSR);
1143 dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
1144 mtspr(SPRN_DBSR, dbsr);
1145
1146 run->exit_reason = KVM_EXIT_DEBUG;
1147 kvmppc_account_exit(vcpu, DEBUG_EXITS); 1206 kvmppc_account_exit(vcpu, DEBUG_EXITS);
1148 r = RESUME_HOST;
1149 break; 1207 break;
1150 } 1208 }
1151 1209
@@ -1196,7 +1254,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1196 kvmppc_set_msr(vcpu, 0); 1254 kvmppc_set_msr(vcpu, 0);
1197 1255
1198#ifndef CONFIG_KVM_BOOKE_HV 1256#ifndef CONFIG_KVM_BOOKE_HV
1199 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; 1257 vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
1200 vcpu->arch.shadow_pid = 1; 1258 vcpu->arch.shadow_pid = 1;
1201 vcpu->arch.shared->msr = 0; 1259 vcpu->arch.shared->msr = 0;
1202#endif 1260#endif
@@ -1358,7 +1416,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
1358 return 0; 1416 return 0;
1359} 1417}
1360 1418
1361void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 1419int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
1362{ 1420{
1363 sregs->u.e.features |= KVM_SREGS_E_IVOR; 1421 sregs->u.e.features |= KVM_SREGS_E_IVOR;
1364 1422
@@ -1378,6 +1436,7 @@ void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
1378 sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 1436 sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
1379 sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 1437 sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
1380 sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 1438 sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
1439 return 0;
1381} 1440}
1382 1441
1383int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 1442int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@ -1412,8 +1471,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1412 1471
1413 get_sregs_base(vcpu, sregs); 1472 get_sregs_base(vcpu, sregs);
1414 get_sregs_arch206(vcpu, sregs); 1473 get_sregs_arch206(vcpu, sregs);
1415 kvmppc_core_get_sregs(vcpu, sregs); 1474 return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
1416 return 0;
1417} 1475}
1418 1476
1419int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 1477int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
@@ -1432,7 +1490,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1432 if (ret < 0) 1490 if (ret < 0)
1433 return ret; 1491 return ret;
1434 1492
1435 return kvmppc_core_set_sregs(vcpu, sregs); 1493 return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
1436} 1494}
1437 1495
1438int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1496int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
@@ -1440,7 +1498,6 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1440 int r = 0; 1498 int r = 0;
1441 union kvmppc_one_reg val; 1499 union kvmppc_one_reg val;
1442 int size; 1500 int size;
1443 long int i;
1444 1501
1445 size = one_reg_size(reg->id); 1502 size = one_reg_size(reg->id);
1446 if (size > sizeof(val)) 1503 if (size > sizeof(val))
@@ -1448,16 +1505,24 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1448 1505
1449 switch (reg->id) { 1506 switch (reg->id) {
1450 case KVM_REG_PPC_IAC1: 1507 case KVM_REG_PPC_IAC1:
1508 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
1509 break;
1451 case KVM_REG_PPC_IAC2: 1510 case KVM_REG_PPC_IAC2:
1511 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
1512 break;
1513#if CONFIG_PPC_ADV_DEBUG_IACS > 2
1452 case KVM_REG_PPC_IAC3: 1514 case KVM_REG_PPC_IAC3:
1515 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
1516 break;
1453 case KVM_REG_PPC_IAC4: 1517 case KVM_REG_PPC_IAC4:
1454 i = reg->id - KVM_REG_PPC_IAC1; 1518 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
1455 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
1456 break; 1519 break;
1520#endif
1457 case KVM_REG_PPC_DAC1: 1521 case KVM_REG_PPC_DAC1:
1522 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
1523 break;
1458 case KVM_REG_PPC_DAC2: 1524 case KVM_REG_PPC_DAC2:
1459 i = reg->id - KVM_REG_PPC_DAC1; 1525 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
1460 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
1461 break; 1526 break;
1462 case KVM_REG_PPC_EPR: { 1527 case KVM_REG_PPC_EPR: {
1463 u32 epr = get_guest_epr(vcpu); 1528 u32 epr = get_guest_epr(vcpu);
@@ -1476,10 +1541,13 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1476 val = get_reg_val(reg->id, vcpu->arch.tsr); 1541 val = get_reg_val(reg->id, vcpu->arch.tsr);
1477 break; 1542 break;
1478 case KVM_REG_PPC_DEBUG_INST: 1543 case KVM_REG_PPC_DEBUG_INST:
1479 val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); 1544 val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
1545 break;
1546 case KVM_REG_PPC_VRSAVE:
1547 val = get_reg_val(reg->id, vcpu->arch.vrsave);
1480 break; 1548 break;
1481 default: 1549 default:
1482 r = kvmppc_get_one_reg(vcpu, reg->id, &val); 1550 r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
1483 break; 1551 break;
1484 } 1552 }
1485 1553
@@ -1497,7 +1565,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1497 int r = 0; 1565 int r = 0;
1498 union kvmppc_one_reg val; 1566 union kvmppc_one_reg val;
1499 int size; 1567 int size;
1500 long int i;
1501 1568
1502 size = one_reg_size(reg->id); 1569 size = one_reg_size(reg->id);
1503 if (size > sizeof(val)) 1570 if (size > sizeof(val))
@@ -1508,16 +1575,24 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1508 1575
1509 switch (reg->id) { 1576 switch (reg->id) {
1510 case KVM_REG_PPC_IAC1: 1577 case KVM_REG_PPC_IAC1:
1578 vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
1579 break;
1511 case KVM_REG_PPC_IAC2: 1580 case KVM_REG_PPC_IAC2:
1581 vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
1582 break;
1583#if CONFIG_PPC_ADV_DEBUG_IACS > 2
1512 case KVM_REG_PPC_IAC3: 1584 case KVM_REG_PPC_IAC3:
1585 vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
1586 break;
1513 case KVM_REG_PPC_IAC4: 1587 case KVM_REG_PPC_IAC4:
1514 i = reg->id - KVM_REG_PPC_IAC1; 1588 vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
1515 vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
1516 break; 1589 break;
1590#endif
1517 case KVM_REG_PPC_DAC1: 1591 case KVM_REG_PPC_DAC1:
1592 vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
1593 break;
1518 case KVM_REG_PPC_DAC2: 1594 case KVM_REG_PPC_DAC2:
1519 i = reg->id - KVM_REG_PPC_DAC1; 1595 vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
1520 vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
1521 break; 1596 break;
1522 case KVM_REG_PPC_EPR: { 1597 case KVM_REG_PPC_EPR: {
1523 u32 new_epr = set_reg_val(reg->id, val); 1598 u32 new_epr = set_reg_val(reg->id, val);
@@ -1551,20 +1626,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1551 kvmppc_set_tcr(vcpu, tcr); 1626 kvmppc_set_tcr(vcpu, tcr);
1552 break; 1627 break;
1553 } 1628 }
1629 case KVM_REG_PPC_VRSAVE:
1630 vcpu->arch.vrsave = set_reg_val(reg->id, val);
1631 break;
1554 default: 1632 default:
1555 r = kvmppc_set_one_reg(vcpu, reg->id, &val); 1633 r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
1556 break; 1634 break;
1557 } 1635 }
1558 1636
1559 return r; 1637 return r;
1560} 1638}
1561 1639
1562int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1563 struct kvm_guest_debug *dbg)
1564{
1565 return -EINVAL;
1566}
1567
1568int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1640int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1569{ 1641{
1570 return -ENOTSUPP; 1642 return -ENOTSUPP;
@@ -1589,12 +1661,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1589 return -ENOTSUPP; 1661 return -ENOTSUPP;
1590} 1662}
1591 1663
1592void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1664void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1593 struct kvm_memory_slot *dont) 1665 struct kvm_memory_slot *dont)
1594{ 1666{
1595} 1667}
1596 1668
1597int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1669int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1598 unsigned long npages) 1670 unsigned long npages)
1599{ 1671{
1600 return 0; 1672 return 0;
@@ -1670,6 +1742,157 @@ void kvmppc_decrementer_func(unsigned long data)
1670 kvmppc_set_tsr_bits(vcpu, TSR_DIS); 1742 kvmppc_set_tsr_bits(vcpu, TSR_DIS);
1671} 1743}
1672 1744
1745static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg,
1746 uint64_t addr, int index)
1747{
1748 switch (index) {
1749 case 0:
1750 dbg_reg->dbcr0 |= DBCR0_IAC1;
1751 dbg_reg->iac1 = addr;
1752 break;
1753 case 1:
1754 dbg_reg->dbcr0 |= DBCR0_IAC2;
1755 dbg_reg->iac2 = addr;
1756 break;
1757#if CONFIG_PPC_ADV_DEBUG_IACS > 2
1758 case 2:
1759 dbg_reg->dbcr0 |= DBCR0_IAC3;
1760 dbg_reg->iac3 = addr;
1761 break;
1762 case 3:
1763 dbg_reg->dbcr0 |= DBCR0_IAC4;
1764 dbg_reg->iac4 = addr;
1765 break;
1766#endif
1767 default:
1768 return -EINVAL;
1769 }
1770
1771 dbg_reg->dbcr0 |= DBCR0_IDM;
1772 return 0;
1773}
1774
1775static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
1776 int type, int index)
1777{
1778 switch (index) {
1779 case 0:
1780 if (type & KVMPPC_DEBUG_WATCH_READ)
1781 dbg_reg->dbcr0 |= DBCR0_DAC1R;
1782 if (type & KVMPPC_DEBUG_WATCH_WRITE)
1783 dbg_reg->dbcr0 |= DBCR0_DAC1W;
1784 dbg_reg->dac1 = addr;
1785 break;
1786 case 1:
1787 if (type & KVMPPC_DEBUG_WATCH_READ)
1788 dbg_reg->dbcr0 |= DBCR0_DAC2R;
1789 if (type & KVMPPC_DEBUG_WATCH_WRITE)
1790 dbg_reg->dbcr0 |= DBCR0_DAC2W;
1791 dbg_reg->dac2 = addr;
1792 break;
1793 default:
1794 return -EINVAL;
1795 }
1796
1797 dbg_reg->dbcr0 |= DBCR0_IDM;
1798 return 0;
1799}
1800void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
1801{
1802 /* XXX: Add similar MSR protection for BookE-PR */
1803#ifdef CONFIG_KVM_BOOKE_HV
1804 BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP));
1805 if (set) {
1806 if (prot_bitmap & MSR_UCLE)
1807 vcpu->arch.shadow_msrp |= MSRP_UCLEP;
1808 if (prot_bitmap & MSR_DE)
1809 vcpu->arch.shadow_msrp |= MSRP_DEP;
1810 if (prot_bitmap & MSR_PMM)
1811 vcpu->arch.shadow_msrp |= MSRP_PMMP;
1812 } else {
1813 if (prot_bitmap & MSR_UCLE)
1814 vcpu->arch.shadow_msrp &= ~MSRP_UCLEP;
1815 if (prot_bitmap & MSR_DE)
1816 vcpu->arch.shadow_msrp &= ~MSRP_DEP;
1817 if (prot_bitmap & MSR_PMM)
1818 vcpu->arch.shadow_msrp &= ~MSRP_PMMP;
1819 }
1820#endif
1821}
1822
1823int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1824 struct kvm_guest_debug *dbg)
1825{
1826 struct debug_reg *dbg_reg;
1827 int n, b = 0, w = 0;
1828
1829 if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
1830 vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
1831 vcpu->guest_debug = 0;
1832 kvm_guest_protect_msr(vcpu, MSR_DE, false);
1833 return 0;
1834 }
1835
1836 kvm_guest_protect_msr(vcpu, MSR_DE, true);
1837 vcpu->guest_debug = dbg->control;
1838 vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
1839 /* Set DBCR0_EDM in guest visible DBCR0 register. */
1840 vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
1841
1842 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
1843 vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
1844
1845 /* Code below handles only HW breakpoints */
1846 dbg_reg = &(vcpu->arch.shadow_dbg_reg);
1847
1848#ifdef CONFIG_KVM_BOOKE_HV
1849 /*
1850 * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1
1851 * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0
1852 */
1853 dbg_reg->dbcr1 = 0;
1854 dbg_reg->dbcr2 = 0;
1855#else
1856 /*
1857 * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1
1858 * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR
1859 * is set.
1860 */
1861 dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
1862 DBCR1_IAC4US;
1863 dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
1864#endif
1865
1866 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1867 return 0;
1868
1869 for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
1870 uint64_t addr = dbg->arch.bp[n].addr;
1871 uint32_t type = dbg->arch.bp[n].type;
1872
1873 if (type == KVMPPC_DEBUG_NONE)
1874 continue;
1875
1876 if (type & !(KVMPPC_DEBUG_WATCH_READ |
1877 KVMPPC_DEBUG_WATCH_WRITE |
1878 KVMPPC_DEBUG_BREAKPOINT))
1879 return -EINVAL;
1880
1881 if (type & KVMPPC_DEBUG_BREAKPOINT) {
1882 /* Setting H/W breakpoint */
1883 if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
1884 return -EINVAL;
1885 } else {
1886 /* Setting H/W watchpoint */
1887 if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
1888 type, w++))
1889 return -EINVAL;
1890 }
1891 }
1892
1893 return 0;
1894}
1895
1673void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1896void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1674{ 1897{
1675 vcpu->cpu = smp_processor_id(); 1898 vcpu->cpu = smp_processor_id();
@@ -1680,6 +1903,44 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
1680{ 1903{
1681 current->thread.kvm_vcpu = NULL; 1904 current->thread.kvm_vcpu = NULL;
1682 vcpu->cpu = -1; 1905 vcpu->cpu = -1;
1906
1907 /* Clear pending debug event in DBSR */
1908 kvmppc_clear_dbsr();
1909}
1910
1911void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
1912{
1913 vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
1914}
1915
1916int kvmppc_core_init_vm(struct kvm *kvm)
1917{
1918 return kvm->arch.kvm_ops->init_vm(kvm);
1919}
1920
1921struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1922{
1923 return kvm->arch.kvm_ops->vcpu_create(kvm, id);
1924}
1925
1926void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
1927{
1928 vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
1929}
1930
1931void kvmppc_core_destroy_vm(struct kvm *kvm)
1932{
1933 kvm->arch.kvm_ops->destroy_vm(kvm);
1934}
1935
1936void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1937{
1938 vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
1939}
1940
1941void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
1942{
1943 vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
1683} 1944}
1684 1945
1685int __init kvmppc_booke_init(void) 1946int __init kvmppc_booke_init(void)
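A minimal userspace sketch, not part of the patch itself, of driving the KVM_SET_GUEST_DEBUG path implemented in booke.c above. It assumes the KVM_GUESTDBG_* and KVMPPC_DEBUG_* constants exported through <linux/kvm.h> by this series; vcpu_fd and bp_addr are caller-supplied placeholders and error handling is omitted.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Arm the first IAC slot as a hardware instruction breakpoint. */
static int set_hw_breakpoint(int vcpu_fd, unsigned long bp_addr)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
	dbg.arch.bp[0].addr = bp_addr;
	dbg.arch.bp[0].type = KVMPPC_DEBUG_BREAKPOINT;

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}

Clearing the debug state would be the same ioctl with dbg.control set to 0, which takes the early return at the top of kvm_arch_vcpu_ioctl_set_guest_debug().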
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 5fd1ba693579..09bfd9bc7cf8 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -99,6 +99,30 @@ enum int_class {
99 99
100void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); 100void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
101 101
102extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu);
103extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
104 unsigned int inst, int *advance);
105extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn,
106 ulong spr_val);
107extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn,
108 ulong *spr_val);
109extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
110extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
111 struct kvm_vcpu *vcpu,
112 unsigned int inst, int *advance);
113extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
114 ulong spr_val);
115extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
116 ulong *spr_val);
117extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
118extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
119 struct kvm_vcpu *vcpu,
120 unsigned int inst, int *advance);
121extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
122 ulong spr_val);
123extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
124 ulong *spr_val);
125
102/* 126/*
103 * Load up guest vcpu FP state if it's needed. 127 * Load up guest vcpu FP state if it's needed.
104 * It also set the MSR_FP in thread so that host know 128 * It also set the MSR_FP in thread so that host know
@@ -129,4 +153,9 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
129 giveup_fpu(current); 153 giveup_fpu(current);
130#endif 154#endif
131} 155}
156
157static inline void kvmppc_clear_dbsr(void)
158{
159 mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
160}
132#endif /* __KVM_BOOKE_H__ */ 161#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index ce6b73c29612..497b142f651c 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -305,7 +305,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
305{ 305{
306} 306}
307 307
308void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 308static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
309{ 309{
310 kvmppc_booke_vcpu_load(vcpu, cpu); 310 kvmppc_booke_vcpu_load(vcpu, cpu);
311 311
@@ -313,7 +313,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
313 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); 313 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
314} 314}
315 315
316void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 316static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)
317{ 317{
318#ifdef CONFIG_SPE 318#ifdef CONFIG_SPE
319 if (vcpu->arch.shadow_msr & MSR_SPE) 319 if (vcpu->arch.shadow_msr & MSR_SPE)
@@ -367,7 +367,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
367 return 0; 367 return 0;
368} 368}
369 369
370void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 370static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu,
371 struct kvm_sregs *sregs)
371{ 372{
372 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 373 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
373 374
@@ -388,9 +389,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
388 389
389 kvmppc_get_sregs_ivor(vcpu, sregs); 390 kvmppc_get_sregs_ivor(vcpu, sregs);
390 kvmppc_get_sregs_e500_tlb(vcpu, sregs); 391 kvmppc_get_sregs_e500_tlb(vcpu, sregs);
392 return 0;
391} 393}
392 394
393int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 395static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu,
396 struct kvm_sregs *sregs)
394{ 397{
395 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 398 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
396 int ret; 399 int ret;
@@ -425,21 +428,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
425 return kvmppc_set_sregs_ivor(vcpu, sregs); 428 return kvmppc_set_sregs_ivor(vcpu, sregs);
426} 429}
427 430
428int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 431static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
429 union kvmppc_one_reg *val) 432 union kvmppc_one_reg *val)
430{ 433{
431 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 434 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
432 return r; 435 return r;
433} 436}
434 437
435int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 438static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
436 union kvmppc_one_reg *val) 439 union kvmppc_one_reg *val)
437{ 440{
438 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 441 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
439 return r; 442 return r;
440} 443}
441 444
442struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 445static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
446 unsigned int id)
443{ 447{
444 struct kvmppc_vcpu_e500 *vcpu_e500; 448 struct kvmppc_vcpu_e500 *vcpu_e500;
445 struct kvm_vcpu *vcpu; 449 struct kvm_vcpu *vcpu;
@@ -481,7 +485,7 @@ out:
481 return ERR_PTR(err); 485 return ERR_PTR(err);
482} 486}
483 487
484void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 488static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
485{ 489{
486 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 490 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
487 491
@@ -492,15 +496,32 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
492 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 496 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
493} 497}
494 498
495int kvmppc_core_init_vm(struct kvm *kvm) 499static int kvmppc_core_init_vm_e500(struct kvm *kvm)
496{ 500{
497 return 0; 501 return 0;
498} 502}
499 503
500void kvmppc_core_destroy_vm(struct kvm *kvm) 504static void kvmppc_core_destroy_vm_e500(struct kvm *kvm)
501{ 505{
502} 506}
503 507
508static struct kvmppc_ops kvm_ops_e500 = {
509 .get_sregs = kvmppc_core_get_sregs_e500,
510 .set_sregs = kvmppc_core_set_sregs_e500,
511 .get_one_reg = kvmppc_get_one_reg_e500,
512 .set_one_reg = kvmppc_set_one_reg_e500,
513 .vcpu_load = kvmppc_core_vcpu_load_e500,
514 .vcpu_put = kvmppc_core_vcpu_put_e500,
515 .vcpu_create = kvmppc_core_vcpu_create_e500,
516 .vcpu_free = kvmppc_core_vcpu_free_e500,
517 .mmu_destroy = kvmppc_mmu_destroy_e500,
518 .init_vm = kvmppc_core_init_vm_e500,
519 .destroy_vm = kvmppc_core_destroy_vm_e500,
520 .emulate_op = kvmppc_core_emulate_op_e500,
521 .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
522 .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
523};
524
504static int __init kvmppc_e500_init(void) 525static int __init kvmppc_e500_init(void)
505{ 526{
506 int r, i; 527 int r, i;
@@ -512,11 +533,11 @@ static int __init kvmppc_e500_init(void)
512 533
513 r = kvmppc_core_check_processor_compat(); 534 r = kvmppc_core_check_processor_compat();
514 if (r) 535 if (r)
515 return r; 536 goto err_out;
516 537
517 r = kvmppc_booke_init(); 538 r = kvmppc_booke_init();
518 if (r) 539 if (r)
519 return r; 540 goto err_out;
520 541
521 /* copy extra E500 exception handlers */ 542 /* copy extra E500 exception handlers */
522 ivor[0] = mfspr(SPRN_IVOR32); 543 ivor[0] = mfspr(SPRN_IVOR32);
@@ -534,11 +555,19 @@ static int __init kvmppc_e500_init(void)
534 flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + 555 flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
535 ivor[max_ivor] + handler_len); 556 ivor[max_ivor] + handler_len);
536 557
537 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 558 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
559 if (r)
560 goto err_out;
561 kvm_ops_e500.owner = THIS_MODULE;
562 kvmppc_pr_ops = &kvm_ops_e500;
563
564err_out:
565 return r;
538} 566}
539 567
540static void __exit kvmppc_e500_exit(void) 568static void __exit kvmppc_e500_exit(void)
541{ 569{
570 kvmppc_pr_ops = NULL;
542 kvmppc_booke_exit(); 571 kvmppc_booke_exit();
543} 572}
544 573
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index c2e5e98453a6..4fd9650eb018 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -117,7 +117,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
117#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) 117#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
118#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) 118#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
119#define MAS2_ATTRIB_MASK \ 119#define MAS2_ATTRIB_MASK \
120 (MAS2_X0 | MAS2_X1) 120 (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G)
121#define MAS3_ATTRIB_MASK \ 121#define MAS3_ATTRIB_MASK \
122 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ 122 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
123 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) 123 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index b10a01243abd..89b7f821f6c4 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -26,6 +26,7 @@
26#define XOP_TLBRE 946 26#define XOP_TLBRE 946
27#define XOP_TLBWE 978 27#define XOP_TLBWE 978
28#define XOP_TLBILX 18 28#define XOP_TLBILX 18
29#define XOP_EHPRIV 270
29 30
30#ifdef CONFIG_KVM_E500MC 31#ifdef CONFIG_KVM_E500MC
31static int dbell2prio(ulong param) 32static int dbell2prio(ulong param)
@@ -82,8 +83,28 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
82} 83}
83#endif 84#endif
84 85
85int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 86static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
86 unsigned int inst, int *advance) 87 unsigned int inst, int *advance)
88{
89 int emulated = EMULATE_DONE;
90
91 switch (get_oc(inst)) {
92 case EHPRIV_OC_DEBUG:
93 run->exit_reason = KVM_EXIT_DEBUG;
94 run->debug.arch.address = vcpu->arch.pc;
95 run->debug.arch.status = 0;
96 kvmppc_account_exit(vcpu, DEBUG_EXITS);
97 emulated = EMULATE_EXIT_USER;
98 *advance = 0;
99 break;
100 default:
101 emulated = EMULATE_FAIL;
102 }
103 return emulated;
104}
105
106int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
107 unsigned int inst, int *advance)
87{ 108{
88 int emulated = EMULATE_DONE; 109 int emulated = EMULATE_DONE;
89 int ra = get_ra(inst); 110 int ra = get_ra(inst);
@@ -130,6 +151,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
130 emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); 151 emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
131 break; 152 break;
132 153
154 case XOP_EHPRIV:
155 emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
156 advance);
157 break;
158
133 default: 159 default:
134 emulated = EMULATE_FAIL; 160 emulated = EMULATE_FAIL;
135 } 161 }
@@ -146,7 +172,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
146 return emulated; 172 return emulated;
147} 173}
148 174
149int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 175int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
150{ 176{
151 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 177 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
152 int emulated = EMULATE_DONE; 178 int emulated = EMULATE_DONE;
@@ -237,7 +263,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
237 return emulated; 263 return emulated;
238} 264}
239 265
240int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 266int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
241{ 267{
242 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 268 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
243 int emulated = EMULATE_DONE; 269 int emulated = EMULATE_DONE;
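For illustration only, a sketch of the consumer side of the EHPRIV emulation above: the instruction is turned into a KVM_EXIT_DEBUG exit, so a debugger-style userspace would read the trap address from the mmap'ed kvm_run area. handle_exit() is a hypothetical helper, not part of this series.

#include <stdio.h>
#include <linux/kvm.h>

/* run points at the vcpu's mmap'ed struct kvm_run. */
static int handle_exit(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_DEBUG:
		/* ehpriv (or a HW debug event) stopped the guest here */
		printf("debug exit at guest pc 0x%llx\n",
		       (unsigned long long)run->debug.arch.address);
		return 0;
	default:
		return -1;
	}
}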
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 6d6f153b6c1d..ebca6b88ea5e 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -32,7 +32,7 @@
32#include <asm/kvm_ppc.h> 32#include <asm/kvm_ppc.h>
33 33
34#include "e500.h" 34#include "e500.h"
35#include "trace.h" 35#include "trace_booke.h"
36#include "timing.h" 36#include "timing.h"
37#include "e500_mmu_host.h" 37#include "e500_mmu_host.h"
38 38
@@ -536,7 +536,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
536 return get_tlb_raddr(gtlbe) | (eaddr & pgmask); 536 return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
537} 537}
538 538
539void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 539void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
540{ 540{
541} 541}
542 542
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index c65593abae8e..ecf2247b13be 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -32,10 +32,11 @@
32#include <asm/kvm_ppc.h> 32#include <asm/kvm_ppc.h>
33 33
34#include "e500.h" 34#include "e500.h"
35#include "trace.h"
36#include "timing.h" 35#include "timing.h"
37#include "e500_mmu_host.h" 36#include "e500_mmu_host.h"
38 37
38#include "trace_booke.h"
39
39#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) 40#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
40 41
41static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; 42static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
@@ -253,6 +254,9 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
253 ref->pfn = pfn; 254 ref->pfn = pfn;
254 ref->flags |= E500_TLB_VALID; 255 ref->flags |= E500_TLB_VALID;
255 256
257 /* Mark the page accessed */
258 kvm_set_pfn_accessed(pfn);
259
256 if (tlbe_is_writable(gtlbe)) 260 if (tlbe_is_writable(gtlbe))
257 kvm_set_pfn_dirty(pfn); 261 kvm_set_pfn_dirty(pfn);
258} 262}
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 19c8379575f7..4132cd2fc171 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
110 110
111static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); 111static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu);
112 112
113void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 113static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
114{ 114{
115 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 115 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
116 116
@@ -147,7 +147,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
147 kvmppc_load_guest_fp(vcpu); 147 kvmppc_load_guest_fp(vcpu);
148} 148}
149 149
150void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 150static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
151{ 151{
152 vcpu->arch.eplc = mfspr(SPRN_EPLC); 152 vcpu->arch.eplc = mfspr(SPRN_EPLC);
153 vcpu->arch.epsc = mfspr(SPRN_EPSC); 153 vcpu->arch.epsc = mfspr(SPRN_EPSC);
@@ -204,7 +204,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
204 return 0; 204 return 0;
205} 205}
206 206
207void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 207static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu,
208 struct kvm_sregs *sregs)
208{ 209{
209 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 210 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
210 211
@@ -224,10 +225,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
224 sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; 225 sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
225 sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; 226 sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
226 227
227 kvmppc_get_sregs_ivor(vcpu, sregs); 228 return kvmppc_get_sregs_ivor(vcpu, sregs);
228} 229}
229 230
230int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 231static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu,
232 struct kvm_sregs *sregs)
231{ 233{
232 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 234 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
233 int ret; 235 int ret;
@@ -260,21 +262,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
260 return kvmppc_set_sregs_ivor(vcpu, sregs); 262 return kvmppc_set_sregs_ivor(vcpu, sregs);
261} 263}
262 264
263int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 265static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
264 union kvmppc_one_reg *val) 266 union kvmppc_one_reg *val)
265{ 267{
266 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 268 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
267 return r; 269 return r;
268} 270}
269 271
270int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 272static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
271 union kvmppc_one_reg *val) 273 union kvmppc_one_reg *val)
272{ 274{
273 int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); 275 int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
274 return r; 276 return r;
275} 277}
276 278
277struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 279static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
280 unsigned int id)
278{ 281{
279 struct kvmppc_vcpu_e500 *vcpu_e500; 282 struct kvmppc_vcpu_e500 *vcpu_e500;
280 struct kvm_vcpu *vcpu; 283 struct kvm_vcpu *vcpu;
@@ -315,7 +318,7 @@ out:
315 return ERR_PTR(err); 318 return ERR_PTR(err);
316} 319}
317 320
318void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 321static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
319{ 322{
320 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 323 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
321 324
@@ -325,7 +328,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
325 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 328 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
326} 329}
327 330
328int kvmppc_core_init_vm(struct kvm *kvm) 331static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
329{ 332{
330 int lpid; 333 int lpid;
331 334
@@ -337,27 +340,52 @@ int kvmppc_core_init_vm(struct kvm *kvm)
337 return 0; 340 return 0;
338} 341}
339 342
340void kvmppc_core_destroy_vm(struct kvm *kvm) 343static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
341{ 344{
342 kvmppc_free_lpid(kvm->arch.lpid); 345 kvmppc_free_lpid(kvm->arch.lpid);
343} 346}
344 347
348static struct kvmppc_ops kvm_ops_e500mc = {
349 .get_sregs = kvmppc_core_get_sregs_e500mc,
350 .set_sregs = kvmppc_core_set_sregs_e500mc,
351 .get_one_reg = kvmppc_get_one_reg_e500mc,
352 .set_one_reg = kvmppc_set_one_reg_e500mc,
353 .vcpu_load = kvmppc_core_vcpu_load_e500mc,
354 .vcpu_put = kvmppc_core_vcpu_put_e500mc,
355 .vcpu_create = kvmppc_core_vcpu_create_e500mc,
356 .vcpu_free = kvmppc_core_vcpu_free_e500mc,
357 .mmu_destroy = kvmppc_mmu_destroy_e500,
358 .init_vm = kvmppc_core_init_vm_e500mc,
359 .destroy_vm = kvmppc_core_destroy_vm_e500mc,
360 .emulate_op = kvmppc_core_emulate_op_e500,
361 .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
362 .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
363};
364
345static int __init kvmppc_e500mc_init(void) 365static int __init kvmppc_e500mc_init(void)
346{ 366{
347 int r; 367 int r;
348 368
349 r = kvmppc_booke_init(); 369 r = kvmppc_booke_init();
350 if (r) 370 if (r)
351 return r; 371 goto err_out;
352 372
353 kvmppc_init_lpid(64); 373 kvmppc_init_lpid(64);
354 kvmppc_claim_lpid(0); /* host */ 374 kvmppc_claim_lpid(0); /* host */
355 375
356 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 376 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
377 if (r)
378 goto err_out;
379 kvm_ops_e500mc.owner = THIS_MODULE;
380 kvmppc_pr_ops = &kvm_ops_e500mc;
381
382err_out:
383 return r;
357} 384}
358 385
359static void __exit kvmppc_e500mc_exit(void) 386static void __exit kvmppc_e500mc_exit(void)
360{ 387{
388 kvmppc_pr_ops = NULL;
361 kvmppc_booke_exit(); 389 kvmppc_booke_exit();
362} 390}
363 391
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 751cd45f65a0..2f9a0873b44f 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -130,8 +130,8 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
130 case SPRN_PIR: break; 130 case SPRN_PIR: break;
131 131
132 default: 132 default:
133 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, 133 emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn,
134 spr_val); 134 spr_val);
135 if (emulated == EMULATE_FAIL) 135 if (emulated == EMULATE_FAIL)
136 printk(KERN_INFO "mtspr: unknown spr " 136 printk(KERN_INFO "mtspr: unknown spr "
137 "0x%x\n", sprn); 137 "0x%x\n", sprn);
@@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
191 spr_val = kvmppc_get_dec(vcpu, get_tb()); 191 spr_val = kvmppc_get_dec(vcpu, get_tb());
192 break; 192 break;
193 default: 193 default:
194 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, 194 emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn,
195 &spr_val); 195 &spr_val);
196 if (unlikely(emulated == EMULATE_FAIL)) { 196 if (unlikely(emulated == EMULATE_FAIL)) {
197 printk(KERN_INFO "mfspr: unknown spr " 197 printk(KERN_INFO "mfspr: unknown spr "
198 "0x%x\n", sprn); 198 "0x%x\n", sprn);
@@ -464,7 +464,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
464 } 464 }
465 465
466 if (emulated == EMULATE_FAIL) { 466 if (emulated == EMULATE_FAIL) {
467 emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); 467 emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
468 &advance);
468 if (emulated == EMULATE_AGAIN) { 469 if (emulated == EMULATE_AGAIN) {
469 advance = 0; 470 advance = 0;
470 } else if (emulated == EMULATE_FAIL) { 471 } else if (emulated == EMULATE_FAIL) {
@@ -483,3 +484,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
483 484
484 return emulated; 485 return emulated;
485} 486}
487EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 07c0106fab76..9ae97686e9f4 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/file.h> 28#include <linux/file.h>
29#include <linux/module.h>
29#include <asm/cputable.h> 30#include <asm/cputable.h>
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31#include <asm/kvm_ppc.h> 32#include <asm/kvm_ppc.h>
@@ -39,6 +40,12 @@
39#define CREATE_TRACE_POINTS 40#define CREATE_TRACE_POINTS
40#include "trace.h" 41#include "trace.h"
41 42
43struct kvmppc_ops *kvmppc_hv_ops;
44EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
45struct kvmppc_ops *kvmppc_pr_ops;
46EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
47
48
42int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 49int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
43{ 50{
44 return !!(v->arch.pending_exceptions) || 51 return !!(v->arch.pending_exceptions) ||
@@ -50,7 +57,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
50 return 1; 57 return 1;
51} 58}
52 59
53#ifndef CONFIG_KVM_BOOK3S_64_HV
54/* 60/*
55 * Common checks before entering the guest world. Call with interrupts 61 * Common checks before entering the guest world. Call with interrupts
56 * disabled. 62 * disabled.
@@ -125,7 +131,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
125 131
126 return r; 132 return r;
127} 133}
128#endif /* CONFIG_KVM_BOOK3S_64_HV */ 134EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
129 135
130int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 136int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
131{ 137{
@@ -179,6 +185,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
179 185
180 return r; 186 return r;
181} 187}
188EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);
182 189
183int kvmppc_sanity_check(struct kvm_vcpu *vcpu) 190int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
184{ 191{
@@ -192,11 +199,9 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
192 if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled) 199 if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
193 goto out; 200 goto out;
194 201
195#ifdef CONFIG_KVM_BOOK3S_64_HV
196 /* HV KVM can only do PAPR mode for now */ 202 /* HV KVM can only do PAPR mode for now */
197 if (!vcpu->arch.papr_enabled) 203 if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
198 goto out; 204 goto out;
199#endif
200 205
201#ifdef CONFIG_KVM_BOOKE_HV 206#ifdef CONFIG_KVM_BOOKE_HV
202 if (!cpu_has_feature(CPU_FTR_EMB_HV)) 207 if (!cpu_has_feature(CPU_FTR_EMB_HV))
@@ -209,6 +214,7 @@ out:
209 vcpu->arch.sane = r; 214 vcpu->arch.sane = r;
210 return r ? 0 : -EINVAL; 215 return r ? 0 : -EINVAL;
211} 216}
217EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
212 218
213int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) 219int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
214{ 220{
@@ -243,6 +249,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
243 249
244 return r; 250 return r;
245} 251}
252EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
246 253
247int kvm_arch_hardware_enable(void *garbage) 254int kvm_arch_hardware_enable(void *garbage)
248{ 255{
@@ -269,10 +276,35 @@ void kvm_arch_check_processor_compat(void *rtn)
269 276
270int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 277int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
271{ 278{
272 if (type) 279 struct kvmppc_ops *kvm_ops = NULL;
273 return -EINVAL; 280 /*
274 281 * if we have both HV and PR enabled, default is HV
282 */
283 if (type == 0) {
284 if (kvmppc_hv_ops)
285 kvm_ops = kvmppc_hv_ops;
286 else
287 kvm_ops = kvmppc_pr_ops;
288 if (!kvm_ops)
289 goto err_out;
290 } else if (type == KVM_VM_PPC_HV) {
291 if (!kvmppc_hv_ops)
292 goto err_out;
293 kvm_ops = kvmppc_hv_ops;
294 } else if (type == KVM_VM_PPC_PR) {
295 if (!kvmppc_pr_ops)
296 goto err_out;
297 kvm_ops = kvmppc_pr_ops;
298 } else
299 goto err_out;
300
301 if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
302 return -ENOENT;
303
304 kvm->arch.kvm_ops = kvm_ops;
275 return kvmppc_core_init_vm(kvm); 305 return kvmppc_core_init_vm(kvm);
306err_out:
307 return -EINVAL;
276} 308}
277 309
278void kvm_arch_destroy_vm(struct kvm *kvm) 310void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -292,6 +324,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
292 kvmppc_core_destroy_vm(kvm); 324 kvmppc_core_destroy_vm(kvm);
293 325
294 mutex_unlock(&kvm->lock); 326 mutex_unlock(&kvm->lock);
327
328 /* drop the module reference */
329 module_put(kvm->arch.kvm_ops->owner);
295} 330}
296 331
297void kvm_arch_sync_events(struct kvm *kvm) 332void kvm_arch_sync_events(struct kvm *kvm)
@@ -301,6 +336,10 @@ void kvm_arch_sync_events(struct kvm *kvm)
301int kvm_dev_ioctl_check_extension(long ext) 336int kvm_dev_ioctl_check_extension(long ext)
302{ 337{
303 int r; 338 int r;
339 /* FIXME!!
340 * Should some of this be vm ioctl ? is it possible now ?
341 */
342 int hv_enabled = kvmppc_hv_ops ? 1 : 0;
304 343
305 switch (ext) { 344 switch (ext) {
306#ifdef CONFIG_BOOKE 345#ifdef CONFIG_BOOKE
@@ -320,22 +359,26 @@ int kvm_dev_ioctl_check_extension(long ext)
320 case KVM_CAP_DEVICE_CTRL: 359 case KVM_CAP_DEVICE_CTRL:
321 r = 1; 360 r = 1;
322 break; 361 break;
323#ifndef CONFIG_KVM_BOOK3S_64_HV
324 case KVM_CAP_PPC_PAIRED_SINGLES: 362 case KVM_CAP_PPC_PAIRED_SINGLES:
325 case KVM_CAP_PPC_OSI: 363 case KVM_CAP_PPC_OSI:
326 case KVM_CAP_PPC_GET_PVINFO: 364 case KVM_CAP_PPC_GET_PVINFO:
327#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) 365#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
328 case KVM_CAP_SW_TLB: 366 case KVM_CAP_SW_TLB:
329#endif 367#endif
330#ifdef CONFIG_KVM_MPIC 368 /* We support this only for PR */
331 case KVM_CAP_IRQ_MPIC: 369 r = !hv_enabled;
332#endif
333 r = 1;
334 break; 370 break;
371#ifdef CONFIG_KVM_MMIO
335 case KVM_CAP_COALESCED_MMIO: 372 case KVM_CAP_COALESCED_MMIO:
336 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 373 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
337 break; 374 break;
338#endif 375#endif
376#ifdef CONFIG_KVM_MPIC
377 case KVM_CAP_IRQ_MPIC:
378 r = 1;
379 break;
380#endif
381
339#ifdef CONFIG_PPC_BOOK3S_64 382#ifdef CONFIG_PPC_BOOK3S_64
340 case KVM_CAP_SPAPR_TCE: 383 case KVM_CAP_SPAPR_TCE:
341 case KVM_CAP_PPC_ALLOC_HTAB: 384 case KVM_CAP_PPC_ALLOC_HTAB:
@@ -346,32 +389,37 @@ int kvm_dev_ioctl_check_extension(long ext)
346 r = 1; 389 r = 1;
347 break; 390 break;
348#endif /* CONFIG_PPC_BOOK3S_64 */ 391#endif /* CONFIG_PPC_BOOK3S_64 */
349#ifdef CONFIG_KVM_BOOK3S_64_HV 392#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
350 case KVM_CAP_PPC_SMT: 393 case KVM_CAP_PPC_SMT:
351 r = threads_per_core; 394 if (hv_enabled)
395 r = threads_per_core;
396 else
397 r = 0;
352 break; 398 break;
353 case KVM_CAP_PPC_RMA: 399 case KVM_CAP_PPC_RMA:
354 r = 1; 400 r = hv_enabled;
355 /* PPC970 requires an RMA */ 401 /* PPC970 requires an RMA */
356 if (cpu_has_feature(CPU_FTR_ARCH_201)) 402 if (r && cpu_has_feature(CPU_FTR_ARCH_201))
357 r = 2; 403 r = 2;
358 break; 404 break;
359#endif 405#endif
360 case KVM_CAP_SYNC_MMU: 406 case KVM_CAP_SYNC_MMU:
361#ifdef CONFIG_KVM_BOOK3S_64_HV 407#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
362 r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; 408 if (hv_enabled)
409 r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
410 else
411 r = 0;
363#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) 412#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
364 r = 1; 413 r = 1;
365#else 414#else
366 r = 0; 415 r = 0;
367 break;
368#endif 416#endif
369#ifdef CONFIG_KVM_BOOK3S_64_HV 417 break;
418#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
370 case KVM_CAP_PPC_HTAB_FD: 419 case KVM_CAP_PPC_HTAB_FD:
371 r = 1; 420 r = hv_enabled;
372 break; 421 break;
373#endif 422#endif
374 break;
375 case KVM_CAP_NR_VCPUS: 423 case KVM_CAP_NR_VCPUS:
376 /* 424 /*
377 * Recommending a number of CPUs is somewhat arbitrary; we 425 * Recommending a number of CPUs is somewhat arbitrary; we
@@ -379,11 +427,10 @@ int kvm_dev_ioctl_check_extension(long ext)
379 * will have secondary threads "offline"), and for other KVM 427 * will have secondary threads "offline"), and for other KVM
380 * implementations just count online CPUs. 428 * implementations just count online CPUs.
381 */ 429 */
382#ifdef CONFIG_KVM_BOOK3S_64_HV 430 if (hv_enabled)
383 r = num_present_cpus(); 431 r = num_present_cpus();
384#else 432 else
385 r = num_online_cpus(); 433 r = num_online_cpus();
386#endif
387 break; 434 break;
388 case KVM_CAP_MAX_VCPUS: 435 case KVM_CAP_MAX_VCPUS:
389 r = KVM_MAX_VCPUS; 436 r = KVM_MAX_VCPUS;
@@ -407,15 +454,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
407 return -EINVAL; 454 return -EINVAL;
408} 455}
409 456
410void kvm_arch_free_memslot(struct kvm_memory_slot *free, 457void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
411 struct kvm_memory_slot *dont) 458 struct kvm_memory_slot *dont)
412{ 459{
413 kvmppc_core_free_memslot(free, dont); 460 kvmppc_core_free_memslot(kvm, free, dont);
414} 461}
415 462
416int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 463int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
464 unsigned long npages)
417{ 465{
418 return kvmppc_core_create_memslot(slot, npages); 466 return kvmppc_core_create_memslot(kvm, slot, npages);
419} 467}
420 468
421void kvm_arch_memslots_updated(struct kvm *kvm) 469void kvm_arch_memslots_updated(struct kvm *kvm)
@@ -659,6 +707,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
659 707
660 return EMULATE_DO_MMIO; 708 return EMULATE_DO_MMIO;
661} 709}
710EXPORT_SYMBOL_GPL(kvmppc_handle_load);
662 711
663/* Same as above, but sign extends */ 712/* Same as above, but sign extends */
664int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, 713int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -720,6 +769,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
720 769
721 return EMULATE_DO_MMIO; 770 return EMULATE_DO_MMIO;
722} 771}
772EXPORT_SYMBOL_GPL(kvmppc_handle_store);
723 773
724int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 774int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
725{ 775{
@@ -1024,52 +1074,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
1024 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); 1074 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
1025 goto out; 1075 goto out;
1026 } 1076 }
1027#endif /* CONFIG_PPC_BOOK3S_64 */
1028
1029#ifdef CONFIG_KVM_BOOK3S_64_HV
1030 case KVM_ALLOCATE_RMA: {
1031 struct kvm_allocate_rma rma;
1032 struct kvm *kvm = filp->private_data;
1033
1034 r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
1035 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
1036 r = -EFAULT;
1037 break;
1038 }
1039
1040 case KVM_PPC_ALLOCATE_HTAB: {
1041 u32 htab_order;
1042
1043 r = -EFAULT;
1044 if (get_user(htab_order, (u32 __user *)argp))
1045 break;
1046 r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
1047 if (r)
1048 break;
1049 r = -EFAULT;
1050 if (put_user(htab_order, (u32 __user *)argp))
1051 break;
1052 r = 0;
1053 break;
1054 }
1055
1056 case KVM_PPC_GET_HTAB_FD: {
1057 struct kvm_get_htab_fd ghf;
1058
1059 r = -EFAULT;
1060 if (copy_from_user(&ghf, argp, sizeof(ghf)))
1061 break;
1062 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
1063 break;
1064 }
1065#endif /* CONFIG_KVM_BOOK3S_64_HV */
1066
1067#ifdef CONFIG_PPC_BOOK3S_64
1068 case KVM_PPC_GET_SMMU_INFO: { 1077 case KVM_PPC_GET_SMMU_INFO: {
1069 struct kvm_ppc_smmu_info info; 1078 struct kvm_ppc_smmu_info info;
1079 struct kvm *kvm = filp->private_data;
1070 1080
1071 memset(&info, 0, sizeof(info)); 1081 memset(&info, 0, sizeof(info));
1072 r = kvm_vm_ioctl_get_smmu_info(kvm, &info); 1082 r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
1073 if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) 1083 if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
1074 r = -EFAULT; 1084 r = -EFAULT;
1075 break; 1085 break;
@@ -1080,11 +1090,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
1080 r = kvm_vm_ioctl_rtas_define_token(kvm, argp); 1090 r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
1081 break; 1091 break;
1082 } 1092 }
1083#endif /* CONFIG_PPC_BOOK3S_64 */ 1093 default: {
1094 struct kvm *kvm = filp->private_data;
1095 r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
1096 }
1097#else /* CONFIG_PPC_BOOK3S_64 */
1084 default: 1098 default:
1085 r = -ENOTTY; 1099 r = -ENOTTY;
1100#endif
1086 } 1101 }
1087
1088out: 1102out:
1089 return r; 1103 return r;
1090} 1104}
@@ -1106,22 +1120,26 @@ long kvmppc_alloc_lpid(void)
1106 1120
1107 return lpid; 1121 return lpid;
1108} 1122}
1123EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
1109 1124
1110void kvmppc_claim_lpid(long lpid) 1125void kvmppc_claim_lpid(long lpid)
1111{ 1126{
1112 set_bit(lpid, lpid_inuse); 1127 set_bit(lpid, lpid_inuse);
1113} 1128}
1129EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
1114 1130
1115void kvmppc_free_lpid(long lpid) 1131void kvmppc_free_lpid(long lpid)
1116{ 1132{
1117 clear_bit(lpid, lpid_inuse); 1133 clear_bit(lpid, lpid_inuse);
1118} 1134}
1135EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
1119 1136
1120void kvmppc_init_lpid(unsigned long nr_lpids_param) 1137void kvmppc_init_lpid(unsigned long nr_lpids_param)
1121{ 1138{
1122 nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); 1139 nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
1123 memset(lpid_inuse, 0, sizeof(lpid_inuse)); 1140 memset(lpid_inuse, 0, sizeof(lpid_inuse));
1124} 1141}
1142EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
1125 1143
1126int kvm_arch_init(void *opaque) 1144int kvm_arch_init(void *opaque)
1127{ 1145{
@@ -1130,4 +1148,5 @@ int kvm_arch_init(void *opaque)
1130 1148
1131void kvm_arch_exit(void) 1149void kvm_arch_exit(void)
1132{ 1150{
1151
1133} 1152}
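A hedged sketch of the userspace side of the selection logic added to kvm_arch_init_vm() above: with HV and PR built as separate modules, the type argument of KVM_CREATE_VM can request one flavour explicitly, while type 0 keeps the HV-if-available default. It assumes KVM_VM_PPC_PR and KVM_VM_PPC_HV are visible through <linux/kvm.h> and that kvm_fd is an open /dev/kvm descriptor.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Ask for a PR guest first, then fall back to whatever backend is loaded. */
static int create_ppc_vm(int kvm_fd)
{
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_PPC_PR);

	if (vm_fd < 0)
		vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
	return vm_fd;
}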
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index e326489a5420..2e0e67ef3544 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,126 +31,6 @@ TRACE_EVENT(kvm_ppc_instr,
31 __entry->inst, __entry->pc, __entry->emulate) 31 __entry->inst, __entry->pc, __entry->emulate)
32); 32);
33 33
34#ifdef CONFIG_PPC_BOOK3S
35#define kvm_trace_symbol_exit \
36 {0x100, "SYSTEM_RESET"}, \
37 {0x200, "MACHINE_CHECK"}, \
38 {0x300, "DATA_STORAGE"}, \
39 {0x380, "DATA_SEGMENT"}, \
40 {0x400, "INST_STORAGE"}, \
41 {0x480, "INST_SEGMENT"}, \
42 {0x500, "EXTERNAL"}, \
43 {0x501, "EXTERNAL_LEVEL"}, \
44 {0x502, "EXTERNAL_HV"}, \
45 {0x600, "ALIGNMENT"}, \
46 {0x700, "PROGRAM"}, \
47 {0x800, "FP_UNAVAIL"}, \
48 {0x900, "DECREMENTER"}, \
49 {0x980, "HV_DECREMENTER"}, \
50 {0xc00, "SYSCALL"}, \
51 {0xd00, "TRACE"}, \
52 {0xe00, "H_DATA_STORAGE"}, \
53 {0xe20, "H_INST_STORAGE"}, \
54 {0xe40, "H_EMUL_ASSIST"}, \
55 {0xf00, "PERFMON"}, \
56 {0xf20, "ALTIVEC"}, \
57 {0xf40, "VSX"}
58#else
59#define kvm_trace_symbol_exit \
60 {0, "CRITICAL"}, \
61 {1, "MACHINE_CHECK"}, \
62 {2, "DATA_STORAGE"}, \
63 {3, "INST_STORAGE"}, \
64 {4, "EXTERNAL"}, \
65 {5, "ALIGNMENT"}, \
66 {6, "PROGRAM"}, \
67 {7, "FP_UNAVAIL"}, \
68 {8, "SYSCALL"}, \
69 {9, "AP_UNAVAIL"}, \
70 {10, "DECREMENTER"}, \
71 {11, "FIT"}, \
72 {12, "WATCHDOG"}, \
73 {13, "DTLB_MISS"}, \
74 {14, "ITLB_MISS"}, \
75 {15, "DEBUG"}, \
76 {32, "SPE_UNAVAIL"}, \
77 {33, "SPE_FP_DATA"}, \
78 {34, "SPE_FP_ROUND"}, \
79 {35, "PERFORMANCE_MONITOR"}, \
80 {36, "DOORBELL"}, \
81 {37, "DOORBELL_CRITICAL"}, \
82 {38, "GUEST_DBELL"}, \
83 {39, "GUEST_DBELL_CRIT"}, \
84 {40, "HV_SYSCALL"}, \
85 {41, "HV_PRIV"}
86#endif
87
88TRACE_EVENT(kvm_exit,
89 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
90 TP_ARGS(exit_nr, vcpu),
91
92 TP_STRUCT__entry(
93 __field( unsigned int, exit_nr )
94 __field( unsigned long, pc )
95 __field( unsigned long, msr )
96 __field( unsigned long, dar )
97#ifdef CONFIG_KVM_BOOK3S_PR
98 __field( unsigned long, srr1 )
99#endif
100 __field( unsigned long, last_inst )
101 ),
102
103 TP_fast_assign(
104#ifdef CONFIG_KVM_BOOK3S_PR
105 struct kvmppc_book3s_shadow_vcpu *svcpu;
106#endif
107 __entry->exit_nr = exit_nr;
108 __entry->pc = kvmppc_get_pc(vcpu);
109 __entry->dar = kvmppc_get_fault_dar(vcpu);
110 __entry->msr = vcpu->arch.shared->msr;
111#ifdef CONFIG_KVM_BOOK3S_PR
112 svcpu = svcpu_get(vcpu);
113 __entry->srr1 = svcpu->shadow_srr1;
114 svcpu_put(svcpu);
115#endif
116 __entry->last_inst = vcpu->arch.last_inst;
117 ),
118
119 TP_printk("exit=%s"
120 " | pc=0x%lx"
121 " | msr=0x%lx"
122 " | dar=0x%lx"
123#ifdef CONFIG_KVM_BOOK3S_PR
124 " | srr1=0x%lx"
125#endif
126 " | last_inst=0x%lx"
127 ,
128 __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
129 __entry->pc,
130 __entry->msr,
131 __entry->dar,
132#ifdef CONFIG_KVM_BOOK3S_PR
133 __entry->srr1,
134#endif
135 __entry->last_inst
136 )
137);
138
139TRACE_EVENT(kvm_unmap_hva,
140 TP_PROTO(unsigned long hva),
141 TP_ARGS(hva),
142
143 TP_STRUCT__entry(
144 __field( unsigned long, hva )
145 ),
146
147 TP_fast_assign(
148 __entry->hva = hva;
149 ),
150
151 TP_printk("unmap hva 0x%lx\n", __entry->hva)
152);
153
154TRACE_EVENT(kvm_stlb_inval, 34TRACE_EVENT(kvm_stlb_inval,
155 TP_PROTO(unsigned int stlb_index), 35 TP_PROTO(unsigned int stlb_index),
156 TP_ARGS(stlb_index), 36 TP_ARGS(stlb_index),
@@ -236,315 +116,6 @@ TRACE_EVENT(kvm_check_requests,
236 __entry->cpu_nr, __entry->requests) 116 __entry->cpu_nr, __entry->requests)
237); 117);
238 118
239
240/*************************************************************************
241 * Book3S trace points *
242 *************************************************************************/
243
244#ifdef CONFIG_KVM_BOOK3S_PR
245
246TRACE_EVENT(kvm_book3s_reenter,
247 TP_PROTO(int r, struct kvm_vcpu *vcpu),
248 TP_ARGS(r, vcpu),
249
250 TP_STRUCT__entry(
251 __field( unsigned int, r )
252 __field( unsigned long, pc )
253 ),
254
255 TP_fast_assign(
256 __entry->r = r;
257 __entry->pc = kvmppc_get_pc(vcpu);
258 ),
259
260 TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
261);
262
263#ifdef CONFIG_PPC_BOOK3S_64
264
265TRACE_EVENT(kvm_book3s_64_mmu_map,
266 TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
267 struct kvmppc_pte *orig_pte),
268 TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
269
270 TP_STRUCT__entry(
271 __field( unsigned char, flag_w )
272 __field( unsigned char, flag_x )
273 __field( unsigned long, eaddr )
274 __field( unsigned long, hpteg )
275 __field( unsigned long, va )
276 __field( unsigned long long, vpage )
277 __field( unsigned long, hpaddr )
278 ),
279
280 TP_fast_assign(
281 __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
282 __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
283 __entry->eaddr = orig_pte->eaddr;
284 __entry->hpteg = hpteg;
285 __entry->va = va;
286 __entry->vpage = orig_pte->vpage;
287 __entry->hpaddr = hpaddr;
288 ),
289
290 TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
291 __entry->flag_w, __entry->flag_x, __entry->eaddr,
292 __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
293);
294
295#endif /* CONFIG_PPC_BOOK3S_64 */
296
297TRACE_EVENT(kvm_book3s_mmu_map,
298 TP_PROTO(struct hpte_cache *pte),
299 TP_ARGS(pte),
300
301 TP_STRUCT__entry(
302 __field( u64, host_vpn )
303 __field( u64, pfn )
304 __field( ulong, eaddr )
305 __field( u64, vpage )
306 __field( ulong, raddr )
307 __field( int, flags )
308 ),
309
310 TP_fast_assign(
311 __entry->host_vpn = pte->host_vpn;
312 __entry->pfn = pte->pfn;
313 __entry->eaddr = pte->pte.eaddr;
314 __entry->vpage = pte->pte.vpage;
315 __entry->raddr = pte->pte.raddr;
316 __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
317 (pte->pte.may_write ? 0x2 : 0) |
318 (pte->pte.may_execute ? 0x1 : 0);
319 ),
320
321 TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
322 __entry->host_vpn, __entry->pfn, __entry->eaddr,
323 __entry->vpage, __entry->raddr, __entry->flags)
324);
325
326TRACE_EVENT(kvm_book3s_mmu_invalidate,
327 TP_PROTO(struct hpte_cache *pte),
328 TP_ARGS(pte),
329
330 TP_STRUCT__entry(
331 __field( u64, host_vpn )
332 __field( u64, pfn )
333 __field( ulong, eaddr )
334 __field( u64, vpage )
335 __field( ulong, raddr )
336 __field( int, flags )
337 ),
338
339 TP_fast_assign(
340 __entry->host_vpn = pte->host_vpn;
341 __entry->pfn = pte->pfn;
342 __entry->eaddr = pte->pte.eaddr;
343 __entry->vpage = pte->pte.vpage;
344 __entry->raddr = pte->pte.raddr;
345 __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
346 (pte->pte.may_write ? 0x2 : 0) |
347 (pte->pte.may_execute ? 0x1 : 0);
348 ),
349
350 TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
351 __entry->host_vpn, __entry->pfn, __entry->eaddr,
352 __entry->vpage, __entry->raddr, __entry->flags)
353);
354
355TRACE_EVENT(kvm_book3s_mmu_flush,
356 TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
357 unsigned long long p2),
358 TP_ARGS(type, vcpu, p1, p2),
359
360 TP_STRUCT__entry(
361 __field( int, count )
362 __field( unsigned long long, p1 )
363 __field( unsigned long long, p2 )
364 __field( const char *, type )
365 ),
366
367 TP_fast_assign(
368 __entry->count = to_book3s(vcpu)->hpte_cache_count;
369 __entry->p1 = p1;
370 __entry->p2 = p2;
371 __entry->type = type;
372 ),
373
374 TP_printk("Flush %d %sPTEs: %llx - %llx",
375 __entry->count, __entry->type, __entry->p1, __entry->p2)
376);
377
378TRACE_EVENT(kvm_book3s_slb_found,
379 TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
380 TP_ARGS(gvsid, hvsid),
381
382 TP_STRUCT__entry(
383 __field( unsigned long long, gvsid )
384 __field( unsigned long long, hvsid )
385 ),
386
387 TP_fast_assign(
388 __entry->gvsid = gvsid;
389 __entry->hvsid = hvsid;
390 ),
391
392 TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
393);
394
395TRACE_EVENT(kvm_book3s_slb_fail,
396 TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
397 TP_ARGS(sid_map_mask, gvsid),
398
399 TP_STRUCT__entry(
400 __field( unsigned short, sid_map_mask )
401 __field( unsigned long long, gvsid )
402 ),
403
404 TP_fast_assign(
405 __entry->sid_map_mask = sid_map_mask;
406 __entry->gvsid = gvsid;
407 ),
408
409 TP_printk("%x/%x: %llx", __entry->sid_map_mask,
410 SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
411);
412
413TRACE_EVENT(kvm_book3s_slb_map,
414 TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
415 unsigned long long hvsid),
416 TP_ARGS(sid_map_mask, gvsid, hvsid),
417
418 TP_STRUCT__entry(
419 __field( unsigned short, sid_map_mask )
420 __field( unsigned long long, guest_vsid )
421 __field( unsigned long long, host_vsid )
422 ),
423
424 TP_fast_assign(
425 __entry->sid_map_mask = sid_map_mask;
426 __entry->guest_vsid = gvsid;
427 __entry->host_vsid = hvsid;
428 ),
429
430 TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
431 __entry->guest_vsid, __entry->host_vsid)
432);
433
434TRACE_EVENT(kvm_book3s_slbmte,
435 TP_PROTO(u64 slb_vsid, u64 slb_esid),
436 TP_ARGS(slb_vsid, slb_esid),
437
438 TP_STRUCT__entry(
439 __field( u64, slb_vsid )
440 __field( u64, slb_esid )
441 ),
442
443 TP_fast_assign(
444 __entry->slb_vsid = slb_vsid;
445 __entry->slb_esid = slb_esid;
446 ),
447
448 TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
449);
450
451#endif /* CONFIG_PPC_BOOK3S */
452
453
454/*************************************************************************
455 * Book3E trace points *
456 *************************************************************************/
457
458#ifdef CONFIG_BOOKE
459
460TRACE_EVENT(kvm_booke206_stlb_write,
461 TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
462 TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
463
464 TP_STRUCT__entry(
465 __field( __u32, mas0 )
466 __field( __u32, mas8 )
467 __field( __u32, mas1 )
468 __field( __u64, mas2 )
469 __field( __u64, mas7_3 )
470 ),
471
472 TP_fast_assign(
473 __entry->mas0 = mas0;
474 __entry->mas8 = mas8;
475 __entry->mas1 = mas1;
476 __entry->mas2 = mas2;
477 __entry->mas7_3 = mas7_3;
478 ),
479
480 TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
481 __entry->mas0, __entry->mas8, __entry->mas1,
482 __entry->mas2, __entry->mas7_3)
483);
484
485TRACE_EVENT(kvm_booke206_gtlb_write,
486 TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
487 TP_ARGS(mas0, mas1, mas2, mas7_3),
488
489 TP_STRUCT__entry(
490 __field( __u32, mas0 )
491 __field( __u32, mas1 )
492 __field( __u64, mas2 )
493 __field( __u64, mas7_3 )
494 ),
495
496 TP_fast_assign(
497 __entry->mas0 = mas0;
498 __entry->mas1 = mas1;
499 __entry->mas2 = mas2;
500 __entry->mas7_3 = mas7_3;
501 ),
502
503 TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
504 __entry->mas0, __entry->mas1,
505 __entry->mas2, __entry->mas7_3)
506);
507
508TRACE_EVENT(kvm_booke206_ref_release,
509 TP_PROTO(__u64 pfn, __u32 flags),
510 TP_ARGS(pfn, flags),
511
512 TP_STRUCT__entry(
513 __field( __u64, pfn )
514 __field( __u32, flags )
515 ),
516
517 TP_fast_assign(
518 __entry->pfn = pfn;
519 __entry->flags = flags;
520 ),
521
522 TP_printk("pfn=%llx flags=%x",
523 __entry->pfn, __entry->flags)
524);
525
526TRACE_EVENT(kvm_booke_queue_irqprio,
527 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
528 TP_ARGS(vcpu, priority),
529
530 TP_STRUCT__entry(
531 __field( __u32, cpu_nr )
532 __field( __u32, priority )
533 __field( unsigned long, pending )
534 ),
535
536 TP_fast_assign(
537 __entry->cpu_nr = vcpu->vcpu_id;
538 __entry->priority = priority;
539 __entry->pending = vcpu->arch.pending_exceptions;
540 ),
541
542 TP_printk("vcpu=%x prio=%x pending=%lx",
543 __entry->cpu_nr, __entry->priority, __entry->pending)
544);
545
546#endif
547
548#endif /* _TRACE_KVM_H */ 119#endif /* _TRACE_KVM_H */
549 120
550/* This part must be outside protection */ 121/* This part must be outside protection */
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
new file mode 100644
index 000000000000..f7537cf26ce7
--- /dev/null
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -0,0 +1,177 @@
1#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_KVM_BOOKE_H
3
4#include <linux/tracepoint.h>
5
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm_booke
8#define TRACE_INCLUDE_PATH .
9#define TRACE_INCLUDE_FILE trace_booke
10
11#define kvm_trace_symbol_exit \
12 {0, "CRITICAL"}, \
13 {1, "MACHINE_CHECK"}, \
14 {2, "DATA_STORAGE"}, \
15 {3, "INST_STORAGE"}, \
16 {4, "EXTERNAL"}, \
17 {5, "ALIGNMENT"}, \
18 {6, "PROGRAM"}, \
19 {7, "FP_UNAVAIL"}, \
20 {8, "SYSCALL"}, \
21 {9, "AP_UNAVAIL"}, \
22 {10, "DECREMENTER"}, \
23 {11, "FIT"}, \
24 {12, "WATCHDOG"}, \
25 {13, "DTLB_MISS"}, \
26 {14, "ITLB_MISS"}, \
27 {15, "DEBUG"}, \
28 {32, "SPE_UNAVAIL"}, \
29 {33, "SPE_FP_DATA"}, \
30 {34, "SPE_FP_ROUND"}, \
31 {35, "PERFORMANCE_MONITOR"}, \
32 {36, "DOORBELL"}, \
33 {37, "DOORBELL_CRITICAL"}, \
34 {38, "GUEST_DBELL"}, \
35 {39, "GUEST_DBELL_CRIT"}, \
36 {40, "HV_SYSCALL"}, \
37 {41, "HV_PRIV"}
38
39TRACE_EVENT(kvm_exit,
40 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
41 TP_ARGS(exit_nr, vcpu),
42
43 TP_STRUCT__entry(
44 __field( unsigned int, exit_nr )
45 __field( unsigned long, pc )
46 __field( unsigned long, msr )
47 __field( unsigned long, dar )
48 __field( unsigned long, last_inst )
49 ),
50
51 TP_fast_assign(
52 __entry->exit_nr = exit_nr;
53 __entry->pc = kvmppc_get_pc(vcpu);
54 __entry->dar = kvmppc_get_fault_dar(vcpu);
55 __entry->msr = vcpu->arch.shared->msr;
56 __entry->last_inst = vcpu->arch.last_inst;
57 ),
58
59 TP_printk("exit=%s"
60 " | pc=0x%lx"
61 " | msr=0x%lx"
62 " | dar=0x%lx"
63 " | last_inst=0x%lx"
64 ,
65 __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
66 __entry->pc,
67 __entry->msr,
68 __entry->dar,
69 __entry->last_inst
70 )
71);
72
73TRACE_EVENT(kvm_unmap_hva,
74 TP_PROTO(unsigned long hva),
75 TP_ARGS(hva),
76
77 TP_STRUCT__entry(
78 __field( unsigned long, hva )
79 ),
80
81 TP_fast_assign(
82 __entry->hva = hva;
83 ),
84
85 TP_printk("unmap hva 0x%lx\n", __entry->hva)
86);
87
88TRACE_EVENT(kvm_booke206_stlb_write,
89 TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
90 TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
91
92 TP_STRUCT__entry(
93 __field( __u32, mas0 )
94 __field( __u32, mas8 )
95 __field( __u32, mas1 )
96 __field( __u64, mas2 )
97 __field( __u64, mas7_3 )
98 ),
99
100 TP_fast_assign(
101 __entry->mas0 = mas0;
102 __entry->mas8 = mas8;
103 __entry->mas1 = mas1;
104 __entry->mas2 = mas2;
105 __entry->mas7_3 = mas7_3;
106 ),
107
108 TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
109 __entry->mas0, __entry->mas8, __entry->mas1,
110 __entry->mas2, __entry->mas7_3)
111);
112
113TRACE_EVENT(kvm_booke206_gtlb_write,
114 TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
115 TP_ARGS(mas0, mas1, mas2, mas7_3),
116
117 TP_STRUCT__entry(
118 __field( __u32, mas0 )
119 __field( __u32, mas1 )
120 __field( __u64, mas2 )
121 __field( __u64, mas7_3 )
122 ),
123
124 TP_fast_assign(
125 __entry->mas0 = mas0;
126 __entry->mas1 = mas1;
127 __entry->mas2 = mas2;
128 __entry->mas7_3 = mas7_3;
129 ),
130
131 TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
132 __entry->mas0, __entry->mas1,
133 __entry->mas2, __entry->mas7_3)
134);
135
136TRACE_EVENT(kvm_booke206_ref_release,
137 TP_PROTO(__u64 pfn, __u32 flags),
138 TP_ARGS(pfn, flags),
139
140 TP_STRUCT__entry(
141 __field( __u64, pfn )
142 __field( __u32, flags )
143 ),
144
145 TP_fast_assign(
146 __entry->pfn = pfn;
147 __entry->flags = flags;
148 ),
149
150 TP_printk("pfn=%llx flags=%x",
151 __entry->pfn, __entry->flags)
152);
153
154TRACE_EVENT(kvm_booke_queue_irqprio,
155 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
156 TP_ARGS(vcpu, priority),
157
158 TP_STRUCT__entry(
159 __field( __u32, cpu_nr )
160 __field( __u32, priority )
161 __field( unsigned long, pending )
162 ),
163
164 TP_fast_assign(
165 __entry->cpu_nr = vcpu->vcpu_id;
166 __entry->priority = priority;
167 __entry->pending = vcpu->arch.pending_exceptions;
168 ),
169
170 TP_printk("vcpu=%x prio=%x pending=%lx",
171 __entry->cpu_nr, __entry->priority, __entry->pending)
172);
173
174#endif
175
176/* This part must be outside protection */
177#include <trace/define_trace.h>
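
The kvm_exit event above stores only the raw exit number; __print_symbolic() resolves it through the kvm_trace_symbol_exit table when the trace buffer is read. As a rough, self-contained sketch of that value-to-name lookup (plain userspace C with a trimmed-down table and a hypothetical exit_name() helper, not the tracing infrastructure itself):

#include <stdio.h>

/* Hypothetical stand-in for a few of the {value, name} pairs above. */
struct sym { unsigned int val; const char *name; };

static const struct sym booke_exits[] = {
	{ 0, "CRITICAL" }, { 10, "DECREMENTER" },
	{ 13, "DTLB_MISS" }, { 14, "ITLB_MISS" },
};

/* Roughly what __print_symbolic() does when the event is formatted. */
static const char *exit_name(unsigned int exit_nr)
{
	size_t i;

	for (i = 0; i < sizeof(booke_exits) / sizeof(booke_exits[0]); i++)
		if (booke_exits[i].val == exit_nr)
			return booke_exits[i].name;
	return "unknown";
}

int main(void)
{
	printf("exit=%s\n", exit_name(13));	/* exit=DTLB_MISS */
	return 0;
}
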
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
new file mode 100644
index 000000000000..8b22e4748344
--- /dev/null
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -0,0 +1,297 @@
1
2#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ)
3#define _TRACE_KVM_PR_H
4
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM kvm_pr
9#define TRACE_INCLUDE_PATH .
10#define TRACE_INCLUDE_FILE trace_pr
11
12#define kvm_trace_symbol_exit \
13 {0x100, "SYSTEM_RESET"}, \
14 {0x200, "MACHINE_CHECK"}, \
15 {0x300, "DATA_STORAGE"}, \
16 {0x380, "DATA_SEGMENT"}, \
17 {0x400, "INST_STORAGE"}, \
18 {0x480, "INST_SEGMENT"}, \
19 {0x500, "EXTERNAL"}, \
20 {0x501, "EXTERNAL_LEVEL"}, \
21 {0x502, "EXTERNAL_HV"}, \
22 {0x600, "ALIGNMENT"}, \
23 {0x700, "PROGRAM"}, \
24 {0x800, "FP_UNAVAIL"}, \
25 {0x900, "DECREMENTER"}, \
26 {0x980, "HV_DECREMENTER"}, \
27 {0xc00, "SYSCALL"}, \
28 {0xd00, "TRACE"}, \
29 {0xe00, "H_DATA_STORAGE"}, \
30 {0xe20, "H_INST_STORAGE"}, \
31 {0xe40, "H_EMUL_ASSIST"}, \
32 {0xf00, "PERFMON"}, \
33 {0xf20, "ALTIVEC"}, \
34 {0xf40, "VSX"}
35
36TRACE_EVENT(kvm_book3s_reenter,
37 TP_PROTO(int r, struct kvm_vcpu *vcpu),
38 TP_ARGS(r, vcpu),
39
40 TP_STRUCT__entry(
41 __field( unsigned int, r )
42 __field( unsigned long, pc )
43 ),
44
45 TP_fast_assign(
46 __entry->r = r;
47 __entry->pc = kvmppc_get_pc(vcpu);
48 ),
49
50 TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
51);
52
53#ifdef CONFIG_PPC_BOOK3S_64
54
55TRACE_EVENT(kvm_book3s_64_mmu_map,
56 TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
57 struct kvmppc_pte *orig_pte),
58 TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
59
60 TP_STRUCT__entry(
61 __field( unsigned char, flag_w )
62 __field( unsigned char, flag_x )
63 __field( unsigned long, eaddr )
64 __field( unsigned long, hpteg )
65 __field( unsigned long, va )
66 __field( unsigned long long, vpage )
67 __field( unsigned long, hpaddr )
68 ),
69
70 TP_fast_assign(
71 __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
72 __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
73 __entry->eaddr = orig_pte->eaddr;
74 __entry->hpteg = hpteg;
75 __entry->va = va;
76 __entry->vpage = orig_pte->vpage;
77 __entry->hpaddr = hpaddr;
78 ),
79
80 TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
81 __entry->flag_w, __entry->flag_x, __entry->eaddr,
82 __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
83);
84
85#endif /* CONFIG_PPC_BOOK3S_64 */
86
87TRACE_EVENT(kvm_book3s_mmu_map,
88 TP_PROTO(struct hpte_cache *pte),
89 TP_ARGS(pte),
90
91 TP_STRUCT__entry(
92 __field( u64, host_vpn )
93 __field( u64, pfn )
94 __field( ulong, eaddr )
95 __field( u64, vpage )
96 __field( ulong, raddr )
97 __field( int, flags )
98 ),
99
100 TP_fast_assign(
101 __entry->host_vpn = pte->host_vpn;
102 __entry->pfn = pte->pfn;
103 __entry->eaddr = pte->pte.eaddr;
104 __entry->vpage = pte->pte.vpage;
105 __entry->raddr = pte->pte.raddr;
106 __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
107 (pte->pte.may_write ? 0x2 : 0) |
108 (pte->pte.may_execute ? 0x1 : 0);
109 ),
110
111 TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
112 __entry->host_vpn, __entry->pfn, __entry->eaddr,
113 __entry->vpage, __entry->raddr, __entry->flags)
114);
115
116TRACE_EVENT(kvm_book3s_mmu_invalidate,
117 TP_PROTO(struct hpte_cache *pte),
118 TP_ARGS(pte),
119
120 TP_STRUCT__entry(
121 __field( u64, host_vpn )
122 __field( u64, pfn )
123 __field( ulong, eaddr )
124 __field( u64, vpage )
125 __field( ulong, raddr )
126 __field( int, flags )
127 ),
128
129 TP_fast_assign(
130 __entry->host_vpn = pte->host_vpn;
131 __entry->pfn = pte->pfn;
132 __entry->eaddr = pte->pte.eaddr;
133 __entry->vpage = pte->pte.vpage;
134 __entry->raddr = pte->pte.raddr;
135 __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
136 (pte->pte.may_write ? 0x2 : 0) |
137 (pte->pte.may_execute ? 0x1 : 0);
138 ),
139
140 TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
141 __entry->host_vpn, __entry->pfn, __entry->eaddr,
142 __entry->vpage, __entry->raddr, __entry->flags)
143);
144
145TRACE_EVENT(kvm_book3s_mmu_flush,
146 TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
147 unsigned long long p2),
148 TP_ARGS(type, vcpu, p1, p2),
149
150 TP_STRUCT__entry(
151 __field( int, count )
152 __field( unsigned long long, p1 )
153 __field( unsigned long long, p2 )
154 __field( const char *, type )
155 ),
156
157 TP_fast_assign(
158 __entry->count = to_book3s(vcpu)->hpte_cache_count;
159 __entry->p1 = p1;
160 __entry->p2 = p2;
161 __entry->type = type;
162 ),
163
164 TP_printk("Flush %d %sPTEs: %llx - %llx",
165 __entry->count, __entry->type, __entry->p1, __entry->p2)
166);
167
168TRACE_EVENT(kvm_book3s_slb_found,
169 TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
170 TP_ARGS(gvsid, hvsid),
171
172 TP_STRUCT__entry(
173 __field( unsigned long long, gvsid )
174 __field( unsigned long long, hvsid )
175 ),
176
177 TP_fast_assign(
178 __entry->gvsid = gvsid;
179 __entry->hvsid = hvsid;
180 ),
181
182 TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
183);
184
185TRACE_EVENT(kvm_book3s_slb_fail,
186 TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
187 TP_ARGS(sid_map_mask, gvsid),
188
189 TP_STRUCT__entry(
190 __field( unsigned short, sid_map_mask )
191 __field( unsigned long long, gvsid )
192 ),
193
194 TP_fast_assign(
195 __entry->sid_map_mask = sid_map_mask;
196 __entry->gvsid = gvsid;
197 ),
198
199 TP_printk("%x/%x: %llx", __entry->sid_map_mask,
200 SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
201);
202
203TRACE_EVENT(kvm_book3s_slb_map,
204 TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
205 unsigned long long hvsid),
206 TP_ARGS(sid_map_mask, gvsid, hvsid),
207
208 TP_STRUCT__entry(
209 __field( unsigned short, sid_map_mask )
210 __field( unsigned long long, guest_vsid )
211 __field( unsigned long long, host_vsid )
212 ),
213
214 TP_fast_assign(
215 __entry->sid_map_mask = sid_map_mask;
216 __entry->guest_vsid = gvsid;
217 __entry->host_vsid = hvsid;
218 ),
219
220 TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
221 __entry->guest_vsid, __entry->host_vsid)
222);
223
224TRACE_EVENT(kvm_book3s_slbmte,
225 TP_PROTO(u64 slb_vsid, u64 slb_esid),
226 TP_ARGS(slb_vsid, slb_esid),
227
228 TP_STRUCT__entry(
229 __field( u64, slb_vsid )
230 __field( u64, slb_esid )
231 ),
232
233 TP_fast_assign(
234 __entry->slb_vsid = slb_vsid;
235 __entry->slb_esid = slb_esid;
236 ),
237
238 TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
239);
240
241TRACE_EVENT(kvm_exit,
242 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
243 TP_ARGS(exit_nr, vcpu),
244
245 TP_STRUCT__entry(
246 __field( unsigned int, exit_nr )
247 __field( unsigned long, pc )
248 __field( unsigned long, msr )
249 __field( unsigned long, dar )
250 __field( unsigned long, srr1 )
251 __field( unsigned long, last_inst )
252 ),
253
254 TP_fast_assign(
255 __entry->exit_nr = exit_nr;
256 __entry->pc = kvmppc_get_pc(vcpu);
257 __entry->dar = kvmppc_get_fault_dar(vcpu);
258 __entry->msr = vcpu->arch.shared->msr;
259 __entry->srr1 = vcpu->arch.shadow_srr1;
260 __entry->last_inst = vcpu->arch.last_inst;
261 ),
262
263 TP_printk("exit=%s"
264 " | pc=0x%lx"
265 " | msr=0x%lx"
266 " | dar=0x%lx"
267 " | srr1=0x%lx"
268 " | last_inst=0x%lx"
269 ,
270 __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
271 __entry->pc,
272 __entry->msr,
273 __entry->dar,
274 __entry->srr1,
275 __entry->last_inst
276 )
277);
278
279TRACE_EVENT(kvm_unmap_hva,
280 TP_PROTO(unsigned long hva),
281 TP_ARGS(hva),
282
283 TP_STRUCT__entry(
284 __field( unsigned long, hva )
285 ),
286
287 TP_fast_assign(
288 __entry->hva = hva;
289 ),
290
291 TP_printk("unmap hva 0x%lx\n", __entry->hva)
292);
293
294#endif /* _TRACE_KVM_H */
295
296/* This part must be outside protection */
297#include <trace/define_trace.h>
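
The mmu_map and mmu_invalidate events above fold the pte's may_read/may_write/may_execute bits into one flags nibble (0x4, 0x2, 0x1). A minimal illustration of that packing, outside the tracing code:

#include <stdio.h>

/* Same r/w/x encoding as the TP_fast_assign blocks above. */
static int pack_flags(int may_read, int may_write, int may_execute)
{
	return (may_read ? 0x4 : 0) |
	       (may_write ? 0x2 : 0) |
	       (may_execute ? 0x1 : 0);
}

int main(void)
{
	int flags = pack_flags(1, 0, 1);	/* readable and executable */

	printf("[%x]\n", flags);		/* prints [5] */
	return 0;
}
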
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index e87ecaa2c569..d5bc3750616e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -38,13 +38,6 @@ struct sca_block {
38 struct sca_entry cpu[64]; 38 struct sca_entry cpu[64];
39} __attribute__((packed)); 39} __attribute__((packed));
40 40
41#define KVM_NR_PAGE_SIZES 2
42#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8)
43#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
44#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
45#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
46#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
47
48#define CPUSTAT_STOPPED 0x80000000 41#define CPUSTAT_STOPPED 0x80000000
49#define CPUSTAT_WAIT 0x10000000 42#define CPUSTAT_WAIT 0x10000000
50#define CPUSTAT_ECALL_PEND 0x08000000 43#define CPUSTAT_ECALL_PEND 0x08000000
@@ -220,7 +213,6 @@ struct kvm_s390_interrupt_info {
220/* for local_interrupt.action_flags */ 213/* for local_interrupt.action_flags */
221#define ACTION_STORE_ON_STOP (1<<0) 214#define ACTION_STORE_ON_STOP (1<<0)
222#define ACTION_STOP_ON_STOP (1<<1) 215#define ACTION_STOP_ON_STOP (1<<1)
223#define ACTION_RELOADVCPU_ON_STOP (1<<2)
224 216
225struct kvm_s390_local_interrupt { 217struct kvm_s390_local_interrupt {
226 spinlock_t lock; 218 spinlock_t lock;
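
For reference, the KVM_HPAGE_* macros dropped from the s390 header above derive a large-page size from a mapping level. With the usual PAGE_SHIFT of 12 the arithmetic works out as in this standalone sketch (local macro names, not the kernel definitions):

#include <stdio.h>

#define PAGE_SHIFT		12
#define HPAGE_GFN_SHIFT(x)	(((x) - 1) * 8)
#define HPAGE_SHIFT(x)		(PAGE_SHIFT + HPAGE_GFN_SHIFT(x))
#define HPAGE_SIZE(x)		(1UL << HPAGE_SHIFT(x))

int main(void)
{
	/* Level 1 is a normal 4 KB page, level 2 a 1 MB large page. */
	printf("level 1: %lu KB\n", HPAGE_SIZE(1) >> 10);	/* 4    */
	printf("level 2: %lu KB\n", HPAGE_SIZE(2) >> 10);	/* 1024 */
	return 0;
}
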
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 3a74d8af0d69..78d967f180f4 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -107,14 +107,13 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
107 107
108static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) 108static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
109{ 109{
110 int ret, idx; 110 int ret;
111 111
112 /* No virtio-ccw notification? Get out quickly. */ 112 /* No virtio-ccw notification? Get out quickly. */
113 if (!vcpu->kvm->arch.css_support || 113 if (!vcpu->kvm->arch.css_support ||
114 (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) 114 (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
115 return -EOPNOTSUPP; 115 return -EOPNOTSUPP;
116 116
117 idx = srcu_read_lock(&vcpu->kvm->srcu);
118 /* 117 /*
119 * The layout is as follows: 118 * The layout is as follows:
120 * - gpr 2 contains the subchannel id (passed as addr) 119 * - gpr 2 contains the subchannel id (passed as addr)
@@ -125,7 +124,6 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
125 vcpu->run->s.regs.gprs[2], 124 vcpu->run->s.regs.gprs[2],
126 8, &vcpu->run->s.regs.gprs[3], 125 8, &vcpu->run->s.regs.gprs[3],
127 vcpu->run->s.regs.gprs[4]); 126 vcpu->run->s.regs.gprs[4]);
128 srcu_read_unlock(&vcpu->kvm->srcu, idx);
129 127
130 /* 128 /*
131 * Return cookie in gpr 2, but don't overwrite the register if the 129 * Return cookie in gpr 2, but don't overwrite the register if the
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 99d789e8a018..374a439ccc60 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -18,20 +18,27 @@
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include "kvm-s390.h" 19#include "kvm-s390.h"
20 20
21/* Convert real to absolute address by applying the prefix of the CPU */
22static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
23 unsigned long gaddr)
24{
25 unsigned long prefix = vcpu->arch.sie_block->prefix;
26 if (gaddr < 2 * PAGE_SIZE)
27 gaddr += prefix;
28 else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
29 gaddr -= prefix;
30 return gaddr;
31}
32
21static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, 33static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
22 void __user *gptr, 34 void __user *gptr,
23 int prefixing) 35 int prefixing)
24{ 36{
25 unsigned long prefix = vcpu->arch.sie_block->prefix;
26 unsigned long gaddr = (unsigned long) gptr; 37 unsigned long gaddr = (unsigned long) gptr;
27 unsigned long uaddr; 38 unsigned long uaddr;
28 39
29 if (prefixing) { 40 if (prefixing)
30 if (gaddr < 2 * PAGE_SIZE) 41 gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
31 gaddr += prefix;
32 else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE))
33 gaddr -= prefix;
34 }
35 uaddr = gmap_fault(gaddr, vcpu->arch.gmap); 42 uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
36 if (IS_ERR_VALUE(uaddr)) 43 if (IS_ERR_VALUE(uaddr))
37 uaddr = -EFAULT; 44 uaddr = -EFAULT;
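
The kvm_s390_real_to_abs() helper factored out above implements the s390 prefixing rule: the first two pages of guest real address space and the two pages starting at the CPU's prefix swap places, everything else maps through unchanged. A self-contained sketch of that mapping (the prefix value below is made up):

#include <stdio.h>

#define PAGE_SIZE 4096UL

static unsigned long real_to_abs(unsigned long prefix, unsigned long gaddr)
{
	if (gaddr < 2 * PAGE_SIZE)		/* low pages -> prefix area */
		gaddr += prefix;
	else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
		gaddr -= prefix;		/* prefix area -> low pages */
	return gaddr;
}

int main(void)
{
	unsigned long prefix = 0x20000;

	printf("0x1000  -> 0x%lx\n", real_to_abs(prefix, 0x1000));	/* 0x21000 */
	printf("0x21000 -> 0x%lx\n", real_to_abs(prefix, 0x21000));	/* 0x1000  */
	printf("0x9000  -> 0x%lx\n", real_to_abs(prefix, 0x9000));	/* 0x9000  */
	return 0;
}
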
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 5ee56e5acc23..5ddbbde6f65c 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -62,12 +62,6 @@ static int handle_stop(struct kvm_vcpu *vcpu)
62 62
63 trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); 63 trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
64 64
65 if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
66 vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
67 rc = SIE_INTERCEPT_RERUNVCPU;
68 vcpu->run->exit_reason = KVM_EXIT_INTR;
69 }
70
71 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { 65 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
72 atomic_set_mask(CPUSTAT_STOPPED, 66 atomic_set_mask(CPUSTAT_STOPPED,
73 &vcpu->arch.sie_block->cpuflags); 67 &vcpu->arch.sie_block->cpuflags);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 7f1f7ac5cf7f..5f79d2d79ca7 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -436,6 +436,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
436 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); 436 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
437 VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); 437 VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
438no_timer: 438no_timer:
439 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
439 spin_lock(&vcpu->arch.local_int.float_int->lock); 440 spin_lock(&vcpu->arch.local_int.float_int->lock);
440 spin_lock_bh(&vcpu->arch.local_int.lock); 441 spin_lock_bh(&vcpu->arch.local_int.lock);
441 add_wait_queue(&vcpu->wq, &wait); 442 add_wait_queue(&vcpu->wq, &wait);
@@ -455,6 +456,8 @@ no_timer:
455 remove_wait_queue(&vcpu->wq, &wait); 456 remove_wait_queue(&vcpu->wq, &wait);
456 spin_unlock_bh(&vcpu->arch.local_int.lock); 457 spin_unlock_bh(&vcpu->arch.local_int.lock);
457 spin_unlock(&vcpu->arch.local_int.float_int->lock); 458 spin_unlock(&vcpu->arch.local_int.float_int->lock);
459 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
460
458 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); 461 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
459 return 0; 462 return 0;
460} 463}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ed8064cb5c49..569494e01ec6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -695,9 +695,9 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
695 return 0; 695 return 0;
696} 696}
697 697
698static int __vcpu_run(struct kvm_vcpu *vcpu) 698static int vcpu_pre_run(struct kvm_vcpu *vcpu)
699{ 699{
700 int rc; 700 int rc, cpuflags;
701 701
702 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 702 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
703 703
@@ -715,28 +715,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
715 return rc; 715 return rc;
716 716
717 vcpu->arch.sie_block->icptcode = 0; 717 vcpu->arch.sie_block->icptcode = 0;
718 VCPU_EVENT(vcpu, 6, "entering sie flags %x", 718 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
719 atomic_read(&vcpu->arch.sie_block->cpuflags)); 719 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
720 trace_kvm_s390_sie_enter(vcpu, 720 trace_kvm_s390_sie_enter(vcpu, cpuflags);
721 atomic_read(&vcpu->arch.sie_block->cpuflags));
722 721
723 /* 722 return 0;
724 * As PF_VCPU will be used in fault handler, between guest_enter 723}
725 * and guest_exit should be no uaccess. 724
726 */ 725static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
727 preempt_disable(); 726{
728 kvm_guest_enter(); 727 int rc;
729 preempt_enable();
730 rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
731 kvm_guest_exit();
732 728
733 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 729 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
734 vcpu->arch.sie_block->icptcode); 730 vcpu->arch.sie_block->icptcode);
735 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 731 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
736 732
737 if (rc > 0) 733 if (exit_reason >= 0) {
738 rc = 0; 734 rc = 0;
739 if (rc < 0) { 735 } else {
740 if (kvm_is_ucontrol(vcpu->kvm)) { 736 if (kvm_is_ucontrol(vcpu->kvm)) {
741 rc = SIE_INTERCEPT_UCONTROL; 737 rc = SIE_INTERCEPT_UCONTROL;
742 } else { 738 } else {
@@ -747,6 +743,49 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
747 } 743 }
748 744
749 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); 745 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
746
747 if (rc == 0) {
748 if (kvm_is_ucontrol(vcpu->kvm))
749 rc = -EOPNOTSUPP;
750 else
751 rc = kvm_handle_sie_intercept(vcpu);
752 }
753
754 return rc;
755}
756
757static int __vcpu_run(struct kvm_vcpu *vcpu)
758{
759 int rc, exit_reason;
760
761 /*
762 * We try to hold kvm->srcu during most of vcpu_run (except when run-
763 * ning the guest), so that memslots (and other stuff) are protected
764 */
765 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
766
767 do {
768 rc = vcpu_pre_run(vcpu);
769 if (rc)
770 break;
771
772 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
773 /*
774 * As PF_VCPU will be used in fault handler, between
775 * guest_enter and guest_exit should be no uaccess.
776 */
777 preempt_disable();
778 kvm_guest_enter();
779 preempt_enable();
780 exit_reason = sie64a(vcpu->arch.sie_block,
781 vcpu->run->s.regs.gprs);
782 kvm_guest_exit();
783 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
784
785 rc = vcpu_post_run(vcpu, exit_reason);
786 } while (!signal_pending(current) && !rc);
787
788 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
750 return rc; 789 return rc;
751} 790}
752 791
@@ -755,7 +794,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
755 int rc; 794 int rc;
756 sigset_t sigsaved; 795 sigset_t sigsaved;
757 796
758rerun_vcpu:
759 if (vcpu->sigset_active) 797 if (vcpu->sigset_active)
760 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 798 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
761 799
@@ -788,19 +826,7 @@ rerun_vcpu:
788 } 826 }
789 827
790 might_fault(); 828 might_fault();
791 829 rc = __vcpu_run(vcpu);
792 do {
793 rc = __vcpu_run(vcpu);
794 if (rc)
795 break;
796 if (kvm_is_ucontrol(vcpu->kvm))
797 rc = -EOPNOTSUPP;
798 else
799 rc = kvm_handle_sie_intercept(vcpu);
800 } while (!signal_pending(current) && !rc);
801
802 if (rc == SIE_INTERCEPT_RERUNVCPU)
803 goto rerun_vcpu;
804 830
805 if (signal_pending(current) && !rc) { 831 if (signal_pending(current) && !rc) {
806 kvm_run->exit_reason = KVM_EXIT_INTR; 832 kvm_run->exit_reason = KVM_EXIT_INTR;
@@ -958,6 +984,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
958{ 984{
959 struct kvm_vcpu *vcpu = filp->private_data; 985 struct kvm_vcpu *vcpu = filp->private_data;
960 void __user *argp = (void __user *)arg; 986 void __user *argp = (void __user *)arg;
987 int idx;
961 long r; 988 long r;
962 989
963 switch (ioctl) { 990 switch (ioctl) {
@@ -971,7 +998,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
971 break; 998 break;
972 } 999 }
973 case KVM_S390_STORE_STATUS: 1000 case KVM_S390_STORE_STATUS:
1001 idx = srcu_read_lock(&vcpu->kvm->srcu);
974 r = kvm_s390_vcpu_store_status(vcpu, arg); 1002 r = kvm_s390_vcpu_store_status(vcpu, arg);
1003 srcu_read_unlock(&vcpu->kvm->srcu, idx);
975 break; 1004 break;
976 case KVM_S390_SET_INITIAL_PSW: { 1005 case KVM_S390_SET_INITIAL_PSW: {
977 psw_t psw; 1006 psw_t psw;
@@ -1067,12 +1096,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1067 return VM_FAULT_SIGBUS; 1096 return VM_FAULT_SIGBUS;
1068} 1097}
1069 1098
1070void kvm_arch_free_memslot(struct kvm_memory_slot *free, 1099void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1071 struct kvm_memory_slot *dont) 1100 struct kvm_memory_slot *dont)
1072{ 1101{
1073} 1102}
1074 1103
1075int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 1104int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1105 unsigned long npages)
1076{ 1106{
1077 return 0; 1107 return 0;
1078} 1108}
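
The reworked __vcpu_run() above keeps the kvm->srcu read lock held across the run loop and drops it only while the guest actually runs under SIE (the interrupt.c hunk does the same around the enabled-wait sleep). A compact stand-alone sketch of that bracketing, where the helpers are toy stand-ins for srcu_read_lock()/srcu_read_unlock() and sie64a(), not the real API:

#include <stdio.h>

static int read_lock(void)        { puts("srcu locked");   return 1; }
static void read_unlock(int idx)  { printf("srcu unlocked (%d)\n", idx); }
static int pre_run(void)          { return 0; }
static int run_guest(void)        { puts("guest runs unlocked"); return 0; }
static int post_run(int exit_reason) { return exit_reason ? exit_reason : 1; }

int main(void)
{
	int idx = read_lock();		/* held for most of the loop */
	int rc;

	do {
		rc = pre_run();
		if (rc)
			break;
		read_unlock(idx);	/* dropped only while the guest runs */
		rc = run_guest();
		idx = read_lock();
		rc = post_run(rc);	/* toy post_run() stops after one pass */
	} while (!rc);

	read_unlock(idx);
	return 0;
}
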
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index dc99f1ca4267..b44912a32949 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -28,8 +28,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
28extern unsigned long *vfacilities; 28extern unsigned long *vfacilities;
29 29
30/* negative values are error codes, positive values for internal conditions */ 30/* negative values are error codes, positive values for internal conditions */
31#define SIE_INTERCEPT_RERUNVCPU (1<<0) 31#define SIE_INTERCEPT_UCONTROL (1<<0)
32#define SIE_INTERCEPT_UCONTROL (1<<1)
33int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); 32int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
34 33
35#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ 34#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -91,8 +90,10 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
91 90
92static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) 91static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
93{ 92{
94 *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; 93 if (r1)
95 *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; 94 *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
95 if (r2)
96 *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
96} 97}
97 98
98static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) 99static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 59200ee275e5..2440602e6df1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -30,6 +30,38 @@
30#include "kvm-s390.h" 30#include "kvm-s390.h"
31#include "trace.h" 31#include "trace.h"
32 32
33/* Handle SCK (SET CLOCK) interception */
34static int handle_set_clock(struct kvm_vcpu *vcpu)
35{
36 struct kvm_vcpu *cpup;
37 s64 hostclk, val;
38 u64 op2;
39 int i;
40
41 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
42 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
43
44 op2 = kvm_s390_get_base_disp_s(vcpu);
45 if (op2 & 7) /* Operand must be on a doubleword boundary */
46 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
47 if (get_guest(vcpu, val, (u64 __user *) op2))
48 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
49
50 if (store_tod_clock(&hostclk)) {
51 kvm_s390_set_psw_cc(vcpu, 3);
52 return 0;
53 }
54 val = (val - hostclk) & ~0x3fUL;
55
56 mutex_lock(&vcpu->kvm->lock);
57 kvm_for_each_vcpu(i, cpup, vcpu->kvm)
58 cpup->arch.sie_block->epoch = val;
59 mutex_unlock(&vcpu->kvm->lock);
60
61 kvm_s390_set_psw_cc(vcpu, 0);
62 return 0;
63}
64
33static int handle_set_prefix(struct kvm_vcpu *vcpu) 65static int handle_set_prefix(struct kvm_vcpu *vcpu)
34{ 66{
35 u64 operand2; 67 u64 operand2;
@@ -128,6 +160,33 @@ static int handle_skey(struct kvm_vcpu *vcpu)
128 return 0; 160 return 0;
129} 161}
130 162
163static int handle_test_block(struct kvm_vcpu *vcpu)
164{
165 unsigned long hva;
166 gpa_t addr;
167 int reg2;
168
169 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
170 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
171
172 kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
173 addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
174 addr = kvm_s390_real_to_abs(vcpu, addr);
175
176 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
177 if (kvm_is_error_hva(hva))
178 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
179 /*
180 * We don't expect errors on modern systems, and do not care
181 * about storage keys (yet), so let's just clear the page.
182 */
183 if (clear_user((void __user *)hva, PAGE_SIZE) != 0)
184 return -EFAULT;
185 kvm_s390_set_psw_cc(vcpu, 0);
186 vcpu->run->s.regs.gprs[0] = 0;
187 return 0;
188}
189
131static int handle_tpi(struct kvm_vcpu *vcpu) 190static int handle_tpi(struct kvm_vcpu *vcpu)
132{ 191{
133 struct kvm_s390_interrupt_info *inti; 192 struct kvm_s390_interrupt_info *inti;
@@ -438,12 +497,14 @@ out_exception:
438 497
439static const intercept_handler_t b2_handlers[256] = { 498static const intercept_handler_t b2_handlers[256] = {
440 [0x02] = handle_stidp, 499 [0x02] = handle_stidp,
500 [0x04] = handle_set_clock,
441 [0x10] = handle_set_prefix, 501 [0x10] = handle_set_prefix,
442 [0x11] = handle_store_prefix, 502 [0x11] = handle_store_prefix,
443 [0x12] = handle_store_cpu_address, 503 [0x12] = handle_store_cpu_address,
444 [0x29] = handle_skey, 504 [0x29] = handle_skey,
445 [0x2a] = handle_skey, 505 [0x2a] = handle_skey,
446 [0x2b] = handle_skey, 506 [0x2b] = handle_skey,
507 [0x2c] = handle_test_block,
447 [0x30] = handle_io_inst, 508 [0x30] = handle_io_inst,
448 [0x31] = handle_io_inst, 509 [0x31] = handle_io_inst,
449 [0x32] = handle_io_inst, 510 [0x32] = handle_io_inst,
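
The new handle_set_clock() above reads the guest-requested TOD value, subtracts the current host TOD, rounds the difference down to a multiple of 0x40 and installs it as the epoch of every vcpu. The arithmetic, with made-up clock values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t hostclk  = 0x0001000000000100LL;  /* pretend host TOD           */
	int64_t guestclk = 0x0001000000400f3fLL;  /* value fetched from operand */

	/* Same computation as handle_set_clock(). */
	int64_t epoch = (guestclk - hostclk) & ~0x3fLL;

	printf("epoch delta = 0x%llx\n", (unsigned long long)epoch);  /* 0x400e00 */
	return 0;
}
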
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 15f960c06ff7..24ec1216596e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -274,13 +274,17 @@ struct x86_emulate_ctxt {
274 274
275 bool guest_mode; /* guest running a nested guest */ 275 bool guest_mode; /* guest running a nested guest */
276 bool perm_ok; /* do not check permissions if true */ 276 bool perm_ok; /* do not check permissions if true */
277 bool only_vendor_specific_insn; 277 bool ud; /* inject an #UD if host doesn't support insn */
278 278
279 bool have_exception; 279 bool have_exception;
280 struct x86_exception exception; 280 struct x86_exception exception;
281 281
282 /* decode cache */ 282 /*
283 u8 twobyte; 283 * decode cache
284 */
285
286 /* current opcode length in bytes */
287 u8 opcode_len;
284 u8 b; 288 u8 b;
285 u8 intercept; 289 u8 intercept;
286 u8 lock_prefix; 290 u8 lock_prefix;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c76ff74a98f2..ae5d7830855c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
79#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 79#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
80#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) 80#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
81 81
82static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
83{
84 /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
85 return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
86 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
87}
88
82#define SELECTOR_TI_MASK (1 << 2) 89#define SELECTOR_TI_MASK (1 << 2)
83#define SELECTOR_RPL_MASK 0x03 90#define SELECTOR_RPL_MASK 0x03
84 91
@@ -253,7 +260,6 @@ struct kvm_pio_request {
253 * mode. 260 * mode.
254 */ 261 */
255struct kvm_mmu { 262struct kvm_mmu {
256 void (*new_cr3)(struct kvm_vcpu *vcpu);
257 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); 263 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
258 unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); 264 unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
259 u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); 265 u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
261 bool prefault); 267 bool prefault);
262 void (*inject_page_fault)(struct kvm_vcpu *vcpu, 268 void (*inject_page_fault)(struct kvm_vcpu *vcpu,
263 struct x86_exception *fault); 269 struct x86_exception *fault);
264 void (*free)(struct kvm_vcpu *vcpu);
265 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, 270 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
266 struct x86_exception *exception); 271 struct x86_exception *exception);
267 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); 272 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
389 394
390 struct fpu guest_fpu; 395 struct fpu guest_fpu;
391 u64 xcr0; 396 u64 xcr0;
397 u64 guest_supported_xcr0;
398 u32 guest_xstate_size;
392 399
393 struct kvm_pio_request pio; 400 struct kvm_pio_request pio;
394 void *pio_data; 401 void *pio_data;
@@ -557,7 +564,9 @@ struct kvm_arch {
557 564
558 struct list_head assigned_dev_head; 565 struct list_head assigned_dev_head;
559 struct iommu_domain *iommu_domain; 566 struct iommu_domain *iommu_domain;
560 int iommu_flags; 567 bool iommu_noncoherent;
568#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
569 atomic_t noncoherent_dma_count;
561 struct kvm_pic *vpic; 570 struct kvm_pic *vpic;
562 struct kvm_ioapic *vioapic; 571 struct kvm_ioapic *vioapic;
563 struct kvm_pit *vpit; 572 struct kvm_pit *vpit;
@@ -780,11 +789,11 @@ void kvm_mmu_module_exit(void);
780 789
781void kvm_mmu_destroy(struct kvm_vcpu *vcpu); 790void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
782int kvm_mmu_create(struct kvm_vcpu *vcpu); 791int kvm_mmu_create(struct kvm_vcpu *vcpu);
783int kvm_mmu_setup(struct kvm_vcpu *vcpu); 792void kvm_mmu_setup(struct kvm_vcpu *vcpu);
784void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 793void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
785 u64 dirty_mask, u64 nx_mask, u64 x_mask); 794 u64 dirty_mask, u64 nx_mask, u64 x_mask);
786 795
787int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 796void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
788void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 797void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
789void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, 798void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
790 struct kvm_memory_slot *slot, 799 struct kvm_memory_slot *slot,
@@ -922,13 +931,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
922int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, 931int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
923 void *insn, int insn_len); 932 void *insn, int insn_len);
924void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); 933void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
934void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
925 935
926void kvm_enable_tdp(void); 936void kvm_enable_tdp(void);
927void kvm_disable_tdp(void); 937void kvm_disable_tdp(void);
928 938
929int complete_pio(struct kvm_vcpu *vcpu);
930bool kvm_check_iopl(struct kvm_vcpu *vcpu);
931
932static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 939static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
933{ 940{
934 return gpa; 941 return gpa;
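
gfn_to_index() above turns a gfn into its slot-relative index at a given mapping level by shifting both the gfn and the slot's base gfn down by that level's shift. A worked example using x86-style shifts of 9 bits per level (the slot base and gfn below are made up):

#include <stdio.h>

#define HPAGE_GFN_SHIFT(level)	(((level) - 1) * 9)	/* level 1 = 4K, 2 = 2M */

static unsigned long gfn_to_index(unsigned long gfn, unsigned long base_gfn,
				  int level)
{
	return (gfn >> HPAGE_GFN_SHIFT(level)) -
	       (base_gfn >> HPAGE_GFN_SHIFT(level));
}

int main(void)
{
	/* A slot starting at gfn 0x1000; gfn 0x1a37 lies in its sixth 2M page. */
	printf("4K index: %lu\n", gfn_to_index(0x1a37, 0x1000, 1));	/* 2615 */
	printf("2M index: %lu\n", gfn_to_index(0x1a37, 0x1000, 2));	/* 5    */
	return 0;
}
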
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index be8269b00e2a..d6b078e9fa28 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -14,6 +14,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
14 struct timespec *ts); 14 struct timespec *ts);
15void pvclock_resume(void); 15void pvclock_resume(void);
16 16
17void pvclock_touch_watchdogs(void);
18
17/* 19/*
18 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 20 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
19 * yielding a 64-bit result. 21 * yielding a 64-bit result.
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
211 __u32 padding[3]; 211 __u32 padding[3];
212}; 212};
213 213
214#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 214#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
215#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 215#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
216#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 216#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
217 217
218/* for KVM_SET_CPUID2 */ 218/* for KVM_SET_CPUID2 */
219struct kvm_cpuid2 { 219struct kvm_cpuid2 {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb0465090ae5..b93e09a0fa21 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@
536 536
537/* MSR_IA32_VMX_MISC bits */ 537/* MSR_IA32_VMX_MISC bits */
538#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) 538#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
539#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
539/* AMD-V MSRs */ 540/* AMD-V MSRs */
540 541
541#define MSR_VM_CR 0xc0010114 542#define MSR_VM_CR 0xc0010114
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1570e0741344..e6041094ff26 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -139,6 +139,7 @@ bool kvm_check_and_clear_guest_paused(void)
139 src = &hv_clock[cpu].pvti; 139 src = &hv_clock[cpu].pvti;
140 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { 140 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
141 src->flags &= ~PVCLOCK_GUEST_STOPPED; 141 src->flags &= ~PVCLOCK_GUEST_STOPPED;
142 pvclock_touch_watchdogs();
142 ret = true; 143 ret = true;
143 } 144 }
144 145
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index a16bae3f83b3..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -43,6 +43,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
43 return pv_tsc_khz; 43 return pv_tsc_khz;
44} 44}
45 45
46void pvclock_touch_watchdogs(void)
47{
48 touch_softlockup_watchdog_sync();
49 clocksource_touch_watchdog();
50 rcu_cpu_stall_reset();
51 reset_hung_task_detector();
52}
53
46static atomic64_t last_value = ATOMIC64_INIT(0); 54static atomic64_t last_value = ATOMIC64_INIT(0);
47 55
48void pvclock_resume(void) 56void pvclock_resume(void)
@@ -74,6 +82,11 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
74 version = __pvclock_read_cycles(src, &ret, &flags); 82 version = __pvclock_read_cycles(src, &ret, &flags);
75 } while ((src->version & 1) || version != src->version); 83 } while ((src->version & 1) || version != src->version);
76 84
85 if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
86 src->flags &= ~PVCLOCK_GUEST_STOPPED;
87 pvclock_touch_watchdogs();
88 }
89
77 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && 90 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
78 (flags & PVCLOCK_TSC_STABLE_BIT)) 91 (flags & PVCLOCK_TSC_STABLE_BIT))
79 return ret; 92 return ret;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..b89c5db2b832 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM
38 select PERF_EVENTS 38 select PERF_EVENTS
39 select HAVE_KVM_MSI 39 select HAVE_KVM_MSI
40 select HAVE_KVM_CPU_RELAX_INTERCEPT 40 select HAVE_KVM_CPU_RELAX_INTERCEPT
41 select KVM_VFIO
41 ---help--- 42 ---help---
42 Support hosting fully virtualized guest machines using hardware 43 Support hosting fully virtualized guest machines using hardware
43 virtualization extensions. You will need a fairly recent 44 virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index bf4fb04d0112..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
9 9
10kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ 10kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ 11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
12 $(KVM)/eventfd.o $(KVM)/irqchip.o 12 $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
13kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o 13kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o
14kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 14kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
15 15
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b110fe6c03d4..c6976257eff5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,6 +23,26 @@
23#include "mmu.h" 23#include "mmu.h"
24#include "trace.h" 24#include "trace.h"
25 25
26static u32 xstate_required_size(u64 xstate_bv)
27{
28 int feature_bit = 0;
29 u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
30
31 xstate_bv &= ~XSTATE_FPSSE;
32 while (xstate_bv) {
33 if (xstate_bv & 0x1) {
34 u32 eax, ebx, ecx, edx;
35 cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
36 ret = max(ret, eax + ebx);
37 }
38
39 xstate_bv >>= 1;
40 feature_bit++;
41 }
42
43 return ret;
44}
45
26void kvm_update_cpuid(struct kvm_vcpu *vcpu) 46void kvm_update_cpuid(struct kvm_vcpu *vcpu)
27{ 47{
28 struct kvm_cpuid_entry2 *best; 48 struct kvm_cpuid_entry2 *best;
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
46 apic->lapic_timer.timer_mode_mask = 1 << 17; 66 apic->lapic_timer.timer_mode_mask = 1 << 17;
47 } 67 }
48 68
69 best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
70 if (!best) {
71 vcpu->arch.guest_supported_xcr0 = 0;
72 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
73 } else {
74 vcpu->arch.guest_supported_xcr0 =
75 (best->eax | ((u64)best->edx << 32)) &
76 host_xcr0 & KVM_SUPPORTED_XCR0;
77 vcpu->arch.guest_xstate_size =
78 xstate_required_size(vcpu->arch.guest_supported_xcr0);
79 }
80
49 kvm_pmu_cpuid_update(vcpu); 81 kvm_pmu_cpuid_update(vcpu);
50} 82}
51 83
@@ -182,13 +214,35 @@ static bool supported_xcr0_bit(unsigned bit)
182{ 214{
183 u64 mask = ((u64)1 << bit); 215 u64 mask = ((u64)1 << bit);
184 216
185 return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; 217 return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
186} 218}
187 219
188#define F(x) bit(X86_FEATURE_##x) 220#define F(x) bit(X86_FEATURE_##x)
189 221
190static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 222static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
191 u32 index, int *nent, int maxnent) 223 u32 func, u32 index, int *nent, int maxnent)
224{
225 switch (func) {
226 case 0:
227 entry->eax = 1; /* only one leaf currently */
228 ++*nent;
229 break;
230 case 1:
231 entry->ecx = F(MOVBE);
232 ++*nent;
233 break;
234 default:
235 break;
236 }
237
238 entry->function = func;
239 entry->index = index;
240
241 return 0;
242}
243
244static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
245 u32 index, int *nent, int maxnent)
192{ 246{
193 int r; 247 int r;
194 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 248 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -383,6 +437,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
383 case 0xd: { 437 case 0xd: {
384 int idx, i; 438 int idx, i;
385 439
440 entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
441 entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
386 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 442 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
387 for (idx = 1, i = 1; idx < 64; ++idx) { 443 for (idx = 1, i = 1; idx < 64; ++idx) {
388 if (*nent >= maxnent) 444 if (*nent >= maxnent)
@@ -481,6 +537,15 @@ out:
481 return r; 537 return r;
482} 538}
483 539
540static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
541 u32 idx, int *nent, int maxnent, unsigned int type)
542{
543 if (type == KVM_GET_EMULATED_CPUID)
544 return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
545
546 return __do_cpuid_ent(entry, func, idx, nent, maxnent);
547}
548
484#undef F 549#undef F
485 550
486struct kvm_cpuid_param { 551struct kvm_cpuid_param {
@@ -495,8 +560,36 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
495 return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; 560 return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
496} 561}
497 562
498int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 563static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
499 struct kvm_cpuid_entry2 __user *entries) 564 __u32 num_entries, unsigned int ioctl_type)
565{
566 int i;
567 __u32 pad[3];
568
569 if (ioctl_type != KVM_GET_EMULATED_CPUID)
570 return false;
571
572 /*
573 * We want to make sure that ->padding is being passed clean from
574 * userspace in case we want to use it for something in the future.
575 *
576 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
577 * have to content ourselves with checking only the emulated side. /me
578 * sheds a tear.
579 */
580 for (i = 0; i < num_entries; i++) {
581 if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
582 return true;
583
584 if (pad[0] || pad[1] || pad[2])
585 return true;
586 }
587 return false;
588}
589
590int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
591 struct kvm_cpuid_entry2 __user *entries,
592 unsigned int type)
500{ 593{
501 struct kvm_cpuid_entry2 *cpuid_entries; 594 struct kvm_cpuid_entry2 *cpuid_entries;
502 int limit, nent = 0, r = -E2BIG, i; 595 int limit, nent = 0, r = -E2BIG, i;
@@ -513,8 +606,12 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
513 goto out; 606 goto out;
514 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 607 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
515 cpuid->nent = KVM_MAX_CPUID_ENTRIES; 608 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
609
610 if (sanity_check_entries(entries, cpuid->nent, type))
611 return -EINVAL;
612
516 r = -ENOMEM; 613 r = -ENOMEM;
517 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 614 cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
518 if (!cpuid_entries) 615 if (!cpuid_entries)
519 goto out; 616 goto out;
520 617
@@ -526,7 +623,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
526 continue; 623 continue;
527 624
528 r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, 625 r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
529 &nent, cpuid->nent); 626 &nent, cpuid->nent, type);
530 627
531 if (r) 628 if (r)
532 goto out_free; 629 goto out_free;
@@ -537,7 +634,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
537 limit = cpuid_entries[nent - 1].eax; 634 limit = cpuid_entries[nent - 1].eax;
538 for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) 635 for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
539 r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, 636 r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
540 &nent, cpuid->nent); 637 &nent, cpuid->nent, type);
541 638
542 if (r) 639 if (r)
543 goto out_free; 640 goto out_free;
@@ -661,6 +758,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
661 *edx = best->edx; 758 *edx = best->edx;
662 } else 759 } else
663 *eax = *ebx = *ecx = *edx = 0; 760 *eax = *ebx = *ecx = *edx = 0;
761 trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
664} 762}
665EXPORT_SYMBOL_GPL(kvm_cpuid); 763EXPORT_SYMBOL_GPL(kvm_cpuid);
666 764
@@ -676,6 +774,5 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
676 kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); 774 kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
677 kvm_register_write(vcpu, VCPU_REGS_RDX, edx); 775 kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
678 kvm_x86_ops->skip_emulated_instruction(vcpu); 776 kvm_x86_ops->skip_emulated_instruction(vcpu);
679 trace_kvm_cpuid(function, eax, ebx, ecx, edx);
680} 777}
681EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 778EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
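
xstate_required_size() above walks the enabled XCR0 bits and, for each state component, asks CPUID leaf 0xD for the component's size (EAX) and offset (EBX), keeping the largest offset + size as the required XSAVE area size. A rough model of that walk with invented component layouts (the real numbers come from cpuid_count()):

#include <stdio.h>
#include <stdint.h>

struct xsave_leaf { uint32_t size, offset; };

/* Made-up CPUID.0xD sub-leaf data, indexed by state component bit. */
static const struct xsave_leaf leaves[] = {
	[2] = { 256, 576 },	/* e.g. AVX (YMM) state */
	[3] = {  64, 832 },	/* invented component   */
};

static uint32_t xstate_required_size(uint64_t xstate_bv)
{
	uint32_t ret = 512 + 64;	/* legacy area + XSAVE header */
	int bit = 0;

	xstate_bv &= ~3ULL;		/* x87/SSE live in the legacy area */
	for (; xstate_bv; xstate_bv >>= 1, bit++) {
		if (xstate_bv & 1) {
			uint32_t end = leaves[bit].offset + leaves[bit].size;

			if (end > ret)
				ret = end;
		}
	}
	return ret;
}

int main(void)
{
	printf("size = %u bytes\n", xstate_required_size(0x7));	/* 832 */
	return 0;
}
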
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
6void kvm_update_cpuid(struct kvm_vcpu *vcpu); 6void kvm_update_cpuid(struct kvm_vcpu *vcpu);
7struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 7struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
8 u32 function, u32 index); 8 u32 function, u32 index);
9int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 9int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
10 struct kvm_cpuid_entry2 __user *entries); 10 struct kvm_cpuid_entry2 __user *entries,
11 unsigned int type);
11int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 12int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
12 struct kvm_cpuid *cpuid, 13 struct kvm_cpuid *cpuid,
13 struct kvm_cpuid_entry __user *entries); 14 struct kvm_cpuid_entry __user *entries);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ddc3f3d2afdb..07ffca0a89e9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -130,7 +130,7 @@
130#define Mov (1<<20) 130#define Mov (1<<20)
131/* Misc flags */ 131/* Misc flags */
132#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ 132#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
133#define VendorSpecific (1<<22) /* Vendor specific instruction */ 133#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
134#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ 134#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
135#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ 135#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
136#define Undefined (1<<25) /* No Such Instruction */ 136#define Undefined (1<<25) /* No Such Instruction */
@@ -785,9 +785,10 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
785 * @highbyte_regs specifies whether to decode AH,CH,DH,BH. 785 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
786 */ 786 */
787static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, 787static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
788 int highbyte_regs) 788 int byteop)
789{ 789{
790 void *p; 790 void *p;
791 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
791 792
792 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) 793 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
793 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; 794 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
@@ -1024,7 +1025,6 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1024 struct operand *op) 1025 struct operand *op)
1025{ 1026{
1026 unsigned reg = ctxt->modrm_reg; 1027 unsigned reg = ctxt->modrm_reg;
1027 int highbyte_regs = ctxt->rex_prefix == 0;
1028 1028
1029 if (!(ctxt->d & ModRM)) 1029 if (!(ctxt->d & ModRM))
1030 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); 1030 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
@@ -1045,13 +1045,9 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1045 } 1045 }
1046 1046
1047 op->type = OP_REG; 1047 op->type = OP_REG;
1048 if (ctxt->d & ByteOp) { 1048 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1049 op->addr.reg = decode_register(ctxt, reg, highbyte_regs); 1049 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1050 op->bytes = 1; 1050
1051 } else {
1052 op->addr.reg = decode_register(ctxt, reg, 0);
1053 op->bytes = ctxt->op_bytes;
1054 }
1055 fetch_register_operand(op); 1051 fetch_register_operand(op);
1056 op->orig_val = op->val; 1052 op->orig_val = op->val;
1057} 1053}
@@ -1082,12 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1082 ctxt->modrm_seg = VCPU_SREG_DS; 1078 ctxt->modrm_seg = VCPU_SREG_DS;
1083 1079
1084 if (ctxt->modrm_mod == 3) { 1080 if (ctxt->modrm_mod == 3) {
1085 int highbyte_regs = ctxt->rex_prefix == 0;
1086
1087 op->type = OP_REG; 1081 op->type = OP_REG;
1088 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 1082 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1089 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1083 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1090 highbyte_regs && (ctxt->d & ByteOp)); 1084 ctxt->d & ByteOp);
1091 if (ctxt->d & Sse) { 1085 if (ctxt->d & Sse) {
1092 op->type = OP_XMM; 1086 op->type = OP_XMM;
1093 op->bytes = 16; 1087 op->bytes = 16;
@@ -2961,6 +2955,46 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
2961 return X86EMUL_CONTINUE; 2955 return X86EMUL_CONTINUE;
2962} 2956}
2963 2957
2958#define FFL(x) bit(X86_FEATURE_##x)
2959
2960static int em_movbe(struct x86_emulate_ctxt *ctxt)
2961{
2962 u32 ebx, ecx, edx, eax = 1;
2963 u16 tmp;
2964
2965 /*
2966 * Check MOVBE is set in the guest-visible CPUID leaf.
2967 */
2968 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2969 if (!(ecx & FFL(MOVBE)))
2970 return emulate_ud(ctxt);
2971
2972 switch (ctxt->op_bytes) {
2973 case 2:
2974 /*
2975 * From MOVBE definition: "...When the operand size is 16 bits,
2976 * the upper word of the destination register remains unchanged
2977 * ..."
2978 *
2979 * Both casting ->valptr and ->val to u16 breaks strict aliasing
2980 * rules so we have to do the operation almost per hand.
2981 */
2982 tmp = (u16)ctxt->src.val;
2983 ctxt->dst.val &= ~0xffffUL;
2984 ctxt->dst.val |= (unsigned long)swab16(tmp);
2985 break;
2986 case 4:
2987 ctxt->dst.val = swab32((u32)ctxt->src.val);
2988 break;
2989 case 8:
2990 ctxt->dst.val = swab64(ctxt->src.val);
2991 break;
2992 default:
2993 return X86EMUL_PROPAGATE_FAULT;
2994 }
2995 return X86EMUL_CONTINUE;
2996}
2997
2964static int em_cr_write(struct x86_emulate_ctxt *ctxt) 2998static int em_cr_write(struct x86_emulate_ctxt *ctxt)
2965{ 2999{
2966 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) 3000 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
@@ -3256,6 +3290,18 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3256 return X86EMUL_CONTINUE; 3290 return X86EMUL_CONTINUE;
3257} 3291}
3258 3292
3293static int em_sahf(struct x86_emulate_ctxt *ctxt)
3294{
3295 u32 flags;
3296
3297 flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
3298 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3299
3300 ctxt->eflags &= ~0xffUL;
3301 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3302 return X86EMUL_CONTINUE;
3303}
3304
3259static int em_lahf(struct x86_emulate_ctxt *ctxt) 3305static int em_lahf(struct x86_emulate_ctxt *ctxt)
3260{ 3306{
3261 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; 3307 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
@@ -3502,7 +3548,7 @@ static const struct opcode group7_rm1[] = {
3502 3548
3503static const struct opcode group7_rm3[] = { 3549static const struct opcode group7_rm3[] = {
3504 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), 3550 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
3505 II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), 3551 II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall),
3506 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), 3552 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
3507 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), 3553 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
3508 DIP(SrcNone | Prot | Priv, stgi, check_svme), 3554 DIP(SrcNone | Prot | Priv, stgi, check_svme),
@@ -3587,7 +3633,7 @@ static const struct group_dual group7 = { {
3587 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), 3633 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3588 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3634 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
3589}, { 3635}, {
3590 I(SrcNone | Priv | VendorSpecific, em_vmcall), 3636 I(SrcNone | Priv | EmulateOnUD, em_vmcall),
3591 EXT(0, group7_rm1), 3637 EXT(0, group7_rm1),
3592 N, EXT(0, group7_rm3), 3638 N, EXT(0, group7_rm3),
3593 II(SrcNone | DstMem | Mov, em_smsw, smsw), N, 3639 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3750,7 +3796,8 @@ static const struct opcode opcode_table[256] = {
3750 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), 3796 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
3751 I(SrcImmFAddr | No64, em_call_far), N, 3797 I(SrcImmFAddr | No64, em_call_far), N,
3752 II(ImplicitOps | Stack, em_pushf, pushf), 3798 II(ImplicitOps | Stack, em_pushf, pushf),
3753 II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf), 3799 II(ImplicitOps | Stack, em_popf, popf),
3800 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
3754 /* 0xA0 - 0xA7 */ 3801 /* 0xA0 - 0xA7 */
3755 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3802 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
3756 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), 3803 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
@@ -3810,7 +3857,7 @@ static const struct opcode opcode_table[256] = {
3810static const struct opcode twobyte_table[256] = { 3857static const struct opcode twobyte_table[256] = {
3811 /* 0x00 - 0x0F */ 3858 /* 0x00 - 0x0F */
3812 G(0, group6), GD(0, &group7), N, N, 3859 G(0, group6), GD(0, &group7), N, N,
3813 N, I(ImplicitOps | VendorSpecific, em_syscall), 3860 N, I(ImplicitOps | EmulateOnUD, em_syscall),
3814 II(ImplicitOps | Priv, em_clts, clts), N, 3861 II(ImplicitOps | Priv, em_clts, clts), N,
3815 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 3862 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
3816 N, D(ImplicitOps | ModRM), N, N, 3863 N, D(ImplicitOps | ModRM), N, N,
@@ -3830,8 +3877,8 @@ static const struct opcode twobyte_table[256] = {
3830 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), 3877 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
3831 II(ImplicitOps | Priv, em_rdmsr, rdmsr), 3878 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
3832 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), 3879 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
3833 I(ImplicitOps | VendorSpecific, em_sysenter), 3880 I(ImplicitOps | EmulateOnUD, em_sysenter),
3834 I(ImplicitOps | Priv | VendorSpecific, em_sysexit), 3881 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
3835 N, N, 3882 N, N,
3836 N, N, N, N, N, N, N, N, 3883 N, N, N, N, N, N, N, N,
3837 /* 0x40 - 0x4F */ 3884 /* 0x40 - 0x4F */
@@ -3892,6 +3939,30 @@ static const struct opcode twobyte_table[256] = {
3892 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N 3939 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
3893}; 3940};
3894 3941
3942static const struct gprefix three_byte_0f_38_f0 = {
3943 I(DstReg | SrcMem | Mov, em_movbe), N, N, N
3944};
3945
3946static const struct gprefix three_byte_0f_38_f1 = {
3947 I(DstMem | SrcReg | Mov, em_movbe), N, N, N
3948};
3949
3950/*
3951 * Insns below are selected by the prefix which indexed by the third opcode
3952 * byte.
3953 */
3954static const struct opcode opcode_map_0f_38[256] = {
3955 /* 0x00 - 0x7f */
3956 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
3957 /* 0x80 - 0xef */
3958 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
3959 /* 0xf0 - 0xf1 */
3960 GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
3961 GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
3962 /* 0xf2 - 0xff */
3963 N, N, X4(N), X8(N)
3964};
3965
3895#undef D 3966#undef D
3896#undef N 3967#undef N
3897#undef G 3968#undef G
@@ -4040,7 +4111,8 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4040 case OpMem8: 4111 case OpMem8:
4041 ctxt->memop.bytes = 1; 4112 ctxt->memop.bytes = 1;
4042 if (ctxt->memop.type == OP_REG) { 4113 if (ctxt->memop.type == OP_REG) {
4043 ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); 4114 ctxt->memop.addr.reg = decode_register(ctxt,
4115 ctxt->modrm_rm, true);
4044 fetch_register_operand(&ctxt->memop); 4116 fetch_register_operand(&ctxt->memop);
4045 } 4117 }
4046 goto mem_common; 4118 goto mem_common;
@@ -4126,6 +4198,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4126 ctxt->_eip = ctxt->eip; 4198 ctxt->_eip = ctxt->eip;
4127 ctxt->fetch.start = ctxt->_eip; 4199 ctxt->fetch.start = ctxt->_eip;
4128 ctxt->fetch.end = ctxt->fetch.start + insn_len; 4200 ctxt->fetch.end = ctxt->fetch.start + insn_len;
4201 ctxt->opcode_len = 1;
4129 if (insn_len > 0) 4202 if (insn_len > 0)
4130 memcpy(ctxt->fetch.data, insn, insn_len); 4203 memcpy(ctxt->fetch.data, insn, insn_len);
4131 4204
@@ -4208,9 +4281,16 @@ done_prefixes:
4208 opcode = opcode_table[ctxt->b]; 4281 opcode = opcode_table[ctxt->b];
4209 /* Two-byte opcode? */ 4282 /* Two-byte opcode? */
4210 if (ctxt->b == 0x0f) { 4283 if (ctxt->b == 0x0f) {
4211 ctxt->twobyte = 1; 4284 ctxt->opcode_len = 2;
4212 ctxt->b = insn_fetch(u8, ctxt); 4285 ctxt->b = insn_fetch(u8, ctxt);
4213 opcode = twobyte_table[ctxt->b]; 4286 opcode = twobyte_table[ctxt->b];
4287
4288 /* 0F_38 opcode map */
4289 if (ctxt->b == 0x38) {
4290 ctxt->opcode_len = 3;
4291 ctxt->b = insn_fetch(u8, ctxt);
4292 opcode = opcode_map_0f_38[ctxt->b];
4293 }
4214 } 4294 }
4215 ctxt->d = opcode.flags; 4295 ctxt->d = opcode.flags;
4216 4296
@@ -4267,7 +4347,7 @@ done_prefixes:
4267 if (ctxt->d == 0 || (ctxt->d & NotImpl)) 4347 if (ctxt->d == 0 || (ctxt->d & NotImpl))
4268 return EMULATION_FAILED; 4348 return EMULATION_FAILED;
4269 4349
4270 if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) 4350 if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
4271 return EMULATION_FAILED; 4351 return EMULATION_FAILED;
4272 4352
4273 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) 4353 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
@@ -4540,8 +4620,10 @@ special_insn:
4540 goto writeback; 4620 goto writeback;
4541 } 4621 }
4542 4622
4543 if (ctxt->twobyte) 4623 if (ctxt->opcode_len == 2)
4544 goto twobyte_insn; 4624 goto twobyte_insn;
4625 else if (ctxt->opcode_len == 3)
4626 goto threebyte_insn;
4545 4627
4546 switch (ctxt->b) { 4628 switch (ctxt->b) {
4547 case 0x63: /* movsxd */ 4629 case 0x63: /* movsxd */
@@ -4726,6 +4808,8 @@ twobyte_insn:
4726 goto cannot_emulate; 4808 goto cannot_emulate;
4727 } 4809 }
4728 4810
4811threebyte_insn:
4812
4729 if (rc != X86EMUL_CONTINUE) 4813 if (rc != X86EMUL_CONTINUE)
4730 goto done; 4814 goto done;
4731 4815
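For reference, a minimal self-contained sketch of the dispatch introduced by the emulate.c hunks above: the decoder now tracks opcode_len (1, 2 or 3) instead of a twobyte flag and routes 0x0F 0x38 opcodes, such as MOVBE, through a dedicated third-byte map. The struct, fetch helper and string "tables" below are illustrative stand-ins, not KVM's real types:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the opcode_len = 1/2/3 dispatch added by the
 * patch, with hypothetical names and string "tables" instead of real ones. */
struct decode_ctx {
	const uint8_t *insn;
	size_t pos;
	int opcode_len;
	uint8_t b;
};

static uint8_t fetch_u8(struct decode_ctx *c)
{
	return c->insn[c->pos++];
}

static const char *pick_table(struct decode_ctx *c)
{
	c->opcode_len = 1;
	c->b = fetch_u8(c);
	if (c->b != 0x0f)
		return "opcode_table";		/* one-byte opcodes */
	c->opcode_len = 2;
	c->b = fetch_u8(c);
	if (c->b != 0x38)
		return "twobyte_table";		/* 0F xx opcodes */
	c->opcode_len = 3;
	c->b = fetch_u8(c);
	return "opcode_map_0f_38";		/* 0F 38 xx opcodes */
}

int main(void)
{
	/* MOVBE r32, m32 is 0F 38 F0 /r; ModRM 0x03 encodes eax, [ebx]. */
	const uint8_t movbe[] = { 0x0f, 0x38, 0xf0, 0x03 };
	struct decode_ctx c = { .insn = movbe, .pos = 0 };

	printf("table=%s opcode_len=%d opcode=0x%02x\n",
	       pick_table(&c), c.opcode_len, c.b);
	return 0;
}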
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dce0df8150df..40772ef0f2b1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2570 kvm_release_pfn_clean(pfn); 2570 kvm_release_pfn_clean(pfn);
2571} 2571}
2572 2572
2573static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
2574{
2575 mmu_free_roots(vcpu);
2576}
2577
2578static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, 2573static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
2579 bool no_dirty_log) 2574 bool no_dirty_log)
2580{ 2575{
@@ -3424,18 +3419,11 @@ out_unlock:
3424 return 0; 3419 return 0;
3425} 3420}
3426 3421
3427static void nonpaging_free(struct kvm_vcpu *vcpu) 3422static void nonpaging_init_context(struct kvm_vcpu *vcpu,
3428{ 3423 struct kvm_mmu *context)
3429 mmu_free_roots(vcpu);
3430}
3431
3432static int nonpaging_init_context(struct kvm_vcpu *vcpu,
3433 struct kvm_mmu *context)
3434{ 3424{
3435 context->new_cr3 = nonpaging_new_cr3;
3436 context->page_fault = nonpaging_page_fault; 3425 context->page_fault = nonpaging_page_fault;
3437 context->gva_to_gpa = nonpaging_gva_to_gpa; 3426 context->gva_to_gpa = nonpaging_gva_to_gpa;
3438 context->free = nonpaging_free;
3439 context->sync_page = nonpaging_sync_page; 3427 context->sync_page = nonpaging_sync_page;
3440 context->invlpg = nonpaging_invlpg; 3428 context->invlpg = nonpaging_invlpg;
3441 context->update_pte = nonpaging_update_pte; 3429 context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
3444 context->root_hpa = INVALID_PAGE; 3432 context->root_hpa = INVALID_PAGE;
3445 context->direct_map = true; 3433 context->direct_map = true;
3446 context->nx = false; 3434 context->nx = false;
3447 return 0;
3448} 3435}
3449 3436
3450void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 3437void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
3454} 3441}
3455EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); 3442EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
3456 3443
3457static void paging_new_cr3(struct kvm_vcpu *vcpu) 3444void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
3458{ 3445{
3459 pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
3460 mmu_free_roots(vcpu); 3446 mmu_free_roots(vcpu);
3461} 3447}
3462 3448
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
3471 vcpu->arch.mmu.inject_page_fault(vcpu, fault); 3457 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
3472} 3458}
3473 3459
3474static void paging_free(struct kvm_vcpu *vcpu)
3475{
3476 nonpaging_free(vcpu);
3477}
3478
3479static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, 3460static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
3480 unsigned access, int *nr_present) 3461 unsigned access, int *nr_present)
3481{ 3462{
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
3665 mmu->last_pte_bitmap = map; 3646 mmu->last_pte_bitmap = map;
3666} 3647}
3667 3648
3668static int paging64_init_context_common(struct kvm_vcpu *vcpu, 3649static void paging64_init_context_common(struct kvm_vcpu *vcpu,
3669 struct kvm_mmu *context, 3650 struct kvm_mmu *context,
3670 int level) 3651 int level)
3671{ 3652{
3672 context->nx = is_nx(vcpu); 3653 context->nx = is_nx(vcpu);
3673 context->root_level = level; 3654 context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3677 update_last_pte_bitmap(vcpu, context); 3658 update_last_pte_bitmap(vcpu, context);
3678 3659
3679 ASSERT(is_pae(vcpu)); 3660 ASSERT(is_pae(vcpu));
3680 context->new_cr3 = paging_new_cr3;
3681 context->page_fault = paging64_page_fault; 3661 context->page_fault = paging64_page_fault;
3682 context->gva_to_gpa = paging64_gva_to_gpa; 3662 context->gva_to_gpa = paging64_gva_to_gpa;
3683 context->sync_page = paging64_sync_page; 3663 context->sync_page = paging64_sync_page;
3684 context->invlpg = paging64_invlpg; 3664 context->invlpg = paging64_invlpg;
3685 context->update_pte = paging64_update_pte; 3665 context->update_pte = paging64_update_pte;
3686 context->free = paging_free;
3687 context->shadow_root_level = level; 3666 context->shadow_root_level = level;
3688 context->root_hpa = INVALID_PAGE; 3667 context->root_hpa = INVALID_PAGE;
3689 context->direct_map = false; 3668 context->direct_map = false;
3690 return 0;
3691} 3669}
3692 3670
3693static int paging64_init_context(struct kvm_vcpu *vcpu, 3671static void paging64_init_context(struct kvm_vcpu *vcpu,
3694 struct kvm_mmu *context) 3672 struct kvm_mmu *context)
3695{ 3673{
3696 return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); 3674 paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
3697} 3675}
3698 3676
3699static int paging32_init_context(struct kvm_vcpu *vcpu, 3677static void paging32_init_context(struct kvm_vcpu *vcpu,
3700 struct kvm_mmu *context) 3678 struct kvm_mmu *context)
3701{ 3679{
3702 context->nx = false; 3680 context->nx = false;
3703 context->root_level = PT32_ROOT_LEVEL; 3681 context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
3706 update_permission_bitmask(vcpu, context, false); 3684 update_permission_bitmask(vcpu, context, false);
3707 update_last_pte_bitmap(vcpu, context); 3685 update_last_pte_bitmap(vcpu, context);
3708 3686
3709 context->new_cr3 = paging_new_cr3;
3710 context->page_fault = paging32_page_fault; 3687 context->page_fault = paging32_page_fault;
3711 context->gva_to_gpa = paging32_gva_to_gpa; 3688 context->gva_to_gpa = paging32_gva_to_gpa;
3712 context->free = paging_free;
3713 context->sync_page = paging32_sync_page; 3689 context->sync_page = paging32_sync_page;
3714 context->invlpg = paging32_invlpg; 3690 context->invlpg = paging32_invlpg;
3715 context->update_pte = paging32_update_pte; 3691 context->update_pte = paging32_update_pte;
3716 context->shadow_root_level = PT32E_ROOT_LEVEL; 3692 context->shadow_root_level = PT32E_ROOT_LEVEL;
3717 context->root_hpa = INVALID_PAGE; 3693 context->root_hpa = INVALID_PAGE;
3718 context->direct_map = false; 3694 context->direct_map = false;
3719 return 0;
3720} 3695}
3721 3696
3722static int paging32E_init_context(struct kvm_vcpu *vcpu, 3697static void paging32E_init_context(struct kvm_vcpu *vcpu,
3723 struct kvm_mmu *context) 3698 struct kvm_mmu *context)
3724{ 3699{
3725 return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); 3700 paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
3726} 3701}
3727 3702
3728static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) 3703static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3729{ 3704{
3730 struct kvm_mmu *context = vcpu->arch.walk_mmu; 3705 struct kvm_mmu *context = vcpu->arch.walk_mmu;
3731 3706
3732 context->base_role.word = 0; 3707 context->base_role.word = 0;
3733 context->new_cr3 = nonpaging_new_cr3;
3734 context->page_fault = tdp_page_fault; 3708 context->page_fault = tdp_page_fault;
3735 context->free = nonpaging_free;
3736 context->sync_page = nonpaging_sync_page; 3709 context->sync_page = nonpaging_sync_page;
3737 context->invlpg = nonpaging_invlpg; 3710 context->invlpg = nonpaging_invlpg;
3738 context->update_pte = nonpaging_update_pte; 3711 context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3767 3740
3768 update_permission_bitmask(vcpu, context, false); 3741 update_permission_bitmask(vcpu, context, false);
3769 update_last_pte_bitmap(vcpu, context); 3742 update_last_pte_bitmap(vcpu, context);
3770
3771 return 0;
3772} 3743}
3773 3744
3774int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) 3745void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
3775{ 3746{
3776 int r;
3777 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); 3747 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
3778 ASSERT(vcpu); 3748 ASSERT(vcpu);
3779 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3749 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
3780 3750
3781 if (!is_paging(vcpu)) 3751 if (!is_paging(vcpu))
3782 r = nonpaging_init_context(vcpu, context); 3752 nonpaging_init_context(vcpu, context);
3783 else if (is_long_mode(vcpu)) 3753 else if (is_long_mode(vcpu))
3784 r = paging64_init_context(vcpu, context); 3754 paging64_init_context(vcpu, context);
3785 else if (is_pae(vcpu)) 3755 else if (is_pae(vcpu))
3786 r = paging32E_init_context(vcpu, context); 3756 paging32E_init_context(vcpu, context);
3787 else 3757 else
3788 r = paging32_init_context(vcpu, context); 3758 paging32_init_context(vcpu, context);
3789 3759
3790 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); 3760 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
3791 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 3761 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
3792 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 3762 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
3793 vcpu->arch.mmu.base_role.smep_andnot_wp 3763 vcpu->arch.mmu.base_role.smep_andnot_wp
3794 = smep && !is_write_protection(vcpu); 3764 = smep && !is_write_protection(vcpu);
3795
3796 return r;
3797} 3765}
3798EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 3766EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
3799 3767
3800int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 3768void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
3801 bool execonly) 3769 bool execonly)
3802{ 3770{
3803 ASSERT(vcpu); 3771 ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
3806 context->shadow_root_level = kvm_x86_ops->get_tdp_level(); 3774 context->shadow_root_level = kvm_x86_ops->get_tdp_level();
3807 3775
3808 context->nx = true; 3776 context->nx = true;
3809 context->new_cr3 = paging_new_cr3;
3810 context->page_fault = ept_page_fault; 3777 context->page_fault = ept_page_fault;
3811 context->gva_to_gpa = ept_gva_to_gpa; 3778 context->gva_to_gpa = ept_gva_to_gpa;
3812 context->sync_page = ept_sync_page; 3779 context->sync_page = ept_sync_page;
3813 context->invlpg = ept_invlpg; 3780 context->invlpg = ept_invlpg;
3814 context->update_pte = ept_update_pte; 3781 context->update_pte = ept_update_pte;
3815 context->free = paging_free;
3816 context->root_level = context->shadow_root_level; 3782 context->root_level = context->shadow_root_level;
3817 context->root_hpa = INVALID_PAGE; 3783 context->root_hpa = INVALID_PAGE;
3818 context->direct_map = false; 3784 context->direct_map = false;
3819 3785
3820 update_permission_bitmask(vcpu, context, true); 3786 update_permission_bitmask(vcpu, context, true);
3821 reset_rsvds_bits_mask_ept(vcpu, context, execonly); 3787 reset_rsvds_bits_mask_ept(vcpu, context, execonly);
3822
3823 return 0;
3824} 3788}
3825EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); 3789EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
3826 3790
3827static int init_kvm_softmmu(struct kvm_vcpu *vcpu) 3791static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
3828{ 3792{
3829 int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); 3793 kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
3830
3831 vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; 3794 vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
3832 vcpu->arch.walk_mmu->get_cr3 = get_cr3; 3795 vcpu->arch.walk_mmu->get_cr3 = get_cr3;
3833 vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; 3796 vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
3834 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 3797 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
3835
3836 return r;
3837} 3798}
3838 3799
3839static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) 3800static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
3840{ 3801{
3841 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; 3802 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
3842 3803
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
3873 3834
3874 update_permission_bitmask(vcpu, g_context, false); 3835 update_permission_bitmask(vcpu, g_context, false);
3875 update_last_pte_bitmap(vcpu, g_context); 3836 update_last_pte_bitmap(vcpu, g_context);
3876
3877 return 0;
3878} 3837}
3879 3838
3880static int init_kvm_mmu(struct kvm_vcpu *vcpu) 3839static void init_kvm_mmu(struct kvm_vcpu *vcpu)
3881{ 3840{
3882 if (mmu_is_nested(vcpu)) 3841 if (mmu_is_nested(vcpu))
3883 return init_kvm_nested_mmu(vcpu); 3842 return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
3887 return init_kvm_softmmu(vcpu); 3846 return init_kvm_softmmu(vcpu);
3888} 3847}
3889 3848
3890static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 3849void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
3891{ 3850{
3892 ASSERT(vcpu); 3851 ASSERT(vcpu);
3893 if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
3894 /* mmu.free() should set root_hpa = INVALID_PAGE */
3895 vcpu->arch.mmu.free(vcpu);
3896}
3897 3852
3898int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 3853 kvm_mmu_unload(vcpu);
3899{ 3854 init_kvm_mmu(vcpu);
3900 destroy_kvm_mmu(vcpu);
3901 return init_kvm_mmu(vcpu);
3902} 3855}
3903EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); 3856EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
3904 3857
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
3923void kvm_mmu_unload(struct kvm_vcpu *vcpu) 3876void kvm_mmu_unload(struct kvm_vcpu *vcpu)
3924{ 3877{
3925 mmu_free_roots(vcpu); 3878 mmu_free_roots(vcpu);
3879 WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
3926} 3880}
3927EXPORT_SYMBOL_GPL(kvm_mmu_unload); 3881EXPORT_SYMBOL_GPL(kvm_mmu_unload);
3928 3882
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
4281 return alloc_mmu_pages(vcpu); 4235 return alloc_mmu_pages(vcpu);
4282} 4236}
4283 4237
4284int kvm_mmu_setup(struct kvm_vcpu *vcpu) 4238void kvm_mmu_setup(struct kvm_vcpu *vcpu)
4285{ 4239{
4286 ASSERT(vcpu); 4240 ASSERT(vcpu);
4287 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 4241 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
4288 4242
4289 return init_kvm_mmu(vcpu); 4243 init_kvm_mmu(vcpu);
4290} 4244}
4291 4245
4292void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) 4246void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
4428 int nr_to_scan = sc->nr_to_scan; 4382 int nr_to_scan = sc->nr_to_scan;
4429 unsigned long freed = 0; 4383 unsigned long freed = 0;
4430 4384
4431 raw_spin_lock(&kvm_lock); 4385 spin_lock(&kvm_lock);
4432 4386
4433 list_for_each_entry(kvm, &vm_list, vm_list) { 4387 list_for_each_entry(kvm, &vm_list, vm_list) {
4434 int idx; 4388 int idx;
@@ -4478,9 +4432,8 @@ unlock:
4478 break; 4432 break;
4479 } 4433 }
4480 4434
4481 raw_spin_unlock(&kvm_lock); 4435 spin_unlock(&kvm_lock);
4482 return freed; 4436 return freed;
4483
4484} 4437}
4485 4438
4486static unsigned long 4439static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
4574{ 4527{
4575 ASSERT(vcpu); 4528 ASSERT(vcpu);
4576 4529
4577 destroy_kvm_mmu(vcpu); 4530 kvm_mmu_unload(vcpu);
4578 free_mmu_pages(vcpu); 4531 free_mmu_pages(vcpu);
4579 mmu_free_memory_caches(vcpu); 4532 mmu_free_memory_caches(vcpu);
4580} 4533}
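The mmu.c changes above drop the new_cr3 and free callbacks and make every init path return void, so resetting a context reduces to an unload followed by a re-init. A self-contained sketch of that flow, with the vcpu/mmu state reduced to a toy struct and stubbed helpers (not the kernel's types):

#include <stdbool.h>
#include <stdio.h>

#define INVALID_PAGE 0UL

/* Illustrative stand-in for the vcpu->arch.mmu state touched by the patch. */
struct mmu {
	unsigned long root_hpa;
	bool direct_map;
};

static void mmu_free_roots(struct mmu *mmu)
{
	mmu->root_hpa = INVALID_PAGE;	/* what the removed mmu.free() hook did */
}

static void kvm_mmu_unload(struct mmu *mmu)
{
	mmu_free_roots(mmu);
	/* the patch adds a WARN_ON(VALID_PAGE(root_hpa)) here */
}

static void init_kvm_mmu(struct mmu *mmu)
{
	mmu->root_hpa = INVALID_PAGE;
	mmu->direct_map = false;	/* paging / nonpaging / tdp pick this */
}

/* After the patch: no error code to propagate, just unload + init. */
static void kvm_mmu_reset_context(struct mmu *mmu)
{
	kvm_mmu_unload(mmu);
	init_kvm_mmu(mmu);
}

int main(void)
{
	struct mmu mmu = { .root_hpa = 0x1000, .direct_map = true };

	kvm_mmu_reset_context(&mmu);
	printf("root_hpa=%#lx direct_map=%d\n", mmu.root_hpa, mmu.direct_map);
	return 0;
}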
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 77e044a0f5f7..292615274358 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -70,8 +70,8 @@ enum {
70}; 70};
71 71
72int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); 72int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
73int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); 73void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
74int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 74void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
75 bool execonly); 75 bool execonly);
76 76
77static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) 77static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c0bc80391e40..c7168a5cff1b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1959 nested_svm_vmexit(svm); 1959 nested_svm_vmexit(svm);
1960} 1960}
1961 1961
1962static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 1962static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1963{ 1963{
1964 int r; 1964 kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1965
1966 r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1967 1965
1968 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; 1966 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
1969 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; 1967 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1971 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; 1969 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1972 vcpu->arch.mmu.shadow_root_level = get_npt_level(); 1970 vcpu->arch.mmu.shadow_root_level = get_npt_level();
1973 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 1971 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
1974
1975 return r;
1976} 1972}
1977 1973
1978static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) 1974static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2b2fce1b2009..b2fe1c252f35 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1498,7 +1498,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1498 break; 1498 break;
1499 1499
1500 if (i == NR_AUTOLOAD_MSRS) { 1500 if (i == NR_AUTOLOAD_MSRS) {
1501 printk_once(KERN_WARNING"Not enough mst switch entries. " 1501 printk_once(KERN_WARNING "Not enough msr switch entries. "
1502 "Can't add msr %x\n", msr); 1502 "Can't add msr %x\n", msr);
1503 return; 1503 return;
1504 } else if (i == m->nr) { 1504 } else if (i == m->nr) {
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
1898/* 1898/*
1899 * KVM wants to inject page-faults which it got to the guest. This function 1899 * KVM wants to inject page-faults which it got to the guest. This function
1900 * checks whether in a nested guest, we need to inject them to L1 or L2. 1900 * checks whether in a nested guest, we need to inject them to L1 or L2.
1901 * This function assumes it is called with the exit reason in vmcs02 being
1902 * a #PF exception (this is the only case in which KVM injects a #PF when L2
1903 * is running).
1904 */ 1901 */
1905static int nested_pf_handled(struct kvm_vcpu *vcpu) 1902static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
1906{ 1903{
1907 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 1904 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1908 1905
1909 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 1906 if (!(vmcs12->exception_bitmap & (1u << nr)))
1910 if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
1911 return 0; 1907 return 0;
1912 1908
1913 nested_vmx_vmexit(vcpu); 1909 nested_vmx_vmexit(vcpu);
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
1921 struct vcpu_vmx *vmx = to_vmx(vcpu); 1917 struct vcpu_vmx *vmx = to_vmx(vcpu);
1922 u32 intr_info = nr | INTR_INFO_VALID_MASK; 1918 u32 intr_info = nr | INTR_INFO_VALID_MASK;
1923 1919
1924 if (nr == PF_VECTOR && is_guest_mode(vcpu) && 1920 if (!reinject && is_guest_mode(vcpu) &&
1925 !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) 1921 nested_vmx_check_exception(vcpu, nr))
1926 return; 1922 return;
1927 1923
1928 if (has_error_code) { 1924 if (has_error_code) {
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2204#ifdef CONFIG_X86_64 2200#ifdef CONFIG_X86_64
2205 VM_EXIT_HOST_ADDR_SPACE_SIZE | 2201 VM_EXIT_HOST_ADDR_SPACE_SIZE |
2206#endif 2202#endif
2207 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; 2203 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
2204 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2205 if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
2206 !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
2207 nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2208 nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
2209 }
2208 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | 2210 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
2209 VM_EXIT_LOAD_IA32_EFER); 2211 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
2210 2212
2211 /* entry controls */ 2213 /* entry controls */
2212 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, 2214 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2226,7 +2228,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2226 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); 2228 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
2227 nested_vmx_procbased_ctls_low = 0; 2229 nested_vmx_procbased_ctls_low = 0;
2228 nested_vmx_procbased_ctls_high &= 2230 nested_vmx_procbased_ctls_high &=
2229 CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2231 CPU_BASED_VIRTUAL_INTR_PENDING |
2232 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
2230 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | 2233 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
2231 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | 2234 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
2232 CPU_BASED_CR3_STORE_EXITING | 2235 CPU_BASED_CR3_STORE_EXITING |
@@ -2252,13 +2255,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2252 nested_vmx_secondary_ctls_low = 0; 2255 nested_vmx_secondary_ctls_low = 0;
2253 nested_vmx_secondary_ctls_high &= 2256 nested_vmx_secondary_ctls_high &=
2254 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2257 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2258 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2255 SECONDARY_EXEC_WBINVD_EXITING; 2259 SECONDARY_EXEC_WBINVD_EXITING;
2256 2260
2257 if (enable_ept) { 2261 if (enable_ept) {
2258 /* nested EPT: emulate EPT also to L1 */ 2262 /* nested EPT: emulate EPT also to L1 */
2259 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; 2263 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
2260 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2264 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
2261 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; 2265 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
2266 VMX_EPT_INVEPT_BIT;
2262 nested_vmx_ept_caps &= vmx_capability.ept; 2267 nested_vmx_ept_caps &= vmx_capability.ept;
2263 /* 2268 /*
2264 * Since invept is completely emulated we support both global 2269 * Since invept is completely emulated we support both global
@@ -3380,8 +3385,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
3380 if (enable_ept) { 3385 if (enable_ept) {
3381 eptp = construct_eptp(cr3); 3386 eptp = construct_eptp(cr3);
3382 vmcs_write64(EPT_POINTER, eptp); 3387 vmcs_write64(EPT_POINTER, eptp);
3383 guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : 3388 if (is_paging(vcpu) || is_guest_mode(vcpu))
3384 vcpu->kvm->arch.ept_identity_map_addr; 3389 guest_cr3 = kvm_read_cr3(vcpu);
3390 else
3391 guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
3385 ept_load_pdptrs(vcpu); 3392 ept_load_pdptrs(vcpu);
3386 } 3393 }
3387 3394
@@ -4879,6 +4886,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
4879 hypercall[2] = 0xc1; 4886 hypercall[2] = 0xc1;
4880} 4887}
4881 4888
4889static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
4890{
4891 unsigned long always_on = VMXON_CR0_ALWAYSON;
4892
4893 if (nested_vmx_secondary_ctls_high &
4894 SECONDARY_EXEC_UNRESTRICTED_GUEST &&
4895 nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
4896 always_on &= ~(X86_CR0_PE | X86_CR0_PG);
4897 return (val & always_on) == always_on;
4898}
4899
4882/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ 4900/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
4883static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) 4901static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4884{ 4902{
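nested_cr0_valid() above relaxes the CR0 always-on mask when unrestricted guest is both supported by the nested VMX MSRs and enabled in vmcs12, allowing L2 to clear CR0.PE and CR0.PG. A self-contained sketch of the mask arithmetic; CR0_ALWAYSON below approximates VMXON_CR0_ALWAYSON as PE|NE|PG for illustration:

#include <stdbool.h>
#include <stdio.h>

#define X86_CR0_PE	(1UL << 0)
#define X86_CR0_NE	(1UL << 5)
#define X86_CR0_PG	(1UL << 31)

/* Approximation of VMXON_CR0_ALWAYSON, for illustration only. */
#define CR0_ALWAYSON	(X86_CR0_PE | X86_CR0_NE | X86_CR0_PG)

/* Same shape as the patch's nested_cr0_valid(): unrestricted-guest support
 * removes PE and PG from the bits an L2 guest must keep set. */
static bool cr0_valid(unsigned long val, bool unrestricted_guest)
{
	unsigned long always_on = CR0_ALWAYSON;

	if (unrestricted_guest)
		always_on &= ~(X86_CR0_PE | X86_CR0_PG);
	return (val & always_on) == always_on;
}

int main(void)
{
	unsigned long real_mode_cr0 = X86_CR0_NE;	/* PE=0, PG=0 */

	printf("without unrestricted guest: %d\n", cr0_valid(real_mode_cr0, false));
	printf("with    unrestricted guest: %d\n", cr0_valid(real_mode_cr0, true));
	return 0;
}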
@@ -4897,9 +4915,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4897 val = (val & ~vmcs12->cr0_guest_host_mask) | 4915 val = (val & ~vmcs12->cr0_guest_host_mask) |
4898 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); 4916 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
4899 4917
4900 /* TODO: will have to take unrestricted guest mode into 4918 if (!nested_cr0_valid(vmcs12, val))
4901 * account */
4902 if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
4903 return 1; 4919 return 1;
4904 4920
4905 if (kvm_set_cr0(vcpu, val)) 4921 if (kvm_set_cr0(vcpu, val))
@@ -6627,6 +6643,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6627 return 0; 6643 return 0;
6628 else if (is_page_fault(intr_info)) 6644 else if (is_page_fault(intr_info))
6629 return enable_ept; 6645 return enable_ept;
6646 else if (is_no_device(intr_info) &&
6647 !(nested_read_cr0(vmcs12) & X86_CR0_TS))
6648 return 0;
6630 return vmcs12->exception_bitmap & 6649 return vmcs12->exception_bitmap &
6631 (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 6650 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
6632 case EXIT_REASON_EXTERNAL_INTERRUPT: 6651 case EXIT_REASON_EXTERNAL_INTERRUPT:
@@ -6722,6 +6741,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6722 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 6741 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6723} 6742}
6724 6743
6744static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
6745{
6746 u64 delta_tsc_l1;
6747 u32 preempt_val_l1, preempt_val_l2, preempt_scale;
6748
6749 if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
6750 PIN_BASED_VMX_PREEMPTION_TIMER))
6751 return;
6752 preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
6753 MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
6754 preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
6755 delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
6756 - vcpu->arch.last_guest_tsc;
6757 preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
6758 if (preempt_val_l2 <= preempt_val_l1)
6759 preempt_val_l2 = 0;
6760 else
6761 preempt_val_l2 -= preempt_val_l1;
6762 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
6763}
6764
6725/* 6765/*
6726 * The guest has exited. See if we can fix it or if we need userspace 6766 * The guest has exited. See if we can fix it or if we need userspace
6727 * assistance. 6767 * assistance.
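nested_adjust_preemption_timer() above charges L1 for the TSC cycles spent outside L2: the elapsed L1 TSC delta is scaled by the MSR_IA32_VMX_MISC preemption-timer rate and subtracted from the timer value saved at the last L2 exit, clamping at zero. A self-contained sketch of just that arithmetic, with made-up sample numbers:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as nested_adjust_preemption_timer(): the preemption timer
 * ticks at TSC >> scale, so convert the elapsed L1 TSC into timer ticks and
 * subtract them from the value saved at the last L2 exit. */
static uint32_t adjust_preemption_timer(uint32_t saved_l2_value,
					uint64_t delta_tsc_l1,
					unsigned int preempt_scale)
{
	uint32_t elapsed_l1 = (uint32_t)(delta_tsc_l1 >> preempt_scale);

	if (saved_l2_value <= elapsed_l1)
		return 0;		/* the timer would already have fired */
	return saved_l2_value - elapsed_l1;
}

int main(void)
{
	/* Made-up numbers: timer rate = TSC/32 (scale 5), 1,000,000 TSC cycles
	 * elapsed since the last L2 exit, 50,000 timer ticks were left. */
	uint32_t left = adjust_preemption_timer(50000, 1000000, 5);

	printf("remaining preemption timer ticks: %u\n", left);	/* 50000 - 31250 */
	return 0;
}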
@@ -6736,20 +6776,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6736 if (vmx->emulation_required) 6776 if (vmx->emulation_required)
6737 return handle_invalid_guest_state(vcpu); 6777 return handle_invalid_guest_state(vcpu);
6738 6778
6739 /*
6740 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
6741 * we did not inject a still-pending event to L1 now because of
6742 * nested_run_pending, we need to re-enable this bit.
6743 */
6744 if (vmx->nested.nested_run_pending)
6745 kvm_make_request(KVM_REQ_EVENT, vcpu);
6746
6747 if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
6748 exit_reason == EXIT_REASON_VMRESUME))
6749 vmx->nested.nested_run_pending = 1;
6750 else
6751 vmx->nested.nested_run_pending = 0;
6752
6753 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6779 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
6754 nested_vmx_vmexit(vcpu); 6780 nested_vmx_vmexit(vcpu);
6755 return 1; 6781 return 1;
@@ -7061,9 +7087,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
7061 case INTR_TYPE_HARD_EXCEPTION: 7087 case INTR_TYPE_HARD_EXCEPTION:
7062 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 7088 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
7063 u32 err = vmcs_read32(error_code_field); 7089 u32 err = vmcs_read32(error_code_field);
7064 kvm_queue_exception_e(vcpu, vector, err); 7090 kvm_requeue_exception_e(vcpu, vector, err);
7065 } else 7091 } else
7066 kvm_queue_exception(vcpu, vector); 7092 kvm_requeue_exception(vcpu, vector);
7067 break; 7093 break;
7068 case INTR_TYPE_SOFT_INTR: 7094 case INTR_TYPE_SOFT_INTR:
7069 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); 7095 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
@@ -7146,6 +7172,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7146 atomic_switch_perf_msrs(vmx); 7172 atomic_switch_perf_msrs(vmx);
7147 debugctlmsr = get_debugctlmsr(); 7173 debugctlmsr = get_debugctlmsr();
7148 7174
7175 if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
7176 nested_adjust_preemption_timer(vcpu);
7149 vmx->__launched = vmx->loaded_vmcs->launched; 7177 vmx->__launched = vmx->loaded_vmcs->launched;
7150 asm( 7178 asm(
7151 /* Store host registers */ 7179 /* Store host registers */
@@ -7284,6 +7312,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7284 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 7312 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
7285 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); 7313 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
7286 7314
7315 /*
7316 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
7317 * we did not inject a still-pending event to L1 now because of
7318 * nested_run_pending, we need to re-enable this bit.
7319 */
7320 if (vmx->nested.nested_run_pending)
7321 kvm_make_request(KVM_REQ_EVENT, vcpu);
7322
7323 vmx->nested.nested_run_pending = 0;
7324
7287 vmx_complete_atomic_exit(vmx); 7325 vmx_complete_atomic_exit(vmx);
7288 vmx_recover_nmi_blocking(vmx); 7326 vmx_recover_nmi_blocking(vmx);
7289 vmx_complete_interrupts(vmx); 7327 vmx_complete_interrupts(vmx);
@@ -7410,8 +7448,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
7410 */ 7448 */
7411 if (is_mmio) 7449 if (is_mmio)
7412 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; 7450 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
7413 else if (vcpu->kvm->arch.iommu_domain && 7451 else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
7414 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
7415 ret = kvm_get_guest_memory_type(vcpu, gfn) << 7452 ret = kvm_get_guest_memory_type(vcpu, gfn) <<
7416 VMX_EPT_MT_EPTE_SHIFT; 7453 VMX_EPT_MT_EPTE_SHIFT;
7417 else 7454 else
@@ -7501,9 +7538,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
7501 return get_vmcs12(vcpu)->ept_pointer; 7538 return get_vmcs12(vcpu)->ept_pointer;
7502} 7539}
7503 7540
7504static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 7541static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
7505{ 7542{
7506 int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, 7543 kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
7507 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); 7544 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
7508 7545
7509 vcpu->arch.mmu.set_cr3 = vmx_set_cr3; 7546 vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
@@ -7511,8 +7548,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
7511 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; 7548 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
7512 7549
7513 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 7550 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
7514
7515 return r;
7516} 7551}
7517 7552
7518static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 7553static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -7520,6 +7555,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
7520 vcpu->arch.walk_mmu = &vcpu->arch.mmu; 7555 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
7521} 7556}
7522 7557
7558static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7559 struct x86_exception *fault)
7560{
7561 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7562
7563 WARN_ON(!is_guest_mode(vcpu));
7564
7565 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
7566 if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
7567 nested_vmx_vmexit(vcpu);
7568 else
7569 kvm_inject_page_fault(vcpu, fault);
7570}
7571
7523/* 7572/*
7524 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 7573 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
7525 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 7574 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7533,6 +7582,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7533{ 7582{
7534 struct vcpu_vmx *vmx = to_vmx(vcpu); 7583 struct vcpu_vmx *vmx = to_vmx(vcpu);
7535 u32 exec_control; 7584 u32 exec_control;
7585 u32 exit_control;
7536 7586
7537 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 7587 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
7538 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 7588 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7706,7 +7756,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7706 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 7756 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
7707 * bits are further modified by vmx_set_efer() below. 7757 * bits are further modified by vmx_set_efer() below.
7708 */ 7758 */
7709 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 7759 exit_control = vmcs_config.vmexit_ctrl;
7760 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7761 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7762 vmcs_write32(VM_EXIT_CONTROLS, exit_control);
7710 7763
7711 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7764 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7712 * emulated by vmx_set_efer(), below. 7765 * emulated by vmx_set_efer(), below.
@@ -7773,6 +7826,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7773 kvm_set_cr3(vcpu, vmcs12->guest_cr3); 7826 kvm_set_cr3(vcpu, vmcs12->guest_cr3);
7774 kvm_mmu_reset_context(vcpu); 7827 kvm_mmu_reset_context(vcpu);
7775 7828
7829 if (!enable_ept)
7830 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
7831
7776 /* 7832 /*
7777 * L1 may access the L2's PDPTR, so save them to construct vmcs12 7833 * L1 may access the L2's PDPTR, so save them to construct vmcs12
7778 */ 7834 */
@@ -7876,7 +7932,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7876 return 1; 7932 return 1;
7877 } 7933 }
7878 7934
7879 if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || 7935 if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
7880 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { 7936 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
7881 nested_vmx_entry_failure(vcpu, vmcs12, 7937 nested_vmx_entry_failure(vcpu, vmcs12,
7882 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); 7938 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -7938,6 +7994,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7938 7994
7939 enter_guest_mode(vcpu); 7995 enter_guest_mode(vcpu);
7940 7996
7997 vmx->nested.nested_run_pending = 1;
7998
7941 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 7999 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
7942 8000
7943 cpu = get_cpu(); 8001 cpu = get_cpu();
@@ -8005,7 +8063,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8005 u32 idt_vectoring; 8063 u32 idt_vectoring;
8006 unsigned int nr; 8064 unsigned int nr;
8007 8065
8008 if (vcpu->arch.exception.pending) { 8066 if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
8009 nr = vcpu->arch.exception.nr; 8067 nr = vcpu->arch.exception.nr;
8010 idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 8068 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
8011 8069
@@ -8023,7 +8081,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8023 } 8081 }
8024 8082
8025 vmcs12->idt_vectoring_info_field = idt_vectoring; 8083 vmcs12->idt_vectoring_info_field = idt_vectoring;
8026 } else if (vcpu->arch.nmi_pending) { 8084 } else if (vcpu->arch.nmi_injected) {
8027 vmcs12->idt_vectoring_info_field = 8085 vmcs12->idt_vectoring_info_field =
8028 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; 8086 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
8029 } else if (vcpu->arch.interrupt.pending) { 8087 } else if (vcpu->arch.interrupt.pending) {
@@ -8105,6 +8163,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8105 vmcs12->guest_pending_dbg_exceptions = 8163 vmcs12->guest_pending_dbg_exceptions =
8106 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8164 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
8107 8165
8166 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
8167 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
8168 vmcs12->vmx_preemption_timer_value =
8169 vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
8170
8108 /* 8171 /*
8109 * In some cases (usually, nested EPT), L2 is allowed to change its 8172 * In some cases (usually, nested EPT), L2 is allowed to change its
8110 * own CR3 without exiting. If it has changed it, we must keep it. 8173 * own CR3 without exiting. If it has changed it, we must keep it.
@@ -8130,6 +8193,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8130 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 8193 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8131 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 8194 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
8132 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 8195 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
8196 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
8197 vmcs12->guest_ia32_efer = vcpu->arch.efer;
8133 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 8198 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
8134 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 8199 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
8135 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); 8200 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@ -8201,7 +8266,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8201 * fpu_active (which may have changed). 8266 * fpu_active (which may have changed).
8202 * Note that vmx_set_cr0 refers to efer set above. 8267 * Note that vmx_set_cr0 refers to efer set above.
8203 */ 8268 */
8204 kvm_set_cr0(vcpu, vmcs12->host_cr0); 8269 vmx_set_cr0(vcpu, vmcs12->host_cr0);
8205 /* 8270 /*
8206 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need 8271 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
8207 * to apply the same changes to L1's vmcs. We just set cr0 correctly, 8272 * to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -8224,6 +8289,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8224 kvm_set_cr3(vcpu, vmcs12->host_cr3); 8289 kvm_set_cr3(vcpu, vmcs12->host_cr3);
8225 kvm_mmu_reset_context(vcpu); 8290 kvm_mmu_reset_context(vcpu);
8226 8291
8292 if (!enable_ept)
8293 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
8294
8227 if (enable_vpid) { 8295 if (enable_vpid) {
8228 /* 8296 /*
8229 * Trivially support vpid by letting L2s share their parent 8297 * Trivially support vpid by letting L2s share their parent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6..21ef1ba184ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -577,6 +577,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
577int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 577int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
578{ 578{
579 u64 xcr0; 579 u64 xcr0;
580 u64 valid_bits;
580 581
581 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 582 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
582 if (index != XCR_XFEATURE_ENABLED_MASK) 583 if (index != XCR_XFEATURE_ENABLED_MASK)
@@ -586,8 +587,16 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
586 return 1; 587 return 1;
587 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 588 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
588 return 1; 589 return 1;
589 if (xcr0 & ~host_xcr0) 590
591 /*
592 * Do not allow the guest to set bits that we do not support
593 * saving. However, xcr0 bit 0 is always set, even if the
594 * emulated CPU does not support XSAVE (see fx_init).
595 */
596 valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
597 if (xcr0 & ~valid_bits)
590 return 1; 598 return 1;
599
591 kvm_put_guest_xcr0(vcpu); 600 kvm_put_guest_xcr0(vcpu);
592 vcpu->arch.xcr0 = xcr0; 601 vcpu->arch.xcr0 = xcr0;
593 return 0; 602 return 0;
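The __kvm_set_xcr hunk above replaces the host_xcr0 test with a per-vCPU mask: the guest may only set xcr0 bits present in guest_supported_xcr0, plus the always-set XSTATE_FP bit. A self-contained sketch of that check; the XSTATE_* values are the architectural xcr0 bit positions used by the KVM_SUPPORTED_XCR0 definition in the x86.h hunk further down, everything else is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP	(1ULL << 0)
#define XSTATE_SSE	(1ULL << 1)
#define XSTATE_YMM	(1ULL << 2)

/* Same shape as the patched __kvm_set_xcr() validity checks. */
static bool xcr0_valid(uint64_t xcr0, uint64_t guest_supported_xcr0)
{
	uint64_t valid_bits = guest_supported_xcr0 | XSTATE_FP;

	if (!(xcr0 & XSTATE_FP))
		return false;			/* bit 0 must stay set */
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
		return false;			/* YMM requires SSE */
	return (xcr0 & ~valid_bits) == 0;	/* no unsupported bits */
}

int main(void)
{
	uint64_t guest_xcr0 = XSTATE_FP | XSTATE_SSE;	/* guest CPUID lacks AVX */

	printf("FP|SSE     -> %d\n", xcr0_valid(XSTATE_FP | XSTATE_SSE, guest_xcr0));
	printf("FP|SSE|YMM -> %d\n", xcr0_valid(XSTATE_FP | XSTATE_SSE | XSTATE_YMM,
						guest_xcr0));
	return 0;
}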
@@ -684,7 +693,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
684 693
685 vcpu->arch.cr3 = cr3; 694 vcpu->arch.cr3 = cr3;
686 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); 695 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
687 vcpu->arch.mmu.new_cr3(vcpu); 696 kvm_mmu_new_cr3(vcpu);
688 return 0; 697 return 0;
689} 698}
690EXPORT_SYMBOL_GPL(kvm_set_cr3); 699EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2564,6 +2573,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2564 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 2573 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2565 case KVM_CAP_SET_TSS_ADDR: 2574 case KVM_CAP_SET_TSS_ADDR:
2566 case KVM_CAP_EXT_CPUID: 2575 case KVM_CAP_EXT_CPUID:
2576 case KVM_CAP_EXT_EMUL_CPUID:
2567 case KVM_CAP_CLOCKSOURCE: 2577 case KVM_CAP_CLOCKSOURCE:
2568 case KVM_CAP_PIT: 2578 case KVM_CAP_PIT:
2569 case KVM_CAP_NOP_IO_DELAY: 2579 case KVM_CAP_NOP_IO_DELAY:
@@ -2673,15 +2683,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
2673 r = 0; 2683 r = 0;
2674 break; 2684 break;
2675 } 2685 }
2676 case KVM_GET_SUPPORTED_CPUID: { 2686 case KVM_GET_SUPPORTED_CPUID:
2687 case KVM_GET_EMULATED_CPUID: {
2677 struct kvm_cpuid2 __user *cpuid_arg = argp; 2688 struct kvm_cpuid2 __user *cpuid_arg = argp;
2678 struct kvm_cpuid2 cpuid; 2689 struct kvm_cpuid2 cpuid;
2679 2690
2680 r = -EFAULT; 2691 r = -EFAULT;
2681 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2692 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2682 goto out; 2693 goto out;
2683 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 2694
2684 cpuid_arg->entries); 2695 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
2696 ioctl);
2685 if (r) 2697 if (r)
2686 goto out; 2698 goto out;
2687 2699
@@ -2715,8 +2727,7 @@ static void wbinvd_ipi(void *garbage)
2715 2727
2716static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) 2728static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2717{ 2729{
2718 return vcpu->kvm->arch.iommu_domain && 2730 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
2719 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2720} 2731}
2721 2732
2722void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2733void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -2984,11 +2995,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2984static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, 2995static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2985 struct kvm_xsave *guest_xsave) 2996 struct kvm_xsave *guest_xsave)
2986{ 2997{
2987 if (cpu_has_xsave) 2998 if (cpu_has_xsave) {
2988 memcpy(guest_xsave->region, 2999 memcpy(guest_xsave->region,
2989 &vcpu->arch.guest_fpu.state->xsave, 3000 &vcpu->arch.guest_fpu.state->xsave,
2990 xstate_size); 3001 vcpu->arch.guest_xstate_size);
2991 else { 3002 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
3003 vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
3004 } else {
2992 memcpy(guest_xsave->region, 3005 memcpy(guest_xsave->region,
2993 &vcpu->arch.guest_fpu.state->fxsave, 3006 &vcpu->arch.guest_fpu.state->fxsave,
2994 sizeof(struct i387_fxsave_struct)); 3007 sizeof(struct i387_fxsave_struct));
@@ -3003,10 +3016,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3003 u64 xstate_bv = 3016 u64 xstate_bv =
3004 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; 3017 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3005 3018
3006 if (cpu_has_xsave) 3019 if (cpu_has_xsave) {
3020 /*
3021 * Here we allow setting states that are not present in
3022 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
3023 * with old userspace.
3024 */
3025 if (xstate_bv & ~KVM_SUPPORTED_XCR0)
3026 return -EINVAL;
3027 if (xstate_bv & ~host_xcr0)
3028 return -EINVAL;
3007 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3029 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3008 guest_xsave->region, xstate_size); 3030 guest_xsave->region, vcpu->arch.guest_xstate_size);
3009 else { 3031 } else {
3010 if (xstate_bv & ~XSTATE_FPSSE) 3032 if (xstate_bv & ~XSTATE_FPSSE)
3011 return -EINVAL; 3033 return -EINVAL;
3012 memcpy(&vcpu->arch.guest_fpu.state->fxsave, 3034 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
@@ -3042,9 +3064,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3042 3064
3043 for (i = 0; i < guest_xcrs->nr_xcrs; i++) 3065 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
3044 /* Only support XCR0 currently */ 3066 /* Only support XCR0 currently */
3045 if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { 3067 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3046 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, 3068 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3047 guest_xcrs->xcrs[0].value); 3069 guest_xcrs->xcrs[i].value);
3048 break; 3070 break;
3049 } 3071 }
3050 if (r) 3072 if (r)
@@ -4775,8 +4797,8 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4775 4797
4776static void init_decode_cache(struct x86_emulate_ctxt *ctxt) 4798static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4777{ 4799{
4778 memset(&ctxt->twobyte, 0, 4800 memset(&ctxt->opcode_len, 0,
4779 (void *)&ctxt->_regs - (void *)&ctxt->twobyte); 4801 (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
4780 4802
4781 ctxt->fetch.start = 0; 4803 ctxt->fetch.start = 0;
4782 ctxt->fetch.end = 0; 4804 ctxt->fetch.end = 0;
@@ -5094,8 +5116,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5094 ctxt->have_exception = false; 5116 ctxt->have_exception = false;
5095 ctxt->perm_ok = false; 5117 ctxt->perm_ok = false;
5096 5118
5097 ctxt->only_vendor_specific_insn 5119 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
5098 = emulation_type & EMULTYPE_TRAP_UD;
5099 5120
5100 r = x86_decode_insn(ctxt, insn, insn_len); 5121 r = x86_decode_insn(ctxt, insn, insn_len);
5101 5122
@@ -5263,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
5263 5284
5264 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); 5285 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5265 5286
5266 raw_spin_lock(&kvm_lock); 5287 spin_lock(&kvm_lock);
5267 list_for_each_entry(kvm, &vm_list, vm_list) { 5288 list_for_each_entry(kvm, &vm_list, vm_list) {
5268 kvm_for_each_vcpu(i, vcpu, kvm) { 5289 kvm_for_each_vcpu(i, vcpu, kvm) {
5269 if (vcpu->cpu != freq->cpu) 5290 if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5294,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
5273 send_ipi = 1; 5294 send_ipi = 1;
5274 } 5295 }
5275 } 5296 }
5276 raw_spin_unlock(&kvm_lock); 5297 spin_unlock(&kvm_lock);
5277 5298
5278 if (freq->old < freq->new && send_ipi) { 5299 if (freq->old < freq->new && send_ipi) {
5279 /* 5300 /*
@@ -5426,12 +5447,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
5426 struct kvm_vcpu *vcpu; 5447 struct kvm_vcpu *vcpu;
5427 int i; 5448 int i;
5428 5449
5429 raw_spin_lock(&kvm_lock); 5450 spin_lock(&kvm_lock);
5430 list_for_each_entry(kvm, &vm_list, vm_list) 5451 list_for_each_entry(kvm, &vm_list, vm_list)
5431 kvm_for_each_vcpu(i, vcpu, kvm) 5452 kvm_for_each_vcpu(i, vcpu, kvm)
5432 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); 5453 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5433 atomic_set(&kvm_guest_has_master_clock, 0); 5454 atomic_set(&kvm_guest_has_master_clock, 0);
5434 raw_spin_unlock(&kvm_lock); 5455 spin_unlock(&kvm_lock);
5435} 5456}
5436 5457
5437static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); 5458static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5945,10 +5966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5945 5966
5946 vcpu->mode = IN_GUEST_MODE; 5967 vcpu->mode = IN_GUEST_MODE;
5947 5968
5969 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5970
5948 /* We should set ->mode before check ->requests, 5971 /* We should set ->mode before check ->requests,
5949 * see the comment in make_all_cpus_request. 5972 * see the comment in make_all_cpus_request.
5950 */ 5973 */
5951 smp_mb(); 5974 smp_mb__after_srcu_read_unlock();
5952 5975
5953 local_irq_disable(); 5976 local_irq_disable();
5954 5977
@@ -5958,12 +5981,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5958 smp_wmb(); 5981 smp_wmb();
5959 local_irq_enable(); 5982 local_irq_enable();
5960 preempt_enable(); 5983 preempt_enable();
5984 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5961 r = 1; 5985 r = 1;
5962 goto cancel_injection; 5986 goto cancel_injection;
5963 } 5987 }
5964 5988
5965 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5966
5967 if (req_immediate_exit) 5989 if (req_immediate_exit)
5968 smp_send_reschedule(vcpu->cpu); 5990 smp_send_reschedule(vcpu->cpu);
5969 5991
@@ -6688,7 +6710,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6688 if (r) 6710 if (r)
6689 return r; 6711 return r;
6690 kvm_vcpu_reset(vcpu); 6712 kvm_vcpu_reset(vcpu);
6691 r = kvm_mmu_setup(vcpu); 6713 kvm_mmu_setup(vcpu);
6692 vcpu_put(vcpu); 6714 vcpu_put(vcpu);
6693 6715
6694 return r; 6716 return r;
@@ -6940,6 +6962,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6940 6962
6941 vcpu->arch.ia32_tsc_adjust_msr = 0x0; 6963 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
6942 vcpu->arch.pv_time_enabled = false; 6964 vcpu->arch.pv_time_enabled = false;
6965
6966 vcpu->arch.guest_supported_xcr0 = 0;
6967 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
6968
6943 kvm_async_pf_hash_reset(vcpu); 6969 kvm_async_pf_hash_reset(vcpu);
6944 kvm_pmu_init(vcpu); 6970 kvm_pmu_init(vcpu);
6945 6971
@@ -6981,6 +7007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6981 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 7007 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6982 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); 7008 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
6983 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 7009 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
7010 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
6984 7011
6985 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 7012 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
6986 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 7013 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
@@ -7065,7 +7092,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
7065 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7092 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7066} 7093}
7067 7094
7068void kvm_arch_free_memslot(struct kvm_memory_slot *free, 7095void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
7069 struct kvm_memory_slot *dont) 7096 struct kvm_memory_slot *dont)
7070{ 7097{
7071 int i; 7098 int i;
@@ -7086,7 +7113,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
7086 } 7113 }
7087} 7114}
7088 7115
7089int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 7116int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7117 unsigned long npages)
7090{ 7118{
7091 int i; 7119 int i;
7092 7120
@@ -7283,7 +7311,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
7283 int r; 7311 int r;
7284 7312
7285 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || 7313 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
7286 is_error_page(work->page)) 7314 work->wakeup_all)
7287 return; 7315 return;
7288 7316
7289 r = kvm_mmu_reload(vcpu); 7317 r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7421,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
7393 struct x86_exception fault; 7421 struct x86_exception fault;
7394 7422
7395 trace_kvm_async_pf_ready(work->arch.token, work->gva); 7423 trace_kvm_async_pf_ready(work->arch.token, work->gva);
7396 if (is_error_page(work->page)) 7424 if (work->wakeup_all)
7397 work->arch.token = ~0; /* broadcast wakeup */ 7425 work->arch.token = ~0; /* broadcast wakeup */
7398 else 7426 else
7399 kvm_del_async_pf_gfn(vcpu, work->arch.gfn); 7427 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
@@ -7420,6 +7448,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
7420 kvm_x86_ops->interrupt_allowed(vcpu); 7448 kvm_x86_ops->interrupt_allowed(vcpu);
7421} 7449}
7422 7450
7451void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
7452{
7453 atomic_inc(&kvm->arch.noncoherent_dma_count);
7454}
7455EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
7456
7457void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
7458{
7459 atomic_dec(&kvm->arch.noncoherent_dma_count);
7460}
7461EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
7462
7463bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
7464{
7465 return atomic_read(&kvm->arch.noncoherent_dma_count);
7466}
7467EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
7468
7423EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 7469EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
7424EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 7470EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
7425EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); 7471EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
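The three hooks added above give x86 a per-VM counter of noncoherent-DMA users, which other code can query when it has to honour guest memory types. A minimal sketch of the intended calling pattern, not part of the patch (the example_* function names are hypothetical):

        /* Attach/detach paths of a hypothetical noncoherent device. */
        static void example_attach_noncoherent_device(struct kvm *kvm)
        {
                kvm_arch_register_noncoherent_dma(kvm);
        }

        static void example_detach_noncoherent_device(struct kvm *kvm)
        {
                kvm_arch_unregister_noncoherent_dma(kvm);
        }

        static bool example_needs_uncached_mapping(struct kvm *kvm)
        {
                /* memory-type decisions can key off the counter */
                return kvm_arch_has_noncoherent_dma(kvm);
        }
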
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e224f7a671b6..587fb9ede436 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
122 gva_t addr, void *val, unsigned int bytes, 122 gva_t addr, void *val, unsigned int bytes,
123 struct x86_exception *exception); 123 struct x86_exception *exception);
124 124
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
125extern u64 host_xcr0; 126extern u64 host_xcr0;
126 127
127extern struct static_key kvm_no_apic_vcpu; 128extern struct static_key kvm_no_apic_vcpu;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0fbbc7aa02cb..9523d2ad7535 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -142,7 +142,7 @@ struct kvm;
142struct kvm_vcpu; 142struct kvm_vcpu;
143extern struct kmem_cache *kvm_vcpu_cache; 143extern struct kmem_cache *kvm_vcpu_cache;
144 144
145extern raw_spinlock_t kvm_lock; 145extern spinlock_t kvm_lock;
146extern struct list_head vm_list; 146extern struct list_head vm_list;
147 147
148struct kvm_io_range { 148struct kvm_io_range {
@@ -189,8 +189,7 @@ struct kvm_async_pf {
189 gva_t gva; 189 gva_t gva;
190 unsigned long addr; 190 unsigned long addr;
191 struct kvm_arch_async_pf arch; 191 struct kvm_arch_async_pf arch;
192 struct page *page; 192 bool wakeup_all;
193 bool done;
194}; 193};
195 194
196void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); 195void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
@@ -508,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm,
508 struct kvm_userspace_memory_region *mem); 507 struct kvm_userspace_memory_region *mem);
509int __kvm_set_memory_region(struct kvm *kvm, 508int __kvm_set_memory_region(struct kvm *kvm,
510 struct kvm_userspace_memory_region *mem); 509 struct kvm_userspace_memory_region *mem);
511void kvm_arch_free_memslot(struct kvm_memory_slot *free, 510void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
512 struct kvm_memory_slot *dont); 511 struct kvm_memory_slot *dont);
513int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); 512int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
513 unsigned long npages);
514void kvm_arch_memslots_updated(struct kvm *kvm); 514void kvm_arch_memslots_updated(struct kvm *kvm);
515int kvm_arch_prepare_memory_region(struct kvm *kvm, 515int kvm_arch_prepare_memory_region(struct kvm *kvm,
516 struct kvm_memory_slot *memslot, 516 struct kvm_memory_slot *memslot,
@@ -671,6 +671,25 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
671} 671}
672#endif 672#endif
673 673
674#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
675void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
676void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
677bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
678#else
679static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
680{
681}
682
683static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
684{
685}
686
687static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
688{
689 return false;
690}
691#endif
692
674static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) 693static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
675{ 694{
676#ifdef __KVM_HAVE_ARCH_WQP 695#ifdef __KVM_HAVE_ARCH_WQP
@@ -747,9 +766,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
747int kvm_request_irq_source_id(struct kvm *kvm); 766int kvm_request_irq_source_id(struct kvm *kvm);
748void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 767void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
749 768
750/* For vcpu->arch.iommu_flags */
751#define KVM_IOMMU_CACHE_COHERENCY 0x1
752
753#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 769#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
754int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 770int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
755void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 771void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
@@ -789,7 +805,7 @@ static inline void kvm_guest_enter(void)
789 805
790 /* KVM does not hold any references to rcu protected data when it 806 /* KVM does not hold any references to rcu protected data when it
791 * switches CPU into a guest mode. In fact switching to a guest mode 807 * switches CPU into a guest mode. In fact switching to a guest mode
792 * is very similar to exiting to userspase from rcu point of view. In 808 * is very similar to exiting to userspace from rcu point of view. In
793 * addition CPU may stay in a guest mode for quite a long time (up to 809 * addition CPU may stay in a guest mode for quite a long time (up to
794 * one time slice). Lets treat guest mode as quiescent state, just like 810 * one time slice). Lets treat guest mode as quiescent state, just like
795 * we do with user-mode execution. 811 * we do with user-mode execution.
@@ -842,13 +858,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
842 return gfn_to_memslot(kvm, gfn)->id; 858 return gfn_to_memslot(kvm, gfn)->id;
843} 859}
844 860
845static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
846{
847 /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
848 return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
849 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
850}
851
852static inline gfn_t 861static inline gfn_t
853hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) 862hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
854{ 863{
@@ -1066,6 +1075,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
1066 1075
1067extern struct kvm_device_ops kvm_mpic_ops; 1076extern struct kvm_device_ops kvm_mpic_ops;
1068extern struct kvm_device_ops kvm_xics_ops; 1077extern struct kvm_device_ops kvm_xics_ops;
1078extern struct kvm_device_ops kvm_vfio_ops;
1069 1079
1070#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1080#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1071 1081
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f7efc8604652..6f7ffa460089 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -286,6 +286,14 @@ static inline void lockup_detector_init(void)
286} 286}
287#endif 287#endif
288 288
289#ifdef CONFIG_DETECT_HUNG_TASK
290void reset_hung_task_detector(void);
291#else
292static inline void reset_hung_task_detector(void)
293{
294}
295#endif
296
289/* Attach to any functions which should be ignored in wchan output. */ 297/* Attach to any functions which should be ignored in wchan output. */
290#define __sched __attribute__((__section__(".sched.text"))) 298#define __sched __attribute__((__section__(".sched.text")))
291 299
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index c114614ed172..9b058eecd403 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -237,4 +237,18 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
237 __srcu_read_unlock(sp, idx); 237 __srcu_read_unlock(sp, idx);
238} 238}
239 239
240/**
241 * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
242 *
243 * Converts the preceding srcu_read_unlock into a two-way memory barrier.
244 *
245 * Call this after srcu_read_unlock, to guarantee that all memory operations
246 * that occur after smp_mb__after_srcu_read_unlock will appear to happen after
247 * the preceding srcu_read_unlock.
248 */
249static inline void smp_mb__after_srcu_read_unlock(void)
250{
251 /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
252}
253
240#endif 254#endif
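smp_mb__after_srcu_read_unlock() above costs nothing because __srcu_read_unlock() already ends with smp_mb(); the helper only names that ordering for callers. A minimal usage sketch, assuming a hypothetical srcu_struct and flag rather than anything in this series:

        #include <linux/srcu.h>

        /* initialised elsewhere with init_srcu_struct() */
        static struct srcu_struct example_srcu;
        static int example_flag;

        static int example_reader(void)
        {
                int idx;

                idx = srcu_read_lock(&example_srcu);
                /* ... read data protected by example_srcu ... */
                srcu_read_unlock(&example_srcu, idx);
                smp_mb__after_srcu_read_unlock();

                /* Ordered after the unlock above without a second smp_mb(). */
                return ACCESS_ONCE(example_flag);
        }
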
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7005d1109ec9..131a0bda7aec 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
296 296
297TRACE_EVENT( 297TRACE_EVENT(
298 kvm_async_pf_completed, 298 kvm_async_pf_completed,
299 TP_PROTO(unsigned long address, struct page *page, u64 gva), 299 TP_PROTO(unsigned long address, u64 gva),
300 TP_ARGS(address, page, gva), 300 TP_ARGS(address, gva),
301 301
302 TP_STRUCT__entry( 302 TP_STRUCT__entry(
303 __field(unsigned long, address) 303 __field(unsigned long, address)
304 __field(pfn_t, pfn)
305 __field(u64, gva) 304 __field(u64, gva)
306 ), 305 ),
307 306
308 TP_fast_assign( 307 TP_fast_assign(
309 __entry->address = address; 308 __entry->address = address;
310 __entry->pfn = page ? page_to_pfn(page) : 0;
311 __entry->gva = gva; 309 __entry->gva = gva;
312 ), 310 ),
313 311
314 TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, 312 TP_printk("gva %#llx address %#lx", __entry->gva,
315 __entry->address, __entry->pfn) 313 __entry->address)
316); 314);
317 315
318#endif 316#endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 99c25338ede8..902f12461873 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info {
518/* machine type bits, to be used as argument to KVM_CREATE_VM */ 518/* machine type bits, to be used as argument to KVM_CREATE_VM */
519#define KVM_VM_S390_UCONTROL 1 519#define KVM_VM_S390_UCONTROL 1
520 520
 521/* on ppc, 0 indicates default, 1 should force HV and 2 PR */
522#define KVM_VM_PPC_HV 1
523#define KVM_VM_PPC_PR 2
524
521#define KVM_S390_SIE_PAGE_OFFSET 1 525#define KVM_S390_SIE_PAGE_OFFSET 1
522 526
523/* 527/*
@@ -541,6 +545,7 @@ struct kvm_ppc_smmu_info {
541#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 545#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
542#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 546#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
543#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 547#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
548#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
544 549
545/* 550/*
546 * Extension capability list. 551 * Extension capability list.
@@ -668,6 +673,7 @@ struct kvm_ppc_smmu_info {
668#define KVM_CAP_IRQ_XICS 92 673#define KVM_CAP_IRQ_XICS 92
669#define KVM_CAP_ARM_EL1_32BIT 93 674#define KVM_CAP_ARM_EL1_32BIT 93
670#define KVM_CAP_SPAPR_MULTITCE 94 675#define KVM_CAP_SPAPR_MULTITCE 94
676#define KVM_CAP_EXT_EMUL_CPUID 95
671 677
672#ifdef KVM_CAP_IRQ_ROUTING 678#ifdef KVM_CAP_IRQ_ROUTING
673 679
@@ -843,6 +849,10 @@ struct kvm_device_attr {
843#define KVM_DEV_TYPE_FSL_MPIC_20 1 849#define KVM_DEV_TYPE_FSL_MPIC_20 1
844#define KVM_DEV_TYPE_FSL_MPIC_42 2 850#define KVM_DEV_TYPE_FSL_MPIC_42 2
845#define KVM_DEV_TYPE_XICS 3 851#define KVM_DEV_TYPE_XICS 3
852#define KVM_DEV_TYPE_VFIO 4
853#define KVM_DEV_VFIO_GROUP 1
854#define KVM_DEV_VFIO_GROUP_ADD 1
855#define KVM_DEV_VFIO_GROUP_DEL 2
846 856
847/* 857/*
848 * ioctls for VM fds 858 * ioctls for VM fds
@@ -1012,6 +1022,7 @@ struct kvm_s390_ucas_mapping {
1012/* VM is being stopped by host */ 1022/* VM is being stopped by host */
1013#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) 1023#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad)
1014#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) 1024#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init)
1025#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init)
1015#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) 1026#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
1016 1027
1017#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 1028#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
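KVM_DEV_TYPE_VFIO and the KVM_DEV_VFIO_GROUP_ADD/DEL attributes above are driven from userspace through the existing KVM_CREATE_DEVICE and KVM_SET_DEVICE_ATTR ioctls; the attribute payload is a pointer to the VFIO group file descriptor, matching the get_user() in virt/kvm/vfio.c further down. A hedged userspace sketch, assuming vm_fd came from KVM_CREATE_VM and group_fd from opening a /dev/vfio group; error handling is illustrative:

        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        static int kvm_vfio_add_group(int vm_fd, int32_t group_fd)
        {
                struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
                struct kvm_device_attr attr = {
                        .group = KVM_DEV_VFIO_GROUP,
                        .attr  = KVM_DEV_VFIO_GROUP_ADD,
                        .addr  = (uint64_t)(unsigned long)&group_fd,
                };

                if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                        return -1;

                /* cd.fd now refers to the per-VM "kvm-vfio" pseudo device. */
                return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
        }
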
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 8807061ca004..9328b80eaf14 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -207,6 +207,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
207 return ret; 207 return ret;
208} 208}
209 209
210static atomic_t reset_hung_task = ATOMIC_INIT(0);
211
212void reset_hung_task_detector(void)
213{
214 atomic_set(&reset_hung_task, 1);
215}
216EXPORT_SYMBOL_GPL(reset_hung_task_detector);
217
210/* 218/*
211 * kthread which checks for tasks stuck in D state 219 * kthread which checks for tasks stuck in D state
212 */ 220 */
@@ -220,6 +228,9 @@ static int watchdog(void *dummy)
220 while (schedule_timeout_interruptible(timeout_jiffies(timeout))) 228 while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
221 timeout = sysctl_hung_task_timeout_secs; 229 timeout = sysctl_hung_task_timeout_secs;
222 230
231 if (atomic_xchg(&reset_hung_task, 0))
232 continue;
233
223 check_hung_uninterruptible_tasks(timeout); 234 check_hung_uninterruptible_tasks(timeout);
224 } 235 }
225 236
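reset_hung_task_detector() sets a one-shot atomic flag which the watchdog consumes with atomic_xchg(), skipping exactly one scan. A hedged kernel-side sketch of a caller that is about to cause a long but expected stall (the function name is made up):

        #include <linux/sched.h>

        static void example_expected_long_stall(void)
        {
                /*
                 * Tasks may sit in D state longer than
                 * sysctl_hung_task_timeout_secs on purpose here, so arm the
                 * one-shot reset and the next watchdog pass stays quiet.
                 */
                reset_hung_task_detector();

                /* ... the long operation runs here ... */
        }
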
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f59e25..fbe1a48bd629 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -27,3 +27,6 @@ config HAVE_KVM_MSI
27 27
28config HAVE_KVM_CPU_RELAX_INTERCEPT 28config HAVE_KVM_CPU_RELAX_INTERCEPT
29 bool 29 bool
30
31config KVM_VFIO
32 bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8a39dda7a325..8631d9c14320 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
56 56
57static void async_pf_execute(struct work_struct *work) 57static void async_pf_execute(struct work_struct *work)
58{ 58{
59 struct page *page = NULL;
60 struct kvm_async_pf *apf = 59 struct kvm_async_pf *apf =
61 container_of(work, struct kvm_async_pf, work); 60 container_of(work, struct kvm_async_pf, work);
62 struct mm_struct *mm = apf->mm; 61 struct mm_struct *mm = apf->mm;
@@ -68,14 +67,12 @@ static void async_pf_execute(struct work_struct *work)
68 67
69 use_mm(mm); 68 use_mm(mm);
70 down_read(&mm->mmap_sem); 69 down_read(&mm->mmap_sem);
71 get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); 70 get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
72 up_read(&mm->mmap_sem); 71 up_read(&mm->mmap_sem);
73 unuse_mm(mm); 72 unuse_mm(mm);
74 73
75 spin_lock(&vcpu->async_pf.lock); 74 spin_lock(&vcpu->async_pf.lock);
76 list_add_tail(&apf->link, &vcpu->async_pf.done); 75 list_add_tail(&apf->link, &vcpu->async_pf.done);
77 apf->page = page;
78 apf->done = true;
79 spin_unlock(&vcpu->async_pf.lock); 76 spin_unlock(&vcpu->async_pf.lock);
80 77
81 /* 78 /*
@@ -83,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
83 * this point 80 * this point
84 */ 81 */
85 82
86 trace_kvm_async_pf_completed(addr, page, gva); 83 trace_kvm_async_pf_completed(addr, gva);
87 84
88 if (waitqueue_active(&vcpu->wq)) 85 if (waitqueue_active(&vcpu->wq))
89 wake_up_interruptible(&vcpu->wq); 86 wake_up_interruptible(&vcpu->wq);
@@ -99,9 +96,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
99 struct kvm_async_pf *work = 96 struct kvm_async_pf *work =
100 list_entry(vcpu->async_pf.queue.next, 97 list_entry(vcpu->async_pf.queue.next,
101 typeof(*work), queue); 98 typeof(*work), queue);
102 cancel_work_sync(&work->work);
103 list_del(&work->queue); 99 list_del(&work->queue);
104 if (!work->done) { /* work was canceled */ 100 if (cancel_work_sync(&work->work)) {
105 mmdrop(work->mm); 101 mmdrop(work->mm);
106 kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ 102 kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
107 kmem_cache_free(async_pf_cache, work); 103 kmem_cache_free(async_pf_cache, work);
@@ -114,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
114 list_entry(vcpu->async_pf.done.next, 110 list_entry(vcpu->async_pf.done.next,
115 typeof(*work), link); 111 typeof(*work), link);
116 list_del(&work->link); 112 list_del(&work->link);
117 if (!is_error_page(work->page))
118 kvm_release_page_clean(work->page);
119 kmem_cache_free(async_pf_cache, work); 113 kmem_cache_free(async_pf_cache, work);
120 } 114 }
121 spin_unlock(&vcpu->async_pf.lock); 115 spin_unlock(&vcpu->async_pf.lock);
@@ -135,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
135 list_del(&work->link); 129 list_del(&work->link);
136 spin_unlock(&vcpu->async_pf.lock); 130 spin_unlock(&vcpu->async_pf.lock);
137 131
138 if (work->page) 132 kvm_arch_async_page_ready(vcpu, work);
139 kvm_arch_async_page_ready(vcpu, work);
140 kvm_arch_async_page_present(vcpu, work); 133 kvm_arch_async_page_present(vcpu, work);
141 134
142 list_del(&work->queue); 135 list_del(&work->queue);
143 vcpu->async_pf.queued--; 136 vcpu->async_pf.queued--;
144 if (!is_error_page(work->page))
145 kvm_release_page_clean(work->page);
146 kmem_cache_free(async_pf_cache, work); 137 kmem_cache_free(async_pf_cache, work);
147 } 138 }
148} 139}
@@ -165,8 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
165 if (!work) 156 if (!work)
166 return 0; 157 return 0;
167 158
168 work->page = NULL; 159 work->wakeup_all = false;
169 work->done = false;
170 work->vcpu = vcpu; 160 work->vcpu = vcpu;
171 work->gva = gva; 161 work->gva = gva;
172 work->addr = gfn_to_hva(vcpu->kvm, gfn); 162 work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -206,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
206 if (!work) 196 if (!work)
207 return -ENOMEM; 197 return -ENOMEM;
208 198
209 work->page = KVM_ERR_PTR_BAD_PAGE; 199 work->wakeup_all = true;
210 INIT_LIST_HEAD(&work->queue); /* for list_del to work */ 200 INIT_LIST_HEAD(&work->queue); /* for list_del to work */
211 201
212 spin_lock(&vcpu->async_pf.lock); 202 spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 72a130bc448a..0df7d4b34dfe 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -79,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
79 flags = IOMMU_READ; 79 flags = IOMMU_READ;
80 if (!(slot->flags & KVM_MEM_READONLY)) 80 if (!(slot->flags & KVM_MEM_READONLY))
81 flags |= IOMMU_WRITE; 81 flags |= IOMMU_WRITE;
82 if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) 82 if (!kvm->arch.iommu_noncoherent)
83 flags |= IOMMU_CACHE; 83 flags |= IOMMU_CACHE;
84 84
85 85
@@ -103,6 +103,10 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
103 while ((gfn << PAGE_SHIFT) & (page_size - 1)) 103 while ((gfn << PAGE_SHIFT) & (page_size - 1))
104 page_size >>= 1; 104 page_size >>= 1;
105 105
106 /* Make sure hva is aligned to the page size we want to map */
107 while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
108 page_size >>= 1;
109
106 /* 110 /*
107 * Pin all pages we are about to map in memory. This is 111 * Pin all pages we are about to map in memory. This is
108 * important because we unmap and unpin in 4kb steps later. 112 * important because we unmap and unpin in 4kb steps later.
@@ -140,6 +144,9 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
140 struct kvm_memslots *slots; 144 struct kvm_memslots *slots;
141 struct kvm_memory_slot *memslot; 145 struct kvm_memory_slot *memslot;
142 146
147 if (kvm->arch.iommu_noncoherent)
148 kvm_arch_register_noncoherent_dma(kvm);
149
143 idx = srcu_read_lock(&kvm->srcu); 150 idx = srcu_read_lock(&kvm->srcu);
144 slots = kvm_memslots(kvm); 151 slots = kvm_memslots(kvm);
145 152
@@ -158,7 +165,8 @@ int kvm_assign_device(struct kvm *kvm,
158{ 165{
159 struct pci_dev *pdev = NULL; 166 struct pci_dev *pdev = NULL;
160 struct iommu_domain *domain = kvm->arch.iommu_domain; 167 struct iommu_domain *domain = kvm->arch.iommu_domain;
161 int r, last_flags; 168 int r;
169 bool noncoherent;
162 170
163 /* check if iommu exists and in use */ 171 /* check if iommu exists and in use */
164 if (!domain) 172 if (!domain)
@@ -174,15 +182,13 @@ int kvm_assign_device(struct kvm *kvm,
174 return r; 182 return r;
175 } 183 }
176 184
177 last_flags = kvm->arch.iommu_flags; 185 noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain,
178 if (iommu_domain_has_cap(kvm->arch.iommu_domain, 186 IOMMU_CAP_CACHE_COHERENCY);
179 IOMMU_CAP_CACHE_COHERENCY))
180 kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
181 187
182 /* Check if need to update IOMMU page table for guest memory */ 188 /* Check if need to update IOMMU page table for guest memory */
183 if ((last_flags ^ kvm->arch.iommu_flags) == 189 if (noncoherent != kvm->arch.iommu_noncoherent) {
184 KVM_IOMMU_CACHE_COHERENCY) {
185 kvm_iommu_unmap_memslots(kvm); 190 kvm_iommu_unmap_memslots(kvm);
191 kvm->arch.iommu_noncoherent = noncoherent;
186 r = kvm_iommu_map_memslots(kvm); 192 r = kvm_iommu_map_memslots(kvm);
187 if (r) 193 if (r)
188 goto out_unmap; 194 goto out_unmap;
@@ -190,11 +196,7 @@ int kvm_assign_device(struct kvm *kvm,
190 196
191 pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; 197 pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
192 198
193 printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", 199 dev_info(&pdev->dev, "kvm assign device\n");
194 assigned_dev->host_segnr,
195 assigned_dev->host_busnr,
196 PCI_SLOT(assigned_dev->host_devfn),
197 PCI_FUNC(assigned_dev->host_devfn));
198 200
199 return 0; 201 return 0;
200out_unmap: 202out_unmap:
@@ -220,11 +222,7 @@ int kvm_deassign_device(struct kvm *kvm,
220 222
221 pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; 223 pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
222 224
223 printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", 225 dev_info(&pdev->dev, "kvm deassign device\n");
224 assigned_dev->host_segnr,
225 assigned_dev->host_busnr,
226 PCI_SLOT(assigned_dev->host_devfn),
227 PCI_FUNC(assigned_dev->host_devfn));
228 226
229 return 0; 227 return 0;
230} 228}
@@ -336,6 +334,9 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
336 334
337 srcu_read_unlock(&kvm->srcu, idx); 335 srcu_read_unlock(&kvm->srcu, idx);
338 336
337 if (kvm->arch.iommu_noncoherent)
338 kvm_arch_unregister_noncoherent_dma(kvm);
339
339 return 0; 340 return 0;
340} 341}
341 342
@@ -350,6 +351,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
350 mutex_lock(&kvm->slots_lock); 351 mutex_lock(&kvm->slots_lock);
351 kvm_iommu_unmap_memslots(kvm); 352 kvm_iommu_unmap_memslots(kvm);
352 kvm->arch.iommu_domain = NULL; 353 kvm->arch.iommu_domain = NULL;
354 kvm->arch.iommu_noncoherent = false;
353 mutex_unlock(&kvm->slots_lock); 355 mutex_unlock(&kvm->slots_lock);
354 356
355 iommu_domain_free(domain); 357 iommu_domain_free(domain);
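The hva-alignment loop added to kvm_iommu_map_pages() above matters when the guest-physical side of a slot is aligned to a large page but the userspace address backing it is not. For example, if gfn << PAGE_SHIFT is 2 MB aligned while __gfn_to_hva_memslot() yields an address with only a 4 KB offset into a 2 MB region, mapping a 2 MB IOMMU page would cover host memory outside the slot; halving page_size until the hva is aligned as well keeps the mapping within properly pinned memory.
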
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1cf9ccb01013..662f34c3287e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
70 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock 70 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
71 */ 71 */
72 72
73DEFINE_RAW_SPINLOCK(kvm_lock); 73DEFINE_SPINLOCK(kvm_lock);
74static DEFINE_RAW_SPINLOCK(kvm_count_lock);
74LIST_HEAD(vm_list); 75LIST_HEAD(vm_list);
75 76
76static cpumask_var_t cpus_hardware_enabled; 77static cpumask_var_t cpus_hardware_enabled;
@@ -186,6 +187,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
186 ++kvm->stat.remote_tlb_flush; 187 ++kvm->stat.remote_tlb_flush;
187 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); 188 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
188} 189}
190EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
189 191
190void kvm_reload_remote_mmus(struct kvm *kvm) 192void kvm_reload_remote_mmus(struct kvm *kvm)
191{ 193{
@@ -490,9 +492,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
490 if (r) 492 if (r)
491 goto out_err; 493 goto out_err;
492 494
493 raw_spin_lock(&kvm_lock); 495 spin_lock(&kvm_lock);
494 list_add(&kvm->vm_list, &vm_list); 496 list_add(&kvm->vm_list, &vm_list);
495 raw_spin_unlock(&kvm_lock); 497 spin_unlock(&kvm_lock);
496 498
497 return kvm; 499 return kvm;
498 500
@@ -540,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
540/* 542/*
541 * Free any memory in @free but not in @dont. 543 * Free any memory in @free but not in @dont.
542 */ 544 */
543static void kvm_free_physmem_slot(struct kvm_memory_slot *free, 545static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
544 struct kvm_memory_slot *dont) 546 struct kvm_memory_slot *dont)
545{ 547{
546 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 548 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
547 kvm_destroy_dirty_bitmap(free); 549 kvm_destroy_dirty_bitmap(free);
548 550
549 kvm_arch_free_memslot(free, dont); 551 kvm_arch_free_memslot(kvm, free, dont);
550 552
551 free->npages = 0; 553 free->npages = 0;
552} 554}
@@ -557,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm)
557 struct kvm_memory_slot *memslot; 559 struct kvm_memory_slot *memslot;
558 560
559 kvm_for_each_memslot(memslot, slots) 561 kvm_for_each_memslot(memslot, slots)
560 kvm_free_physmem_slot(memslot, NULL); 562 kvm_free_physmem_slot(kvm, memslot, NULL);
561 563
562 kfree(kvm->memslots); 564 kfree(kvm->memslots);
563} 565}
@@ -581,9 +583,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
581 struct mm_struct *mm = kvm->mm; 583 struct mm_struct *mm = kvm->mm;
582 584
583 kvm_arch_sync_events(kvm); 585 kvm_arch_sync_events(kvm);
584 raw_spin_lock(&kvm_lock); 586 spin_lock(&kvm_lock);
585 list_del(&kvm->vm_list); 587 list_del(&kvm->vm_list);
586 raw_spin_unlock(&kvm_lock); 588 spin_unlock(&kvm_lock);
587 kvm_free_irq_routing(kvm); 589 kvm_free_irq_routing(kvm);
588 for (i = 0; i < KVM_NR_BUSES; i++) 590 for (i = 0; i < KVM_NR_BUSES; i++)
589 kvm_io_bus_destroy(kvm->buses[i]); 591 kvm_io_bus_destroy(kvm->buses[i]);
@@ -821,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
821 if (change == KVM_MR_CREATE) { 823 if (change == KVM_MR_CREATE) {
822 new.userspace_addr = mem->userspace_addr; 824 new.userspace_addr = mem->userspace_addr;
823 825
824 if (kvm_arch_create_memslot(&new, npages)) 826 if (kvm_arch_create_memslot(kvm, &new, npages))
825 goto out_free; 827 goto out_free;
826 } 828 }
827 829
@@ -872,6 +874,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
872 goto out_free; 874 goto out_free;
873 } 875 }
874 876
877 /* actual memory is freed via old in kvm_free_physmem_slot below */
878 if (change == KVM_MR_DELETE) {
879 new.dirty_bitmap = NULL;
880 memset(&new.arch, 0, sizeof(new.arch));
881 }
882
883 old_memslots = install_new_memslots(kvm, slots, &new);
884
885 kvm_arch_commit_memory_region(kvm, mem, &old, change);
886
887 kvm_free_physmem_slot(kvm, &old, &new);
888 kfree(old_memslots);
889
875 /* 890 /*
876 * IOMMU mapping: New slots need to be mapped. Old slots need to be 891 * IOMMU mapping: New slots need to be mapped. Old slots need to be
877 * un-mapped and re-mapped if their base changes. Since base change 892 * un-mapped and re-mapped if their base changes. Since base change
@@ -883,29 +898,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
883 */ 898 */
884 if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { 899 if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
885 r = kvm_iommu_map_pages(kvm, &new); 900 r = kvm_iommu_map_pages(kvm, &new);
886 if (r) 901 return r;
887 goto out_slots;
888 }
889
890 /* actual memory is freed via old in kvm_free_physmem_slot below */
891 if (change == KVM_MR_DELETE) {
892 new.dirty_bitmap = NULL;
893 memset(&new.arch, 0, sizeof(new.arch));
894 } 902 }
895 903
896 old_memslots = install_new_memslots(kvm, slots, &new);
897
898 kvm_arch_commit_memory_region(kvm, mem, &old, change);
899
900 kvm_free_physmem_slot(&old, &new);
901 kfree(old_memslots);
902
903 return 0; 904 return 0;
904 905
905out_slots: 906out_slots:
906 kfree(slots); 907 kfree(slots);
907out_free: 908out_free:
908 kvm_free_physmem_slot(&new, &old); 909 kvm_free_physmem_slot(kvm, &new, &old);
909out: 910out:
910 return r; 911 return r;
911} 912}
@@ -964,6 +965,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
964out: 965out:
965 return r; 966 return r;
966} 967}
968EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
967 969
968bool kvm_largepages_enabled(void) 970bool kvm_largepages_enabled(void)
969{ 971{
@@ -1654,6 +1656,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1654 memslot = gfn_to_memslot(kvm, gfn); 1656 memslot = gfn_to_memslot(kvm, gfn);
1655 mark_page_dirty_in_slot(kvm, memslot, gfn); 1657 mark_page_dirty_in_slot(kvm, memslot, gfn);
1656} 1658}
1659EXPORT_SYMBOL_GPL(mark_page_dirty);
1657 1660
1658/* 1661/*
1659 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1662 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
@@ -1679,6 +1682,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1679 1682
1680 finish_wait(&vcpu->wq, &wait); 1683 finish_wait(&vcpu->wq, &wait);
1681} 1684}
1685EXPORT_SYMBOL_GPL(kvm_vcpu_block);
1682 1686
1683#ifndef CONFIG_S390 1687#ifndef CONFIG_S390
1684/* 1688/*
@@ -2271,6 +2275,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2271 ops = &kvm_xics_ops; 2275 ops = &kvm_xics_ops;
2272 break; 2276 break;
2273#endif 2277#endif
2278#ifdef CONFIG_KVM_VFIO
2279 case KVM_DEV_TYPE_VFIO:
2280 ops = &kvm_vfio_ops;
2281 break;
2282#endif
2274 default: 2283 default:
2275 return -ENODEV; 2284 return -ENODEV;
2276 } 2285 }
@@ -2519,44 +2528,12 @@ out:
2519} 2528}
2520#endif 2529#endif
2521 2530
2522static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2523{
2524 struct page *page[1];
2525 unsigned long addr;
2526 int npages;
2527 gfn_t gfn = vmf->pgoff;
2528 struct kvm *kvm = vma->vm_file->private_data;
2529
2530 addr = gfn_to_hva(kvm, gfn);
2531 if (kvm_is_error_hva(addr))
2532 return VM_FAULT_SIGBUS;
2533
2534 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
2535 NULL);
2536 if (unlikely(npages != 1))
2537 return VM_FAULT_SIGBUS;
2538
2539 vmf->page = page[0];
2540 return 0;
2541}
2542
2543static const struct vm_operations_struct kvm_vm_vm_ops = {
2544 .fault = kvm_vm_fault,
2545};
2546
2547static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
2548{
2549 vma->vm_ops = &kvm_vm_vm_ops;
2550 return 0;
2551}
2552
2553static struct file_operations kvm_vm_fops = { 2531static struct file_operations kvm_vm_fops = {
2554 .release = kvm_vm_release, 2532 .release = kvm_vm_release,
2555 .unlocked_ioctl = kvm_vm_ioctl, 2533 .unlocked_ioctl = kvm_vm_ioctl,
2556#ifdef CONFIG_COMPAT 2534#ifdef CONFIG_COMPAT
2557 .compat_ioctl = kvm_vm_compat_ioctl, 2535 .compat_ioctl = kvm_vm_compat_ioctl,
2558#endif 2536#endif
2559 .mmap = kvm_vm_mmap,
2560 .llseek = noop_llseek, 2537 .llseek = noop_llseek,
2561}; 2538};
2562 2539
@@ -2683,11 +2660,12 @@ static void hardware_enable_nolock(void *junk)
2683 } 2660 }
2684} 2661}
2685 2662
2686static void hardware_enable(void *junk) 2663static void hardware_enable(void)
2687{ 2664{
2688 raw_spin_lock(&kvm_lock); 2665 raw_spin_lock(&kvm_count_lock);
2689 hardware_enable_nolock(junk); 2666 if (kvm_usage_count)
2690 raw_spin_unlock(&kvm_lock); 2667 hardware_enable_nolock(NULL);
2668 raw_spin_unlock(&kvm_count_lock);
2691} 2669}
2692 2670
2693static void hardware_disable_nolock(void *junk) 2671static void hardware_disable_nolock(void *junk)
@@ -2700,11 +2678,12 @@ static void hardware_disable_nolock(void *junk)
2700 kvm_arch_hardware_disable(NULL); 2678 kvm_arch_hardware_disable(NULL);
2701} 2679}
2702 2680
2703static void hardware_disable(void *junk) 2681static void hardware_disable(void)
2704{ 2682{
2705 raw_spin_lock(&kvm_lock); 2683 raw_spin_lock(&kvm_count_lock);
2706 hardware_disable_nolock(junk); 2684 if (kvm_usage_count)
2707 raw_spin_unlock(&kvm_lock); 2685 hardware_disable_nolock(NULL);
2686 raw_spin_unlock(&kvm_count_lock);
2708} 2687}
2709 2688
2710static void hardware_disable_all_nolock(void) 2689static void hardware_disable_all_nolock(void)
@@ -2718,16 +2697,16 @@ static void hardware_disable_all_nolock(void)
2718 2697
2719static void hardware_disable_all(void) 2698static void hardware_disable_all(void)
2720{ 2699{
2721 raw_spin_lock(&kvm_lock); 2700 raw_spin_lock(&kvm_count_lock);
2722 hardware_disable_all_nolock(); 2701 hardware_disable_all_nolock();
2723 raw_spin_unlock(&kvm_lock); 2702 raw_spin_unlock(&kvm_count_lock);
2724} 2703}
2725 2704
2726static int hardware_enable_all(void) 2705static int hardware_enable_all(void)
2727{ 2706{
2728 int r = 0; 2707 int r = 0;
2729 2708
2730 raw_spin_lock(&kvm_lock); 2709 raw_spin_lock(&kvm_count_lock);
2731 2710
2732 kvm_usage_count++; 2711 kvm_usage_count++;
2733 if (kvm_usage_count == 1) { 2712 if (kvm_usage_count == 1) {
@@ -2740,7 +2719,7 @@ static int hardware_enable_all(void)
2740 } 2719 }
2741 } 2720 }
2742 2721
2743 raw_spin_unlock(&kvm_lock); 2722 raw_spin_unlock(&kvm_count_lock);
2744 2723
2745 return r; 2724 return r;
2746} 2725}
@@ -2750,20 +2729,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2750{ 2729{
2751 int cpu = (long)v; 2730 int cpu = (long)v;
2752 2731
2753 if (!kvm_usage_count)
2754 return NOTIFY_OK;
2755
2756 val &= ~CPU_TASKS_FROZEN; 2732 val &= ~CPU_TASKS_FROZEN;
2757 switch (val) { 2733 switch (val) {
2758 case CPU_DYING: 2734 case CPU_DYING:
2759 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 2735 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
2760 cpu); 2736 cpu);
2761 hardware_disable(NULL); 2737 hardware_disable();
2762 break; 2738 break;
2763 case CPU_STARTING: 2739 case CPU_STARTING:
2764 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 2740 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
2765 cpu); 2741 cpu);
2766 hardware_enable(NULL); 2742 hardware_enable();
2767 break; 2743 break;
2768 } 2744 }
2769 return NOTIFY_OK; 2745 return NOTIFY_OK;
@@ -3056,10 +3032,10 @@ static int vm_stat_get(void *_offset, u64 *val)
3056 struct kvm *kvm; 3032 struct kvm *kvm;
3057 3033
3058 *val = 0; 3034 *val = 0;
3059 raw_spin_lock(&kvm_lock); 3035 spin_lock(&kvm_lock);
3060 list_for_each_entry(kvm, &vm_list, vm_list) 3036 list_for_each_entry(kvm, &vm_list, vm_list)
3061 *val += *(u32 *)((void *)kvm + offset); 3037 *val += *(u32 *)((void *)kvm + offset);
3062 raw_spin_unlock(&kvm_lock); 3038 spin_unlock(&kvm_lock);
3063 return 0; 3039 return 0;
3064} 3040}
3065 3041
@@ -3073,12 +3049,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
3073 int i; 3049 int i;
3074 3050
3075 *val = 0; 3051 *val = 0;
3076 raw_spin_lock(&kvm_lock); 3052 spin_lock(&kvm_lock);
3077 list_for_each_entry(kvm, &vm_list, vm_list) 3053 list_for_each_entry(kvm, &vm_list, vm_list)
3078 kvm_for_each_vcpu(i, vcpu, kvm) 3054 kvm_for_each_vcpu(i, vcpu, kvm)
3079 *val += *(u32 *)((void *)vcpu + offset); 3055 *val += *(u32 *)((void *)vcpu + offset);
3080 3056
3081 raw_spin_unlock(&kvm_lock); 3057 spin_unlock(&kvm_lock);
3082 return 0; 3058 return 0;
3083} 3059}
3084 3060
@@ -3133,7 +3109,7 @@ static int kvm_suspend(void)
3133static void kvm_resume(void) 3109static void kvm_resume(void)
3134{ 3110{
3135 if (kvm_usage_count) { 3111 if (kvm_usage_count) {
3136 WARN_ON(raw_spin_is_locked(&kvm_lock)); 3112 WARN_ON(raw_spin_is_locked(&kvm_count_lock));
3137 hardware_enable_nolock(NULL); 3113 hardware_enable_nolock(NULL);
3138 } 3114 }
3139} 3115}
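The locking conversion above splits the old raw kvm_lock in two: vm_list and the debugfs statistics walkers now take a plain spinlock (kvm_lock), while the hardware enable/disable refcount moves under the new raw kvm_count_lock so it stays usable from the CPU_STARTING/CPU_DYING notifiers. A minimal sketch of a vm_list walker under the new scheme, following the same pattern as vm_stat_get() above (the function itself is hypothetical):

        #include <linux/kvm_host.h>

        static unsigned long example_count_vms(void)
        {
                struct kvm *kvm;
                unsigned long n = 0;

                spin_lock(&kvm_lock);
                list_for_each_entry(kvm, &vm_list, vm_list)
                        n++;
                spin_unlock(&kvm_lock);

                return n;
        }
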
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644
index 000000000000..ca4260e35037
--- /dev/null
+++ b/virt/kvm/vfio.c
@@ -0,0 +1,264 @@
1/*
2 * VFIO-KVM bridge pseudo device
3 *
4 * Copyright (C) 2013 Red Hat, Inc. All rights reserved.
5 * Author: Alex Williamson <alex.williamson@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/errno.h>
13#include <linux/file.h>
14#include <linux/kvm_host.h>
15#include <linux/list.h>
16#include <linux/module.h>
17#include <linux/mutex.h>
18#include <linux/slab.h>
19#include <linux/uaccess.h>
20#include <linux/vfio.h>
21
22struct kvm_vfio_group {
23 struct list_head node;
24 struct vfio_group *vfio_group;
25};
26
27struct kvm_vfio {
28 struct list_head group_list;
29 struct mutex lock;
30 bool noncoherent;
31};
32
33static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
34{
35 struct vfio_group *vfio_group;
36 struct vfio_group *(*fn)(struct file *);
37
38 fn = symbol_get(vfio_group_get_external_user);
39 if (!fn)
40 return ERR_PTR(-EINVAL);
41
42 vfio_group = fn(filep);
43
44 symbol_put(vfio_group_get_external_user);
45
46 return vfio_group;
47}
48
49static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
50{
51 void (*fn)(struct vfio_group *);
52
53 fn = symbol_get(vfio_group_put_external_user);
54 if (!fn)
55 return;
56
57 fn(vfio_group);
58
59 symbol_put(vfio_group_put_external_user);
60}
61
62/*
63 * Groups can use the same or different IOMMU domains. If the same then
64 * adding a new group may change the coherency of groups we've previously
65 * been told about. We don't want to care about any of that so we retest
66 * each group and bail as soon as we find one that's noncoherent. This
67 * means we only ever [un]register_noncoherent_dma once for the whole device.
68 */
69static void kvm_vfio_update_coherency(struct kvm_device *dev)
70{
71 struct kvm_vfio *kv = dev->private;
72 bool noncoherent = false;
73 struct kvm_vfio_group *kvg;
74
75 mutex_lock(&kv->lock);
76
77 list_for_each_entry(kvg, &kv->group_list, node) {
78 /*
79 * TODO: We need an interface to check the coherency of
80 * the IOMMU domain this group is using. For now, assume
81 * it's always noncoherent.
82 */
83 noncoherent = true;
84 break;
85 }
86
87 if (noncoherent != kv->noncoherent) {
88 kv->noncoherent = noncoherent;
89
90 if (kv->noncoherent)
91 kvm_arch_register_noncoherent_dma(dev->kvm);
92 else
93 kvm_arch_unregister_noncoherent_dma(dev->kvm);
94 }
95
96 mutex_unlock(&kv->lock);
97}
98
99static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
100{
101 struct kvm_vfio *kv = dev->private;
102 struct vfio_group *vfio_group;
103 struct kvm_vfio_group *kvg;
104 void __user *argp = (void __user *)arg;
105 struct fd f;
106 int32_t fd;
107 int ret;
108
109 switch (attr) {
110 case KVM_DEV_VFIO_GROUP_ADD:
111 if (get_user(fd, (int32_t __user *)argp))
112 return -EFAULT;
113
114 f = fdget(fd);
115 if (!f.file)
116 return -EBADF;
117
118 vfio_group = kvm_vfio_group_get_external_user(f.file);
119 fdput(f);
120
121 if (IS_ERR(vfio_group))
122 return PTR_ERR(vfio_group);
123
124 mutex_lock(&kv->lock);
125
126 list_for_each_entry(kvg, &kv->group_list, node) {
127 if (kvg->vfio_group == vfio_group) {
128 mutex_unlock(&kv->lock);
129 kvm_vfio_group_put_external_user(vfio_group);
130 return -EEXIST;
131 }
132 }
133
134 kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
135 if (!kvg) {
136 mutex_unlock(&kv->lock);
137 kvm_vfio_group_put_external_user(vfio_group);
138 return -ENOMEM;
139 }
140
141 list_add_tail(&kvg->node, &kv->group_list);
142 kvg->vfio_group = vfio_group;
143
144 mutex_unlock(&kv->lock);
145
146 kvm_vfio_update_coherency(dev);
147
148 return 0;
149
150 case KVM_DEV_VFIO_GROUP_DEL:
151 if (get_user(fd, (int32_t __user *)argp))
152 return -EFAULT;
153
154 f = fdget(fd);
155 if (!f.file)
156 return -EBADF;
157
158 vfio_group = kvm_vfio_group_get_external_user(f.file);
159 fdput(f);
160
161 if (IS_ERR(vfio_group))
162 return PTR_ERR(vfio_group);
163
164 ret = -ENOENT;
165
166 mutex_lock(&kv->lock);
167
168 list_for_each_entry(kvg, &kv->group_list, node) {
169 if (kvg->vfio_group != vfio_group)
170 continue;
171
172 list_del(&kvg->node);
173 kvm_vfio_group_put_external_user(kvg->vfio_group);
174 kfree(kvg);
175 ret = 0;
176 break;
177 }
178
179 mutex_unlock(&kv->lock);
180
181 kvm_vfio_group_put_external_user(vfio_group);
182
183 kvm_vfio_update_coherency(dev);
184
185 return ret;
186 }
187
188 return -ENXIO;
189}
190
191static int kvm_vfio_set_attr(struct kvm_device *dev,
192 struct kvm_device_attr *attr)
193{
194 switch (attr->group) {
195 case KVM_DEV_VFIO_GROUP:
196 return kvm_vfio_set_group(dev, attr->attr, attr->addr);
197 }
198
199 return -ENXIO;
200}
201
202static int kvm_vfio_has_attr(struct kvm_device *dev,
203 struct kvm_device_attr *attr)
204{
205 switch (attr->group) {
206 case KVM_DEV_VFIO_GROUP:
207 switch (attr->attr) {
208 case KVM_DEV_VFIO_GROUP_ADD:
209 case KVM_DEV_VFIO_GROUP_DEL:
210 return 0;
211 }
212
213 break;
214 }
215
216 return -ENXIO;
217}
218
219static void kvm_vfio_destroy(struct kvm_device *dev)
220{
221 struct kvm_vfio *kv = dev->private;
222 struct kvm_vfio_group *kvg, *tmp;
223
224 list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
225 kvm_vfio_group_put_external_user(kvg->vfio_group);
226 list_del(&kvg->node);
227 kfree(kvg);
228 }
229
230 kvm_vfio_update_coherency(dev);
231
232 kfree(kv);
233 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
234}
235
236static int kvm_vfio_create(struct kvm_device *dev, u32 type)
237{
238 struct kvm_device *tmp;
239 struct kvm_vfio *kv;
240
241 /* Only one VFIO "device" per VM */
242 list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
243 if (tmp->ops == &kvm_vfio_ops)
244 return -EBUSY;
245
246 kv = kzalloc(sizeof(*kv), GFP_KERNEL);
247 if (!kv)
248 return -ENOMEM;
249
250 INIT_LIST_HEAD(&kv->group_list);
251 mutex_init(&kv->lock);
252
253 dev->private = kv;
254
255 return 0;
256}
257
258struct kvm_device_ops kvm_vfio_ops = {
259 .name = "kvm-vfio",
260 .create = kvm_vfio_create,
261 .destroy = kvm_vfio_destroy,
262 .set_attr = kvm_vfio_set_attr,
263 .has_attr = kvm_vfio_has_attr,
264};
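
kvm_vfio_group_get_external_user() and kvm_vfio_group_put_external_user() above resolve the VFIO helpers through symbol_get()/symbol_put() instead of calling them directly, so kvm.ko takes no hard link-time dependency on the VFIO module: when VFIO is not loaded, symbol_get() returns NULL and KVM_DEV_VFIO_GROUP_ADD simply fails with -EINVAL.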