author     Christoffer Dall <christoffer.dall@linaro.org>    2014-09-18 21:15:32 -0400
committer  Christoffer Dall <christoffer.dall@linaro.org>    2014-09-18 21:15:32 -0400
commit     a875dafcf9b6b266c855e1f9b0aa060ef585d38a
tree       1903cb0a39ac1cade1940ccb559591cddf3660a0
parent     0ba09511ddc3ff0b462f37b4fe4b9c4dccc054ec
parent     f51770ed465e6eb41da7fa16fd92eb67069600cf

Merge remote-tracking branch 'kvm/next' into queue

Conflicts:
	arch/arm64/include/asm/kvm_host.h
	virt/kvm/arm/vgic.c
-rw-r--r--  Documentation/virtual/kvm/api.txt | 186
-rw-r--r--  Documentation/virtual/kvm/mmu.txt | 14
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 13
-rw-r--r--  arch/arm/kvm/arm.c | 21
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 12
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 15
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 34
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 16
-rw-r--r--  arch/mips/kvm/mips.c | 44
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 13
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 31
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 33
-rw-r--r--  arch/s390/include/asm/pgalloc.h | 8
-rw-r--r--  arch/s390/include/asm/pgtable.h | 72
-rw-r--r--  arch/s390/include/asm/tlb.h | 2
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 10
-rw-r--r--  arch/s390/kvm/diag.c | 28
-rw-r--r--  arch/s390/kvm/gaccess.c | 3
-rw-r--r--  arch/s390/kvm/interrupt.c | 151
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 193
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 6
-rw-r--r--  arch/s390/kvm/priv.c | 11
-rw-r--r--  arch/s390/mm/fault.c | 25
-rw-r--r--  arch/s390/mm/pgtable.c | 705
-rw-r--r--  arch/s390/mm/vmem.c | 2
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 20
-rw-r--r--  arch/x86/kvm/cpuid.h | 8
-rw-r--r--  arch/x86/kvm/emulate.c | 27
-rw-r--r--  arch/x86/kvm/lapic.c | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 49
-rw-r--r--  arch/x86/kvm/mmu.h | 5
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 19
-rw-r--r--  arch/x86/kvm/svm.c | 33
-rw-r--r--  arch/x86/kvm/trace.h | 15
-rw-r--r--  arch/x86/kvm/vmx.c | 160
-rw-r--r--  arch/x86/kvm/x86.c | 60
-rw-r--r--  arch/x86/kvm/x86.h | 20
-rw-r--r--  include/linux/kvm_host.h | 14
-rw-r--r--  include/linux/kvm_types.h | 14
-rw-r--r--  include/trace/events/kvm.h | 20
-rw-r--r--  include/uapi/linux/kvm.h | 28
-rw-r--r--  virt/kvm/arm/vgic.c | 157
-rw-r--r--  virt/kvm/ioapic.c | 46
-rw-r--r--  virt/kvm/ioapic.h | 2
-rw-r--r--  virt/kvm/kvm_main.c | 101
-rw-r--r--  virt/kvm/vfio.c | 22
46 files changed, 1293 insertions(+), 1179 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index beae3fde075e..f7735c72c128 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2565,6 +2565,120 @@ associated with the service will be forgotten, and subsequent RTAS
 calls by the guest for that service will be passed to userspace to be
 handled.
 
+4.87 KVM_SET_GUEST_DEBUG
+
+Capability: KVM_CAP_SET_GUEST_DEBUG
+Architectures: x86, s390, ppc
+Type: vcpu ioctl
+Parameters: struct kvm_guest_debug (in)
+Returns: 0 on success; -1 on error
+
+struct kvm_guest_debug {
+	__u32 control;
+	__u32 pad;
+	struct kvm_guest_debug_arch arch;
+};
+
+Set up the processor specific debug registers and configure vcpu for
+handling guest debug events. There are two parts to the structure, the
+first a control bitfield indicates the type of debug events to handle
+when running. Common control bits are:
+
+  - KVM_GUESTDBG_ENABLE:        guest debugging is enabled
+  - KVM_GUESTDBG_SINGLESTEP:    the next run should single-step
+
+The top 16 bits of the control field are architecture specific control
+flags which can include the following:
+
+  - KVM_GUESTDBG_USE_SW_BP:     using software breakpoints [x86]
+  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390]
+  - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
+  - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
+  - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
+
+For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
+are enabled in memory so we need to ensure breakpoint exceptions are
+correctly trapped and the KVM run loop exits at the breakpoint and not
+running off into the normal guest vector. For KVM_GUESTDBG_USE_HW_BP
+we need to ensure the guest vCPUs architecture specific registers are
+updated to the correct (supplied) values.
+
+The second part of the structure is architecture specific and
+typically contains a set of debug registers.
+
+When debug events exit the main run loop with the reason
+KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
+structure containing architecture specific debug information.
+
+4.88 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 flags;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+   function: the eax value used to obtain the entry
+   index: the ecx value used to obtain the entry (for entries that are
+          affected by ecx)
+   flags: an OR of zero or more of the following:
+        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+           if the index field is valid
+        KVM_CPUID_FLAG_STATEFUL_FUNC:
+           if cpuid for this function returns different values for successive
+           invocations; there will be several entries with the same function,
+           all with this flag set
+        KVM_CPUID_FLAG_STATE_READ_NEXT:
+           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+           the first entry to be read by a cpu
+   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+         this function/index combination
 
 5. The kvm_run structure
 ------------------------
@@ -2861,78 +2975,12 @@ kvm_valid_regs for specific bits. These bits are architecture specific
 and usually define the validity of a groups of registers. (e.g. one bit
  for general purpose registers)
 
-};
-
+Please note that the kernel is allowed to use the kvm_run structure as the
+primary storage for certain register types. Therefore, the kernel may use the
+values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
 
-4.81 KVM_GET_EMULATED_CPUID
-
-Capability: KVM_CAP_EXT_EMUL_CPUID
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
-
-struct kvm_cpuid2 {
-	__u32 nent;
-	__u32 flags;
-	struct kvm_cpuid_entry2 entries[0];
 };
 
-The member 'flags' is used for passing flags from userspace.
-
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
-
-struct kvm_cpuid_entry2 {
-	__u32 function;
-	__u32 index;
-	__u32 flags;
-	__u32 eax;
-	__u32 ebx;
-	__u32 ecx;
-	__u32 edx;
-	__u32 padding[3];
-};
-
-This ioctl returns x86 cpuid features which are emulated by
-kvm.Userspace can use the information returned by this ioctl to query
-which features are emulated by kvm instead of being present natively.
-
-Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
-structure with the 'nent' field indicating the number of entries in
-the variable-size array 'entries'. If the number of entries is too low
-to describe the cpu capabilities, an error (E2BIG) is returned. If the
-number is too high, the 'nent' field is adjusted and an error (ENOMEM)
-is returned. If the number is just right, the 'nent' field is adjusted
-to the number of valid entries in the 'entries' array, which is then
-filled.
-
-The entries returned are the set CPUID bits of the respective features
-which kvm emulates, as returned by the CPUID instruction, with unknown
-or unsupported feature bits cleared.
-
-Features like x2apic, for example, may not be present in the host cpu
-but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
-emulated efficiently and thus not included here.
-
-The fields in each entry are defined as follows:
-
-   function: the eax value used to obtain the entry
-   index: the ecx value used to obtain the entry (for entries that are
-          affected by ecx)
-   flags: an OR of zero or more of the following:
-        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
-           if the index field is valid
-        KVM_CPUID_FLAG_STATEFUL_FUNC:
-           if cpuid for this function returns different values for successive
-           invocations; there will be several entries with the same function,
-           all with this flag set
-        KVM_CPUID_FLAG_STATE_READ_NEXT:
-           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
-           the first entry to be read by a cpu
-   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
-         this function/index combination
 
 
 6. Capabilities that can be enabled on vCPUs
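
A minimal userspace sketch of the two ioctls documented above follows; it is not part of this patch. The helper names (get_emulated_cpuid, enable_singlestep) and the kvm_fd/vcpu_fd descriptors are assumptions, and real code would first confirm KVM_CAP_EXT_EMUL_CPUID and KVM_CAP_SET_GUEST_DEBUG with KVM_CHECK_EXTENSION.

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Grow 'nent' until the kernel accepts the buffer: E2BIG means the array was
 * too small, ENOMEM means 'nent' was too large and has been adjusted down. */
static struct kvm_cpuid2 *get_emulated_cpuid(int kvm_fd)
{
	int nent = 8;

	for (;;) {
		struct kvm_cpuid2 *cpuid;
		int err;

		cpuid = calloc(1, sizeof(*cpuid) +
				  nent * sizeof(struct kvm_cpuid_entry2));
		if (!cpuid)
			return NULL;
		cpuid->nent = nent;
		if (ioctl(kvm_fd, KVM_GET_EMULATED_CPUID, cpuid) == 0)
			return cpuid;	/* cpuid->nent is now the valid entry count */
		err = errno;
		if (err == ENOMEM)
			nent = cpuid->nent;	/* kernel already adjusted it */
		else if (err == E2BIG)
			nent *= 2;		/* buffer too small, retry larger */
		free(cpuid);
		if (err != ENOMEM && err != E2BIG)
			return NULL;
	}
}

/* Single-step the guest: the next KVM_RUN comes back with KVM_EXIT_DEBUG. */
static int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}

On a KVM_EXIT_DEBUG exit the architecture specific details are available in the debug.arch part of kvm_run, as described in section 4.87 above.
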
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index 290894176142..53838d9c6295 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -425,6 +425,20 @@ fault through the slow path.
 Since only 19 bits are used to store generation-number on mmio spte, all
 pages are zapped when there is an overflow.
 
+Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
+times, the last one happening when the generation number is retrieved and
+stored into the MMIO spte. Thus, the MMIO spte might be created based on
+out-of-date information, but with an up-to-date generation number.
+
+To avoid this, the generation number is incremented again after synchronize_srcu
+returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+memslot update, while some SRCU readers might be using the old copy. We do not
+want to use an MMIO sptes created with an odd generation number, and we can do
+this without losing a bit in the MMIO spte. The low bit of the generation
+is not stored in MMIO spte, and presumed zero when it is extracted out of the
+spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1,
+the next access to the spte will always be a cache miss.
+
 
 Further reading
 ===============
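
The odd/even generation rule can be condensed into two helpers; this is only an illustrative sketch of the invariant described above, using invented names (mmio_spte_store_gen, mmio_spte_gen_current) rather than the kernel's actual MMU code.

#include <stdbool.h>
#include <stdint.h>

/* Bit 0 of kvm_memslots(kvm)->generation is never stored in the MMIO spte and
 * reads back as zero. An spte created while a memslot update is in flight
 * (generation odd) therefore records generation - 1, a value the counter has
 * already moved past, so every later check misses and the access is handled
 * through the slow path again. */
static uint64_t mmio_spte_store_gen(uint64_t memslots_gen)
{
	return memslots_gen & ~(uint64_t)1;	/* drop the "update in progress" bit */
}

static bool mmio_spte_gen_current(uint64_t stored_gen, uint64_t memslots_gen)
{
	return stored_gen == memslots_gen;
}
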
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index fcb12a6f7db5..46e5d4da1989 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
19#ifndef __ARM_KVM_HOST_H__ 19#ifndef __ARM_KVM_HOST_H__
20#define __ARM_KVM_HOST_H__ 20#define __ARM_KVM_HOST_H__
21 21
22#include <linux/types.h>
23#include <linux/kvm_types.h>
22#include <asm/kvm.h> 24#include <asm/kvm.h>
23#include <asm/kvm_asm.h> 25#include <asm/kvm_asm.h>
24#include <asm/kvm_mmio.h> 26#include <asm/kvm_mmio.h>
@@ -40,7 +42,6 @@
40 42
41#include <kvm/arm_vgic.h> 43#include <kvm/arm_vgic.h>
42 44
43struct kvm_vcpu;
44u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); 45u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
45int __attribute_const__ kvm_target_cpu(void); 46int __attribute_const__ kvm_target_cpu(void);
46int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 47int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -149,20 +150,17 @@ struct kvm_vcpu_stat {
149 u32 halt_wakeup; 150 u32 halt_wakeup;
150}; 151};
151 152
152struct kvm_vcpu_init;
153int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 153int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
154 const struct kvm_vcpu_init *init); 154 const struct kvm_vcpu_init *init);
155int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 155int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
156unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 156unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
157int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 157int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
158struct kvm_one_reg;
159int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); 158int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
160int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); 159int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
161u64 kvm_call_hyp(void *hypfn, ...); 160u64 kvm_call_hyp(void *hypfn, ...);
162void force_vm_exit(const cpumask_t *mask); 161void force_vm_exit(const cpumask_t *mask);
163 162
164#define KVM_ARCH_WANT_MMU_NOTIFIER 163#define KVM_ARCH_WANT_MMU_NOTIFIER
165struct kvm;
166int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 164int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
167int kvm_unmap_hva_range(struct kvm *kvm, 165int kvm_unmap_hva_range(struct kvm *kvm,
168 unsigned long start, unsigned long end); 166 unsigned long start, unsigned long end);
@@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
187 185
188int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); 186int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
189unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); 187unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
190struct kvm_one_reg;
191int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); 188int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
192int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); 189int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
193 190
@@ -233,4 +230,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
233int kvm_perf_init(void); 230int kvm_perf_init(void);
234int kvm_perf_teardown(void); 231int kvm_perf_teardown(void);
235 232
233static inline void kvm_arch_hardware_disable(void) {}
234static inline void kvm_arch_hardware_unsetup(void) {}
235static inline void kvm_arch_sync_events(struct kvm *kvm) {}
236static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
237static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
238
236#endif /* __ARM_KVM_HOST_H__ */ 239#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 53ee31b23961..88c901cfc75e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -87,7 +87,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
87 return &kvm_arm_running_vcpu; 87 return &kvm_arm_running_vcpu;
88} 88}
89 89
90int kvm_arch_hardware_enable(void *garbage) 90int kvm_arch_hardware_enable(void)
91{ 91{
92 return 0; 92 return 0;
93} 93}
@@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
97 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; 97 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
98} 98}
99 99
100void kvm_arch_hardware_disable(void *garbage)
101{
102}
103
104int kvm_arch_hardware_setup(void) 100int kvm_arch_hardware_setup(void)
105{ 101{
106 return 0; 102 return 0;
107} 103}
108 104
109void kvm_arch_hardware_unsetup(void)
110{
111}
112
113void kvm_arch_check_processor_compat(void *rtn) 105void kvm_arch_check_processor_compat(void *rtn)
114{ 106{
115 *(int *)rtn = 0; 107 *(int *)rtn = 0;
116} 108}
117 109
118void kvm_arch_sync_events(struct kvm *kvm)
119{
120}
121 110
122/** 111/**
123 * kvm_arch_init_vm - initializes a VM data structure 112 * kvm_arch_init_vm - initializes a VM data structure
@@ -285,14 +274,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
285 return 0; 274 return 0;
286} 275}
287 276
288void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
289{
290}
291
292void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
293{
294}
295
296void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 277void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
297{ 278{
298 vcpu->cpu = cpu; 279 vcpu->cpu = cpu;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 50431d36732b..bcde41905746 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
22#ifndef __ARM64_KVM_HOST_H__ 22#ifndef __ARM64_KVM_HOST_H__
23#define __ARM64_KVM_HOST_H__ 23#define __ARM64_KVM_HOST_H__
24 24
25#include <linux/types.h>
26#include <linux/kvm_types.h>
25#include <asm/kvm.h> 27#include <asm/kvm.h>
26#include <asm/kvm_asm.h> 28#include <asm/kvm_asm.h>
27#include <asm/kvm_mmio.h> 29#include <asm/kvm_mmio.h>
@@ -41,7 +43,6 @@
41 43
42#define KVM_VCPU_MAX_FEATURES 3 44#define KVM_VCPU_MAX_FEATURES 3
43 45
44struct kvm_vcpu;
45int __attribute_const__ kvm_target_cpu(void); 46int __attribute_const__ kvm_target_cpu(void);
46int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 47int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
47int kvm_arch_dev_ioctl_check_extension(long ext); 48int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -164,18 +165,15 @@ struct kvm_vcpu_stat {
164 u32 halt_wakeup; 165 u32 halt_wakeup;
165}; 166};
166 167
167struct kvm_vcpu_init;
168int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 168int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
169 const struct kvm_vcpu_init *init); 169 const struct kvm_vcpu_init *init);
170int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 170int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
171unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 171unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
172int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 172int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
173struct kvm_one_reg;
174int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); 173int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
175int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); 174int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
176 175
177#define KVM_ARCH_WANT_MMU_NOTIFIER 176#define KVM_ARCH_WANT_MMU_NOTIFIER
178struct kvm;
179int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 177int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
180int kvm_unmap_hva_range(struct kvm *kvm, 178int kvm_unmap_hva_range(struct kvm *kvm,
181 unsigned long start, unsigned long end); 179 unsigned long start, unsigned long end);
@@ -244,4 +242,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
244 } 242 }
245} 243}
246 244
245static inline void kvm_arch_hardware_disable(void) {}
246static inline void kvm_arch_hardware_unsetup(void) {}
247static inline void kvm_arch_sync_events(struct kvm *kvm) {}
248static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
249static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
250
247#endif /* __ARM64_KVM_HOST_H__ */ 251#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index db95f570705f..4729752b7256 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -234,9 +234,6 @@ struct kvm_vm_data {
234#define KVM_REQ_PTC_G 32 234#define KVM_REQ_PTC_G 32
235#define KVM_REQ_RESUME 33 235#define KVM_REQ_RESUME 33
236 236
237struct kvm;
238struct kvm_vcpu;
239
240struct kvm_mmio_req { 237struct kvm_mmio_req {
241 uint64_t addr; /* physical address */ 238 uint64_t addr; /* physical address */
242 uint64_t size; /* size in bytes */ 239 uint64_t size; /* size in bytes */
@@ -595,6 +592,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
595struct kvm *kvm_arch_alloc_vm(void); 592struct kvm *kvm_arch_alloc_vm(void);
596void kvm_arch_free_vm(struct kvm *kvm); 593void kvm_arch_free_vm(struct kvm *kvm);
597 594
595static inline void kvm_arch_sync_events(struct kvm *kvm) {}
596static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
597static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
598static inline void kvm_arch_free_memslot(struct kvm *kvm,
599 struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
600static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
601static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
602 struct kvm_userspace_memory_region *mem,
603 const struct kvm_memory_slot *old,
604 enum kvm_mr_change change) {}
605static inline void kvm_arch_hardware_unsetup(void) {}
606
598#endif /* __ASSEMBLY__*/ 607#endif /* __ASSEMBLY__*/
599 608
600#endif 609#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0729ba6acddf..ec6b9acb6bea 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
125 125
126static DEFINE_SPINLOCK(vp_lock); 126static DEFINE_SPINLOCK(vp_lock);
127 127
128int kvm_arch_hardware_enable(void *garbage) 128int kvm_arch_hardware_enable(void)
129{ 129{
130 long status; 130 long status;
131 long tmp_base; 131 long tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
160 return 0; 160 return 0;
161} 161}
162 162
163void kvm_arch_hardware_disable(void *garbage) 163void kvm_arch_hardware_disable(void)
164{ 164{
165 165
166 long status; 166 long status;
@@ -1364,10 +1364,6 @@ static void kvm_release_vm_pages(struct kvm *kvm)
1364 } 1364 }
1365} 1365}
1366 1366
1367void kvm_arch_sync_events(struct kvm *kvm)
1368{
1369}
1370
1371void kvm_arch_destroy_vm(struct kvm *kvm) 1367void kvm_arch_destroy_vm(struct kvm *kvm)
1372{ 1368{
1373 kvm_iommu_unmap_guest(kvm); 1369 kvm_iommu_unmap_guest(kvm);
@@ -1376,10 +1372,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
1376 kvm_release_vm_pages(kvm); 1372 kvm_release_vm_pages(kvm);
1377} 1373}
1378 1374
1379void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1380{
1381}
1382
1383void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1375void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1384{ 1376{
1385 if (cpu != vcpu->cpu) { 1377 if (cpu != vcpu->cpu) {
@@ -1468,7 +1460,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
1468 kfree(vcpu->arch.apic); 1460 kfree(vcpu->arch.apic);
1469} 1461}
1470 1462
1471
1472long kvm_arch_vcpu_ioctl(struct file *filp, 1463long kvm_arch_vcpu_ioctl(struct file *filp,
1473 unsigned int ioctl, unsigned long arg) 1464 unsigned int ioctl, unsigned long arg)
1474{ 1465{
@@ -1551,21 +1542,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1551 return VM_FAULT_SIGBUS; 1542 return VM_FAULT_SIGBUS;
1552} 1543}
1553 1544
1554void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1555 struct kvm_memory_slot *dont)
1556{
1557}
1558
1559int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1545int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1560 unsigned long npages) 1546 unsigned long npages)
1561{ 1547{
1562 return 0; 1548 return 0;
1563} 1549}
1564 1550
1565void kvm_arch_memslots_updated(struct kvm *kvm)
1566{
1567}
1568
1569int kvm_arch_prepare_memory_region(struct kvm *kvm, 1551int kvm_arch_prepare_memory_region(struct kvm *kvm,
1570 struct kvm_memory_slot *memslot, 1552 struct kvm_memory_slot *memslot,
1571 struct kvm_userspace_memory_region *mem, 1553 struct kvm_userspace_memory_region *mem,
@@ -1597,14 +1579,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
1597 return 0; 1579 return 0;
1598} 1580}
1599 1581
1600void kvm_arch_commit_memory_region(struct kvm *kvm,
1601 struct kvm_userspace_memory_region *mem,
1602 const struct kvm_memory_slot *old,
1603 enum kvm_mr_change change)
1604{
1605 return;
1606}
1607
1608void kvm_arch_flush_shadow_all(struct kvm *kvm) 1582void kvm_arch_flush_shadow_all(struct kvm *kvm)
1609{ 1583{
1610 kvm_flush_remote_tlbs(kvm); 1584 kvm_flush_remote_tlbs(kvm);
@@ -1853,10 +1827,6 @@ int kvm_arch_hardware_setup(void)
1853 return 0; 1827 return 0;
1854} 1828}
1855 1829
1856void kvm_arch_hardware_unsetup(void)
1857{
1858}
1859
1860int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) 1830int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
1861{ 1831{
1862 return __apic_accept_irq(vcpu, irq->vector); 1832 return __apic_accept_irq(vcpu, irq->vector);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 7a3fc67bd7f9..f2c249796ea8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -96,11 +96,6 @@
96#define CAUSEB_DC 27 96#define CAUSEB_DC 27
97#define CAUSEF_DC (_ULCAST_(1) << 27) 97#define CAUSEF_DC (_ULCAST_(1) << 27)
98 98
99struct kvm;
100struct kvm_run;
101struct kvm_vcpu;
102struct kvm_interrupt;
103
104extern atomic_t kvm_mips_instance; 99extern atomic_t kvm_mips_instance;
105extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); 100extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
106extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); 101extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
@@ -767,5 +762,16 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
767extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); 762extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
768extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); 763extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
769 764
765static inline void kvm_arch_hardware_disable(void) {}
766static inline void kvm_arch_hardware_unsetup(void) {}
767static inline void kvm_arch_sync_events(struct kvm *kvm) {}
768static inline void kvm_arch_free_memslot(struct kvm *kvm,
769 struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
770static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
771static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
772static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
773 struct kvm_memory_slot *slot) {}
774static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
775static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
770 776
771#endif /* __MIPS_KVM_HOST_H__ */ 777#endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 2362df2a79f9..e3b21e51ff7e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -77,24 +77,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
77 return 1; 77 return 1;
78} 78}
79 79
80int kvm_arch_hardware_enable(void *garbage) 80int kvm_arch_hardware_enable(void)
81{ 81{
82 return 0; 82 return 0;
83} 83}
84 84
85void kvm_arch_hardware_disable(void *garbage)
86{
87}
88
89int kvm_arch_hardware_setup(void) 85int kvm_arch_hardware_setup(void)
90{ 86{
91 return 0; 87 return 0;
92} 88}
93 89
94void kvm_arch_hardware_unsetup(void)
95{
96}
97
98void kvm_arch_check_processor_compat(void *rtn) 90void kvm_arch_check_processor_compat(void *rtn)
99{ 91{
100 *(int *)rtn = 0; 92 *(int *)rtn = 0;
@@ -163,10 +155,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
163 mutex_unlock(&kvm->lock); 155 mutex_unlock(&kvm->lock);
164} 156}
165 157
166void kvm_arch_sync_events(struct kvm *kvm)
167{
168}
169
170static void kvm_mips_uninit_tlbs(void *arg) 158static void kvm_mips_uninit_tlbs(void *arg)
171{ 159{
172 /* Restore wired count */ 160 /* Restore wired count */
@@ -194,21 +182,12 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl,
194 return -ENOIOCTLCMD; 182 return -ENOIOCTLCMD;
195} 183}
196 184
197void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
198 struct kvm_memory_slot *dont)
199{
200}
201
202int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 185int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
203 unsigned long npages) 186 unsigned long npages)
204{ 187{
205 return 0; 188 return 0;
206} 189}
207 190
208void kvm_arch_memslots_updated(struct kvm *kvm)
209{
210}
211
212int kvm_arch_prepare_memory_region(struct kvm *kvm, 191int kvm_arch_prepare_memory_region(struct kvm *kvm,
213 struct kvm_memory_slot *memslot, 192 struct kvm_memory_slot *memslot,
214 struct kvm_userspace_memory_region *mem, 193 struct kvm_userspace_memory_region *mem,
@@ -254,19 +233,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
254 } 233 }
255} 234}
256 235
257void kvm_arch_flush_shadow_all(struct kvm *kvm)
258{
259}
260
261void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
262 struct kvm_memory_slot *slot)
263{
264}
265
266void kvm_arch_flush_shadow(struct kvm *kvm)
267{
268}
269
270struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 236struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
271{ 237{
272 int err, size, offset; 238 int err, size, offset;
@@ -998,14 +964,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
998 return 0; 964 return 0;
999} 965}
1000 966
1001void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
1002{
1003}
1004
1005void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
1006{
1007}
1008
1009int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 967int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1010 struct kvm_translation *tr) 968 struct kvm_translation *tr)
1011{ 969{
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 98d9dd50d063..604000882352 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,7 +53,6 @@
53 53
54#define KVM_ARCH_WANT_MMU_NOTIFIER 54#define KVM_ARCH_WANT_MMU_NOTIFIER
55 55
56struct kvm;
57extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 56extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
58extern int kvm_unmap_hva_range(struct kvm *kvm, 57extern int kvm_unmap_hva_range(struct kvm *kvm,
59 unsigned long start, unsigned long end); 58 unsigned long start, unsigned long end);
@@ -76,10 +75,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
76/* Physical Address Mask - allowed range of real mode RAM access */ 75/* Physical Address Mask - allowed range of real mode RAM access */
77#define KVM_PAM 0x0fffffffffffffffULL 76#define KVM_PAM 0x0fffffffffffffffULL
78 77
79struct kvm;
80struct kvm_run;
81struct kvm_vcpu;
82
83struct lppaca; 78struct lppaca;
84struct slb_shadow; 79struct slb_shadow;
85struct dtl_entry; 80struct dtl_entry;
@@ -687,4 +682,12 @@ struct kvm_vcpu_arch {
687#define __KVM_HAVE_ARCH_WQP 682#define __KVM_HAVE_ARCH_WQP
688#define __KVM_HAVE_CREATE_DEVICE 683#define __KVM_HAVE_CREATE_DEVICE
689 684
685static inline void kvm_arch_hardware_disable(void) {}
686static inline void kvm_arch_hardware_unsetup(void) {}
687static inline void kvm_arch_sync_events(struct kvm *kvm) {}
688static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
689static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
690static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
691static inline void kvm_arch_exit(void) {}
692
690#endif /* __POWERPC_KVM_HOST_H__ */ 693#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index cbc432f4f0a6..da505237a664 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -384,24 +384,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
384} 384}
385EXPORT_SYMBOL_GPL(kvmppc_ld); 385EXPORT_SYMBOL_GPL(kvmppc_ld);
386 386
387int kvm_arch_hardware_enable(void *garbage) 387int kvm_arch_hardware_enable(void)
388{ 388{
389 return 0; 389 return 0;
390} 390}
391 391
392void kvm_arch_hardware_disable(void *garbage)
393{
394}
395
396int kvm_arch_hardware_setup(void) 392int kvm_arch_hardware_setup(void)
397{ 393{
398 return 0; 394 return 0;
399} 395}
400 396
401void kvm_arch_hardware_unsetup(void)
402{
403}
404
405void kvm_arch_check_processor_compat(void *rtn) 397void kvm_arch_check_processor_compat(void *rtn)
406{ 398{
407 *(int *)rtn = kvmppc_core_check_processor_compat(); 399 *(int *)rtn = kvmppc_core_check_processor_compat();
@@ -462,10 +454,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
462 module_put(kvm->arch.kvm_ops->owner); 454 module_put(kvm->arch.kvm_ops->owner);
463} 455}
464 456
465void kvm_arch_sync_events(struct kvm *kvm)
466{
467}
468
469int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 457int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
470{ 458{
471 int r; 459 int r;
@@ -608,10 +596,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
608 return kvmppc_core_create_memslot(kvm, slot, npages); 596 return kvmppc_core_create_memslot(kvm, slot, npages);
609} 597}
610 598
611void kvm_arch_memslots_updated(struct kvm *kvm)
612{
613}
614
615int kvm_arch_prepare_memory_region(struct kvm *kvm, 599int kvm_arch_prepare_memory_region(struct kvm *kvm,
616 struct kvm_memory_slot *memslot, 600 struct kvm_memory_slot *memslot,
617 struct kvm_userspace_memory_region *mem, 601 struct kvm_userspace_memory_region *mem,
@@ -628,10 +612,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
628 kvmppc_core_commit_memory_region(kvm, mem, old); 612 kvmppc_core_commit_memory_region(kvm, mem, old);
629} 613}
630 614
631void kvm_arch_flush_shadow_all(struct kvm *kvm)
632{
633}
634
635void kvm_arch_flush_shadow_memslot(struct kvm *kvm, 615void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
636 struct kvm_memory_slot *slot) 616 struct kvm_memory_slot *slot)
637{ 617{
@@ -720,10 +700,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
720 kvmppc_subarch_vcpu_uninit(vcpu); 700 kvmppc_subarch_vcpu_uninit(vcpu);
721} 701}
722 702
723void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
724{
725}
726
727void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 703void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
728{ 704{
729#ifdef CONFIG_BOOKE 705#ifdef CONFIG_BOOKE
@@ -1347,9 +1323,4 @@ int kvm_arch_init(void *opaque)
1347 return 0; 1323 return 0;
1348} 1324}
1349 1325
1350void kvm_arch_exit(void)
1351{
1352
1353}
1354
1355EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); 1326EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 773bef7614d8..1a6f6fd8bd34 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
13 13
14#ifndef ASM_KVM_HOST_H 14#ifndef ASM_KVM_HOST_H
15#define ASM_KVM_HOST_H 15#define ASM_KVM_HOST_H
16
17#include <linux/types.h>
16#include <linux/hrtimer.h> 18#include <linux/hrtimer.h>
17#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/kvm_types.h>
18#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
19#include <linux/kvm.h> 22#include <linux/kvm.h>
20#include <asm/debug.h> 23#include <asm/debug.h>
@@ -154,7 +157,9 @@ struct kvm_s390_sie_block {
154 __u8 armid; /* 0x00e3 */ 157 __u8 armid; /* 0x00e3 */
155 __u8 reservede4[4]; /* 0x00e4 */ 158 __u8 reservede4[4]; /* 0x00e4 */
156 __u64 tecmc; /* 0x00e8 */ 159 __u64 tecmc; /* 0x00e8 */
157 __u8 reservedf0[16]; /* 0x00f0 */ 160 __u8 reservedf0[12]; /* 0x00f0 */
161#define CRYCB_FORMAT1 0x00000001
162 __u32 crycbd; /* 0x00fc */
158 __u64 gcr[16]; /* 0x0100 */ 163 __u64 gcr[16]; /* 0x0100 */
159 __u64 gbea; /* 0x0180 */ 164 __u64 gbea; /* 0x0180 */
160 __u8 reserved188[24]; /* 0x0188 */ 165 __u8 reserved188[24]; /* 0x0188 */
@@ -407,6 +412,15 @@ struct s390_io_adapter {
407#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) 412#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
408#define MAX_S390_ADAPTER_MAPS 256 413#define MAX_S390_ADAPTER_MAPS 256
409 414
415struct kvm_s390_crypto {
416 struct kvm_s390_crypto_cb *crycb;
417 __u32 crycbd;
418};
419
420struct kvm_s390_crypto_cb {
421 __u8 reserved00[128]; /* 0x0000 */
422};
423
410struct kvm_arch{ 424struct kvm_arch{
411 struct sca_block *sca; 425 struct sca_block *sca;
412 debug_info_t *dbf; 426 debug_info_t *dbf;
@@ -420,6 +434,7 @@ struct kvm_arch{
420 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; 434 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
421 wait_queue_head_t ipte_wq; 435 wait_queue_head_t ipte_wq;
422 spinlock_t start_stop_lock; 436 spinlock_t start_stop_lock;
437 struct kvm_s390_crypto crypto;
423}; 438};
424 439
425#define KVM_HVA_ERR_BAD (-1UL) 440#define KVM_HVA_ERR_BAD (-1UL)
@@ -431,8 +446,6 @@ static inline bool kvm_is_error_hva(unsigned long addr)
431} 446}
432 447
433#define ASYNC_PF_PER_VCPU 64 448#define ASYNC_PF_PER_VCPU 64
434struct kvm_vcpu;
435struct kvm_async_pf;
436struct kvm_arch_async_pf { 449struct kvm_arch_async_pf {
437 unsigned long pfault_token; 450 unsigned long pfault_token;
438}; 451};
@@ -450,4 +463,18 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
450 463
451extern int sie64a(struct kvm_s390_sie_block *, u64 *); 464extern int sie64a(struct kvm_s390_sie_block *, u64 *);
452extern char sie_exit; 465extern char sie_exit;
466
467static inline void kvm_arch_hardware_disable(void) {}
468static inline void kvm_arch_check_processor_compat(void *rtn) {}
469static inline void kvm_arch_exit(void) {}
470static inline void kvm_arch_sync_events(struct kvm *kvm) {}
471static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
472static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
473static inline void kvm_arch_free_memslot(struct kvm *kvm,
474 struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
475static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
476static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
477static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
478 struct kvm_memory_slot *slot) {}
479
453#endif 480#endif
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 9e18a61d3df3..d39a31c3cdf2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -18,9 +18,9 @@
18unsigned long *crst_table_alloc(struct mm_struct *); 18unsigned long *crst_table_alloc(struct mm_struct *);
19void crst_table_free(struct mm_struct *, unsigned long *); 19void crst_table_free(struct mm_struct *, unsigned long *);
20 20
21unsigned long *page_table_alloc(struct mm_struct *, unsigned long); 21unsigned long *page_table_alloc(struct mm_struct *);
22void page_table_free(struct mm_struct *, unsigned long *); 22void page_table_free(struct mm_struct *, unsigned long *);
23void page_table_free_rcu(struct mmu_gather *, unsigned long *); 23void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
24 24
25void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, 25void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
26 bool init_skey); 26 bool init_skey);
@@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm,
145/* 145/*
146 * page table entry allocation/free routines. 146 * page table entry allocation/free routines.
147 */ 147 */
148#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) 148#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
149#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) 149#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
150 150
151#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) 151#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
152#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) 152#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b76317c1f3eb..0242588ded67 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -30,6 +30,7 @@
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/mm_types.h> 31#include <linux/mm_types.h>
32#include <linux/page-flags.h> 32#include <linux/page-flags.h>
33#include <linux/radix-tree.h>
33#include <asm/bug.h> 34#include <asm/bug.h>
34#include <asm/page.h> 35#include <asm/page.h>
35 36
@@ -789,82 +790,67 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
789 790
790/** 791/**
791 * struct gmap_struct - guest address space 792 * struct gmap_struct - guest address space
793 * @crst_list: list of all crst tables used in the guest address space
792 * @mm: pointer to the parent mm_struct 794 * @mm: pointer to the parent mm_struct
795 * @guest_to_host: radix tree with guest to host address translation
796 * @host_to_guest: radix tree with pointer to segment table entries
797 * @guest_table_lock: spinlock to protect all entries in the guest page table
793 * @table: pointer to the page directory 798 * @table: pointer to the page directory
794 * @asce: address space control element for gmap page table 799 * @asce: address space control element for gmap page table
795 * @crst_list: list of all crst tables used in the guest address space
796 * @pfault_enabled: defines if pfaults are applicable for the guest 800 * @pfault_enabled: defines if pfaults are applicable for the guest
797 */ 801 */
798struct gmap { 802struct gmap {
799 struct list_head list; 803 struct list_head list;
804 struct list_head crst_list;
800 struct mm_struct *mm; 805 struct mm_struct *mm;
806 struct radix_tree_root guest_to_host;
807 struct radix_tree_root host_to_guest;
808 spinlock_t guest_table_lock;
801 unsigned long *table; 809 unsigned long *table;
802 unsigned long asce; 810 unsigned long asce;
811 unsigned long asce_end;
803 void *private; 812 void *private;
804 struct list_head crst_list;
805 bool pfault_enabled; 813 bool pfault_enabled;
806}; 814};
807 815
808/** 816/**
809 * struct gmap_rmap - reverse mapping for segment table entries
810 * @gmap: pointer to the gmap_struct
811 * @entry: pointer to a segment table entry
812 * @vmaddr: virtual address in the guest address space
813 */
814struct gmap_rmap {
815 struct list_head list;
816 struct gmap *gmap;
817 unsigned long *entry;
818 unsigned long vmaddr;
819};
820
821/**
822 * struct gmap_pgtable - gmap information attached to a page table
823 * @vmaddr: address of the 1MB segment in the process virtual memory
824 * @mapper: list of segment table entries mapping a page table
825 */
826struct gmap_pgtable {
827 unsigned long vmaddr;
828 struct list_head mapper;
829};
830
831/**
832 * struct gmap_notifier - notify function block for page invalidation 817 * struct gmap_notifier - notify function block for page invalidation
833 * @notifier_call: address of callback function 818 * @notifier_call: address of callback function
834 */ 819 */
835struct gmap_notifier { 820struct gmap_notifier {
836 struct list_head list; 821 struct list_head list;
837 void (*notifier_call)(struct gmap *gmap, unsigned long address); 822 void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
838}; 823};
839 824
840struct gmap *gmap_alloc(struct mm_struct *mm); 825struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
841void gmap_free(struct gmap *gmap); 826void gmap_free(struct gmap *gmap);
842void gmap_enable(struct gmap *gmap); 827void gmap_enable(struct gmap *gmap);
843void gmap_disable(struct gmap *gmap); 828void gmap_disable(struct gmap *gmap);
844int gmap_map_segment(struct gmap *gmap, unsigned long from, 829int gmap_map_segment(struct gmap *gmap, unsigned long from,
845 unsigned long to, unsigned long len); 830 unsigned long to, unsigned long len);
846int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); 831int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
847unsigned long __gmap_translate(unsigned long address, struct gmap *); 832unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
848unsigned long gmap_translate(unsigned long address, struct gmap *); 833unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
849unsigned long __gmap_fault(unsigned long address, struct gmap *); 834int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
850unsigned long gmap_fault(unsigned long address, struct gmap *); 835int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
851void gmap_discard(unsigned long from, unsigned long to, struct gmap *); 836void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
852void __gmap_zap(unsigned long address, struct gmap *); 837void __gmap_zap(struct gmap *, unsigned long gaddr);
853bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); 838bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
854 839
855 840
856void gmap_register_ipte_notifier(struct gmap_notifier *); 841void gmap_register_ipte_notifier(struct gmap_notifier *);
857void gmap_unregister_ipte_notifier(struct gmap_notifier *); 842void gmap_unregister_ipte_notifier(struct gmap_notifier *);
858int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); 843int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
859void gmap_do_ipte_notify(struct mm_struct *, pte_t *); 844void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
860 845
861static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, 846static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
847 unsigned long addr,
862 pte_t *ptep, pgste_t pgste) 848 pte_t *ptep, pgste_t pgste)
863{ 849{
864#ifdef CONFIG_PGSTE 850#ifdef CONFIG_PGSTE
865 if (pgste_val(pgste) & PGSTE_IN_BIT) { 851 if (pgste_val(pgste) & PGSTE_IN_BIT) {
866 pgste_val(pgste) &= ~PGSTE_IN_BIT; 852 pgste_val(pgste) &= ~PGSTE_IN_BIT;
867 gmap_do_ipte_notify(mm, ptep); 853 gmap_do_ipte_notify(mm, addr, ptep);
868 } 854 }
869#endif 855#endif
870 return pgste; 856 return pgste;
@@ -1110,7 +1096,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1110 pgste_val(pgste) &= ~PGSTE_UC_BIT; 1096 pgste_val(pgste) &= ~PGSTE_UC_BIT;
1111 pte = *ptep; 1097 pte = *ptep;
1112 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 1098 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
1113 pgste = pgste_ipte_notify(mm, ptep, pgste); 1099 pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
1114 __ptep_ipte(addr, ptep); 1100 __ptep_ipte(addr, ptep);
1115 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 1101 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
1116 pte_val(pte) |= _PAGE_PROTECT; 1102 pte_val(pte) |= _PAGE_PROTECT;
@@ -1132,7 +1118,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1132 1118
1133 if (mm_has_pgste(vma->vm_mm)) { 1119 if (mm_has_pgste(vma->vm_mm)) {
1134 pgste = pgste_get_lock(ptep); 1120 pgste = pgste_get_lock(ptep);
1135 pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); 1121 pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
1136 } 1122 }
1137 1123
1138 pte = *ptep; 1124 pte = *ptep;
@@ -1178,7 +1164,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
1178 1164
1179 if (mm_has_pgste(mm)) { 1165 if (mm_has_pgste(mm)) {
1180 pgste = pgste_get_lock(ptep); 1166 pgste = pgste_get_lock(ptep);
1181 pgste = pgste_ipte_notify(mm, ptep, pgste); 1167 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1182 } 1168 }
1183 1169
1184 pte = *ptep; 1170 pte = *ptep;
@@ -1202,7 +1188,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
1202 1188
1203 if (mm_has_pgste(mm)) { 1189 if (mm_has_pgste(mm)) {
1204 pgste = pgste_get_lock(ptep); 1190 pgste = pgste_get_lock(ptep);
1205 pgste_ipte_notify(mm, ptep, pgste); 1191 pgste_ipte_notify(mm, address, ptep, pgste);
1206 } 1192 }
1207 1193
1208 pte = *ptep; 1194 pte = *ptep;
@@ -1239,7 +1225,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
1239 1225
1240 if (mm_has_pgste(vma->vm_mm)) { 1226 if (mm_has_pgste(vma->vm_mm)) {
1241 pgste = pgste_get_lock(ptep); 1227 pgste = pgste_get_lock(ptep);
1242 pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); 1228 pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
1243 } 1229 }
1244 1230
1245 pte = *ptep; 1231 pte = *ptep;
@@ -1273,7 +1259,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
1273 1259
1274 if (!full && mm_has_pgste(mm)) { 1260 if (!full && mm_has_pgste(mm)) {
1275 pgste = pgste_get_lock(ptep); 1261 pgste = pgste_get_lock(ptep);
1276 pgste = pgste_ipte_notify(mm, ptep, pgste); 1262 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1277 } 1263 }
1278 1264
1279 pte = *ptep; 1265 pte = *ptep;
@@ -1298,7 +1284,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
1298 if (pte_write(pte)) { 1284 if (pte_write(pte)) {
1299 if (mm_has_pgste(mm)) { 1285 if (mm_has_pgste(mm)) {
1300 pgste = pgste_get_lock(ptep); 1286 pgste = pgste_get_lock(ptep);
1301 pgste = pgste_ipte_notify(mm, ptep, pgste); 1287 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1302 } 1288 }
1303 1289
1304 ptep_flush_lazy(mm, address, ptep); 1290 ptep_flush_lazy(mm, address, ptep);
@@ -1324,7 +1310,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
1324 return 0; 1310 return 0;
1325 if (mm_has_pgste(vma->vm_mm)) { 1311 if (mm_has_pgste(vma->vm_mm)) {
1326 pgste = pgste_get_lock(ptep); 1312 pgste = pgste_get_lock(ptep);
1327 pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); 1313 pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
1328 } 1314 }
1329 1315
1330 ptep_flush_direct(vma->vm_mm, address, ptep); 1316 ptep_flush_direct(vma->vm_mm, address, ptep);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index a25f09fbaf36..572c59949004 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
105static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 105static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
106 unsigned long address) 106 unsigned long address)
107{ 107{
108 page_table_free_rcu(tlb, (unsigned long *) pte); 108 page_table_free_rcu(tlb, (unsigned long *) pte, address);
109} 109}
110 110
111/* 111/*
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 0fc26430a1e5..48eda3ab4944 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -111,12 +111,22 @@ struct kvm_guest_debug_arch {
111#define KVM_SYNC_GPRS (1UL << 1) 111#define KVM_SYNC_GPRS (1UL << 1)
112#define KVM_SYNC_ACRS (1UL << 2) 112#define KVM_SYNC_ACRS (1UL << 2)
113#define KVM_SYNC_CRS (1UL << 3) 113#define KVM_SYNC_CRS (1UL << 3)
114#define KVM_SYNC_ARCH0 (1UL << 4)
115#define KVM_SYNC_PFAULT (1UL << 5)
114/* definition of registers in kvm_run */ 116/* definition of registers in kvm_run */
115struct kvm_sync_regs { 117struct kvm_sync_regs {
116 __u64 prefix; /* prefix register */ 118 __u64 prefix; /* prefix register */
117 __u64 gprs[16]; /* general purpose registers */ 119 __u64 gprs[16]; /* general purpose registers */
118 __u32 acrs[16]; /* access registers */ 120 __u32 acrs[16]; /* access registers */
119 __u64 crs[16]; /* control registers */ 121 __u64 crs[16]; /* control registers */
122 __u64 todpr; /* tod programmable register [ARCH0] */
123 __u64 cputm; /* cpu timer [ARCH0] */
124 __u64 ckc; /* clock comparator [ARCH0] */
125 __u64 pp; /* program parameter [ARCH0] */
126 __u64 gbea; /* guest breaking-event address [ARCH0] */
127 __u64 pft; /* pfault token [PFAULT] */
128 __u64 pfs; /* pfault select [PFAULT] */
129 __u64 pfc; /* pfault compare [PFAULT] */
120}; 130};
121 131
122#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) 132#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
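
For the new [ARCH0] fields, a short sketch of how s390 userspace might hand state to the kernel through the synced register block; it is not part of this patch. The helper name (run_with_new_ckc), the vcpu_fd descriptor and the mmap'ed kvm_run pointer are assumptions, and KVM_CAP_SYNC_REGS is assumed to have been checked.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Push a new clock comparator into the vcpu before the next run; the
 * KVM_SYNC_ARCH0 bit covers todpr, cputm, ckc, pp and gbea as defined above. */
static int run_with_new_ckc(int vcpu_fd, struct kvm_run *run, __u64 ckc)
{
	run->s.regs.ckc = ckc;
	run->kvm_dirty_regs |= KVM_SYNC_ARCH0;
	return ioctl(vcpu_fd, KVM_RUN, 0);
}

After KVM_RUN returns, kvm_valid_regs indicates which parts of s.regs the kernel has filled in, and, as the api.txt hunk above notes, the kernel may keep using kvm_run as the primary storage for these registers.
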
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 59bd8f991b98..9254afff250c 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; 28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; 29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
30 30
31 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end 31 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
32 || start < 2 * PAGE_SIZE) 32 || start < 2 * PAGE_SIZE)
33 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 33 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
34 34
35 VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); 35 VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
36 vcpu->stat.diagnose_10++; 36 vcpu->stat.diagnose_10++;
37 37
38 /* we checked for start > end above */ 38 /*
39 if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { 39 * We checked for start >= end above, so lets check for the
40 gmap_discard(start, end, vcpu->arch.gmap); 40 * fast path (no prefix swap page involved)
41 */
42 if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
43 gmap_discard(vcpu->arch.gmap, start, end);
41 } else { 44 } else {
42 if (start < prefix) 45 /*
43 gmap_discard(start, prefix, vcpu->arch.gmap); 46 * This is slow path. gmap_discard will check for start
44 if (end >= prefix) 47 * so lets split this into before prefix, prefix, after
45 gmap_discard(prefix + 2 * PAGE_SIZE, 48 * prefix and let gmap_discard make some of these calls
46 end, vcpu->arch.gmap); 49 * NOPs.
50 */
51 gmap_discard(vcpu->arch.gmap, start, prefix);
52 if (start <= prefix)
53 gmap_discard(vcpu->arch.gmap, 0, 4096);
54 if (end > prefix + 4096)
55 gmap_discard(vcpu->arch.gmap, 4096, 8192);
56 gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
47 } 57 }
48 return 0; 58 return 0;
49} 59}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4653ac6e182b..0f961a1c64b3 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
254 new = old = ACCESS_ONCE(*ic); 254 new = old = ACCESS_ONCE(*ic);
255 new.k = 0; 255 new.k = 0;
256 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 256 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
257 if (!ipte_lock_count) 257 wake_up(&vcpu->kvm->arch.ipte_wq);
258 wake_up(&vcpu->kvm->arch.ipte_wq);
259out: 258out:
260 mutex_unlock(&ipte_mutex); 259 mutex_unlock(&ipte_mutex);
261} 260}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4c819bfc193..4cad00adef93 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -26,8 +26,9 @@
26#define IOINT_SSID_MASK 0x00030000 26#define IOINT_SSID_MASK 0x00030000
27#define IOINT_CSSID_MASK 0x03fc0000 27#define IOINT_CSSID_MASK 0x03fc0000
28#define IOINT_AI_MASK 0x04000000 28#define IOINT_AI_MASK 0x04000000
29#define PFAULT_INIT 0x0600
29 30
30static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); 31static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
31 32
32static int is_ioint(u64 type) 33static int is_ioint(u64 type)
33{ 34{
@@ -76,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word)
76 return (0x80 >> isc) << 24; 77 return (0x80 >> isc) << 24;
77} 78}
78 79
79static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, 80static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
80 struct kvm_s390_interrupt_info *inti) 81 struct kvm_s390_interrupt_info *inti)
81{ 82{
82 switch (inti->type) { 83 switch (inti->type) {
@@ -85,6 +86,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
85 return 0; 86 return 0;
86 if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) 87 if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
87 return 1; 88 return 1;
89 return 0;
88 case KVM_S390_INT_EMERGENCY: 90 case KVM_S390_INT_EMERGENCY:
89 if (psw_extint_disabled(vcpu)) 91 if (psw_extint_disabled(vcpu))
90 return 0; 92 return 0;
@@ -205,11 +207,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
205 } 207 }
206} 208}
207 209
208static int __deliver_prog_irq(struct kvm_vcpu *vcpu, 210static u16 get_ilc(struct kvm_vcpu *vcpu)
209 struct kvm_s390_pgm_info *pgm_info)
210{ 211{
211 const unsigned short table[] = { 2, 4, 4, 6 }; 212 const unsigned short table[] = { 2, 4, 4, 6 };
213
214 switch (vcpu->arch.sie_block->icptcode) {
215 case ICPT_INST:
216 case ICPT_INSTPROGI:
217 case ICPT_OPEREXC:
218 case ICPT_PARTEXEC:
219 case ICPT_IOINST:
220 /* last instruction only stored for these icptcodes */
221 return table[vcpu->arch.sie_block->ipa >> 14];
222 case ICPT_PROGI:
223 return vcpu->arch.sie_block->pgmilc;
224 default:
225 return 0;
226 }
227}
228
229static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
230 struct kvm_s390_pgm_info *pgm_info)
231{
212 int rc = 0; 232 int rc = 0;
233 u16 ilc = get_ilc(vcpu);
213 234
214 switch (pgm_info->code & ~PGM_PER) { 235 switch (pgm_info->code & ~PGM_PER) {
215 case PGM_AFX_TRANSLATION: 236 case PGM_AFX_TRANSLATION:
@@ -276,25 +297,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
276 (u8 *) __LC_PER_ACCESS_ID); 297 (u8 *) __LC_PER_ACCESS_ID);
277 } 298 }
278 299
279 switch (vcpu->arch.sie_block->icptcode) { 300 rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
280 case ICPT_INST:
281 case ICPT_INSTPROGI:
282 case ICPT_OPEREXC:
283 case ICPT_PARTEXEC:
284 case ICPT_IOINST:
285 /* last instruction only stored for these icptcodes */
286 rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
287 (u16 *) __LC_PGM_ILC);
288 break;
289 case ICPT_PROGI:
290 rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
291 (u16 *) __LC_PGM_ILC);
292 break;
293 default:
294 rc |= put_guest_lc(vcpu, 0,
295 (u16 *) __LC_PGM_ILC);
296 }
297
298 rc |= put_guest_lc(vcpu, pgm_info->code, 301 rc |= put_guest_lc(vcpu, pgm_info->code,
299 (u16 *)__LC_PGM_INT_CODE); 302 (u16 *)__LC_PGM_INT_CODE);
300 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, 303 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -305,7 +308,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
305 return rc; 308 return rc;
306} 309}
307 310
308static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, 311static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
309 struct kvm_s390_interrupt_info *inti) 312 struct kvm_s390_interrupt_info *inti)
310{ 313{
311 const unsigned short table[] = { 2, 4, 4, 6 }; 314 const unsigned short table[] = { 2, 4, 4, 6 };
@@ -343,7 +346,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
343 case KVM_S390_INT_CLOCK_COMP: 346 case KVM_S390_INT_CLOCK_COMP:
344 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 347 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
345 inti->ext.ext_params, 0); 348 inti->ext.ext_params, 0);
346 deliver_ckc_interrupt(vcpu); 349 rc = deliver_ckc_interrupt(vcpu);
347 break; 350 break;
348 case KVM_S390_INT_CPU_TIMER: 351 case KVM_S390_INT_CPU_TIMER:
349 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 352 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
@@ -376,8 +379,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
376 case KVM_S390_INT_PFAULT_INIT: 379 case KVM_S390_INT_PFAULT_INIT:
377 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 380 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
378 inti->ext.ext_params2); 381 inti->ext.ext_params2);
379 rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); 382 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
380 rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); 383 (u16 *) __LC_EXT_INT_CODE);
384 rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
381 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, 385 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
382 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 386 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
383 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, 387 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
@@ -501,14 +505,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
501 default: 505 default:
502 BUG(); 506 BUG();
503 } 507 }
504 if (rc) { 508
505 printk("kvm: The guest lowcore is not mapped during interrupt " 509 return rc;
506 "delivery, killing userspace\n");
507 do_exit(SIGKILL);
508 }
509} 510}
510 511
511static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) 512static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
512{ 513{
513 int rc; 514 int rc;
514 515
@@ -518,11 +519,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
518 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, 519 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
519 &vcpu->arch.sie_block->gpsw, 520 &vcpu->arch.sie_block->gpsw,
520 sizeof(psw_t)); 521 sizeof(psw_t));
521 if (rc) { 522 return rc;
522 printk("kvm: The guest lowcore is not mapped during interrupt "
523 "delivery, killing userspace\n");
524 do_exit(SIGKILL);
525 }
526} 523}
527 524
528/* Check whether SIGP interpretation facility has an external call pending */ 525/* Check whether SIGP interpretation facility has an external call pending */
@@ -661,12 +658,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
661 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); 658 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
662} 659}
663 660
664void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 661int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
665{ 662{
666 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 663 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
667 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; 664 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
668 struct kvm_s390_interrupt_info *n, *inti = NULL; 665 struct kvm_s390_interrupt_info *n, *inti = NULL;
669 int deliver; 666 int deliver;
667 int rc = 0;
670 668
671 __reset_intercept_indicators(vcpu); 669 __reset_intercept_indicators(vcpu);
672 if (atomic_read(&li->active)) { 670 if (atomic_read(&li->active)) {
@@ -685,16 +683,16 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
685 atomic_set(&li->active, 0); 683 atomic_set(&li->active, 0);
686 spin_unlock(&li->lock); 684 spin_unlock(&li->lock);
687 if (deliver) { 685 if (deliver) {
688 __do_deliver_interrupt(vcpu, inti); 686 rc = __do_deliver_interrupt(vcpu, inti);
689 kfree(inti); 687 kfree(inti);
690 } 688 }
691 } while (deliver); 689 } while (!rc && deliver);
692 } 690 }
693 691
694 if (kvm_cpu_has_pending_timer(vcpu)) 692 if (!rc && kvm_cpu_has_pending_timer(vcpu))
695 deliver_ckc_interrupt(vcpu); 693 rc = deliver_ckc_interrupt(vcpu);
696 694
697 if (atomic_read(&fi->active)) { 695 if (!rc && atomic_read(&fi->active)) {
698 do { 696 do {
699 deliver = 0; 697 deliver = 0;
700 spin_lock(&fi->lock); 698 spin_lock(&fi->lock);
@@ -711,67 +709,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
711 atomic_set(&fi->active, 0); 709 atomic_set(&fi->active, 0);
712 spin_unlock(&fi->lock); 710 spin_unlock(&fi->lock);
713 if (deliver) { 711 if (deliver) {
714 __do_deliver_interrupt(vcpu, inti); 712 rc = __do_deliver_interrupt(vcpu, inti);
715 kfree(inti);
716 }
717 } while (deliver);
718 }
719}
720
721void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
722{
723 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
724 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
725 struct kvm_s390_interrupt_info *n, *inti = NULL;
726 int deliver;
727
728 __reset_intercept_indicators(vcpu);
729 if (atomic_read(&li->active)) {
730 do {
731 deliver = 0;
732 spin_lock(&li->lock);
733 list_for_each_entry_safe(inti, n, &li->list, list) {
734 if ((inti->type == KVM_S390_MCHK) &&
735 __interrupt_is_deliverable(vcpu, inti)) {
736 list_del(&inti->list);
737 deliver = 1;
738 break;
739 }
740 __set_intercept_indicator(vcpu, inti);
741 }
742 if (list_empty(&li->list))
743 atomic_set(&li->active, 0);
744 spin_unlock(&li->lock);
745 if (deliver) {
746 __do_deliver_interrupt(vcpu, inti);
747 kfree(inti); 713 kfree(inti);
748 } 714 }
749 } while (deliver); 715 } while (!rc && deliver);
750 } 716 }
751 717
752 if (atomic_read(&fi->active)) { 718 return rc;
753 do {
754 deliver = 0;
755 spin_lock(&fi->lock);
756 list_for_each_entry_safe(inti, n, &fi->list, list) {
757 if ((inti->type == KVM_S390_MCHK) &&
758 __interrupt_is_deliverable(vcpu, inti)) {
759 list_del(&inti->list);
760 fi->irq_count--;
761 deliver = 1;
762 break;
763 }
764 __set_intercept_indicator(vcpu, inti);
765 }
766 if (list_empty(&fi->list))
767 atomic_set(&fi->active, 0);
768 spin_unlock(&fi->lock);
769 if (deliver) {
770 __do_deliver_interrupt(vcpu, inti);
771 kfree(inti);
772 }
773 } while (deliver);
774 }
775} 719}
776 720
777int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) 721int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
@@ -1048,7 +992,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
1048 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, 992 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
1049 s390int->parm64, 2); 993 s390int->parm64, 2);
1050 994
1051 mutex_lock(&vcpu->kvm->lock);
1052 li = &vcpu->arch.local_int; 995 li = &vcpu->arch.local_int;
1053 spin_lock(&li->lock); 996 spin_lock(&li->lock);
1054 if (inti->type == KVM_S390_PROGRAM_INT) 997 if (inti->type == KVM_S390_PROGRAM_INT)
@@ -1060,7 +1003,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
1060 li->action_bits |= ACTION_STOP_ON_STOP; 1003 li->action_bits |= ACTION_STOP_ON_STOP;
1061 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); 1004 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1062 spin_unlock(&li->lock); 1005 spin_unlock(&li->lock);
1063 mutex_unlock(&vcpu->kvm->lock);
1064 kvm_s390_vcpu_wakeup(vcpu); 1006 kvm_s390_vcpu_wakeup(vcpu);
1065 return 0; 1007 return 0;
1066} 1008}
@@ -1300,7 +1242,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
1300 } 1242 }
1301 INIT_LIST_HEAD(&map->list); 1243 INIT_LIST_HEAD(&map->list);
1302 map->guest_addr = addr; 1244 map->guest_addr = addr;
1303 map->addr = gmap_translate(addr, kvm->arch.gmap); 1245 map->addr = gmap_translate(kvm->arch.gmap, addr);
1304 if (map->addr == -EFAULT) { 1246 if (map->addr == -EFAULT) {
1305 ret = -EFAULT; 1247 ret = -EFAULT;
1306 goto out; 1248 goto out;
@@ -1410,7 +1352,6 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1410 r = enqueue_floating_irq(dev, attr); 1352 r = enqueue_floating_irq(dev, attr);
1411 break; 1353 break;
1412 case KVM_DEV_FLIC_CLEAR_IRQS: 1354 case KVM_DEV_FLIC_CLEAR_IRQS:
1413 r = 0;
1414 kvm_s390_clear_float_irqs(dev->kvm); 1355 kvm_s390_clear_float_irqs(dev->kvm);
1415 break; 1356 break;
1416 case KVM_DEV_FLIC_APF_ENABLE: 1357 case KVM_DEV_FLIC_APF_ENABLE:
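
get_ilc() above factors the instruction-length lookup out of the program-interrupt delivery path so that a single value is stored at __LC_PGM_ILC. On s390 the instruction length is encoded in the two most significant bits of the opcode (00 = 2 bytes, 01/10 = 4 bytes, 11 = 6 bytes), which is what the { 2, 4, 4, 6 } table indexed by ipa >> 14 expresses. A standalone sketch of that lookup with hypothetical ipa values:

#include <stdint.h>
#include <stdio.h>

/* Instruction length in bytes, derived from bits 0-1 of the opcode. */
static unsigned int ilc_from_ipa(uint16_t ipa)
{
	static const unsigned short table[] = { 2, 4, 4, 6 };

	return table[ipa >> 14];
}

int main(void)
{
	/* 0xb2xx instructions have top bits 10 -> 4 bytes */
	printf("ilc(0xb214) = %u\n", ilc_from_ipa(0xb214));
	/* 0x0a00 (SVC) has top bits 00 -> 2 bytes */
	printf("ilc(0x0a00) = %u\n", ilc_from_ipa(0x0a00));
	return 0;
}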
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index a3c324ec4370..56a411c0245a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -100,16 +100,12 @@ int test_vfacility(unsigned long nr)
100} 100}
101 101
102/* Section: not file related */ 102/* Section: not file related */
103int kvm_arch_hardware_enable(void *garbage) 103int kvm_arch_hardware_enable(void)
104{ 104{
105 /* every s390 is virtualization enabled ;-) */ 105 /* every s390 is virtualization enabled ;-) */
106 return 0; 106 return 0;
107} 107}
108 108
109void kvm_arch_hardware_disable(void *garbage)
110{
111}
112
113static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); 109static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
114 110
115int kvm_arch_hardware_setup(void) 111int kvm_arch_hardware_setup(void)
@@ -124,17 +120,10 @@ void kvm_arch_hardware_unsetup(void)
124 gmap_unregister_ipte_notifier(&gmap_notifier); 120 gmap_unregister_ipte_notifier(&gmap_notifier);
125} 121}
126 122
127void kvm_arch_check_processor_compat(void *rtn)
128{
129}
130
131int kvm_arch_init(void *opaque) 123int kvm_arch_init(void *opaque)
132{ 124{
133 return 0; 125 /* Register floating interrupt controller interface. */
134} 126 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
135
136void kvm_arch_exit(void)
137{
138} 127}
139 128
140/* Section: device related */ 129/* Section: device related */
@@ -404,6 +393,22 @@ long kvm_arch_vm_ioctl(struct file *filp,
404 return r; 393 return r;
405} 394}
406 395
396static int kvm_s390_crypto_init(struct kvm *kvm)
397{
398 if (!test_vfacility(76))
399 return 0;
400
401 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
402 GFP_KERNEL | GFP_DMA);
403 if (!kvm->arch.crypto.crycb)
404 return -ENOMEM;
405
406 kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
407 CRYCB_FORMAT1;
408
409 return 0;
410}
411
407int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 412int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
408{ 413{
409 int rc; 414 int rc;
@@ -441,6 +446,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
441 if (!kvm->arch.dbf) 446 if (!kvm->arch.dbf)
442 goto out_nodbf; 447 goto out_nodbf;
443 448
449 if (kvm_s390_crypto_init(kvm) < 0)
450 goto out_crypto;
451
444 spin_lock_init(&kvm->arch.float_int.lock); 452 spin_lock_init(&kvm->arch.float_int.lock);
445 INIT_LIST_HEAD(&kvm->arch.float_int.list); 453 INIT_LIST_HEAD(&kvm->arch.float_int.list);
446 init_waitqueue_head(&kvm->arch.ipte_wq); 454 init_waitqueue_head(&kvm->arch.ipte_wq);
@@ -451,7 +459,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
451 if (type & KVM_VM_S390_UCONTROL) { 459 if (type & KVM_VM_S390_UCONTROL) {
452 kvm->arch.gmap = NULL; 460 kvm->arch.gmap = NULL;
453 } else { 461 } else {
454 kvm->arch.gmap = gmap_alloc(current->mm); 462 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
455 if (!kvm->arch.gmap) 463 if (!kvm->arch.gmap)
456 goto out_nogmap; 464 goto out_nogmap;
457 kvm->arch.gmap->private = kvm; 465 kvm->arch.gmap->private = kvm;
@@ -465,6 +473,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
465 473
466 return 0; 474 return 0;
467out_nogmap: 475out_nogmap:
476 kfree(kvm->arch.crypto.crycb);
477out_crypto:
468 debug_unregister(kvm->arch.dbf); 478 debug_unregister(kvm->arch.dbf);
469out_nodbf: 479out_nodbf:
470 free_page((unsigned long)(kvm->arch.sca)); 480 free_page((unsigned long)(kvm->arch.sca));
@@ -514,15 +524,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
514 mutex_unlock(&kvm->lock); 524 mutex_unlock(&kvm->lock);
515} 525}
516 526
517void kvm_arch_sync_events(struct kvm *kvm)
518{
519}
520
521void kvm_arch_destroy_vm(struct kvm *kvm) 527void kvm_arch_destroy_vm(struct kvm *kvm)
522{ 528{
523 kvm_free_vcpus(kvm); 529 kvm_free_vcpus(kvm);
524 free_page((unsigned long)(kvm->arch.sca)); 530 free_page((unsigned long)(kvm->arch.sca));
525 debug_unregister(kvm->arch.dbf); 531 debug_unregister(kvm->arch.dbf);
532 kfree(kvm->arch.crypto.crycb);
526 if (!kvm_is_ucontrol(kvm)) 533 if (!kvm_is_ucontrol(kvm))
527 gmap_free(kvm->arch.gmap); 534 gmap_free(kvm->arch.gmap);
528 kvm_s390_destroy_adapters(kvm); 535 kvm_s390_destroy_adapters(kvm);
@@ -535,7 +542,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
535 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 542 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
536 kvm_clear_async_pf_completion_queue(vcpu); 543 kvm_clear_async_pf_completion_queue(vcpu);
537 if (kvm_is_ucontrol(vcpu->kvm)) { 544 if (kvm_is_ucontrol(vcpu->kvm)) {
538 vcpu->arch.gmap = gmap_alloc(current->mm); 545 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
539 if (!vcpu->arch.gmap) 546 if (!vcpu->arch.gmap)
540 return -ENOMEM; 547 return -ENOMEM;
541 vcpu->arch.gmap->private = vcpu->kvm; 548 vcpu->arch.gmap->private = vcpu->kvm;
@@ -546,19 +553,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
546 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 553 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
547 KVM_SYNC_GPRS | 554 KVM_SYNC_GPRS |
548 KVM_SYNC_ACRS | 555 KVM_SYNC_ACRS |
549 KVM_SYNC_CRS; 556 KVM_SYNC_CRS |
557 KVM_SYNC_ARCH0 |
558 KVM_SYNC_PFAULT;
550 return 0; 559 return 0;
551} 560}
552 561
553void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
554{
555 /* Nothing todo */
556}
557
558void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
559{
560}
561
562void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 562void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
563{ 563{
564 save_fp_ctl(&vcpu->arch.host_fpregs.fpc); 564 save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
@@ -611,6 +611,14 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
611 return 0; 611 return 0;
612} 612}
613 613
614static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
615{
616 if (!test_vfacility(76))
617 return;
618
619 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
620}
621
614void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 622void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
615{ 623{
616 free_page(vcpu->arch.sie_block->cbrlo); 624 free_page(vcpu->arch.sie_block->cbrlo);
@@ -657,6 +665,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
657 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 665 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
658 get_cpu_id(&vcpu->arch.cpu_id); 666 get_cpu_id(&vcpu->arch.cpu_id);
659 vcpu->arch.cpu_id.version = 0xff; 667 vcpu->arch.cpu_id.version = 0xff;
668
669 kvm_s390_vcpu_crypto_setup(vcpu);
670
660 return rc; 671 return rc;
661} 672}
662 673
@@ -1053,6 +1064,11 @@ retry:
1053 goto retry; 1064 goto retry;
1054 } 1065 }
1055 1066
1067 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1068 vcpu->arch.sie_block->ihcpu = 0xffff;
1069 goto retry;
1070 }
1071
1056 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 1072 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1057 if (!ibs_enabled(vcpu)) { 1073 if (!ibs_enabled(vcpu)) {
1058 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 1074 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
@@ -1089,18 +1105,8 @@ retry:
1089 */ 1105 */
1090long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 1106long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1091{ 1107{
1092 struct mm_struct *mm = current->mm; 1108 return gmap_fault(vcpu->arch.gmap, gpa,
1093 hva_t hva; 1109 writable ? FAULT_FLAG_WRITE : 0);
1094 long rc;
1095
1096 hva = gmap_fault(gpa, vcpu->arch.gmap);
1097 if (IS_ERR_VALUE(hva))
1098 return (long)hva;
1099 down_read(&mm->mmap_sem);
1100 rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
1101 up_read(&mm->mmap_sem);
1102
1103 return rc < 0 ? rc : 0;
1104} 1110}
1105 1111
1106static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 1112static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -1195,8 +1201,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1195 if (test_cpu_flag(CIF_MCCK_PENDING)) 1201 if (test_cpu_flag(CIF_MCCK_PENDING))
1196 s390_handle_mcck(); 1202 s390_handle_mcck();
1197 1203
1198 if (!kvm_is_ucontrol(vcpu->kvm)) 1204 if (!kvm_is_ucontrol(vcpu->kvm)) {
1199 kvm_s390_deliver_pending_interrupts(vcpu); 1205 rc = kvm_s390_deliver_pending_interrupts(vcpu);
1206 if (rc)
1207 return rc;
1208 }
1200 1209
1201 rc = kvm_s390_handle_requests(vcpu); 1210 rc = kvm_s390_handle_requests(vcpu);
1202 if (rc) 1211 if (rc)
@@ -1300,6 +1309,48 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
1300 return rc; 1309 return rc;
1301} 1310}
1302 1311
1312static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1313{
1314 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
1315 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1316 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
1317 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1318 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1319 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1320 /* some control register changes require a tlb flush */
1321 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1322 }
1323 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
1324 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
1325 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
1326 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
1327 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
1328 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
1329 }
1330 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
1331 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
1332 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
1333 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
1334 }
1335 kvm_run->kvm_dirty_regs = 0;
1336}
1337
1338static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1339{
1340 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
1341 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1342 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1343 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1344 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
1345 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
1346 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
1347 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
1348 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
1349 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
1350 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
1351 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
1352}
1353
1303int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1354int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1304{ 1355{
1305 int rc; 1356 int rc;
@@ -1321,30 +1372,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1321 return -EINVAL; 1372 return -EINVAL;
1322 } 1373 }
1323 1374
1324 switch (kvm_run->exit_reason) { 1375 sync_regs(vcpu, kvm_run);
1325 case KVM_EXIT_S390_SIEIC:
1326 case KVM_EXIT_UNKNOWN:
1327 case KVM_EXIT_INTR:
1328 case KVM_EXIT_S390_RESET:
1329 case KVM_EXIT_S390_UCONTROL:
1330 case KVM_EXIT_S390_TSCH:
1331 case KVM_EXIT_DEBUG:
1332 break;
1333 default:
1334 BUG();
1335 }
1336
1337 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
1338 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1339 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
1340 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
1341 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1342 }
1343 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1344 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
1345 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1346 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1347 }
1348 1376
1349 might_fault(); 1377 might_fault();
1350 rc = __vcpu_run(vcpu); 1378 rc = __vcpu_run(vcpu);
@@ -1374,10 +1402,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1374 rc = 0; 1402 rc = 0;
1375 } 1403 }
1376 1404
1377 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 1405 store_regs(vcpu, kvm_run);
1378 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1379 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1380 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1381 1406
1382 if (vcpu->sigset_active) 1407 if (vcpu->sigset_active)
1383 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1408 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1506,7 +1531,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
1506 * Another VCPU might have used IBS while we were offline. 1531 * Another VCPU might have used IBS while we were offline.
1507 * Let's play safe and flush the VCPU at startup. 1532 * Let's play safe and flush the VCPU at startup.
1508 */ 1533 */
1509 vcpu->arch.sie_block->ihcpu = 0xffff; 1534 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1510 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 1535 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
1511 return; 1536 return;
1512} 1537}
@@ -1661,9 +1686,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1661 } 1686 }
1662#endif 1687#endif
1663 case KVM_S390_VCPU_FAULT: { 1688 case KVM_S390_VCPU_FAULT: {
1664 r = gmap_fault(arg, vcpu->arch.gmap); 1689 r = gmap_fault(vcpu->arch.gmap, arg, 0);
1665 if (!IS_ERR_VALUE(r))
1666 r = 0;
1667 break; 1690 break;
1668 } 1691 }
1669 case KVM_ENABLE_CAP: 1692 case KVM_ENABLE_CAP:
@@ -1694,21 +1717,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1694 return VM_FAULT_SIGBUS; 1717 return VM_FAULT_SIGBUS;
1695} 1718}
1696 1719
1697void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1698 struct kvm_memory_slot *dont)
1699{
1700}
1701
1702int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1720int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1703 unsigned long npages) 1721 unsigned long npages)
1704{ 1722{
1705 return 0; 1723 return 0;
1706} 1724}
1707 1725
1708void kvm_arch_memslots_updated(struct kvm *kvm)
1709{
1710}
1711
1712/* Section: memory related */ 1726/* Section: memory related */
1713int kvm_arch_prepare_memory_region(struct kvm *kvm, 1727int kvm_arch_prepare_memory_region(struct kvm *kvm,
1714 struct kvm_memory_slot *memslot, 1728 struct kvm_memory_slot *memslot,
@@ -1754,15 +1768,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
1754 return; 1768 return;
1755} 1769}
1756 1770
1757void kvm_arch_flush_shadow_all(struct kvm *kvm)
1758{
1759}
1760
1761void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
1762 struct kvm_memory_slot *slot)
1763{
1764}
1765
1766static int __init kvm_s390_init(void) 1771static int __init kvm_s390_init(void)
1767{ 1772{
1768 int ret; 1773 int ret;
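
The new sync_regs()/store_regs() helpers above copy register state between kvm_run and the SIE block only for the groups that userspace flagged in kvm_dirty_regs, clear the mask afterwards, and turn control-register updates into a deferred TLB flush request. A minimal sketch of that dirty-mask pattern with made-up register groups, not the actual kvm_run layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SYNC_PREFIX	(1u << 0)
#define SYNC_CRS	(1u << 1)

struct shared {			/* stands in for kvm_run->s.regs */
	uint32_t dirty;
	uint32_t prefix;
	uint64_t crs[16];
};

struct vcpu_state {		/* stands in for the SIE block */
	uint32_t prefix;
	uint64_t crs[16];
	int need_tlb_flush;
};

static void sync_in(struct vcpu_state *v, struct shared *s)
{
	if (s->dirty & SYNC_PREFIX)
		v->prefix = s->prefix;
	if (s->dirty & SYNC_CRS) {
		memcpy(v->crs, s->crs, sizeof(v->crs));
		/* some control register changes require a TLB flush */
		v->need_tlb_flush = 1;
	}
	s->dirty = 0;		/* everything consumed */
}

static void sync_out(const struct vcpu_state *v, struct shared *s)
{
	s->prefix = v->prefix;
	memcpy(s->crs, v->crs, sizeof(s->crs));
}

int main(void)
{
	struct shared s = { .dirty = SYNC_PREFIX, .prefix = 0x2000 };
	struct vcpu_state v = { 0 };

	sync_in(&v, &s);
	sync_out(&v, &s);
	printf("prefix=%#x tlb_flush=%d\n", v.prefix, v.need_tlb_flush);
	return 0;
}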
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3862fa2cefe0..244d02303182 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
70static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) 70static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
71{ 71{
72 vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; 72 vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
73 vcpu->arch.sie_block->ihcpu = 0xffff; 73 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
74 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 74 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
75} 75}
76 76
@@ -138,8 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
138int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); 138int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
139void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); 139void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
140enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); 140enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
141void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); 141int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
142void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
143void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); 142void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
144void kvm_s390_clear_float_irqs(struct kvm *kvm); 143void kvm_s390_clear_float_irqs(struct kvm *kvm);
145int __must_check kvm_s390_inject_vm(struct kvm *kvm, 144int __must_check kvm_s390_inject_vm(struct kvm *kvm,
@@ -228,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
228int psw_extint_disabled(struct kvm_vcpu *vcpu); 227int psw_extint_disabled(struct kvm_vcpu *vcpu);
229void kvm_s390_destroy_adapters(struct kvm *kvm); 228void kvm_s390_destroy_adapters(struct kvm *kvm);
230int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); 229int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
230extern struct kvm_device_ops kvm_flic_ops;
231 231
232/* implemented in guestdbg.c */ 232/* implemented in guestdbg.c */
233void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); 233void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
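
kvm_s390_set_prefix() now raises KVM_REQ_TLB_FLUSH instead of poking ihcpu directly, so the flush is coalesced and performed once in the request handler before the next guest entry (see the new KVM_REQ_TLB_FLUSH check in kvm-s390.c above). A small sketch of that deferred-request pattern, with a plain bitmask standing in for the kvm_make_request()/kvm_check_request() helpers:

#include <stdio.h>

#define REQ_TLB_FLUSH	(1u << 0)
#define REQ_MMU_RELOAD	(1u << 1)

struct vcpu {
	unsigned int requests;
	unsigned int ihcpu;	/* 0xffff means "SIE TLB invalid" */
};

static void make_request(struct vcpu *v, unsigned int req)
{
	v->requests |= req;
}

static int check_request(struct vcpu *v, unsigned int req)
{
	if (!(v->requests & req))
		return 0;
	v->requests &= ~req;
	return 1;
}

static void set_prefix(struct vcpu *v)
{
	/* Defer the flush; it happens once before the next guest run. */
	make_request(v, REQ_TLB_FLUSH);
	make_request(v, REQ_MMU_RELOAD);
}

static void handle_requests(struct vcpu *v)
{
	if (check_request(v, REQ_TLB_FLUSH))
		v->ihcpu = 0xffff;	/* force SIE to rebuild its TLB */
	if (check_request(v, REQ_MMU_RELOAD))
		puts("reloading mmu");
}

int main(void)
{
	struct vcpu v = { 0 };

	set_prefix(&v);
	set_prefix(&v);			/* coalesces into one flush */
	handle_requests(&v);
	printf("ihcpu=%#x\n", v.ihcpu);
	return 0;
}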
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f89c1cd67751..72bb2dd8b9cd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
352 return 0; 352 return 0;
353} 353}
354 354
355static void handle_new_psw(struct kvm_vcpu *vcpu)
356{
357 /* Check whether the new psw is enabled for machine checks. */
358 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
359 kvm_s390_deliver_pending_machine_checks(vcpu);
360}
361
362#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) 355#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
363#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL 356#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
364#define PSW_ADDR_24 0x0000000000ffffffUL 357#define PSW_ADDR_24 0x0000000000ffffffUL
@@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
405 gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; 398 gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
406 if (!is_valid_psw(gpsw)) 399 if (!is_valid_psw(gpsw))
407 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 400 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
408 handle_new_psw(vcpu);
409 return 0; 401 return 0;
410} 402}
411 403
@@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
427 vcpu->arch.sie_block->gpsw = new_psw; 419 vcpu->arch.sie_block->gpsw = new_psw;
428 if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) 420 if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
429 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 421 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
430 handle_new_psw(vcpu);
431 return 0; 422 return 0;
432} 423}
433 424
@@ -738,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
738 /* invalid entry */ 729 /* invalid entry */
739 break; 730 break;
740 /* try to free backing */ 731 /* try to free backing */
741 __gmap_zap(cbrle, gmap); 732 __gmap_zap(gmap, cbrle);
742 } 733 }
743 up_read(&gmap->mm->mmap_sem); 734 up_read(&gmap->mm->mmap_sem);
744 if (i < entries) 735 if (i < entries)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3f3b35403d0a..a2b81d6ce8a5 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access)
442 down_read(&mm->mmap_sem); 442 down_read(&mm->mmap_sem);
443 443
444#ifdef CONFIG_PGSTE 444#ifdef CONFIG_PGSTE
445 gmap = (struct gmap *) 445 gmap = (current->flags & PF_VCPU) ?
446 ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); 446 (struct gmap *) S390_lowcore.gmap : NULL;
447 if (gmap) { 447 if (gmap) {
448 address = __gmap_fault(address, gmap); 448 current->thread.gmap_addr = address;
449 address = __gmap_translate(gmap, address);
449 if (address == -EFAULT) { 450 if (address == -EFAULT) {
450 fault = VM_FAULT_BADMAP; 451 fault = VM_FAULT_BADMAP;
451 goto out_up; 452 goto out_up;
452 } 453 }
453 if (address == -ENOMEM) {
454 fault = VM_FAULT_OOM;
455 goto out_up;
456 }
457 if (gmap->pfault_enabled) 454 if (gmap->pfault_enabled)
458 flags |= FAULT_FLAG_RETRY_NOWAIT; 455 flags |= FAULT_FLAG_RETRY_NOWAIT;
459 } 456 }
@@ -530,6 +527,20 @@ retry:
530 goto retry; 527 goto retry;
531 } 528 }
532 } 529 }
530#ifdef CONFIG_PGSTE
531 if (gmap) {
532 address = __gmap_link(gmap, current->thread.gmap_addr,
533 address);
534 if (address == -EFAULT) {
535 fault = VM_FAULT_BADMAP;
536 goto out_up;
537 }
538 if (address == -ENOMEM) {
539 fault = VM_FAULT_OOM;
540 goto out_up;
541 }
542 }
543#endif
533 fault = 0; 544 fault = 0;
534out_up: 545out_up:
535 up_read(&mm->mmap_sem); 546 up_read(&mm->mmap_sem);
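
With the rework above, the host fault handler translates the guest address up front via __gmap_translate(), lets the normal fault code populate the host page table, and only then connects the result back into the gmap shadow tables via __gmap_link(). A rough sketch of that ordering with stubbed-out steps; the real helpers take a struct gmap and live in arch/s390/mm/pgtable.c:

#include <errno.h>
#include <stdio.h>

/* Stubs for __gmap_translate(), handle_mm_fault() and __gmap_link(). */
static long translate(unsigned long gaddr)	{ return 0x7f0000000000UL | gaddr; }
static int  fault_in(unsigned long vmaddr)	{ (void)vmaddr; return 0; }
static int  link_back(unsigned long gaddr, unsigned long vmaddr)
{
	printf("link guest %#lx -> host %#lx\n", gaddr, vmaddr);
	return 0;
}

static int gmap_exception(unsigned long gaddr)
{
	long vmaddr;

	vmaddr = translate(gaddr);	/* 1. guest -> host translation   */
	if (vmaddr < 0)
		return -EFAULT;
	if (fault_in(vmaddr))		/* 2. resolve the host fault      */
		return -EFAULT;
	return link_back(gaddr, vmaddr); /* 3. connect page table to gmap */
}

int main(void)
{
	return gmap_exception(0x2000) ? 1 : 0;
}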
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 19daa53a3da4..296b61a4af59 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
145/** 145/**
146 * gmap_alloc - allocate a guest address space 146 * gmap_alloc - allocate a guest address space
147 * @mm: pointer to the parent mm_struct 147 * @mm: pointer to the parent mm_struct
148 * @limit: maximum size of the gmap address space
148 * 149 *
149 * Returns a guest address space structure. 150 * Returns a guest address space structure.
150 */ 151 */
151struct gmap *gmap_alloc(struct mm_struct *mm) 152struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
152{ 153{
153 struct gmap *gmap; 154 struct gmap *gmap;
154 struct page *page; 155 struct page *page;
155 unsigned long *table; 156 unsigned long *table;
156 157 unsigned long etype, atype;
158
159 if (limit < (1UL << 31)) {
160 limit = (1UL << 31) - 1;
161 atype = _ASCE_TYPE_SEGMENT;
162 etype = _SEGMENT_ENTRY_EMPTY;
163 } else if (limit < (1UL << 42)) {
164 limit = (1UL << 42) - 1;
165 atype = _ASCE_TYPE_REGION3;
166 etype = _REGION3_ENTRY_EMPTY;
167 } else if (limit < (1UL << 53)) {
168 limit = (1UL << 53) - 1;
169 atype = _ASCE_TYPE_REGION2;
170 etype = _REGION2_ENTRY_EMPTY;
171 } else {
172 limit = -1UL;
173 atype = _ASCE_TYPE_REGION1;
174 etype = _REGION1_ENTRY_EMPTY;
175 }
157 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); 176 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
158 if (!gmap) 177 if (!gmap)
159 goto out; 178 goto out;
160 INIT_LIST_HEAD(&gmap->crst_list); 179 INIT_LIST_HEAD(&gmap->crst_list);
180 INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
181 INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
182 spin_lock_init(&gmap->guest_table_lock);
161 gmap->mm = mm; 183 gmap->mm = mm;
162 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 184 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
163 if (!page) 185 if (!page)
164 goto out_free; 186 goto out_free;
187 page->index = 0;
165 list_add(&page->lru, &gmap->crst_list); 188 list_add(&page->lru, &gmap->crst_list);
166 table = (unsigned long *) page_to_phys(page); 189 table = (unsigned long *) page_to_phys(page);
167 crst_table_init(table, _REGION1_ENTRY_EMPTY); 190 crst_table_init(table, etype);
168 gmap->table = table; 191 gmap->table = table;
169 gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | 192 gmap->asce = atype | _ASCE_TABLE_LENGTH |
170 _ASCE_USER_BITS | __pa(table); 193 _ASCE_USER_BITS | __pa(table);
194 gmap->asce_end = limit;
195 down_write(&mm->mmap_sem);
171 list_add(&gmap->list, &mm->context.gmap_list); 196 list_add(&gmap->list, &mm->context.gmap_list);
197 up_write(&mm->mmap_sem);
172 return gmap; 198 return gmap;
173 199
174out_free: 200out_free:
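
gmap_alloc() now sizes the top-level table to the requested limit: a segment table up to 2 GB, a region-3 table up to 4 TB, a region-2 table up to 8 PB, and a full region-1 table otherwise (KVM passes (1UL << 44) - 1 for regular guests and -1UL for ucontrol VMs, as seen in kvm-s390.c above). A standalone sketch of that selection, with string names standing in for the _ASCE_TYPE_* / *_ENTRY_EMPTY constants:

#include <stdio.h>

struct asce_choice {
	unsigned long limit;
	const char *type;	/* stands in for _ASCE_TYPE_* */
};

static struct asce_choice pick_asce(unsigned long limit)
{
	struct asce_choice c;

	if (limit < (1UL << 31)) {
		c.limit = (1UL << 31) - 1;	/* 2 GB, segment table   */
		c.type = "SEGMENT";
	} else if (limit < (1UL << 42)) {
		c.limit = (1UL << 42) - 1;	/* 4 TB, region-3 table  */
		c.type = "REGION3";
	} else if (limit < (1UL << 53)) {
		c.limit = (1UL << 53) - 1;	/* 8 PB, region-2 table  */
		c.type = "REGION2";
	} else {
		c.limit = -1UL;			/* full range, region-1  */
		c.type = "REGION1";
	}
	return c;
}

int main(void)
{
	struct asce_choice c = pick_asce((1UL << 44) - 1);	/* KVM's default */

	printf("type=%s limit=%#lx\n", c.type, c.limit);
	return 0;
}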
@@ -178,36 +204,38 @@ out:
178} 204}
179EXPORT_SYMBOL_GPL(gmap_alloc); 205EXPORT_SYMBOL_GPL(gmap_alloc);
180 206
181static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
182{
183 struct gmap_pgtable *mp;
184 struct gmap_rmap *rmap;
185 struct page *page;
186
187 if (*table & _SEGMENT_ENTRY_INVALID)
188 return 0;
189 page = pfn_to_page(*table >> PAGE_SHIFT);
190 mp = (struct gmap_pgtable *) page->index;
191 list_for_each_entry(rmap, &mp->mapper, list) {
192 if (rmap->entry != table)
193 continue;
194 list_del(&rmap->list);
195 kfree(rmap);
196 break;
197 }
198 *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
199 return 1;
200}
201
202static void gmap_flush_tlb(struct gmap *gmap) 207static void gmap_flush_tlb(struct gmap *gmap)
203{ 208{
204 if (MACHINE_HAS_IDTE) 209 if (MACHINE_HAS_IDTE)
205 __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | 210 __tlb_flush_asce(gmap->mm, gmap->asce);
206 _ASCE_TYPE_REGION1);
207 else 211 else
208 __tlb_flush_global(); 212 __tlb_flush_global();
209} 213}
210 214
215static void gmap_radix_tree_free(struct radix_tree_root *root)
216{
217 struct radix_tree_iter iter;
218 unsigned long indices[16];
219 unsigned long index;
220 void **slot;
221 int i, nr;
222
223 /* A radix tree is freed by deleting all of its entries */
224 index = 0;
225 do {
226 nr = 0;
227 radix_tree_for_each_slot(slot, root, &iter, index) {
228 indices[nr] = iter.index;
229 if (++nr == 16)
230 break;
231 }
232 for (i = 0; i < nr; i++) {
233 index = indices[i];
234 radix_tree_delete(root, index);
235 }
236 } while (nr > 0);
237}
238
211/** 239/**
212 * gmap_free - free a guest address space 240 * gmap_free - free a guest address space
213 * @gmap: pointer to the guest address space structure 241 * @gmap: pointer to the guest address space structure
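
gmap_radix_tree_free() above empties a radix tree by repeatedly collecting up to 16 indices per pass and then deleting them, since deleting entries while inside radix_tree_for_each_slot() would disturb the iterator. A minimal sketch of the same gather-then-delete pattern, with a sorted array standing in for the radix tree:

#include <stdio.h>
#include <string.h>

#define NKEYS	40
#define BATCH	16

static unsigned long keys[NKEYS];	/* kept sorted, like tree indices */
static int nkeys;

/* Collect up to 'max' keys >= 'start'; stands in for the slot iterator. */
static int gather(unsigned long start, unsigned long *out, int max)
{
	int i, nr = 0;

	for (i = 0; i < nkeys && nr < max; i++)
		if (keys[i] >= start)
			out[nr++] = keys[i];
	return nr;
}

/* Delete one key; stands in for radix_tree_delete(). */
static void delete_key(unsigned long key)
{
	int i;

	for (i = 0; i < nkeys; i++)
		if (keys[i] == key) {
			memmove(&keys[i], &keys[i + 1],
				(nkeys - i - 1) * sizeof(keys[0]));
			nkeys--;
			return;
		}
}

static void free_all(void)
{
	unsigned long indices[BATCH], index = 0;
	int i, nr;

	/* The structure is freed by deleting all of its entries, in batches. */
	do {
		nr = gather(index, indices, BATCH);
		for (i = 0; i < nr; i++) {
			index = indices[i];
			delete_key(index);
		}
	} while (nr > 0);
}

int main(void)
{
	for (nkeys = 0; nkeys < NKEYS; nkeys++)
		keys[nkeys] = nkeys;
	free_all();
	printf("remaining keys: %d\n", nkeys);
	return 0;
}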
@@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap)
215void gmap_free(struct gmap *gmap) 243void gmap_free(struct gmap *gmap)
216{ 244{
217 struct page *page, *next; 245 struct page *page, *next;
218 unsigned long *table;
219 int i;
220
221 246
222 /* Flush tlb. */ 247 /* Flush tlb. */
223 if (MACHINE_HAS_IDTE) 248 if (MACHINE_HAS_IDTE)
224 __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | 249 __tlb_flush_asce(gmap->mm, gmap->asce);
225 _ASCE_TYPE_REGION1);
226 else 250 else
227 __tlb_flush_global(); 251 __tlb_flush_global();
228 252
229 /* Free all segment & region tables. */ 253 /* Free all segment & region tables. */
230 down_read(&gmap->mm->mmap_sem); 254 list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
231 spin_lock(&gmap->mm->page_table_lock);
232 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
233 table = (unsigned long *) page_to_phys(page);
234 if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
235 /* Remove gmap rmap structures for segment table. */
236 for (i = 0; i < PTRS_PER_PMD; i++, table++)
237 gmap_unlink_segment(gmap, table);
238 __free_pages(page, ALLOC_ORDER); 255 __free_pages(page, ALLOC_ORDER);
239 } 256 gmap_radix_tree_free(&gmap->guest_to_host);
240 spin_unlock(&gmap->mm->page_table_lock); 257 gmap_radix_tree_free(&gmap->host_to_guest);
241 up_read(&gmap->mm->mmap_sem); 258 down_write(&gmap->mm->mmap_sem);
242 list_del(&gmap->list); 259 list_del(&gmap->list);
260 up_write(&gmap->mm->mmap_sem);
243 kfree(gmap); 261 kfree(gmap);
244} 262}
245EXPORT_SYMBOL_GPL(gmap_free); 263EXPORT_SYMBOL_GPL(gmap_free);
@@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable);
267/* 285/*
268 * gmap_alloc_table is assumed to be called with mmap_sem held 286 * gmap_alloc_table is assumed to be called with mmap_sem held
269 */ 287 */
270static int gmap_alloc_table(struct gmap *gmap, 288static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
271 unsigned long *table, unsigned long init) 289 unsigned long init, unsigned long gaddr)
272 __releases(&gmap->mm->page_table_lock)
273 __acquires(&gmap->mm->page_table_lock)
274{ 290{
275 struct page *page; 291 struct page *page;
276 unsigned long *new; 292 unsigned long *new;
277 293
 278 /* since we don't free the gmap table until gmap_free we can unlock */ 294
279 spin_unlock(&gmap->mm->page_table_lock);
280 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 295 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
281 spin_lock(&gmap->mm->page_table_lock);
282 if (!page) 296 if (!page)
283 return -ENOMEM; 297 return -ENOMEM;
284 new = (unsigned long *) page_to_phys(page); 298 new = (unsigned long *) page_to_phys(page);
285 crst_table_init(new, init); 299 crst_table_init(new, init);
300 spin_lock(&gmap->mm->page_table_lock);
286 if (*table & _REGION_ENTRY_INVALID) { 301 if (*table & _REGION_ENTRY_INVALID) {
287 list_add(&page->lru, &gmap->crst_list); 302 list_add(&page->lru, &gmap->crst_list);
288 *table = (unsigned long) new | _REGION_ENTRY_LENGTH | 303 *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
289 (*table & _REGION_ENTRY_TYPE_MASK); 304 (*table & _REGION_ENTRY_TYPE_MASK);
290 } else 305 page->index = gaddr;
306 page = NULL;
307 }
308 spin_unlock(&gmap->mm->page_table_lock);
309 if (page)
291 __free_pages(page, ALLOC_ORDER); 310 __free_pages(page, ALLOC_ORDER);
292 return 0; 311 return 0;
293} 312}
294 313
295/** 314/**
315 * __gmap_segment_gaddr - find virtual address from segment pointer
316 * @entry: pointer to a segment table entry in the guest address space
317 *
318 * Returns the virtual address in the guest address space for the segment
319 */
320static unsigned long __gmap_segment_gaddr(unsigned long *entry)
321{
322 struct page *page;
323 unsigned long offset;
324
325 offset = (unsigned long) entry / sizeof(unsigned long);
326 offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
327 page = pmd_to_page((pmd_t *) entry);
328 return page->index + offset;
329}
330
331/**
332 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
333 * @gmap: pointer to the guest address space structure
334 * @vmaddr: address in the host process address space
335 *
336 * Returns 1 if a TLB flush is required
337 */
338static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
339{
340 unsigned long *entry;
341 int flush = 0;
342
343 spin_lock(&gmap->guest_table_lock);
344 entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
345 if (entry) {
346 flush = (*entry != _SEGMENT_ENTRY_INVALID);
347 *entry = _SEGMENT_ENTRY_INVALID;
348 }
349 spin_unlock(&gmap->guest_table_lock);
350 return flush;
351}
352
353/**
354 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
355 * @gmap: pointer to the guest address space structure
356 * @gaddr: address in the guest address space
357 *
358 * Returns 1 if a TLB flush is required
359 */
360static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
361{
362 unsigned long vmaddr;
363
364 vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
365 gaddr >> PMD_SHIFT);
366 return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
367}
368
369/**
296 * gmap_unmap_segment - unmap segment from the guest address space 370 * gmap_unmap_segment - unmap segment from the guest address space
297 * @gmap: pointer to the guest address space structure 371 * @gmap: pointer to the guest address space structure
298 * @addr: address in the guest address space 372 * @to: address in the guest address space
299 * @len: length of the memory area to unmap 373 * @len: length of the memory area to unmap
300 * 374 *
301 * Returns 0 if the unmap succeeded, -EINVAL if not. 375 * Returns 0 if the unmap succeeded, -EINVAL if not.
302 */ 376 */
303int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) 377int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
304{ 378{
305 unsigned long *table;
306 unsigned long off; 379 unsigned long off;
307 int flush; 380 int flush;
308 381
@@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
312 return -EINVAL; 385 return -EINVAL;
313 386
314 flush = 0; 387 flush = 0;
315 down_read(&gmap->mm->mmap_sem); 388 down_write(&gmap->mm->mmap_sem);
316 spin_lock(&gmap->mm->page_table_lock); 389 for (off = 0; off < len; off += PMD_SIZE)
317 for (off = 0; off < len; off += PMD_SIZE) { 390 flush |= __gmap_unmap_by_gaddr(gmap, to + off);
318 /* Walk the guest addr space page table */ 391 up_write(&gmap->mm->mmap_sem);
319 table = gmap->table + (((to + off) >> 53) & 0x7ff);
320 if (*table & _REGION_ENTRY_INVALID)
321 goto out;
322 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
323 table = table + (((to + off) >> 42) & 0x7ff);
324 if (*table & _REGION_ENTRY_INVALID)
325 goto out;
326 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
327 table = table + (((to + off) >> 31) & 0x7ff);
328 if (*table & _REGION_ENTRY_INVALID)
329 goto out;
330 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
331 table = table + (((to + off) >> 20) & 0x7ff);
332
333 /* Clear segment table entry in guest address space. */
334 flush |= gmap_unlink_segment(gmap, table);
335 *table = _SEGMENT_ENTRY_INVALID;
336 }
337out:
338 spin_unlock(&gmap->mm->page_table_lock);
339 up_read(&gmap->mm->mmap_sem);
340 if (flush) 392 if (flush)
341 gmap_flush_tlb(gmap); 393 gmap_flush_tlb(gmap);
342 return 0; 394 return 0;
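
The new gmap keeps two radix trees: guest_to_host maps a guest segment index (gaddr >> PMD_SHIFT) to the host address, and host_to_guest maps a host segment index back to the gmap segment-table entry so that a host-side unmap can invalidate it and report whether a TLB flush is needed, as __gmap_unlink_by_vmaddr() and __gmap_unmap_by_gaddr() above do. A toy sketch of that pair of mappings, using fixed-size arrays and an index in place of the radix trees and the entry pointer:

#include <stdio.h>

#define PMD_SHIFT	20
#define SEGMENTS	64		/* toy address space: 64 segments */
#define SEG_INVALID	(~0UL)

static unsigned long guest_to_host[SEGMENTS];	/* guest seg -> host addr    */
static unsigned long seg_table[SEGMENTS];	/* gmap segment table        */
static long host_to_guest[SEGMENTS];		/* host seg  -> seg_table idx */

static void init_maps(void)
{
	for (int i = 0; i < SEGMENTS; i++) {
		guest_to_host[i] = 0;
		seg_table[i] = SEG_INVALID;
		host_to_guest[i] = -1;
	}
}

static void map(unsigned long gaddr, unsigned long vmaddr)
{
	unsigned long gx = gaddr >> PMD_SHIFT, hx = vmaddr >> PMD_SHIFT;

	guest_to_host[gx] = vmaddr;
	seg_table[gx] = vmaddr;		/* pretend the pmd value is the address */
	host_to_guest[hx] = gx;
}

/* Returns 1 if a TLB flush is required, like __gmap_unlink_by_vmaddr(). */
static int unlink_by_vmaddr(unsigned long vmaddr)
{
	long gx = host_to_guest[vmaddr >> PMD_SHIFT];
	int flush = 0;

	if (gx >= 0) {
		flush = (seg_table[gx] != SEG_INVALID);
		seg_table[gx] = SEG_INVALID;
		host_to_guest[vmaddr >> PMD_SHIFT] = -1;
	}
	return flush;
}

int main(void)
{
	init_maps();
	map(0x100000, 0x300000);
	printf("flush=%d\n", unlink_by_vmaddr(0x300000));
	return 0;
}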
@@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
348 * @gmap: pointer to the guest address space structure 400 * @gmap: pointer to the guest address space structure
349 * @from: source address in the parent address space 401 * @from: source address in the parent address space
350 * @to: target address in the guest address space 402 * @to: target address in the guest address space
403 * @len: length of the memory area to map
351 * 404 *
352 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. 405 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
353 */ 406 */
354int gmap_map_segment(struct gmap *gmap, unsigned long from, 407int gmap_map_segment(struct gmap *gmap, unsigned long from,
355 unsigned long to, unsigned long len) 408 unsigned long to, unsigned long len)
356{ 409{
357 unsigned long *table;
358 unsigned long off; 410 unsigned long off;
359 int flush; 411 int flush;
360 412
361 if ((from | to | len) & (PMD_SIZE - 1)) 413 if ((from | to | len) & (PMD_SIZE - 1))
362 return -EINVAL; 414 return -EINVAL;
363 if (len == 0 || from + len > TASK_MAX_SIZE || 415 if (len == 0 || from + len < from || to + len < to ||
364 from + len < from || to + len < to) 416 from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
365 return -EINVAL; 417 return -EINVAL;
366 418
367 flush = 0; 419 flush = 0;
368 down_read(&gmap->mm->mmap_sem); 420 down_write(&gmap->mm->mmap_sem);
369 spin_lock(&gmap->mm->page_table_lock);
370 for (off = 0; off < len; off += PMD_SIZE) { 421 for (off = 0; off < len; off += PMD_SIZE) {
371 /* Walk the gmap address space page table */ 422 /* Remove old translation */
372 table = gmap->table + (((to + off) >> 53) & 0x7ff); 423 flush |= __gmap_unmap_by_gaddr(gmap, to + off);
373 if ((*table & _REGION_ENTRY_INVALID) && 424 /* Store new translation */
374 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) 425 if (radix_tree_insert(&gmap->guest_to_host,
375 goto out_unmap; 426 (to + off) >> PMD_SHIFT,
376 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 427 (void *) from + off))
377 table = table + (((to + off) >> 42) & 0x7ff); 428 break;
378 if ((*table & _REGION_ENTRY_INVALID) &&
379 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
380 goto out_unmap;
381 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
382 table = table + (((to + off) >> 31) & 0x7ff);
383 if ((*table & _REGION_ENTRY_INVALID) &&
384 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
385 goto out_unmap;
386 table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
387 table = table + (((to + off) >> 20) & 0x7ff);
388
389 /* Store 'from' address in an invalid segment table entry. */
390 flush |= gmap_unlink_segment(gmap, table);
391 *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
392 _SEGMENT_ENTRY_PROTECT);
393 } 429 }
394 spin_unlock(&gmap->mm->page_table_lock); 430 up_write(&gmap->mm->mmap_sem);
395 up_read(&gmap->mm->mmap_sem);
396 if (flush) 431 if (flush)
397 gmap_flush_tlb(gmap); 432 gmap_flush_tlb(gmap);
398 return 0; 433 if (off >= len)
399 434 return 0;
400out_unmap:
401 spin_unlock(&gmap->mm->page_table_lock);
402 up_read(&gmap->mm->mmap_sem);
403 gmap_unmap_segment(gmap, to, len); 435 gmap_unmap_segment(gmap, to, len);
404 return -ENOMEM; 436 return -ENOMEM;
405} 437}
406EXPORT_SYMBOL_GPL(gmap_map_segment); 438EXPORT_SYMBOL_GPL(gmap_map_segment);
407 439
408static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
409{
410 unsigned long *table;
411
412 table = gmap->table + ((address >> 53) & 0x7ff);
413 if (unlikely(*table & _REGION_ENTRY_INVALID))
414 return ERR_PTR(-EFAULT);
415 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
416 table = table + ((address >> 42) & 0x7ff);
417 if (unlikely(*table & _REGION_ENTRY_INVALID))
418 return ERR_PTR(-EFAULT);
419 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
420 table = table + ((address >> 31) & 0x7ff);
421 if (unlikely(*table & _REGION_ENTRY_INVALID))
422 return ERR_PTR(-EFAULT);
423 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
424 table = table + ((address >> 20) & 0x7ff);
425 return table;
426}
427
428/** 440/**
429 * __gmap_translate - translate a guest address to a user space address 441 * __gmap_translate - translate a guest address to a user space address
430 * @address: guest address
431 * @gmap: pointer to guest mapping meta data structure 442 * @gmap: pointer to guest mapping meta data structure
443 * @gaddr: guest address
432 * 444 *
433 * Returns user space address which corresponds to the guest address or 445 * Returns user space address which corresponds to the guest address or
434 * -EFAULT if no such mapping exists. 446 * -EFAULT if no such mapping exists.
@@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
436 * The mmap_sem of the mm that belongs to the address space must be held 448 * The mmap_sem of the mm that belongs to the address space must be held
437 * when this function gets called. 449 * when this function gets called.
438 */ 450 */
439unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) 451unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
440{ 452{
441 unsigned long *segment_ptr, vmaddr, segment; 453 unsigned long vmaddr;
442 struct gmap_pgtable *mp;
443 struct page *page;
444 454
445 current->thread.gmap_addr = address; 455 vmaddr = (unsigned long)
446 segment_ptr = gmap_table_walk(address, gmap); 456 radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
447 if (IS_ERR(segment_ptr)) 457 return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
448 return PTR_ERR(segment_ptr);
449 /* Convert the gmap address to an mm address. */
450 segment = *segment_ptr;
451 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
452 page = pfn_to_page(segment >> PAGE_SHIFT);
453 mp = (struct gmap_pgtable *) page->index;
454 return mp->vmaddr | (address & ~PMD_MASK);
455 } else if (segment & _SEGMENT_ENTRY_PROTECT) {
456 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
457 return vmaddr | (address & ~PMD_MASK);
458 }
459 return -EFAULT;
460} 458}
461EXPORT_SYMBOL_GPL(__gmap_translate); 459EXPORT_SYMBOL_GPL(__gmap_translate);
462 460
463/** 461/**
464 * gmap_translate - translate a guest address to a user space address 462 * gmap_translate - translate a guest address to a user space address
465 * @address: guest address
466 * @gmap: pointer to guest mapping meta data structure 463 * @gmap: pointer to guest mapping meta data structure
464 * @gaddr: guest address
467 * 465 *
468 * Returns user space address which corresponds to the guest address or 466 * Returns user space address which corresponds to the guest address or
469 * -EFAULT if no such mapping exists. 467 * -EFAULT if no such mapping exists.
470 * This function does not establish potentially missing page table entries. 468 * This function does not establish potentially missing page table entries.
471 */ 469 */
472unsigned long gmap_translate(unsigned long address, struct gmap *gmap) 470unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
473{ 471{
474 unsigned long rc; 472 unsigned long rc;
475 473
476 down_read(&gmap->mm->mmap_sem); 474 down_read(&gmap->mm->mmap_sem);
477 rc = __gmap_translate(address, gmap); 475 rc = __gmap_translate(gmap, gaddr);
478 up_read(&gmap->mm->mmap_sem); 476 up_read(&gmap->mm->mmap_sem);
479 return rc; 477 return rc;
480} 478}
481EXPORT_SYMBOL_GPL(gmap_translate); 479EXPORT_SYMBOL_GPL(gmap_translate);
482 480
483static int gmap_connect_pgtable(unsigned long address, unsigned long segment, 481/**
484 unsigned long *segment_ptr, struct gmap *gmap) 482 * gmap_unlink - disconnect a page table from the gmap shadow tables
 483 * @mm: pointer to the parent mm_struct
484 * @table: pointer to the host page table
485 * @vmaddr: vm address associated with the host page table
486 */
487static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
488 unsigned long vmaddr)
489{
490 struct gmap *gmap;
491 int flush;
492
493 list_for_each_entry(gmap, &mm->context.gmap_list, list) {
494 flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
495 if (flush)
496 gmap_flush_tlb(gmap);
497 }
498}
499
500/**
501 * gmap_link - set up shadow page tables to connect a host to a guest address
502 * @gmap: pointer to guest mapping meta data structure
503 * @gaddr: guest address
504 * @vmaddr: vm address
505 *
506 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
507 * if the vm address is already mapped to a different guest segment.
508 * The mmap_sem of the mm that belongs to the address space must be held
509 * when this function gets called.
510 */
511int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
485{ 512{
486 unsigned long vmaddr;
487 struct vm_area_struct *vma;
488 struct gmap_pgtable *mp;
489 struct gmap_rmap *rmap;
490 struct mm_struct *mm; 513 struct mm_struct *mm;
491 struct page *page; 514 unsigned long *table;
515 spinlock_t *ptl;
492 pgd_t *pgd; 516 pgd_t *pgd;
493 pud_t *pud; 517 pud_t *pud;
494 pmd_t *pmd; 518 pmd_t *pmd;
519 int rc;
495 520
496 mm = gmap->mm; 521 /* Create higher level tables in the gmap page table */
497 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; 522 table = gmap->table;
498 vma = find_vma(mm, vmaddr); 523 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
499 if (!vma || vma->vm_start > vmaddr) 524 table += (gaddr >> 53) & 0x7ff;
500 return -EFAULT; 525 if ((*table & _REGION_ENTRY_INVALID) &&
526 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
527 gaddr & 0xffe0000000000000))
528 return -ENOMEM;
529 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
530 }
531 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
532 table += (gaddr >> 42) & 0x7ff;
533 if ((*table & _REGION_ENTRY_INVALID) &&
534 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
535 gaddr & 0xfffffc0000000000))
536 return -ENOMEM;
537 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
538 }
539 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
540 table += (gaddr >> 31) & 0x7ff;
541 if ((*table & _REGION_ENTRY_INVALID) &&
542 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
543 gaddr & 0xffffffff80000000))
544 return -ENOMEM;
545 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
546 }
547 table += (gaddr >> 20) & 0x7ff;
501 /* Walk the parent mm page table */ 548 /* Walk the parent mm page table */
549 mm = gmap->mm;
502 pgd = pgd_offset(mm, vmaddr); 550 pgd = pgd_offset(mm, vmaddr);
503 pud = pud_alloc(mm, pgd, vmaddr); 551 VM_BUG_ON(pgd_none(*pgd));
504 if (!pud) 552 pud = pud_offset(pgd, vmaddr);
505 return -ENOMEM; 553 VM_BUG_ON(pud_none(*pud));
506 pmd = pmd_alloc(mm, pud, vmaddr); 554 pmd = pmd_offset(pud, vmaddr);
507 if (!pmd) 555 VM_BUG_ON(pmd_none(*pmd));
508 return -ENOMEM;
509 if (!pmd_present(*pmd) &&
510 __pte_alloc(mm, vma, pmd, vmaddr))
511 return -ENOMEM;
512 /* large pmds cannot yet be handled */ 556 /* large pmds cannot yet be handled */
513 if (pmd_large(*pmd)) 557 if (pmd_large(*pmd))
514 return -EFAULT; 558 return -EFAULT;
515 /* pmd now points to a valid segment table entry. */
516 rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
517 if (!rmap)
518 return -ENOMEM;
519 /* Link gmap segment table entry location to page table. */ 559 /* Link gmap segment table entry location to page table. */
520 page = pmd_page(*pmd); 560 rc = radix_tree_preload(GFP_KERNEL);
521 mp = (struct gmap_pgtable *) page->index; 561 if (rc)
522 rmap->gmap = gmap; 562 return rc;
523 rmap->entry = segment_ptr; 563 ptl = pmd_lock(mm, pmd);
524 rmap->vmaddr = address & PMD_MASK; 564 spin_lock(&gmap->guest_table_lock);
525 spin_lock(&mm->page_table_lock); 565 if (*table == _SEGMENT_ENTRY_INVALID) {
526 if (*segment_ptr == segment) { 566 rc = radix_tree_insert(&gmap->host_to_guest,
527 list_add(&rmap->list, &mp->mapper); 567 vmaddr >> PMD_SHIFT, table);
528 /* Set gmap segment table entry to page table. */ 568 if (!rc)
529 *segment_ptr = pmd_val(*pmd) & PAGE_MASK; 569 *table = pmd_val(*pmd);
530 rmap = NULL; 570 } else
531 } 571 rc = 0;
532 spin_unlock(&mm->page_table_lock); 572 spin_unlock(&gmap->guest_table_lock);
533 kfree(rmap); 573 spin_unlock(ptl);
534 return 0; 574 radix_tree_preload_end();
535} 575 return rc;
536
537static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
538{
539 struct gmap_rmap *rmap, *next;
540 struct gmap_pgtable *mp;
541 struct page *page;
542 int flush;
543
544 flush = 0;
545 spin_lock(&mm->page_table_lock);
546 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
547 mp = (struct gmap_pgtable *) page->index;
548 list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
549 *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
550 _SEGMENT_ENTRY_PROTECT);
551 list_del(&rmap->list);
552 kfree(rmap);
553 flush = 1;
554 }
555 spin_unlock(&mm->page_table_lock);
556 if (flush)
557 __tlb_flush_global();
558} 576}
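
For reference, the index arithmetic used by the new __gmap_link() above can be reproduced with a small standalone program (not part of the patch); the shift and mask values are taken from the hunk, and the sample address is arbitrary.

#include <stdio.h>

int main(void)
{
	unsigned long gaddr = 0x0000123456789000UL;	/* arbitrary guest address */

	/* 11-bit index (up to 2048 entries) at each translation level */
	unsigned long r1 = (gaddr >> 53) & 0x7ff;	/* region-first table  */
	unsigned long r2 = (gaddr >> 42) & 0x7ff;	/* region-second table */
	unsigned long r3 = (gaddr >> 31) & 0x7ff;	/* region-third table  */
	unsigned long sx = (gaddr >> 20) & 0x7ff;	/* segment table       */

	printf("gaddr %#lx -> r1=%lu r2=%lu r3=%lu segment=%lu\n",
	       gaddr, r1, r2, r3, sx);
	return 0;
}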
559 577
560/* 578/**
561 * this function is assumed to be called with mmap_sem held 579 * gmap_fault - resolve a fault on a guest address
580 * @gmap: pointer to guest mapping meta data structure
581 * @gaddr: guest address
582 * @fault_flags: flags to pass down to handle_mm_fault()
583 *
584 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
585 * if the vm address is already mapped to a different guest segment.
562 */ 586 */
563unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) 587int gmap_fault(struct gmap *gmap, unsigned long gaddr,
588 unsigned int fault_flags)
564{ 589{
565 unsigned long *segment_ptr, segment; 590 unsigned long vmaddr;
566 struct gmap_pgtable *mp;
567 struct page *page;
568 int rc; 591 int rc;
569 592
570 current->thread.gmap_addr = address;
571 segment_ptr = gmap_table_walk(address, gmap);
572 if (IS_ERR(segment_ptr))
573 return -EFAULT;
574 /* Convert the gmap address to an mm address. */
575 while (1) {
576 segment = *segment_ptr;
577 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
578 /* Page table is present */
579 page = pfn_to_page(segment >> PAGE_SHIFT);
580 mp = (struct gmap_pgtable *) page->index;
581 return mp->vmaddr | (address & ~PMD_MASK);
582 }
583 if (!(segment & _SEGMENT_ENTRY_PROTECT))
584 /* Nothing mapped in the gmap address space. */
585 break;
586 rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
587 if (rc)
588 return rc;
589 }
590 return -EFAULT;
591}
592
593unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
594{
595 unsigned long rc;
596
597 down_read(&gmap->mm->mmap_sem); 593 down_read(&gmap->mm->mmap_sem);
598 rc = __gmap_fault(address, gmap); 594 vmaddr = __gmap_translate(gmap, gaddr);
595 if (IS_ERR_VALUE(vmaddr)) {
596 rc = vmaddr;
597 goto out_up;
598 }
599 if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
600 rc = -EFAULT;
601 goto out_up;
602 }
603 rc = __gmap_link(gmap, gaddr, vmaddr);
604out_up:
599 up_read(&gmap->mm->mmap_sem); 605 up_read(&gmap->mm->mmap_sem);
600
601 return rc; 606 return rc;
602} 607}
603EXPORT_SYMBOL_GPL(gmap_fault); 608EXPORT_SYMBOL_GPL(gmap_fault);
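
The restructured gmap_fault() now performs three steps: translate the guest address, fault the backing page in, then link the page table into the gmap. A rough user-space model of that flow follows, with the kernel helpers (__gmap_translate(), fixup_user_fault(), __gmap_link()) replaced by stubs and only the return convention kept (0, -EFAULT).

#include <errno.h>
#include <stdio.h>

static long translate(unsigned long gaddr)	/* stands in for __gmap_translate() */
{
	return (long)(gaddr + 0x1000);		/* pretend the mapping exists */
}

static int fault_in(unsigned long vmaddr)	/* stands in for fixup_user_fault() */
{
	(void)vmaddr;
	return 0;
}

static int link_tables(unsigned long gaddr, unsigned long vmaddr)	/* __gmap_link() */
{
	(void)gaddr; (void)vmaddr;
	return 0;
}

static int model_gmap_fault(unsigned long gaddr)
{
	long vmaddr = translate(gaddr);
	if (vmaddr < 0)
		return (int)vmaddr;		/* not mapped in the gmap */
	if (fault_in((unsigned long)vmaddr))
		return -EFAULT;			/* could not fault the page in */
	return link_tables(gaddr, (unsigned long)vmaddr);
}

int main(void)
{
	printf("rc=%d\n", model_gmap_fault(0x200000));
	return 0;
}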
@@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
617 free_swap_and_cache(entry); 622 free_swap_and_cache(entry);
618} 623}
619 624
620/** 625/*
621 * The mm->mmap_sem lock must be held 626 * this function is assumed to be called with mmap_sem held
622 */ 627 */
623static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) 628void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
624{ 629{
625 unsigned long ptev, pgstev; 630 unsigned long vmaddr, ptev, pgstev;
631 pte_t *ptep, pte;
626 spinlock_t *ptl; 632 spinlock_t *ptl;
627 pgste_t pgste; 633 pgste_t pgste;
628 pte_t *ptep, pte;
629 634
630 ptep = get_locked_pte(mm, address, &ptl); 635 /* Find the vm address for the guest address */
636 vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
637 gaddr >> PMD_SHIFT);
638 if (!vmaddr)
639 return;
640 vmaddr |= gaddr & ~PMD_MASK;
641 /* Get pointer to the page table entry */
642 ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
631 if (unlikely(!ptep)) 643 if (unlikely(!ptep))
632 return; 644 return;
633 pte = *ptep; 645 pte = *ptep;
@@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
639 ptev = pte_val(pte); 651 ptev = pte_val(pte);
640 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || 652 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
641 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { 653 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
642 gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); 654 gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
643 pte_clear(mm, address, ptep); 655 pte_clear(gmap->mm, vmaddr, ptep);
644 } 656 }
645 pgste_set_unlock(ptep, pgste); 657 pgste_set_unlock(ptep, pgste);
646out_pte: 658out_pte:
647 pte_unmap_unlock(*ptep, ptl); 659 pte_unmap_unlock(*ptep, ptl);
648} 660}
649
650/*
651 * this function is assumed to be called with mmap_sem held
652 */
653void __gmap_zap(unsigned long address, struct gmap *gmap)
654{
655 unsigned long *table, *segment_ptr;
656 unsigned long segment, pgstev, ptev;
657 struct gmap_pgtable *mp;
658 struct page *page;
659
660 segment_ptr = gmap_table_walk(address, gmap);
661 if (IS_ERR(segment_ptr))
662 return;
663 segment = *segment_ptr;
664 if (segment & _SEGMENT_ENTRY_INVALID)
665 return;
666 page = pfn_to_page(segment >> PAGE_SHIFT);
667 mp = (struct gmap_pgtable *) page->index;
668 address = mp->vmaddr | (address & ~PMD_MASK);
669 /* Page table is present */
670 table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
671 table = table + ((address >> 12) & 0xff);
672 pgstev = table[PTRS_PER_PTE];
673 ptev = table[0];
674 /* quick check, checked again with locks held */
675 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
676 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
677 gmap_zap_unused(gmap->mm, address);
678}
679EXPORT_SYMBOL_GPL(__gmap_zap); 661EXPORT_SYMBOL_GPL(__gmap_zap);
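
The reworked __gmap_zap() recovers the host address from the guest_to_host radix tree, which is keyed by segment index (gaddr >> PMD_SHIFT, 1 MB granularity on s390) and holds a segment-aligned host address; the sub-segment offset is then OR-ed back in. A standalone illustration of that arithmetic, with made-up values:

#include <stdio.h>

#define PMD_SHIFT	20			/* 1 MB segments on s390 */
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

int main(void)
{
	unsigned long gaddr  = 0x12345678UL;	/* guest address */
	unsigned long seg_va = 0x3ff00000UL;	/* pretend radix_tree_lookup() result */
	unsigned long vmaddr = seg_va | (gaddr & ~PMD_MASK);

	printf("key=%#lx vmaddr=%#lx\n", gaddr >> PMD_SHIFT, vmaddr);
	return 0;
}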
680 662
681void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) 663void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
682{ 664{
683 665 unsigned long gaddr, vmaddr, size;
684 unsigned long *table, address, size;
685 struct vm_area_struct *vma; 666 struct vm_area_struct *vma;
686 struct gmap_pgtable *mp;
687 struct page *page;
688 667
689 down_read(&gmap->mm->mmap_sem); 668 down_read(&gmap->mm->mmap_sem);
690 address = from; 669 for (gaddr = from; gaddr < to;
691 while (address < to) { 670 gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
692 /* Walk the gmap address space page table */ 671 /* Find the vm address for the guest address */
693 table = gmap->table + ((address >> 53) & 0x7ff); 672 vmaddr = (unsigned long)
694 if (unlikely(*table & _REGION_ENTRY_INVALID)) { 673 radix_tree_lookup(&gmap->guest_to_host,
695 address = (address + PMD_SIZE) & PMD_MASK; 674 gaddr >> PMD_SHIFT);
696 continue; 675 if (!vmaddr)
697 }
698 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
699 table = table + ((address >> 42) & 0x7ff);
700 if (unlikely(*table & _REGION_ENTRY_INVALID)) {
701 address = (address + PMD_SIZE) & PMD_MASK;
702 continue; 676 continue;
703 } 677 vmaddr |= gaddr & ~PMD_MASK;
704 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 678 /* Find vma in the parent mm */
705 table = table + ((address >> 31) & 0x7ff); 679 vma = find_vma(gmap->mm, vmaddr);
706 if (unlikely(*table & _REGION_ENTRY_INVALID)) { 680 size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
707 address = (address + PMD_SIZE) & PMD_MASK; 681 zap_page_range(vma, vmaddr, size, NULL);
708 continue;
709 }
710 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
711 table = table + ((address >> 20) & 0x7ff);
712 if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
713 address = (address + PMD_SIZE) & PMD_MASK;
714 continue;
715 }
716 page = pfn_to_page(*table >> PAGE_SHIFT);
717 mp = (struct gmap_pgtable *) page->index;
718 vma = find_vma(gmap->mm, mp->vmaddr);
719 size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
720 zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
721 size, NULL);
722 address = (address + PMD_SIZE) & PMD_MASK;
723 } 682 }
724 up_read(&gmap->mm->mmap_sem); 683 up_read(&gmap->mm->mmap_sem);
725} 684}
@@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
755/** 714/**
756 * gmap_ipte_notify - mark a range of ptes for invalidation notification 715 * gmap_ipte_notify - mark a range of ptes for invalidation notification
757 * @gmap: pointer to guest mapping meta data structure 716 * @gmap: pointer to guest mapping meta data structure
758 * @start: virtual address in the guest address space 717 * @gaddr: virtual address in the guest address space
759 * @len: size of area 718 * @len: size of area
760 * 719 *
761 * Returns 0 if for each page in the given range a gmap mapping exists and 720 * Returns 0 if for each page in the given range a gmap mapping exists and
@@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
763 * for one or more pages -EFAULT is returned. If no memory could be allocated 722 * for one or more pages -EFAULT is returned. If no memory could be allocated
764 * -ENOMEM is returned. This function establishes missing page table entries. 723 * -ENOMEM is returned. This function establishes missing page table entries.
765 */ 724 */
766int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) 725int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
767{ 726{
768 unsigned long addr; 727 unsigned long addr;
769 spinlock_t *ptl; 728 spinlock_t *ptl;
@@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
771 pgste_t pgste; 730 pgste_t pgste;
772 int rc = 0; 731 int rc = 0;
773 732
774 if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) 733 if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
775 return -EINVAL; 734 return -EINVAL;
776 down_read(&gmap->mm->mmap_sem); 735 down_read(&gmap->mm->mmap_sem);
777 while (len) { 736 while (len) {
778 /* Convert gmap address and connect the page tables */ 737 /* Convert gmap address and connect the page tables */
779 addr = __gmap_fault(start, gmap); 738 addr = __gmap_translate(gmap, gaddr);
780 if (IS_ERR_VALUE(addr)) { 739 if (IS_ERR_VALUE(addr)) {
781 rc = addr; 740 rc = addr;
782 break; 741 break;
@@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
786 rc = -EFAULT; 745 rc = -EFAULT;
787 break; 746 break;
788 } 747 }
748 rc = __gmap_link(gmap, gaddr, addr);
749 if (rc)
750 break;
789 /* Walk the process page table, lock and get pte pointer */ 751 /* Walk the process page table, lock and get pte pointer */
790 ptep = get_locked_pte(gmap->mm, addr, &ptl); 752 ptep = get_locked_pte(gmap->mm, addr, &ptl);
791 if (unlikely(!ptep)) 753 if (unlikely(!ptep))
@@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
796 pgste = pgste_get_lock(ptep); 758 pgste = pgste_get_lock(ptep);
797 pgste_val(pgste) |= PGSTE_IN_BIT; 759 pgste_val(pgste) |= PGSTE_IN_BIT;
798 pgste_set_unlock(ptep, pgste); 760 pgste_set_unlock(ptep, pgste);
799 start += PAGE_SIZE; 761 gaddr += PAGE_SIZE;
800 len -= PAGE_SIZE; 762 len -= PAGE_SIZE;
801 } 763 }
802 spin_unlock(ptl); 764 spin_unlock(ptl);
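
gmap_ipte_notify() rejects unaligned input and then processes the range page by page, marking each pte for invalidation notification. A user-space sketch of that calling contract, with the translation and locking steps stubbed out and only the alignment check and loop kept:

#include <errno.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static int mark_pte_for_notification(unsigned long gaddr)
{
	printf("would set PGSTE_IN_BIT for guest page %#lx\n", gaddr);	/* stub */
	return 0;
}

static int model_ipte_notify(unsigned long gaddr, unsigned long len)
{
	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	while (len) {
		int rc = mark_pte_for_notification(gaddr);
		if (rc)
			return rc;
		gaddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

int main(void)
{
	printf("rc=%d\n", model_ipte_notify(0x100000, 3 * PAGE_SIZE));
	return 0;
}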
@@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify);
809/** 771/**
810 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. 772 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
811 * @mm: pointer to the process mm_struct 773 * @mm: pointer to the process mm_struct
774 * @addr: virtual address in the process address space
812 * @pte: pointer to the page table entry 775 * @pte: pointer to the page table entry
813 * 776 *
814 * This function is assumed to be called with the page table lock held 777 * This function is assumed to be called with the page table lock held
815 * for the pte to notify. 778 * for the pte to notify.
816 */ 779 */
817void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) 780void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
818{ 781{
819 unsigned long segment_offset; 782 unsigned long offset, gaddr;
783 unsigned long *table;
820 struct gmap_notifier *nb; 784 struct gmap_notifier *nb;
821 struct gmap_pgtable *mp; 785 struct gmap *gmap;
822 struct gmap_rmap *rmap;
823 struct page *page;
824 786
825 segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); 787 offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
826 segment_offset = segment_offset * (4096 / sizeof(pte_t)); 788 offset = offset * (4096 / sizeof(pte_t));
827 page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
828 mp = (struct gmap_pgtable *) page->index;
829 spin_lock(&gmap_notifier_lock); 789 spin_lock(&gmap_notifier_lock);
830 list_for_each_entry(rmap, &mp->mapper, list) { 790 list_for_each_entry(gmap, &mm->context.gmap_list, list) {
791 table = radix_tree_lookup(&gmap->host_to_guest,
792 vmaddr >> PMD_SHIFT);
793 if (!table)
794 continue;
795 gaddr = __gmap_segment_gaddr(table) + offset;
831 list_for_each_entry(nb, &gmap_notifier_list, list) 796 list_for_each_entry(nb, &gmap_notifier_list, list)
832 nb->notifier_call(rmap->gmap, 797 nb->notifier_call(gmap, gaddr);
833 rmap->vmaddr + segment_offset);
834 } 798 }
835 spin_unlock(&gmap_notifier_lock); 799 spin_unlock(&gmap_notifier_lock);
836} 800}
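
The new gmap_do_ipte_notify() derives the guest offset from the position of the pte within its 2 KB page table (256 entries of 8 bytes, each mapping a 4 KB page). That arithmetic can be reproduced standalone; the pointer value below is made up:

#include <stdio.h>

int main(void)
{
	unsigned long pte_size = 8;				/* sizeof(pte_t) on s390 */
	unsigned long pte_ptr  = 0x12340000UL + 42 * pte_size;	/* entry #42 of a table */

	unsigned long offset = pte_ptr & (255 * pte_size);	/* byte offset inside the table */
	offset *= 4096 / pte_size;				/* -> page offset inside the segment */

	printf("pte entry %lu maps segment offset %#lx\n",
	       (pte_ptr & (255 * pte_size)) / pte_size, offset);
	return 0;
}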
@@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page)
841 return atomic_read(&page->_mapcount) == 0; 805 return atomic_read(&page->_mapcount) == 0;
842} 806}
843 807
844static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 808static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
845 unsigned long vmaddr)
846{ 809{
847 struct page *page; 810 struct page *page;
848 unsigned long *table; 811 unsigned long *table;
849 struct gmap_pgtable *mp;
850 812
851 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 813 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
852 if (!page) 814 if (!page)
853 return NULL; 815 return NULL;
854 mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
855 if (!mp) {
856 __free_page(page);
857 return NULL;
858 }
859 if (!pgtable_page_ctor(page)) { 816 if (!pgtable_page_ctor(page)) {
860 kfree(mp);
861 __free_page(page); 817 __free_page(page);
862 return NULL; 818 return NULL;
863 } 819 }
864 mp->vmaddr = vmaddr & PMD_MASK;
865 INIT_LIST_HEAD(&mp->mapper);
866 page->index = (unsigned long) mp;
867 atomic_set(&page->_mapcount, 0); 820 atomic_set(&page->_mapcount, 0);
868 table = (unsigned long *) page_to_phys(page); 821 table = (unsigned long *) page_to_phys(page);
869 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 822 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
874static inline void page_table_free_pgste(unsigned long *table) 827static inline void page_table_free_pgste(unsigned long *table)
875{ 828{
876 struct page *page; 829 struct page *page;
877 struct gmap_pgtable *mp;
878 830
879 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 831 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
880 mp = (struct gmap_pgtable *) page->index;
881 BUG_ON(!list_empty(&mp->mapper));
882 pgtable_page_dtor(page); 832 pgtable_page_dtor(page);
883 atomic_set(&page->_mapcount, -1); 833 atomic_set(&page->_mapcount, -1);
884 kfree(mp);
885 __free_page(page); 834 __free_page(page);
886} 835}
887 836
@@ -986,11 +935,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
986 pte_t *ptep; 935 pte_t *ptep;
987 936
988 down_read(&mm->mmap_sem); 937 down_read(&mm->mmap_sem);
938retry:
989 ptep = get_locked_pte(current->mm, addr, &ptl); 939 ptep = get_locked_pte(current->mm, addr, &ptl);
990 if (unlikely(!ptep)) { 940 if (unlikely(!ptep)) {
991 up_read(&mm->mmap_sem); 941 up_read(&mm->mmap_sem);
992 return -EFAULT; 942 return -EFAULT;
993 } 943 }
944 if (!(pte_val(*ptep) & _PAGE_INVALID) &&
945 (pte_val(*ptep) & _PAGE_PROTECT)) {
946 pte_unmap_unlock(*ptep, ptl);
947 if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
948 up_read(&mm->mmap_sem);
949 return -EFAULT;
950 }
951 goto retry;
952 }
994 953
995 new = old = pgste_get_lock(ptep); 954 new = old = pgste_get_lock(ptep);
996 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | 955 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
@@ -1028,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page)
1028 return 0; 987 return 0;
1029} 988}
1030 989
1031static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 990static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
1032 unsigned long vmaddr)
1033{ 991{
1034 return NULL; 992 return NULL;
1035} 993}
@@ -1043,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table)
1043{ 1001{
1044} 1002}
1045 1003
1046static inline void gmap_disconnect_pgtable(struct mm_struct *mm, 1004static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
1047 unsigned long *table) 1005 unsigned long vmaddr)
1048{ 1006{
1049} 1007}
1050 1008
@@ -1064,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
1064/* 1022/*
1065 * page table entry allocation/free routines. 1023 * page table entry allocation/free routines.
1066 */ 1024 */
1067unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) 1025unsigned long *page_table_alloc(struct mm_struct *mm)
1068{ 1026{
1069 unsigned long *uninitialized_var(table); 1027 unsigned long *uninitialized_var(table);
1070 struct page *uninitialized_var(page); 1028 struct page *uninitialized_var(page);
1071 unsigned int mask, bit; 1029 unsigned int mask, bit;
1072 1030
1073 if (mm_has_pgste(mm)) 1031 if (mm_has_pgste(mm))
1074 return page_table_alloc_pgste(mm, vmaddr); 1032 return page_table_alloc_pgste(mm);
1075 /* Allocate fragments of a 4K page as 1K/2K page table */ 1033 /* Allocate fragments of a 4K page as 1K/2K page table */
1076 spin_lock_bh(&mm->context.list_lock); 1034 spin_lock_bh(&mm->context.list_lock);
1077 mask = FRAG_MASK; 1035 mask = FRAG_MASK;
@@ -1113,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
1113 unsigned int bit, mask; 1071 unsigned int bit, mask;
1114 1072
1115 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1073 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1116 if (page_table_with_pgste(page)) { 1074 if (page_table_with_pgste(page))
1117 gmap_disconnect_pgtable(mm, table);
1118 return page_table_free_pgste(table); 1075 return page_table_free_pgste(table);
1119 }
1120 /* Free 1K/2K page table fragment of a 4K page */ 1076 /* Free 1K/2K page table fragment of a 4K page */
1121 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); 1077 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
1122 spin_lock_bh(&mm->context.list_lock); 1078 spin_lock_bh(&mm->context.list_lock);
@@ -1148,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
1148 } 1104 }
1149} 1105}
1150 1106
1151void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) 1107void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
1108 unsigned long vmaddr)
1152{ 1109{
1153 struct mm_struct *mm; 1110 struct mm_struct *mm;
1154 struct page *page; 1111 struct page *page;
@@ -1157,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
1157 mm = tlb->mm; 1114 mm = tlb->mm;
1158 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1115 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1159 if (page_table_with_pgste(page)) { 1116 if (page_table_with_pgste(page)) {
1160 gmap_disconnect_pgtable(mm, table); 1117 gmap_unlink(mm, table, vmaddr);
1161 table = (unsigned long *) (__pa(table) | FRAG_MASK); 1118 table = (unsigned long *) (__pa(table) | FRAG_MASK);
1162 tlb_remove_table(tlb, table); 1119 tlb_remove_table(tlb, table);
1163 return; 1120 return;
@@ -1293,7 +1250,7 @@ again:
1293 if (page_table_with_pgste(page)) 1250 if (page_table_with_pgste(page))
1294 continue; 1251 continue;
1295 /* Allocate new page table with pgstes */ 1252 /* Allocate new page table with pgstes */
1296 new = page_table_alloc_pgste(mm, addr); 1253 new = page_table_alloc_pgste(mm);
1297 if (!new) 1254 if (!new)
1298 return -ENOMEM; 1255 return -ENOMEM;
1299 1256
@@ -1308,7 +1265,7 @@ again:
1308 /* Establish new table */ 1265 /* Establish new table */
1309 pmd_populate(mm, pmd, (pte_t *) new); 1266 pmd_populate(mm, pmd, (pte_t *) new);
1310 /* Free old table with rcu, there might be a walker! */ 1267 /* Free old table with rcu, there might be a walker! */
1311 page_table_free_rcu(tlb, table); 1268 page_table_free_rcu(tlb, table, addr);
1312 new = NULL; 1269 new = NULL;
1313 } 1270 }
1314 spin_unlock(ptl); 1271 spin_unlock(ptl);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index fe9012a49aa5..fdbd7888cb07 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
65 pte_t *pte; 65 pte_t *pte;
66 66
67 if (slab_is_available()) 67 if (slab_is_available())
68 pte = (pte_t *) page_table_alloc(&init_mm, address); 68 pte = (pte_t *) page_table_alloc(&init_mm);
69 else 69 else
70 pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), 70 pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
71 PTRS_PER_PTE * sizeof(pte_t)); 71 PTRS_PER_PTE * sizeof(pte_t));
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ac0f90e26a0b..028df8dc538e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
99 99
100#define ASYNC_PF_PER_VCPU 64 100#define ASYNC_PF_PER_VCPU 64
101 101
102struct kvm_vcpu;
103struct kvm;
104struct kvm_async_pf;
105
106enum kvm_reg { 102enum kvm_reg {
107 VCPU_REGS_RAX = 0, 103 VCPU_REGS_RAX = 0,
108 VCPU_REGS_RCX = 1, 104 VCPU_REGS_RCX = 1,
@@ -266,7 +262,8 @@ struct kvm_mmu {
266 struct x86_exception *fault); 262 struct x86_exception *fault);
267 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, 263 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
268 struct x86_exception *exception); 264 struct x86_exception *exception);
269 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); 265 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
266 struct x86_exception *exception);
270 int (*sync_page)(struct kvm_vcpu *vcpu, 267 int (*sync_page)(struct kvm_vcpu *vcpu,
271 struct kvm_mmu_page *sp); 268 struct kvm_mmu_page *sp);
272 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); 269 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
@@ -481,6 +478,7 @@ struct kvm_vcpu_arch {
481 u64 mmio_gva; 478 u64 mmio_gva;
482 unsigned access; 479 unsigned access;
483 gfn_t mmio_gfn; 480 gfn_t mmio_gfn;
481 u64 mmio_gen;
484 482
485 struct kvm_pmu pmu; 483 struct kvm_pmu pmu;
486 484
@@ -580,7 +578,6 @@ struct kvm_arch {
580 578
581 gpa_t wall_clock; 579 gpa_t wall_clock;
582 580
583 struct page *ept_identity_pagetable;
584 bool ept_identity_pagetable_done; 581 bool ept_identity_pagetable_done;
585 gpa_t ept_identity_map_addr; 582 gpa_t ept_identity_map_addr;
586 583
@@ -665,8 +662,8 @@ struct msr_data {
665struct kvm_x86_ops { 662struct kvm_x86_ops {
666 int (*cpu_has_kvm_support)(void); /* __init */ 663 int (*cpu_has_kvm_support)(void); /* __init */
667 int (*disabled_by_bios)(void); /* __init */ 664 int (*disabled_by_bios)(void); /* __init */
668 int (*hardware_enable)(void *dummy); 665 int (*hardware_enable)(void);
669 void (*hardware_disable)(void *dummy); 666 void (*hardware_disable)(void);
670 void (*check_processor_compatibility)(void *rtn); 667 void (*check_processor_compatibility)(void *rtn);
671 int (*hardware_setup)(void); /* __init */ 668 int (*hardware_setup)(void); /* __init */
672 void (*hardware_unsetup)(void); /* __exit */ 669 void (*hardware_unsetup)(void); /* __exit */
@@ -896,7 +893,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
896int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, 893int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
897 gfn_t gfn, void *data, int offset, int len, 894 gfn_t gfn, void *data, int offset, int len,
898 u32 access); 895 u32 access);
899void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
900bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); 896bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
901 897
902static inline int __kvm_irq_line_state(unsigned long *irq_state, 898static inline int __kvm_irq_line_state(unsigned long *irq_state,
@@ -927,7 +923,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
927int kvm_mmu_load(struct kvm_vcpu *vcpu); 923int kvm_mmu_load(struct kvm_vcpu *vcpu);
928void kvm_mmu_unload(struct kvm_vcpu *vcpu); 924void kvm_mmu_unload(struct kvm_vcpu *vcpu);
929void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 925void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
930gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); 926gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
927 struct x86_exception *exception);
931gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, 928gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
932 struct x86_exception *exception); 929 struct x86_exception *exception);
933gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, 930gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -947,7 +944,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
947void kvm_enable_tdp(void); 944void kvm_enable_tdp(void);
948void kvm_disable_tdp(void); 945void kvm_disable_tdp(void);
949 946
950static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 947static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
948 struct x86_exception *exception)
951{ 949{
952 return gpa; 950 return gpa;
953} 951}
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a5380590ab0e..43b33e301e68 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -88,6 +88,14 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
88 return best && (best->ecx & bit(X86_FEATURE_X2APIC)); 88 return best && (best->ecx & bit(X86_FEATURE_X2APIC));
89} 89}
90 90
91static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
92{
93 struct kvm_cpuid_entry2 *best;
94
95 best = kvm_find_cpuid_entry(vcpu, 0, 0);
96 return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
97}
98
91static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) 99static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
92{ 100{
93 struct kvm_cpuid_entry2 *best; 101 struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e5bf13003cd2..20d91873d831 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3139,12 +3139,8 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
3139 3139
3140static int em_vmcall(struct x86_emulate_ctxt *ctxt) 3140static int em_vmcall(struct x86_emulate_ctxt *ctxt)
3141{ 3141{
3142 int rc; 3142 int rc = ctxt->ops->fix_hypercall(ctxt);
3143
3144 if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1)
3145 return X86EMUL_UNHANDLEABLE;
3146 3143
3147 rc = ctxt->ops->fix_hypercall(ctxt);
3148 if (rc != X86EMUL_CONTINUE) 3144 if (rc != X86EMUL_CONTINUE)
3149 return rc; 3145 return rc;
3150 3146
@@ -3562,6 +3558,12 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3562 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 3558 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
3563 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 3559 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3564 3560
3561static const struct opcode group7_rm0[] = {
3562 N,
3563 I(SrcNone | Priv | EmulateOnUD, em_vmcall),
3564 N, N, N, N, N, N,
3565};
3566
3565static const struct opcode group7_rm1[] = { 3567static const struct opcode group7_rm1[] = {
3566 DI(SrcNone | Priv, monitor), 3568 DI(SrcNone | Priv, monitor),
3567 DI(SrcNone | Priv, mwait), 3569 DI(SrcNone | Priv, mwait),
@@ -3655,7 +3657,7 @@ static const struct group_dual group7 = { {
3655 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), 3657 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3656 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3658 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
3657}, { 3659}, {
3658 I(SrcNone | Priv | EmulateOnUD, em_vmcall), 3660 EXT(0, group7_rm0),
3659 EXT(0, group7_rm1), 3661 EXT(0, group7_rm1),
3660 N, EXT(0, group7_rm3), 3662 N, EXT(0, group7_rm3),
3661 II(SrcNone | DstMem | Mov, em_smsw, smsw), N, 3663 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3686,14 +3688,18 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
3686 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), 3688 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3687}; 3689};
3688 3690
3689static const struct gprefix pfx_vmovntpx = { 3691static const struct gprefix pfx_0f_2b = {
3690 I(0, em_mov), N, N, N, 3692 I(0, em_mov), I(0, em_mov), N, N,
3691}; 3693};
3692 3694
3693static const struct gprefix pfx_0f_28_0f_29 = { 3695static const struct gprefix pfx_0f_28_0f_29 = {
3694 I(Aligned, em_mov), I(Aligned, em_mov), N, N, 3696 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
3695}; 3697};
3696 3698
3699static const struct gprefix pfx_0f_e7 = {
3700 N, I(Sse, em_mov), N, N,
3701};
3702
3697static const struct escape escape_d9 = { { 3703static const struct escape escape_d9 = { {
3698 N, N, N, N, N, N, N, I(DstMem, em_fnstcw), 3704 N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
3699}, { 3705}, {
@@ -3900,7 +3906,7 @@ static const struct opcode twobyte_table[256] = {
3900 N, N, N, N, 3906 N, N, N, N,
3901 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), 3907 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
3902 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), 3908 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
3903 N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), 3909 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
3904 N, N, N, N, 3910 N, N, N, N,
3905 /* 0x30 - 0x3F */ 3911 /* 0x30 - 0x3F */
3906 II(ImplicitOps | Priv, em_wrmsr, wrmsr), 3912 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
@@ -3964,7 +3970,8 @@ static const struct opcode twobyte_table[256] = {
3964 /* 0xD0 - 0xDF */ 3970 /* 0xD0 - 0xDF */
3965 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3971 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
3966 /* 0xE0 - 0xEF */ 3972 /* 0xE0 - 0xEF */
3967 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3973 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
3974 N, N, N, N, N, N, N, N,
3968 /* 0xF0 - 0xFF */ 3975 /* 0xF0 - 0xFF */
3969 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N 3976 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
3970}; 3977};
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fb919c574e23..b8345dd41b25 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -709,6 +709,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
709 int result = 0; 709 int result = 0;
710 struct kvm_vcpu *vcpu = apic->vcpu; 710 struct kvm_vcpu *vcpu = apic->vcpu;
711 711
712 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
713 trig_mode, vector);
712 switch (delivery_mode) { 714 switch (delivery_mode) {
713 case APIC_DM_LOWEST: 715 case APIC_DM_LOWEST:
714 vcpu->arch.apic_arb_prio++; 716 vcpu->arch.apic_arb_prio++;
@@ -730,8 +732,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
730 kvm_make_request(KVM_REQ_EVENT, vcpu); 732 kvm_make_request(KVM_REQ_EVENT, vcpu);
731 kvm_vcpu_kick(vcpu); 733 kvm_vcpu_kick(vcpu);
732 } 734 }
733 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
734 trig_mode, vector, false);
735 break; 735 break;
736 736
737 case APIC_DM_REMRD: 737 case APIC_DM_REMRD:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 931467881da7..76398fe15df2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
199EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); 199EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
200 200
201/* 201/*
202 * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, 202 * the low bit of the generation number is always presumed to be zero.
203 * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation 203 * This disables mmio caching during memslot updates. The concept is
204 * number. 204 * similar to a seqcount but instead of retrying the access we just punt
205 * and ignore the cache.
206 *
207 * spte bits 3-11 are used as bits 1-9 of the generation number,
208 * the bits 52-61 are used as bits 10-19 of the generation number.
205 */ 209 */
206#define MMIO_SPTE_GEN_LOW_SHIFT 3 210#define MMIO_SPTE_GEN_LOW_SHIFT 2
207#define MMIO_SPTE_GEN_HIGH_SHIFT 52 211#define MMIO_SPTE_GEN_HIGH_SHIFT 52
208 212
209#define MMIO_GEN_SHIFT 19 213#define MMIO_GEN_SHIFT 20
210#define MMIO_GEN_LOW_SHIFT 9 214#define MMIO_GEN_LOW_SHIFT 10
211#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) 215#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2)
212#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) 216#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
213#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) 217#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
214 218
@@ -236,12 +240,7 @@ static unsigned int get_mmio_spte_generation(u64 spte)
236 240
237static unsigned int kvm_current_mmio_generation(struct kvm *kvm) 241static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
238{ 242{
239 /* 243 return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
240 * Init kvm generation close to MMIO_MAX_GEN to easily test the
241 * code of handling generation number wrap-around.
242 */
243 return (kvm_memslots(kvm)->generation +
244 MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
245} 244}
246 245
247static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, 246static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
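
The helpers that apply this encoding are outside the hunk, but the layout the new comment describes — generation bits 1-9 in spte bits 3-11, bits 10-19 in spte bits 52-61, bit 0 assumed zero — can be sketched with the new constants (a model, not the kernel code):

#include <stdio.h>

#define MMIO_SPTE_GEN_LOW_SHIFT		2
#define MMIO_SPTE_GEN_HIGH_SHIFT	52
#define MMIO_GEN_SHIFT			20
#define MMIO_GEN_LOW_SHIFT		10
#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 2)
#define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)

static unsigned long long pack_gen(unsigned int gen)
{
	unsigned long long low  = (unsigned long long)(gen & MMIO_GEN_LOW_MASK)
					<< MMIO_SPTE_GEN_LOW_SHIFT;
	unsigned long long high = (unsigned long long)(gen >> MMIO_GEN_LOW_SHIFT)
					<< MMIO_SPTE_GEN_HIGH_SHIFT;
	return low | high;
}

static unsigned int unpack_gen(unsigned long long spte)
{
	unsigned int low  = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
	unsigned int high = (unsigned int)((spte >> MMIO_SPTE_GEN_HIGH_SHIFT)
					   << MMIO_GEN_LOW_SHIFT);
	return (low | high) & MMIO_GEN_MASK;
}

int main(void)
{
	unsigned int gen = 0x5a5a4 & MMIO_GEN_MASK & ~1u;	/* low bit forced to zero */
	unsigned long long spte_bits = pack_gen(gen);

	printf("gen=%#x spte-bits=%#llx roundtrip=%#x\n",
	       gen, spte_bits, unpack_gen(spte_bits));
	return 0;
}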
@@ -296,11 +295,6 @@ static bool check_mmio_spte(struct kvm *kvm, u64 spte)
296 return likely(kvm_gen == spte_gen); 295 return likely(kvm_gen == spte_gen);
297} 296}
298 297
299static inline u64 rsvd_bits(int s, int e)
300{
301 return ((1ULL << (e - s + 1)) - 1) << s;
302}
303
304void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 298void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
305 u64 dirty_mask, u64 nx_mask, u64 x_mask) 299 u64 dirty_mask, u64 nx_mask, u64 x_mask)
306{ 300{
@@ -3163,7 +3157,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
3163 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 3157 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3164 return; 3158 return;
3165 3159
3166 vcpu_clear_mmio_info(vcpu, ~0ul); 3160 vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
3167 kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); 3161 kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
3168 if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { 3162 if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
3169 hpa_t root = vcpu->arch.mmu.root_hpa; 3163 hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -3206,7 +3200,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
3206{ 3200{
3207 if (exception) 3201 if (exception)
3208 exception->error_code = 0; 3202 exception->error_code = 0;
3209 return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); 3203 return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
3210} 3204}
3211 3205
3212static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) 3206static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -3518,6 +3512,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3518 int maxphyaddr = cpuid_maxphyaddr(vcpu); 3512 int maxphyaddr = cpuid_maxphyaddr(vcpu);
3519 u64 exb_bit_rsvd = 0; 3513 u64 exb_bit_rsvd = 0;
3520 u64 gbpages_bit_rsvd = 0; 3514 u64 gbpages_bit_rsvd = 0;
3515 u64 nonleaf_bit8_rsvd = 0;
3521 3516
3522 context->bad_mt_xwr = 0; 3517 context->bad_mt_xwr = 0;
3523 3518
@@ -3525,6 +3520,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3525 exb_bit_rsvd = rsvd_bits(63, 63); 3520 exb_bit_rsvd = rsvd_bits(63, 63);
3526 if (!guest_cpuid_has_gbpages(vcpu)) 3521 if (!guest_cpuid_has_gbpages(vcpu))
3527 gbpages_bit_rsvd = rsvd_bits(7, 7); 3522 gbpages_bit_rsvd = rsvd_bits(7, 7);
3523
3524 /*
3525 * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
3526 * leaf entries) on AMD CPUs only.
3527 */
3528 if (guest_cpuid_is_amd(vcpu))
3529 nonleaf_bit8_rsvd = rsvd_bits(8, 8);
3530
3528 switch (context->root_level) { 3531 switch (context->root_level) {
3529 case PT32_ROOT_LEVEL: 3532 case PT32_ROOT_LEVEL:
3530 /* no rsvd bits for 2 level 4K page table entries */ 3533 /* no rsvd bits for 2 level 4K page table entries */
@@ -3559,9 +3562,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3559 break; 3562 break;
3560 case PT64_ROOT_LEVEL: 3563 case PT64_ROOT_LEVEL:
3561 context->rsvd_bits_mask[0][3] = exb_bit_rsvd | 3564 context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
3562 rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7); 3565 nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
3563 context->rsvd_bits_mask[0][2] = exb_bit_rsvd | 3566 context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
3564 gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); 3567 nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
3565 context->rsvd_bits_mask[0][1] = exb_bit_rsvd | 3568 context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
3566 rsvd_bits(maxphyaddr, 51); 3569 rsvd_bits(maxphyaddr, 51);
3567 context->rsvd_bits_mask[0][0] = exb_bit_rsvd | 3570 context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -4433,7 +4436,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
4433 * The very rare case: if the generation-number is round, 4436 * The very rare case: if the generation-number is round,
4434 * zap all shadow pages. 4437 * zap all shadow pages.
4435 */ 4438 */
4436 if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { 4439 if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
4437 printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); 4440 printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
4438 kvm_mmu_invalidate_zap_all_pages(kvm); 4441 kvm_mmu_invalidate_zap_all_pages(kvm);
4439 } 4442 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b982112d2ca5..bde8ee725754 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -56,6 +56,11 @@
56#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) 56#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
57#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) 57#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
58 58
59static inline u64 rsvd_bits(int s, int e)
60{
61 return ((1ULL << (e - s + 1)) - 1) << s;
62}
63
59int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); 64int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
60void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); 65void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
61 66
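
rsvd_bits() is now shared through mmu.h; it builds a mask with bits s..e inclusive set, so the AMD-only nonleaf_bit8_rsvd added in mmu.c above is simply rsvd_bits(8, 8). A quick standalone check of the formula:

#include <stdio.h>

static unsigned long long rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;	/* bits s..e set */
}

int main(void)
{
	printf("rsvd_bits(8, 8)   = %#llx\n", rsvd_bits(8, 8));		/* 0x100 */
	printf("rsvd_bits(7, 7)   = %#llx\n", rsvd_bits(7, 7));		/* 0x80 */
	printf("rsvd_bits(46, 51) = %#llx\n", rsvd_bits(46, 51));	/* maxphyaddr = 46 example */
	return 0;
}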
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 410776528265..0ab6c65a2821 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -321,9 +321,22 @@ retry_walk:
321 walker->pte_gpa[walker->level - 1] = pte_gpa; 321 walker->pte_gpa[walker->level - 1] = pte_gpa;
322 322
323 real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), 323 real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
324 PFERR_USER_MASK|PFERR_WRITE_MASK); 324 PFERR_USER_MASK|PFERR_WRITE_MASK,
325 &walker->fault);
326
327 /*
 328 * FIXME: This can happen if emulation (of an INS/OUTS

329 * instruction) triggers a nested page fault. The exit
330 * qualification / exit info field will incorrectly have
331 * "guest page access" as the nested page fault's cause,
332 * instead of "guest page structure access". To fix this,
333 * the x86_exception struct should be augmented with enough
334 * information to fix the exit_qualification or exit_info_1
335 * fields.
336 */
325 if (unlikely(real_gfn == UNMAPPED_GVA)) 337 if (unlikely(real_gfn == UNMAPPED_GVA))
326 goto error; 338 return 0;
339
327 real_gfn = gpa_to_gfn(real_gfn); 340 real_gfn = gpa_to_gfn(real_gfn);
328 341
329 host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, 342 host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
@@ -364,7 +377,7 @@ retry_walk:
364 if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) 377 if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
365 gfn += pse36_gfn_delta(pte); 378 gfn += pse36_gfn_delta(pte);
366 379
367 real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); 380 real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
368 if (real_gpa == UNMAPPED_GVA) 381 if (real_gpa == UNMAPPED_GVA)
369 return 0; 382 return 0;
370 383
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1703aab84a6d..f7f6a4a157a6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -622,7 +622,7 @@ static int has_svm(void)
622 return 1; 622 return 1;
623} 623}
624 624
625static void svm_hardware_disable(void *garbage) 625static void svm_hardware_disable(void)
626{ 626{
627 /* Make sure we clean up behind us */ 627 /* Make sure we clean up behind us */
628 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) 628 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage)
633 amd_pmu_disable_virt(); 633 amd_pmu_disable_virt();
634} 634}
635 635
636static int svm_hardware_enable(void *garbage) 636static int svm_hardware_enable(void)
637{ 637{
638 638
639 struct svm_cpu_data *sd; 639 struct svm_cpu_data *sd;
@@ -1257,7 +1257,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1257 svm->asid_generation = 0; 1257 svm->asid_generation = 0;
1258 init_vmcb(svm); 1258 init_vmcb(svm);
1259 1259
1260 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 1260 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1261 MSR_IA32_APICBASE_ENABLE;
1261 if (kvm_vcpu_is_bsp(&svm->vcpu)) 1262 if (kvm_vcpu_is_bsp(&svm->vcpu))
1262 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 1263 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1263 1264
@@ -1974,10 +1975,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1974{ 1975{
1975 struct vcpu_svm *svm = to_svm(vcpu); 1976 struct vcpu_svm *svm = to_svm(vcpu);
1976 1977
1977 svm->vmcb->control.exit_code = SVM_EXIT_NPF; 1978 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
1978 svm->vmcb->control.exit_code_hi = 0; 1979 /*
1979 svm->vmcb->control.exit_info_1 = fault->error_code; 1980 * TODO: track the cause of the nested page fault, and
1980 svm->vmcb->control.exit_info_2 = fault->address; 1981 * correctly fill in the high bits of exit_info_1.
1982 */
1983 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1984 svm->vmcb->control.exit_code_hi = 0;
1985 svm->vmcb->control.exit_info_1 = (1ULL << 32);
1986 svm->vmcb->control.exit_info_2 = fault->address;
1987 }
1988
1989 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
1990 svm->vmcb->control.exit_info_1 |= fault->error_code;
1991
1992 /*
1993 * The present bit is always zero for page structure faults on real
1994 * hardware.
1995 */
1996 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
1997 svm->vmcb->control.exit_info_1 &= ~1;
1981 1998
1982 nested_svm_vmexit(svm); 1999 nested_svm_vmexit(svm);
1983} 2000}
@@ -3031,7 +3048,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
3031 return 0; 3048 return 0;
3032} 3049}
3033 3050
3034u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 3051static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
3035{ 3052{
3036 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); 3053 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
3037 return vmcb->control.tsc_offset + 3054 return vmcb->control.tsc_offset +
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 1742dfbd26b3..6b06ab8748dd 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -415,15 +415,14 @@ TRACE_EVENT(kvm_apic_ipi,
415); 415);
416 416
417TRACE_EVENT(kvm_apic_accept_irq, 417TRACE_EVENT(kvm_apic_accept_irq,
418 TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced), 418 TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
419 TP_ARGS(apicid, dm, tm, vec, coalesced), 419 TP_ARGS(apicid, dm, tm, vec),
420 420
421 TP_STRUCT__entry( 421 TP_STRUCT__entry(
422 __field( __u32, apicid ) 422 __field( __u32, apicid )
423 __field( __u16, dm ) 423 __field( __u16, dm )
424 __field( __u8, tm ) 424 __field( __u8, tm )
425 __field( __u8, vec ) 425 __field( __u8, vec )
426 __field( bool, coalesced )
427 ), 426 ),
428 427
429 TP_fast_assign( 428 TP_fast_assign(
@@ -431,14 +430,12 @@ TRACE_EVENT(kvm_apic_accept_irq,
431 __entry->dm = dm; 430 __entry->dm = dm;
432 __entry->tm = tm; 431 __entry->tm = tm;
433 __entry->vec = vec; 432 __entry->vec = vec;
434 __entry->coalesced = coalesced;
435 ), 433 ),
436 434
437 TP_printk("apicid %x vec %u (%s|%s)%s", 435 TP_printk("apicid %x vec %u (%s|%s)",
438 __entry->apicid, __entry->vec, 436 __entry->apicid, __entry->vec,
439 __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), 437 __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
440 __entry->tm ? "level" : "edge", 438 __entry->tm ? "level" : "edge")
441 __entry->coalesced ? " (coalesced)" : "")
442); 439);
443 440
444TRACE_EVENT(kvm_eoi, 441TRACE_EVENT(kvm_eoi,
@@ -848,6 +845,8 @@ TRACE_EVENT(kvm_track_tsc,
848 __print_symbolic(__entry->host_clock, host_clocks)) 845 __print_symbolic(__entry->host_clock, host_clocks))
849); 846);
850 847
848#endif /* CONFIG_X86_64 */
849
851TRACE_EVENT(kvm_ple_window, 850TRACE_EVENT(kvm_ple_window,
852 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), 851 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
853 TP_ARGS(grow, vcpu_id, new, old), 852 TP_ARGS(grow, vcpu_id, new, old),
@@ -878,8 +877,6 @@ TRACE_EVENT(kvm_ple_window,
878#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ 877#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
879 trace_kvm_ple_window(false, vcpu_id, new, old) 878 trace_kvm_ple_window(false, vcpu_id, new, old)
880 879
881#endif /* CONFIG_X86_64 */
882
883#endif /* _TRACE_KVM_H */ 880#endif /* _TRACE_KVM_H */
884 881
885#undef TRACE_INCLUDE_PATH 882#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 661abc2f7049..6ffd643d1a64 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -397,6 +397,7 @@ struct nested_vmx {
397 * we must keep them pinned while L2 runs. 397 * we must keep them pinned while L2 runs.
398 */ 398 */
399 struct page *apic_access_page; 399 struct page *apic_access_page;
400 struct page *virtual_apic_page;
400 u64 msr_ia32_feature_control; 401 u64 msr_ia32_feature_control;
401 402
402 struct hrtimer preemption_timer; 403 struct hrtimer preemption_timer;
@@ -555,6 +556,7 @@ static int max_shadow_read_only_fields =
555 ARRAY_SIZE(shadow_read_only_fields); 556 ARRAY_SIZE(shadow_read_only_fields);
556 557
557static unsigned long shadow_read_write_fields[] = { 558static unsigned long shadow_read_write_fields[] = {
559 TPR_THRESHOLD,
558 GUEST_RIP, 560 GUEST_RIP,
559 GUEST_RSP, 561 GUEST_RSP,
560 GUEST_CR0, 562 GUEST_CR0,
@@ -765,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
765static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); 767static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
766static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); 768static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
767static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); 769static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
770static int alloc_identity_pagetable(struct kvm *kvm);
768 771
769static DEFINE_PER_CPU(struct vmcs *, vmxarea); 772static DEFINE_PER_CPU(struct vmcs *, vmxarea);
770static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 773static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2157,7 +2160,7 @@ static u64 guest_read_tsc(void)
2157 * Like guest_read_tsc, but always returns L1's notion of the timestamp 2160 * Like guest_read_tsc, but always returns L1's notion of the timestamp
2158 * counter, even if a nested guest (L2) is currently running. 2161 * counter, even if a nested guest (L2) is currently running.
2159 */ 2162 */
2160u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 2163static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2161{ 2164{
2162 u64 tsc_offset; 2165 u64 tsc_offset;
2163 2166
@@ -2352,7 +2355,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2352 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | 2355 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
2353 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | 2356 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
2354 CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | 2357 CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
2355 CPU_BASED_PAUSE_EXITING | 2358 CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
2356 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 2359 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2357 /* 2360 /*
2358 * We can allow some features even when not supported by the 2361 * We can allow some features even when not supported by the
@@ -2726,7 +2729,7 @@ static void kvm_cpu_vmxon(u64 addr)
2726 : "memory", "cc"); 2729 : "memory", "cc");
2727} 2730}
2728 2731
2729static int hardware_enable(void *garbage) 2732static int hardware_enable(void)
2730{ 2733{
2731 int cpu = raw_smp_processor_id(); 2734 int cpu = raw_smp_processor_id();
2732 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 2735 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2790,7 +2793,7 @@ static void kvm_cpu_vmxoff(void)
2790 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 2793 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
2791} 2794}
2792 2795
2793static void hardware_disable(void *garbage) 2796static void hardware_disable(void)
2794{ 2797{
2795 if (vmm_exclusive) { 2798 if (vmm_exclusive) {
2796 vmclear_local_loaded_vmcss(); 2799 vmclear_local_loaded_vmcss();
@@ -3960,21 +3963,25 @@ out:
3960 3963
3961static int init_rmode_identity_map(struct kvm *kvm) 3964static int init_rmode_identity_map(struct kvm *kvm)
3962{ 3965{
3963 int i, idx, r, ret; 3966 int i, idx, r = 0;
3964 pfn_t identity_map_pfn; 3967 pfn_t identity_map_pfn;
3965 u32 tmp; 3968 u32 tmp;
3966 3969
3967 if (!enable_ept) 3970 if (!enable_ept)
3968 return 1;
3969 if (unlikely(!kvm->arch.ept_identity_pagetable)) {
3970 printk(KERN_ERR "EPT: identity-mapping pagetable "
3971 "haven't been allocated!\n");
3972 return 0; 3971 return 0;
3973 } 3972
3973 /* Protect kvm->arch.ept_identity_pagetable_done. */
3974 mutex_lock(&kvm->slots_lock);
3975
3974 if (likely(kvm->arch.ept_identity_pagetable_done)) 3976 if (likely(kvm->arch.ept_identity_pagetable_done))
3975 return 1; 3977 goto out2;
3976 ret = 0; 3978
3977 identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; 3979 identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
3980
3981 r = alloc_identity_pagetable(kvm);
3982 if (r < 0)
3983 goto out2;
3984
3978 idx = srcu_read_lock(&kvm->srcu); 3985 idx = srcu_read_lock(&kvm->srcu);
3979 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); 3986 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
3980 if (r < 0) 3987 if (r < 0)
@@ -3989,10 +3996,13 @@ static int init_rmode_identity_map(struct kvm *kvm)
3989 goto out; 3996 goto out;
3990 } 3997 }
3991 kvm->arch.ept_identity_pagetable_done = true; 3998 kvm->arch.ept_identity_pagetable_done = true;
3992 ret = 1; 3999
3993out: 4000out:
3994 srcu_read_unlock(&kvm->srcu, idx); 4001 srcu_read_unlock(&kvm->srcu, idx);
3995 return ret; 4002
4003out2:
4004 mutex_unlock(&kvm->slots_lock);
4005 return r;
3996} 4006}
3997 4007
3998static void seg_setup(int seg) 4008static void seg_setup(int seg)
@@ -4021,13 +4031,13 @@ static int alloc_apic_access_page(struct kvm *kvm)
4021 goto out; 4031 goto out;
4022 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; 4032 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
4023 kvm_userspace_mem.flags = 0; 4033 kvm_userspace_mem.flags = 0;
4024 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; 4034 kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
4025 kvm_userspace_mem.memory_size = PAGE_SIZE; 4035 kvm_userspace_mem.memory_size = PAGE_SIZE;
4026 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); 4036 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
4027 if (r) 4037 if (r)
4028 goto out; 4038 goto out;
4029 4039
4030 page = gfn_to_page(kvm, 0xfee00); 4040 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
4031 if (is_error_page(page)) { 4041 if (is_error_page(page)) {
4032 r = -EFAULT; 4042 r = -EFAULT;
4033 goto out; 4043 goto out;
@@ -4041,31 +4051,20 @@ out:
4041 4051
4042static int alloc_identity_pagetable(struct kvm *kvm) 4052static int alloc_identity_pagetable(struct kvm *kvm)
4043{ 4053{
4044 struct page *page; 4054 /* Called with kvm->slots_lock held. */
4055
4045 struct kvm_userspace_memory_region kvm_userspace_mem; 4056 struct kvm_userspace_memory_region kvm_userspace_mem;
4046 int r = 0; 4057 int r = 0;
4047 4058
4048 mutex_lock(&kvm->slots_lock); 4059 BUG_ON(kvm->arch.ept_identity_pagetable_done);
4049 if (kvm->arch.ept_identity_pagetable) 4060
4050 goto out;
4051 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; 4061 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
4052 kvm_userspace_mem.flags = 0; 4062 kvm_userspace_mem.flags = 0;
4053 kvm_userspace_mem.guest_phys_addr = 4063 kvm_userspace_mem.guest_phys_addr =
4054 kvm->arch.ept_identity_map_addr; 4064 kvm->arch.ept_identity_map_addr;
4055 kvm_userspace_mem.memory_size = PAGE_SIZE; 4065 kvm_userspace_mem.memory_size = PAGE_SIZE;
4056 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); 4066 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
4057 if (r)
4058 goto out;
4059 4067
4060 page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
4061 if (is_error_page(page)) {
4062 r = -EFAULT;
4063 goto out;
4064 }
4065
4066 kvm->arch.ept_identity_pagetable = page;
4067out:
4068 mutex_unlock(&kvm->slots_lock);
4069 return r; 4068 return r;
4070} 4069}
4071 4070
@@ -4500,7 +4499,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4500 4499
4501 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 4500 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
4502 kvm_set_cr8(&vmx->vcpu, 0); 4501 kvm_set_cr8(&vmx->vcpu, 0);
4503 apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 4502 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
4504 if (kvm_vcpu_is_bsp(&vmx->vcpu)) 4503 if (kvm_vcpu_is_bsp(&vmx->vcpu))
4505 apic_base_msr.data |= MSR_IA32_APICBASE_BSP; 4504 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
4506 apic_base_msr.host_initiated = true; 4505 apic_base_msr.host_initiated = true;
@@ -6244,7 +6243,11 @@ static void free_nested(struct vcpu_vmx *vmx)
6244 /* Unpin physical memory we referred to in current vmcs02 */ 6243 /* Unpin physical memory we referred to in current vmcs02 */
6245 if (vmx->nested.apic_access_page) { 6244 if (vmx->nested.apic_access_page) {
6246 nested_release_page(vmx->nested.apic_access_page); 6245 nested_release_page(vmx->nested.apic_access_page);
6247 vmx->nested.apic_access_page = 0; 6246 vmx->nested.apic_access_page = NULL;
6247 }
6248 if (vmx->nested.virtual_apic_page) {
6249 nested_release_page(vmx->nested.virtual_apic_page);
6250 vmx->nested.virtual_apic_page = NULL;
6248 } 6251 }
6249 6252
6250 nested_free_all_saved_vmcss(vmx); 6253 nested_free_all_saved_vmcss(vmx);
@@ -7034,7 +7037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
7034 case EXIT_REASON_MCE_DURING_VMENTRY: 7037 case EXIT_REASON_MCE_DURING_VMENTRY:
7035 return 0; 7038 return 0;
7036 case EXIT_REASON_TPR_BELOW_THRESHOLD: 7039 case EXIT_REASON_TPR_BELOW_THRESHOLD:
7037 return 1; 7040 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
7038 case EXIT_REASON_APIC_ACCESS: 7041 case EXIT_REASON_APIC_ACCESS:
7039 return nested_cpu_has2(vmcs12, 7042 return nested_cpu_has2(vmcs12,
7040 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); 7043 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
@@ -7155,6 +7158,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
7155 7158
7156static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 7159static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
7157{ 7160{
7161 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7162
7163 if (is_guest_mode(vcpu) &&
7164 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
7165 return;
7166
7158 if (irr == -1 || tpr < irr) { 7167 if (irr == -1 || tpr < irr) {
7159 vmcs_write32(TPR_THRESHOLD, 0); 7168 vmcs_write32(TPR_THRESHOLD, 0);
7160 return; 7169 return;
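
With the two hunks above, a TPR-below-threshold exit taken while L2 runs is reflected to L1 only if L1 enabled the TPR shadow itself, and update_cr8_intercept() leaves the threshold alone in that case. A toy model of the forwarding decision (illustrative; only the CPU_BASED_TPR_SHADOW bit value is taken from the VMX primary controls layout):

    #include <stdio.h>

    #define CPU_BASED_TPR_SHADOW 0x00200000u  /* bit 21 of the primary exec controls */

    /* Reflect EXIT_REASON_TPR_BELOW_THRESHOLD to L1 only if L1 asked
     * for the TPR shadow in its own execution controls. */
    static int reflect_tpr_exit_to_l1(unsigned int l1_cpu_based_ctls)
    {
        return (l1_cpu_based_ctls & CPU_BASED_TPR_SHADOW) != 0;
    }

    int main(void)
    {
        printf("%d\n", reflect_tpr_exit_to_l1(CPU_BASED_TPR_SHADOW)); /* 1: L1 handles it */
        printf("%d\n", reflect_tpr_exit_to_l1(0));                    /* 0: L0 handles it */
        return 0;
    }
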
@@ -7745,10 +7754,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
7745 if (!kvm->arch.ept_identity_map_addr) 7754 if (!kvm->arch.ept_identity_map_addr)
7746 kvm->arch.ept_identity_map_addr = 7755 kvm->arch.ept_identity_map_addr =
7747 VMX_EPT_IDENTITY_PAGETABLE_ADDR; 7756 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
7748 err = -ENOMEM; 7757 err = init_rmode_identity_map(kvm);
7749 if (alloc_identity_pagetable(kvm) != 0) 7758 if (err)
7750 goto free_vmcs;
7751 if (!init_rmode_identity_map(kvm))
7752 goto free_vmcs; 7759 goto free_vmcs;
7753 } 7760 }
7754 7761
@@ -7927,6 +7934,55 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7927 kvm_inject_page_fault(vcpu, fault); 7934 kvm_inject_page_fault(vcpu, fault);
7928} 7935}
7929 7936
7937static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
7938 struct vmcs12 *vmcs12)
7939{
7940 struct vcpu_vmx *vmx = to_vmx(vcpu);
7941
7942 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
7943 /* TODO: Also verify bits beyond physical address width are 0 */
7944 if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
7945 return false;
7946
7947 /*
7948 * Translate L1 physical address to host physical
7949 * address for vmcs02. Keep the page pinned, so this
7950 * physical address remains valid. We keep a reference
7951 * to it so we can release it later.
7952 */
7953 if (vmx->nested.apic_access_page) /* shouldn't happen */
7954 nested_release_page(vmx->nested.apic_access_page);
7955 vmx->nested.apic_access_page =
7956 nested_get_page(vcpu, vmcs12->apic_access_addr);
7957 }
7958
7959 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
7960 /* TODO: Also verify bits beyond physical address width are 0 */
7961 if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
7962 return false;
7963
7964 if (vmx->nested.virtual_apic_page) /* shouldn't happen */
7965 nested_release_page(vmx->nested.virtual_apic_page);
7966 vmx->nested.virtual_apic_page =
7967 nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
7968
7969 /*
7970 * Failing the vm entry is _not_ what the processor does
7971 * but it's basically the only possibility we have.
7972 * We could still enter the guest if CR8 load exits are
7973 * enabled, CR8 store exits are enabled, and virtualize APIC
7974 * access is disabled; in this case the processor would never
7975 * use the TPR shadow and we could simply clear the bit from
7976 * the execution control. But such a configuration is useless,
7977 * so let's keep the code simple.
7978 */
7979 if (!vmx->nested.virtual_apic_page)
7980 return false;
7981 }
7982
7983 return true;
7984}
7985
7930static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) 7986static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
7931{ 7987{
7932 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; 7988 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
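
The new nested_get_vmcs12_pages() above validates and pins the L1-provided APIC pages once, before vmcs02 is built; free_nested() and nested_vmx_vmexit() drop the references again. A minimal userspace sketch of the address check the TODO comments describe, with illustrative names and an assumed 36-bit guest physical address width:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdbool.h>

    #define PAGE_SIZE 4096ULL

    /* Hypothetical stand-in for the checks the TODO comments describe:
     * the address must be page aligned and must not set any bit at or
     * above the guest's physical address width. */
    static bool valid_guest_page_addr(uint64_t addr, unsigned int maxphyaddr)
    {
        bool aligned  = (addr & (PAGE_SIZE - 1)) == 0;
        bool in_range = (addr >> maxphyaddr) == 0;
        return aligned && in_range;
    }

    int main(void)
    {
        printf("%d\n", valid_guest_page_addr(0xfee00000ULL, 36)); /* 1: aligned, in range */
        printf("%d\n", valid_guest_page_addr(0xfee00010ULL, 36)); /* 0: not page aligned */
        printf("%d\n", valid_guest_page_addr(1ULL << 40, 36));    /* 0: beyond the width */
        return 0;
    }
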
@@ -8073,16 +8129,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8073 8129
8074 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { 8130 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
8075 /* 8131 /*
8076 * Translate L1 physical address to host physical
8077 * address for vmcs02. Keep the page pinned, so this
8078 * physical address remains valid. We keep a reference
8079 * to it so we can release it later.
8080 */
8081 if (vmx->nested.apic_access_page) /* shouldn't happen */
8082 nested_release_page(vmx->nested.apic_access_page);
8083 vmx->nested.apic_access_page =
8084 nested_get_page(vcpu, vmcs12->apic_access_addr);
8085 /*
8086 * If translation failed, no matter: This feature asks 8132 * If translation failed, no matter: This feature asks
8087 * to exit when accessing the given address, and if it 8133 * to exit when accessing the given address, and if it
8088 * can never be accessed, this feature won't do 8134 * can never be accessed, this feature won't do
@@ -8127,6 +8173,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8127 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; 8173 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
8128 exec_control &= ~CPU_BASED_TPR_SHADOW; 8174 exec_control &= ~CPU_BASED_TPR_SHADOW;
8129 exec_control |= vmcs12->cpu_based_vm_exec_control; 8175 exec_control |= vmcs12->cpu_based_vm_exec_control;
8176
8177 if (exec_control & CPU_BASED_TPR_SHADOW) {
8178 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
8179 page_to_phys(vmx->nested.virtual_apic_page));
8180 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
8181 }
8182
8130 /* 8183 /*
8131 * Merging of IO and MSR bitmaps not currently supported. 8184 * Merging of IO and MSR bitmaps not currently supported.
8132 * Rather, exit every time. 8185 * Rather, exit every time.
@@ -8288,8 +8341,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8288 return 1; 8341 return 1;
8289 } 8342 }
8290 8343
8291 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && 8344 if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
8292 !PAGE_ALIGNED(vmcs12->apic_access_addr)) {
8293 /*TODO: Also verify bits beyond physical address width are 0*/ 8345 /*TODO: Also verify bits beyond physical address width are 0*/
8294 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8346 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8295 return 1; 8347 return 1;
@@ -8893,7 +8945,11 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8893 /* Unpin physical memory we referred to in vmcs02 */ 8945 /* Unpin physical memory we referred to in vmcs02 */
8894 if (vmx->nested.apic_access_page) { 8946 if (vmx->nested.apic_access_page) {
8895 nested_release_page(vmx->nested.apic_access_page); 8947 nested_release_page(vmx->nested.apic_access_page);
8896 vmx->nested.apic_access_page = 0; 8948 vmx->nested.apic_access_page = NULL;
8949 }
8950 if (vmx->nested.virtual_apic_page) {
8951 nested_release_page(vmx->nested.virtual_apic_page);
8952 vmx->nested.virtual_apic_page = NULL;
8897 } 8953 }
8898 8954
8899 /* 8955 /*
@@ -8949,7 +9005,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
8949 return X86EMUL_CONTINUE; 9005 return X86EMUL_CONTINUE;
8950} 9006}
8951 9007
8952void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) 9008static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
8953{ 9009{
8954 if (ple_gap) 9010 if (ple_gap)
8955 shrink_ple_window(vcpu); 9011 shrink_ple_window(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c10408ef9ab1..2d7f65daa8d0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
246} 246}
247EXPORT_SYMBOL_GPL(kvm_set_shared_msr); 247EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
248 248
249static void drop_user_return_notifiers(void *ignore) 249static void drop_user_return_notifiers(void)
250{ 250{
251 unsigned int cpu = smp_processor_id(); 251 unsigned int cpu = smp_processor_id();
252 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); 252 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
408} 408}
409EXPORT_SYMBOL_GPL(kvm_inject_page_fault); 409EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
410 410
411void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) 411static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
412{ 412{
413 if (mmu_is_nested(vcpu) && !fault->nested_page_fault) 413 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
414 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); 414 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
415 else 415 else
416 vcpu->arch.mmu.inject_page_fault(vcpu, fault); 416 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
417
418 return fault->nested_page_fault;
417} 419}
418 420
419void kvm_inject_nmi(struct kvm_vcpu *vcpu) 421void kvm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -457,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
457 gfn_t ngfn, void *data, int offset, int len, 459 gfn_t ngfn, void *data, int offset, int len,
458 u32 access) 460 u32 access)
459{ 461{
462 struct x86_exception exception;
460 gfn_t real_gfn; 463 gfn_t real_gfn;
461 gpa_t ngpa; 464 gpa_t ngpa;
462 465
463 ngpa = gfn_to_gpa(ngfn); 466 ngpa = gfn_to_gpa(ngfn);
464 real_gfn = mmu->translate_gpa(vcpu, ngpa, access); 467 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
465 if (real_gfn == UNMAPPED_GVA) 468 if (real_gfn == UNMAPPED_GVA)
466 return -EFAULT; 469 return -EFAULT;
467 470
@@ -1518,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
1518 pvclock_update_vm_gtod_copy(kvm); 1521 pvclock_update_vm_gtod_copy(kvm);
1519 1522
1520 kvm_for_each_vcpu(i, vcpu, kvm) 1523 kvm_for_each_vcpu(i, vcpu, kvm)
1521 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 1524 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1522 1525
1523 /* guest entries allowed */ 1526 /* guest entries allowed */
1524 kvm_for_each_vcpu(i, vcpu, kvm) 1527 kvm_for_each_vcpu(i, vcpu, kvm)
@@ -1661,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work)
1661 struct kvm_vcpu *vcpu; 1664 struct kvm_vcpu *vcpu;
1662 1665
1663 kvm_for_each_vcpu(i, vcpu, kvm) { 1666 kvm_for_each_vcpu(i, vcpu, kvm) {
1664 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 1667 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1665 kvm_vcpu_kick(vcpu); 1668 kvm_vcpu_kick(vcpu);
1666 } 1669 }
1667} 1670}
@@ -1670,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1670{ 1673{
1671 struct kvm *kvm = v->kvm; 1674 struct kvm *kvm = v->kvm;
1672 1675
1673 set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests); 1676 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1674 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 1677 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
1675 KVMCLOCK_UPDATE_DELAY); 1678 KVMCLOCK_UPDATE_DELAY);
1676} 1679}
@@ -1726,7 +1729,7 @@ static bool valid_mtrr_type(unsigned t)
1726static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1729static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1727{ 1730{
1728 int i; 1731 int i;
1729 u64 mask = 0; 1732 u64 mask;
1730 1733
1731 if (!msr_mtrr_valid(msr)) 1734 if (!msr_mtrr_valid(msr))
1732 return false; 1735 return false;
@@ -1750,8 +1753,7 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1750 /* variable MTRRs */ 1753 /* variable MTRRs */
1751 WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); 1754 WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
1752 1755
1753 for (i = 63; i > boot_cpu_data.x86_phys_bits; i--) 1756 mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
1754 mask |= (1ULL << i);
1755 if ((msr & 1) == 0) { 1757 if ((msr & 1) == 0) {
1756 /* MTRR base */ 1758 /* MTRR base */
1757 if (!valid_mtrr_type(data & 0xff)) 1759 if (!valid_mtrr_type(data & 0xff))
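
The reserved-bits mask for variable MTRRs is now derived from the guest's CPUID-reported physical address width with a single shift instead of a bit-by-bit loop over the host's width. A small standalone demonstration of the mask arithmetic, with example values only:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int maxphyaddr = 36;           /* example guest PA width */
        uint64_t mask = (~0ULL) << maxphyaddr;  /* bits 63..maxphyaddr are reserved */

        uint64_t base_ok  = 0x0000000ff0000006ULL; /* fits below bit 36 */
        uint64_t base_bad = 0x0000010000000006ULL; /* bit 40 set: reserved */

        printf("ok:  %s\n", (base_ok  & mask) ? "reject" : "accept");
        printf("bad: %s\n", (base_bad & mask) ? "reject" : "accept");
        return 0;
    }
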
@@ -2847,7 +2849,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2847 if (unlikely(vcpu->arch.tsc_offset_adjustment)) { 2849 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2848 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); 2850 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2849 vcpu->arch.tsc_offset_adjustment = 0; 2851 vcpu->arch.tsc_offset_adjustment = 0;
2850 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 2852 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2851 } 2853 }
2852 2854
2853 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { 2855 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
@@ -4064,16 +4066,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
4064 kvm_x86_ops->get_segment(vcpu, var, seg); 4066 kvm_x86_ops->get_segment(vcpu, var, seg);
4065} 4067}
4066 4068
4067gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 4069gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
4070 struct x86_exception *exception)
4068{ 4071{
4069 gpa_t t_gpa; 4072 gpa_t t_gpa;
4070 struct x86_exception exception;
4071 4073
4072 BUG_ON(!mmu_is_nested(vcpu)); 4074 BUG_ON(!mmu_is_nested(vcpu));
4073 4075
4074 /* NPT walks are always user-walks */ 4076 /* NPT walks are always user-walks */
4075 access |= PFERR_USER_MASK; 4077 access |= PFERR_USER_MASK;
4076 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); 4078 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
4077 4079
4078 return t_gpa; 4080 return t_gpa;
4079} 4081}
@@ -4930,16 +4932,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4930 } 4932 }
4931} 4933}
4932 4934
4933static void inject_emulated_exception(struct kvm_vcpu *vcpu) 4935static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
4934{ 4936{
4935 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 4937 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4936 if (ctxt->exception.vector == PF_VECTOR) 4938 if (ctxt->exception.vector == PF_VECTOR)
4937 kvm_propagate_fault(vcpu, &ctxt->exception); 4939 return kvm_propagate_fault(vcpu, &ctxt->exception);
4938 else if (ctxt->exception.error_code_valid) 4940
4941 if (ctxt->exception.error_code_valid)
4939 kvm_queue_exception_e(vcpu, ctxt->exception.vector, 4942 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4940 ctxt->exception.error_code); 4943 ctxt->exception.error_code);
4941 else 4944 else
4942 kvm_queue_exception(vcpu, ctxt->exception.vector); 4945 kvm_queue_exception(vcpu, ctxt->exception.vector);
4946 return false;
4943} 4947}
4944 4948
4945static void init_emulate_ctxt(struct kvm_vcpu *vcpu) 4949static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -5301,8 +5305,9 @@ restart:
5301 } 5305 }
5302 5306
5303 if (ctxt->have_exception) { 5307 if (ctxt->have_exception) {
5304 inject_emulated_exception(vcpu);
5305 r = EMULATE_DONE; 5308 r = EMULATE_DONE;
5309 if (inject_emulated_exception(vcpu))
5310 return r;
5306 } else if (vcpu->arch.pio.count) { 5311 } else if (vcpu->arch.pio.count) {
5307 if (!vcpu->arch.pio.in) { 5312 if (!vcpu->arch.pio.in) {
5308 /* FIXME: return into emulator if single-stepping. */ 5313 /* FIXME: return into emulator if single-stepping. */
@@ -5570,7 +5575,7 @@ static void kvm_set_mmio_spte_mask(void)
5570 * entry to generate page fault with PFER.RSV = 1. 5575 * entry to generate page fault with PFER.RSV = 1.
5571 */ 5576 */
5572 /* Mask the reserved physical address bits. */ 5577 /* Mask the reserved physical address bits. */
5573 mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr; 5578 mask = rsvd_bits(maxphyaddr, 51);
5574 5579
5575 /* Bit 62 is always reserved for 32bit host. */ 5580 /* Bit 62 is always reserved for 32bit host. */
5576 mask |= 0x3ull << 62; 5581 mask |= 0x3ull << 62;
@@ -5601,7 +5606,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
5601 spin_lock(&kvm_lock); 5606 spin_lock(&kvm_lock);
5602 list_for_each_entry(kvm, &vm_list, vm_list) 5607 list_for_each_entry(kvm, &vm_list, vm_list)
5603 kvm_for_each_vcpu(i, vcpu, kvm) 5608 kvm_for_each_vcpu(i, vcpu, kvm)
5604 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); 5609 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
5605 atomic_set(&kvm_guest_has_master_clock, 0); 5610 atomic_set(&kvm_guest_has_master_clock, 0);
5606 spin_unlock(&kvm_lock); 5611 spin_unlock(&kvm_lock);
5607} 5612}
@@ -6959,7 +6964,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
6959 kvm_rip_write(vcpu, 0); 6964 kvm_rip_write(vcpu, 0);
6960} 6965}
6961 6966
6962int kvm_arch_hardware_enable(void *garbage) 6967int kvm_arch_hardware_enable(void)
6963{ 6968{
6964 struct kvm *kvm; 6969 struct kvm *kvm;
6965 struct kvm_vcpu *vcpu; 6970 struct kvm_vcpu *vcpu;
@@ -6970,7 +6975,7 @@ int kvm_arch_hardware_enable(void *garbage)
6970 bool stable, backwards_tsc = false; 6975 bool stable, backwards_tsc = false;
6971 6976
6972 kvm_shared_msr_cpu_online(); 6977 kvm_shared_msr_cpu_online();
6973 ret = kvm_x86_ops->hardware_enable(garbage); 6978 ret = kvm_x86_ops->hardware_enable();
6974 if (ret != 0) 6979 if (ret != 0)
6975 return ret; 6980 return ret;
6976 6981
@@ -6979,7 +6984,7 @@ int kvm_arch_hardware_enable(void *garbage)
6979 list_for_each_entry(kvm, &vm_list, vm_list) { 6984 list_for_each_entry(kvm, &vm_list, vm_list) {
6980 kvm_for_each_vcpu(i, vcpu, kvm) { 6985 kvm_for_each_vcpu(i, vcpu, kvm) {
6981 if (!stable && vcpu->cpu == smp_processor_id()) 6986 if (!stable && vcpu->cpu == smp_processor_id())
6982 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 6987 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6983 if (stable && vcpu->arch.last_host_tsc > local_tsc) { 6988 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6984 backwards_tsc = true; 6989 backwards_tsc = true;
6985 if (vcpu->arch.last_host_tsc > max_tsc) 6990 if (vcpu->arch.last_host_tsc > max_tsc)
@@ -7033,8 +7038,7 @@ int kvm_arch_hardware_enable(void *garbage)
7033 kvm_for_each_vcpu(i, vcpu, kvm) { 7038 kvm_for_each_vcpu(i, vcpu, kvm) {
7034 vcpu->arch.tsc_offset_adjustment += delta_cyc; 7039 vcpu->arch.tsc_offset_adjustment += delta_cyc;
7035 vcpu->arch.last_host_tsc = local_tsc; 7040 vcpu->arch.last_host_tsc = local_tsc;
7036 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, 7041 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7037 &vcpu->requests);
7038 } 7042 }
7039 7043
7040 /* 7044 /*
@@ -7051,10 +7055,10 @@ int kvm_arch_hardware_enable(void *garbage)
7051 return 0; 7055 return 0;
7052} 7056}
7053 7057
7054void kvm_arch_hardware_disable(void *garbage) 7058void kvm_arch_hardware_disable(void)
7055{ 7059{
7056 kvm_x86_ops->hardware_disable(garbage); 7060 kvm_x86_ops->hardware_disable();
7057 drop_user_return_notifiers(garbage); 7061 drop_user_return_notifiers();
7058} 7062}
7059 7063
7060int kvm_arch_hardware_setup(void) 7064int kvm_arch_hardware_setup(void)
@@ -7269,8 +7273,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
7269 kvm_free_vcpus(kvm); 7273 kvm_free_vcpus(kvm);
7270 if (kvm->arch.apic_access_page) 7274 if (kvm->arch.apic_access_page)
7271 put_page(kvm->arch.apic_access_page); 7275 put_page(kvm->arch.apic_access_page);
7272 if (kvm->arch.ept_identity_pagetable)
7273 put_page(kvm->arch.ept_identity_pagetable);
7274 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7276 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7275} 7277}
7276 7278
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 306a1b77581f..985fb2c006fa 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
88 vcpu->arch.mmio_gva = gva & PAGE_MASK; 88 vcpu->arch.mmio_gva = gva & PAGE_MASK;
89 vcpu->arch.access = access; 89 vcpu->arch.access = access;
90 vcpu->arch.mmio_gfn = gfn; 90 vcpu->arch.mmio_gfn = gfn;
91 vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
92}
93
94static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
95{
96 return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation;
91} 97}
92 98
93/* 99/*
94 * Clear the mmio cache info for the given gva, 100 * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we
95 * specially, if gva is ~0ul, we clear all mmio cache info. 101 * clear all mmio cache info.
96 */ 102 */
103#define MMIO_GVA_ANY (~(gva_t)0)
104
97static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) 105static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
98{ 106{
99 if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) 107 if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
100 return; 108 return;
101 109
102 vcpu->arch.mmio_gva = 0; 110 vcpu->arch.mmio_gva = 0;
@@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
104 112
105static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) 113static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
106{ 114{
107 if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) 115 if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva &&
116 vcpu->arch.mmio_gva == (gva & PAGE_MASK))
108 return true; 117 return true;
109 118
110 return false; 119 return false;
@@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
112 121
113static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) 122static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
114{ 123{
115 if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) 124 if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn &&
125 vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
116 return true; 126 return true;
117 127
118 return false; 128 return false;
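
The cached MMIO gva/gfn translation is now stamped with the memslot generation when it is filled and ignored once the generation moves on, so a memslot update cannot leave a stale hit behind. A toy model of that check (not the kernel structures; names are illustrative):

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct mmio_cache {
        uint64_t gva;
        uint64_t gfn;
        uint64_t gen;   /* generation at fill time */
    };

    static uint64_t memslots_generation; /* bumped on every memslot update */

    static void cache_fill(struct mmio_cache *c, uint64_t gva, uint64_t gfn)
    {
        c->gva = gva;
        c->gfn = gfn;
        c->gen = memslots_generation;
    }

    static bool cache_match(const struct mmio_cache *c, uint64_t gva)
    {
        /* A hit is only trusted under the generation it was filled in. */
        return c->gen == memslots_generation && c->gva == gva;
    }

    int main(void)
    {
        struct mmio_cache c = {0};

        cache_fill(&c, 0x1000, 0xfee00);
        printf("%d\n", cache_match(&c, 0x1000)); /* 1: same generation */

        memslots_generation++;                   /* a memslot was added/removed */
        printf("%d\n", cache_match(&c, 0x1000)); /* 0: stale entry ignored */
        return 0;
    }
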
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d8a658ec174..bbd8d57b04e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page)
140#define KVM_USERSPACE_IRQ_SOURCE_ID 0 140#define KVM_USERSPACE_IRQ_SOURCE_ID 0
141#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 141#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
142 142
143struct kvm;
144struct kvm_vcpu;
145extern struct kmem_cache *kvm_vcpu_cache; 143extern struct kmem_cache *kvm_vcpu_cache;
146 144
147extern spinlock_t kvm_lock; 145extern spinlock_t kvm_lock;
@@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry {
325 struct hlist_node link; 323 struct hlist_node link;
326}; 324};
327 325
328struct kvm_irq_routing_table;
329
330#ifndef KVM_PRIVATE_MEM_SLOTS 326#ifndef KVM_PRIVATE_MEM_SLOTS
331#define KVM_PRIVATE_MEM_SLOTS 0 327#define KVM_PRIVATE_MEM_SLOTS 0
332#endif 328#endif
@@ -636,8 +632,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
636int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); 632int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
637void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); 633void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
638 634
639int kvm_arch_hardware_enable(void *garbage); 635int kvm_arch_hardware_enable(void);
640void kvm_arch_hardware_disable(void *garbage); 636void kvm_arch_hardware_disable(void);
641int kvm_arch_hardware_setup(void); 637int kvm_arch_hardware_setup(void);
642void kvm_arch_hardware_unsetup(void); 638void kvm_arch_hardware_unsetup(void);
643void kvm_arch_check_processor_compat(void *rtn); 639void kvm_arch_check_processor_compat(void *rtn);
@@ -1038,8 +1034,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
1038 1034
1039extern bool kvm_rebooting; 1035extern bool kvm_rebooting;
1040 1036
1041struct kvm_device_ops;
1042
1043struct kvm_device { 1037struct kvm_device {
1044 struct kvm_device_ops *ops; 1038 struct kvm_device_ops *ops;
1045 struct kvm *kvm; 1039 struct kvm *kvm;
@@ -1072,12 +1066,10 @@ struct kvm_device_ops {
1072void kvm_device_get(struct kvm_device *dev); 1066void kvm_device_get(struct kvm_device *dev);
1073void kvm_device_put(struct kvm_device *dev); 1067void kvm_device_put(struct kvm_device *dev);
1074struct kvm_device *kvm_device_from_filp(struct file *filp); 1068struct kvm_device *kvm_device_from_filp(struct file *filp);
1069int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
1075 1070
1076extern struct kvm_device_ops kvm_mpic_ops; 1071extern struct kvm_device_ops kvm_mpic_ops;
1077extern struct kvm_device_ops kvm_xics_ops; 1072extern struct kvm_device_ops kvm_xics_ops;
1078extern struct kvm_device_ops kvm_vfio_ops;
1079extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
1080extern struct kvm_device_ops kvm_flic_ops;
1081 1073
1082#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1074#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1083 1075
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc95..b606bb689a3e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
17#ifndef __KVM_TYPES_H__ 17#ifndef __KVM_TYPES_H__
18#define __KVM_TYPES_H__ 18#define __KVM_TYPES_H__
19 19
20struct kvm;
21struct kvm_async_pf;
22struct kvm_device_ops;
23struct kvm_interrupt;
24struct kvm_irq_routing_table;
25struct kvm_memory_slot;
26struct kvm_one_reg;
27struct kvm_run;
28struct kvm_userspace_memory_region;
29struct kvm_vcpu;
30struct kvm_vcpu_init;
31
32enum kvm_mr_change;
33
20#include <asm/types.h> 34#include <asm/types.h>
21 35
22/* 36/*
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 908925ace776..ab679c395042 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq,
95 __entry->coalesced ? " (coalesced)" : "") 95 __entry->coalesced ? " (coalesced)" : "")
96); 96);
97 97
98TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
99 TP_PROTO(__u64 e),
100 TP_ARGS(e),
101
102 TP_STRUCT__entry(
103 __field( __u64, e )
104 ),
105
106 TP_fast_assign(
107 __entry->e = e;
108 ),
109
110 TP_printk("dst %x vec=%u (%s|%s|%s%s)",
111 (u8)(__entry->e >> 56), (u8)__entry->e,
112 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
113 (__entry->e & (1<<11)) ? "logical" : "physical",
114 (__entry->e & (1<<15)) ? "level" : "edge",
115 (__entry->e & (1<<16)) ? "|masked" : "")
116);
117
98TRACE_EVENT(kvm_msi_set_irq, 118TRACE_EVENT(kvm_msi_set_irq,
99 TP_PROTO(__u64 address, __u64 data), 119 TP_PROTO(__u64 address, __u64 data),
100 TP_ARGS(address, data), 120 TP_ARGS(address, data),
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cf3a2ff440e4..60768822b140 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -654,9 +654,7 @@ struct kvm_ppc_smmu_info {
654#endif 654#endif
655/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ 655/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
656#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 656#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
657#ifdef __KVM_HAVE_USER_NMI
658#define KVM_CAP_USER_NMI 22 657#define KVM_CAP_USER_NMI 22
659#endif
660#ifdef __KVM_HAVE_GUEST_DEBUG 658#ifdef __KVM_HAVE_GUEST_DEBUG
661#define KVM_CAP_SET_GUEST_DEBUG 23 659#define KVM_CAP_SET_GUEST_DEBUG 23
662#endif 660#endif
@@ -738,9 +736,7 @@ struct kvm_ppc_smmu_info {
738#define KVM_CAP_PPC_GET_SMMU_INFO 78 736#define KVM_CAP_PPC_GET_SMMU_INFO 78
739#define KVM_CAP_S390_COW 79 737#define KVM_CAP_S390_COW 79
740#define KVM_CAP_PPC_ALLOC_HTAB 80 738#define KVM_CAP_PPC_ALLOC_HTAB 80
741#ifdef __KVM_HAVE_READONLY_MEM
742#define KVM_CAP_READONLY_MEM 81 739#define KVM_CAP_READONLY_MEM 81
743#endif
744#define KVM_CAP_IRQFD_RESAMPLE 82 740#define KVM_CAP_IRQFD_RESAMPLE 82
745#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 741#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
746#define KVM_CAP_PPC_HTAB_FD 84 742#define KVM_CAP_PPC_HTAB_FD 84
@@ -947,15 +943,25 @@ struct kvm_device_attr {
947 __u64 addr; /* userspace address of attr data */ 943 __u64 addr; /* userspace address of attr data */
948}; 944};
949 945
950#define KVM_DEV_TYPE_FSL_MPIC_20 1
951#define KVM_DEV_TYPE_FSL_MPIC_42 2
952#define KVM_DEV_TYPE_XICS 3
953#define KVM_DEV_TYPE_VFIO 4
954#define KVM_DEV_VFIO_GROUP 1 946#define KVM_DEV_VFIO_GROUP 1
955#define KVM_DEV_VFIO_GROUP_ADD 1 947#define KVM_DEV_VFIO_GROUP_ADD 1
956#define KVM_DEV_VFIO_GROUP_DEL 2 948#define KVM_DEV_VFIO_GROUP_DEL 2
957#define KVM_DEV_TYPE_ARM_VGIC_V2 5 949
958#define KVM_DEV_TYPE_FLIC 6 950enum kvm_device_type {
951 KVM_DEV_TYPE_FSL_MPIC_20 = 1,
952#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20
953 KVM_DEV_TYPE_FSL_MPIC_42,
954#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42
955 KVM_DEV_TYPE_XICS,
956#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS
957 KVM_DEV_TYPE_VFIO,
958#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO
959 KVM_DEV_TYPE_ARM_VGIC_V2,
960#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
961 KVM_DEV_TYPE_FLIC,
962#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
963 KVM_DEV_TYPE_MAX,
964};
959 965
960/* 966/*
961 * ioctls for VM fds 967 * ioctls for VM fds
@@ -1093,7 +1099,7 @@ struct kvm_s390_ucas_mapping {
1093#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) 1099#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
1094#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) 1100#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
1095#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) 1101#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
1096/* Available with KVM_CAP_NMI */ 1102/* Available with KVM_CAP_USER_NMI */
1097#define KVM_NMI _IO(KVMIO, 0x9a) 1103#define KVM_NMI _IO(KVMIO, 0x9a)
1098/* Available with KVM_CAP_SET_GUEST_DEBUG */ 1104/* Available with KVM_CAP_SET_GUEST_DEBUG */
1099#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) 1105#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index efe6eee2e7eb..eeb23b37f87c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1522 return 0; 1522 return 0;
1523} 1523}
1524 1524
1525static void vgic_init_maintenance_interrupt(void *info)
1526{
1527 enable_percpu_irq(vgic->maint_irq, 0);
1528}
1529
1530static int vgic_cpu_notify(struct notifier_block *self,
1531 unsigned long action, void *cpu)
1532{
1533 switch (action) {
1534 case CPU_STARTING:
1535 case CPU_STARTING_FROZEN:
1536 vgic_init_maintenance_interrupt(NULL);
1537 break;
1538 case CPU_DYING:
1539 case CPU_DYING_FROZEN:
1540 disable_percpu_irq(vgic->maint_irq);
1541 break;
1542 }
1543
1544 return NOTIFY_OK;
1545}
1546
1547static struct notifier_block vgic_cpu_nb = {
1548 .notifier_call = vgic_cpu_notify,
1549};
1550
1551static const struct of_device_id vgic_ids[] = {
1552 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
1553 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
1554 {},
1555};
1556
1557int kvm_vgic_hyp_init(void)
1558{
1559 const struct of_device_id *matched_id;
1560 const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
1561 const struct vgic_params **);
1562 struct device_node *vgic_node;
1563 int ret;
1564
1565 vgic_node = of_find_matching_node_and_match(NULL,
1566 vgic_ids, &matched_id);
1567 if (!vgic_node) {
1568 kvm_err("error: no compatible GIC node found\n");
1569 return -ENODEV;
1570 }
1571
1572 vgic_probe = matched_id->data;
1573 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
1574 if (ret)
1575 return ret;
1576
1577 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
1578 "vgic", kvm_get_running_vcpus());
1579 if (ret) {
1580 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
1581 return ret;
1582 }
1583
1584 ret = __register_cpu_notifier(&vgic_cpu_nb);
1585 if (ret) {
1586 kvm_err("Cannot register vgic CPU notifier\n");
1587 goto out_free_irq;
1588 }
1589
1590 /* Callback into arch code for setup */
1591 vgic_arch_setup(vgic);
1592
1593 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
1594
1595 return 0;
1596
1597out_free_irq:
1598 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
1599 return ret;
1600}
1601
1602/** 1525/**
1603 * kvm_vgic_init - Initialize global VGIC state before running any VCPUs 1526 * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
1604 * @kvm: pointer to the kvm struct 1527 * @kvm: pointer to the kvm struct
@@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
2062 return kvm_vgic_create(dev->kvm); 1985 return kvm_vgic_create(dev->kvm);
2063} 1986}
2064 1987
2065struct kvm_device_ops kvm_arm_vgic_v2_ops = { 1988static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2066 .name = "kvm-arm-vgic", 1989 .name = "kvm-arm-vgic",
2067 .create = vgic_create, 1990 .create = vgic_create,
2068 .destroy = vgic_destroy, 1991 .destroy = vgic_destroy,
@@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2070 .get_attr = vgic_get_attr, 1993 .get_attr = vgic_get_attr,
2071 .has_attr = vgic_has_attr, 1994 .has_attr = vgic_has_attr,
2072}; 1995};
1996
1997static void vgic_init_maintenance_interrupt(void *info)
1998{
1999 enable_percpu_irq(vgic->maint_irq, 0);
2000}
2001
2002static int vgic_cpu_notify(struct notifier_block *self,
2003 unsigned long action, void *cpu)
2004{
2005 switch (action) {
2006 case CPU_STARTING:
2007 case CPU_STARTING_FROZEN:
2008 vgic_init_maintenance_interrupt(NULL);
2009 break;
2010 case CPU_DYING:
2011 case CPU_DYING_FROZEN:
2012 disable_percpu_irq(vgic->maint_irq);
2013 break;
2014 }
2015
2016 return NOTIFY_OK;
2017}
2018
2019static struct notifier_block vgic_cpu_nb = {
2020 .notifier_call = vgic_cpu_notify,
2021};
2022
2023static const struct of_device_id vgic_ids[] = {
2024 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
2025 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
2026 {},
2027};
2028
2029int kvm_vgic_hyp_init(void)
2030{
2031 const struct of_device_id *matched_id;
2032 const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
2033 const struct vgic_params **);
2034 struct device_node *vgic_node;
2035 int ret;
2036
2037 vgic_node = of_find_matching_node_and_match(NULL,
2038 vgic_ids, &matched_id);
2039 if (!vgic_node) {
2040 kvm_err("error: no compatible GIC node found\n");
2041 return -ENODEV;
2042 }
2043
2044 vgic_probe = matched_id->data;
2045 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
2046 if (ret)
2047 return ret;
2048
2049 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2050 "vgic", kvm_get_running_vcpus());
2051 if (ret) {
2052 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
2053 return ret;
2054 }
2055
2056 ret = __register_cpu_notifier(&vgic_cpu_nb);
2057 if (ret) {
2058 kvm_err("Cannot register vgic CPU notifier\n");
2059 goto out_free_irq;
2060 }
2061
 2062 /* Callback into arch code for setup */
2063 vgic_arch_setup(vgic);
2064
2065 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2066
2067 return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
2068 KVM_DEV_TYPE_ARM_VGIC_V2);
2069
2070out_free_irq:
2071 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
2072 return ret;
2073}
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e8ce34c9db32..0ba4057d271b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
405 spin_unlock(&ioapic->lock); 405 spin_unlock(&ioapic->lock);
406} 406}
407 407
408static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
409{
410 int i;
411 struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
412 eoi_inject.work);
413 spin_lock(&ioapic->lock);
414 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
415 union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
416
417 if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
418 continue;
419
420 if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
421 ioapic_service(ioapic, i, false);
422 }
423 spin_unlock(&ioapic->lock);
424}
425
426#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
427
408static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, 428static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
409 struct kvm_ioapic *ioapic, int vector, int trigger_mode) 429 struct kvm_ioapic *ioapic, int vector, int trigger_mode)
410{ 430{
@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
435 455
436 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); 456 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
437 ent->fields.remote_irr = 0; 457 ent->fields.remote_irr = 0;
438 if (ioapic->irr & (1 << i)) 458 if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
439 ioapic_service(ioapic, i, false); 459 ++ioapic->irq_eoi[i];
460 if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
461 /*
462 * Real hardware does not deliver the interrupt
463 * immediately during eoi broadcast, and this
464 * lets a buggy guest make slow progress
465 * even if it does not correctly handle a
466 * level-triggered interrupt. Emulate this
467 * behavior if we detect an interrupt storm.
468 */
469 schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
470 ioapic->irq_eoi[i] = 0;
471 trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
472 } else {
473 ioapic_service(ioapic, i, false);
474 }
475 } else {
476 ioapic->irq_eoi[i] = 0;
477 }
440 } 478 }
441} 479}
442 480
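
The EOI path above now counts consecutive re-deliveries of a still-pending, unmasked, level-triggered pin and, once IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT of them pile up, defers the next injection to the new eoi_inject delayed work instead of re-injecting immediately. A toy model of the per-pin decision (illustrative only; the real code also resets the counter when the pin is no longer pending):

    #include <stdio.h>

    #define SUCCESSIVE_MAX 10000  /* mirrors IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT */

    static unsigned int eoi_count;  /* per-pin counter in the real code */

    /* Called on EOI broadcast while the level-triggered pin is still asserted. */
    static const char *on_eoi_pending_irq(void)
    {
        if (++eoi_count == SUCCESSIVE_MAX) {
            eoi_count = 0;
            return "defer (schedule delayed re-injection)";
        }
        return "re-inject now";
    }

    int main(void)
    {
        for (unsigned int i = 1; i <= SUCCESSIVE_MAX; i++) {
            const char *action = on_eoi_pending_irq();
            if (i == 1 || i == SUCCESSIVE_MAX)
                printf("eoi %u: %s\n", i, action);
        }
        return 0;
    }
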
@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
565{ 603{
566 int i; 604 int i;
567 605
606 cancel_delayed_work_sync(&ioapic->eoi_inject);
568 for (i = 0; i < IOAPIC_NUM_PINS; i++) 607 for (i = 0; i < IOAPIC_NUM_PINS; i++)
569 ioapic->redirtbl[i].fields.mask = 1; 608 ioapic->redirtbl[i].fields.mask = 1;
570 ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; 609 ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
571 ioapic->ioregsel = 0; 610 ioapic->ioregsel = 0;
572 ioapic->irr = 0; 611 ioapic->irr = 0;
573 ioapic->id = 0; 612 ioapic->id = 0;
613 memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
574 rtc_irq_eoi_tracking_reset(ioapic); 614 rtc_irq_eoi_tracking_reset(ioapic);
575 update_handled_vectors(ioapic); 615 update_handled_vectors(ioapic);
576} 616}
@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm)
589 if (!ioapic) 629 if (!ioapic)
590 return -ENOMEM; 630 return -ENOMEM;
591 spin_lock_init(&ioapic->lock); 631 spin_lock_init(&ioapic->lock);
632 INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
592 kvm->arch.vioapic = ioapic; 633 kvm->arch.vioapic = ioapic;
593 kvm_ioapic_reset(ioapic); 634 kvm_ioapic_reset(ioapic);
594 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); 635 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
609{ 650{
610 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 651 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
611 652
653 cancel_delayed_work_sync(&ioapic->eoi_inject);
612 if (ioapic) { 654 if (ioapic) {
613 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); 655 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
614 kvm->arch.vioapic = NULL; 656 kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 90d43e95dcf8..e23b70634f1e 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -59,6 +59,8 @@ struct kvm_ioapic {
59 spinlock_t lock; 59 spinlock_t lock;
60 DECLARE_BITMAP(handled_vectors, 256); 60 DECLARE_BITMAP(handled_vectors, 256);
61 struct rtc_status rtc_status; 61 struct rtc_status rtc_status;
62 struct delayed_work eoi_inject;
63 u32 irq_eoi[IOAPIC_NUM_PINS];
62}; 64};
63 65
64#ifdef DEBUG 66#ifdef DEBUG
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 76c92a7249c4..278232025129 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,8 +95,6 @@ static int hardware_enable_all(void);
95static void hardware_disable_all(void); 95static void hardware_disable_all(void);
96 96
97static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 97static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
98static void update_memslots(struct kvm_memslots *slots,
99 struct kvm_memory_slot *new, u64 last_generation);
100 98
101static void kvm_release_pfn_dirty(pfn_t pfn); 99static void kvm_release_pfn_dirty(pfn_t pfn);
102static void mark_page_dirty_in_slot(struct kvm *kvm, 100static void mark_page_dirty_in_slot(struct kvm *kvm,
@@ -477,6 +475,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
477 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 475 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
478 if (!kvm->memslots) 476 if (!kvm->memslots)
479 goto out_err_no_srcu; 477 goto out_err_no_srcu;
478
479 /*
480 * Init kvm generation close to the maximum to easily test the
481 * code of handling generation number wrap-around.
482 */
483 kvm->memslots->generation = -150;
484
480 kvm_init_memslots_id(kvm); 485 kvm_init_memslots_id(kvm);
481 if (init_srcu_struct(&kvm->srcu)) 486 if (init_srcu_struct(&kvm->srcu))
482 goto out_err_no_srcu; 487 goto out_err_no_srcu;
@@ -688,8 +693,7 @@ static void sort_memslots(struct kvm_memslots *slots)
688} 693}
689 694
690static void update_memslots(struct kvm_memslots *slots, 695static void update_memslots(struct kvm_memslots *slots,
691 struct kvm_memory_slot *new, 696 struct kvm_memory_slot *new)
692 u64 last_generation)
693{ 697{
694 if (new) { 698 if (new) {
695 int id = new->id; 699 int id = new->id;
@@ -700,15 +704,13 @@ static void update_memslots(struct kvm_memslots *slots,
700 if (new->npages != npages) 704 if (new->npages != npages)
701 sort_memslots(slots); 705 sort_memslots(slots);
702 } 706 }
703
704 slots->generation = last_generation + 1;
705} 707}
706 708
707static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) 709static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
708{ 710{
709 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; 711 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
710 712
711#ifdef KVM_CAP_READONLY_MEM 713#ifdef __KVM_HAVE_READONLY_MEM
712 valid_flags |= KVM_MEM_READONLY; 714 valid_flags |= KVM_MEM_READONLY;
713#endif 715#endif
714 716
@@ -723,10 +725,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
723{ 725{
724 struct kvm_memslots *old_memslots = kvm->memslots; 726 struct kvm_memslots *old_memslots = kvm->memslots;
725 727
726 update_memslots(slots, new, kvm->memslots->generation); 728 /*
729 * Set the low bit in the generation, which disables SPTE caching
730 * until the end of synchronize_srcu_expedited.
731 */
732 WARN_ON(old_memslots->generation & 1);
733 slots->generation = old_memslots->generation + 1;
734
735 update_memslots(slots, new);
727 rcu_assign_pointer(kvm->memslots, slots); 736 rcu_assign_pointer(kvm->memslots, slots);
728 synchronize_srcu_expedited(&kvm->srcu); 737 synchronize_srcu_expedited(&kvm->srcu);
729 738
739 /*
740 * Increment the new memslot generation a second time. This prevents
741 * vm exits that race with memslot updates from caching a memslot
742 * generation that will (potentially) be valid forever.
743 */
744 slots->generation++;
745
730 kvm_arch_memslots_updated(kvm); 746 kvm_arch_memslots_updated(kvm);
731 747
732 return old_memslots; 748 return old_memslots;
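
install_new_memslots() now keeps the generation odd for the whole SRCU grace period and bumps it a second time afterwards, while kvm_create_vm() starts the counter at -150 so the wrap-around handling is exercised soon after boot. A standalone sketch of why a value sampled during an update can never match the published generation (the real fields live in struct kvm_memslots; this is only a model):

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model of the update protocol: the generation is odd for the whole
     * time a slot update is in flight, then bumped a second time, so a value
     * sampled during the update can never match the published generation. */
    static uint64_t generation = (uint64_t)-150; /* start near wrap, as the patch does */

    static void install_new_memslots_model(void)
    {
        generation++;  /* odd: SPTE/MMIO caching disabled */
        /* ...rcu_assign_pointer() + synchronize_srcu_expedited() go here... */
        generation++;  /* even again, strictly newer than anything cached */
    }

    int main(void)
    {
        uint64_t before = generation;
        uint64_t racing_sample = before + 1;   /* the odd, in-flight value */

        install_new_memslots_model();

        printf("racing sample is odd:          %d\n", (int)(racing_sample & 1));
        printf("racing sample matches current: %d\n", racing_sample == generation);
        return 0;
    }
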
@@ -777,7 +793,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
777 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 793 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
778 npages = mem->memory_size >> PAGE_SHIFT; 794 npages = mem->memory_size >> PAGE_SHIFT;
779 795
780 r = -EINVAL;
781 if (npages > KVM_MEM_MAX_NR_PAGES) 796 if (npages > KVM_MEM_MAX_NR_PAGES)
782 goto out; 797 goto out;
783 798
@@ -791,7 +806,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
791 new.npages = npages; 806 new.npages = npages;
792 new.flags = mem->flags; 807 new.flags = mem->flags;
793 808
794 r = -EINVAL;
795 if (npages) { 809 if (npages) {
796 if (!old.npages) 810 if (!old.npages)
797 change = KVM_MR_CREATE; 811 change = KVM_MR_CREATE;
@@ -847,7 +861,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
847 } 861 }
848 862
849 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { 863 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
850 r = -ENOMEM;
851 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), 864 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
852 GFP_KERNEL); 865 GFP_KERNEL);
853 if (!slots) 866 if (!slots)
@@ -1776,8 +1789,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
1776 bool eligible; 1789 bool eligible;
1777 1790
1778 eligible = !vcpu->spin_loop.in_spin_loop || 1791 eligible = !vcpu->spin_loop.in_spin_loop ||
1779 (vcpu->spin_loop.in_spin_loop && 1792 vcpu->spin_loop.dy_eligible;
1780 vcpu->spin_loop.dy_eligible);
1781 1793
1782 if (vcpu->spin_loop.in_spin_loop) 1794 if (vcpu->spin_loop.in_spin_loop)
1783 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); 1795 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
@@ -2267,6 +2279,29 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
2267 return filp->private_data; 2279 return filp->private_data;
2268} 2280}
2269 2281
2282static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
2283#ifdef CONFIG_KVM_MPIC
2284 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
2285 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
2286#endif
2287
2288#ifdef CONFIG_KVM_XICS
2289 [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
2290#endif
2291};
2292
2293int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
2294{
2295 if (type >= ARRAY_SIZE(kvm_device_ops_table))
2296 return -ENOSPC;
2297
2298 if (kvm_device_ops_table[type] != NULL)
2299 return -EEXIST;
2300
2301 kvm_device_ops_table[type] = ops;
2302 return 0;
2303}
2304
2270static int kvm_ioctl_create_device(struct kvm *kvm, 2305static int kvm_ioctl_create_device(struct kvm *kvm,
2271 struct kvm_create_device *cd) 2306 struct kvm_create_device *cd)
2272{ 2307{
@@ -2275,36 +2310,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2275 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; 2310 bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
2276 int ret; 2311 int ret;
2277 2312
2278 switch (cd->type) { 2313 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
2279#ifdef CONFIG_KVM_MPIC 2314 return -ENODEV;
2280 case KVM_DEV_TYPE_FSL_MPIC_20: 2315
2281 case KVM_DEV_TYPE_FSL_MPIC_42: 2316 ops = kvm_device_ops_table[cd->type];
2282 ops = &kvm_mpic_ops; 2317 if (ops == NULL)
2283 break;
2284#endif
2285#ifdef CONFIG_KVM_XICS
2286 case KVM_DEV_TYPE_XICS:
2287 ops = &kvm_xics_ops;
2288 break;
2289#endif
2290#ifdef CONFIG_KVM_VFIO
2291 case KVM_DEV_TYPE_VFIO:
2292 ops = &kvm_vfio_ops;
2293 break;
2294#endif
2295#ifdef CONFIG_KVM_ARM_VGIC
2296 case KVM_DEV_TYPE_ARM_VGIC_V2:
2297 ops = &kvm_arm_vgic_v2_ops;
2298 break;
2299#endif
2300#ifdef CONFIG_S390
2301 case KVM_DEV_TYPE_FLIC:
2302 ops = &kvm_flic_ops;
2303 break;
2304#endif
2305 default:
2306 return -ENODEV; 2318 return -ENODEV;
2307 }
2308 2319
2309 if (test) 2320 if (test)
2310 return 0; 2321 return 0;
@@ -2619,7 +2630,6 @@ static long kvm_dev_ioctl(struct file *filp,
2619 2630
2620 switch (ioctl) { 2631 switch (ioctl) {
2621 case KVM_GET_API_VERSION: 2632 case KVM_GET_API_VERSION:
2622 r = -EINVAL;
2623 if (arg) 2633 if (arg)
2624 goto out; 2634 goto out;
2625 r = KVM_API_VERSION; 2635 r = KVM_API_VERSION;
@@ -2631,7 +2641,6 @@ static long kvm_dev_ioctl(struct file *filp,
2631 r = kvm_vm_ioctl_check_extension_generic(NULL, arg); 2641 r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
2632 break; 2642 break;
2633 case KVM_GET_VCPU_MMAP_SIZE: 2643 case KVM_GET_VCPU_MMAP_SIZE:
2634 r = -EINVAL;
2635 if (arg) 2644 if (arg)
2636 goto out; 2645 goto out;
2637 r = PAGE_SIZE; /* struct kvm_run */ 2646 r = PAGE_SIZE; /* struct kvm_run */
@@ -2676,7 +2685,7 @@ static void hardware_enable_nolock(void *junk)
2676 2685
2677 cpumask_set_cpu(cpu, cpus_hardware_enabled); 2686 cpumask_set_cpu(cpu, cpus_hardware_enabled);
2678 2687
2679 r = kvm_arch_hardware_enable(NULL); 2688 r = kvm_arch_hardware_enable();
2680 2689
2681 if (r) { 2690 if (r) {
2682 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2691 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2701,7 +2710,7 @@ static void hardware_disable_nolock(void *junk)
2701 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 2710 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
2702 return; 2711 return;
2703 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2712 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
2704 kvm_arch_hardware_disable(NULL); 2713 kvm_arch_hardware_disable();
2705} 2714}
2706 2715
2707static void hardware_disable(void) 2716static void hardware_disable(void)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f935c7..bb11b36ee8a2 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -246,6 +246,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
246 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ 246 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
247} 247}
248 248
249static int kvm_vfio_create(struct kvm_device *dev, u32 type);
250
251static struct kvm_device_ops kvm_vfio_ops = {
252 .name = "kvm-vfio",
253 .create = kvm_vfio_create,
254 .destroy = kvm_vfio_destroy,
255 .set_attr = kvm_vfio_set_attr,
256 .has_attr = kvm_vfio_has_attr,
257};
258
249static int kvm_vfio_create(struct kvm_device *dev, u32 type) 259static int kvm_vfio_create(struct kvm_device *dev, u32 type)
250{ 260{
251 struct kvm_device *tmp; 261 struct kvm_device *tmp;
@@ -268,10 +278,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
268 return 0; 278 return 0;
269} 279}
270 280
271struct kvm_device_ops kvm_vfio_ops = { 281static int __init kvm_vfio_ops_init(void)
272 .name = "kvm-vfio", 282{
273 .create = kvm_vfio_create, 283 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
274 .destroy = kvm_vfio_destroy, 284}
275 .set_attr = kvm_vfio_set_attr, 285module_init(kvm_vfio_ops_init);
276 .has_attr = kvm_vfio_has_attr,
277};
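
Device backends now register their kvm_device_ops at init time, as the vgic and vfio hunks above do, and kvm_ioctl_create_device() simply indexes the resulting table instead of switching on the type. A standalone sketch of the registration scheme; only the -ENOSPC/-EEXIST behaviour mirrors kvm_register_device_ops(), everything else is illustrative:

    #include <stdio.h>
    #include <stddef.h>
    #include <errno.h>

    struct device_ops { const char *name; };

    #define DEV_TYPE_MAX 8
    static const struct device_ops *ops_table[DEV_TYPE_MAX];

    /* Register a backend's ops under its type, refusing duplicates. */
    static int register_device_ops(const struct device_ops *ops, unsigned int type)
    {
        if (type >= DEV_TYPE_MAX)
            return -ENOSPC;
        if (ops_table[type] != NULL)
            return -EEXIST;
        ops_table[type] = ops;
        return 0;
    }

    /* Creation path: a table lookup replaces the old switch statement. */
    static const struct device_ops *lookup(unsigned int type)
    {
        return type < DEV_TYPE_MAX ? ops_table[type] : NULL;
    }

    int main(void)
    {
        static const struct device_ops vfio_ops = { "kvm-vfio" };

        register_device_ops(&vfio_ops, 4);                            /* KVM_DEV_TYPE_VFIO is 4 */
        printf("%d\n", register_device_ops(&vfio_ops, 4) == -EEXIST); /* 1: duplicate refused */

        const struct device_ops *ops = lookup(4);
        printf("%s\n", ops ? ops->name : "ENODEV");
        return 0;
    }
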