-rw-r--r--  Documentation/virtual/kvm/api.txt | 188
-rw-r--r--  Documentation/virtual/kvm/devices/arm-vgic.txt | 10
-rw-r--r--  Documentation/virtual/kvm/mmu.txt | 14
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 5
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 23
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 11
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm/kvm/arm.c | 34
-rw-r--r--  arch/arm/kvm/coproc.c | 2
-rw-r--r--  arch/arm/kvm/guest.c | 2
-rw-r--r--  arch/arm/kvm/mmu.c | 40
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 13
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 5
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 24
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 18
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm64/kvm/guest.c | 2
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 2
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 15
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 34
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 16
-rw-r--r--  arch/mips/kvm/mips.c | 40
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 20
-rw-r--r--  arch/powerpc/include/asm/kvm_booke.h | 7
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 24
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 13
-rw-r--r--  arch/powerpc/include/asm/reg_booke.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 6
-rw-r--r--  arch/powerpc/kernel/cpu_setup_fsl_booke.S | 12
-rw-r--r--  arch/powerpc/kernel/cputable.c | 5
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 4
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S | 26
-rw-r--r--  arch/powerpc/kvm/book3s.c | 162
-rw-r--r--  arch/powerpc/kvm/book3s.h | 3
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 47
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 6
-rw-r--r--  arch/powerpc/kvm/booke.c | 287
-rw-r--r--  arch/powerpc/kvm/booke.h | 40
-rw-r--r--  arch/powerpc/kvm/booke_emulate.c | 163
-rw-r--r--  arch/powerpc/kvm/bookehv_interrupts.S | 13
-rw-r--r--  arch/powerpc/kvm/e500.h | 20
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c | 20
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c | 20
-rw-r--r--  arch/powerpc/kvm/e500mc.c | 60
-rw-r--r--  arch/powerpc/kvm/emulate.c | 17
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c | 2
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 134
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 6
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 34
-rw-r--r--  arch/s390/include/asm/pgalloc.h | 8
-rw-r--r--  arch/s390/include/asm/pgtable.h | 72
-rw-r--r--  arch/s390/include/asm/tlb.h | 2
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 10
-rw-r--r--  arch/s390/kvm/diag.c | 28
-rw-r--r--  arch/s390/kvm/gaccess.c | 3
-rw-r--r--  arch/s390/kvm/interrupt.c | 152
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 179
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 6
-rw-r--r--  arch/s390/kvm/priv.c | 11
-rw-r--r--  arch/s390/mm/fault.c | 25
-rw-r--r--  arch/s390/mm/pgtable.c | 707
-rw-r--r--  arch/s390/mm/vmem.c | 2
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 32
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 10
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 7
-rw-r--r--  arch/x86/kvm/cpuid.c | 31
-rw-r--r--  arch/x86/kvm/cpuid.h | 10
-rw-r--r--  arch/x86/kvm/emulate.c | 51
-rw-r--r--  arch/x86/kvm/lapic.c | 34
-rw-r--r--  arch/x86/kvm/mmu.c | 139
-rw-r--r--  arch/x86/kvm/mmu.h | 5
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 22
-rw-r--r--  arch/x86/kvm/pmu.c | 24
-rw-r--r--  arch/x86/kvm/svm.c | 40
-rw-r--r--  arch/x86/kvm/trace.h | 41
-rw-r--r--  arch/x86/kvm/vmx.c | 377
-rw-r--r--  arch/x86/kvm/x86.c | 148
-rw-r--r--  arch/x86/kvm/x86.h | 22
-rw-r--r--  drivers/iommu/amd_iommu_v2.c | 6
-rw-r--r--  include/kvm/arm_vgic.h | 112
-rw-r--r--  include/linux/kvm_host.h | 31
-rw-r--r--  include/linux/kvm_types.h | 14
-rw-r--r--  include/linux/mm.h | 1
-rw-r--r--  include/linux/mmu_notifier.h | 24
-rw-r--r--  include/trace/events/kvm.h | 36
-rw-r--r--  include/uapi/linux/kvm.h | 28
-rw-r--r--  mm/gup.c | 4
-rw-r--r--  mm/mmu_notifier.c | 5
-rw-r--r--  mm/rmap.c | 6
-rw-r--r--  virt/kvm/arm/vgic.c | 744
-rw-r--r--  virt/kvm/async_pf.c | 4
-rw-r--r--  virt/kvm/eventfd.c | 4
-rw-r--r--  virt/kvm/ioapic.c | 46
-rw-r--r--  virt/kvm/ioapic.h | 2
-rw-r--r--  virt/kvm/kvm_main.c | 192
-rw-r--r--  virt/kvm/vfio.c | 22
-rw-r--r--  virt/kvm/vfio.h | 13
101 files changed, 3339 insertions, 1822 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index beae3fde075e..7610eaa4d491 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1901,6 +1901,8 @@ registers, find a list below:
 PPC   | KVM_REG_PPC_ARCH_COMPAT | 32
 PPC   | KVM_REG_PPC_DABRX | 32
 PPC   | KVM_REG_PPC_WORT | 64
+PPC   | KVM_REG_PPC_SPRG9 | 64
+PPC   | KVM_REG_PPC_DBSR | 32
 PPC   | KVM_REG_PPC_TM_GPR0 | 64
   ...
 PPC   | KVM_REG_PPC_TM_GPR31 | 64
@@ -2565,6 +2567,120 @@ associated with the service will be forgotten, and subsequent RTAS
 calls by the guest for that service will be passed to userspace to be
 handled.
 
+4.87 KVM_SET_GUEST_DEBUG
+
+Capability: KVM_CAP_SET_GUEST_DEBUG
+Architectures: x86, s390, ppc
+Type: vcpu ioctl
+Parameters: struct kvm_guest_debug (in)
+Returns: 0 on success; -1 on error
+
+struct kvm_guest_debug {
+    __u32 control;
+    __u32 pad;
+    struct kvm_guest_debug_arch arch;
+};
+
+Set up the processor specific debug registers and configure vcpu for
+handling guest debug events. There are two parts to the structure, the
+first a control bitfield indicates the type of debug events to handle
+when running. Common control bits are:
+
+  - KVM_GUESTDBG_ENABLE:        guest debugging is enabled
+  - KVM_GUESTDBG_SINGLESTEP:    the next run should single-step
+
+The top 16 bits of the control field are architecture specific control
+flags which can include the following:
+
+  - KVM_GUESTDBG_USE_SW_BP:     using software breakpoints [x86]
+  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390]
+  - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
+  - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
+  - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
+
+For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
+are enabled in memory so we need to ensure breakpoint exceptions are
+correctly trapped and the KVM run loop exits at the breakpoint and not
+running off into the normal guest vector. For KVM_GUESTDBG_USE_HW_BP
+we need to ensure the guest vCPUs architecture specific registers are
+updated to the correct (supplied) values.
+
+The second part of the structure is architecture specific and
+typically contains a set of debug registers.
+
+When debug events exit the main run loop with the reason
+KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
+structure containing architecture specific debug information.
+
+4.88 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+    __u32 nent;
+    __u32 flags;
+    struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC    BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT  BIT(2)
+
+struct kvm_cpuid_entry2 {
+    __u32 function;
+    __u32 index;
+    __u32 flags;
+    __u32 eax;
+    __u32 ebx;
+    __u32 ecx;
+    __u32 edx;
+    __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+  function: the eax value used to obtain the entry
+  index: the ecx value used to obtain the entry (for entries that are
+         affected by ecx)
+  flags: an OR of zero or more of the following:
+        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+           if the index field is valid
+        KVM_CPUID_FLAG_STATEFUL_FUNC:
+           if cpuid for this function returns different values for successive
+           invocations; there will be several entries with the same function,
+           all with this flag set
+        KVM_CPUID_FLAG_STATE_READ_NEXT:
+           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+           the first entry to be read by a cpu
+  eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+               this function/index combination
 
 5. The kvm_run structure
 ------------------------
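For readers of the new 4.87 section, a minimal userspace sketch of enabling single-step; the vcpu_fd descriptor and the surrounding KVM_RUN loop are assumptions of this illustration and are not part of the patch:

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Ask KVM to exit to userspace after each guest instruction. */
    static int enable_singlestep(int vcpu_fd)
    {
        struct kvm_guest_debug dbg;

        memset(&dbg, 0, sizeof(dbg));
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
        return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }

    /* After KVM_RUN, a debug event is reported as run->exit_reason ==
     * KVM_EXIT_DEBUG, with run->debug.arch holding the architecture
     * specific details described above. */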
@@ -2861,78 +2977,12 @@ kvm_valid_regs for specific bits. These bits are architecture specific
 and usually define the validity of a groups of registers. (e.g. one bit
  for general purpose registers)
 
-};
-
+Please note that the kernel is allowed to use the kvm_run structure as the
+primary storage for certain register types. Therefore, the kernel may use the
+values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
 
-4.81 KVM_GET_EMULATED_CPUID
-
-Capability: KVM_CAP_EXT_EMUL_CPUID
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
-
-struct kvm_cpuid2 {
-    __u32 nent;
-    __u32 flags;
-    struct kvm_cpuid_entry2 entries[0];
 };
 
-The member 'flags' is used for passing flags from userspace.
-
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  BIT(2)
-
-struct kvm_cpuid_entry2 {
-    __u32 function;
-    __u32 index;
-    __u32 flags;
-    __u32 eax;
-    __u32 ebx;
-    __u32 ecx;
-    __u32 edx;
-    __u32 padding[3];
-};
-
-This ioctl returns x86 cpuid features which are emulated by
-kvm.Userspace can use the information returned by this ioctl to query
-which features are emulated by kvm instead of being present natively.
-
-Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
-structure with the 'nent' field indicating the number of entries in
-the variable-size array 'entries'. If the number of entries is too low
-to describe the cpu capabilities, an error (E2BIG) is returned. If the
-number is too high, the 'nent' field is adjusted and an error (ENOMEM)
-is returned. If the number is just right, the 'nent' field is adjusted
-to the number of valid entries in the 'entries' array, which is then
-filled.
-
-The entries returned are the set CPUID bits of the respective features
-which kvm emulates, as returned by the CPUID instruction, with unknown
-or unsupported feature bits cleared.
-
-Features like x2apic, for example, may not be present in the host cpu
-but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
-emulated efficiently and thus not included here.
-
-The fields in each entry are defined as follows:
-
-  function: the eax value used to obtain the entry
-  index: the ecx value used to obtain the entry (for entries that are
-         affected by ecx)
-  flags: an OR of zero or more of the following:
-        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
-           if the index field is valid
-        KVM_CPUID_FLAG_STATEFUL_FUNC:
-           if cpuid for this function returns different values for successive
-           invocations; there will be several entries with the same function,
-           all with this flag set
-        KVM_CPUID_FLAG_STATE_READ_NEXT:
-           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
-           the first entry to be read by a cpu
-  eax, ebx, ecx, edx: the values returned by the cpuid instruction for
-               this function/index combination
 
 
 6. Capabilities that can be enabled on vCPUs
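To illustrate the 'nent' negotiation documented in the new 4.88 section, a hedged userspace sketch; the sys_fd descriptor for /dev/kvm, the helper name and the doubling strategy are assumptions of this example, not part of the patch:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdlib.h>
    #include <errno.h>

    /* Query the emulated CPUID leaves, growing the buffer until it fits. */
    static struct kvm_cpuid2 *get_emulated_cpuid(int sys_fd)
    {
        int nent = 8;
        struct kvm_cpuid2 *cpuid;

        for (;;) {
            cpuid = calloc(1, sizeof(*cpuid) +
                              nent * sizeof(struct kvm_cpuid_entry2));
            if (!cpuid)
                return NULL;
            cpuid->nent = nent;
            if (ioctl(sys_fd, KVM_GET_EMULATED_CPUID, cpuid) == 0)
                return cpuid;   /* nent now holds the valid entry count */
            free(cpuid);
            if (errno != E2BIG)
                return NULL;    /* ENOMEM or another error: give up */
            nent *= 2;          /* too small: retry with more room */
        }
    }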
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 7f4e91b1316b..df8b0c7540b6 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -71,3 +71,13 @@ Groups:
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+
+  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+  Attributes:
+    A value describing the number of interrupts (SGI, PPI and SPI) for
+    this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+  Errors:
+    -EINVAL: Value set is out of the expected range
+    -EBUSY: Value has already be set, or GIC has already been initialized
+            with default values.
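As an illustration of the new group, a sketch of how userspace might size the distributor before running any VCPU; the vgic_fd device descriptor obtained from KVM_CREATE_DEVICE and the helper name are assumed here, not shown in this patch:

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <sys/ioctl.h>

    /* nr_irqs must lie in [64, 1024] and be a multiple of 32, as stated above. */
    static int vgic_set_nr_irqs(int vgic_fd, uint32_t nr_irqs)
    {
        struct kvm_device_attr attr = {
            .group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
            .attr  = 0,
            .addr  = (uint64_t)(unsigned long)&nr_irqs,
        };

        return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
    }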
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index 290894176142..53838d9c6295 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -425,6 +425,20 @@ fault through the slow path.
 Since only 19 bits are used to store generation-number on mmio spte, all
 pages are zapped when there is an overflow.
 
+Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
+times, the last one happening when the generation number is retrieved and
+stored into the MMIO spte. Thus, the MMIO spte might be created based on
+out-of-date information, but with an up-to-date generation number.
+
+To avoid this, the generation number is incremented again after synchronize_srcu
+returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+memslot update, while some SRCU readers might be using the old copy. We do not
+want to use an MMIO sptes created with an odd generation number, and we can do
+this without losing a bit in the MMIO spte. The low bit of the generation
+is not stored in MMIO spte, and presumed zero when it is extracted out of the
+spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1,
+the next access to the spte will always be a cache miss.
+
 
 Further reading
 ===============
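A simplified sketch of the check described above; this is illustrative only, with hypothetical helper names and a simplified mask layout rather than the actual code in arch/x86/kvm/mmu.c:

    /* Bit 0 of the memslots generation is dropped when the value is packed
     * into the spte, and reads back as zero. */
    #define MMIO_GEN_MASK   ((1u << 20) - 1)

    static unsigned int spte_stored_gen(unsigned int gen)
    {
        return gen & MMIO_GEN_MASK & ~1u;       /* low bit never stored */
    }

    static int mmio_spte_is_current(unsigned int stored, unsigned int gen)
    {
        /* During a memslot update gen is odd, so 'stored' (always even)
         * can never match it; such sptes are simply refaulted later. */
        return stored == (gen & MMIO_GEN_MASK);
    }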
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 69b746955fca..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -149,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
 
 static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
 {
+    return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
+{
     return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
 }
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6dfb404f6c46..53036e21756b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -40,9 +42,8 @@
 
 #include <kvm/arm_vgic.h>
 
-struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
@@ -149,20 +150,17 @@ struct kvm_vcpu_stat {
     u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
                         const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 u64 kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
                         unsigned long start, unsigned long end);
@@ -172,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+                              unsigned long end)
 {
     return 0;
 }
@@ -182,12 +181,16 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
     return 0;
 }
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                                         unsigned long address)
+{
+}
+
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
 int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
 unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
 int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
@@ -233,4 +236,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f5f72f..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
     flush_pmd_entry(pte);
 }
 
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
-    unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
-    if (hsr_ec == HSR_EC_IABT)
-        return false;
-    else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
-        return false;
-    else
-        return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd)
 {
     clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index e6ebdd3471e5..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_REG_SIZE(id) \
     (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -173,6 +174,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cdf8ba2..779605122f32 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -82,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
 /**
  * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
  */
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 {
     return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
     return 0;
 }
@@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
     return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
     return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 void kvm_arch_check_processor_compat(void *rtn)
 {
     *(int *)rtn = 0;
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 
 /**
  * kvm_arch_init_vm - initializes a VM data structure
@@ -172,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
             kvm->vcpus[i] = NULL;
         }
     }
+
+    kvm_vgic_destroy(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -188,6 +179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
     case KVM_CAP_ONE_REG:
     case KVM_CAP_ARM_PSCI:
     case KVM_CAP_ARM_PSCI_0_2:
+    case KVM_CAP_READONLY_MEM:
         r = 1;
         break;
     case KVM_CAP_COALESCED_MMIO:
@@ -253,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
     kvm_mmu_free_memory_caches(vcpu);
     kvm_timer_vcpu_terminate(vcpu);
+    kvm_vgic_vcpu_destroy(vcpu);
     kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -268,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-    int ret;
-
     /* Force users to call KVM_ARM_VCPU_INIT */
     vcpu->arch.target = -1;
 
-    /* Set up VGIC */
-    ret = kvm_vgic_vcpu_init(vcpu);
-    if (ret)
-        return ret;
-
     /* Set up the timer */
     kvm_timer_vcpu_init(vcpu);
 
     return 0;
 }
 
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
     vcpu->cpu = cpu;
@@ -428,9 +410,9 @@ static void update_vttbr(struct kvm *kvm)
 
     /* update vttbr to be used with the new vmid */
     pgd_phys = virt_to_phys(kvm->arch.pgd);
+    BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
     vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-    kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-    kvm->arch.vttbr |= vmid;
+    kvm->arch.vttbr = pgd_phys | vmid;
 
     spin_unlock(&kvm_vmid_lock);
 }
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 37a0fe1bb9bb..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val)
     u32 level, ctype;
 
     if (val >= CSSELR_MAX)
-        return -ENOENT;
+        return false;
 
     /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
     level = (val >> 1);
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 813e49258690..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
     ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
     if (ret != 0)
-        return ret;
+        return -EFAULT;
 
     return kvm_arm_timer_set_reg(vcpu, reg->id, val);
 }
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994bf347..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -746,22 +746,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
     return false;
 }
 
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+    if (kvm_vcpu_trap_is_iabt(vcpu))
+        return false;
+
+    return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                          struct kvm_memory_slot *memslot,
+                          struct kvm_memory_slot *memslot, unsigned long hva,
                           unsigned long fault_status)
 {
     int ret;
     bool write_fault, writable, hugetlb = false, force_pte = false;
     unsigned long mmu_seq;
     gfn_t gfn = fault_ipa >> PAGE_SHIFT;
-    unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
     struct kvm *kvm = vcpu->kvm;
     struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
     struct vm_area_struct *vma;
     pfn_t pfn;
     pgprot_t mem_type = PAGE_S2;
 
-    write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+    write_fault = kvm_is_write_fault(vcpu);
     if (fault_status == FSC_PERM && !write_fault) {
         kvm_err("Unexpected L2 read permission error\n");
         return -EFAULT;
@@ -863,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
     unsigned long fault_status;
     phys_addr_t fault_ipa;
     struct kvm_memory_slot *memslot;
-    bool is_iabt;
+    unsigned long hva;
+    bool is_iabt, write_fault, writable;
     gfn_t gfn;
     int ret, idx;
 
@@ -874,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
             kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
     /* Check the stage-2 fault is trans. fault or write fault */
-    fault_status = kvm_vcpu_trap_get_fault(vcpu);
+    fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
     if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-        kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
-            kvm_vcpu_trap_get_class(vcpu), fault_status);
+        kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+            kvm_vcpu_trap_get_class(vcpu),
+            (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+            (unsigned long)kvm_vcpu_get_hsr(vcpu));
         return -EFAULT;
     }
 
     idx = srcu_read_lock(&vcpu->kvm->srcu);
 
     gfn = fault_ipa >> PAGE_SHIFT;
-    if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+    memslot = gfn_to_memslot(vcpu->kvm, gfn);
+    hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+    write_fault = kvm_is_write_fault(vcpu);
+    if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
         if (is_iabt) {
             /* Prefetch Abort on I/O address */
             kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -892,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
             goto out_unlock;
         }
 
-        if (fault_status != FSC_FAULT) {
-            kvm_err("Unsupported fault status on io memory: %#lx\n",
-                fault_status);
-            ret = -EFAULT;
-            goto out_unlock;
-        }
-
         /*
          * The IPA is reported as [MAX:12], so we need to
          * complement it with the bottom 12 bits from the
@@ -910,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
         goto out_unlock;
     }
 
-    memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
-    ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+    ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
     if (ret == 0)
         ret = 1;
 out_unlock:
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520459ed..7fd3e27e3ccc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -122,6 +122,17 @@
 #define VTCR_EL2_T0SZ_MASK 0x3f
 #define VTCR_EL2_T0SZ_40B 24
 
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
@@ -149,7 +160,7 @@
 #endif
 
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (48LLU)
 #define VTTBR_VMID_MASK   (0xffLLU << VTTBR_VMID_SHIFT)
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fdc3e21abd8d..5674a55b5518 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -174,6 +174,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
 
 static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
 {
+    return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+{
     return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
 }
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e10c45a578e3..2012c4ba8d67 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
 #ifndef __ARM64_KVM_HOST_H__
 #define __ARM64_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -41,8 +43,7 @@
 
 #define KVM_VCPU_MAX_FEATURES 3
 
-struct kvm_vcpu;
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(long ext);
 
@@ -164,25 +165,23 @@ struct kvm_vcpu_stat {
     u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
                         const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
                         unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+                              unsigned long end)
 {
     return 0;
 }
@@ -192,8 +191,13 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
     return 0;
 }
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                                         unsigned long address)
+{
+}
+
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
 
@@ -244,4 +248,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
     }
 }
 
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8e138c7c53ac..a030d163840b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -59,10 +59,9 @@
 #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
 
 /*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * We currently only support a 40bit IPA.
  */
-#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT
+#define KVM_PHYS_SHIFT (40)
 #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
 
@@ -93,19 +92,6 @@ void kvm_clear_hyp_idmap(void);
 #define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
 #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
 
-static inline bool kvm_is_write_fault(unsigned long esr)
-{
-    unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
-
-    if (esr_ec == ESR_EL2_EC_IABT)
-        return false;
-
-    if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
-        return false;
-
-    return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e633ff8cdec8..8e38878c87c6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,6 +37,7 @@
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_REG_SIZE(id) \
     (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -159,6 +160,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 8d1ec2887a26..76794692c20b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
     ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
     if (ret != 0)
-        return ret;
+        return -EFAULT;
 
     return kvm_arm_timer_set_reg(vcpu, reg->id, val);
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5805e7c4a4dd..4cc3b719208e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val)
     u32 level, ctype;
 
     if (val >= CSSELR_MAX)
-        return -ENOENT;
+        return false;
 
     /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
     level = (val >> 1);
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index db95f570705f..4729752b7256 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -234,9 +234,6 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G 32
 #define KVM_REQ_RESUME 33
 
-struct kvm;
-struct kvm_vcpu;
-
 struct kvm_mmio_req {
     uint64_t addr; /* physical address */
     uint64_t size; /* size in bytes */
@@ -595,6 +592,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
595struct kvm *kvm_arch_alloc_vm(void); 592struct kvm *kvm_arch_alloc_vm(void);
596void kvm_arch_free_vm(struct kvm *kvm); 593void kvm_arch_free_vm(struct kvm *kvm);
597 594
595static inline void kvm_arch_sync_events(struct kvm *kvm) {}
596static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
597static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
598static inline void kvm_arch_free_memslot(struct kvm *kvm,
599 struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
600static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
601static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
602 struct kvm_userspace_memory_region *mem,
603 const struct kvm_memory_slot *old,
604 enum kvm_mr_change change) {}
605static inline void kvm_arch_hardware_unsetup(void) {}
606
598#endif /* __ASSEMBLY__*/ 607#endif /* __ASSEMBLY__*/
599 608
600#endif 609#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0729ba6acddf..ec6b9acb6bea 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static DEFINE_SPINLOCK(vp_lock);
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
     long status;
     long tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
     return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
 
     long status;
@@ -1364,10 +1364,6 @@ static void kvm_release_vm_pages(struct kvm *kvm)
     }
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
     kvm_iommu_unmap_guest(kvm);
@@ -1376,10 +1372,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
     kvm_release_vm_pages(kvm);
 }
 
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
     if (cpu != vcpu->cpu) {
@@ -1468,7 +1460,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
     kfree(vcpu->arch.apic);
 }
 
-
 long kvm_arch_vcpu_ioctl(struct file *filp,
                          unsigned int ioctl, unsigned long arg)
 {
@@ -1551,21 +1542,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
     return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-                           struct kvm_memory_slot *dont)
-{
-}
-
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                             unsigned long npages)
 {
     return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
         struct kvm_memory_slot *memslot,
         struct kvm_userspace_memory_region *mem,
@@ -1597,14 +1579,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
     return 0;
 }
 
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-        struct kvm_userspace_memory_region *mem,
-        const struct kvm_memory_slot *old,
-        enum kvm_mr_change change)
-{
-    return;
-}
-
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
     kvm_flush_remote_tlbs(kvm);
@@ -1853,10 +1827,6 @@ int kvm_arch_hardware_setup(void)
     return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
     return __apic_accept_irq(vcpu, irq->vector);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 7a3fc67bd7f9..f2c249796ea8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -96,11 +96,6 @@
 #define CAUSEB_DC 27
 #define CAUSEF_DC (_ULCAST_(1) << 27)
 
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-struct kvm_interrupt;
-
 extern atomic_t kvm_mips_instance;
 extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
 extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
@@ -767,5 +762,16 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
 extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+        struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+        struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index cd7114147ae7..e3b21e51ff7e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -77,24 +77,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
     return 1;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
     return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
     return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 void kvm_arch_check_processor_compat(void *rtn)
 {
     *(int *)rtn = 0;
@@ -163,10 +155,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
     mutex_unlock(&kvm->lock);
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 static void kvm_mips_uninit_tlbs(void *arg)
 {
     /* Restore wired count */
@@ -194,21 +182,12 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl,
     return -ENOIOCTLCMD;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-                           struct kvm_memory_slot *dont)
-{
-}
-
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                             unsigned long npages)
 {
     return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot,
                                    struct kvm_userspace_memory_region *mem,
@@ -254,19 +233,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
     }
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-                                   struct kvm_memory_slot *slot)
-{
-}
-
-void kvm_arch_flush_shadow(struct kvm *kvm)
-{
-}
-
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
     int err, size, offset;
@@ -998,10 +964,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
     return 0;
 }
 
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 465dfcb82c92..5bca220bbb60 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -53,17 +53,17 @@
 #define BOOKE_INTERRUPT_DEBUG 15
 
 /* E500 */
-#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32
-#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33
-/*
- * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines
- */
-#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
-#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \
-    BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
+#ifdef CONFIG_SPE_POSSIBLE
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32
+#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33
+#endif
+
 #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
 #define BOOKE_INTERRUPT_DOORBELL 36
 #define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index f7aa5cc395c4..3286f0d6a86c 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -23,15 +23,16 @@
 #include <linux/types.h>
 #include <linux/kvm_host.h>
 
-/* LPIDs we support with this build -- runtime limit may be lower */
+/*
+ * Number of available lpids. Only the low-order 6 bits of LPID rgister are
+ * implemented on e500mc+ cores.
+ */
 #define KVMPPC_NR_LPIDS 64
 
 #define KVMPPC_INST_EHPRIV 0x7c00021c
 #define EHPRIV_OC_SHIFT 11
 /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
 #define EHPRIV_OC_DEBUG 1
-#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \
-    (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT))
 
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 98d9dd50d063..047855619cc4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,14 +53,18 @@
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
-struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
                                unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                                         unsigned long address)
+{
+}
+
 #define HPTEG_CACHE_NUM (1 << 15)
 #define HPTEG_HASH_BITS_PTE 13
 #define HPTEG_HASH_BITS_PTE_LONG 12
@@ -76,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
76/* Physical Address Mask - allowed range of real mode RAM access */ 80/* Physical Address Mask - allowed range of real mode RAM access */
77#define KVM_PAM 0x0fffffffffffffffULL 81#define KVM_PAM 0x0fffffffffffffffULL
78 82
79struct kvm;
80struct kvm_run;
81struct kvm_vcpu;
82
83struct lppaca; 83struct lppaca;
84struct slb_shadow; 84struct slb_shadow;
85struct dtl_entry; 85struct dtl_entry;
@@ -144,6 +144,7 @@ enum kvm_exit_types {
144 EMULATED_TLBWE_EXITS, 144 EMULATED_TLBWE_EXITS,
145 EMULATED_RFI_EXITS, 145 EMULATED_RFI_EXITS,
146 EMULATED_RFCI_EXITS, 146 EMULATED_RFCI_EXITS,
147 EMULATED_RFDI_EXITS,
147 DEC_EXITS, 148 DEC_EXITS,
148 EXT_INTR_EXITS, 149 EXT_INTR_EXITS,
149 HALT_WAKEUP, 150 HALT_WAKEUP,
@@ -589,8 +590,6 @@ struct kvm_vcpu_arch {
589 u32 crit_save; 590 u32 crit_save;
590 /* guest debug registers*/ 591 /* guest debug registers*/
591 struct debug_reg dbg_reg; 592 struct debug_reg dbg_reg;
592 /* hardware visible debug registers when in guest state */
593 struct debug_reg shadow_dbg_reg;
594#endif 593#endif
595 gpa_t paddr_accessed; 594 gpa_t paddr_accessed;
596 gva_t vaddr_accessed; 595 gva_t vaddr_accessed;
@@ -612,7 +611,6 @@ struct kvm_vcpu_arch {
612 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ 611 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
613 612
614 struct hrtimer dec_timer; 613 struct hrtimer dec_timer;
615 struct tasklet_struct tasklet;
616 u64 dec_jiffies; 614 u64 dec_jiffies;
617 u64 dec_expires; 615 u64 dec_expires;
618 unsigned long pending_exceptions; 616 unsigned long pending_exceptions;
@@ -687,4 +685,12 @@ struct kvm_vcpu_arch {
687#define __KVM_HAVE_ARCH_WQP 685#define __KVM_HAVE_ARCH_WQP
688#define __KVM_HAVE_CREATE_DEVICE 686#define __KVM_HAVE_CREATE_DEVICE
689 687
688static inline void kvm_arch_hardware_disable(void) {}
689static inline void kvm_arch_hardware_unsetup(void) {}
690static inline void kvm_arch_sync_events(struct kvm *kvm) {}
691static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
692static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
693static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
694static inline void kvm_arch_exit(void) {}
695
690#endif /* __POWERPC_KVM_HOST_H__ */ 696#endif /* __POWERPC_KVM_HOST_H__ */
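
Editor's note: the new empty static inline hooks (kvm_arch_hardware_disable() and friends) let generic KVM code call every arch hook unconditionally; an architecture with nothing to do supplies a stub the compiler removes instead of an out-of-line no-op. A sketch of the pattern with illustrative names, not the actual generic KVM call sites:

/* arch header: empty hook, no call cost after inlining */
static inline void arch_sched_in_hook(void *vcpu, int cpu) { }

/* generic code: calls the hook unconditionally on every architecture */
static void generic_sched_in(void *vcpu, int cpu)
{
	/* ...generic bookkeeping... */
	arch_sched_in_hook(vcpu, cpu);	/* compiles away on this arch */
}
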
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index fb86a2299d8a..a6dcdb6d13c1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -38,6 +38,12 @@
38#include <asm/paca.h> 38#include <asm/paca.h>
39#endif 39#endif
40 40
41/*
 42 * KVMPPC_INST_SW_BREAKPOINT is the debug instruction
 43 * used to implement software breakpoints.
44 */
45#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00
46
41enum emulation_result { 47enum emulation_result {
42 EMULATE_DONE, /* no further processing */ 48 EMULATE_DONE, /* no further processing */
43 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 49 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -89,7 +95,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
89extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 95extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
90extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); 96extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
91extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); 97extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
92extern void kvmppc_decrementer_func(unsigned long data); 98extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
93extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); 99extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
94extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); 100extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
95extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); 101extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
@@ -206,6 +212,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
206extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); 212extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
207extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); 213extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
208 214
215void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu);
216void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu);
217
209union kvmppc_one_reg { 218union kvmppc_one_reg {
210 u32 wval; 219 u32 wval;
211 u64 dval; 220 u64 dval;
@@ -243,7 +252,7 @@ struct kvmppc_ops {
243 int (*unmap_hva)(struct kvm *kvm, unsigned long hva); 252 int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
244 int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, 253 int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
245 unsigned long end); 254 unsigned long end);
246 int (*age_hva)(struct kvm *kvm, unsigned long hva); 255 int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
247 int (*test_age_hva)(struct kvm *kvm, unsigned long hva); 256 int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
248 void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); 257 void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
249 void (*mmu_destroy)(struct kvm_vcpu *vcpu); 258 void (*mmu_destroy)(struct kvm_vcpu *vcpu);
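
Editor's note: KVMPPC_INST_SW_BREAKPOINT (0x00dddd00) is the instruction a debugger stub is expected to plant in guest memory for software breakpoints; rather than hard-coding it, userspace can read it back through the existing KVM_REG_PPC_DEBUG_INST one-reg. A hedged userspace sketch for a powerpc host (vcpu_fd is assumed to be an open KVM vcpu descriptor; error handling is omitted):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: query the breakpoint instruction a debugger stub should
 * write into guest memory. */
static uint32_t query_sw_bp_insn(int vcpu_fd)
{
	uint32_t insn = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_DEBUG_INST,
		.addr = (uintptr_t)&insn,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	return insn;	/* expected to be KVMPPC_INST_SW_BREAKPOINT here */
}
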
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 1d653308a33c..16547efa2d5a 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -319,6 +319,8 @@
319 * DBSR bits which have conflicting definitions on true Book E versus IBM 40x. 319 * DBSR bits which have conflicting definitions on true Book E versus IBM 40x.
320 */ 320 */
321#ifdef CONFIG_BOOKE 321#ifdef CONFIG_BOOKE
322#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */
323#define DBSR_MRR 0x30000000 /* Most Recent Reset */
322#define DBSR_IC 0x08000000 /* Instruction Completion */ 324#define DBSR_IC 0x08000000 /* Instruction Completion */
323#define DBSR_BT 0x04000000 /* Branch Taken */ 325#define DBSR_BT 0x04000000 /* Branch Taken */
324#define DBSR_IRPT 0x02000000 /* Exception Debug Event */ 326#define DBSR_IRPT 0x02000000 /* Exception Debug Event */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index e0e49dbb145d..ab4d4732c492 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -476,6 +476,11 @@ struct kvm_get_htab_header {
476 476
477/* FP and vector status/control registers */ 477/* FP and vector status/control registers */
478#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) 478#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
479/*
 480 * The VSCR register is documented as a 32-bit register in the ISA, but it can
 481 * only be accessed via a vector register. Expose VSCR as a 32-bit register
482 * even though the kernel represents it as a 128-bit vector.
483 */
479#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) 484#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
480 485
481/* Virtual processor areas */ 486/* Virtual processor areas */
@@ -557,6 +562,7 @@ struct kvm_get_htab_header {
557#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) 562#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
558#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) 563#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
559#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) 564#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
565#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
560 566
561/* Transactional Memory checkpointed state: 567/* Transactional Memory checkpointed state:
562 * This is all GPRs, all VSX regs and a subset of SPRs 568 * This is all GPRs, all VSX regs and a subset of SPRs
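
Editor's note: the one-reg id encodes the transfer size (KVM_REG_SIZE_U32 for VSCR and the new KVM_REG_PPC_DBSR, KVM_REG_SIZE_U64 for SPRG9), so userspace can size its buffer from the id alone. A hedged sketch for a powerpc host; one_reg_size_of and read_vscr are illustrative helpers, not kernel or QEMU code:

#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Transfer size is 2^((id & SIZE_MASK) >> SIZE_SHIFT) bytes. */
static size_t one_reg_size_of(uint64_t id)
{
	return 1ull << ((id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT);
}

/* Read VSCR as the 32-bit quantity it is exposed as, even though the
 * kernel keeps it inside a 128-bit vector register. */
static int read_vscr(int vcpu_fd, uint32_t *vscr)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_VSCR,	/* one_reg_size_of(id) == 4 */
		.addr = (uintptr_t)vscr,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
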
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 4f1393d20079..dddba3e94260 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle)
91 91
92 blr 92 blr
93 93
94#ifdef CONFIG_PPC_E500MC
94_GLOBAL(__setup_cpu_e6500) 95_GLOBAL(__setup_cpu_e6500)
95 mflr r6 96 mflr r6
96#ifdef CONFIG_PPC64 97#ifdef CONFIG_PPC64
@@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500)
107 bl __setup_cpu_e5500 108 bl __setup_cpu_e5500
108 mtlr r6 109 mtlr r6
109 blr 110 blr
111#endif /* CONFIG_PPC_E500MC */
110 112
111#ifdef CONFIG_PPC32 113#ifdef CONFIG_PPC32
114#ifdef CONFIG_E200
112_GLOBAL(__setup_cpu_e200) 115_GLOBAL(__setup_cpu_e200)
113 /* enable dedicated debug exception handling resources (Debug APU) */ 116 /* enable dedicated debug exception handling resources (Debug APU) */
114 mfspr r3,SPRN_HID0 117 mfspr r3,SPRN_HID0
115 ori r3,r3,HID0_DAPUEN@l 118 ori r3,r3,HID0_DAPUEN@l
116 mtspr SPRN_HID0,r3 119 mtspr SPRN_HID0,r3
117 b __setup_e200_ivors 120 b __setup_e200_ivors
121#endif /* CONFIG_E200 */
122
123#ifdef CONFIG_E500
124#ifndef CONFIG_PPC_E500MC
118_GLOBAL(__setup_cpu_e500v1) 125_GLOBAL(__setup_cpu_e500v1)
119_GLOBAL(__setup_cpu_e500v2) 126_GLOBAL(__setup_cpu_e500v2)
120 mflr r4 127 mflr r4
@@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2)
129#endif 136#endif
130 mtlr r4 137 mtlr r4
131 blr 138 blr
139#else /* CONFIG_PPC_E500MC */
132_GLOBAL(__setup_cpu_e500mc) 140_GLOBAL(__setup_cpu_e500mc)
133_GLOBAL(__setup_cpu_e5500) 141_GLOBAL(__setup_cpu_e5500)
134 mflr r5 142 mflr r5
@@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500)
1592: 1672:
160 mtlr r5 168 mtlr r5
161 blr 169 blr
162#endif 170#endif /* CONFIG_PPC_E500MC */
171#endif /* CONFIG_E500 */
172#endif /* CONFIG_PPC32 */
163 173
164#ifdef CONFIG_PPC_BOOK3E_64 174#ifdef CONFIG_PPC_BOOK3E_64
165_GLOBAL(__restore_cpu_e6500) 175_GLOBAL(__restore_cpu_e6500)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 9b6dcaaec1a3..808405906336 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1961,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
1961#endif /* CONFIG_PPC32 */ 1961#endif /* CONFIG_PPC32 */
1962#ifdef CONFIG_E500 1962#ifdef CONFIG_E500
1963#ifdef CONFIG_PPC32 1963#ifdef CONFIG_PPC32
1964#ifndef CONFIG_PPC_E500MC
1964 { /* e500 */ 1965 { /* e500 */
1965 .pvr_mask = 0xffff0000, 1966 .pvr_mask = 0xffff0000,
1966 .pvr_value = 0x80200000, 1967 .pvr_value = 0x80200000,
@@ -2000,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
2000 .machine_check = machine_check_e500, 2001 .machine_check = machine_check_e500,
2001 .platform = "ppc8548", 2002 .platform = "ppc8548",
2002 }, 2003 },
2004#else
2003 { /* e500mc */ 2005 { /* e500mc */
2004 .pvr_mask = 0xffff0000, 2006 .pvr_mask = 0xffff0000,
2005 .pvr_value = 0x80230000, 2007 .pvr_value = 0x80230000,
@@ -2018,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
2018 .machine_check = machine_check_e500mc, 2020 .machine_check = machine_check_e500mc,
2019 .platform = "ppce500mc", 2021 .platform = "ppce500mc",
2020 }, 2022 },
2023#endif /* CONFIG_PPC_E500MC */
2021#endif /* CONFIG_PPC32 */ 2024#endif /* CONFIG_PPC32 */
2025#ifdef CONFIG_PPC_E500MC
2022 { /* e5500 */ 2026 { /* e5500 */
2023 .pvr_mask = 0xffff0000, 2027 .pvr_mask = 0xffff0000,
2024 .pvr_value = 0x80240000, 2028 .pvr_value = 0x80240000,
@@ -2062,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
2062 .machine_check = machine_check_e500mc, 2066 .machine_check = machine_check_e500mc,
2063 .platform = "ppce6500", 2067 .platform = "ppce6500",
2064 }, 2068 },
2069#endif /* CONFIG_PPC_E500MC */
2065#ifdef CONFIG_PPC32 2070#ifdef CONFIG_PPC32
2066 { /* default match */ 2071 { /* default match */
2067 .pvr_mask = 0x00000000, 2072 .pvr_mask = 0x00000000,
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index bb9cac6c8051..3e68d1c69718 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -635,7 +635,7 @@ interrupt_end_book3e:
635 635
636/* Altivec Unavailable Interrupt */ 636/* Altivec Unavailable Interrupt */
637 START_EXCEPTION(altivec_unavailable); 637 START_EXCEPTION(altivec_unavailable);
638 NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL, 638 NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL,
639 PROLOG_ADDITION_NONE) 639 PROLOG_ADDITION_NONE)
640 /* we can probably do a shorter exception entry for that one... */ 640 /* we can probably do a shorter exception entry for that one... */
641 EXCEPTION_COMMON(0x200) 641 EXCEPTION_COMMON(0x200)
@@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
658/* AltiVec Assist */ 658/* AltiVec Assist */
659 START_EXCEPTION(altivec_assist); 659 START_EXCEPTION(altivec_assist);
660 NORMAL_EXCEPTION_PROLOG(0x220, 660 NORMAL_EXCEPTION_PROLOG(0x220,
661 BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST, 661 BOOKE_INTERRUPT_ALTIVEC_ASSIST,
662 PROLOG_ADDITION_NONE) 662 PROLOG_ADDITION_NONE)
663 EXCEPTION_COMMON(0x220) 663 EXCEPTION_COMMON(0x220)
664 INTS_DISABLE 664 INTS_DISABLE
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index b497188a94a1..fffd1f96bb1d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -613,34 +613,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
613 mfspr r10, SPRN_SPRG_RSCRATCH0 613 mfspr r10, SPRN_SPRG_RSCRATCH0
614 b InstructionStorage 614 b InstructionStorage
615 615
616/* Define SPE handlers for e200 and e500v2 */
616#ifdef CONFIG_SPE 617#ifdef CONFIG_SPE
617 /* SPE Unavailable */ 618 /* SPE Unavailable */
618 START_EXCEPTION(SPEUnavailable) 619 START_EXCEPTION(SPEUnavailable)
619 NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL) 620 NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
620 beq 1f 621 beq 1f
621 bl load_up_spe 622 bl load_up_spe
622 b fast_exception_return 623 b fast_exception_return
6231: addi r3,r1,STACK_FRAME_OVERHEAD 6241: addi r3,r1,STACK_FRAME_OVERHEAD
624 EXC_XFER_EE_LITE(0x2010, KernelSPE) 625 EXC_XFER_EE_LITE(0x2010, KernelSPE)
625#else 626#elif defined(CONFIG_SPE_POSSIBLE)
626 EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ 627 EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
627 unknown_exception, EXC_XFER_EE) 628 unknown_exception, EXC_XFER_EE)
628#endif /* CONFIG_SPE */ 629#endif /* CONFIG_SPE_POSSIBLE */
629 630
630 /* SPE Floating Point Data */ 631 /* SPE Floating Point Data */
631#ifdef CONFIG_SPE 632#ifdef CONFIG_SPE
632 EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, 633 EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
633 SPEFloatingPointException, EXC_XFER_EE) 634 SPEFloatingPointException, EXC_XFER_EE)
634 635
635 /* SPE Floating Point Round */ 636 /* SPE Floating Point Round */
636 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ 637 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
637 SPEFloatingPointRoundException, EXC_XFER_EE) 638 SPEFloatingPointRoundException, EXC_XFER_EE)
638#else 639#elif defined(CONFIG_SPE_POSSIBLE)
639 EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, 640 EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
640 unknown_exception, EXC_XFER_EE) 641 unknown_exception, EXC_XFER_EE)
641 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ 642 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
642 unknown_exception, EXC_XFER_EE) 643 unknown_exception, EXC_XFER_EE)
643#endif /* CONFIG_SPE */ 644#endif /* CONFIG_SPE_POSSIBLE */
645
644 646
645 /* Performance Monitor */ 647 /* Performance Monitor */
646 EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ 648 EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
@@ -947,6 +949,7 @@ get_phys_addr:
947 * Global functions 949 * Global functions
948 */ 950 */
949 951
952#ifdef CONFIG_E200
950/* Adjust or setup IVORs for e200 */ 953/* Adjust or setup IVORs for e200 */
951_GLOBAL(__setup_e200_ivors) 954_GLOBAL(__setup_e200_ivors)
952 li r3,DebugDebug@l 955 li r3,DebugDebug@l
@@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors)
959 mtspr SPRN_IVOR34,r3 962 mtspr SPRN_IVOR34,r3
960 sync 963 sync
961 blr 964 blr
965#endif
962 966
967#ifdef CONFIG_E500
968#ifndef CONFIG_PPC_E500MC
963/* Adjust or setup IVORs for e500v1/v2 */ 969/* Adjust or setup IVORs for e500v1/v2 */
964_GLOBAL(__setup_e500_ivors) 970_GLOBAL(__setup_e500_ivors)
965 li r3,DebugCrit@l 971 li r3,DebugCrit@l
@@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors)
974 mtspr SPRN_IVOR35,r3 980 mtspr SPRN_IVOR35,r3
975 sync 981 sync
976 blr 982 blr
977 983#else
978/* Adjust or setup IVORs for e500mc */ 984/* Adjust or setup IVORs for e500mc */
979_GLOBAL(__setup_e500mc_ivors) 985_GLOBAL(__setup_e500mc_ivors)
980 li r3,DebugDebug@l 986 li r3,DebugDebug@l
@@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors)
1000 mtspr SPRN_IVOR41,r3 1006 mtspr SPRN_IVOR41,r3
1001 sync 1007 sync
1002 blr 1008 blr
1009#endif /* CONFIG_PPC_E500MC */
1010#endif /* CONFIG_E500 */
1003 1011
1004#ifdef CONFIG_SPE 1012#ifdef CONFIG_SPE
1005/* 1013/*
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b299ba..b32db4b95361 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
535 return -ENOTSUPP; 535 return -ENOTSUPP;
536} 536}
537 537
538int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 538int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
539 union kvmppc_one_reg *val)
539{ 540{
540 int r; 541 int r = 0;
541 union kvmppc_one_reg val;
542 int size;
543 long int i; 542 long int i;
544 543
545 size = one_reg_size(reg->id); 544 r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
546 if (size > sizeof(val))
547 return -EINVAL;
548
549 r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
550 if (r == -EINVAL) { 545 if (r == -EINVAL) {
551 r = 0; 546 r = 0;
552 switch (reg->id) { 547 switch (id) {
553 case KVM_REG_PPC_DAR: 548 case KVM_REG_PPC_DAR:
554 val = get_reg_val(reg->id, kvmppc_get_dar(vcpu)); 549 *val = get_reg_val(id, kvmppc_get_dar(vcpu));
555 break; 550 break;
556 case KVM_REG_PPC_DSISR: 551 case KVM_REG_PPC_DSISR:
557 val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu)); 552 *val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
558 break; 553 break;
559 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: 554 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
560 i = reg->id - KVM_REG_PPC_FPR0; 555 i = id - KVM_REG_PPC_FPR0;
561 val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); 556 *val = get_reg_val(id, VCPU_FPR(vcpu, i));
562 break; 557 break;
563 case KVM_REG_PPC_FPSCR: 558 case KVM_REG_PPC_FPSCR:
564 val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); 559 *val = get_reg_val(id, vcpu->arch.fp.fpscr);
565 break;
566#ifdef CONFIG_ALTIVEC
567 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
568 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
569 r = -ENXIO;
570 break;
571 }
572 val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
573 break;
574 case KVM_REG_PPC_VSCR:
575 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
576 r = -ENXIO;
577 break;
578 }
579 val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
580 break; 560 break;
581 case KVM_REG_PPC_VRSAVE:
582 val = get_reg_val(reg->id, vcpu->arch.vrsave);
583 break;
584#endif /* CONFIG_ALTIVEC */
585#ifdef CONFIG_VSX 561#ifdef CONFIG_VSX
586 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: 562 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
587 if (cpu_has_feature(CPU_FTR_VSX)) { 563 if (cpu_has_feature(CPU_FTR_VSX)) {
588 long int i = reg->id - KVM_REG_PPC_VSR0; 564 i = id - KVM_REG_PPC_VSR0;
589 val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; 565 val->vsxval[0] = vcpu->arch.fp.fpr[i][0];
590 val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; 566 val->vsxval[1] = vcpu->arch.fp.fpr[i][1];
591 } else { 567 } else {
592 r = -ENXIO; 568 r = -ENXIO;
593 } 569 }
594 break; 570 break;
595#endif /* CONFIG_VSX */ 571#endif /* CONFIG_VSX */
596 case KVM_REG_PPC_DEBUG_INST: { 572 case KVM_REG_PPC_DEBUG_INST:
597 u32 opcode = INS_TW; 573 *val = get_reg_val(id, INS_TW);
598 r = copy_to_user((u32 __user *)(long)reg->addr,
599 &opcode, sizeof(u32));
600 break; 574 break;
601 }
602#ifdef CONFIG_KVM_XICS 575#ifdef CONFIG_KVM_XICS
603 case KVM_REG_PPC_ICP_STATE: 576 case KVM_REG_PPC_ICP_STATE:
604 if (!vcpu->arch.icp) { 577 if (!vcpu->arch.icp) {
605 r = -ENXIO; 578 r = -ENXIO;
606 break; 579 break;
607 } 580 }
608 val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); 581 *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
609 break; 582 break;
610#endif /* CONFIG_KVM_XICS */ 583#endif /* CONFIG_KVM_XICS */
611 case KVM_REG_PPC_FSCR: 584 case KVM_REG_PPC_FSCR:
612 val = get_reg_val(reg->id, vcpu->arch.fscr); 585 *val = get_reg_val(id, vcpu->arch.fscr);
613 break; 586 break;
614 case KVM_REG_PPC_TAR: 587 case KVM_REG_PPC_TAR:
615 val = get_reg_val(reg->id, vcpu->arch.tar); 588 *val = get_reg_val(id, vcpu->arch.tar);
616 break; 589 break;
617 case KVM_REG_PPC_EBBHR: 590 case KVM_REG_PPC_EBBHR:
618 val = get_reg_val(reg->id, vcpu->arch.ebbhr); 591 *val = get_reg_val(id, vcpu->arch.ebbhr);
619 break; 592 break;
620 case KVM_REG_PPC_EBBRR: 593 case KVM_REG_PPC_EBBRR:
621 val = get_reg_val(reg->id, vcpu->arch.ebbrr); 594 *val = get_reg_val(id, vcpu->arch.ebbrr);
622 break; 595 break;
623 case KVM_REG_PPC_BESCR: 596 case KVM_REG_PPC_BESCR:
624 val = get_reg_val(reg->id, vcpu->arch.bescr); 597 *val = get_reg_val(id, vcpu->arch.bescr);
625 break; 598 break;
626 case KVM_REG_PPC_VTB: 599 case KVM_REG_PPC_VTB:
627 val = get_reg_val(reg->id, vcpu->arch.vtb); 600 *val = get_reg_val(id, vcpu->arch.vtb);
628 break; 601 break;
629 case KVM_REG_PPC_IC: 602 case KVM_REG_PPC_IC:
630 val = get_reg_val(reg->id, vcpu->arch.ic); 603 *val = get_reg_val(id, vcpu->arch.ic);
631 break; 604 break;
632 default: 605 default:
633 r = -EINVAL; 606 r = -EINVAL;
634 break; 607 break;
635 } 608 }
636 } 609 }
637 if (r)
638 return r;
639
640 if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
641 r = -EFAULT;
642 610
643 return r; 611 return r;
644} 612}
645 613
646int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 614int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
615 union kvmppc_one_reg *val)
647{ 616{
648 int r; 617 int r = 0;
649 union kvmppc_one_reg val;
650 int size;
651 long int i; 618 long int i;
652 619
653 size = one_reg_size(reg->id); 620 r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
654 if (size > sizeof(val))
655 return -EINVAL;
656
657 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
658 return -EFAULT;
659
660 r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
661 if (r == -EINVAL) { 621 if (r == -EINVAL) {
662 r = 0; 622 r = 0;
663 switch (reg->id) { 623 switch (id) {
664 case KVM_REG_PPC_DAR: 624 case KVM_REG_PPC_DAR:
665 kvmppc_set_dar(vcpu, set_reg_val(reg->id, val)); 625 kvmppc_set_dar(vcpu, set_reg_val(id, *val));
666 break; 626 break;
667 case KVM_REG_PPC_DSISR: 627 case KVM_REG_PPC_DSISR:
668 kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val)); 628 kvmppc_set_dsisr(vcpu, set_reg_val(id, *val));
669 break; 629 break;
670 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: 630 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
671 i = reg->id - KVM_REG_PPC_FPR0; 631 i = id - KVM_REG_PPC_FPR0;
672 VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); 632 VCPU_FPR(vcpu, i) = set_reg_val(id, *val);
673 break; 633 break;
674 case KVM_REG_PPC_FPSCR: 634 case KVM_REG_PPC_FPSCR:
675 vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); 635 vcpu->arch.fp.fpscr = set_reg_val(id, *val);
676 break;
677#ifdef CONFIG_ALTIVEC
678 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
679 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
680 r = -ENXIO;
681 break;
682 }
683 vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
684 break;
685 case KVM_REG_PPC_VSCR:
686 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
687 r = -ENXIO;
688 break;
689 }
690 vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
691 break;
692 case KVM_REG_PPC_VRSAVE:
693 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
694 r = -ENXIO;
695 break;
696 }
697 vcpu->arch.vrsave = set_reg_val(reg->id, val);
698 break; 636 break;
699#endif /* CONFIG_ALTIVEC */
700#ifdef CONFIG_VSX 637#ifdef CONFIG_VSX
701 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: 638 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
702 if (cpu_has_feature(CPU_FTR_VSX)) { 639 if (cpu_has_feature(CPU_FTR_VSX)) {
703 long int i = reg->id - KVM_REG_PPC_VSR0; 640 i = id - KVM_REG_PPC_VSR0;
704 vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; 641 vcpu->arch.fp.fpr[i][0] = val->vsxval[0];
705 vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; 642 vcpu->arch.fp.fpr[i][1] = val->vsxval[1];
706 } else { 643 } else {
707 r = -ENXIO; 644 r = -ENXIO;
708 } 645 }
@@ -715,29 +652,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
715 break; 652 break;
716 } 653 }
717 r = kvmppc_xics_set_icp(vcpu, 654 r = kvmppc_xics_set_icp(vcpu,
718 set_reg_val(reg->id, val)); 655 set_reg_val(id, *val));
719 break; 656 break;
720#endif /* CONFIG_KVM_XICS */ 657#endif /* CONFIG_KVM_XICS */
721 case KVM_REG_PPC_FSCR: 658 case KVM_REG_PPC_FSCR:
722 vcpu->arch.fscr = set_reg_val(reg->id, val); 659 vcpu->arch.fscr = set_reg_val(id, *val);
723 break; 660 break;
724 case KVM_REG_PPC_TAR: 661 case KVM_REG_PPC_TAR:
725 vcpu->arch.tar = set_reg_val(reg->id, val); 662 vcpu->arch.tar = set_reg_val(id, *val);
726 break; 663 break;
727 case KVM_REG_PPC_EBBHR: 664 case KVM_REG_PPC_EBBHR:
728 vcpu->arch.ebbhr = set_reg_val(reg->id, val); 665 vcpu->arch.ebbhr = set_reg_val(id, *val);
729 break; 666 break;
730 case KVM_REG_PPC_EBBRR: 667 case KVM_REG_PPC_EBBRR:
731 vcpu->arch.ebbrr = set_reg_val(reg->id, val); 668 vcpu->arch.ebbrr = set_reg_val(id, *val);
732 break; 669 break;
733 case KVM_REG_PPC_BESCR: 670 case KVM_REG_PPC_BESCR:
734 vcpu->arch.bescr = set_reg_val(reg->id, val); 671 vcpu->arch.bescr = set_reg_val(id, *val);
735 break; 672 break;
736 case KVM_REG_PPC_VTB: 673 case KVM_REG_PPC_VTB:
737 vcpu->arch.vtb = set_reg_val(reg->id, val); 674 vcpu->arch.vtb = set_reg_val(id, *val);
738 break; 675 break;
739 case KVM_REG_PPC_IC: 676 case KVM_REG_PPC_IC:
740 vcpu->arch.ic = set_reg_val(reg->id, val); 677 vcpu->arch.ic = set_reg_val(id, *val);
741 break; 678 break;
742 default: 679 default:
743 r = -EINVAL; 680 r = -EINVAL;
@@ -778,13 +715,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
778int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 715int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
779 struct kvm_guest_debug *dbg) 716 struct kvm_guest_debug *dbg)
780{ 717{
781 return -EINVAL; 718 vcpu->guest_debug = dbg->control;
719 return 0;
782} 720}
783 721
784void kvmppc_decrementer_func(unsigned long data) 722void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
785{ 723{
786 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
787
788 kvmppc_core_queue_dec(vcpu); 724 kvmppc_core_queue_dec(vcpu);
789 kvm_vcpu_kick(vcpu); 725 kvm_vcpu_kick(vcpu);
790} 726}
@@ -851,9 +787,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
851 return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); 787 return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
852} 788}
853 789
854int kvm_age_hva(struct kvm *kvm, unsigned long hva) 790int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
855{ 791{
856 return kvm->arch.kvm_ops->age_hva(kvm, hva); 792 return kvm->arch.kvm_ops->age_hva(kvm, start, end);
857} 793}
858 794
859int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 795int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
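
Editor's note: both one-reg paths above now take a u64 id plus a kernel-side union kvmppc_one_reg, and the size checks and copy_{to,from}_user calls disappear from book3s.c; the powerpc.c growth in the diffstat suggests the userspace copying moved into one generic wrapper there. A reconstructed sketch of such a wrapper -- not quoted from the series, and it omits the ALTIVEC cases that also move to the generic code; the set path is symmetric with copy_from_user before the call:

/* Sketch only: assumes the usual KVM headers and kernel context. */
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
	union kvmppc_one_reg val;
	int size = one_reg_size(reg->id);
	int r;

	if (size > sizeof(val))
		return -EINVAL;

	/* Arch/subarch code only fills the union now. */
	r = kvmppc_get_one_reg(vcpu, reg->id, &val);
	if (r)
		return r;

	if (copy_to_user((void __user *)(unsigned long)reg->addr, &val, size))
		return -EFAULT;

	return 0;
}
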
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 4bf956cf94d6..d2b3ec088b8c 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
17extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); 17extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
18extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, 18extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
19 unsigned long end); 19 unsigned long end);
20extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); 20extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
21 unsigned long end);
21extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); 22extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
22extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); 23extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
23 24
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 79294c4c5015..d40770248b6a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1002 return ret; 1002 return ret;
1003} 1003}
1004 1004
1005int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) 1005int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
1006{ 1006{
1007 if (!kvm->arch.using_mmu_notifiers) 1007 if (!kvm->arch.using_mmu_notifiers)
1008 return 0; 1008 return 0;
1009 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 1009 return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
1010} 1010}
1011 1011
1012static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1012static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
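
Editor's note: kvm_age_hva_hv() now ages an HVA range in one call, matching the MMU notifier's clear_flush_young path, which passes start/end instead of iterating page by page. A reconstructed sketch of the generic caller this signature change serves (the real code lives in virt/kvm/kvm_main.c; details here are not quoted from the series):

static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
					      struct mm_struct *mm,
					      unsigned long start,
					      unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int young, idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);
	young = kvm_age_hva(kvm, start, end);	/* one call for the range */
	if (young)
		kvm_flush_remote_tlbs(kvm);
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);

	return young;
}
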
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 27cced9c7249..e63587d30b70 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
725 return kvmppc_hcall_impl_hv_realmode(cmd); 725 return kvmppc_hcall_impl_hv_realmode(cmd);
726} 726}
727 727
728static int kvmppc_emulate_debug_inst(struct kvm_run *run,
729 struct kvm_vcpu *vcpu)
730{
731 u32 last_inst;
732
733 if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
734 EMULATE_DONE) {
735 /*
736 * Fetch failed, so return to guest and
737 * try executing it again.
738 */
739 return RESUME_GUEST;
740 }
741
742 if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
743 run->exit_reason = KVM_EXIT_DEBUG;
744 run->debug.arch.address = kvmppc_get_pc(vcpu);
745 return RESUME_HOST;
746 } else {
747 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
748 return RESUME_GUEST;
749 }
750}
751
728static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, 752static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
729 struct task_struct *tsk) 753 struct task_struct *tsk)
730{ 754{
@@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
807 break; 831 break;
808 /* 832 /*
809 * This occurs if the guest executes an illegal instruction. 833 * This occurs if the guest executes an illegal instruction.
 810 * We just generate a program interrupt to the guest, since 834 * If guest debugging is disabled, generate a program interrupt
 811 * we don't emulate any guest instructions at this stage. 835 * to the guest. If guest debugging is enabled, check whether the
 836 * instruction is a software breakpoint instruction and return
 837 * to the guest or the host accordingly.
812 */ 838 */
813 case BOOK3S_INTERRUPT_H_EMUL_ASSIST: 839 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
814 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 840 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
815 r = RESUME_GUEST; 841 r = kvmppc_emulate_debug_inst(run, vcpu);
842 } else {
843 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
844 r = RESUME_GUEST;
845 }
816 break; 846 break;
817 /* 847 /*
818 * This occurs if the guest (kernel or userspace), does something that 848 * This occurs if the guest (kernel or userspace), does something that
@@ -856,7 +886,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
856{ 886{
857 int i, j; 887 int i, j;
858 888
859 kvmppc_set_pvr_hv(vcpu, sregs->pvr); 889 /* Only accept the same PVR as the host's, since we can't spoof it */
890 if (sregs->pvr != vcpu->arch.pvr)
891 return -EINVAL;
860 892
861 j = 0; 893 j = 0;
862 for (i = 0; i < vcpu->arch.slb_nr; i++) { 894 for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -922,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
922 long int i; 954 long int i;
923 955
924 switch (id) { 956 switch (id) {
957 case KVM_REG_PPC_DEBUG_INST:
958 *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
959 break;
925 case KVM_REG_PPC_HIOR: 960 case KVM_REG_PPC_HIOR:
926 *val = get_reg_val(id, 0); 961 *val = get_reg_val(id, 0);
927 break; 962 break;
@@ -1489,7 +1524,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
1489static int kvmppc_grab_hwthread(int cpu) 1524static int kvmppc_grab_hwthread(int cpu)
1490{ 1525{
1491 struct paca_struct *tpaca; 1526 struct paca_struct *tpaca;
1492 long timeout = 1000; 1527 long timeout = 10000;
1493 1528
1494 tpaca = &paca[cpu]; 1529 tpaca = &paca[cpu];
1495 1530
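
Editor's note: with KVM_GUESTDBG_USE_SW_BP set, an emulation-assist interrupt on a planted KVMPPC_INST_SW_BREAKPOINT now surfaces to userspace as KVM_EXIT_DEBUG instead of a program check injected into the guest. A hedged userspace sketch of the consuming side (vcpu_fd and the mmap'ed kvm_run are assumed to exist; patching the breakpoint into guest memory and all error handling are omitted):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: enable SW breakpoints and field the resulting debug exits. */
static void debug_loop(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
	};

	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);

	for (;;) {
		ioctl(vcpu_fd, KVM_RUN, 0);
		if (run->exit_reason == KVM_EXIT_DEBUG) {
			/* run->debug.arch.address holds the trapping PC */
			break;
		}
		/* ...handle other exit reasons... */
	}
}
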
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index b9615ba5b083..4fdc27c80f4c 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -163,6 +163,12 @@ void __init kvm_cma_reserve(void)
163 unsigned long align_size; 163 unsigned long align_size;
164 struct memblock_region *reg; 164 struct memblock_region *reg;
165 phys_addr_t selected_size = 0; 165 phys_addr_t selected_size = 0;
166
167 /*
 168 * The CMA reservation is needed only when running in HV mode.
169 */
170 if (!cpu_has_feature(CPU_FTR_HVMODE))
171 return;
166 /* 172 /*
167 * We cannot use memblock_phys_mem_size() here, because 173 * We cannot use memblock_phys_mem_size() here, because
168 * memblock_analyze() has not been called yet. 174 * memblock_analyze() has not been called yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f0c4db7704c3..edb2ccdbb2ba 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -355,6 +355,7 @@ kvmppc_hv_entry:
355 * MSR = ~IR|DR 355 * MSR = ~IR|DR
356 * R13 = PACA 356 * R13 = PACA
357 * R1 = host R1 357 * R1 = host R1
358 * R2 = TOC
358 * all other volatile GPRS = free 359 * all other volatile GPRS = free
359 */ 360 */
360 mflr r0 361 mflr r0
@@ -503,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
503toc_tlbie_lock: 504toc_tlbie_lock:
504 .tc native_tlbie_lock[TC],native_tlbie_lock 505 .tc native_tlbie_lock[TC],native_tlbie_lock
505 .previous 506 .previous
506 ld r3,toc_tlbie_lock@toc(2) 507 ld r3,toc_tlbie_lock@toc(r2)
507#ifdef __BIG_ENDIAN__ 508#ifdef __BIG_ENDIAN__
508 lwz r8,PACA_LOCK_TOKEN(r13) 509 lwz r8,PACA_LOCK_TOKEN(r13)
509#else 510#else
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index faffb27badd9..cf2eb16846d1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
295 return 0; 295 return 0;
296} 296}
297 297
298static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) 298static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start,
299 unsigned long end)
299{ 300{
300 /* XXX could be more clever ;) */ 301 /* XXX could be more clever ;) */
301 return 0; 302 return 0;
@@ -1319,6 +1320,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1319 int r = 0; 1320 int r = 0;
1320 1321
1321 switch (id) { 1322 switch (id) {
1323 case KVM_REG_PPC_DEBUG_INST:
1324 *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
1325 break;
1322 case KVM_REG_PPC_HIOR: 1326 case KVM_REG_PPC_HIOR:
1323 *val = get_reg_val(id, to_book3s(vcpu)->hior); 1327 *val = get_reg_val(id, to_book3s(vcpu)->hior);
1324 break; 1328 break;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b4c89fa6f109..9b55dec2d6cc 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
124} 124}
125#endif 125#endif
126 126
127/*
 128 * Load up the guest vcpu FP state if it's needed.
 129 * It also sets MSR_FP in the thread so the host knows
 130 * we're holding the FPU, and the host can then save the
 131 * guest vcpu FP state if another thread needs the FPU.
 132 * This simulates an FP unavailable fault.
 133 *
 134 * Must be called with preemption disabled.
135 */
136static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
137{
138#ifdef CONFIG_PPC_FPU
139 if (!(current->thread.regs->msr & MSR_FP)) {
140 enable_kernel_fp();
141 load_fp_state(&vcpu->arch.fp);
142 current->thread.fp_save_area = &vcpu->arch.fp;
143 current->thread.regs->msr |= MSR_FP;
144 }
145#endif
146}
147
148/*
 149 * Save the guest vcpu FP state into the thread.
 150 * Must be called with preemption disabled.
151 */
152static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
153{
154#ifdef CONFIG_PPC_FPU
155 if (current->thread.regs->msr & MSR_FP)
156 giveup_fpu(current);
157 current->thread.fp_save_area = NULL;
158#endif
159}
160
127static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) 161static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
128{ 162{
129#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) 163#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
@@ -134,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
134#endif 168#endif
135} 169}
136 170
171/*
 172 * Simulate an AltiVec unavailable fault to load the guest state
 173 * from the thread into the AltiVec unit.
 174 * Must be called with preemption disabled.
175 */
176static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
177{
178#ifdef CONFIG_ALTIVEC
179 if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
180 if (!(current->thread.regs->msr & MSR_VEC)) {
181 enable_kernel_altivec();
182 load_vr_state(&vcpu->arch.vr);
183 current->thread.vr_save_area = &vcpu->arch.vr;
184 current->thread.regs->msr |= MSR_VEC;
185 }
186 }
187#endif
188}
189
190/*
 191 * Save the guest vcpu AltiVec state into the thread.
 192 * Must be called with preemption disabled.
193 */
194static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu)
195{
196#ifdef CONFIG_ALTIVEC
197 if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
198 if (current->thread.regs->msr & MSR_VEC)
199 giveup_altivec(current);
200 current->thread.vr_save_area = NULL;
201 }
202#endif
203}
204
137static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) 205static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
138{ 206{
139 /* Synchronize guest's desire to get debug interrupts into shadow MSR */ 207 /* Synchronize guest's desire to get debug interrupts into shadow MSR */
@@ -267,6 +335,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
267 clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); 335 clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
268} 336}
269 337
338void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu)
339{
340 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG);
341}
342
343void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu)
344{
345 clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions);
346}
347
270static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) 348static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
271{ 349{
272 kvmppc_set_srr0(vcpu, srr0); 350 kvmppc_set_srr0(vcpu, srr0);
@@ -341,9 +419,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
341 case BOOKE_IRQPRIO_ITLB_MISS: 419 case BOOKE_IRQPRIO_ITLB_MISS:
342 case BOOKE_IRQPRIO_SYSCALL: 420 case BOOKE_IRQPRIO_SYSCALL:
343 case BOOKE_IRQPRIO_FP_UNAVAIL: 421 case BOOKE_IRQPRIO_FP_UNAVAIL:
422#ifdef CONFIG_SPE_POSSIBLE
344 case BOOKE_IRQPRIO_SPE_UNAVAIL: 423 case BOOKE_IRQPRIO_SPE_UNAVAIL:
345 case BOOKE_IRQPRIO_SPE_FP_DATA: 424 case BOOKE_IRQPRIO_SPE_FP_DATA:
346 case BOOKE_IRQPRIO_SPE_FP_ROUND: 425 case BOOKE_IRQPRIO_SPE_FP_ROUND:
426#endif
427#ifdef CONFIG_ALTIVEC
428 case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL:
429 case BOOKE_IRQPRIO_ALTIVEC_ASSIST:
430#endif
347 case BOOKE_IRQPRIO_AP_UNAVAIL: 431 case BOOKE_IRQPRIO_AP_UNAVAIL:
348 allowed = 1; 432 allowed = 1;
349 msr_mask = MSR_CE | MSR_ME | MSR_DE; 433 msr_mask = MSR_CE | MSR_ME | MSR_DE;
@@ -377,7 +461,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
377 allowed = vcpu->arch.shared->msr & MSR_DE; 461 allowed = vcpu->arch.shared->msr & MSR_DE;
378 allowed = allowed && !crit; 462 allowed = allowed && !crit;
379 msr_mask = MSR_ME; 463 msr_mask = MSR_ME;
380 int_class = INT_CLASS_CRIT; 464 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
465 int_class = INT_CLASS_DBG;
466 else
467 int_class = INT_CLASS_CRIT;
468
381 break; 469 break;
382 } 470 }
383 471
@@ -654,20 +742,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
654 742
655 /* 743 /*
656 * Since we can't trap on MSR_FP in GS-mode, we consider the guest 744 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
657 * as always using the FPU. Kernel usage of FP (via 745 * as always using the FPU.
658 * enable_kernel_fp()) in this thread must not occur while
659 * vcpu->fpu_active is set.
660 */ 746 */
661 vcpu->fpu_active = 1;
662
663 kvmppc_load_guest_fp(vcpu); 747 kvmppc_load_guest_fp(vcpu);
664#endif 748#endif
665 749
750#ifdef CONFIG_ALTIVEC
751 /* Save userspace AltiVec state in stack */
752 if (cpu_has_feature(CPU_FTR_ALTIVEC))
753 enable_kernel_altivec();
754 /*
755 * Since we can't trap on MSR_VEC in GS-mode, we consider the guest
756 * as always using the AltiVec.
757 */
758 kvmppc_load_guest_altivec(vcpu);
759#endif
760
666 /* Switch to guest debug context */ 761 /* Switch to guest debug context */
667 debug = vcpu->arch.shadow_dbg_reg; 762 debug = vcpu->arch.dbg_reg;
668 switch_booke_debug_regs(&debug); 763 switch_booke_debug_regs(&debug);
669 debug = current->thread.debug; 764 debug = current->thread.debug;
670 current->thread.debug = vcpu->arch.shadow_dbg_reg; 765 current->thread.debug = vcpu->arch.dbg_reg;
671 766
672 vcpu->arch.pgdir = current->mm->pgd; 767 vcpu->arch.pgdir = current->mm->pgd;
673 kvmppc_fix_ee_before_entry(); 768 kvmppc_fix_ee_before_entry();
@@ -683,8 +778,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
683 778
684#ifdef CONFIG_PPC_FPU 779#ifdef CONFIG_PPC_FPU
685 kvmppc_save_guest_fp(vcpu); 780 kvmppc_save_guest_fp(vcpu);
781#endif
686 782
687 vcpu->fpu_active = 0; 783#ifdef CONFIG_ALTIVEC
784 kvmppc_save_guest_altivec(vcpu);
688#endif 785#endif
689 786
690out: 787out:
@@ -728,9 +825,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
728 825
729static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) 826static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
730{ 827{
731 struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); 828 struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg);
732 u32 dbsr = vcpu->arch.dbsr; 829 u32 dbsr = vcpu->arch.dbsr;
733 830
831 if (vcpu->guest_debug == 0) {
832 /*
 833 * Debug resources belong to the guest.
 834 * Imprecise debug events are not injected.
835 */
836 if (dbsr & DBSR_IDE) {
837 dbsr &= ~DBSR_IDE;
838 if (!dbsr)
839 return RESUME_GUEST;
840 }
841
842 if (dbsr && (vcpu->arch.shared->msr & MSR_DE) &&
843 (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM))
844 kvmppc_core_queue_debug(vcpu);
845
846 /* Inject a program interrupt if trap debug is not allowed */
847 if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE))
848 kvmppc_core_queue_program(vcpu, ESR_PTR);
849
850 return RESUME_GUEST;
851 }
852
853 /*
 854 * Debug resources are owned by userspace.
 855 * Clear the guest dbsr (vcpu->arch.dbsr).
856 */
857 vcpu->arch.dbsr = 0;
734 run->debug.arch.status = 0; 858 run->debug.arch.status = 0;
735 run->debug.arch.address = vcpu->arch.pc; 859 run->debug.arch.address = vcpu->arch.pc;
736 860
@@ -868,7 +992,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
868 case BOOKE_INTERRUPT_DATA_STORAGE: 992 case BOOKE_INTERRUPT_DATA_STORAGE:
869 case BOOKE_INTERRUPT_DTLB_MISS: 993 case BOOKE_INTERRUPT_DTLB_MISS:
870 case BOOKE_INTERRUPT_HV_PRIV: 994 case BOOKE_INTERRUPT_HV_PRIV:
871 emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); 995 emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
996 break;
997 case BOOKE_INTERRUPT_PROGRAM:
998 /* SW breakpoints arrive as illegal instructions on HV */
999 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1000 emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
872 break; 1001 break;
873 default: 1002 default:
874 break; 1003 break;
@@ -947,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
947 break; 1076 break;
948 1077
949 case BOOKE_INTERRUPT_PROGRAM: 1078 case BOOKE_INTERRUPT_PROGRAM:
1079 if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
1080 (last_inst == KVMPPC_INST_SW_BREAKPOINT)) {
1081 /*
 1082 * We are here because of an SW breakpoint instruction,
 1083 * so return to the host to handle it.
1084 */
1085 r = kvmppc_handle_debug(run, vcpu);
1086 run->exit_reason = KVM_EXIT_DEBUG;
1087 kvmppc_account_exit(vcpu, DEBUG_EXITS);
1088 break;
1089 }
1090
950 if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { 1091 if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
951 /* 1092 /*
952 * Program traps generated by user-level software must 1093 * Program traps generated by user-level software must
@@ -991,7 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
991 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); 1132 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
992 r = RESUME_GUEST; 1133 r = RESUME_GUEST;
993 break; 1134 break;
994#else 1135#elif defined(CONFIG_SPE_POSSIBLE)
995 case BOOKE_INTERRUPT_SPE_UNAVAIL: 1136 case BOOKE_INTERRUPT_SPE_UNAVAIL:
996 /* 1137 /*
997 * Guest wants SPE, but host kernel doesn't support it. Send 1138 * Guest wants SPE, but host kernel doesn't support it. Send
@@ -1012,6 +1153,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
1012 run->hw.hardware_exit_reason = exit_nr; 1153 run->hw.hardware_exit_reason = exit_nr;
1013 r = RESUME_HOST; 1154 r = RESUME_HOST;
1014 break; 1155 break;
1156#endif /* CONFIG_SPE_POSSIBLE */
1157
1158/*
 1159 * On cores with the Vector category, KVM is loaded only if CONFIG_ALTIVEC is set;
1160 * see kvmppc_core_check_processor_compat().
1161 */
1162#ifdef CONFIG_ALTIVEC
1163 case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL:
1164 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
1165 r = RESUME_GUEST;
1166 break;
1167
1168 case BOOKE_INTERRUPT_ALTIVEC_ASSIST:
1169 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST);
1170 r = RESUME_GUEST;
1171 break;
1015#endif 1172#endif
1016 1173
1017 case BOOKE_INTERRUPT_DATA_STORAGE: 1174 case BOOKE_INTERRUPT_DATA_STORAGE:
@@ -1188,6 +1345,8 @@ out:
1188 else { 1345 else {
1189 /* interrupts now hard-disabled */ 1346 /* interrupts now hard-disabled */
1190 kvmppc_fix_ee_before_entry(); 1347 kvmppc_fix_ee_before_entry();
1348 kvmppc_load_guest_fp(vcpu);
1349 kvmppc_load_guest_altivec(vcpu);
1191 } 1350 }
1192 } 1351 }
1193 1352
@@ -1243,6 +1402,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
1243 setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, 1402 setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
1244 (unsigned long)vcpu); 1403 (unsigned long)vcpu);
1245 1404
1405 /*
 1406 * Clear DBSR.MRR to avoid a spurious guest debug interrupt;
 1407 * this event is of interest only to the host.
1408 */
1409 mtspr(SPRN_DBSR, DBSR_MRR);
1246 return 0; 1410 return 0;
1247} 1411}
1248 1412
@@ -1457,144 +1621,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1457 return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); 1621 return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
1458} 1622}
1459 1623
1460int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1624int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
1625 union kvmppc_one_reg *val)
1461{ 1626{
1462 int r = 0; 1627 int r = 0;
1463 union kvmppc_one_reg val;
1464 int size;
1465
1466 size = one_reg_size(reg->id);
1467 if (size > sizeof(val))
1468 return -EINVAL;
1469 1628
1470 switch (reg->id) { 1629 switch (id) {
1471 case KVM_REG_PPC_IAC1: 1630 case KVM_REG_PPC_IAC1:
1472 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); 1631 *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1);
1473 break; 1632 break;
1474 case KVM_REG_PPC_IAC2: 1633 case KVM_REG_PPC_IAC2:
1475 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); 1634 *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2);
1476 break; 1635 break;
1477#if CONFIG_PPC_ADV_DEBUG_IACS > 2 1636#if CONFIG_PPC_ADV_DEBUG_IACS > 2
1478 case KVM_REG_PPC_IAC3: 1637 case KVM_REG_PPC_IAC3:
1479 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); 1638 *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3);
1480 break; 1639 break;
1481 case KVM_REG_PPC_IAC4: 1640 case KVM_REG_PPC_IAC4:
1482 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); 1641 *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4);
1483 break; 1642 break;
1484#endif 1643#endif
1485 case KVM_REG_PPC_DAC1: 1644 case KVM_REG_PPC_DAC1:
1486 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); 1645 *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1);
1487 break; 1646 break;
1488 case KVM_REG_PPC_DAC2: 1647 case KVM_REG_PPC_DAC2:
1489 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); 1648 *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2);
1490 break; 1649 break;
1491 case KVM_REG_PPC_EPR: { 1650 case KVM_REG_PPC_EPR: {
1492 u32 epr = kvmppc_get_epr(vcpu); 1651 u32 epr = kvmppc_get_epr(vcpu);
1493 val = get_reg_val(reg->id, epr); 1652 *val = get_reg_val(id, epr);
1494 break; 1653 break;
1495 } 1654 }
1496#if defined(CONFIG_64BIT) 1655#if defined(CONFIG_64BIT)
1497 case KVM_REG_PPC_EPCR: 1656 case KVM_REG_PPC_EPCR:
1498 val = get_reg_val(reg->id, vcpu->arch.epcr); 1657 *val = get_reg_val(id, vcpu->arch.epcr);
1499 break; 1658 break;
1500#endif 1659#endif
1501 case KVM_REG_PPC_TCR: 1660 case KVM_REG_PPC_TCR:
1502 val = get_reg_val(reg->id, vcpu->arch.tcr); 1661 *val = get_reg_val(id, vcpu->arch.tcr);
1503 break; 1662 break;
1504 case KVM_REG_PPC_TSR: 1663 case KVM_REG_PPC_TSR:
1505 val = get_reg_val(reg->id, vcpu->arch.tsr); 1664 *val = get_reg_val(id, vcpu->arch.tsr);
1506 break; 1665 break;
1507 case KVM_REG_PPC_DEBUG_INST: 1666 case KVM_REG_PPC_DEBUG_INST:
1508 val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); 1667 *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
1509 break; 1668 break;
1510 case KVM_REG_PPC_VRSAVE: 1669 case KVM_REG_PPC_VRSAVE:
1511 val = get_reg_val(reg->id, vcpu->arch.vrsave); 1670 *val = get_reg_val(id, vcpu->arch.vrsave);
1512 break; 1671 break;
1513 default: 1672 default:
1514 r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); 1673 r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
1515 break; 1674 break;
1516 } 1675 }
1517 1676
1518 if (r)
1519 return r;
1520
1521 if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
1522 r = -EFAULT;
1523
1524 return r; 1677 return r;
1525} 1678}
1526 1679
1527int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1680int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
1681 union kvmppc_one_reg *val)
1528{ 1682{
1529 int r = 0; 1683 int r = 0;
1530 union kvmppc_one_reg val;
1531 int size;
1532 1684
1533 size = one_reg_size(reg->id); 1685 switch (id) {
1534 if (size > sizeof(val))
1535 return -EINVAL;
1536
1537 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
1538 return -EFAULT;
1539
1540 switch (reg->id) {
1541 case KVM_REG_PPC_IAC1: 1686 case KVM_REG_PPC_IAC1:
1542 vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); 1687 vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val);
1543 break; 1688 break;
1544 case KVM_REG_PPC_IAC2: 1689 case KVM_REG_PPC_IAC2:
1545 vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); 1690 vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val);
1546 break; 1691 break;
1547#if CONFIG_PPC_ADV_DEBUG_IACS > 2 1692#if CONFIG_PPC_ADV_DEBUG_IACS > 2
1548 case KVM_REG_PPC_IAC3: 1693 case KVM_REG_PPC_IAC3:
1549 vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); 1694 vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val);
1550 break; 1695 break;
1551 case KVM_REG_PPC_IAC4: 1696 case KVM_REG_PPC_IAC4:
1552 vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); 1697 vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val);
1553 break; 1698 break;
1554#endif 1699#endif
1555 case KVM_REG_PPC_DAC1: 1700 case KVM_REG_PPC_DAC1:
1556 vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); 1701 vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val);
1557 break; 1702 break;
1558 case KVM_REG_PPC_DAC2: 1703 case KVM_REG_PPC_DAC2:
1559 vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); 1704 vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val);
1560 break; 1705 break;
1561 case KVM_REG_PPC_EPR: { 1706 case KVM_REG_PPC_EPR: {
1562 u32 new_epr = set_reg_val(reg->id, val); 1707 u32 new_epr = set_reg_val(id, *val);
1563 kvmppc_set_epr(vcpu, new_epr); 1708 kvmppc_set_epr(vcpu, new_epr);
1564 break; 1709 break;
1565 } 1710 }
1566#if defined(CONFIG_64BIT) 1711#if defined(CONFIG_64BIT)
1567 case KVM_REG_PPC_EPCR: { 1712 case KVM_REG_PPC_EPCR: {
1568 u32 new_epcr = set_reg_val(reg->id, val); 1713 u32 new_epcr = set_reg_val(id, *val);
1569 kvmppc_set_epcr(vcpu, new_epcr); 1714 kvmppc_set_epcr(vcpu, new_epcr);
1570 break; 1715 break;
1571 } 1716 }
1572#endif 1717#endif
1573 case KVM_REG_PPC_OR_TSR: { 1718 case KVM_REG_PPC_OR_TSR: {
1574 u32 tsr_bits = set_reg_val(reg->id, val); 1719 u32 tsr_bits = set_reg_val(id, *val);
1575 kvmppc_set_tsr_bits(vcpu, tsr_bits); 1720 kvmppc_set_tsr_bits(vcpu, tsr_bits);
1576 break; 1721 break;
1577 } 1722 }
1578 case KVM_REG_PPC_CLEAR_TSR: { 1723 case KVM_REG_PPC_CLEAR_TSR: {
1579 u32 tsr_bits = set_reg_val(reg->id, val); 1724 u32 tsr_bits = set_reg_val(id, *val);
1580 kvmppc_clr_tsr_bits(vcpu, tsr_bits); 1725 kvmppc_clr_tsr_bits(vcpu, tsr_bits);
1581 break; 1726 break;
1582 } 1727 }
1583 case KVM_REG_PPC_TSR: { 1728 case KVM_REG_PPC_TSR: {
1584 u32 tsr = set_reg_val(reg->id, val); 1729 u32 tsr = set_reg_val(id, *val);
1585 kvmppc_set_tsr(vcpu, tsr); 1730 kvmppc_set_tsr(vcpu, tsr);
1586 break; 1731 break;
1587 } 1732 }
1588 case KVM_REG_PPC_TCR: { 1733 case KVM_REG_PPC_TCR: {
1589 u32 tcr = set_reg_val(reg->id, val); 1734 u32 tcr = set_reg_val(id, *val);
1590 kvmppc_set_tcr(vcpu, tcr); 1735 kvmppc_set_tcr(vcpu, tcr);
1591 break; 1736 break;
1592 } 1737 }
1593 case KVM_REG_PPC_VRSAVE: 1738 case KVM_REG_PPC_VRSAVE:
1594 vcpu->arch.vrsave = set_reg_val(reg->id, val); 1739 vcpu->arch.vrsave = set_reg_val(id, *val);
1595 break; 1740 break;
1596 default: 1741 default:
1597 r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); 1742 r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
1598 break; 1743 break;
1599 } 1744 }
1600 1745
@@ -1694,10 +1839,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
1694 update_timer_ints(vcpu); 1839 update_timer_ints(vcpu);
1695} 1840}
1696 1841
1697void kvmppc_decrementer_func(unsigned long data) 1842void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
1698{ 1843{
1699 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
1700
1701 if (vcpu->arch.tcr & TCR_ARE) { 1844 if (vcpu->arch.tcr & TCR_ARE) {
1702 vcpu->arch.dec = vcpu->arch.decar; 1845 vcpu->arch.dec = vcpu->arch.decar;
1703 kvmppc_emulate_dec(vcpu); 1846 kvmppc_emulate_dec(vcpu);
@@ -1842,7 +1985,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1842 int n, b = 0, w = 0; 1985 int n, b = 0, w = 0;
1843 1986
1844 if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { 1987 if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
1845 vcpu->arch.shadow_dbg_reg.dbcr0 = 0; 1988 vcpu->arch.dbg_reg.dbcr0 = 0;
1846 vcpu->guest_debug = 0; 1989 vcpu->guest_debug = 0;
1847 kvm_guest_protect_msr(vcpu, MSR_DE, false); 1990 kvm_guest_protect_msr(vcpu, MSR_DE, false);
1848 return 0; 1991 return 0;
@@ -1850,15 +1993,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1850 1993
1851 kvm_guest_protect_msr(vcpu, MSR_DE, true); 1994 kvm_guest_protect_msr(vcpu, MSR_DE, true);
1852 vcpu->guest_debug = dbg->control; 1995 vcpu->guest_debug = dbg->control;
1853 vcpu->arch.shadow_dbg_reg.dbcr0 = 0; 1996 vcpu->arch.dbg_reg.dbcr0 = 0;
1854 /* Set DBCR0_EDM in guest visible DBCR0 register. */
1855 vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
1856 1997
1857 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 1998 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
1858 vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; 1999 vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
1859 2000
1860 /* Code below handles only HW breakpoints */ 2001 /* Code below handles only HW breakpoints */
1861 dbg_reg = &(vcpu->arch.shadow_dbg_reg); 2002 dbg_reg = &(vcpu->arch.dbg_reg);
1862 2003
1863#ifdef CONFIG_KVM_BOOKE_HV 2004#ifdef CONFIG_KVM_BOOKE_HV
1864 /* 2005 /*
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index f753543c56fa..22ba08ea68e9 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -32,9 +32,15 @@
32#define BOOKE_IRQPRIO_ALIGNMENT 2 32#define BOOKE_IRQPRIO_ALIGNMENT 2
33#define BOOKE_IRQPRIO_PROGRAM 3 33#define BOOKE_IRQPRIO_PROGRAM 3
34#define BOOKE_IRQPRIO_FP_UNAVAIL 4 34#define BOOKE_IRQPRIO_FP_UNAVAIL 4
35#ifdef CONFIG_SPE_POSSIBLE
35#define BOOKE_IRQPRIO_SPE_UNAVAIL 5 36#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
36#define BOOKE_IRQPRIO_SPE_FP_DATA 6 37#define BOOKE_IRQPRIO_SPE_FP_DATA 6
37#define BOOKE_IRQPRIO_SPE_FP_ROUND 7 38#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
39#endif
40#ifdef CONFIG_PPC_E500MC
41#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5
42#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6
43#endif
38#define BOOKE_IRQPRIO_SYSCALL 8 44#define BOOKE_IRQPRIO_SYSCALL 8
39#define BOOKE_IRQPRIO_AP_UNAVAIL 9 45#define BOOKE_IRQPRIO_AP_UNAVAIL 9
40#define BOOKE_IRQPRIO_DTLB_MISS 10 46#define BOOKE_IRQPRIO_DTLB_MISS 10
@@ -116,40 +122,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
116extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, 122extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
117 ulong *spr_val); 123 ulong *spr_val);
118 124
119/*
120 * Load up guest vcpu FP state if it's needed.
121 * It also set the MSR_FP in thread so that host know
122 * we're holding FPU, and then host can help to save
123 * guest vcpu FP state if other threads require to use FPU.
124 * This simulates an FP unavailable fault.
125 *
126 * It requires to be called with preemption disabled.
127 */
128static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
129{
130#ifdef CONFIG_PPC_FPU
131 if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
132 enable_kernel_fp();
133 load_fp_state(&vcpu->arch.fp);
134 current->thread.fp_save_area = &vcpu->arch.fp;
135 current->thread.regs->msr |= MSR_FP;
136 }
137#endif
138}
139
140/*
141 * Save guest vcpu FP state into thread.
142 * It requires to be called with preemption disabled.
143 */
144static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
145{
146#ifdef CONFIG_PPC_FPU
147 if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
148 giveup_fpu(current);
149 current->thread.fp_save_area = NULL;
150#endif
151}
152
153static inline void kvmppc_clear_dbsr(void) 125static inline void kvmppc_clear_dbsr(void)
154{ 126{
155 mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); 127 mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 28c158881d23..a82f64502de1 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -25,6 +25,7 @@
25 25
26#define OP_19_XOP_RFI 50 26#define OP_19_XOP_RFI 50
27#define OP_19_XOP_RFCI 51 27#define OP_19_XOP_RFCI 51
28#define OP_19_XOP_RFDI 39
28 29
29#define OP_31_XOP_MFMSR 83 30#define OP_31_XOP_MFMSR 83
30#define OP_31_XOP_WRTEE 131 31#define OP_31_XOP_WRTEE 131
@@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
37 kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); 38 kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
38} 39}
39 40
41static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu)
42{
43 vcpu->arch.pc = vcpu->arch.dsrr0;
44 kvmppc_set_msr(vcpu, vcpu->arch.dsrr1);
45}
46
40static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) 47static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
41{ 48{
42 vcpu->arch.pc = vcpu->arch.csrr0; 49 vcpu->arch.pc = vcpu->arch.csrr0;
@@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
65 *advance = 0; 72 *advance = 0;
66 break; 73 break;
67 74
75 case OP_19_XOP_RFDI:
76 kvmppc_emul_rfdi(vcpu);
77 kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS);
78 *advance = 0;
79 break;
80
68 default: 81 default:
69 emulated = EMULATE_FAIL; 82 emulated = EMULATE_FAIL;
70 break; 83 break;
@@ -118,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
118int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 131int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
119{ 132{
120 int emulated = EMULATE_DONE; 133 int emulated = EMULATE_DONE;
134 bool debug_inst = false;
121 135
122 switch (sprn) { 136 switch (sprn) {
123 case SPRN_DEAR: 137 case SPRN_DEAR:
@@ -132,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
132 case SPRN_CSRR1: 146 case SPRN_CSRR1:
133 vcpu->arch.csrr1 = spr_val; 147 vcpu->arch.csrr1 = spr_val;
134 break; 148 break;
149 case SPRN_DSRR0:
150 vcpu->arch.dsrr0 = spr_val;
151 break;
152 case SPRN_DSRR1:
153 vcpu->arch.dsrr1 = spr_val;
154 break;
155 case SPRN_IAC1:
156 /*
 157	 * If userspace is debugging the guest then the guest
 158	 * cannot access debug registers.
159 */
160 if (vcpu->guest_debug)
161 break;
162
163 debug_inst = true;
164 vcpu->arch.dbg_reg.iac1 = spr_val;
165 break;
166 case SPRN_IAC2:
167 /*
 168	 * If userspace is debugging the guest then the guest
 169	 * cannot access debug registers.
170 */
171 if (vcpu->guest_debug)
172 break;
173
174 debug_inst = true;
175 vcpu->arch.dbg_reg.iac2 = spr_val;
176 break;
177#if CONFIG_PPC_ADV_DEBUG_IACS > 2
178 case SPRN_IAC3:
179 /*
 180	 * If userspace is debugging the guest then the guest
 181	 * cannot access debug registers.
182 */
183 if (vcpu->guest_debug)
184 break;
185
186 debug_inst = true;
187 vcpu->arch.dbg_reg.iac3 = spr_val;
188 break;
189 case SPRN_IAC4:
190 /*
 191	 * If userspace is debugging the guest then the guest
 192	 * cannot access debug registers.
193 */
194 if (vcpu->guest_debug)
195 break;
196
197 debug_inst = true;
198 vcpu->arch.dbg_reg.iac4 = spr_val;
199 break;
200#endif
201 case SPRN_DAC1:
202 /*
 203	 * If userspace is debugging the guest then the guest
 204	 * cannot access debug registers.
205 */
206 if (vcpu->guest_debug)
207 break;
208
209 debug_inst = true;
210 vcpu->arch.dbg_reg.dac1 = spr_val;
211 break;
212 case SPRN_DAC2:
213 /*
 214	 * If userspace is debugging the guest then the guest
 215	 * cannot access debug registers.
216 */
217 if (vcpu->guest_debug)
218 break;
219
220 debug_inst = true;
221 vcpu->arch.dbg_reg.dac2 = spr_val;
222 break;
135 case SPRN_DBCR0: 223 case SPRN_DBCR0:
224 /*
 225	 * If userspace is debugging the guest then the guest
 226	 * cannot access debug registers.
227 */
228 if (vcpu->guest_debug)
229 break;
230
231 debug_inst = true;
232 spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE |
233 DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 |
234 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W);
235
136 vcpu->arch.dbg_reg.dbcr0 = spr_val; 236 vcpu->arch.dbg_reg.dbcr0 = spr_val;
137 break; 237 break;
138 case SPRN_DBCR1: 238 case SPRN_DBCR1:
239 /*
 240	 * If userspace is debugging the guest then the guest
 241	 * cannot access debug registers.
242 */
243 if (vcpu->guest_debug)
244 break;
245
246 debug_inst = true;
139 vcpu->arch.dbg_reg.dbcr1 = spr_val; 247 vcpu->arch.dbg_reg.dbcr1 = spr_val;
140 break; 248 break;
249 case SPRN_DBCR2:
250 /*
 251	 * If userspace is debugging the guest then the guest
 252	 * cannot access debug registers.
253 */
254 if (vcpu->guest_debug)
255 break;
256
257 debug_inst = true;
258 vcpu->arch.dbg_reg.dbcr2 = spr_val;
259 break;
141 case SPRN_DBSR: 260 case SPRN_DBSR:
261 /*
 262	 * If userspace is debugging the guest then the guest
 263	 * cannot access debug registers.
264 */
265 if (vcpu->guest_debug)
266 break;
267
142 vcpu->arch.dbsr &= ~spr_val; 268 vcpu->arch.dbsr &= ~spr_val;
269 if (!(vcpu->arch.dbsr & ~DBSR_IDE))
270 kvmppc_core_dequeue_debug(vcpu);
143 break; 271 break;
144 case SPRN_TSR: 272 case SPRN_TSR:
145 kvmppc_clr_tsr_bits(vcpu, spr_val); 273 kvmppc_clr_tsr_bits(vcpu, spr_val);
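The DBCR0 case above only lets the guest set a whitelisted set of event-enable bits; anything else, including DBCR0_EDM (which signals that an external debugger owns the debug facility), is masked off before the value is stored and later pushed into the hardware register. Below is a minimal standalone sketch of that filtering step; the bit values are placeholders, not the real DBCR0 layout.

/* Standalone sketch of the DBCR0 write filtering shown above.
 * The bit values below are placeholders, not the real SPR layout. */
#include <stdint.h>
#include <stdio.h>

#define DBG_IDM   (1u << 0)   /* internal debug mode enable (placeholder) */
#define DBG_IC    (1u << 1)   /* instruction completion event (placeholder) */
#define DBG_IAC1  (1u << 2)   /* instruction address compare 1 (placeholder) */
#define DBG_DAC1R (1u << 3)   /* data address compare 1, reads (placeholder) */
#define DBG_EDM   (1u << 31)  /* external debug mode - never guest writable */

/* Bits the guest is allowed to control. */
static const uint32_t guest_writable = DBG_IDM | DBG_IC | DBG_IAC1 | DBG_DAC1R;

static uint32_t emulate_dbcr0_write(uint32_t spr_val)
{
	return spr_val & guest_writable;   /* strip EDM and any reserved bits */
}

int main(void)
{
	unsigned int v = emulate_dbcr0_write(DBG_EDM | DBG_IDM | DBG_IC);

	printf("stored DBCR0 = %#x\n", v);  /* EDM is filtered out */
	return 0;
}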
@@ -252,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
252 emulated = EMULATE_FAIL; 380 emulated = EMULATE_FAIL;
253 } 381 }
254 382
383 if (debug_inst) {
384 current->thread.debug = vcpu->arch.dbg_reg;
385 switch_booke_debug_regs(&vcpu->arch.dbg_reg);
386 }
255 return emulated; 387 return emulated;
256} 388}
257 389
@@ -278,12 +410,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
278 case SPRN_CSRR1: 410 case SPRN_CSRR1:
279 *spr_val = vcpu->arch.csrr1; 411 *spr_val = vcpu->arch.csrr1;
280 break; 412 break;
413 case SPRN_DSRR0:
414 *spr_val = vcpu->arch.dsrr0;
415 break;
416 case SPRN_DSRR1:
417 *spr_val = vcpu->arch.dsrr1;
418 break;
419 case SPRN_IAC1:
420 *spr_val = vcpu->arch.dbg_reg.iac1;
421 break;
422 case SPRN_IAC2:
423 *spr_val = vcpu->arch.dbg_reg.iac2;
424 break;
425#if CONFIG_PPC_ADV_DEBUG_IACS > 2
426 case SPRN_IAC3:
427 *spr_val = vcpu->arch.dbg_reg.iac3;
428 break;
429 case SPRN_IAC4:
430 *spr_val = vcpu->arch.dbg_reg.iac4;
431 break;
432#endif
433 case SPRN_DAC1:
434 *spr_val = vcpu->arch.dbg_reg.dac1;
435 break;
436 case SPRN_DAC2:
437 *spr_val = vcpu->arch.dbg_reg.dac2;
438 break;
281 case SPRN_DBCR0: 439 case SPRN_DBCR0:
282 *spr_val = vcpu->arch.dbg_reg.dbcr0; 440 *spr_val = vcpu->arch.dbg_reg.dbcr0;
441 if (vcpu->guest_debug)
442 *spr_val = *spr_val | DBCR0_EDM;
283 break; 443 break;
284 case SPRN_DBCR1: 444 case SPRN_DBCR1:
285 *spr_val = vcpu->arch.dbg_reg.dbcr1; 445 *spr_val = vcpu->arch.dbg_reg.dbcr1;
286 break; 446 break;
447 case SPRN_DBCR2:
448 *spr_val = vcpu->arch.dbg_reg.dbcr2;
449 break;
287 case SPRN_DBSR: 450 case SPRN_DBSR:
288 *spr_val = vcpu->arch.dbsr; 451 *spr_val = vcpu->arch.dbsr;
289 break; 452 break;
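On the read side (the mfspr path above), DBCR0_EDM is OR-ed into the returned value whenever userspace owns the debug resources, so the guest can tell that its own debug programming is being overridden. A tiny sketch of that read path, again with a placeholder bit value:

/* Sketch of the mfspr path: report "external debug mode" to the guest
 * while userspace debugging is active. DBG_EDM is a placeholder value. */
#include <stdint.h>
#include <stdio.h>

#define DBG_EDM (1u << 31)

static uint32_t emulate_dbcr0_read(uint32_t stored, int userspace_debug)
{
	return userspace_debug ? (stored | DBG_EDM) : stored;
}

int main(void)
{
	printf("%#x\n", (unsigned int)emulate_dbcr0_read(0x3, 1)); /* guest sees EDM set */
	return 0;
}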
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index e9fa56a911fd..81bd8a07aa51 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -238,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
238kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ 238kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
239 SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) 239 SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
240kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ 240kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
241 SPRN_SRR0, SPRN_SRR1,NEED_ESR 241 SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
242kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ 242kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
243 SPRN_SRR0, SPRN_SRR1, 0 243 SPRN_SRR0, SPRN_SRR1, 0
244kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ 244kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
@@ -256,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
256 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) 256 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
257kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ 257kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
258 SPRN_SRR0, SPRN_SRR1, 0 258 SPRN_SRR0, SPRN_SRR1, 0
259kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \ 259kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
260 SPRN_SRR0, SPRN_SRR1, 0 260 SPRN_SRR0, SPRN_SRR1, 0
261kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \ 261kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
262 SPRN_SRR0, SPRN_SRR1, 0
263kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
264 SPRN_SRR0, SPRN_SRR1, 0 262 SPRN_SRR0, SPRN_SRR1, 0
265kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ 263kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
266 SPRN_SRR0, SPRN_SRR1, 0 264 SPRN_SRR0, SPRN_SRR1, 0
@@ -350,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
350kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 348kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
351kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ 349kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
352 SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) 350 SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
353kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR 351kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
354kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 352kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
355kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 353kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
356kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 354kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
@@ -361,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
361kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ 359kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
362 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) 360 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
363kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 361kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
364kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
365kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
366kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
367kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 362kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
368kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 363kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
369kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ 364kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index a326178bdea5..72920bed3ac6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -22,6 +22,7 @@
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23#include <asm/mmu-book3e.h> 23#include <asm/mmu-book3e.h>
24#include <asm/tlb.h> 24#include <asm/tlb.h>
25#include <asm/cputhreads.h>
25 26
26enum vcpu_ftr { 27enum vcpu_ftr {
27 VCPU_FTR_MMU_V2 28 VCPU_FTR_MMU_V2
@@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
289#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) 290#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
290#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) 291#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
291#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) 292#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
293
294/*
295 * These functions should be called with preemption disabled
296 * and the returned value is valid only in that context
297 */
298static inline int get_thread_specific_lpid(int vm_lpid)
299{
300 int vcpu_lpid = vm_lpid;
301
302 if (threads_per_core == 2)
303 vcpu_lpid |= smp_processor_id() & 1;
304
305 return vcpu_lpid;
306}
307
308static inline int get_lpid(struct kvm_vcpu *vcpu)
309{
310 return get_thread_specific_lpid(vcpu->kvm->arch.lpid);
311}
292#else 312#else
293unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, 313unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
294 struct kvm_book3e_206_tlb_entry *gtlbe); 314 struct kvm_book3e_206_tlb_entry *gtlbe);
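get_thread_specific_lpid() works because the VM's base lpid is kept even on dual-threaded cores (see the e500mc changes further down): thread 0 runs with the base lpid and thread 1 with base + 1. Here is a hedged user-space sketch of that computation, with threads_per_core and the CPU id passed as plain parameters instead of the kernel globals:

/* Sketch of the vcpu lpid computation. In the kernel, cpu_id comes from
 * smp_processor_id(), which is why preemption must stay disabled while
 * the returned value is in use. */
#include <stdio.h>

static int thread_specific_lpid(int vm_lpid, int threads_per_core, int cpu_id)
{
	int vcpu_lpid = vm_lpid;

	if (threads_per_core == 2)
		vcpu_lpid |= cpu_id & 1;   /* odd threads use base lpid + 1 */

	return vcpu_lpid;
}

int main(void)
{
	/* VM with base lpid 4 on an e6500-like core: its threads see 4 and 5. */
	printf("%d %d\n",
	       thread_specific_lpid(4, 2, 8),    /* cpu 8 -> thread 0 -> 4 */
	       thread_specific_lpid(4, 2, 9));   /* cpu 9 -> thread 1 -> 5 */
	return 0;
}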
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index c99c40e9182a..ce7291c79f6c 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -259,6 +259,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
259 break; 259 break;
260 260
261 /* extra exceptions */ 261 /* extra exceptions */
262#ifdef CONFIG_SPE_POSSIBLE
262 case SPRN_IVOR32: 263 case SPRN_IVOR32:
263 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; 264 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
264 break; 265 break;
@@ -268,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
268 case SPRN_IVOR34: 269 case SPRN_IVOR34:
269 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; 270 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
270 break; 271 break;
272#endif
273#ifdef CONFIG_ALTIVEC
274 case SPRN_IVOR32:
275 vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val;
276 break;
277 case SPRN_IVOR33:
278 vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val;
279 break;
280#endif
271 case SPRN_IVOR35: 281 case SPRN_IVOR35:
272 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; 282 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
273 break; 283 break;
@@ -381,6 +391,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
381 break; 391 break;
382 392
383 /* extra exceptions */ 393 /* extra exceptions */
394#ifdef CONFIG_SPE_POSSIBLE
384 case SPRN_IVOR32: 395 case SPRN_IVOR32:
385 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 396 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
386 break; 397 break;
@@ -390,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
390 case SPRN_IVOR34: 401 case SPRN_IVOR34:
391 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 402 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
392 break; 403 break;
404#endif
405#ifdef CONFIG_ALTIVEC
406 case SPRN_IVOR32:
407 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL];
408 break;
409 case SPRN_IVOR33:
410 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST];
411 break;
412#endif
393 case SPRN_IVOR35: 413 case SPRN_IVOR35:
394 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 414 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
395 break; 415 break;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 08f14bb57897..769778f855b0 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
69 * writing shadow tlb entry to host TLB 69 * writing shadow tlb entry to host TLB
70 */ 70 */
71static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, 71static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
72 uint32_t mas0) 72 uint32_t mas0,
73 uint32_t lpid)
73{ 74{
74 unsigned long flags; 75 unsigned long flags;
75 76
@@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
80 mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); 81 mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
81 mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); 82 mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
82#ifdef CONFIG_KVM_BOOKE_HV 83#ifdef CONFIG_KVM_BOOKE_HV
83 mtspr(SPRN_MAS8, stlbe->mas8); 84 mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid));
84#endif 85#endif
85 asm volatile("isync; tlbwe" : : : "memory"); 86 asm volatile("isync; tlbwe" : : : "memory");
86 87
@@ -129,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
129 130
130 if (tlbsel == 0) { 131 if (tlbsel == 0) {
131 mas0 = get_host_mas0(stlbe->mas2); 132 mas0 = get_host_mas0(stlbe->mas2);
132 __write_host_tlbe(stlbe, mas0); 133 __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid);
133 } else { 134 } else {
134 __write_host_tlbe(stlbe, 135 __write_host_tlbe(stlbe,
135 MAS0_TLBSEL(1) | 136 MAS0_TLBSEL(1) |
136 MAS0_ESEL(to_htlb1_esel(sesel))); 137 MAS0_ESEL(to_htlb1_esel(sesel)),
138 vcpu_e500->vcpu.kvm->arch.lpid);
137 } 139 }
138} 140}
139 141
@@ -176,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
176 MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; 178 MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
177 magic.mas8 = 0; 179 magic.mas8 = 0;
178 180
179 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); 181 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0);
180 preempt_enable(); 182 preempt_enable();
181} 183}
182#endif 184#endif
@@ -317,10 +319,6 @@ static void kvmppc_e500_setup_stlbe(
317 stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); 319 stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
318 stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | 320 stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
319 e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); 321 e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
320
321#ifdef CONFIG_KVM_BOOKE_HV
322 stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
323#endif
324} 322}
325 323
326static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, 324static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -633,7 +631,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
633 631
634 local_irq_save(flags); 632 local_irq_save(flags);
635 mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); 633 mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
636 mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); 634 mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu));
637 asm volatile("tlbsx 0, %[geaddr]\n" : : 635 asm volatile("tlbsx 0, %[geaddr]\n" : :
638 [geaddr] "r" (geaddr)); 636 [geaddr] "r" (geaddr));
639 mtspr(SPRN_MAS5, 0); 637 mtspr(SPRN_MAS5, 0);
@@ -732,7 +730,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
732 return 0; 730 return 0;
733} 731}
734 732
735int kvm_age_hva(struct kvm *kvm, unsigned long hva) 733int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
736{ 734{
737 /* XXX could be more clever ;) */ 735 /* XXX could be more clever ;) */
738 return 0; 736 return 0;
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 164bad2a19bf..2fdc8722e324 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
48 return; 48 return;
49 } 49 }
50 50
51 51 preempt_disable();
52 tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; 52 tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id;
53 mb(); 53 mb();
54 ppc_msgsnd(dbell_type, 0, tag); 54 ppc_msgsnd(dbell_type, 0, tag);
55 preempt_enable();
55} 56}
56 57
57/* gtlbe must not be mapped by more than one host tlb entry */ 58/* gtlbe must not be mapped by more than one host tlb entry */
@@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
60{ 61{
61 unsigned int tid, ts; 62 unsigned int tid, ts;
62 gva_t eaddr; 63 gva_t eaddr;
63 u32 val, lpid; 64 u32 val;
64 unsigned long flags; 65 unsigned long flags;
65 66
66 ts = get_tlb_ts(gtlbe); 67 ts = get_tlb_ts(gtlbe);
67 tid = get_tlb_tid(gtlbe); 68 tid = get_tlb_tid(gtlbe);
68 lpid = vcpu_e500->vcpu.kvm->arch.lpid;
69 69
70 /* We search the host TLB to invalidate its shadow TLB entry */ 70 /* We search the host TLB to invalidate its shadow TLB entry */
71 val = (tid << 16) | ts; 71 val = (tid << 16) | ts;
@@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
74 local_irq_save(flags); 74 local_irq_save(flags);
75 75
76 mtspr(SPRN_MAS6, val); 76 mtspr(SPRN_MAS6, val);
77 mtspr(SPRN_MAS5, MAS5_SGS | lpid); 77 mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
78 78
79 asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); 79 asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
80 val = mfspr(SPRN_MAS1); 80 val = mfspr(SPRN_MAS1);
@@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
95 unsigned long flags; 95 unsigned long flags;
96 96
97 local_irq_save(flags); 97 local_irq_save(flags);
98 mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); 98 mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
99 asm volatile("tlbilxlpid"); 99 asm volatile("tlbilxlpid");
100 mtspr(SPRN_MAS5, 0); 100 mtspr(SPRN_MAS5, 0);
101 local_irq_restore(flags); 101 local_irq_restore(flags);
@@ -110,6 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
110{ 110{
111} 111}
112 112
113/* We use two lpids per VM */
113static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); 114static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid);
114 115
115static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) 116static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
@@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
118 119
119 kvmppc_booke_vcpu_load(vcpu, cpu); 120 kvmppc_booke_vcpu_load(vcpu, cpu);
120 121
121 mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); 122 mtspr(SPRN_LPID, get_lpid(vcpu));
122 mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); 123 mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
123 mtspr(SPRN_GPIR, vcpu->vcpu_id); 124 mtspr(SPRN_GPIR, vcpu->vcpu_id);
124 mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); 125 mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
126 vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT);
127 vcpu->arch.epsc = vcpu->arch.eplc;
125 mtspr(SPRN_EPLC, vcpu->arch.eplc); 128 mtspr(SPRN_EPLC, vcpu->arch.eplc);
126 mtspr(SPRN_EPSC, vcpu->arch.epsc); 129 mtspr(SPRN_EPSC, vcpu->arch.epsc);
127 130
@@ -141,12 +144,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
141 mtspr(SPRN_GESR, vcpu->arch.shared->esr); 144 mtspr(SPRN_GESR, vcpu->arch.shared->esr);
142 145
143 if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || 146 if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
144 __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { 147 __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) {
145 kvmppc_e500_tlbil_all(vcpu_e500); 148 kvmppc_e500_tlbil_all(vcpu_e500);
146 __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; 149 __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu;
147 } 150 }
148
149 kvmppc_load_guest_fp(vcpu);
150} 151}
151 152
152static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) 153static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
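The vcpu_load path above flushes the guest's shadow TLB entries when either the vcpu last ran on a different physical core (oldpir mismatch) or another vcpu using the same lpid ran on this core in the meantime. A small sketch of that "flush unless we were the last user" check, with the per-CPU array reduced to a single plain table:

/* Sketch of the per-CPU "last vcpu that used this lpid" check. */
#include <stdio.h>
#include <string.h>

#define NR_LPIDS 8

struct vcpu { int id; int oldpir; };

/* one slot per lpid for a single CPU; the kernel keeps one array per CPU */
static struct vcpu *last_vcpu_of_lpid[NR_LPIDS];

static int need_tlb_flush(struct vcpu *v, int lpid, int this_pir)
{
	int flush = (v->oldpir != this_pir) || (last_vcpu_of_lpid[lpid] != v);

	if (flush)
		last_vcpu_of_lpid[lpid] = v;   /* we are now the last user */
	return flush;
}

int main(void)
{
	struct vcpu a = { .id = 0, .oldpir = 3 }, b = { .id = 1, .oldpir = 3 };

	memset(last_vcpu_of_lpid, 0, sizeof(last_vcpu_of_lpid));
	printf("%d ", need_tlb_flush(&a, 4, 3));  /* 1: first use on this CPU */
	printf("%d ", need_tlb_flush(&a, 4, 3));  /* 0: still the last user   */
	printf("%d\n", need_tlb_flush(&b, 4, 3)); /* 1: lpid reused by vcpu b */
	return 0;
}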
@@ -179,6 +180,16 @@ int kvmppc_core_check_processor_compat(void)
179 r = 0; 180 r = 0;
180 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) 181 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
181 r = 0; 182 r = 0;
183#ifdef CONFIG_ALTIVEC
184 /*
 185	 * Since guests have the privilege to enable AltiVec, we need AltiVec
 186	 * support in the host to save/restore their context.
 187	 * Don't use CPU_FTR_ALTIVEC to identify cores with an AltiVec unit
 188	 * because it's cleared in the absence of CONFIG_ALTIVEC!
189 */
190 else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
191 r = 0;
192#endif
182 else 193 else
183 r = -ENOTSUPP; 194 r = -ENOTSUPP;
184 195
@@ -194,9 +205,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
194#ifdef CONFIG_64BIT 205#ifdef CONFIG_64BIT
195 vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; 206 vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
196#endif 207#endif
197 vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; 208 vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP;
198 vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
199 vcpu->arch.epsc = vcpu->arch.eplc;
200 209
201 vcpu->arch.pvr = mfspr(SPRN_PVR); 210 vcpu->arch.pvr = mfspr(SPRN_PVR);
202 vcpu_e500->svr = mfspr(SPRN_SVR); 211 vcpu_e500->svr = mfspr(SPRN_SVR);
@@ -356,13 +365,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
356 if (lpid < 0) 365 if (lpid < 0)
357 return lpid; 366 return lpid;
358 367
368 /*
369 * Use two lpids per VM on cores with two threads like e6500. Use
370 * even numbers to speedup vcpu lpid computation with consecutive lpids
371 * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on.
372 */
373 if (threads_per_core == 2)
374 lpid <<= 1;
375
359 kvm->arch.lpid = lpid; 376 kvm->arch.lpid = lpid;
360 return 0; 377 return 0;
361} 378}
362 379
363static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) 380static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
364{ 381{
365 kvmppc_free_lpid(kvm->arch.lpid); 382 int lpid = kvm->arch.lpid;
383
384 if (threads_per_core == 2)
385 lpid >>= 1;
386
387 kvmppc_free_lpid(lpid);
366} 388}
367 389
368static struct kvmppc_ops kvm_ops_e500mc = { 390static struct kvmppc_ops kvm_ops_e500mc = {
@@ -390,7 +412,13 @@ static int __init kvmppc_e500mc_init(void)
390 if (r) 412 if (r)
391 goto err_out; 413 goto err_out;
392 414
393 kvmppc_init_lpid(64); 415 /*
 416	 * Use two lpids per VM on dual-threaded processors like e6500
 417	 * to work around the lack of a tlb write conditional instruction.
418 * Expose half the number of available hardware lpids to the lpid
419 * allocator.
420 */
421 kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core);
394 kvmppc_claim_lpid(0); /* host */ 422 kvmppc_claim_lpid(0); /* host */
395 423
396 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 424 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
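Putting the e6500 pieces together: the allocator is told to hand out only half the hardware lpids, each VM's allocated id is doubled so its base is always even, the pair (base, base + 1) belongs to the VM's two hardware threads, and the doubling is undone before the id is returned. A sketch of that arithmetic, with the real kvmppc_alloc_lpid/kvmppc_free_lpid allocator reduced to a counter:

/* Sketch of the "two lpids per VM on dual-threaded cores" scheme. */
#include <stdio.h>

#define HW_LPIDS 64                 /* hardware lpids, e.g. KVMPPC_NR_LPIDS */

static int threads_per_core = 2;    /* e6500-like core */
static int next_lpid = 1;           /* toy allocator; lpid 0 stays with the host */

static int vm_alloc_lpid(void)
{
	int lpid = next_lpid++;

	if (lpid >= HW_LPIDS / threads_per_core)
		return -1;              /* allocator exposes only half the ids */
	if (threads_per_core == 2)
		lpid <<= 1;             /* even base: VM owns lpid and lpid + 1 */
	return lpid;
}

static void vm_free_lpid(int lpid)
{
	if (threads_per_core == 2)
		lpid >>= 1;             /* undo the doubling before returning it */
	(void)lpid;                     /* toy allocator: nothing to put back */
}

int main(void)
{
	int vm1 = vm_alloc_lpid(), vm2 = vm_alloc_lpid();

	printf("vm1 lpids %d,%d  vm2 lpids %d,%d\n", vm1, vm1 + 1, vm2, vm2 + 1);
	vm_free_lpid(vm1);
	vm_free_lpid(vm2);
	return 0;
}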
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index e96b50d0bdab..5cc2e7af3a7b 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -219,7 +219,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
219 /* this default type might be overwritten by subcategories */ 219 /* this default type might be overwritten by subcategories */
220 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); 220 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
221 221
222 emulated = kvmppc_get_last_inst(vcpu, false, &inst); 222 emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
223 if (emulated != EMULATE_DONE) 223 if (emulated != EMULATE_DONE)
224 return emulated; 224 return emulated;
225 225
@@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
274 } 274 }
275 break; 275 break;
276 276
277 case 0:
278 /*
 279	 * Instruction with primary opcode 0. According to the PowerISA,
 280	 * these are illegal instructions.
281 */
282 if (inst == KVMPPC_INST_SW_BREAKPOINT) {
283 run->exit_reason = KVM_EXIT_DEBUG;
284 run->debug.arch.address = kvmppc_get_pc(vcpu);
285 emulated = EMULATE_EXIT_USER;
286 advance = 0;
287 } else
288 emulated = EMULATE_FAIL;
289
290 break;
291
277 default: 292 default:
278 emulated = EMULATE_FAIL; 293 emulated = EMULATE_FAIL;
279 } 294 }
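The new case relies on primary opcode 0 being architecturally illegal, so a single reserved word can serve as a software breakpoint: if the trapped instruction matches the breakpoint pattern, the exit is reported to userspace as a debug event; otherwise emulation fails and the guest typically receives a program check. A sketch of that classification; the breakpoint encoding below is a placeholder, not necessarily the value KVMPPC_INST_SW_BREAKPOINT actually uses:

/* Sketch of classifying a trapped primary-opcode-0 instruction. */
#include <stdint.h>
#include <stdio.h>

#define SW_BREAKPOINT_INST 0x00000000u   /* placeholder encoding */

enum action { EXIT_TO_USER_DEBUG, INJECT_PROGRAM_CHECK, EMULATE_NORMALLY };

static enum action classify(uint32_t inst)
{
	uint32_t primary_opcode = inst >> 26;   /* top 6 bits on Power */

	if (primary_opcode != 0)
		return EMULATE_NORMALLY;
	if (inst == SW_BREAKPOINT_INST)
		return EXIT_TO_USER_DEBUG;      /* debugger-inserted trap */
	return INJECT_PROGRAM_CHECK;            /* genuinely illegal instruction */
}

int main(void)
{
	printf("%d %d\n", classify(SW_BREAKPOINT_INST), classify(0x03ffffffu));
	return 0;
}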
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 0de4ffa175a9..6d3c0ee1d744 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -58,7 +58,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
58 /* this default type might be overwritten by subcategories */ 58 /* this default type might be overwritten by subcategories */
59 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); 59 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
60 60
61 emulated = kvmppc_get_last_inst(vcpu, false, &inst); 61 emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
62 if (emulated != EMULATE_DONE) 62 if (emulated != EMULATE_DONE)
63 return emulated; 63 return emulated;
64 64
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c79284b58be..c1f8f53cd312 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -294,7 +294,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
294 { 294 {
295 u32 last_inst; 295 u32 last_inst;
296 296
297 kvmppc_get_last_inst(vcpu, false, &last_inst); 297 kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
298 /* XXX Deliver Program interrupt to guest. */ 298 /* XXX Deliver Program interrupt to guest. */
299 pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); 299 pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst);
300 r = RESUME_HOST; 300 r = RESUME_HOST;
@@ -384,24 +384,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
384} 384}
385EXPORT_SYMBOL_GPL(kvmppc_ld); 385EXPORT_SYMBOL_GPL(kvmppc_ld);
386 386
387int kvm_arch_hardware_enable(void *garbage) 387int kvm_arch_hardware_enable(void)
388{ 388{
389 return 0; 389 return 0;
390} 390}
391 391
392void kvm_arch_hardware_disable(void *garbage)
393{
394}
395
396int kvm_arch_hardware_setup(void) 392int kvm_arch_hardware_setup(void)
397{ 393{
398 return 0; 394 return 0;
399} 395}
400 396
401void kvm_arch_hardware_unsetup(void)
402{
403}
404
405void kvm_arch_check_processor_compat(void *rtn) 397void kvm_arch_check_processor_compat(void *rtn)
406{ 398{
407 *(int *)rtn = kvmppc_core_check_processor_compat(); 399 *(int *)rtn = kvmppc_core_check_processor_compat();
@@ -462,10 +454,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
462 module_put(kvm->arch.kvm_ops->owner); 454 module_put(kvm->arch.kvm_ops->owner);
463} 455}
464 456
465void kvm_arch_sync_events(struct kvm *kvm)
466{
467}
468
469int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 457int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
470{ 458{
471 int r; 459 int r;
@@ -608,10 +596,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
608 return kvmppc_core_create_memslot(kvm, slot, npages); 596 return kvmppc_core_create_memslot(kvm, slot, npages);
609} 597}
610 598
611void kvm_arch_memslots_updated(struct kvm *kvm)
612{
613}
614
615int kvm_arch_prepare_memory_region(struct kvm *kvm, 599int kvm_arch_prepare_memory_region(struct kvm *kvm,
616 struct kvm_memory_slot *memslot, 600 struct kvm_memory_slot *memslot,
617 struct kvm_userspace_memory_region *mem, 601 struct kvm_userspace_memory_region *mem,
@@ -628,10 +612,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
628 kvmppc_core_commit_memory_region(kvm, mem, old); 612 kvmppc_core_commit_memory_region(kvm, mem, old);
629} 613}
630 614
631void kvm_arch_flush_shadow_all(struct kvm *kvm)
632{
633}
634
635void kvm_arch_flush_shadow_memslot(struct kvm *kvm, 615void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
636 struct kvm_memory_slot *slot) 616 struct kvm_memory_slot *slot)
637{ 617{
@@ -658,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
658{ 638{
659 /* Make sure we're not using the vcpu anymore */ 639 /* Make sure we're not using the vcpu anymore */
660 hrtimer_cancel(&vcpu->arch.dec_timer); 640 hrtimer_cancel(&vcpu->arch.dec_timer);
661 tasklet_kill(&vcpu->arch.tasklet);
662 641
663 kvmppc_remove_vcpu_debugfs(vcpu); 642 kvmppc_remove_vcpu_debugfs(vcpu);
664 643
@@ -684,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
684 return kvmppc_core_pending_dec(vcpu); 663 return kvmppc_core_pending_dec(vcpu);
685} 664}
686 665
687/*
688 * low level hrtimer wake routine. Because this runs in hardirq context
689 * we schedule a tasklet to do the real work.
690 */
691enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) 666enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
692{ 667{
693 struct kvm_vcpu *vcpu; 668 struct kvm_vcpu *vcpu;
694 669
695 vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); 670 vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
696 tasklet_schedule(&vcpu->arch.tasklet); 671 kvmppc_decrementer_func(vcpu);
697 672
698 return HRTIMER_NORESTART; 673 return HRTIMER_NORESTART;
699} 674}
@@ -703,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
703 int ret; 678 int ret;
704 679
705 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 680 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
706 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
707 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 681 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
708 vcpu->arch.dec_expires = ~(u64)0; 682 vcpu->arch.dec_expires = ~(u64)0;
709 683
@@ -927,6 +901,103 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
927} 901}
928EXPORT_SYMBOL_GPL(kvmppc_handle_store); 902EXPORT_SYMBOL_GPL(kvmppc_handle_store);
929 903
904int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
905{
906 int r = 0;
907 union kvmppc_one_reg val;
908 int size;
909
910 size = one_reg_size(reg->id);
911 if (size > sizeof(val))
912 return -EINVAL;
913
914 r = kvmppc_get_one_reg(vcpu, reg->id, &val);
915 if (r == -EINVAL) {
916 r = 0;
917 switch (reg->id) {
918#ifdef CONFIG_ALTIVEC
919 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
920 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
921 r = -ENXIO;
922 break;
923 }
924 vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
925 break;
926 case KVM_REG_PPC_VSCR:
927 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
928 r = -ENXIO;
929 break;
930 }
931 vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
932 break;
933 case KVM_REG_PPC_VRSAVE:
934 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
935 r = -ENXIO;
936 break;
937 }
938 vcpu->arch.vrsave = set_reg_val(reg->id, val);
939 break;
940#endif /* CONFIG_ALTIVEC */
941 default:
942 r = -EINVAL;
943 break;
944 }
945 }
946
947 if (r)
948 return r;
949
950 if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
951 r = -EFAULT;
952
953 return r;
954}
955
956int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
957{
958 int r;
959 union kvmppc_one_reg val;
960 int size;
961
962 size = one_reg_size(reg->id);
963 if (size > sizeof(val))
964 return -EINVAL;
965
966 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
967 return -EFAULT;
968
969 r = kvmppc_set_one_reg(vcpu, reg->id, &val);
970 if (r == -EINVAL) {
971 r = 0;
972 switch (reg->id) {
973#ifdef CONFIG_ALTIVEC
974 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
975 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
976 r = -ENXIO;
977 break;
978 }
979 val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
980 break;
981 case KVM_REG_PPC_VSCR:
982 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
983 r = -ENXIO;
984 break;
985 }
986 val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
987 break;
988 case KVM_REG_PPC_VRSAVE:
989 val = get_reg_val(reg->id, vcpu->arch.vrsave);
990 break;
991#endif /* CONFIG_ALTIVEC */
992 default:
993 r = -EINVAL;
994 break;
995 }
996 }
997
998 return r;
999}
1000
930int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 1001int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
931{ 1002{
932 int r; 1003 int r;
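Both ioctl helpers size the userspace copy from the register id itself: one_reg_size() decodes the KVM_REG_SIZE field embedded in reg->id, so a single code path handles 32-bit SPRs and 128-bit AltiVec registers alike. A hedged sketch of that decoding, assuming the usual KVM encoding where bits 52-55 of the id hold log2 of the byte count:

/* Sketch of decoding the value size from a KVM_GET/SET_ONE_REG id. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static size_t one_reg_size_sketch(uint64_t id)
{
	/* size field: log2(bytes) in bits 52..55 of the register id */
	return (size_t)1 << ((id >> 52) & 0xf);
}

int main(void)
{
	uint64_t u32_reg  = (uint64_t)0x2 << 52;   /* 4-byte register  */
	uint64_t u64_reg  = (uint64_t)0x3 << 52;   /* 8-byte register  */
	uint64_t u128_reg = (uint64_t)0x4 << 52;   /* 16-byte register, e.g. a VR */

	printf("%zu %zu %zu\n", one_reg_size_sketch(u32_reg),
	       one_reg_size_sketch(u64_reg), one_reg_size_sketch(u128_reg));
	return 0;
}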
@@ -1343,9 +1414,4 @@ int kvm_arch_init(void *opaque)
1343 return 0; 1414 return 0;
1344} 1415}
1345 1416
1346void kvm_arch_exit(void)
1347{
1348
1349}
1350
1351EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); 1417EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e8bc40869cbd..7d9ee3d8c618 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -303,9 +303,13 @@ config PPC_ICSWX_USE_SIGILL
303 303
304 If in doubt, say N here. 304 If in doubt, say N here.
305 305
306config SPE_POSSIBLE
307 def_bool y
308 depends on E200 || (E500 && !PPC_E500MC)
309
306config SPE 310config SPE
307 bool "SPE Support" 311 bool "SPE Support"
308 depends on E200 || (E500 && !PPC_E500MC) 312 depends on SPE_POSSIBLE
309 default y 313 default y
310 ---help--- 314 ---help---
311 This option enables kernel support for the Signal Processing 315 This option enables kernel support for the Signal Processing
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 773bef7614d8..2175f911a73a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
13 13
14#ifndef ASM_KVM_HOST_H 14#ifndef ASM_KVM_HOST_H
15#define ASM_KVM_HOST_H 15#define ASM_KVM_HOST_H
16
17#include <linux/types.h>
16#include <linux/hrtimer.h> 18#include <linux/hrtimer.h>
17#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/kvm_types.h>
18#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
19#include <linux/kvm.h> 22#include <linux/kvm.h>
20#include <asm/debug.h> 23#include <asm/debug.h>
@@ -154,7 +157,9 @@ struct kvm_s390_sie_block {
154 __u8 armid; /* 0x00e3 */ 157 __u8 armid; /* 0x00e3 */
155 __u8 reservede4[4]; /* 0x00e4 */ 158 __u8 reservede4[4]; /* 0x00e4 */
156 __u64 tecmc; /* 0x00e8 */ 159 __u64 tecmc; /* 0x00e8 */
157 __u8 reservedf0[16]; /* 0x00f0 */ 160 __u8 reservedf0[12]; /* 0x00f0 */
161#define CRYCB_FORMAT1 0x00000001
162 __u32 crycbd; /* 0x00fc */
158 __u64 gcr[16]; /* 0x0100 */ 163 __u64 gcr[16]; /* 0x0100 */
159 __u64 gbea; /* 0x0180 */ 164 __u64 gbea; /* 0x0180 */
160 __u8 reserved188[24]; /* 0x0188 */ 165 __u8 reserved188[24]; /* 0x0188 */
@@ -187,6 +192,7 @@ struct kvm_vcpu_stat {
187 u32 exit_stop_request; 192 u32 exit_stop_request;
188 u32 exit_validity; 193 u32 exit_validity;
189 u32 exit_instruction; 194 u32 exit_instruction;
195 u32 halt_wakeup;
190 u32 instruction_lctl; 196 u32 instruction_lctl;
191 u32 instruction_lctlg; 197 u32 instruction_lctlg;
192 u32 instruction_stctl; 198 u32 instruction_stctl;
@@ -407,6 +413,15 @@ struct s390_io_adapter {
407#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) 413#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
408#define MAX_S390_ADAPTER_MAPS 256 414#define MAX_S390_ADAPTER_MAPS 256
409 415
416struct kvm_s390_crypto {
417 struct kvm_s390_crypto_cb *crycb;
418 __u32 crycbd;
419};
420
421struct kvm_s390_crypto_cb {
422 __u8 reserved00[128]; /* 0x0000 */
423};
424
410struct kvm_arch{ 425struct kvm_arch{
411 struct sca_block *sca; 426 struct sca_block *sca;
412 debug_info_t *dbf; 427 debug_info_t *dbf;
@@ -420,6 +435,7 @@ struct kvm_arch{
420 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; 435 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
421 wait_queue_head_t ipte_wq; 436 wait_queue_head_t ipte_wq;
422 spinlock_t start_stop_lock; 437 spinlock_t start_stop_lock;
438 struct kvm_s390_crypto crypto;
423}; 439};
424 440
425#define KVM_HVA_ERR_BAD (-1UL) 441#define KVM_HVA_ERR_BAD (-1UL)
@@ -431,8 +447,6 @@ static inline bool kvm_is_error_hva(unsigned long addr)
431} 447}
432 448
433#define ASYNC_PF_PER_VCPU 64 449#define ASYNC_PF_PER_VCPU 64
434struct kvm_vcpu;
435struct kvm_async_pf;
436struct kvm_arch_async_pf { 450struct kvm_arch_async_pf {
437 unsigned long pfault_token; 451 unsigned long pfault_token;
438}; 452};
@@ -450,4 +464,18 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
450 464
451extern int sie64a(struct kvm_s390_sie_block *, u64 *); 465extern int sie64a(struct kvm_s390_sie_block *, u64 *);
452extern char sie_exit; 466extern char sie_exit;
467
468static inline void kvm_arch_hardware_disable(void) {}
469static inline void kvm_arch_check_processor_compat(void *rtn) {}
470static inline void kvm_arch_exit(void) {}
471static inline void kvm_arch_sync_events(struct kvm *kvm) {}
472static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
473static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
474static inline void kvm_arch_free_memslot(struct kvm *kvm,
475 struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
476static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
477static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
478static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
479 struct kvm_memory_slot *slot) {}
480
453#endif 481#endif
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 9e18a61d3df3..d39a31c3cdf2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -18,9 +18,9 @@
18unsigned long *crst_table_alloc(struct mm_struct *); 18unsigned long *crst_table_alloc(struct mm_struct *);
19void crst_table_free(struct mm_struct *, unsigned long *); 19void crst_table_free(struct mm_struct *, unsigned long *);
20 20
21unsigned long *page_table_alloc(struct mm_struct *, unsigned long); 21unsigned long *page_table_alloc(struct mm_struct *);
22void page_table_free(struct mm_struct *, unsigned long *); 22void page_table_free(struct mm_struct *, unsigned long *);
23void page_table_free_rcu(struct mmu_gather *, unsigned long *); 23void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
24 24
25void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, 25void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
26 bool init_skey); 26 bool init_skey);
@@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm,
145/* 145/*
146 * page table entry allocation/free routines. 146 * page table entry allocation/free routines.
147 */ 147 */
148#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) 148#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
149#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) 149#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
150 150
151#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) 151#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
152#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) 152#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5efb2fe186e7..b7054356cc98 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -30,6 +30,7 @@
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/mm_types.h> 31#include <linux/mm_types.h>
32#include <linux/page-flags.h> 32#include <linux/page-flags.h>
33#include <linux/radix-tree.h>
33#include <asm/bug.h> 34#include <asm/bug.h>
34#include <asm/page.h> 35#include <asm/page.h>
35 36
@@ -789,82 +790,67 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
789 790
790/** 791/**
791 * struct gmap_struct - guest address space 792 * struct gmap_struct - guest address space
793 * @crst_list: list of all crst tables used in the guest address space
792 * @mm: pointer to the parent mm_struct 794 * @mm: pointer to the parent mm_struct
795 * @guest_to_host: radix tree with guest to host address translation
796 * @host_to_guest: radix tree with pointer to segment table entries
797 * @guest_table_lock: spinlock to protect all entries in the guest page table
793 * @table: pointer to the page directory 798 * @table: pointer to the page directory
794 * @asce: address space control element for gmap page table 799 * @asce: address space control element for gmap page table
795 * @crst_list: list of all crst tables used in the guest address space
796 * @pfault_enabled: defines if pfaults are applicable for the guest 800 * @pfault_enabled: defines if pfaults are applicable for the guest
797 */ 801 */
798struct gmap { 802struct gmap {
799 struct list_head list; 803 struct list_head list;
804 struct list_head crst_list;
800 struct mm_struct *mm; 805 struct mm_struct *mm;
806 struct radix_tree_root guest_to_host;
807 struct radix_tree_root host_to_guest;
808 spinlock_t guest_table_lock;
801 unsigned long *table; 809 unsigned long *table;
802 unsigned long asce; 810 unsigned long asce;
811 unsigned long asce_end;
803 void *private; 812 void *private;
804 struct list_head crst_list;
805 bool pfault_enabled; 813 bool pfault_enabled;
806}; 814};
807 815
808/** 816/**
809 * struct gmap_rmap - reverse mapping for segment table entries
810 * @gmap: pointer to the gmap_struct
811 * @entry: pointer to a segment table entry
812 * @vmaddr: virtual address in the guest address space
813 */
814struct gmap_rmap {
815 struct list_head list;
816 struct gmap *gmap;
817 unsigned long *entry;
818 unsigned long vmaddr;
819};
820
821/**
822 * struct gmap_pgtable - gmap information attached to a page table
823 * @vmaddr: address of the 1MB segment in the process virtual memory
824 * @mapper: list of segment table entries mapping a page table
825 */
826struct gmap_pgtable {
827 unsigned long vmaddr;
828 struct list_head mapper;
829};
830
831/**
832 * struct gmap_notifier - notify function block for page invalidation 817 * struct gmap_notifier - notify function block for page invalidation
833 * @notifier_call: address of callback function 818 * @notifier_call: address of callback function
834 */ 819 */
835struct gmap_notifier { 820struct gmap_notifier {
836 struct list_head list; 821 struct list_head list;
837 void (*notifier_call)(struct gmap *gmap, unsigned long address); 822 void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
838}; 823};
839 824
840struct gmap *gmap_alloc(struct mm_struct *mm); 825struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
841void gmap_free(struct gmap *gmap); 826void gmap_free(struct gmap *gmap);
842void gmap_enable(struct gmap *gmap); 827void gmap_enable(struct gmap *gmap);
843void gmap_disable(struct gmap *gmap); 828void gmap_disable(struct gmap *gmap);
844int gmap_map_segment(struct gmap *gmap, unsigned long from, 829int gmap_map_segment(struct gmap *gmap, unsigned long from,
845 unsigned long to, unsigned long len); 830 unsigned long to, unsigned long len);
846int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); 831int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
847unsigned long __gmap_translate(unsigned long address, struct gmap *); 832unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
848unsigned long gmap_translate(unsigned long address, struct gmap *); 833unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
849unsigned long __gmap_fault(unsigned long address, struct gmap *); 834int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
850unsigned long gmap_fault(unsigned long address, struct gmap *); 835int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
851void gmap_discard(unsigned long from, unsigned long to, struct gmap *); 836void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
852void __gmap_zap(unsigned long address, struct gmap *); 837void __gmap_zap(struct gmap *, unsigned long gaddr);
853bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); 838bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
854 839
855 840
856void gmap_register_ipte_notifier(struct gmap_notifier *); 841void gmap_register_ipte_notifier(struct gmap_notifier *);
857void gmap_unregister_ipte_notifier(struct gmap_notifier *); 842void gmap_unregister_ipte_notifier(struct gmap_notifier *);
858int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); 843int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
859void gmap_do_ipte_notify(struct mm_struct *, pte_t *); 844void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
860 845
861static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, 846static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
847 unsigned long addr,
862 pte_t *ptep, pgste_t pgste) 848 pte_t *ptep, pgste_t pgste)
863{ 849{
864#ifdef CONFIG_PGSTE 850#ifdef CONFIG_PGSTE
865 if (pgste_val(pgste) & PGSTE_IN_BIT) { 851 if (pgste_val(pgste) & PGSTE_IN_BIT) {
866 pgste_val(pgste) &= ~PGSTE_IN_BIT; 852 pgste_val(pgste) &= ~PGSTE_IN_BIT;
867 gmap_do_ipte_notify(mm, ptep); 853 gmap_do_ipte_notify(mm, addr, ptep);
868 } 854 }
869#endif 855#endif
870 return pgste; 856 return pgste;
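The reworked gmap above drops the per-page-table gmap_rmap/gmap_pgtable bookkeeping in favour of two radix trees: guest_to_host maps a guest segment address to the host virtual address backing it, and host_to_guest lets invalidation walk back from a host segment-table entry to the guest address. A much-simplified user-space sketch of the forward lookup, with the radix tree replaced by a fixed table of 1 MB segments:

/* Sketch of the guest_to_host idea: translate a guest address to the
 * host virtual address of the segment that backs it. The real code uses
 * a radix tree and covers the full guest address space. */
#include <stdint.h>
#include <stdio.h>

#define SEG_SHIFT 20                     /* 1 MB segments, as on s390 */
#define NR_SEGS   16                     /* toy guest: 16 MB */

static uint64_t guest_to_host[NR_SEGS];  /* 0 means "not mapped yet" */

static uint64_t translate(uint64_t gaddr)
{
	uint64_t seg = gaddr >> SEG_SHIFT;
	uint64_t off = gaddr & ((1ULL << SEG_SHIFT) - 1);

	if (seg >= NR_SEGS || !guest_to_host[seg])
		return 0;                /* would trigger a gmap fault */
	return guest_to_host[seg] + off;
}

int main(void)
{
	guest_to_host[2] = 0x7f0000200000ULL;        /* map guest segment 2 */
	printf("%#llx\n",
	       (unsigned long long)translate(0x200000 + 0x123));
	return 0;
}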
@@ -1110,7 +1096,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1110 pgste_val(pgste) &= ~PGSTE_UC_BIT; 1096 pgste_val(pgste) &= ~PGSTE_UC_BIT;
1111 pte = *ptep; 1097 pte = *ptep;
1112 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 1098 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
1113 pgste = pgste_ipte_notify(mm, ptep, pgste); 1099 pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
1114 __ptep_ipte(addr, ptep); 1100 __ptep_ipte(addr, ptep);
1115 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 1101 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
1116 pte_val(pte) |= _PAGE_PROTECT; 1102 pte_val(pte) |= _PAGE_PROTECT;
@@ -1132,7 +1118,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1132 1118
1133 if (mm_has_pgste(vma->vm_mm)) { 1119 if (mm_has_pgste(vma->vm_mm)) {
1134 pgste = pgste_get_lock(ptep); 1120 pgste = pgste_get_lock(ptep);
1135 pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); 1121 pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
1136 } 1122 }
1137 1123
1138 oldpte = pte = *ptep; 1124 oldpte = pte = *ptep;
@@ -1179,7 +1165,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
1179 1165
1180 if (mm_has_pgste(mm)) { 1166 if (mm_has_pgste(mm)) {
1181 pgste = pgste_get_lock(ptep); 1167 pgste = pgste_get_lock(ptep);
1182 pgste = pgste_ipte_notify(mm, ptep, pgste); 1168 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1183 } 1169 }
1184 1170
1185 pte = *ptep; 1171 pte = *ptep;
@@ -1203,7 +1189,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
1203 1189
1204 if (mm_has_pgste(mm)) { 1190 if (mm_has_pgste(mm)) {
1205 pgste = pgste_get_lock(ptep); 1191 pgste = pgste_get_lock(ptep);
1206 pgste_ipte_notify(mm, ptep, pgste); 1192 pgste_ipte_notify(mm, address, ptep, pgste);
1207 } 1193 }
1208 1194
1209 pte = *ptep; 1195 pte = *ptep;
@@ -1240,7 +1226,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
1240 1226
1241 if (mm_has_pgste(vma->vm_mm)) { 1227 if (mm_has_pgste(vma->vm_mm)) {
1242 pgste = pgste_get_lock(ptep); 1228 pgste = pgste_get_lock(ptep);
1243 pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); 1229 pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
1244 } 1230 }
1245 1231
1246 pte = *ptep; 1232 pte = *ptep;
@@ -1274,7 +1260,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
1274 1260
1275 if (!full && mm_has_pgste(mm)) { 1261 if (!full && mm_has_pgste(mm)) {
1276 pgste = pgste_get_lock(ptep); 1262 pgste = pgste_get_lock(ptep);
1277 pgste = pgste_ipte_notify(mm, ptep, pgste); 1263 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1278 } 1264 }
1279 1265
1280 pte = *ptep; 1266 pte = *ptep;
@@ -1299,7 +1285,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
1299 if (pte_write(pte)) { 1285 if (pte_write(pte)) {
1300 if (mm_has_pgste(mm)) { 1286 if (mm_has_pgste(mm)) {
1301 pgste = pgste_get_lock(ptep); 1287 pgste = pgste_get_lock(ptep);
1302 pgste = pgste_ipte_notify(mm, ptep, pgste); 1288 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1303 } 1289 }
1304 1290
1305 ptep_flush_lazy(mm, address, ptep); 1291 ptep_flush_lazy(mm, address, ptep);
@@ -1325,7 +1311,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
1325 return 0; 1311 return 0;
1326 if (mm_has_pgste(vma->vm_mm)) { 1312 if (mm_has_pgste(vma->vm_mm)) {
1327 pgste = pgste_get_lock(ptep); 1313 pgste = pgste_get_lock(ptep);
1328 pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); 1314 pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
1329 } 1315 }
1330 1316
1331 ptep_flush_direct(vma->vm_mm, address, ptep); 1317 ptep_flush_direct(vma->vm_mm, address, ptep);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index a25f09fbaf36..572c59949004 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
105static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 105static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
106 unsigned long address) 106 unsigned long address)
107{ 107{
108 page_table_free_rcu(tlb, (unsigned long *) pte); 108 page_table_free_rcu(tlb, (unsigned long *) pte, address);
109} 109}
110 110
111/* 111/*
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 0fc26430a1e5..48eda3ab4944 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -111,12 +111,22 @@ struct kvm_guest_debug_arch {
111#define KVM_SYNC_GPRS (1UL << 1) 111#define KVM_SYNC_GPRS (1UL << 1)
112#define KVM_SYNC_ACRS (1UL << 2) 112#define KVM_SYNC_ACRS (1UL << 2)
113#define KVM_SYNC_CRS (1UL << 3) 113#define KVM_SYNC_CRS (1UL << 3)
114#define KVM_SYNC_ARCH0 (1UL << 4)
115#define KVM_SYNC_PFAULT (1UL << 5)
114/* definition of registers in kvm_run */ 116/* definition of registers in kvm_run */
115struct kvm_sync_regs { 117struct kvm_sync_regs {
116 __u64 prefix; /* prefix register */ 118 __u64 prefix; /* prefix register */
117 __u64 gprs[16]; /* general purpose registers */ 119 __u64 gprs[16]; /* general purpose registers */
118 __u32 acrs[16]; /* access registers */ 120 __u32 acrs[16]; /* access registers */
119 __u64 crs[16]; /* control registers */ 121 __u64 crs[16]; /* control registers */
122 __u64 todpr; /* tod programmable register [ARCH0] */
123 __u64 cputm; /* cpu timer [ARCH0] */
124 __u64 ckc; /* clock comparator [ARCH0] */
125 __u64 pp; /* program parameter [ARCH0] */
126 __u64 gbea; /* guest breaking-event address [ARCH0] */
127 __u64 pft; /* pfault token [PFAULT] */
128 __u64 pfs; /* pfault select [PFAULT] */
129 __u64 pfc; /* pfault compare [PFAULT] */
120}; 130};
121 131
122#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) 132#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
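[Editor's sketch] The hunk above grows the synced-register area of kvm_run: the ARCH0 and PFAULT register groups now travel through kvm_sync_regs, gated by the KVM_SYNC_* bits in kvm_valid_regs/kvm_dirty_regs (both fields appear later in this patch in sync_regs()/store_regs()). The following self-contained mock, which is not the real KVM ABI headers, only illustrates the dirty-bit protocol: the "kernel" side copies just the groups whose bit userspace set and then clears kvm_dirty_regs.

/* Illustrative mock of the KVM_SYNC_* dirty-bit protocol; field and flag
 * names mirror the hunk above, but this is not the real uapi layout. */
#include <stdint.h>
#include <stdio.h>

#define KVM_SYNC_ARCH0  (1UL << 4)
#define KVM_SYNC_PFAULT (1UL << 5)

struct mock_sync_regs {
	uint64_t cputm, ckc, todpr, pp, gbea;	/* [ARCH0] */
	uint64_t pft, pfs, pfc;			/* [PFAULT] */
};

struct mock_run {
	uint64_t kvm_valid_regs;	/* kernel -> user: groups that are synced */
	uint64_t kvm_dirty_regs;	/* user -> kernel: groups userspace changed */
	struct mock_sync_regs regs;
};

struct mock_vcpu {
	uint64_t cputm, ckc, todpr, pp, gbea;
	uint64_t pfault_token, pfault_select, pfault_compare;
};

/* "Kernel" side: pick up only the groups userspace marked dirty. */
static void mock_sync_regs(struct mock_vcpu *vcpu, struct mock_run *run)
{
	if (run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->cputm = run->regs.cputm;
		vcpu->ckc   = run->regs.ckc;
		vcpu->todpr = run->regs.todpr;
		vcpu->pp    = run->regs.pp;
		vcpu->gbea  = run->regs.gbea;
	}
	if (run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->pfault_token   = run->regs.pft;
		vcpu->pfault_select  = run->regs.pfs;
		vcpu->pfault_compare = run->regs.pfc;
	}
	run->kvm_dirty_regs = 0;
}

int main(void)
{
	struct mock_vcpu vcpu = { 0 };
	struct mock_run run = { 0 };

	run.regs.ckc = 0x1234;
	run.kvm_dirty_regs = KVM_SYNC_ARCH0;	/* only ARCH0 was touched */
	mock_sync_regs(&vcpu, &run);
	printf("ckc=%#lx pfault_token=%#lx\n",
	       (unsigned long)vcpu.ckc, (unsigned long)vcpu.pfault_token);
	return 0;
}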
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 59bd8f991b98..9254afff250c 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; 28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; 29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
30 30
31 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end 31 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
32 || start < 2 * PAGE_SIZE) 32 || start < 2 * PAGE_SIZE)
33 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 33 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
34 34
35 VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); 35 VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
36 vcpu->stat.diagnose_10++; 36 vcpu->stat.diagnose_10++;
37 37
38 /* we checked for start > end above */ 38 /*
39 if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { 39 * We checked for start >= end above, so lets check for the
40 gmap_discard(start, end, vcpu->arch.gmap); 40 * fast path (no prefix swap page involved)
41 */
42 if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
43 gmap_discard(vcpu->arch.gmap, start, end);
41 } else { 44 } else {
42 if (start < prefix) 45 /*
43 gmap_discard(start, prefix, vcpu->arch.gmap); 46 * This is slow path. gmap_discard will check for start
44 if (end >= prefix) 47 * so lets split this into before prefix, prefix, after
45 gmap_discard(prefix + 2 * PAGE_SIZE, 48 * prefix and let gmap_discard make some of these calls
46 end, vcpu->arch.gmap); 49 * NOPs.
50 */
51 gmap_discard(vcpu->arch.gmap, start, prefix);
52 if (start <= prefix)
53 gmap_discard(vcpu->arch.gmap, 0, 4096);
54 if (end > prefix + 4096)
55 gmap_discard(vcpu->arch.gmap, 4096, 8192);
56 gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
47 } 57 }
48 return 0; 58 return 0;
49} 59}
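[Editor's sketch] The diag 0x10 rework above must not discard the two pages backing the guest's prefix area as if they were ordinary guest pages; while the prefix swap is active those guest addresses are backed by absolute pages 0 and 4096, which is why the slow path splits the range and additionally discards [0, 4096) and [4096, 8192). The stand-alone sketch below reproduces that splitting with a printing stub in place of gmap_discard(); it is an editorial illustration, not kernel code.

/* Stand-alone sketch of the diag 0x10 range splitting shown above.
 * gmap_discard() is replaced by a stub that just prints the range. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

static void gmap_discard_stub(unsigned long start, unsigned long end)
{
	if (start < end)
		printf("discard [%#lx, %#lx)\n", start, end);
	/* the real gmap_discard treats start >= end as a NOP, which is
	 * what lets the slow path issue some calls unconditionally */
}

static void release_pages(unsigned long start, unsigned long end,
			  unsigned long prefix)
{
	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
		/* fast path: no overlap with the 8 KiB prefix area */
		gmap_discard_stub(start, end);
		return;
	}
	/* slow path: split around the prefix pages */
	gmap_discard_stub(start, prefix);
	if (start <= prefix)
		gmap_discard_stub(0, 4096);
	if (end > prefix + 4096)
		gmap_discard_stub(4096, 8192);
	gmap_discard_stub(prefix + 2 * PAGE_SIZE, end);
}

int main(void)
{
	/* a range that fully covers a prefix area placed at 0x10000 */
	release_pages(0x8000, 0x20000, 0x10000);
	return 0;
}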
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4653ac6e182b..0f961a1c64b3 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
254 new = old = ACCESS_ONCE(*ic); 254 new = old = ACCESS_ONCE(*ic);
255 new.k = 0; 255 new.k = 0;
256 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 256 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
257 if (!ipte_lock_count) 257 wake_up(&vcpu->kvm->arch.ipte_wq);
258 wake_up(&vcpu->kvm->arch.ipte_wq);
259out: 258out:
260 mutex_unlock(&ipte_mutex); 259 mutex_unlock(&ipte_mutex);
261} 260}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4c819bfc193..a39838457f01 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -26,8 +26,9 @@
26#define IOINT_SSID_MASK 0x00030000 26#define IOINT_SSID_MASK 0x00030000
27#define IOINT_CSSID_MASK 0x03fc0000 27#define IOINT_CSSID_MASK 0x03fc0000
28#define IOINT_AI_MASK 0x04000000 28#define IOINT_AI_MASK 0x04000000
29#define PFAULT_INIT 0x0600
29 30
30static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); 31static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
31 32
32static int is_ioint(u64 type) 33static int is_ioint(u64 type)
33{ 34{
@@ -76,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word)
76 return (0x80 >> isc) << 24; 77 return (0x80 >> isc) << 24;
77} 78}
78 79
79static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, 80static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
80 struct kvm_s390_interrupt_info *inti) 81 struct kvm_s390_interrupt_info *inti)
81{ 82{
82 switch (inti->type) { 83 switch (inti->type) {
@@ -85,6 +86,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
85 return 0; 86 return 0;
86 if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) 87 if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
87 return 1; 88 return 1;
89 return 0;
88 case KVM_S390_INT_EMERGENCY: 90 case KVM_S390_INT_EMERGENCY:
89 if (psw_extint_disabled(vcpu)) 91 if (psw_extint_disabled(vcpu))
90 return 0; 92 return 0;
@@ -205,11 +207,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
205 } 207 }
206} 208}
207 209
208static int __deliver_prog_irq(struct kvm_vcpu *vcpu, 210static u16 get_ilc(struct kvm_vcpu *vcpu)
209 struct kvm_s390_pgm_info *pgm_info)
210{ 211{
211 const unsigned short table[] = { 2, 4, 4, 6 }; 212 const unsigned short table[] = { 2, 4, 4, 6 };
213
214 switch (vcpu->arch.sie_block->icptcode) {
215 case ICPT_INST:
216 case ICPT_INSTPROGI:
217 case ICPT_OPEREXC:
218 case ICPT_PARTEXEC:
219 case ICPT_IOINST:
220 /* last instruction only stored for these icptcodes */
221 return table[vcpu->arch.sie_block->ipa >> 14];
222 case ICPT_PROGI:
223 return vcpu->arch.sie_block->pgmilc;
224 default:
225 return 0;
226 }
227}
228
229static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
230 struct kvm_s390_pgm_info *pgm_info)
231{
212 int rc = 0; 232 int rc = 0;
233 u16 ilc = get_ilc(vcpu);
213 234
214 switch (pgm_info->code & ~PGM_PER) { 235 switch (pgm_info->code & ~PGM_PER) {
215 case PGM_AFX_TRANSLATION: 236 case PGM_AFX_TRANSLATION:
@@ -276,25 +297,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
276 (u8 *) __LC_PER_ACCESS_ID); 297 (u8 *) __LC_PER_ACCESS_ID);
277 } 298 }
278 299
279 switch (vcpu->arch.sie_block->icptcode) { 300 rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
280 case ICPT_INST:
281 case ICPT_INSTPROGI:
282 case ICPT_OPEREXC:
283 case ICPT_PARTEXEC:
284 case ICPT_IOINST:
285 /* last instruction only stored for these icptcodes */
286 rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
287 (u16 *) __LC_PGM_ILC);
288 break;
289 case ICPT_PROGI:
290 rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
291 (u16 *) __LC_PGM_ILC);
292 break;
293 default:
294 rc |= put_guest_lc(vcpu, 0,
295 (u16 *) __LC_PGM_ILC);
296 }
297
298 rc |= put_guest_lc(vcpu, pgm_info->code, 301 rc |= put_guest_lc(vcpu, pgm_info->code,
299 (u16 *)__LC_PGM_INT_CODE); 302 (u16 *)__LC_PGM_INT_CODE);
300 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, 303 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -305,7 +308,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
305 return rc; 308 return rc;
306} 309}
307 310
308static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, 311static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
309 struct kvm_s390_interrupt_info *inti) 312 struct kvm_s390_interrupt_info *inti)
310{ 313{
311 const unsigned short table[] = { 2, 4, 4, 6 }; 314 const unsigned short table[] = { 2, 4, 4, 6 };
@@ -343,7 +346,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
343 case KVM_S390_INT_CLOCK_COMP: 346 case KVM_S390_INT_CLOCK_COMP:
344 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 347 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
345 inti->ext.ext_params, 0); 348 inti->ext.ext_params, 0);
346 deliver_ckc_interrupt(vcpu); 349 rc = deliver_ckc_interrupt(vcpu);
347 break; 350 break;
348 case KVM_S390_INT_CPU_TIMER: 351 case KVM_S390_INT_CPU_TIMER:
349 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 352 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
@@ -376,8 +379,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
376 case KVM_S390_INT_PFAULT_INIT: 379 case KVM_S390_INT_PFAULT_INIT:
377 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 380 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
378 inti->ext.ext_params2); 381 inti->ext.ext_params2);
379 rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); 382 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
380 rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); 383 (u16 *) __LC_EXT_INT_CODE);
384 rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
381 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, 385 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
382 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 386 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
383 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, 387 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
@@ -501,14 +505,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
501 default: 505 default:
502 BUG(); 506 BUG();
503 } 507 }
504 if (rc) { 508
505 printk("kvm: The guest lowcore is not mapped during interrupt " 509 return rc;
506 "delivery, killing userspace\n");
507 do_exit(SIGKILL);
508 }
509} 510}
510 511
511static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) 512static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
512{ 513{
513 int rc; 514 int rc;
514 515
@@ -518,11 +519,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
518 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, 519 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
519 &vcpu->arch.sie_block->gpsw, 520 &vcpu->arch.sie_block->gpsw,
520 sizeof(psw_t)); 521 sizeof(psw_t));
521 if (rc) { 522 return rc;
522 printk("kvm: The guest lowcore is not mapped during interrupt "
523 "delivery, killing userspace\n");
524 do_exit(SIGKILL);
525 }
526} 523}
527 524
528/* Check whether SIGP interpretation facility has an external call pending */ 525/* Check whether SIGP interpretation facility has an external call pending */
@@ -629,6 +626,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
629 */ 626 */
630 vcpu->preempted = true; 627 vcpu->preempted = true;
631 wake_up_interruptible(&vcpu->wq); 628 wake_up_interruptible(&vcpu->wq);
629 vcpu->stat.halt_wakeup++;
632 } 630 }
633} 631}
634 632
@@ -661,12 +659,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
661 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); 659 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
662} 660}
663 661
664void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 662int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
665{ 663{
666 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 664 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
667 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; 665 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
668 struct kvm_s390_interrupt_info *n, *inti = NULL; 666 struct kvm_s390_interrupt_info *n, *inti = NULL;
669 int deliver; 667 int deliver;
668 int rc = 0;
670 669
671 __reset_intercept_indicators(vcpu); 670 __reset_intercept_indicators(vcpu);
672 if (atomic_read(&li->active)) { 671 if (atomic_read(&li->active)) {
@@ -685,16 +684,16 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
685 atomic_set(&li->active, 0); 684 atomic_set(&li->active, 0);
686 spin_unlock(&li->lock); 685 spin_unlock(&li->lock);
687 if (deliver) { 686 if (deliver) {
688 __do_deliver_interrupt(vcpu, inti); 687 rc = __do_deliver_interrupt(vcpu, inti);
689 kfree(inti); 688 kfree(inti);
690 } 689 }
691 } while (deliver); 690 } while (!rc && deliver);
692 } 691 }
693 692
694 if (kvm_cpu_has_pending_timer(vcpu)) 693 if (!rc && kvm_cpu_has_pending_timer(vcpu))
695 deliver_ckc_interrupt(vcpu); 694 rc = deliver_ckc_interrupt(vcpu);
696 695
697 if (atomic_read(&fi->active)) { 696 if (!rc && atomic_read(&fi->active)) {
698 do { 697 do {
699 deliver = 0; 698 deliver = 0;
700 spin_lock(&fi->lock); 699 spin_lock(&fi->lock);
@@ -711,67 +710,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
711 atomic_set(&fi->active, 0); 710 atomic_set(&fi->active, 0);
712 spin_unlock(&fi->lock); 711 spin_unlock(&fi->lock);
713 if (deliver) { 712 if (deliver) {
714 __do_deliver_interrupt(vcpu, inti); 713 rc = __do_deliver_interrupt(vcpu, inti);
715 kfree(inti);
716 }
717 } while (deliver);
718 }
719}
720
721void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
722{
723 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
724 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
725 struct kvm_s390_interrupt_info *n, *inti = NULL;
726 int deliver;
727
728 __reset_intercept_indicators(vcpu);
729 if (atomic_read(&li->active)) {
730 do {
731 deliver = 0;
732 spin_lock(&li->lock);
733 list_for_each_entry_safe(inti, n, &li->list, list) {
734 if ((inti->type == KVM_S390_MCHK) &&
735 __interrupt_is_deliverable(vcpu, inti)) {
736 list_del(&inti->list);
737 deliver = 1;
738 break;
739 }
740 __set_intercept_indicator(vcpu, inti);
741 }
742 if (list_empty(&li->list))
743 atomic_set(&li->active, 0);
744 spin_unlock(&li->lock);
745 if (deliver) {
746 __do_deliver_interrupt(vcpu, inti);
747 kfree(inti); 714 kfree(inti);
748 } 715 }
749 } while (deliver); 716 } while (!rc && deliver);
750 } 717 }
751 718
752 if (atomic_read(&fi->active)) { 719 return rc;
753 do {
754 deliver = 0;
755 spin_lock(&fi->lock);
756 list_for_each_entry_safe(inti, n, &fi->list, list) {
757 if ((inti->type == KVM_S390_MCHK) &&
758 __interrupt_is_deliverable(vcpu, inti)) {
759 list_del(&inti->list);
760 fi->irq_count--;
761 deliver = 1;
762 break;
763 }
764 __set_intercept_indicator(vcpu, inti);
765 }
766 if (list_empty(&fi->list))
767 atomic_set(&fi->active, 0);
768 spin_unlock(&fi->lock);
769 if (deliver) {
770 __do_deliver_interrupt(vcpu, inti);
771 kfree(inti);
772 }
773 } while (deliver);
774 }
775} 720}
776 721
777int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) 722int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
@@ -1048,7 +993,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
1048 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, 993 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
1049 s390int->parm64, 2); 994 s390int->parm64, 2);
1050 995
1051 mutex_lock(&vcpu->kvm->lock);
1052 li = &vcpu->arch.local_int; 996 li = &vcpu->arch.local_int;
1053 spin_lock(&li->lock); 997 spin_lock(&li->lock);
1054 if (inti->type == KVM_S390_PROGRAM_INT) 998 if (inti->type == KVM_S390_PROGRAM_INT)
@@ -1060,7 +1004,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
1060 li->action_bits |= ACTION_STOP_ON_STOP; 1004 li->action_bits |= ACTION_STOP_ON_STOP;
1061 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); 1005 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1062 spin_unlock(&li->lock); 1006 spin_unlock(&li->lock);
1063 mutex_unlock(&vcpu->kvm->lock);
1064 kvm_s390_vcpu_wakeup(vcpu); 1007 kvm_s390_vcpu_wakeup(vcpu);
1065 return 0; 1008 return 0;
1066} 1009}
@@ -1300,7 +1243,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
1300 } 1243 }
1301 INIT_LIST_HEAD(&map->list); 1244 INIT_LIST_HEAD(&map->list);
1302 map->guest_addr = addr; 1245 map->guest_addr = addr;
1303 map->addr = gmap_translate(addr, kvm->arch.gmap); 1246 map->addr = gmap_translate(kvm->arch.gmap, addr);
1304 if (map->addr == -EFAULT) { 1247 if (map->addr == -EFAULT) {
1305 ret = -EFAULT; 1248 ret = -EFAULT;
1306 goto out; 1249 goto out;
@@ -1410,7 +1353,6 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1410 r = enqueue_floating_irq(dev, attr); 1353 r = enqueue_floating_irq(dev, attr);
1411 break; 1354 break;
1412 case KVM_DEV_FLIC_CLEAR_IRQS: 1355 case KVM_DEV_FLIC_CLEAR_IRQS:
1413 r = 0;
1414 kvm_s390_clear_float_irqs(dev->kvm); 1356 kvm_s390_clear_float_irqs(dev->kvm);
1415 break; 1357 break;
1416 case KVM_DEV_FLIC_APF_ENABLE: 1358 case KVM_DEV_FLIC_APF_ENABLE:
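[Editor's sketch] Two things happen in the interrupt.c hunks above: the instruction-length-code computation is factored into get_ilc(), and the delivery helpers become __must_check functions whose callers accumulate rc instead of killing userspace with do_exit(SIGKILL) when the guest lowcore is unreachable. The table { 2, 4, 4, 6 } indexed by ipa >> 14 works because on s390 the two leftmost bits of an instruction's first halfword encode its length (00 is 2 bytes, 01 and 10 are 4 bytes, 11 is 6 bytes). A small self-contained demonstration of that lookup, with example halfwords chosen by the editor:

/* Instruction-length lookup as used by get_ilc() above. */
#include <stdint.h>
#include <stdio.h>

static unsigned int insn_length(uint16_t ipa)
{
	static const unsigned short table[] = { 2, 4, 4, 6 };

	/* top two bits of the first halfword select the length */
	return table[ipa >> 14];
}

int main(void)
{
	/* example first halfwords: svc (2 bytes), an RRE opcode (4 bytes),
	 * an RXY opcode (6 bytes) */
	uint16_t samples[] = { 0x0a00, 0xb228, 0xe310 };

	for (unsigned int i = 0; i < 3; i++)
		printf("ipa=%#06x -> %u bytes\n", samples[i],
		       insn_length(samples[i]));
	return 0;
}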
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 81b0e11521e4..55aade49b6d1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
50 { "exit_instruction", VCPU_STAT(exit_instruction) }, 50 { "exit_instruction", VCPU_STAT(exit_instruction) },
51 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 51 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
52 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 52 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
53 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
53 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 54 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
54 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 55 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
55 { "instruction_stctl", VCPU_STAT(instruction_stctl) }, 56 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
@@ -100,16 +101,12 @@ int test_vfacility(unsigned long nr)
100} 101}
101 102
102/* Section: not file related */ 103/* Section: not file related */
103int kvm_arch_hardware_enable(void *garbage) 104int kvm_arch_hardware_enable(void)
104{ 105{
105 /* every s390 is virtualization enabled ;-) */ 106 /* every s390 is virtualization enabled ;-) */
106 return 0; 107 return 0;
107} 108}
108 109
109void kvm_arch_hardware_disable(void *garbage)
110{
111}
112
113static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); 110static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
114 111
115int kvm_arch_hardware_setup(void) 112int kvm_arch_hardware_setup(void)
@@ -124,17 +121,10 @@ void kvm_arch_hardware_unsetup(void)
124 gmap_unregister_ipte_notifier(&gmap_notifier); 121 gmap_unregister_ipte_notifier(&gmap_notifier);
125} 122}
126 123
127void kvm_arch_check_processor_compat(void *rtn)
128{
129}
130
131int kvm_arch_init(void *opaque) 124int kvm_arch_init(void *opaque)
132{ 125{
133 return 0; 126 /* Register floating interrupt controller interface. */
134} 127 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
135
136void kvm_arch_exit(void)
137{
138} 128}
139 129
140/* Section: device related */ 130/* Section: device related */
@@ -404,6 +394,22 @@ long kvm_arch_vm_ioctl(struct file *filp,
404 return r; 394 return r;
405} 395}
406 396
397static int kvm_s390_crypto_init(struct kvm *kvm)
398{
399 if (!test_vfacility(76))
400 return 0;
401
402 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
403 GFP_KERNEL | GFP_DMA);
404 if (!kvm->arch.crypto.crycb)
405 return -ENOMEM;
406
407 kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
408 CRYCB_FORMAT1;
409
410 return 0;
411}
412
407int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 413int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
408{ 414{
409 int rc; 415 int rc;
@@ -441,6 +447,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
441 if (!kvm->arch.dbf) 447 if (!kvm->arch.dbf)
442 goto out_nodbf; 448 goto out_nodbf;
443 449
450 if (kvm_s390_crypto_init(kvm) < 0)
451 goto out_crypto;
452
444 spin_lock_init(&kvm->arch.float_int.lock); 453 spin_lock_init(&kvm->arch.float_int.lock);
445 INIT_LIST_HEAD(&kvm->arch.float_int.list); 454 INIT_LIST_HEAD(&kvm->arch.float_int.list);
446 init_waitqueue_head(&kvm->arch.ipte_wq); 455 init_waitqueue_head(&kvm->arch.ipte_wq);
@@ -451,7 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
451 if (type & KVM_VM_S390_UCONTROL) { 460 if (type & KVM_VM_S390_UCONTROL) {
452 kvm->arch.gmap = NULL; 461 kvm->arch.gmap = NULL;
453 } else { 462 } else {
454 kvm->arch.gmap = gmap_alloc(current->mm); 463 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
455 if (!kvm->arch.gmap) 464 if (!kvm->arch.gmap)
456 goto out_nogmap; 465 goto out_nogmap;
457 kvm->arch.gmap->private = kvm; 466 kvm->arch.gmap->private = kvm;
@@ -465,6 +474,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
465 474
466 return 0; 475 return 0;
467out_nogmap: 476out_nogmap:
477 kfree(kvm->arch.crypto.crycb);
478out_crypto:
468 debug_unregister(kvm->arch.dbf); 479 debug_unregister(kvm->arch.dbf);
469out_nodbf: 480out_nodbf:
470 free_page((unsigned long)(kvm->arch.sca)); 481 free_page((unsigned long)(kvm->arch.sca));
@@ -514,15 +525,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
514 mutex_unlock(&kvm->lock); 525 mutex_unlock(&kvm->lock);
515} 526}
516 527
517void kvm_arch_sync_events(struct kvm *kvm)
518{
519}
520
521void kvm_arch_destroy_vm(struct kvm *kvm) 528void kvm_arch_destroy_vm(struct kvm *kvm)
522{ 529{
523 kvm_free_vcpus(kvm); 530 kvm_free_vcpus(kvm);
524 free_page((unsigned long)(kvm->arch.sca)); 531 free_page((unsigned long)(kvm->arch.sca));
525 debug_unregister(kvm->arch.dbf); 532 debug_unregister(kvm->arch.dbf);
533 kfree(kvm->arch.crypto.crycb);
526 if (!kvm_is_ucontrol(kvm)) 534 if (!kvm_is_ucontrol(kvm))
527 gmap_free(kvm->arch.gmap); 535 gmap_free(kvm->arch.gmap);
528 kvm_s390_destroy_adapters(kvm); 536 kvm_s390_destroy_adapters(kvm);
@@ -535,7 +543,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
535 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 543 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
536 kvm_clear_async_pf_completion_queue(vcpu); 544 kvm_clear_async_pf_completion_queue(vcpu);
537 if (kvm_is_ucontrol(vcpu->kvm)) { 545 if (kvm_is_ucontrol(vcpu->kvm)) {
538 vcpu->arch.gmap = gmap_alloc(current->mm); 546 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
539 if (!vcpu->arch.gmap) 547 if (!vcpu->arch.gmap)
540 return -ENOMEM; 548 return -ENOMEM;
541 vcpu->arch.gmap->private = vcpu->kvm; 549 vcpu->arch.gmap->private = vcpu->kvm;
@@ -546,15 +554,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
546 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 554 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
547 KVM_SYNC_GPRS | 555 KVM_SYNC_GPRS |
548 KVM_SYNC_ACRS | 556 KVM_SYNC_ACRS |
549 KVM_SYNC_CRS; 557 KVM_SYNC_CRS |
558 KVM_SYNC_ARCH0 |
559 KVM_SYNC_PFAULT;
550 return 0; 560 return 0;
551} 561}
552 562
553void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
554{
555 /* Nothing todo */
556}
557
558void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 563void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
559{ 564{
560 save_fp_ctl(&vcpu->arch.host_fpregs.fpc); 565 save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
@@ -607,6 +612,14 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
607 return 0; 612 return 0;
608} 613}
609 614
615static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
616{
617 if (!test_vfacility(76))
618 return;
619
620 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
621}
622
610void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 623void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
611{ 624{
612 free_page(vcpu->arch.sie_block->cbrlo); 625 free_page(vcpu->arch.sie_block->cbrlo);
@@ -653,6 +666,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
653 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 666 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
654 get_cpu_id(&vcpu->arch.cpu_id); 667 get_cpu_id(&vcpu->arch.cpu_id);
655 vcpu->arch.cpu_id.version = 0xff; 668 vcpu->arch.cpu_id.version = 0xff;
669
670 kvm_s390_vcpu_crypto_setup(vcpu);
671
656 return rc; 672 return rc;
657} 673}
658 674
@@ -1049,6 +1065,11 @@ retry:
1049 goto retry; 1065 goto retry;
1050 } 1066 }
1051 1067
1068 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1069 vcpu->arch.sie_block->ihcpu = 0xffff;
1070 goto retry;
1071 }
1072
1052 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 1073 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1053 if (!ibs_enabled(vcpu)) { 1074 if (!ibs_enabled(vcpu)) {
1054 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 1075 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
@@ -1085,18 +1106,8 @@ retry:
1085 */ 1106 */
1086long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 1107long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1087{ 1108{
1088 struct mm_struct *mm = current->mm; 1109 return gmap_fault(vcpu->arch.gmap, gpa,
1089 hva_t hva; 1110 writable ? FAULT_FLAG_WRITE : 0);
1090 long rc;
1091
1092 hva = gmap_fault(gpa, vcpu->arch.gmap);
1093 if (IS_ERR_VALUE(hva))
1094 return (long)hva;
1095 down_read(&mm->mmap_sem);
1096 rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
1097 up_read(&mm->mmap_sem);
1098
1099 return rc < 0 ? rc : 0;
1100} 1111}
1101 1112
1102static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 1113static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -1191,8 +1202,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1191 if (test_cpu_flag(CIF_MCCK_PENDING)) 1202 if (test_cpu_flag(CIF_MCCK_PENDING))
1192 s390_handle_mcck(); 1203 s390_handle_mcck();
1193 1204
1194 if (!kvm_is_ucontrol(vcpu->kvm)) 1205 if (!kvm_is_ucontrol(vcpu->kvm)) {
1195 kvm_s390_deliver_pending_interrupts(vcpu); 1206 rc = kvm_s390_deliver_pending_interrupts(vcpu);
1207 if (rc)
1208 return rc;
1209 }
1196 1210
1197 rc = kvm_s390_handle_requests(vcpu); 1211 rc = kvm_s390_handle_requests(vcpu);
1198 if (rc) 1212 if (rc)
@@ -1296,6 +1310,48 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
1296 return rc; 1310 return rc;
1297} 1311}
1298 1312
1313static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1314{
1315 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
1316 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1317 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
1318 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1319 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1320 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1321 /* some control register changes require a tlb flush */
1322 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1323 }
1324 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
1325 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
1326 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
1327 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
1328 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
1329 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
1330 }
1331 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
1332 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
1333 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
1334 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
1335 }
1336 kvm_run->kvm_dirty_regs = 0;
1337}
1338
1339static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1340{
1341 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
1342 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1343 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1344 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1345 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
1346 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
1347 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
1348 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
1349 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
1350 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
1351 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
1352 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
1353}
1354
1299int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1355int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1300{ 1356{
1301 int rc; 1357 int rc;
@@ -1317,17 +1373,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1317 return -EINVAL; 1373 return -EINVAL;
1318 } 1374 }
1319 1375
1320 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 1376 sync_regs(vcpu, kvm_run);
1321 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1322 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
1323 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
1324 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1325 }
1326 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1327 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
1328 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1329 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1330 }
1331 1377
1332 might_fault(); 1378 might_fault();
1333 rc = __vcpu_run(vcpu); 1379 rc = __vcpu_run(vcpu);
@@ -1357,10 +1403,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1357 rc = 0; 1403 rc = 0;
1358 } 1404 }
1359 1405
1360 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 1406 store_regs(vcpu, kvm_run);
1361 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1362 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1363 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1364 1407
1365 if (vcpu->sigset_active) 1408 if (vcpu->sigset_active)
1366 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1409 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1489,7 +1532,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
1489 * Another VCPU might have used IBS while we were offline. 1532 * Another VCPU might have used IBS while we were offline.
1490 * Let's play safe and flush the VCPU at startup. 1533 * Let's play safe and flush the VCPU at startup.
1491 */ 1534 */
1492 vcpu->arch.sie_block->ihcpu = 0xffff; 1535 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1493 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 1536 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
1494 return; 1537 return;
1495} 1538}
@@ -1644,9 +1687,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1644 } 1687 }
1645#endif 1688#endif
1646 case KVM_S390_VCPU_FAULT: { 1689 case KVM_S390_VCPU_FAULT: {
1647 r = gmap_fault(arg, vcpu->arch.gmap); 1690 r = gmap_fault(vcpu->arch.gmap, arg, 0);
1648 if (!IS_ERR_VALUE(r))
1649 r = 0;
1650 break; 1691 break;
1651 } 1692 }
1652 case KVM_ENABLE_CAP: 1693 case KVM_ENABLE_CAP:
@@ -1677,21 +1718,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1677 return VM_FAULT_SIGBUS; 1718 return VM_FAULT_SIGBUS;
1678} 1719}
1679 1720
1680void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1681 struct kvm_memory_slot *dont)
1682{
1683}
1684
1685int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1721int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1686 unsigned long npages) 1722 unsigned long npages)
1687{ 1723{
1688 return 0; 1724 return 0;
1689} 1725}
1690 1726
1691void kvm_arch_memslots_updated(struct kvm *kvm)
1692{
1693}
1694
1695/* Section: memory related */ 1727/* Section: memory related */
1696int kvm_arch_prepare_memory_region(struct kvm *kvm, 1728int kvm_arch_prepare_memory_region(struct kvm *kvm,
1697 struct kvm_memory_slot *memslot, 1729 struct kvm_memory_slot *memslot,
@@ -1737,15 +1769,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
1737 return; 1769 return;
1738} 1770}
1739 1771
1740void kvm_arch_flush_shadow_all(struct kvm *kvm)
1741{
1742}
1743
1744void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
1745 struct kvm_memory_slot *slot)
1746{
1747}
1748
1749static int __init kvm_s390_init(void) 1772static int __init kvm_s390_init(void)
1750{ 1773{
1751 int ret; 1774 int ret;
@@ -1764,7 +1787,7 @@ static int __init kvm_s390_init(void)
1764 return -ENOMEM; 1787 return -ENOMEM;
1765 } 1788 }
1766 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); 1789 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
1767 vfacilities[0] &= 0xff82fff3f4fc2000UL; 1790 vfacilities[0] &= 0xff82fffbf47c2000UL;
1768 vfacilities[1] &= 0x005c000000000000UL; 1791 vfacilities[1] &= 0x005c000000000000UL;
1769 return 0; 1792 return 0;
1770} 1793}
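[Editor's sketch] One recurring pattern in the kvm-s390.c hunks above is deferring work to the vcpu's own request loop: kvm_s390_set_prefix() and the CR-sync path no longer poke sie_block->ihcpu directly but raise KVM_REQ_TLB_FLUSH, which kvm_s390_handle_requests() services before entering SIE. The mock below illustrates that request-bit idea with C11 atomics; it is the editor's sketch of the pattern, not the implementation of the real kvm_make_request/kvm_check_request helpers.

/* Minimal mock of the deferred-request pattern used above. */
#include <stdatomic.h>
#include <stdio.h>

#define REQ_TLB_FLUSH	0
#define REQ_MMU_RELOAD	1

struct mock_vcpu {
	atomic_ulong requests;
	unsigned int ihcpu;	/* stands in for sie_block->ihcpu */
};

static void mock_make_request(struct mock_vcpu *vcpu, int req)
{
	atomic_fetch_or(&vcpu->requests, 1UL << req);
}

static int mock_check_request(struct mock_vcpu *vcpu, int req)
{
	unsigned long mask = 1UL << req;

	/* test-and-clear: only one pass of the loop services the request */
	return (atomic_fetch_and(&vcpu->requests, ~mask) & mask) != 0;
}

static void handle_requests(struct mock_vcpu *vcpu)
{
	if (mock_check_request(vcpu, REQ_TLB_FLUSH))
		vcpu->ihcpu = 0xffff;	/* what the real request handler does */
}

int main(void)
{
	struct mock_vcpu vcpu;

	atomic_init(&vcpu.requests, 0);
	vcpu.ihcpu = 0;

	mock_make_request(&vcpu, REQ_TLB_FLUSH);	/* e.g. prefix changed */
	handle_requests(&vcpu);				/* before entering SIE */
	printf("ihcpu=%#x requests=%#lx\n", vcpu.ihcpu,
	       (unsigned long)atomic_load(&vcpu.requests));
	return 0;
}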
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3862fa2cefe0..244d02303182 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
70static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) 70static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
71{ 71{
72 vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; 72 vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
73 vcpu->arch.sie_block->ihcpu = 0xffff; 73 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
74 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 74 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
75} 75}
76 76
@@ -138,8 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
138int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); 138int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
139void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); 139void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
140enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); 140enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
141void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); 141int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
142void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
143void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); 142void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
144void kvm_s390_clear_float_irqs(struct kvm *kvm); 143void kvm_s390_clear_float_irqs(struct kvm *kvm);
145int __must_check kvm_s390_inject_vm(struct kvm *kvm, 144int __must_check kvm_s390_inject_vm(struct kvm *kvm,
@@ -228,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
228int psw_extint_disabled(struct kvm_vcpu *vcpu); 227int psw_extint_disabled(struct kvm_vcpu *vcpu);
229void kvm_s390_destroy_adapters(struct kvm *kvm); 228void kvm_s390_destroy_adapters(struct kvm *kvm);
230int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); 229int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
230extern struct kvm_device_ops kvm_flic_ops;
231 231
232/* implemented in guestdbg.c */ 232/* implemented in guestdbg.c */
233void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); 233void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f89c1cd67751..72bb2dd8b9cd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
352 return 0; 352 return 0;
353} 353}
354 354
355static void handle_new_psw(struct kvm_vcpu *vcpu)
356{
357 /* Check whether the new psw is enabled for machine checks. */
358 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
359 kvm_s390_deliver_pending_machine_checks(vcpu);
360}
361
362#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) 355#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
363#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL 356#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
364#define PSW_ADDR_24 0x0000000000ffffffUL 357#define PSW_ADDR_24 0x0000000000ffffffUL
@@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
405 gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; 398 gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
406 if (!is_valid_psw(gpsw)) 399 if (!is_valid_psw(gpsw))
407 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 400 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
408 handle_new_psw(vcpu);
409 return 0; 401 return 0;
410} 402}
411 403
@@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
427 vcpu->arch.sie_block->gpsw = new_psw; 419 vcpu->arch.sie_block->gpsw = new_psw;
428 if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) 420 if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
429 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 421 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
430 handle_new_psw(vcpu);
431 return 0; 422 return 0;
432} 423}
433 424
@@ -738,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
738 /* invalid entry */ 729 /* invalid entry */
739 break; 730 break;
740 /* try to free backing */ 731 /* try to free backing */
741 __gmap_zap(cbrle, gmap); 732 __gmap_zap(gmap, cbrle);
742 } 733 }
743 up_read(&gmap->mm->mmap_sem); 734 up_read(&gmap->mm->mmap_sem);
744 if (i < entries) 735 if (i < entries)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3f3b35403d0a..a2b81d6ce8a5 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access)
442 down_read(&mm->mmap_sem); 442 down_read(&mm->mmap_sem);
443 443
444#ifdef CONFIG_PGSTE 444#ifdef CONFIG_PGSTE
445 gmap = (struct gmap *) 445 gmap = (current->flags & PF_VCPU) ?
446 ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); 446 (struct gmap *) S390_lowcore.gmap : NULL;
447 if (gmap) { 447 if (gmap) {
448 address = __gmap_fault(address, gmap); 448 current->thread.gmap_addr = address;
449 address = __gmap_translate(gmap, address);
449 if (address == -EFAULT) { 450 if (address == -EFAULT) {
450 fault = VM_FAULT_BADMAP; 451 fault = VM_FAULT_BADMAP;
451 goto out_up; 452 goto out_up;
452 } 453 }
453 if (address == -ENOMEM) {
454 fault = VM_FAULT_OOM;
455 goto out_up;
456 }
457 if (gmap->pfault_enabled) 454 if (gmap->pfault_enabled)
458 flags |= FAULT_FLAG_RETRY_NOWAIT; 455 flags |= FAULT_FLAG_RETRY_NOWAIT;
459 } 456 }
@@ -530,6 +527,20 @@ retry:
530 goto retry; 527 goto retry;
531 } 528 }
532 } 529 }
530#ifdef CONFIG_PGSTE
531 if (gmap) {
532 address = __gmap_link(gmap, current->thread.gmap_addr,
533 address);
534 if (address == -EFAULT) {
535 fault = VM_FAULT_BADMAP;
536 goto out_up;
537 }
538 if (address == -ENOMEM) {
539 fault = VM_FAULT_OOM;
540 goto out_up;
541 }
542 }
543#endif
533 fault = 0; 544 fault = 0;
534out_up: 545out_up:
535 up_read(&mm->mmap_sem); 546 up_read(&mm->mmap_sem);
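[Editor's sketch] The fault.c change above splits gmap fault handling into two stages: the guest address is translated to a host address with __gmap_translate() before the ordinary host fault runs, and only after handle_mm_fault() has populated the host page tables is that page table linked into the gmap shadow tables with __gmap_link(). The stub below only shows that ordering; the function bodies are placeholders invented for the illustration and do not reflect the real implementations.

/* Ordering of the two-stage gmap fault handling, with stubbed helpers. */
#include <stdio.h>

#define EFAULT 14

static long stub_gmap_translate(unsigned long gaddr)
{
	return 0x700000 + (gaddr & 0xfff);	/* pretend lookup succeeded */
}

static int stub_handle_mm_fault(unsigned long vmaddr)
{
	(void)vmaddr;
	return 0;	/* pretend the host page tables now exist */
}

static int stub_gmap_link(unsigned long gaddr, unsigned long vmaddr)
{
	(void)gaddr;
	(void)vmaddr;
	return 0;
}

static int guest_fault(unsigned long gaddr)
{
	long vmaddr = stub_gmap_translate(gaddr);	/* before the host fault */

	if (vmaddr == -EFAULT)
		return -EFAULT;
	if (stub_handle_mm_fault((unsigned long)vmaddr))
		return -EFAULT;
	/* only now can the host page table be linked into the gmap */
	return stub_gmap_link(gaddr, (unsigned long)vmaddr);
}

int main(void)
{
	printf("guest_fault -> %d\n", guest_fault(0x12345678));
	return 0;
}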
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5404a6261db9..296b61a4af59 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
145/** 145/**
146 * gmap_alloc - allocate a guest address space 146 * gmap_alloc - allocate a guest address space
147 * @mm: pointer to the parent mm_struct 147 * @mm: pointer to the parent mm_struct
148 * @limit: maximum size of the gmap address space
148 * 149 *
149 * Returns a guest address space structure. 150 * Returns a guest address space structure.
150 */ 151 */
151struct gmap *gmap_alloc(struct mm_struct *mm) 152struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
152{ 153{
153 struct gmap *gmap; 154 struct gmap *gmap;
154 struct page *page; 155 struct page *page;
155 unsigned long *table; 156 unsigned long *table;
156 157 unsigned long etype, atype;
158
159 if (limit < (1UL << 31)) {
160 limit = (1UL << 31) - 1;
161 atype = _ASCE_TYPE_SEGMENT;
162 etype = _SEGMENT_ENTRY_EMPTY;
163 } else if (limit < (1UL << 42)) {
164 limit = (1UL << 42) - 1;
165 atype = _ASCE_TYPE_REGION3;
166 etype = _REGION3_ENTRY_EMPTY;
167 } else if (limit < (1UL << 53)) {
168 limit = (1UL << 53) - 1;
169 atype = _ASCE_TYPE_REGION2;
170 etype = _REGION2_ENTRY_EMPTY;
171 } else {
172 limit = -1UL;
173 atype = _ASCE_TYPE_REGION1;
174 etype = _REGION1_ENTRY_EMPTY;
175 }
157 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); 176 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
158 if (!gmap) 177 if (!gmap)
159 goto out; 178 goto out;
160 INIT_LIST_HEAD(&gmap->crst_list); 179 INIT_LIST_HEAD(&gmap->crst_list);
180 INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
181 INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
182 spin_lock_init(&gmap->guest_table_lock);
161 gmap->mm = mm; 183 gmap->mm = mm;
162 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 184 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
163 if (!page) 185 if (!page)
164 goto out_free; 186 goto out_free;
187 page->index = 0;
165 list_add(&page->lru, &gmap->crst_list); 188 list_add(&page->lru, &gmap->crst_list);
166 table = (unsigned long *) page_to_phys(page); 189 table = (unsigned long *) page_to_phys(page);
167 crst_table_init(table, _REGION1_ENTRY_EMPTY); 190 crst_table_init(table, etype);
168 gmap->table = table; 191 gmap->table = table;
169 gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | 192 gmap->asce = atype | _ASCE_TABLE_LENGTH |
170 _ASCE_USER_BITS | __pa(table); 193 _ASCE_USER_BITS | __pa(table);
194 gmap->asce_end = limit;
195 down_write(&mm->mmap_sem);
171 list_add(&gmap->list, &mm->context.gmap_list); 196 list_add(&gmap->list, &mm->context.gmap_list);
197 up_write(&mm->mmap_sem);
172 return gmap; 198 return gmap;
173 199
174out_free: 200out_free:
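[Editor's sketch] gmap_alloc() above now sizes the guest ASCE to the requested limit instead of always building a region-first table, and it tracks guest-to-host and host-to-guest mappings in two radix trees at PMD (1 MB) granularity. The sketch below reproduces only the limit-to-table-type selection and reports the rounded-up limit; the real code additionally picks the matching empty-entry pattern, which is omitted here.

/* Sketch of the limit -> ASCE type selection added to gmap_alloc(). */
#include <stdio.h>

static const char *asce_type(unsigned long *limit)
{
	if (*limit < (1UL << 31)) {
		*limit = (1UL << 31) - 1;	/* up to 2 GB: segment table */
		return "segment";
	} else if (*limit < (1UL << 42)) {
		*limit = (1UL << 42) - 1;	/* up to 4 TB: region-third table */
		return "region3";
	} else if (*limit < (1UL << 53)) {
		*limit = (1UL << 53) - 1;	/* up to 8 PB: region-second table */
		return "region2";
	}
	*limit = -1UL;				/* full range: region-first table */
	return "region1";
}

int main(void)
{
	/* the callers in this patch: (1UL << 44) - 1 for regular guests,
	 * -1UL for ucontrol vcpus */
	unsigned long limits[] = { (1UL << 44) - 1, -1UL };

	for (int i = 0; i < 2; i++) {
		unsigned long limit = limits[i];
		const char *type = asce_type(&limit);

		printf("requested %#lx -> %s table, limit %#lx\n",
		       limits[i], type, limit);
	}
	return 0;
}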
@@ -178,36 +204,38 @@ out:
178} 204}
179EXPORT_SYMBOL_GPL(gmap_alloc); 205EXPORT_SYMBOL_GPL(gmap_alloc);
180 206
181static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
182{
183 struct gmap_pgtable *mp;
184 struct gmap_rmap *rmap;
185 struct page *page;
186
187 if (*table & _SEGMENT_ENTRY_INVALID)
188 return 0;
189 page = pfn_to_page(*table >> PAGE_SHIFT);
190 mp = (struct gmap_pgtable *) page->index;
191 list_for_each_entry(rmap, &mp->mapper, list) {
192 if (rmap->entry != table)
193 continue;
194 list_del(&rmap->list);
195 kfree(rmap);
196 break;
197 }
198 *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
199 return 1;
200}
201
202static void gmap_flush_tlb(struct gmap *gmap) 207static void gmap_flush_tlb(struct gmap *gmap)
203{ 208{
204 if (MACHINE_HAS_IDTE) 209 if (MACHINE_HAS_IDTE)
205 __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | 210 __tlb_flush_asce(gmap->mm, gmap->asce);
206 _ASCE_TYPE_REGION1);
207 else 211 else
208 __tlb_flush_global(); 212 __tlb_flush_global();
209} 213}
210 214
215static void gmap_radix_tree_free(struct radix_tree_root *root)
216{
217 struct radix_tree_iter iter;
218 unsigned long indices[16];
219 unsigned long index;
220 void **slot;
221 int i, nr;
222
223 /* A radix tree is freed by deleting all of its entries */
224 index = 0;
225 do {
226 nr = 0;
227 radix_tree_for_each_slot(slot, root, &iter, index) {
228 indices[nr] = iter.index;
229 if (++nr == 16)
230 break;
231 }
232 for (i = 0; i < nr; i++) {
233 index = indices[i];
234 radix_tree_delete(root, index);
235 }
236 } while (nr > 0);
237}
238
211/** 239/**
212 * gmap_free - free a guest address space 240 * gmap_free - free a guest address space
213 * @gmap: pointer to the guest address space structure 241 * @gmap: pointer to the guest address space structure
@@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap)
215void gmap_free(struct gmap *gmap) 243void gmap_free(struct gmap *gmap)
216{ 244{
217 struct page *page, *next; 245 struct page *page, *next;
218 unsigned long *table;
219 int i;
220
221 246
222 /* Flush tlb. */ 247 /* Flush tlb. */
223 if (MACHINE_HAS_IDTE) 248 if (MACHINE_HAS_IDTE)
224 __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | 249 __tlb_flush_asce(gmap->mm, gmap->asce);
225 _ASCE_TYPE_REGION1);
226 else 250 else
227 __tlb_flush_global(); 251 __tlb_flush_global();
228 252
229 /* Free all segment & region tables. */ 253 /* Free all segment & region tables. */
230 down_read(&gmap->mm->mmap_sem); 254 list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
231 spin_lock(&gmap->mm->page_table_lock);
232 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
233 table = (unsigned long *) page_to_phys(page);
234 if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
235 /* Remove gmap rmap structures for segment table. */
236 for (i = 0; i < PTRS_PER_PMD; i++, table++)
237 gmap_unlink_segment(gmap, table);
238 __free_pages(page, ALLOC_ORDER); 255 __free_pages(page, ALLOC_ORDER);
239 } 256 gmap_radix_tree_free(&gmap->guest_to_host);
240 spin_unlock(&gmap->mm->page_table_lock); 257 gmap_radix_tree_free(&gmap->host_to_guest);
241 up_read(&gmap->mm->mmap_sem); 258 down_write(&gmap->mm->mmap_sem);
242 list_del(&gmap->list); 259 list_del(&gmap->list);
260 up_write(&gmap->mm->mmap_sem);
243 kfree(gmap); 261 kfree(gmap);
244} 262}
245EXPORT_SYMBOL_GPL(gmap_free); 263EXPORT_SYMBOL_GPL(gmap_free);
@@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable);
267/* 285/*
268 * gmap_alloc_table is assumed to be called with mmap_sem held 286 * gmap_alloc_table is assumed to be called with mmap_sem held
269 */ 287 */
270static int gmap_alloc_table(struct gmap *gmap, 288static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
271 unsigned long *table, unsigned long init) 289 unsigned long init, unsigned long gaddr)
272 __releases(&gmap->mm->page_table_lock)
273 __acquires(&gmap->mm->page_table_lock)
274{ 290{
275 struct page *page; 291 struct page *page;
276 unsigned long *new; 292 unsigned long *new;
277 293
278 /* since we dont free the gmap table until gmap_free we can unlock */ 294 /* since we dont free the gmap table until gmap_free we can unlock */
279 spin_unlock(&gmap->mm->page_table_lock);
280 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 295 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
281 spin_lock(&gmap->mm->page_table_lock);
282 if (!page) 296 if (!page)
283 return -ENOMEM; 297 return -ENOMEM;
284 new = (unsigned long *) page_to_phys(page); 298 new = (unsigned long *) page_to_phys(page);
285 crst_table_init(new, init); 299 crst_table_init(new, init);
300 spin_lock(&gmap->mm->page_table_lock);
286 if (*table & _REGION_ENTRY_INVALID) { 301 if (*table & _REGION_ENTRY_INVALID) {
287 list_add(&page->lru, &gmap->crst_list); 302 list_add(&page->lru, &gmap->crst_list);
288 *table = (unsigned long) new | _REGION_ENTRY_LENGTH | 303 *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
289 (*table & _REGION_ENTRY_TYPE_MASK); 304 (*table & _REGION_ENTRY_TYPE_MASK);
290 } else 305 page->index = gaddr;
306 page = NULL;
307 }
308 spin_unlock(&gmap->mm->page_table_lock);
309 if (page)
291 __free_pages(page, ALLOC_ORDER); 310 __free_pages(page, ALLOC_ORDER);
292 return 0; 311 return 0;
293} 312}
294 313
295/** 314/**
315 * __gmap_segment_gaddr - find virtual address from segment pointer
316 * @entry: pointer to a segment table entry in the guest address space
317 *
318 * Returns the virtual address in the guest address space for the segment
319 */
320static unsigned long __gmap_segment_gaddr(unsigned long *entry)
321{
322 struct page *page;
323 unsigned long offset;
324
325 offset = (unsigned long) entry / sizeof(unsigned long);
326 offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
327 page = pmd_to_page((pmd_t *) entry);
328 return page->index + offset;
329}
330
331/**
332 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
333 * @gmap: pointer to the guest address space structure
334 * @vmaddr: address in the host process address space
335 *
336 * Returns 1 if a TLB flush is required
337 */
338static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
339{
340 unsigned long *entry;
341 int flush = 0;
342
343 spin_lock(&gmap->guest_table_lock);
344 entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
345 if (entry) {
346 flush = (*entry != _SEGMENT_ENTRY_INVALID);
347 *entry = _SEGMENT_ENTRY_INVALID;
348 }
349 spin_unlock(&gmap->guest_table_lock);
350 return flush;
351}
352
353/**
354 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
355 * @gmap: pointer to the guest address space structure
356 * @gaddr: address in the guest address space
357 *
358 * Returns 1 if a TLB flush is required
359 */
360static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
361{
362 unsigned long vmaddr;
363
364 vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
365 gaddr >> PMD_SHIFT);
366 return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
367}
368
369/**
296 * gmap_unmap_segment - unmap segment from the guest address space 370 * gmap_unmap_segment - unmap segment from the guest address space
297 * @gmap: pointer to the guest address space structure 371 * @gmap: pointer to the guest address space structure
298 * @addr: address in the guest address space 372 * @to: address in the guest address space
299 * @len: length of the memory area to unmap 373 * @len: length of the memory area to unmap
300 * 374 *
301 * Returns 0 if the unmap succeeded, -EINVAL if not. 375 * Returns 0 if the unmap succeeded, -EINVAL if not.
302 */ 376 */
303int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) 377int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
304{ 378{
305 unsigned long *table;
306 unsigned long off; 379 unsigned long off;
307 int flush; 380 int flush;
308 381
@@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
312 return -EINVAL; 385 return -EINVAL;
313 386
314 flush = 0; 387 flush = 0;
315 down_read(&gmap->mm->mmap_sem); 388 down_write(&gmap->mm->mmap_sem);
316 spin_lock(&gmap->mm->page_table_lock); 389 for (off = 0; off < len; off += PMD_SIZE)
317 for (off = 0; off < len; off += PMD_SIZE) { 390 flush |= __gmap_unmap_by_gaddr(gmap, to + off);
318 /* Walk the guest addr space page table */ 391 up_write(&gmap->mm->mmap_sem);
319 table = gmap->table + (((to + off) >> 53) & 0x7ff);
320 if (*table & _REGION_ENTRY_INVALID)
321 goto out;
322 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
323 table = table + (((to + off) >> 42) & 0x7ff);
324 if (*table & _REGION_ENTRY_INVALID)
325 goto out;
326 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
327 table = table + (((to + off) >> 31) & 0x7ff);
328 if (*table & _REGION_ENTRY_INVALID)
329 goto out;
330 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
331 table = table + (((to + off) >> 20) & 0x7ff);
332
333 /* Clear segment table entry in guest address space. */
334 flush |= gmap_unlink_segment(gmap, table);
335 *table = _SEGMENT_ENTRY_INVALID;
336 }
337out:
338 spin_unlock(&gmap->mm->page_table_lock);
339 up_read(&gmap->mm->mmap_sem);
340 if (flush) 392 if (flush)
341 gmap_flush_tlb(gmap); 393 gmap_flush_tlb(gmap);
342 return 0; 394 return 0;
@@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
348 * @gmap: pointer to the guest address space structure 400 * @gmap: pointer to the guest address space structure
349 * @from: source address in the parent address space 401 * @from: source address in the parent address space
350 * @to: target address in the guest address space 402 * @to: target address in the guest address space
403 * @len: length of the memory area to map
351 * 404 *
352 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. 405 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
353 */ 406 */
354int gmap_map_segment(struct gmap *gmap, unsigned long from, 407int gmap_map_segment(struct gmap *gmap, unsigned long from,
355 unsigned long to, unsigned long len) 408 unsigned long to, unsigned long len)
356{ 409{
357 unsigned long *table;
358 unsigned long off; 410 unsigned long off;
359 int flush; 411 int flush;
360 412
361 if ((from | to | len) & (PMD_SIZE - 1)) 413 if ((from | to | len) & (PMD_SIZE - 1))
362 return -EINVAL; 414 return -EINVAL;
363 if (len == 0 || from + len > TASK_MAX_SIZE || 415 if (len == 0 || from + len < from || to + len < to ||
364 from + len < from || to + len < to) 416 from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
365 return -EINVAL; 417 return -EINVAL;
366 418
367 flush = 0; 419 flush = 0;
368 down_read(&gmap->mm->mmap_sem); 420 down_write(&gmap->mm->mmap_sem);
369 spin_lock(&gmap->mm->page_table_lock);
370 for (off = 0; off < len; off += PMD_SIZE) { 421 for (off = 0; off < len; off += PMD_SIZE) {
371 /* Walk the gmap address space page table */ 422 /* Remove old translation */
372 table = gmap->table + (((to + off) >> 53) & 0x7ff); 423 flush |= __gmap_unmap_by_gaddr(gmap, to + off);
373 if ((*table & _REGION_ENTRY_INVALID) && 424 /* Store new translation */
374 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) 425 if (radix_tree_insert(&gmap->guest_to_host,
375 goto out_unmap; 426 (to + off) >> PMD_SHIFT,
376 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 427 (void *) from + off))
377 table = table + (((to + off) >> 42) & 0x7ff); 428 break;
378 if ((*table & _REGION_ENTRY_INVALID) &&
379 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
380 goto out_unmap;
381 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
382 table = table + (((to + off) >> 31) & 0x7ff);
383 if ((*table & _REGION_ENTRY_INVALID) &&
384 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
385 goto out_unmap;
386 table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
387 table = table + (((to + off) >> 20) & 0x7ff);
388
389 /* Store 'from' address in an invalid segment table entry. */
390 flush |= gmap_unlink_segment(gmap, table);
391 *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
392 _SEGMENT_ENTRY_PROTECT);
393 } 429 }
394 spin_unlock(&gmap->mm->page_table_lock); 430 up_write(&gmap->mm->mmap_sem);
395 up_read(&gmap->mm->mmap_sem);
396 if (flush) 431 if (flush)
397 gmap_flush_tlb(gmap); 432 gmap_flush_tlb(gmap);
398 return 0; 433 if (off >= len)
399 434 return 0;
400out_unmap:
401 spin_unlock(&gmap->mm->page_table_lock);
402 up_read(&gmap->mm->mmap_sem);
403 gmap_unmap_segment(gmap, to, len); 435 gmap_unmap_segment(gmap, to, len);
404 return -ENOMEM; 436 return -ENOMEM;
405} 437}
406EXPORT_SYMBOL_GPL(gmap_map_segment); 438EXPORT_SYMBOL_GPL(gmap_map_segment);
407 439
408static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
409{
410 unsigned long *table;
411
412 table = gmap->table + ((address >> 53) & 0x7ff);
413 if (unlikely(*table & _REGION_ENTRY_INVALID))
414 return ERR_PTR(-EFAULT);
415 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
416 table = table + ((address >> 42) & 0x7ff);
417 if (unlikely(*table & _REGION_ENTRY_INVALID))
418 return ERR_PTR(-EFAULT);
419 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
420 table = table + ((address >> 31) & 0x7ff);
421 if (unlikely(*table & _REGION_ENTRY_INVALID))
422 return ERR_PTR(-EFAULT);
423 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
424 table = table + ((address >> 20) & 0x7ff);
425 return table;
426}
427
428/** 440/**
429 * __gmap_translate - translate a guest address to a user space address 441 * __gmap_translate - translate a guest address to a user space address
430 * @address: guest address
431 * @gmap: pointer to guest mapping meta data structure 442 * @gmap: pointer to guest mapping meta data structure
443 * @gaddr: guest address
432 * 444 *
433 * Returns user space address which corresponds to the guest address or 445 * Returns user space address which corresponds to the guest address or
434 * -EFAULT if no such mapping exists. 446 * -EFAULT if no such mapping exists.
@@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
436 * The mmap_sem of the mm that belongs to the address space must be held 448 * The mmap_sem of the mm that belongs to the address space must be held
437 * when this function gets called. 449 * when this function gets called.
438 */ 450 */
439unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) 451unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
440{ 452{
441 unsigned long *segment_ptr, vmaddr, segment; 453 unsigned long vmaddr;
442 struct gmap_pgtable *mp;
443 struct page *page;
444 454
445 current->thread.gmap_addr = address; 455 vmaddr = (unsigned long)
446 segment_ptr = gmap_table_walk(address, gmap); 456 radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
447 if (IS_ERR(segment_ptr)) 457 return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
448 return PTR_ERR(segment_ptr);
449 /* Convert the gmap address to an mm address. */
450 segment = *segment_ptr;
451 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
452 page = pfn_to_page(segment >> PAGE_SHIFT);
453 mp = (struct gmap_pgtable *) page->index;
454 return mp->vmaddr | (address & ~PMD_MASK);
455 } else if (segment & _SEGMENT_ENTRY_PROTECT) {
456 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
457 return vmaddr | (address & ~PMD_MASK);
458 }
459 return -EFAULT;
460} 458}
461EXPORT_SYMBOL_GPL(__gmap_translate); 459EXPORT_SYMBOL_GPL(__gmap_translate);
462 460
463/** 461/**
464 * gmap_translate - translate a guest address to a user space address 462 * gmap_translate - translate a guest address to a user space address
465 * @address: guest address
466 * @gmap: pointer to guest mapping meta data structure 463 * @gmap: pointer to guest mapping meta data structure
464 * @gaddr: guest address
467 * 465 *
468 * Returns user space address which corresponds to the guest address or 466 * Returns user space address which corresponds to the guest address or
469 * -EFAULT if no such mapping exists. 467 * -EFAULT if no such mapping exists.
470 * This function does not establish potentially missing page table entries. 468 * This function does not establish potentially missing page table entries.
471 */ 469 */
472unsigned long gmap_translate(unsigned long address, struct gmap *gmap) 470unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
473{ 471{
474 unsigned long rc; 472 unsigned long rc;
475 473
476 down_read(&gmap->mm->mmap_sem); 474 down_read(&gmap->mm->mmap_sem);
477 rc = __gmap_translate(address, gmap); 475 rc = __gmap_translate(gmap, gaddr);
478 up_read(&gmap->mm->mmap_sem); 476 up_read(&gmap->mm->mmap_sem);
479 return rc; 477 return rc;
480} 478}
481EXPORT_SYMBOL_GPL(gmap_translate); 479EXPORT_SYMBOL_GPL(gmap_translate);
482 480
483static int gmap_connect_pgtable(unsigned long address, unsigned long segment, 481/**
484 unsigned long *segment_ptr, struct gmap *gmap) 482 * gmap_unlink - disconnect a page table from the gmap shadow tables
 483 * @mm: pointer to the parent mm_struct
484 * @table: pointer to the host page table
485 * @vmaddr: vm address associated with the host page table
486 */
487static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
488 unsigned long vmaddr)
489{
490 struct gmap *gmap;
491 int flush;
492
493 list_for_each_entry(gmap, &mm->context.gmap_list, list) {
494 flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
495 if (flush)
496 gmap_flush_tlb(gmap);
497 }
498}
499
500/**
501 * gmap_link - set up shadow page tables to connect a host to a guest address
502 * @gmap: pointer to guest mapping meta data structure
503 * @gaddr: guest address
504 * @vmaddr: vm address
505 *
506 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
507 * if the vm address is already mapped to a different guest segment.
508 * The mmap_sem of the mm that belongs to the address space must be held
509 * when this function gets called.
510 */
511int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
485{ 512{
486 unsigned long vmaddr;
487 struct vm_area_struct *vma;
488 struct gmap_pgtable *mp;
489 struct gmap_rmap *rmap;
490 struct mm_struct *mm; 513 struct mm_struct *mm;
491 struct page *page; 514 unsigned long *table;
515 spinlock_t *ptl;
492 pgd_t *pgd; 516 pgd_t *pgd;
493 pud_t *pud; 517 pud_t *pud;
494 pmd_t *pmd; 518 pmd_t *pmd;
519 int rc;
495 520
496 mm = gmap->mm; 521 /* Create higher level tables in the gmap page table */
497 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; 522 table = gmap->table;
498 vma = find_vma(mm, vmaddr); 523 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
499 if (!vma || vma->vm_start > vmaddr) 524 table += (gaddr >> 53) & 0x7ff;
500 return -EFAULT; 525 if ((*table & _REGION_ENTRY_INVALID) &&
526 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
527 gaddr & 0xffe0000000000000))
528 return -ENOMEM;
529 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
530 }
531 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
532 table += (gaddr >> 42) & 0x7ff;
533 if ((*table & _REGION_ENTRY_INVALID) &&
534 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
535 gaddr & 0xfffffc0000000000))
536 return -ENOMEM;
537 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
538 }
539 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
540 table += (gaddr >> 31) & 0x7ff;
541 if ((*table & _REGION_ENTRY_INVALID) &&
542 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
543 gaddr & 0xffffffff80000000))
544 return -ENOMEM;
545 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
546 }
547 table += (gaddr >> 20) & 0x7ff;
501 /* Walk the parent mm page table */ 548 /* Walk the parent mm page table */
549 mm = gmap->mm;
502 pgd = pgd_offset(mm, vmaddr); 550 pgd = pgd_offset(mm, vmaddr);
503 pud = pud_alloc(mm, pgd, vmaddr); 551 VM_BUG_ON(pgd_none(*pgd));
504 if (!pud) 552 pud = pud_offset(pgd, vmaddr);
505 return -ENOMEM; 553 VM_BUG_ON(pud_none(*pud));
506 pmd = pmd_alloc(mm, pud, vmaddr); 554 pmd = pmd_offset(pud, vmaddr);
507 if (!pmd) 555 VM_BUG_ON(pmd_none(*pmd));
508 return -ENOMEM;
509 if (!pmd_present(*pmd) &&
510 __pte_alloc(mm, vma, pmd, vmaddr))
511 return -ENOMEM;
512 /* large pmds cannot yet be handled */ 556 /* large pmds cannot yet be handled */
513 if (pmd_large(*pmd)) 557 if (pmd_large(*pmd))
514 return -EFAULT; 558 return -EFAULT;
515 /* pmd now points to a valid segment table entry. */
516 rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
517 if (!rmap)
518 return -ENOMEM;
519 /* Link gmap segment table entry location to page table. */ 559 /* Link gmap segment table entry location to page table. */
520 page = pmd_page(*pmd); 560 rc = radix_tree_preload(GFP_KERNEL);
521 mp = (struct gmap_pgtable *) page->index; 561 if (rc)
522 rmap->gmap = gmap; 562 return rc;
523 rmap->entry = segment_ptr; 563 ptl = pmd_lock(mm, pmd);
524 rmap->vmaddr = address & PMD_MASK; 564 spin_lock(&gmap->guest_table_lock);
525 spin_lock(&mm->page_table_lock); 565 if (*table == _SEGMENT_ENTRY_INVALID) {
526 if (*segment_ptr == segment) { 566 rc = radix_tree_insert(&gmap->host_to_guest,
527 list_add(&rmap->list, &mp->mapper); 567 vmaddr >> PMD_SHIFT, table);
528 /* Set gmap segment table entry to page table. */ 568 if (!rc)
529 *segment_ptr = pmd_val(*pmd) & PAGE_MASK; 569 *table = pmd_val(*pmd);
530 rmap = NULL; 570 } else
531 } 571 rc = 0;
532 spin_unlock(&mm->page_table_lock); 572 spin_unlock(&gmap->guest_table_lock);
533 kfree(rmap); 573 spin_unlock(ptl);
534 return 0; 574 radix_tree_preload_end();
535} 575 return rc;
536
537static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
538{
539 struct gmap_rmap *rmap, *next;
540 struct gmap_pgtable *mp;
541 struct page *page;
542 int flush;
543
544 flush = 0;
545 spin_lock(&mm->page_table_lock);
546 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
547 mp = (struct gmap_pgtable *) page->index;
548 list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
549 *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
550 _SEGMENT_ENTRY_PROTECT);
551 list_del(&rmap->list);
552 kfree(rmap);
553 flush = 1;
554 }
555 spin_unlock(&mm->page_table_lock);
556 if (flush)
557 __tlb_flush_global();
558} 576}
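Before a missing table level is allocated, __gmap_link() aligns the guest address down to the range that the new entry will cover: 0xffe0000000000000 keeps the top 11 bits (a region-first entry spans 2^53 bytes), 0xfffffc0000000000 the top 22 bits (2^42 bytes per region-second entry) and 0xffffffff80000000 the top 33 bits (2^31 bytes per region-third entry). A standalone check of those masks, using an assumed example address:

#include <stdio.h>

int main(void)
{
	unsigned long gaddr = 0x0000123456789abcUL;	/* assumed example */

	/* Masks copied from the __gmap_link() hunk above. */
	printf("region-first base  %016lx (2^53 span)\n", gaddr & 0xffe0000000000000UL);
	printf("region-second base %016lx (2^42 span)\n", gaddr & 0xfffffc0000000000UL);
	printf("region-third base  %016lx (2^31 span)\n", gaddr & 0xffffffff80000000UL);
	return 0;
}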
559 577
560/* 578/**
561 * this function is assumed to be called with mmap_sem held 579 * gmap_fault - resolve a fault on a guest address
580 * @gmap: pointer to guest mapping meta data structure
581 * @gaddr: guest address
582 * @fault_flags: flags to pass down to handle_mm_fault()
583 *
584 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
585 * if the vm address is already mapped to a different guest segment.
562 */ 586 */
563unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) 587int gmap_fault(struct gmap *gmap, unsigned long gaddr,
588 unsigned int fault_flags)
564{ 589{
565 unsigned long *segment_ptr, segment; 590 unsigned long vmaddr;
566 struct gmap_pgtable *mp;
567 struct page *page;
568 int rc; 591 int rc;
569 592
570 current->thread.gmap_addr = address;
571 segment_ptr = gmap_table_walk(address, gmap);
572 if (IS_ERR(segment_ptr))
573 return -EFAULT;
574 /* Convert the gmap address to an mm address. */
575 while (1) {
576 segment = *segment_ptr;
577 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
578 /* Page table is present */
579 page = pfn_to_page(segment >> PAGE_SHIFT);
580 mp = (struct gmap_pgtable *) page->index;
581 return mp->vmaddr | (address & ~PMD_MASK);
582 }
583 if (!(segment & _SEGMENT_ENTRY_PROTECT))
584 /* Nothing mapped in the gmap address space. */
585 break;
586 rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
587 if (rc)
588 return rc;
589 }
590 return -EFAULT;
591}
592
593unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
594{
595 unsigned long rc;
596
597 down_read(&gmap->mm->mmap_sem); 593 down_read(&gmap->mm->mmap_sem);
598 rc = __gmap_fault(address, gmap); 594 vmaddr = __gmap_translate(gmap, gaddr);
595 if (IS_ERR_VALUE(vmaddr)) {
596 rc = vmaddr;
597 goto out_up;
598 }
599 if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
600 rc = -EFAULT;
601 goto out_up;
602 }
603 rc = __gmap_link(gmap, gaddr, vmaddr);
604out_up:
599 up_read(&gmap->mm->mmap_sem); 605 up_read(&gmap->mm->mmap_sem);
600
601 return rc; 606 return rc;
602} 607}
603EXPORT_SYMBOL_GPL(gmap_fault); 608EXPORT_SYMBOL_GPL(gmap_fault);
@@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
617 free_swap_and_cache(entry); 622 free_swap_and_cache(entry);
618} 623}
619 624
620/** 625/*
621 * The mm->mmap_sem lock must be held 626 * this function is assumed to be called with mmap_sem held
622 */ 627 */
623static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) 628void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
624{ 629{
625 unsigned long ptev, pgstev; 630 unsigned long vmaddr, ptev, pgstev;
631 pte_t *ptep, pte;
626 spinlock_t *ptl; 632 spinlock_t *ptl;
627 pgste_t pgste; 633 pgste_t pgste;
628 pte_t *ptep, pte;
629 634
630 ptep = get_locked_pte(mm, address, &ptl); 635 /* Find the vm address for the guest address */
636 vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
637 gaddr >> PMD_SHIFT);
638 if (!vmaddr)
639 return;
640 vmaddr |= gaddr & ~PMD_MASK;
641 /* Get pointer to the page table entry */
642 ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
631 if (unlikely(!ptep)) 643 if (unlikely(!ptep))
632 return; 644 return;
633 pte = *ptep; 645 pte = *ptep;
@@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
639 ptev = pte_val(pte); 651 ptev = pte_val(pte);
640 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || 652 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
641 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { 653 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
642 gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); 654 gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
643 pte_clear(mm, address, ptep); 655 pte_clear(gmap->mm, vmaddr, ptep);
644 } 656 }
645 pgste_set_unlock(ptep, pgste); 657 pgste_set_unlock(ptep, pgste);
646out_pte: 658out_pte:
647 pte_unmap_unlock(*ptep, ptl); 659 pte_unmap_unlock(*ptep, ptl);
648} 660}
649
650/*
651 * this function is assumed to be called with mmap_sem held
652 */
653void __gmap_zap(unsigned long address, struct gmap *gmap)
654{
655 unsigned long *table, *segment_ptr;
656 unsigned long segment, pgstev, ptev;
657 struct gmap_pgtable *mp;
658 struct page *page;
659
660 segment_ptr = gmap_table_walk(address, gmap);
661 if (IS_ERR(segment_ptr))
662 return;
663 segment = *segment_ptr;
664 if (segment & _SEGMENT_ENTRY_INVALID)
665 return;
666 page = pfn_to_page(segment >> PAGE_SHIFT);
667 mp = (struct gmap_pgtable *) page->index;
668 address = mp->vmaddr | (address & ~PMD_MASK);
669 /* Page table is present */
670 table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
671 table = table + ((address >> 12) & 0xff);
672 pgstev = table[PTRS_PER_PTE];
673 ptev = table[0];
674 /* quick check, checked again with locks held */
675 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
676 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
677 gmap_zap_unused(gmap->mm, address);
678}
679EXPORT_SYMBOL_GPL(__gmap_zap); 661EXPORT_SYMBOL_GPL(__gmap_zap);
680 662
681void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) 663void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
682{ 664{
683 665 unsigned long gaddr, vmaddr, size;
684 unsigned long *table, address, size;
685 struct vm_area_struct *vma; 666 struct vm_area_struct *vma;
686 struct gmap_pgtable *mp;
687 struct page *page;
688 667
689 down_read(&gmap->mm->mmap_sem); 668 down_read(&gmap->mm->mmap_sem);
690 address = from; 669 for (gaddr = from; gaddr < to;
691 while (address < to) { 670 gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
692 /* Walk the gmap address space page table */ 671 /* Find the vm address for the guest address */
693 table = gmap->table + ((address >> 53) & 0x7ff); 672 vmaddr = (unsigned long)
694 if (unlikely(*table & _REGION_ENTRY_INVALID)) { 673 radix_tree_lookup(&gmap->guest_to_host,
695 address = (address + PMD_SIZE) & PMD_MASK; 674 gaddr >> PMD_SHIFT);
696 continue; 675 if (!vmaddr)
697 }
698 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
699 table = table + ((address >> 42) & 0x7ff);
700 if (unlikely(*table & _REGION_ENTRY_INVALID)) {
701 address = (address + PMD_SIZE) & PMD_MASK;
702 continue; 676 continue;
703 } 677 vmaddr |= gaddr & ~PMD_MASK;
704 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 678 /* Find vma in the parent mm */
705 table = table + ((address >> 31) & 0x7ff); 679 vma = find_vma(gmap->mm, vmaddr);
706 if (unlikely(*table & _REGION_ENTRY_INVALID)) { 680 size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
707 address = (address + PMD_SIZE) & PMD_MASK; 681 zap_page_range(vma, vmaddr, size, NULL);
708 continue;
709 }
710 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
711 table = table + ((address >> 20) & 0x7ff);
712 if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
713 address = (address + PMD_SIZE) & PMD_MASK;
714 continue;
715 }
716 page = pfn_to_page(*table >> PAGE_SHIFT);
717 mp = (struct gmap_pgtable *) page->index;
718 vma = find_vma(gmap->mm, mp->vmaddr);
719 size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
720 zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
721 size, NULL);
722 address = (address + PMD_SIZE) & PMD_MASK;
723 } 682 }
724 up_read(&gmap->mm->mmap_sem); 683 up_read(&gmap->mm->mmap_sem);
725} 684}
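gmap_discard() now walks [from, to) in PMD-sized (1 MB) steps: each iteration looks up the host segment for the current guest segment and zaps at most one segment's worth, with the min() keeping the first and last chunks from spilling past an unaligned from or to. A standalone model of that chunking, with assumed unaligned example bounds:

#include <stdio.h>

#define PMD_SHIFT	20
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long from = 0x00180800UL, to = 0x00400100UL;	/* assumed example */
	unsigned long gaddr, size;

	for (gaddr = from; gaddr < to; gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		size = min_ul(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		printf("zap %08lx..%08lx (%lx bytes)\n", gaddr, gaddr + size, size);
	}
	return 0;
}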
@@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
755/** 714/**
756 * gmap_ipte_notify - mark a range of ptes for invalidation notification 715 * gmap_ipte_notify - mark a range of ptes for invalidation notification
757 * @gmap: pointer to guest mapping meta data structure 716 * @gmap: pointer to guest mapping meta data structure
758 * @start: virtual address in the guest address space 717 * @gaddr: virtual address in the guest address space
759 * @len: size of area 718 * @len: size of area
760 * 719 *
761 * Returns 0 if for each page in the given range a gmap mapping exists and 720 * Returns 0 if for each page in the given range a gmap mapping exists and
@@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
763 * for one or more pages -EFAULT is returned. If no memory could be allocated 722 * for one or more pages -EFAULT is returned. If no memory could be allocated
764 * -ENOMEM is returned. This function establishes missing page table entries. 723 * -ENOMEM is returned. This function establishes missing page table entries.
765 */ 724 */
766int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) 725int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
767{ 726{
768 unsigned long addr; 727 unsigned long addr;
769 spinlock_t *ptl; 728 spinlock_t *ptl;
@@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
771 pgste_t pgste; 730 pgste_t pgste;
772 int rc = 0; 731 int rc = 0;
773 732
774 if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) 733 if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
775 return -EINVAL; 734 return -EINVAL;
776 down_read(&gmap->mm->mmap_sem); 735 down_read(&gmap->mm->mmap_sem);
777 while (len) { 736 while (len) {
778 /* Convert gmap address and connect the page tables */ 737 /* Convert gmap address and connect the page tables */
779 addr = __gmap_fault(start, gmap); 738 addr = __gmap_translate(gmap, gaddr);
780 if (IS_ERR_VALUE(addr)) { 739 if (IS_ERR_VALUE(addr)) {
781 rc = addr; 740 rc = addr;
782 break; 741 break;
@@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
786 rc = -EFAULT; 745 rc = -EFAULT;
787 break; 746 break;
788 } 747 }
748 rc = __gmap_link(gmap, gaddr, addr);
749 if (rc)
750 break;
789 /* Walk the process page table, lock and get pte pointer */ 751 /* Walk the process page table, lock and get pte pointer */
790 ptep = get_locked_pte(gmap->mm, addr, &ptl); 752 ptep = get_locked_pte(gmap->mm, addr, &ptl);
791 if (unlikely(!ptep)) 753 if (unlikely(!ptep))
@@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
796 pgste = pgste_get_lock(ptep); 758 pgste = pgste_get_lock(ptep);
797 pgste_val(pgste) |= PGSTE_IN_BIT; 759 pgste_val(pgste) |= PGSTE_IN_BIT;
798 pgste_set_unlock(ptep, pgste); 760 pgste_set_unlock(ptep, pgste);
799 start += PAGE_SIZE; 761 gaddr += PAGE_SIZE;
800 len -= PAGE_SIZE; 762 len -= PAGE_SIZE;
801 } 763 }
802 spin_unlock(ptl); 764 spin_unlock(ptl);
@@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify);
809/** 771/**
810 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. 772 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
811 * @mm: pointer to the process mm_struct 773 * @mm: pointer to the process mm_struct
774 * @addr: virtual address in the process address space
812 * @pte: pointer to the page table entry 775 * @pte: pointer to the page table entry
813 * 776 *
814 * This function is assumed to be called with the page table lock held 777 * This function is assumed to be called with the page table lock held
815 * for the pte to notify. 778 * for the pte to notify.
816 */ 779 */
817void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) 780void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
818{ 781{
819 unsigned long segment_offset; 782 unsigned long offset, gaddr;
783 unsigned long *table;
820 struct gmap_notifier *nb; 784 struct gmap_notifier *nb;
821 struct gmap_pgtable *mp; 785 struct gmap *gmap;
822 struct gmap_rmap *rmap;
823 struct page *page;
824 786
825 segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); 787 offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
826 segment_offset = segment_offset * (4096 / sizeof(pte_t)); 788 offset = offset * (4096 / sizeof(pte_t));
827 page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
828 mp = (struct gmap_pgtable *) page->index;
829 spin_lock(&gmap_notifier_lock); 789 spin_lock(&gmap_notifier_lock);
830 list_for_each_entry(rmap, &mp->mapper, list) { 790 list_for_each_entry(gmap, &mm->context.gmap_list, list) {
791 table = radix_tree_lookup(&gmap->host_to_guest,
792 vmaddr >> PMD_SHIFT);
793 if (!table)
794 continue;
795 gaddr = __gmap_segment_gaddr(table) + offset;
831 list_for_each_entry(nb, &gmap_notifier_list, list) 796 list_for_each_entry(nb, &gmap_notifier_list, list)
832 nb->notifier_call(rmap->gmap, 797 nb->notifier_call(gmap, gaddr);
833 rmap->vmaddr + segment_offset);
834 } 798 }
835 spin_unlock(&gmap_notifier_lock); 799 spin_unlock(&gmap_notifier_lock);
836} 800}
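The offset arithmetic in gmap_do_ipte_notify() converts a pte pointer into a guest offset: anding with 255 * sizeof(pte_t) keeps the byte offset inside the 256-entry (2 KB) s390 page table, and multiplying by 4096 / sizeof(pte_t) turns that into the pte index times the 4 KB page size, i.e. the offset within the 1 MB segment whose guest base comes from the host_to_guest lookup. A standalone check with an assumed, 2 KB aligned table address:

#include <stdio.h>

int main(void)
{
	unsigned long pte_size = 8;				/* s390: 8-byte ptes */
	unsigned long pte_ptr = 0x12345000UL + 17 * pte_size;	/* assumed: 18th pte */

	unsigned long offset = pte_ptr & (255 * pte_size);	/* 0x88 */
	offset = offset * (4096 / pte_size);			/* 17 * 4096 = 0x11000 */

	printf("byte offset in table %#lx -> guest offset %#lx\n",
	       pte_ptr & (255 * pte_size), offset);
	return 0;
}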
@@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page)
841 return atomic_read(&page->_mapcount) == 0; 805 return atomic_read(&page->_mapcount) == 0;
842} 806}
843 807
844static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 808static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
845 unsigned long vmaddr)
846{ 809{
847 struct page *page; 810 struct page *page;
848 unsigned long *table; 811 unsigned long *table;
849 struct gmap_pgtable *mp;
850 812
851 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 813 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
852 if (!page) 814 if (!page)
853 return NULL; 815 return NULL;
854 mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
855 if (!mp) {
856 __free_page(page);
857 return NULL;
858 }
859 if (!pgtable_page_ctor(page)) { 816 if (!pgtable_page_ctor(page)) {
860 kfree(mp);
861 __free_page(page); 817 __free_page(page);
862 return NULL; 818 return NULL;
863 } 819 }
864 mp->vmaddr = vmaddr & PMD_MASK;
865 INIT_LIST_HEAD(&mp->mapper);
866 page->index = (unsigned long) mp;
867 atomic_set(&page->_mapcount, 0); 820 atomic_set(&page->_mapcount, 0);
868 table = (unsigned long *) page_to_phys(page); 821 table = (unsigned long *) page_to_phys(page);
869 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 822 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
874static inline void page_table_free_pgste(unsigned long *table) 827static inline void page_table_free_pgste(unsigned long *table)
875{ 828{
876 struct page *page; 829 struct page *page;
877 struct gmap_pgtable *mp;
878 830
879 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 831 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
880 mp = (struct gmap_pgtable *) page->index;
881 BUG_ON(!list_empty(&mp->mapper));
882 pgtable_page_dtor(page); 832 pgtable_page_dtor(page);
883 atomic_set(&page->_mapcount, -1); 833 atomic_set(&page->_mapcount, -1);
884 kfree(mp);
885 __free_page(page); 834 __free_page(page);
886} 835}
887 836
@@ -994,13 +943,13 @@ retry:
994 } 943 }
995 if (!(pte_val(*ptep) & _PAGE_INVALID) && 944 if (!(pte_val(*ptep) & _PAGE_INVALID) &&
996 (pte_val(*ptep) & _PAGE_PROTECT)) { 945 (pte_val(*ptep) & _PAGE_PROTECT)) {
997 pte_unmap_unlock(*ptep, ptl); 946 pte_unmap_unlock(*ptep, ptl);
998 if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { 947 if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
999 up_read(&mm->mmap_sem); 948 up_read(&mm->mmap_sem);
1000 return -EFAULT; 949 return -EFAULT;
1001 }
1002 goto retry;
1003 } 950 }
951 goto retry;
952 }
1004 953
1005 new = old = pgste_get_lock(ptep); 954 new = old = pgste_get_lock(ptep);
1006 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | 955 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
@@ -1038,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page)
1038 return 0; 987 return 0;
1039} 988}
1040 989
1041static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 990static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
1042 unsigned long vmaddr)
1043{ 991{
1044 return NULL; 992 return NULL;
1045} 993}
@@ -1053,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table)
1053{ 1001{
1054} 1002}
1055 1003
1056static inline void gmap_disconnect_pgtable(struct mm_struct *mm, 1004static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
1057 unsigned long *table) 1005 unsigned long vmaddr)
1058{ 1006{
1059} 1007}
1060 1008
@@ -1074,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
1074/* 1022/*
1075 * page table entry allocation/free routines. 1023 * page table entry allocation/free routines.
1076 */ 1024 */
1077unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) 1025unsigned long *page_table_alloc(struct mm_struct *mm)
1078{ 1026{
1079 unsigned long *uninitialized_var(table); 1027 unsigned long *uninitialized_var(table);
1080 struct page *uninitialized_var(page); 1028 struct page *uninitialized_var(page);
1081 unsigned int mask, bit; 1029 unsigned int mask, bit;
1082 1030
1083 if (mm_has_pgste(mm)) 1031 if (mm_has_pgste(mm))
1084 return page_table_alloc_pgste(mm, vmaddr); 1032 return page_table_alloc_pgste(mm);
1085 /* Allocate fragments of a 4K page as 1K/2K page table */ 1033 /* Allocate fragments of a 4K page as 1K/2K page table */
1086 spin_lock_bh(&mm->context.list_lock); 1034 spin_lock_bh(&mm->context.list_lock);
1087 mask = FRAG_MASK; 1035 mask = FRAG_MASK;
@@ -1123,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
1123 unsigned int bit, mask; 1071 unsigned int bit, mask;
1124 1072
1125 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1073 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1126 if (page_table_with_pgste(page)) { 1074 if (page_table_with_pgste(page))
1127 gmap_disconnect_pgtable(mm, table);
1128 return page_table_free_pgste(table); 1075 return page_table_free_pgste(table);
1129 }
1130 /* Free 1K/2K page table fragment of a 4K page */ 1076 /* Free 1K/2K page table fragment of a 4K page */
1131 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); 1077 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
1132 spin_lock_bh(&mm->context.list_lock); 1078 spin_lock_bh(&mm->context.list_lock);
@@ -1158,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
1158 } 1104 }
1159} 1105}
1160 1106
1161void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) 1107void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
1108 unsigned long vmaddr)
1162{ 1109{
1163 struct mm_struct *mm; 1110 struct mm_struct *mm;
1164 struct page *page; 1111 struct page *page;
@@ -1167,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
1167 mm = tlb->mm; 1114 mm = tlb->mm;
1168 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1115 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1169 if (page_table_with_pgste(page)) { 1116 if (page_table_with_pgste(page)) {
1170 gmap_disconnect_pgtable(mm, table); 1117 gmap_unlink(mm, table, vmaddr);
1171 table = (unsigned long *) (__pa(table) | FRAG_MASK); 1118 table = (unsigned long *) (__pa(table) | FRAG_MASK);
1172 tlb_remove_table(tlb, table); 1119 tlb_remove_table(tlb, table);
1173 return; 1120 return;
@@ -1303,7 +1250,7 @@ again:
1303 if (page_table_with_pgste(page)) 1250 if (page_table_with_pgste(page))
1304 continue; 1251 continue;
1305 /* Allocate new page table with pgstes */ 1252 /* Allocate new page table with pgstes */
1306 new = page_table_alloc_pgste(mm, addr); 1253 new = page_table_alloc_pgste(mm);
1307 if (!new) 1254 if (!new)
1308 return -ENOMEM; 1255 return -ENOMEM;
1309 1256
@@ -1318,7 +1265,7 @@ again:
1318 /* Establish new table */ 1265 /* Establish new table */
1319 pmd_populate(mm, pmd, (pte_t *) new); 1266 pmd_populate(mm, pmd, (pte_t *) new);
1320 /* Free old table with rcu, there might be a walker! */ 1267 /* Free old table with rcu, there might be a walker! */
1321 page_table_free_rcu(tlb, table); 1268 page_table_free_rcu(tlb, table, addr);
1322 new = NULL; 1269 new = NULL;
1323 } 1270 }
1324 spin_unlock(ptl); 1271 spin_unlock(ptl);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index fe9012a49aa5..fdbd7888cb07 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
65 pte_t *pte; 65 pte_t *pte;
66 66
67 if (slab_is_available()) 67 if (slab_is_available())
68 pte = (pte_t *) page_table_alloc(&init_mm, address); 68 pte = (pte_t *) page_table_alloc(&init_mm);
69 else 69 else
70 pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), 70 pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
71 PTRS_PER_PTE * sizeof(pte_t)); 71 PTRS_PER_PTE * sizeof(pte_t));
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 516903b98e06..094292a63e74 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -202,6 +202,7 @@
202#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ 202#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
203#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ 203#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
204#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ 204#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
205#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
205 206
206 207
207/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 208/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7c492ed9087b..7d603a71ab3a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
99 99
100#define ASYNC_PF_PER_VCPU 64 100#define ASYNC_PF_PER_VCPU 64
101 101
102struct kvm_vcpu;
103struct kvm;
104struct kvm_async_pf;
105
106enum kvm_reg { 102enum kvm_reg {
107 VCPU_REGS_RAX = 0, 103 VCPU_REGS_RAX = 0,
108 VCPU_REGS_RCX = 1, 104 VCPU_REGS_RCX = 1,
@@ -266,7 +262,8 @@ struct kvm_mmu {
266 struct x86_exception *fault); 262 struct x86_exception *fault);
267 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, 263 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
268 struct x86_exception *exception); 264 struct x86_exception *exception);
269 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); 265 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
266 struct x86_exception *exception);
270 int (*sync_page)(struct kvm_vcpu *vcpu, 267 int (*sync_page)(struct kvm_vcpu *vcpu,
271 struct kvm_mmu_page *sp); 268 struct kvm_mmu_page *sp);
272 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); 269 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
@@ -481,6 +478,7 @@ struct kvm_vcpu_arch {
481 u64 mmio_gva; 478 u64 mmio_gva;
482 unsigned access; 479 unsigned access;
483 gfn_t mmio_gfn; 480 gfn_t mmio_gfn;
481 u64 mmio_gen;
484 482
485 struct kvm_pmu pmu; 483 struct kvm_pmu pmu;
486 484
@@ -576,11 +574,10 @@ struct kvm_arch {
576 struct kvm_apic_map *apic_map; 574 struct kvm_apic_map *apic_map;
577 575
578 unsigned int tss_addr; 576 unsigned int tss_addr;
579 struct page *apic_access_page; 577 bool apic_access_page_done;
580 578
581 gpa_t wall_clock; 579 gpa_t wall_clock;
582 580
583 struct page *ept_identity_pagetable;
584 bool ept_identity_pagetable_done; 581 bool ept_identity_pagetable_done;
585 gpa_t ept_identity_map_addr; 582 gpa_t ept_identity_map_addr;
586 583
@@ -665,8 +662,8 @@ struct msr_data {
665struct kvm_x86_ops { 662struct kvm_x86_ops {
666 int (*cpu_has_kvm_support)(void); /* __init */ 663 int (*cpu_has_kvm_support)(void); /* __init */
667 int (*disabled_by_bios)(void); /* __init */ 664 int (*disabled_by_bios)(void); /* __init */
668 int (*hardware_enable)(void *dummy); 665 int (*hardware_enable)(void);
669 void (*hardware_disable)(void *dummy); 666 void (*hardware_disable)(void);
670 void (*check_processor_compatibility)(void *rtn); 667 void (*check_processor_compatibility)(void *rtn);
671 int (*hardware_setup)(void); /* __init */ 668 int (*hardware_setup)(void); /* __init */
672 void (*hardware_unsetup)(void); /* __exit */ 669 void (*hardware_unsetup)(void); /* __exit */
@@ -710,7 +707,6 @@ struct kvm_x86_ops {
710 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 707 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
711 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 708 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
712 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 709 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
713 void (*fpu_activate)(struct kvm_vcpu *vcpu);
714 void (*fpu_deactivate)(struct kvm_vcpu *vcpu); 710 void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
715 711
716 void (*tlb_flush)(struct kvm_vcpu *vcpu); 712 void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -740,6 +736,7 @@ struct kvm_x86_ops {
740 void (*hwapic_isr_update)(struct kvm *kvm, int isr); 736 void (*hwapic_isr_update)(struct kvm *kvm, int isr);
741 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); 737 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
742 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); 738 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
739 void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
743 void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); 740 void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
744 void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); 741 void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
745 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 742 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
@@ -772,6 +769,8 @@ struct kvm_x86_ops {
772 bool (*mpx_supported)(void); 769 bool (*mpx_supported)(void);
773 770
774 int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); 771 int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
772
773 void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
775}; 774};
776 775
777struct kvm_arch_async_pf { 776struct kvm_arch_async_pf {
@@ -895,7 +894,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
895int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, 894int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
896 gfn_t gfn, void *data, int offset, int len, 895 gfn_t gfn, void *data, int offset, int len,
897 u32 access); 896 u32 access);
898void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
899bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); 897bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
900 898
901static inline int __kvm_irq_line_state(unsigned long *irq_state, 899static inline int __kvm_irq_line_state(unsigned long *irq_state,
@@ -917,7 +915,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
917 915
918int fx_init(struct kvm_vcpu *vcpu); 916int fx_init(struct kvm_vcpu *vcpu);
919 917
920void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
921void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 918void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
922 const u8 *new, int bytes); 919 const u8 *new, int bytes);
923int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); 920int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
@@ -926,7 +923,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
926int kvm_mmu_load(struct kvm_vcpu *vcpu); 923int kvm_mmu_load(struct kvm_vcpu *vcpu);
927void kvm_mmu_unload(struct kvm_vcpu *vcpu); 924void kvm_mmu_unload(struct kvm_vcpu *vcpu);
928void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 925void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
929gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); 926gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
927 struct x86_exception *exception);
930gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, 928gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
931 struct x86_exception *exception); 929 struct x86_exception *exception);
932gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, 930gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -946,7 +944,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
946void kvm_enable_tdp(void); 944void kvm_enable_tdp(void);
947void kvm_disable_tdp(void); 945void kvm_disable_tdp(void);
948 946
949static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 947static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
948 struct x86_exception *exception)
950{ 949{
951 return gpa; 950 return gpa;
952} 951}
@@ -1037,7 +1036,7 @@ asmlinkage void kvm_spurious_fault(void);
1037#define KVM_ARCH_WANT_MMU_NOTIFIER 1036#define KVM_ARCH_WANT_MMU_NOTIFIER
1038int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 1037int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
1039int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); 1038int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
1040int kvm_age_hva(struct kvm *kvm, unsigned long hva); 1039int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
1041int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); 1040int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
1042void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); 1041void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
1043int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 1042int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -1046,6 +1045,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
1046int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 1045int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
1047int kvm_cpu_get_interrupt(struct kvm_vcpu *v); 1046int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
1048void kvm_vcpu_reset(struct kvm_vcpu *vcpu); 1047void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
1048void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
1049void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
1050 unsigned long address);
1049 1051
1050void kvm_define_shared_msr(unsigned index, u32 msr); 1052void kvm_define_shared_msr(unsigned index, u32 msr);
1051void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); 1053void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c7678e43465b..e62cf897f781 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_KVM_PARA_H 2#define _ASM_X86_KVM_PARA_H
3 3
4#include <asm/processor.h> 4#include <asm/processor.h>
5#include <asm/alternative.h>
5#include <uapi/asm/kvm_para.h> 6#include <uapi/asm/kvm_para.h>
6 7
7extern void kvmclock_init(void); 8extern void kvmclock_init(void);
@@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void)
16} 17}
17#endif /* CONFIG_KVM_GUEST */ 18#endif /* CONFIG_KVM_GUEST */
18 19
19/* This instruction is vmcall. On non-VT architectures, it will generate a 20#ifdef CONFIG_DEBUG_RODATA
20 * trap that we will then rewrite to the appropriate instruction. 21#define KVM_HYPERCALL \
22 ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
23#else
24/* On AMD processors, vmcall will generate a trap that we will
25 * then rewrite to the appropriate instruction.
21 */ 26 */
22#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" 27#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
28#endif
23 29
24/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall 30/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
25 * instruction. The hypervisor may replace it with something else but only the 31 * instruction. The hypervisor may replace it with something else but only the
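With CONFIG_DEBUG_RODATA the kernel text is read-only, so the old trap-and-rewrite trick for turning vmcall into vmmcall no longer works; KVM_HYPERCALL therefore becomes an ALTERNATIVE() that patches vmcall (0f 01 c1) to vmmcall (0f 01 d9) at boot when X86_FEATURE_VMMCALL is set, which the amd.c hunk below now does unconditionally on AMD CPUs. A hedged sketch of how a guest-side wrapper emits that byte sequence; the function name is made up for illustration and the code is only meaningful when running as a KVM guest:

/* Sketch only: mirrors the shape of the kernel's kvm_hypercall helpers.
 * ".byte 0x0f,0x01,0xc1" is vmcall; under the ALTERNATIVE() above it gets
 * rewritten to vmmcall (0x0f,0x01,0xd9) on CPUs with X86_FEATURE_VMMCALL.
 * Executed on bare metal this simply faults. */
static inline long demo_kvm_hypercall0(unsigned int nr)	/* hypothetical name */
{
	long ret;

	asm volatile(".byte 0x0f,0x01,0xc1"
		     : "=a" (ret)
		     : "a" (nr)
		     : "memory");
	return ret;
}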
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60e5497681f5..813d29d00a17 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c)
525 } 525 }
526#endif 526#endif
527 527
528 /*
529 * This is only needed to tell the kernel whether to use VMCALL
530 * and VMMCALL. VMMCALL is never executed except under virt, so
531 * we can set it unconditionally.
532 */
533 set_cpu_cap(c, X86_FEATURE_VMMCALL);
534
528 /* F16h erratum 793, CVE-2013-6885 */ 535 /* F16h erratum 793, CVE-2013-6885 */
529 if (c->x86 == 0x16 && c->x86_model <= 0xf) 536 if (c->x86 == 0x16 && c->x86_model <= 0xf)
530 msr_set_bit(MSR_AMD64_LS_CFG, 15); 537 msr_set_bit(MSR_AMD64_LS_CFG, 15);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 38a0afe83c6b..976e3a57f9ea 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void)
53 return xcr0; 53 return xcr0;
54} 54}
55 55
56void kvm_update_cpuid(struct kvm_vcpu *vcpu) 56int kvm_update_cpuid(struct kvm_vcpu *vcpu)
57{ 57{
58 struct kvm_cpuid_entry2 *best; 58 struct kvm_cpuid_entry2 *best;
59 struct kvm_lapic *apic = vcpu->arch.apic; 59 struct kvm_lapic *apic = vcpu->arch.apic;
60 60
61 best = kvm_find_cpuid_entry(vcpu, 1, 0); 61 best = kvm_find_cpuid_entry(vcpu, 1, 0);
62 if (!best) 62 if (!best)
63 return; 63 return 0;
64 64
65 /* Update OSXSAVE bit */ 65 /* Update OSXSAVE bit */
66 if (cpu_has_xsave && best->function == 0x1) { 66 if (cpu_has_xsave && best->function == 0x1) {
@@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
88 xstate_required_size(vcpu->arch.xcr0); 88 xstate_required_size(vcpu->arch.xcr0);
89 } 89 }
90 90
91 /*
92 * The existing code assumes virtual address is 48-bit in the canonical
93 * address checks; exit if it is ever changed.
94 */
95 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
96 if (best && ((best->eax & 0xff00) >> 8) != 48 &&
97 ((best->eax & 0xff00) >> 8) != 0)
98 return -EINVAL;
99
91 kvm_pmu_cpuid_update(vcpu); 100 kvm_pmu_cpuid_update(vcpu);
101 return 0;
92} 102}
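The new check decodes CPUID leaf 0x80000008: EAX bits 7:0 report the physical address width and bits 15:8 the virtual address width, and kvm_update_cpuid() now rejects a guest CPUID whose virtual width is neither 0 nor 48, because the emulator's canonical-address checks assume 48 bits. A standalone example of decoding that leaf, with an assumed EAX value:

#include <stdio.h>

int main(void)
{
	unsigned int eax = 0x00003028;	/* assumed: 48-bit virtual, 40-bit physical */

	unsigned int phys_bits = eax & 0xff;
	unsigned int virt_bits = (eax & 0xff00) >> 8;

	printf("physical address bits: %u\n", phys_bits);	/* 40 */
	printf("virtual address bits:  %u\n", virt_bits);	/* 48 */
	return 0;
}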
93 103
94static int is_efer_nx(void) 104static int is_efer_nx(void)
@@ -112,8 +122,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
112 break; 122 break;
113 } 123 }
114 } 124 }
115 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { 125 if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
116 entry->edx &= ~(1 << 20); 126 entry->edx &= ~bit(X86_FEATURE_NX);
117 printk(KERN_INFO "kvm: guest NX capability removed\n"); 127 printk(KERN_INFO "kvm: guest NX capability removed\n");
118 } 128 }
119} 129}
@@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
151 } 161 }
152 vcpu->arch.cpuid_nent = cpuid->nent; 162 vcpu->arch.cpuid_nent = cpuid->nent;
153 cpuid_fix_nx_cap(vcpu); 163 cpuid_fix_nx_cap(vcpu);
154 r = 0;
155 kvm_apic_set_version(vcpu); 164 kvm_apic_set_version(vcpu);
156 kvm_x86_ops->cpuid_update(vcpu); 165 kvm_x86_ops->cpuid_update(vcpu);
157 kvm_update_cpuid(vcpu); 166 r = kvm_update_cpuid(vcpu);
158 167
159out_free: 168out_free:
160 vfree(cpuid_entries); 169 vfree(cpuid_entries);
@@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
178 vcpu->arch.cpuid_nent = cpuid->nent; 187 vcpu->arch.cpuid_nent = cpuid->nent;
179 kvm_apic_set_version(vcpu); 188 kvm_apic_set_version(vcpu);
180 kvm_x86_ops->cpuid_update(vcpu); 189 kvm_x86_ops->cpuid_update(vcpu);
181 kvm_update_cpuid(vcpu); 190 r = kvm_update_cpuid(vcpu);
182 return 0;
183
184out: 191out:
185 return r; 192 return r;
186} 193}
@@ -767,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
767 if (!best) 774 if (!best)
768 best = check_cpuid_limit(vcpu, function, index); 775 best = check_cpuid_limit(vcpu, function, index);
769 776
777 /*
778 * Perfmon not yet supported for L2 guest.
779 */
780 if (is_guest_mode(vcpu) && function == 0xa)
781 best = NULL;
782
770 if (best) { 783 if (best) {
771 *eax = best->eax; 784 *eax = best->eax;
772 *ebx = best->ebx; 785 *ebx = best->ebx;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a5380590ab0e..4452eedfaedd 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -3,7 +3,7 @@
3 3
4#include "x86.h" 4#include "x86.h"
5 5
6void kvm_update_cpuid(struct kvm_vcpu *vcpu); 6int kvm_update_cpuid(struct kvm_vcpu *vcpu);
7struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 7struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
8 u32 function, u32 index); 8 u32 function, u32 index);
9int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, 9int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
@@ -88,6 +88,14 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
88 return best && (best->ecx & bit(X86_FEATURE_X2APIC)); 88 return best && (best->ecx & bit(X86_FEATURE_X2APIC));
89} 89}
90 90
91static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
92{
93 struct kvm_cpuid_entry2 *best;
94
95 best = kvm_find_cpuid_entry(vcpu, 0, 0);
96 return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
97}
98
91static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) 99static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
92{ 100{
93 struct kvm_cpuid_entry2 *best; 101 struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 03954f7900f5..a46207a05835 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
527static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, 527static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
528 u32 error, bool valid) 528 u32 error, bool valid)
529{ 529{
530 WARN_ON(vec > 0x1f);
530 ctxt->exception.vector = vec; 531 ctxt->exception.vector = vec;
531 ctxt->exception.error_code = error; 532 ctxt->exception.error_code = error;
532 ctxt->exception.error_code_valid = valid; 533 ctxt->exception.error_code_valid = valid;
@@ -1468,7 +1469,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1468 return ret; 1469 return ret;
1469 1470
1470 err_code = selector & 0xfffc; 1471 err_code = selector & 0xfffc;
1471 err_vec = GP_VECTOR; 1472 err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
1472 1473
1473 /* can't load system descriptor into segment selector */ 1474 /* can't load system descriptor into segment selector */
1474 if (seg <= VCPU_SREG_GS && !seg_desc.s) 1475 if (seg <= VCPU_SREG_GS && !seg_desc.s)
@@ -1503,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1503 if (rpl > cpl || dpl != cpl) 1504 if (rpl > cpl || dpl != cpl)
1504 goto exception; 1505 goto exception;
1505 } 1506 }
1507 /* in long-mode d/b must be clear if l is set */
1508 if (seg_desc.d && seg_desc.l) {
1509 u64 efer = 0;
1510
1511 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1512 if (efer & EFER_LMA)
1513 goto exception;
1514 }
1515
1506 /* CS(RPL) <- CPL */ 1516 /* CS(RPL) <- CPL */
1507 selector = (selector & 0xfffc) | cpl; 1517 selector = (selector & 0xfffc) | cpl;
1508 break; 1518 break;
@@ -1549,8 +1559,7 @@ load:
1549 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); 1559 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1550 return X86EMUL_CONTINUE; 1560 return X86EMUL_CONTINUE;
1551exception: 1561exception:
1552 emulate_exception(ctxt, err_vec, err_code, true); 1562 return emulate_exception(ctxt, err_vec, err_code, true);
1553 return X86EMUL_PROPAGATE_FAULT;
1554} 1563}
1555 1564
1556static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, 1565static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -2723,8 +2732,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2723 if (!next_tss_desc.p || 2732 if (!next_tss_desc.p ||
2724 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || 2733 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2725 desc_limit < 0x2b)) { 2734 desc_limit < 0x2b)) {
2726 emulate_ts(ctxt, tss_selector & 0xfffc); 2735 return emulate_ts(ctxt, tss_selector & 0xfffc);
2727 return X86EMUL_PROPAGATE_FAULT;
2728 } 2736 }
2729 2737
2730 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 2738 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
@@ -3016,7 +3024,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
3016 ctxt->dst.val = swab64(ctxt->src.val); 3024 ctxt->dst.val = swab64(ctxt->src.val);
3017 break; 3025 break;
3018 default: 3026 default:
3019 return X86EMUL_PROPAGATE_FAULT; 3027 BUG();
3020 } 3028 }
3021 return X86EMUL_CONTINUE; 3029 return X86EMUL_CONTINUE;
3022} 3030}
@@ -3140,12 +3148,8 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
3140 3148
3141static int em_vmcall(struct x86_emulate_ctxt *ctxt) 3149static int em_vmcall(struct x86_emulate_ctxt *ctxt)
3142{ 3150{
3143 int rc; 3151 int rc = ctxt->ops->fix_hypercall(ctxt);
3144
3145 if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1)
3146 return X86EMUL_UNHANDLEABLE;
3147 3152
3148 rc = ctxt->ops->fix_hypercall(ctxt);
3149 if (rc != X86EMUL_CONTINUE) 3153 if (rc != X86EMUL_CONTINUE)
3150 return rc; 3154 return rc;
3151 3155
@@ -3563,6 +3567,12 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3563 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 3567 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
3564 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 3568 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3565 3569
3570static const struct opcode group7_rm0[] = {
3571 N,
3572 I(SrcNone | Priv | EmulateOnUD, em_vmcall),
3573 N, N, N, N, N, N,
3574};
3575
3566static const struct opcode group7_rm1[] = { 3576static const struct opcode group7_rm1[] = {
3567 DI(SrcNone | Priv, monitor), 3577 DI(SrcNone | Priv, monitor),
3568 DI(SrcNone | Priv, mwait), 3578 DI(SrcNone | Priv, mwait),
@@ -3656,7 +3666,7 @@ static const struct group_dual group7 = { {
3656 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), 3666 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3657 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3667 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
3658}, { 3668}, {
3659 I(SrcNone | Priv | EmulateOnUD, em_vmcall), 3669 EXT(0, group7_rm0),
3660 EXT(0, group7_rm1), 3670 EXT(0, group7_rm1),
3661 N, EXT(0, group7_rm3), 3671 N, EXT(0, group7_rm3),
3662 II(SrcNone | DstMem | Mov, em_smsw, smsw), N, 3672 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3687,14 +3697,18 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
3687 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), 3697 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3688}; 3698};
3689 3699
3690static const struct gprefix pfx_vmovntpx = { 3700static const struct gprefix pfx_0f_2b = {
3691 I(0, em_mov), N, N, N, 3701 I(0, em_mov), I(0, em_mov), N, N,
3692}; 3702};
3693 3703
3694static const struct gprefix pfx_0f_28_0f_29 = { 3704static const struct gprefix pfx_0f_28_0f_29 = {
3695 I(Aligned, em_mov), I(Aligned, em_mov), N, N, 3705 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
3696}; 3706};
3697 3707
3708static const struct gprefix pfx_0f_e7 = {
3709 N, I(Sse, em_mov), N, N,
3710};
3711
3698static const struct escape escape_d9 = { { 3712static const struct escape escape_d9 = { {
3699 N, N, N, N, N, N, N, I(DstMem, em_fnstcw), 3713 N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
3700}, { 3714}, {
@@ -3901,7 +3915,7 @@ static const struct opcode twobyte_table[256] = {
3901 N, N, N, N, 3915 N, N, N, N,
3902 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), 3916 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
3903 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), 3917 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
3904 N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), 3918 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
3905 N, N, N, N, 3919 N, N, N, N,
3906 /* 0x30 - 0x3F */ 3920 /* 0x30 - 0x3F */
3907 II(ImplicitOps | Priv, em_wrmsr, wrmsr), 3921 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
@@ -3965,7 +3979,8 @@ static const struct opcode twobyte_table[256] = {
3965 /* 0xD0 - 0xDF */ 3979 /* 0xD0 - 0xDF */
3966 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3980 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
3967 /* 0xE0 - 0xEF */ 3981 /* 0xE0 - 0xEF */
3968 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3982 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
3983 N, N, N, N, N, N, N, N,
3969 /* 0xF0 - 0xFF */ 3984 /* 0xF0 - 0xFF */
3970 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N 3985 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
3971}; 3986};
@@ -4829,8 +4844,10 @@ writeback:
4829 ctxt->eip = ctxt->_eip; 4844 ctxt->eip = ctxt->_eip;
4830 4845
4831done: 4846done:
4832 if (rc == X86EMUL_PROPAGATE_FAULT) 4847 if (rc == X86EMUL_PROPAGATE_FAULT) {
4848 WARN_ON(ctxt->exception.vector > 0x1f);
4833 ctxt->have_exception = true; 4849 ctxt->have_exception = true;
4850 }
4834 if (rc == X86EMUL_INTERCEPTED) 4851 if (rc == X86EMUL_INTERCEPTED)
4835 return EMULATION_INTERCEPTED; 4852 return EMULATION_INTERCEPTED;
4836 4853
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 08e8a899e005..b8345dd41b25 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
112struct static_key_deferred apic_hw_disabled __read_mostly; 112struct static_key_deferred apic_hw_disabled __read_mostly;
113struct static_key_deferred apic_sw_disabled __read_mostly; 113struct static_key_deferred apic_sw_disabled __read_mostly;
114 114
115static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
116{
117 if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
118 if (val & APIC_SPIV_APIC_ENABLED)
119 static_key_slow_dec_deferred(&apic_sw_disabled);
120 else
121 static_key_slow_inc(&apic_sw_disabled.key);
122 }
123 apic_set_reg(apic, APIC_SPIV, val);
124}
125
126static inline int apic_enabled(struct kvm_lapic *apic) 115static inline int apic_enabled(struct kvm_lapic *apic)
127{ 116{
128 return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); 117 return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
@@ -210,6 +199,20 @@ out:
210 kvm_vcpu_request_scan_ioapic(kvm); 199 kvm_vcpu_request_scan_ioapic(kvm);
211} 200}
212 201
202static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
203{
204 u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
205
206 apic_set_reg(apic, APIC_SPIV, val);
207 if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
208 if (val & APIC_SPIV_APIC_ENABLED) {
209 static_key_slow_dec_deferred(&apic_sw_disabled);
210 recalculate_apic_map(apic->vcpu->kvm);
211 } else
212 static_key_slow_inc(&apic_sw_disabled.key);
213 }
214}
215
213static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) 216static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
214{ 217{
215 apic_set_reg(apic, APIC_ID, id << 24); 218 apic_set_reg(apic, APIC_ID, id << 24);
@@ -706,6 +709,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
706 int result = 0; 709 int result = 0;
707 struct kvm_vcpu *vcpu = apic->vcpu; 710 struct kvm_vcpu *vcpu = apic->vcpu;
708 711
712 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
713 trig_mode, vector);
709 switch (delivery_mode) { 714 switch (delivery_mode) {
710 case APIC_DM_LOWEST: 715 case APIC_DM_LOWEST:
711 vcpu->arch.apic_arb_prio++; 716 vcpu->arch.apic_arb_prio++;
@@ -727,8 +732,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
727 kvm_make_request(KVM_REQ_EVENT, vcpu); 732 kvm_make_request(KVM_REQ_EVENT, vcpu);
728 kvm_vcpu_kick(vcpu); 733 kvm_vcpu_kick(vcpu);
729 } 734 }
730 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
731 trig_mode, vector, false);
732 break; 735 break;
733 736
734 case APIC_DM_REMRD: 737 case APIC_DM_REMRD:
@@ -1352,6 +1355,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
1352 return; 1355 return;
1353 1356
1354 hrtimer_cancel(&apic->lapic_timer.timer); 1357 hrtimer_cancel(&apic->lapic_timer.timer);
1358 /* Inject here so clearing tscdeadline won't override new value */
1359 if (apic_has_pending_timer(vcpu))
1360 kvm_inject_apic_timer_irqs(vcpu);
1355 apic->lapic_timer.tscdeadline = data; 1361 apic->lapic_timer.tscdeadline = data;
1356 start_apic_timer(apic); 1362 start_apic_timer(apic);
1357} 1363}
@@ -1639,6 +1645,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1639 1645
1640 if (atomic_read(&apic->lapic_timer.pending) > 0) { 1646 if (atomic_read(&apic->lapic_timer.pending) > 0) {
1641 kvm_apic_local_deliver(apic, APIC_LVTT); 1647 kvm_apic_local_deliver(apic, APIC_LVTT);
1648 if (apic_lvtt_tscdeadline(apic))
1649 apic->lapic_timer.tscdeadline = 0;
1642 atomic_set(&apic->lapic_timer.pending, 0); 1650 atomic_set(&apic->lapic_timer.pending, 0);
1643 } 1651 }
1644} 1652}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 931467881da7..3201e93ebd07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
199EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); 199EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
200 200
201/* 201/*
202 * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, 202 * the low bit of the generation number is always presumed to be zero.
203 * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation 203 * This disables mmio caching during memslot updates. The concept is
204 * number. 204 * similar to a seqcount but instead of retrying the access we just punt
205 * and ignore the cache.
206 *
207 * spte bits 3-11 are used as bits 1-9 of the generation number,
208 * the bits 52-61 are used as bits 10-19 of the generation number.
205 */ 209 */
206#define MMIO_SPTE_GEN_LOW_SHIFT 3 210#define MMIO_SPTE_GEN_LOW_SHIFT 2
207#define MMIO_SPTE_GEN_HIGH_SHIFT 52 211#define MMIO_SPTE_GEN_HIGH_SHIFT 52
208 212
209#define MMIO_GEN_SHIFT 19 213#define MMIO_GEN_SHIFT 20
210#define MMIO_GEN_LOW_SHIFT 9 214#define MMIO_GEN_LOW_SHIFT 10
211#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) 215#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2)
212#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) 216#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
213#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) 217#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
214 218
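Not part of the patch: a minimal standalone sketch of the new generation packing described by the comment above, assuming only the shift/mask values shown in this hunk. Generation bit 0 is presumed zero, bits 1-9 land in spte bits 3-11, and bits 10-19 in spte bits 52-61; the round trip below checks that every even 20-bit generation survives encode/decode.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GEN_LOW_SHIFT   2                           /* MMIO_SPTE_GEN_LOW_SHIFT  */
#define GEN_HIGH_SHIFT  52                          /* MMIO_SPTE_GEN_HIGH_SHIFT */
#define GEN_LOW_BITS    10                          /* MMIO_GEN_LOW_SHIFT       */
#define GEN_LOW_MASK    ((1u << GEN_LOW_BITS) - 2)  /* keeps bits 1-9, drops bit 0 */
#define GEN_MASK        ((1u << 20) - 1)            /* MMIO_GEN_MASK            */

/* Pack generation bits 1-9 into spte bits 3-11, bits 10-19 into 52-61. */
static uint64_t pack_generation(unsigned int gen)
{
	uint64_t mask;

	mask  = (uint64_t)(gen & GEN_LOW_MASK) << GEN_LOW_SHIFT;
	mask |= (uint64_t)(gen >> GEN_LOW_BITS) << GEN_HIGH_SHIFT;
	return mask;
}

/* Recover the generation from an spte that carries only generation bits. */
static unsigned int unpack_generation(uint64_t spte)
{
	unsigned int gen;

	gen  = (unsigned int)(spte >> GEN_LOW_SHIFT) & GEN_LOW_MASK;
	gen |= (unsigned int)(spte >> GEN_HIGH_SHIFT) << GEN_LOW_BITS;
	return gen;
}

int main(void)
{
	unsigned int gen;

	/* Every even generation (low bit zero) must survive the round trip. */
	for (gen = 0; gen <= GEN_MASK; gen += 2)
		assert(unpack_generation(pack_generation(gen)) == gen);
	printf("all even 20-bit generations round-trip\n");
	return 0;
}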
@@ -236,12 +240,7 @@ static unsigned int get_mmio_spte_generation(u64 spte)
236 240
237static unsigned int kvm_current_mmio_generation(struct kvm *kvm) 241static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
238{ 242{
239 /* 243 return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
240 * Init kvm generation close to MMIO_MAX_GEN to easily test the
241 * code of handling generation number wrap-around.
242 */
243 return (kvm_memslots(kvm)->generation +
244 MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
245} 244}
246 245
247static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, 246static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
@@ -296,11 +295,6 @@ static bool check_mmio_spte(struct kvm *kvm, u64 spte)
296 return likely(kvm_gen == spte_gen); 295 return likely(kvm_gen == spte_gen);
297} 296}
298 297
299static inline u64 rsvd_bits(int s, int e)
300{
301 return ((1ULL << (e - s + 1)) - 1) << s;
302}
303
304void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 298void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
305 u64 dirty_mask, u64 nx_mask, u64 x_mask) 299 u64 dirty_mask, u64 nx_mask, u64 x_mask)
306{ 300{
@@ -1180,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
1180 * Write-protect on the specified @sptep, @pt_protect indicates whether 1174 * Write-protect on the specified @sptep, @pt_protect indicates whether
1181 * spte write-protection is caused by protecting shadow page table. 1175 * spte write-protection is caused by protecting shadow page table.
1182 * 1176 *
1183 * Note: write protection is difference between drity logging and spte 1177 * Note: write protection is difference between dirty logging and spte
1184 * protection: 1178 * protection:
1185 * - for dirty logging, the spte can be set to writable at anytime if 1179 * - for dirty logging, the spte can be set to writable at anytime if
1186 * its dirty bitmap is properly set. 1180 * its dirty bitmap is properly set.
@@ -1268,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
1268} 1262}
1269 1263
1270static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 1264static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
1271 struct kvm_memory_slot *slot, unsigned long data) 1265 struct kvm_memory_slot *slot, gfn_t gfn, int level,
1266 unsigned long data)
1272{ 1267{
1273 u64 *sptep; 1268 u64 *sptep;
1274 struct rmap_iterator iter; 1269 struct rmap_iterator iter;
@@ -1276,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
1276 1271
1277 while ((sptep = rmap_get_first(*rmapp, &iter))) { 1272 while ((sptep = rmap_get_first(*rmapp, &iter))) {
1278 BUG_ON(!(*sptep & PT_PRESENT_MASK)); 1273 BUG_ON(!(*sptep & PT_PRESENT_MASK));
1279 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); 1274 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n",
1275 sptep, *sptep, gfn, level);
1280 1276
1281 drop_spte(kvm, sptep); 1277 drop_spte(kvm, sptep);
1282 need_tlb_flush = 1; 1278 need_tlb_flush = 1;
@@ -1286,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
1286} 1282}
1287 1283
1288static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, 1284static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
1289 struct kvm_memory_slot *slot, unsigned long data) 1285 struct kvm_memory_slot *slot, gfn_t gfn, int level,
1286 unsigned long data)
1290{ 1287{
1291 u64 *sptep; 1288 u64 *sptep;
1292 struct rmap_iterator iter; 1289 struct rmap_iterator iter;
@@ -1300,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
1300 1297
1301 for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { 1298 for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
1302 BUG_ON(!is_shadow_present_pte(*sptep)); 1299 BUG_ON(!is_shadow_present_pte(*sptep));
1303 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); 1300 rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
1301 sptep, *sptep, gfn, level);
1304 1302
1305 need_flush = 1; 1303 need_flush = 1;
1306 1304
@@ -1334,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm,
1334 int (*handler)(struct kvm *kvm, 1332 int (*handler)(struct kvm *kvm,
1335 unsigned long *rmapp, 1333 unsigned long *rmapp,
1336 struct kvm_memory_slot *slot, 1334 struct kvm_memory_slot *slot,
1335 gfn_t gfn,
1336 int level,
1337 unsigned long data)) 1337 unsigned long data))
1338{ 1338{
1339 int j; 1339 int j;
@@ -1363,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
1363 j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { 1363 j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
1364 unsigned long idx, idx_end; 1364 unsigned long idx, idx_end;
1365 unsigned long *rmapp; 1365 unsigned long *rmapp;
1366 gfn_t gfn = gfn_start;
1366 1367
1367 /* 1368 /*
1368 * {idx(page_j) | page_j intersects with 1369 * {idx(page_j) | page_j intersects with
@@ -1373,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm,
1373 1374
1374 rmapp = __gfn_to_rmap(gfn_start, j, memslot); 1375 rmapp = __gfn_to_rmap(gfn_start, j, memslot);
1375 1376
1376 for (; idx <= idx_end; ++idx) 1377 for (; idx <= idx_end;
1377 ret |= handler(kvm, rmapp++, memslot, data); 1378 ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j)))
1379 ret |= handler(kvm, rmapp++, memslot,
1380 gfn, j, data);
1378 } 1381 }
1379 } 1382 }
1380 1383
@@ -1385,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
1385 unsigned long data, 1388 unsigned long data,
1386 int (*handler)(struct kvm *kvm, unsigned long *rmapp, 1389 int (*handler)(struct kvm *kvm, unsigned long *rmapp,
1387 struct kvm_memory_slot *slot, 1390 struct kvm_memory_slot *slot,
1391 gfn_t gfn, int level,
1388 unsigned long data)) 1392 unsigned long data))
1389{ 1393{
1390 return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); 1394 return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
@@ -1406,24 +1410,14 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
1406} 1410}
1407 1411
1408static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1412static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1409 struct kvm_memory_slot *slot, unsigned long data) 1413 struct kvm_memory_slot *slot, gfn_t gfn, int level,
1414 unsigned long data)
1410{ 1415{
1411 u64 *sptep; 1416 u64 *sptep;
1412 struct rmap_iterator uninitialized_var(iter); 1417 struct rmap_iterator uninitialized_var(iter);
1413 int young = 0; 1418 int young = 0;
1414 1419
1415 /* 1420 BUG_ON(!shadow_accessed_mask);
1416 * In case of absence of EPT Access and Dirty Bits supports,
1417 * emulate the accessed bit for EPT, by checking if this page has
1418 * an EPT mapping, and clearing it if it does. On the next access,
1419 * a new EPT mapping will be established.
1420 * This has some overhead, but not as much as the cost of swapping
1421 * out actively used pages or breaking up actively used hugepages.
1422 */
1423 if (!shadow_accessed_mask) {
1424 young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
1425 goto out;
1426 }
1427 1421
1428 for (sptep = rmap_get_first(*rmapp, &iter); sptep; 1422 for (sptep = rmap_get_first(*rmapp, &iter); sptep;
1429 sptep = rmap_get_next(&iter)) { 1423 sptep = rmap_get_next(&iter)) {
@@ -1435,14 +1429,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1435 (unsigned long *)sptep); 1429 (unsigned long *)sptep);
1436 } 1430 }
1437 } 1431 }
1438out: 1432 trace_kvm_age_page(gfn, level, slot, young);
1439 /* @data has hva passed to kvm_age_hva(). */
1440 trace_kvm_age_page(data, slot, young);
1441 return young; 1433 return young;
1442} 1434}
1443 1435
1444static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1436static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1445 struct kvm_memory_slot *slot, unsigned long data) 1437 struct kvm_memory_slot *slot, gfn_t gfn,
1438 int level, unsigned long data)
1446{ 1439{
1447 u64 *sptep; 1440 u64 *sptep;
1448 struct rmap_iterator iter; 1441 struct rmap_iterator iter;
@@ -1480,13 +1473,33 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
1480 1473
1481 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 1474 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
1482 1475
1483 kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); 1476 kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
1484 kvm_flush_remote_tlbs(vcpu->kvm); 1477 kvm_flush_remote_tlbs(vcpu->kvm);
1485} 1478}
1486 1479
1487int kvm_age_hva(struct kvm *kvm, unsigned long hva) 1480int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
1488{ 1481{
1489 return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); 1482 /*
1483 * In case of absence of EPT Access and Dirty Bits supports,
1484 * emulate the accessed bit for EPT, by checking if this page has
1485 * an EPT mapping, and clearing it if it does. On the next access,
1486 * a new EPT mapping will be established.
1487 * This has some overhead, but not as much as the cost of swapping
1488 * out actively used pages or breaking up actively used hugepages.
1489 */
1490 if (!shadow_accessed_mask) {
1491 /*
1492 * We are holding the kvm->mmu_lock, and we are blowing up
1493 * shadow PTEs. MMU notifier consumers need to be kept at bay.
1494 * This is correct as long as we don't decouple the mmu_lock
1495 * protected regions (like invalidate_range_start|end does).
1496 */
1497 kvm->mmu_notifier_seq++;
1498 return kvm_handle_hva_range(kvm, start, end, 0,
1499 kvm_unmap_rmapp);
1500 }
1501
1502 return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
1490} 1503}
1491 1504
1492int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 1505int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -1749,7 +1762,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1749 return 1; 1762 return 1;
1750 } 1763 }
1751 1764
1752 kvm_mmu_flush_tlb(vcpu); 1765 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1753 return 0; 1766 return 0;
1754} 1767}
1755 1768
@@ -1802,7 +1815,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
1802 1815
1803 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 1816 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
1804 if (flush) 1817 if (flush)
1805 kvm_mmu_flush_tlb(vcpu); 1818 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1806} 1819}
1807 1820
1808struct mmu_page_path { 1821struct mmu_page_path {
@@ -2536,7 +2549,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2536 true, host_writable)) { 2549 true, host_writable)) {
2537 if (write_fault) 2550 if (write_fault)
2538 *emulate = 1; 2551 *emulate = 1;
2539 kvm_mmu_flush_tlb(vcpu); 2552 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2540 } 2553 }
2541 2554
2542 if (unlikely(is_mmio_spte(*sptep) && emulate)) 2555 if (unlikely(is_mmio_spte(*sptep) && emulate))
@@ -3163,7 +3176,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
3163 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 3176 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3164 return; 3177 return;
3165 3178
3166 vcpu_clear_mmio_info(vcpu, ~0ul); 3179 vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
3167 kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); 3180 kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
3168 if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { 3181 if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
3169 hpa_t root = vcpu->arch.mmu.root_hpa; 3182 hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -3206,7 +3219,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
3206{ 3219{
3207 if (exception) 3220 if (exception)
3208 exception->error_code = 0; 3221 exception->error_code = 0;
3209 return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); 3222 return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
3210} 3223}
3211 3224
3212static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) 3225static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -3450,13 +3463,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
3450 context->nx = false; 3463 context->nx = false;
3451} 3464}
3452 3465
3453void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
3454{
3455 ++vcpu->stat.tlb_flush;
3456 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3457}
3458EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
3459
3460void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) 3466void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
3461{ 3467{
3462 mmu_free_roots(vcpu); 3468 mmu_free_roots(vcpu);
@@ -3518,6 +3524,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3518 int maxphyaddr = cpuid_maxphyaddr(vcpu); 3524 int maxphyaddr = cpuid_maxphyaddr(vcpu);
3519 u64 exb_bit_rsvd = 0; 3525 u64 exb_bit_rsvd = 0;
3520 u64 gbpages_bit_rsvd = 0; 3526 u64 gbpages_bit_rsvd = 0;
3527 u64 nonleaf_bit8_rsvd = 0;
3521 3528
3522 context->bad_mt_xwr = 0; 3529 context->bad_mt_xwr = 0;
3523 3530
@@ -3525,6 +3532,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3525 exb_bit_rsvd = rsvd_bits(63, 63); 3532 exb_bit_rsvd = rsvd_bits(63, 63);
3526 if (!guest_cpuid_has_gbpages(vcpu)) 3533 if (!guest_cpuid_has_gbpages(vcpu))
3527 gbpages_bit_rsvd = rsvd_bits(7, 7); 3534 gbpages_bit_rsvd = rsvd_bits(7, 7);
3535
3536 /*
3537 * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
3538 * leaf entries) on AMD CPUs only.
3539 */
3540 if (guest_cpuid_is_amd(vcpu))
3541 nonleaf_bit8_rsvd = rsvd_bits(8, 8);
3542
3528 switch (context->root_level) { 3543 switch (context->root_level) {
3529 case PT32_ROOT_LEVEL: 3544 case PT32_ROOT_LEVEL:
3530 /* no rsvd bits for 2 level 4K page table entries */ 3545 /* no rsvd bits for 2 level 4K page table entries */
@@ -3559,9 +3574,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3559 break; 3574 break;
3560 case PT64_ROOT_LEVEL: 3575 case PT64_ROOT_LEVEL:
3561 context->rsvd_bits_mask[0][3] = exb_bit_rsvd | 3576 context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
3562 rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7); 3577 nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
3563 context->rsvd_bits_mask[0][2] = exb_bit_rsvd | 3578 context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
3564 gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); 3579 nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
3565 context->rsvd_bits_mask[0][1] = exb_bit_rsvd | 3580 context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
3566 rsvd_bits(maxphyaddr, 51); 3581 rsvd_bits(maxphyaddr, 51);
3567 context->rsvd_bits_mask[0][0] = exb_bit_rsvd | 3582 context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -3962,7 +3977,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
3962 if (remote_flush) 3977 if (remote_flush)
3963 kvm_flush_remote_tlbs(vcpu->kvm); 3978 kvm_flush_remote_tlbs(vcpu->kvm);
3964 else if (local_flush) 3979 else if (local_flush)
3965 kvm_mmu_flush_tlb(vcpu); 3980 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3966} 3981}
3967 3982
3968static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, 3983static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
@@ -4223,7 +4238,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
4223void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) 4238void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
4224{ 4239{
4225 vcpu->arch.mmu.invlpg(vcpu, gva); 4240 vcpu->arch.mmu.invlpg(vcpu, gva);
4226 kvm_mmu_flush_tlb(vcpu); 4241 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4227 ++vcpu->stat.invlpg; 4242 ++vcpu->stat.invlpg;
4228} 4243}
4229EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); 4244EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@ -4433,7 +4448,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
4433 * The very rare case: if the generation-number is round, 4448 * The very rare case: if the generation-number is round,
4434 * zap all shadow pages. 4449 * zap all shadow pages.
4435 */ 4450 */
4436 if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { 4451 if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
4437 printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); 4452 printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
4438 kvm_mmu_invalidate_zap_all_pages(kvm); 4453 kvm_mmu_invalidate_zap_all_pages(kvm);
4439 } 4454 }
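An aside, not from the patch: with kvm_current_mmio_generation() now returning the memslot generation masked to 20 bits, the masked value returns to zero exactly when the counter wraps, which is what the "== 0" test above relies on. A throwaway check, assuming the memslot generation normally advances by two (the low bit is only set transiently during memslot updates, per the comment earlier in this file's diff):

#include <assert.h>

#define MMIO_GEN_MASK ((1u << 20) - 1)

int main(void)
{
	unsigned long long slots_generation = 0;
	unsigned int wraps = 0;
	int i;

	/* Advance by 2 per update; the low bit is reserved for in-progress updates. */
	for (i = 0; i < (1 << 21); i++) {
		slots_generation += 2;
		if ((slots_generation & MMIO_GEN_MASK) == 0)
			wraps++;
	}
	/* 2^21 steps of 2 cross the 2^20 mask boundary exactly four times. */
	assert(wraps == 4);
	return 0;
}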
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b982112d2ca5..bde8ee725754 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -56,6 +56,11 @@
56#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) 56#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
57#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) 57#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
58 58
59static inline u64 rsvd_bits(int s, int e)
60{
61 return ((1ULL << (e - s + 1)) - 1) << s;
62}
63
59int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); 64int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
60void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); 65void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
61 66
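For reference, a tiny standalone check of the rsvd_bits() helper that this hunk moves into mmu.h so vmx.c can share it: rsvd_bits(s, e) builds a mask with bits s through e (inclusive) set. The maxphyaddr value below is only an example.

#include <assert.h>
#include <stdint.h>

static inline uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	int maxphyaddr = 40;	/* example value, normally taken from CPUID */

	assert(rsvd_bits(7, 7) == 0x80ULL);	/* single bit, e.g. gbpages_bit_rsvd */
	assert(rsvd_bits(8, 8) == 0x100ULL);	/* the new nonleaf_bit8_rsvd         */
	assert(rsvd_bits(maxphyaddr, 51) == 0x000fff0000000000ULL);
	return 0;
}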
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 410776528265..806d58e3c320 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,8 +298,7 @@ retry_walk:
298 } 298 }
299#endif 299#endif
300 walker->max_level = walker->level; 300 walker->max_level = walker->level;
301 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || 301 ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
302 (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
303 302
304 accessed_dirty = PT_GUEST_ACCESSED_MASK; 303 accessed_dirty = PT_GUEST_ACCESSED_MASK;
305 pt_access = pte_access = ACC_ALL; 304 pt_access = pte_access = ACC_ALL;
@@ -321,9 +320,22 @@ retry_walk:
321 walker->pte_gpa[walker->level - 1] = pte_gpa; 320 walker->pte_gpa[walker->level - 1] = pte_gpa;
322 321
323 real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), 322 real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
324 PFERR_USER_MASK|PFERR_WRITE_MASK); 323 PFERR_USER_MASK|PFERR_WRITE_MASK,
324 &walker->fault);
325
326 /*
 327 * FIXME: This can happen if emulation (of an INS/OUTS

328 * instruction) triggers a nested page fault. The exit
329 * qualification / exit info field will incorrectly have
330 * "guest page access" as the nested page fault's cause,
331 * instead of "guest page structure access". To fix this,
332 * the x86_exception struct should be augmented with enough
333 * information to fix the exit_qualification or exit_info_1
334 * fields.
335 */
325 if (unlikely(real_gfn == UNMAPPED_GVA)) 336 if (unlikely(real_gfn == UNMAPPED_GVA))
326 goto error; 337 return 0;
338
327 real_gfn = gpa_to_gfn(real_gfn); 339 real_gfn = gpa_to_gfn(real_gfn);
328 340
329 host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, 341 host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
@@ -364,7 +376,7 @@ retry_walk:
364 if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) 376 if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
365 gfn += pse36_gfn_delta(pte); 377 gfn += pse36_gfn_delta(pte);
366 378
367 real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); 379 real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
368 if (real_gpa == UNMAPPED_GVA) 380 if (real_gpa == UNMAPPED_GVA)
369 return 0; 381 return 0;
370 382
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 3dd6accb64ec..8e6b7d869d2f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -15,6 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/kvm_host.h> 16#include <linux/kvm_host.h>
17#include <linux/perf_event.h> 17#include <linux/perf_event.h>
18#include <asm/perf_event.h>
18#include "x86.h" 19#include "x86.h"
19#include "cpuid.h" 20#include "cpuid.h"
20#include "lapic.h" 21#include "lapic.h"
@@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
463{ 464{
464 struct kvm_pmu *pmu = &vcpu->arch.pmu; 465 struct kvm_pmu *pmu = &vcpu->arch.pmu;
465 struct kvm_cpuid_entry2 *entry; 466 struct kvm_cpuid_entry2 *entry;
466 unsigned bitmap_len; 467 union cpuid10_eax eax;
468 union cpuid10_edx edx;
467 469
468 pmu->nr_arch_gp_counters = 0; 470 pmu->nr_arch_gp_counters = 0;
469 pmu->nr_arch_fixed_counters = 0; 471 pmu->nr_arch_fixed_counters = 0;
@@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
475 entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); 477 entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
476 if (!entry) 478 if (!entry)
477 return; 479 return;
480 eax.full = entry->eax;
481 edx.full = entry->edx;
478 482
479 pmu->version = entry->eax & 0xff; 483 pmu->version = eax.split.version_id;
480 if (!pmu->version) 484 if (!pmu->version)
481 return; 485 return;
482 486
483 pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, 487 pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
484 INTEL_PMC_MAX_GENERIC); 488 INTEL_PMC_MAX_GENERIC);
485 pmu->counter_bitmask[KVM_PMC_GP] = 489 pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
486 ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; 490 pmu->available_event_types = ~entry->ebx &
487 bitmap_len = (entry->eax >> 24) & 0xff; 491 ((1ull << eax.split.mask_length) - 1);
488 pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
489 492
490 if (pmu->version == 1) { 493 if (pmu->version == 1) {
491 pmu->nr_arch_fixed_counters = 0; 494 pmu->nr_arch_fixed_counters = 0;
492 } else { 495 } else {
493 pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), 496 pmu->nr_arch_fixed_counters =
497 min_t(int, edx.split.num_counters_fixed,
494 INTEL_PMC_MAX_FIXED); 498 INTEL_PMC_MAX_FIXED);
495 pmu->counter_bitmask[KVM_PMC_FIXED] = 499 pmu->counter_bitmask[KVM_PMC_FIXED] =
496 ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; 500 ((u64)1 << edx.split.bit_width_fixed) - 1;
497 } 501 }
498 502
499 pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | 503 pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
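Not part of the patch: a standalone illustration of the CPUID leaf 0xA EAX layout that the union cpuid10_eax bitfields above decode in place of the old manual shifts — version id in bits 7:0, GP counter count in 15:8, counter width in 23:16, event mask length in 31:24. The struct below is a local stand-in for the <asm/perf_event.h> type and, like the kernel's, relies on the x86 little-endian bitfield layout; the sample EAX value is made up.

#include <assert.h>
#include <stdint.h>

union cpuid10_eax_local {		/* stand-in for union cpuid10_eax */
	struct {
		uint32_t version_id:8;
		uint32_t num_counters:8;
		uint32_t bit_width:8;
		uint32_t mask_length:8;
	} split;
	uint32_t full;
};

int main(void)
{
	union cpuid10_eax_local eax;

	eax.full = 0x07300403;		/* mask_length 7, width 48, 4 counters, version 3 */

	/* The bitfield reads match the shifts the old code open-coded. */
	assert(eax.split.version_id   == (eax.full & 0xff));
	assert(eax.split.num_counters == ((eax.full >> 8) & 0xff));
	assert(eax.split.bit_width    == ((eax.full >> 16) & 0xff));
	assert(eax.split.mask_length  == ((eax.full >> 24) & 0xff));
	return 0;
}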
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ddf742768ecf..f7f6a4a157a6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -622,7 +622,7 @@ static int has_svm(void)
622 return 1; 622 return 1;
623} 623}
624 624
625static void svm_hardware_disable(void *garbage) 625static void svm_hardware_disable(void)
626{ 626{
627 /* Make sure we clean up behind us */ 627 /* Make sure we clean up behind us */
628 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) 628 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage)
633 amd_pmu_disable_virt(); 633 amd_pmu_disable_virt();
634} 634}
635 635
636static int svm_hardware_enable(void *garbage) 636static int svm_hardware_enable(void)
637{ 637{
638 638
639 struct svm_cpu_data *sd; 639 struct svm_cpu_data *sd;
@@ -1257,7 +1257,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1257 svm->asid_generation = 0; 1257 svm->asid_generation = 0;
1258 init_vmcb(svm); 1258 init_vmcb(svm);
1259 1259
1260 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 1260 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1261 MSR_IA32_APICBASE_ENABLE;
1261 if (kvm_vcpu_is_bsp(&svm->vcpu)) 1262 if (kvm_vcpu_is_bsp(&svm->vcpu))
1262 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 1263 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1263 1264
@@ -1974,10 +1975,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1974{ 1975{
1975 struct vcpu_svm *svm = to_svm(vcpu); 1976 struct vcpu_svm *svm = to_svm(vcpu);
1976 1977
1977 svm->vmcb->control.exit_code = SVM_EXIT_NPF; 1978 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
1978 svm->vmcb->control.exit_code_hi = 0; 1979 /*
1979 svm->vmcb->control.exit_info_1 = fault->error_code; 1980 * TODO: track the cause of the nested page fault, and
1980 svm->vmcb->control.exit_info_2 = fault->address; 1981 * correctly fill in the high bits of exit_info_1.
1982 */
1983 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1984 svm->vmcb->control.exit_code_hi = 0;
1985 svm->vmcb->control.exit_info_1 = (1ULL << 32);
1986 svm->vmcb->control.exit_info_2 = fault->address;
1987 }
1988
1989 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
1990 svm->vmcb->control.exit_info_1 |= fault->error_code;
1991
1992 /*
1993 * The present bit is always zero for page structure faults on real
1994 * hardware.
1995 */
1996 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
1997 svm->vmcb->control.exit_info_1 &= ~1;
1981 1998
1982 nested_svm_vmexit(svm); 1999 nested_svm_vmexit(svm);
1983} 2000}
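An illustrative aside (not the patch's code, and the cause-bit names are my own reading of the hunk): the rework above always rewrites the low 32 bits of exit_info_1 with the fault's error code, sets a cause bit above 32 only when it has to synthesize the #NPF exit itself, and clears the present bit whenever the "page structure access" cause bit is set, matching the comment about real hardware.

#include <assert.h>
#include <stdint.h>

/* Hypothetical names for the cause bits manipulated in the hunk above. */
#define NPF_GPA_FAULT	(1ULL << 32)	/* fault on the final guest physical address */
#define NPF_GPT_FAULT	(2ULL << 32)	/* fault while walking guest page tables     */

static uint64_t compose_exit_info_1(uint64_t current, int already_npf,
				    uint32_t error_code)
{
	uint64_t info = current;

	if (!already_npf)
		info = NPF_GPA_FAULT;	/* cause unknown: claim a plain GPA fault */

	info &= ~0xffffffffULL;		/* low 32 bits carry the error code */
	info |= error_code;

	if (info & NPF_GPT_FAULT)	/* page structure fault: P bit reads 0 */
		info &= ~1ULL;

	return info;
}

int main(void)
{
	/* Hardware reported a page-structure NPF: the present bit gets cleared. */
	assert((compose_exit_info_1(NPF_GPT_FAULT, 1, 0x3) & 1) == 0);
	/* Synthesized exit: error code in the low bits, GPA cause bit set. */
	assert(compose_exit_info_1(0, 0, 0x2) == (NPF_GPA_FAULT | 0x2));
	return 0;
}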
@@ -3031,7 +3048,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
3031 return 0; 3048 return 0;
3032} 3049}
3033 3050
3034u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 3051static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
3035{ 3052{
3036 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); 3053 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
3037 return vmcb->control.tsc_offset + 3054 return vmcb->control.tsc_offset +
@@ -4305,6 +4322,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
4305 local_irq_enable(); 4322 local_irq_enable();
4306} 4323}
4307 4324
4325static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
4326{
4327}
4328
4308static struct kvm_x86_ops svm_x86_ops = { 4329static struct kvm_x86_ops svm_x86_ops = {
4309 .cpu_has_kvm_support = has_svm, 4330 .cpu_has_kvm_support = has_svm,
4310 .disabled_by_bios = is_disabled, 4331 .disabled_by_bios = is_disabled,
@@ -4349,7 +4370,6 @@ static struct kvm_x86_ops svm_x86_ops = {
4349 .cache_reg = svm_cache_reg, 4370 .cache_reg = svm_cache_reg,
4350 .get_rflags = svm_get_rflags, 4371 .get_rflags = svm_get_rflags,
4351 .set_rflags = svm_set_rflags, 4372 .set_rflags = svm_set_rflags,
4352 .fpu_activate = svm_fpu_activate,
4353 .fpu_deactivate = svm_fpu_deactivate, 4373 .fpu_deactivate = svm_fpu_deactivate,
4354 4374
4355 .tlb_flush = svm_flush_tlb, 4375 .tlb_flush = svm_flush_tlb,
@@ -4406,6 +4426,8 @@ static struct kvm_x86_ops svm_x86_ops = {
4406 4426
4407 .check_intercept = svm_check_intercept, 4427 .check_intercept = svm_check_intercept,
4408 .handle_external_intr = svm_handle_external_intr, 4428 .handle_external_intr = svm_handle_external_intr,
4429
4430 .sched_in = svm_sched_in,
4409}; 4431};
4410 4432
4411static int __init svm_init(void) 4433static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e850a7d332be..6b06ab8748dd 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -415,15 +415,14 @@ TRACE_EVENT(kvm_apic_ipi,
415); 415);
416 416
417TRACE_EVENT(kvm_apic_accept_irq, 417TRACE_EVENT(kvm_apic_accept_irq,
418 TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced), 418 TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
419 TP_ARGS(apicid, dm, tm, vec, coalesced), 419 TP_ARGS(apicid, dm, tm, vec),
420 420
421 TP_STRUCT__entry( 421 TP_STRUCT__entry(
422 __field( __u32, apicid ) 422 __field( __u32, apicid )
423 __field( __u16, dm ) 423 __field( __u16, dm )
424 __field( __u8, tm ) 424 __field( __u8, tm )
425 __field( __u8, vec ) 425 __field( __u8, vec )
426 __field( bool, coalesced )
427 ), 426 ),
428 427
429 TP_fast_assign( 428 TP_fast_assign(
@@ -431,14 +430,12 @@ TRACE_EVENT(kvm_apic_accept_irq,
431 __entry->dm = dm; 430 __entry->dm = dm;
432 __entry->tm = tm; 431 __entry->tm = tm;
433 __entry->vec = vec; 432 __entry->vec = vec;
434 __entry->coalesced = coalesced;
435 ), 433 ),
436 434
437 TP_printk("apicid %x vec %u (%s|%s)%s", 435 TP_printk("apicid %x vec %u (%s|%s)",
438 __entry->apicid, __entry->vec, 436 __entry->apicid, __entry->vec,
439 __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), 437 __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
440 __entry->tm ? "level" : "edge", 438 __entry->tm ? "level" : "edge")
441 __entry->coalesced ? " (coalesced)" : "")
442); 439);
443 440
444TRACE_EVENT(kvm_eoi, 441TRACE_EVENT(kvm_eoi,
@@ -850,6 +847,36 @@ TRACE_EVENT(kvm_track_tsc,
850 847
851#endif /* CONFIG_X86_64 */ 848#endif /* CONFIG_X86_64 */
852 849
850TRACE_EVENT(kvm_ple_window,
851 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
852 TP_ARGS(grow, vcpu_id, new, old),
853
854 TP_STRUCT__entry(
855 __field( bool, grow )
856 __field( unsigned int, vcpu_id )
857 __field( int, new )
858 __field( int, old )
859 ),
860
861 TP_fast_assign(
862 __entry->grow = grow;
863 __entry->vcpu_id = vcpu_id;
864 __entry->new = new;
865 __entry->old = old;
866 ),
867
868 TP_printk("vcpu %u: ple_window %d (%s %d)",
869 __entry->vcpu_id,
870 __entry->new,
871 __entry->grow ? "grow" : "shrink",
872 __entry->old)
873);
874
875#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
876 trace_kvm_ple_window(true, vcpu_id, new, old)
877#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
878 trace_kvm_ple_window(false, vcpu_id, new, old)
879
853#endif /* _TRACE_KVM_H */ 880#endif /* _TRACE_KVM_H */
854 881
855#undef TRACE_INCLUDE_PATH 882#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe11cf124a1..04fa1b8298c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
125 * Time is measured based on a counter that runs at the same rate as the TSC, 125 * Time is measured based on a counter that runs at the same rate as the TSC,
126 * refer SDM volume 3b section 21.6.13 & 22.1.3. 126 * refer SDM volume 3b section 21.6.13 & 22.1.3.
127 */ 127 */
128#define KVM_VMX_DEFAULT_PLE_GAP 128 128#define KVM_VMX_DEFAULT_PLE_GAP 128
129#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 129#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
130#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
131#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
132#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
133 INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
134
130static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; 135static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
131module_param(ple_gap, int, S_IRUGO); 136module_param(ple_gap, int, S_IRUGO);
132 137
133static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; 138static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
134module_param(ple_window, int, S_IRUGO); 139module_param(ple_window, int, S_IRUGO);
135 140
141/* Default doubles per-vcpu window every exit. */
142static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
143module_param(ple_window_grow, int, S_IRUGO);
144
145/* Default resets per-vcpu window every exit to ple_window. */
146static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
147module_param(ple_window_shrink, int, S_IRUGO);
148
149/* Default is to compute the maximum so we can never overflow. */
150static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
151static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
152module_param(ple_window_max, int, S_IRUGO);
153
136extern const ulong vmx_return; 154extern const ulong vmx_return;
137 155
138#define NR_AUTOLOAD_MSRS 8 156#define NR_AUTOLOAD_MSRS 8
@@ -379,6 +397,7 @@ struct nested_vmx {
379 * we must keep them pinned while L2 runs. 397 * we must keep them pinned while L2 runs.
380 */ 398 */
381 struct page *apic_access_page; 399 struct page *apic_access_page;
400 struct page *virtual_apic_page;
382 u64 msr_ia32_feature_control; 401 u64 msr_ia32_feature_control;
383 402
384 struct hrtimer preemption_timer; 403 struct hrtimer preemption_timer;
@@ -484,6 +503,10 @@ struct vcpu_vmx {
484 503
485 /* Support for a guest hypervisor (nested VMX) */ 504 /* Support for a guest hypervisor (nested VMX) */
486 struct nested_vmx nested; 505 struct nested_vmx nested;
506
507 /* Dynamic PLE window. */
508 int ple_window;
509 bool ple_window_dirty;
487}; 510};
488 511
489enum segment_cache_field { 512enum segment_cache_field {
@@ -533,6 +556,7 @@ static int max_shadow_read_only_fields =
533 ARRAY_SIZE(shadow_read_only_fields); 556 ARRAY_SIZE(shadow_read_only_fields);
534 557
535static unsigned long shadow_read_write_fields[] = { 558static unsigned long shadow_read_write_fields[] = {
559 TPR_THRESHOLD,
536 GUEST_RIP, 560 GUEST_RIP,
537 GUEST_RSP, 561 GUEST_RSP,
538 GUEST_CR0, 562 GUEST_CR0,
@@ -743,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
743static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); 767static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
744static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); 768static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
745static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); 769static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
770static int alloc_identity_pagetable(struct kvm *kvm);
746 771
747static DEFINE_PER_CPU(struct vmcs *, vmxarea); 772static DEFINE_PER_CPU(struct vmcs *, vmxarea);
748static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 773static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2135,7 +2160,7 @@ static u64 guest_read_tsc(void)
2135 * Like guest_read_tsc, but always returns L1's notion of the timestamp 2160 * Like guest_read_tsc, but always returns L1's notion of the timestamp
2136 * counter, even if a nested guest (L2) is currently running. 2161 * counter, even if a nested guest (L2) is currently running.
2137 */ 2162 */
2138u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 2163static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2139{ 2164{
2140 u64 tsc_offset; 2165 u64 tsc_offset;
2141 2166
@@ -2330,7 +2355,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2330 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | 2355 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
2331 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | 2356 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
2332 CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | 2357 CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
2333 CPU_BASED_PAUSE_EXITING | 2358 CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
2334 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 2359 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2335 /* 2360 /*
2336 * We can allow some features even when not supported by the 2361 * We can allow some features even when not supported by the
@@ -2601,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2601 break; 2626 break;
2602 case MSR_IA32_CR_PAT: 2627 case MSR_IA32_CR_PAT:
2603 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 2628 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2629 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
2630 return 1;
2604 vmcs_write64(GUEST_IA32_PAT, data); 2631 vmcs_write64(GUEST_IA32_PAT, data);
2605 vcpu->arch.pat = data; 2632 vcpu->arch.pat = data;
2606 break; 2633 break;
@@ -2704,7 +2731,7 @@ static void kvm_cpu_vmxon(u64 addr)
2704 : "memory", "cc"); 2731 : "memory", "cc");
2705} 2732}
2706 2733
2707static int hardware_enable(void *garbage) 2734static int hardware_enable(void)
2708{ 2735{
2709 int cpu = raw_smp_processor_id(); 2736 int cpu = raw_smp_processor_id();
2710 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 2737 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2768,7 +2795,7 @@ static void kvm_cpu_vmxoff(void)
2768 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 2795 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
2769} 2796}
2770 2797
2771static void hardware_disable(void *garbage) 2798static void hardware_disable(void)
2772{ 2799{
2773 if (vmm_exclusive) { 2800 if (vmm_exclusive) {
2774 vmclear_local_loaded_vmcss(); 2801 vmclear_local_loaded_vmcss();
@@ -3107,9 +3134,17 @@ static __init int hardware_setup(void)
3107 if (!cpu_has_vmx_unrestricted_guest()) 3134 if (!cpu_has_vmx_unrestricted_guest())
3108 enable_unrestricted_guest = 0; 3135 enable_unrestricted_guest = 0;
3109 3136
3110 if (!cpu_has_vmx_flexpriority()) 3137 if (!cpu_has_vmx_flexpriority()) {
3111 flexpriority_enabled = 0; 3138 flexpriority_enabled = 0;
3112 3139
3140 /*
3141 * set_apic_access_page_addr() is used to reload apic access
3142 * page upon invalidation. No need to do anything if the
3143 * processor does not have the APIC_ACCESS_ADDR VMCS field.
3144 */
3145 kvm_x86_ops->set_apic_access_page_addr = NULL;
3146 }
3147
3113 if (!cpu_has_vmx_tpr_shadow()) 3148 if (!cpu_has_vmx_tpr_shadow())
3114 kvm_x86_ops->update_cr8_intercept = NULL; 3149 kvm_x86_ops->update_cr8_intercept = NULL;
3115 3150
@@ -3905,7 +3940,7 @@ static int init_rmode_tss(struct kvm *kvm)
3905{ 3940{
3906 gfn_t fn; 3941 gfn_t fn;
3907 u16 data = 0; 3942 u16 data = 0;
3908 int r, idx, ret = 0; 3943 int idx, r;
3909 3944
3910 idx = srcu_read_lock(&kvm->srcu); 3945 idx = srcu_read_lock(&kvm->srcu);
3911 fn = kvm->arch.tss_addr >> PAGE_SHIFT; 3946 fn = kvm->arch.tss_addr >> PAGE_SHIFT;
@@ -3927,32 +3962,32 @@ static int init_rmode_tss(struct kvm *kvm)
3927 r = kvm_write_guest_page(kvm, fn, &data, 3962 r = kvm_write_guest_page(kvm, fn, &data,
3928 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, 3963 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
3929 sizeof(u8)); 3964 sizeof(u8));
3930 if (r < 0)
3931 goto out;
3932
3933 ret = 1;
3934out: 3965out:
3935 srcu_read_unlock(&kvm->srcu, idx); 3966 srcu_read_unlock(&kvm->srcu, idx);
3936 return ret; 3967 return r;
3937} 3968}
3938 3969
3939static int init_rmode_identity_map(struct kvm *kvm) 3970static int init_rmode_identity_map(struct kvm *kvm)
3940{ 3971{
3941 int i, idx, r, ret; 3972 int i, idx, r = 0;
3942 pfn_t identity_map_pfn; 3973 pfn_t identity_map_pfn;
3943 u32 tmp; 3974 u32 tmp;
3944 3975
3945 if (!enable_ept) 3976 if (!enable_ept)
3946 return 1;
3947 if (unlikely(!kvm->arch.ept_identity_pagetable)) {
3948 printk(KERN_ERR "EPT: identity-mapping pagetable "
3949 "haven't been allocated!\n");
3950 return 0; 3977 return 0;
3951 } 3978
3979 /* Protect kvm->arch.ept_identity_pagetable_done. */
3980 mutex_lock(&kvm->slots_lock);
3981
3952 if (likely(kvm->arch.ept_identity_pagetable_done)) 3982 if (likely(kvm->arch.ept_identity_pagetable_done))
3953 return 1; 3983 goto out2;
3954 ret = 0; 3984
3955 identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; 3985 identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
3986
3987 r = alloc_identity_pagetable(kvm);
3988 if (r < 0)
3989 goto out2;
3990
3956 idx = srcu_read_lock(&kvm->srcu); 3991 idx = srcu_read_lock(&kvm->srcu);
3957 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); 3992 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
3958 if (r < 0) 3993 if (r < 0)
@@ -3967,10 +4002,13 @@ static int init_rmode_identity_map(struct kvm *kvm)
3967 goto out; 4002 goto out;
3968 } 4003 }
3969 kvm->arch.ept_identity_pagetable_done = true; 4004 kvm->arch.ept_identity_pagetable_done = true;
3970 ret = 1; 4005
3971out: 4006out:
3972 srcu_read_unlock(&kvm->srcu, idx); 4007 srcu_read_unlock(&kvm->srcu, idx);
3973 return ret; 4008
4009out2:
4010 mutex_unlock(&kvm->slots_lock);
4011 return r;
3974} 4012}
3975 4013
3976static void seg_setup(int seg) 4014static void seg_setup(int seg)
@@ -3995,23 +4033,28 @@ static int alloc_apic_access_page(struct kvm *kvm)
3995 int r = 0; 4033 int r = 0;
3996 4034
3997 mutex_lock(&kvm->slots_lock); 4035 mutex_lock(&kvm->slots_lock);
3998 if (kvm->arch.apic_access_page) 4036 if (kvm->arch.apic_access_page_done)
3999 goto out; 4037 goto out;
4000 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; 4038 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
4001 kvm_userspace_mem.flags = 0; 4039 kvm_userspace_mem.flags = 0;
4002 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; 4040 kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
4003 kvm_userspace_mem.memory_size = PAGE_SIZE; 4041 kvm_userspace_mem.memory_size = PAGE_SIZE;
4004 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); 4042 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
4005 if (r) 4043 if (r)
4006 goto out; 4044 goto out;
4007 4045
4008 page = gfn_to_page(kvm, 0xfee00); 4046 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
4009 if (is_error_page(page)) { 4047 if (is_error_page(page)) {
4010 r = -EFAULT; 4048 r = -EFAULT;
4011 goto out; 4049 goto out;
4012 } 4050 }
4013 4051
4014 kvm->arch.apic_access_page = page; 4052 /*
4053 * Do not pin the page in memory, so that memory hot-unplug
4054 * is able to migrate it.
4055 */
4056 put_page(page);
4057 kvm->arch.apic_access_page_done = true;
4015out: 4058out:
4016 mutex_unlock(&kvm->slots_lock); 4059 mutex_unlock(&kvm->slots_lock);
4017 return r; 4060 return r;
@@ -4019,31 +4062,20 @@ out:
4019 4062
4020static int alloc_identity_pagetable(struct kvm *kvm) 4063static int alloc_identity_pagetable(struct kvm *kvm)
4021{ 4064{
4022 struct page *page; 4065 /* Called with kvm->slots_lock held. */
4066
4023 struct kvm_userspace_memory_region kvm_userspace_mem; 4067 struct kvm_userspace_memory_region kvm_userspace_mem;
4024 int r = 0; 4068 int r = 0;
4025 4069
4026 mutex_lock(&kvm->slots_lock); 4070 BUG_ON(kvm->arch.ept_identity_pagetable_done);
4027 if (kvm->arch.ept_identity_pagetable) 4071
4028 goto out;
4029 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; 4072 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
4030 kvm_userspace_mem.flags = 0; 4073 kvm_userspace_mem.flags = 0;
4031 kvm_userspace_mem.guest_phys_addr = 4074 kvm_userspace_mem.guest_phys_addr =
4032 kvm->arch.ept_identity_map_addr; 4075 kvm->arch.ept_identity_map_addr;
4033 kvm_userspace_mem.memory_size = PAGE_SIZE; 4076 kvm_userspace_mem.memory_size = PAGE_SIZE;
4034 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); 4077 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
4035 if (r)
4036 goto out;
4037
4038 page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
4039 if (is_error_page(page)) {
4040 r = -EFAULT;
4041 goto out;
4042 }
4043 4078
4044 kvm->arch.ept_identity_pagetable = page;
4045out:
4046 mutex_unlock(&kvm->slots_lock);
4047 return r; 4079 return r;
4048} 4080}
4049 4081
@@ -4402,7 +4434,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4402 4434
4403 if (ple_gap) { 4435 if (ple_gap) {
4404 vmcs_write32(PLE_GAP, ple_gap); 4436 vmcs_write32(PLE_GAP, ple_gap);
4405 vmcs_write32(PLE_WINDOW, ple_window); 4437 vmx->ple_window = ple_window;
4438 vmx->ple_window_dirty = true;
4406 } 4439 }
4407 4440
4408 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); 4441 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
@@ -4477,7 +4510,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4477 4510
4478 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 4511 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
4479 kvm_set_cr8(&vmx->vcpu, 0); 4512 kvm_set_cr8(&vmx->vcpu, 0);
4480 apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 4513 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
4481 if (kvm_vcpu_is_bsp(&vmx->vcpu)) 4514 if (kvm_vcpu_is_bsp(&vmx->vcpu))
4482 apic_base_msr.data |= MSR_IA32_APICBASE_BSP; 4515 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
4483 apic_base_msr.host_initiated = true; 4516 apic_base_msr.host_initiated = true;
@@ -4537,9 +4570,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4537 vmcs_write32(TPR_THRESHOLD, 0); 4570 vmcs_write32(TPR_THRESHOLD, 0);
4538 } 4571 }
4539 4572
4540 if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) 4573 kvm_vcpu_reload_apic_access_page(vcpu);
4541 vmcs_write64(APIC_ACCESS_ADDR,
4542 page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
4543 4574
4544 if (vmx_vm_has_apicv(vcpu->kvm)) 4575 if (vmx_vm_has_apicv(vcpu->kvm))
4545 memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); 4576 memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
@@ -4729,10 +4760,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4729 if (ret) 4760 if (ret)
4730 return ret; 4761 return ret;
4731 kvm->arch.tss_addr = addr; 4762 kvm->arch.tss_addr = addr;
4732 if (!init_rmode_tss(kvm)) 4763 return init_rmode_tss(kvm);
4733 return -ENOMEM;
4734
4735 return 0;
4736} 4764}
4737 4765
4738static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) 4766static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
@@ -5521,17 +5549,18 @@ static u64 ept_rsvd_mask(u64 spte, int level)
5521 for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) 5549 for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
5522 mask |= (1ULL << i); 5550 mask |= (1ULL << i);
5523 5551
5524 if (level > 2) 5552 if (level == 4)
5525 /* bits 7:3 reserved */ 5553 /* bits 7:3 reserved */
5526 mask |= 0xf8; 5554 mask |= 0xf8;
5527 else if (level == 2) { 5555 else if (spte & (1ULL << 7))
5528 if (spte & (1ULL << 7)) 5556 /*
5529 /* 2MB ref, bits 20:12 reserved */ 5557 * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
5530 mask |= 0x1ff000; 5558 * level == 1 if the hypervisor is using the ignored bit 7.
5531 else 5559 */
5532 /* bits 6:3 reserved */ 5560 mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
5533 mask |= 0x78; 5561 else if (level > 1)
5534 } 5562 /* bits 6:3 reserved */
5563 mask |= 0x78;
5535 5564
5536 return mask; 5565 return mask;
5537} 5566}
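A quick standalone check, not from the patch, of the unified large-page mask the rewrite above introduces: (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE reproduces the old hard-coded 2MB mask (bits 20:12) at level 2 and extends naturally to the 1GB case (bits 29:12) at level 3.

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

static uint64_t large_page_rsvd_mask(int level)
{
	return (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
}

int main(void)
{
	assert(large_page_rsvd_mask(2) == 0x1ff000ULL);		/* 2MB page, bits 20:12 */
	assert(large_page_rsvd_mask(3) == 0x3ffff000ULL);	/* 1GB page, bits 29:12 */
	return 0;
}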
@@ -5561,7 +5590,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
5561 WARN_ON(1); 5590 WARN_ON(1);
5562 } 5591 }
5563 5592
5564 if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) { 5593 /* bits 5:3 are _not_ reserved for large page or leaf page */
5594 if ((rsvd_bits & 0x38) == 0) {
5565 u64 ept_mem_type = (spte & 0x38) >> 3; 5595 u64 ept_mem_type = (spte & 0x38) >> 3;
5566 5596
5567 if (ept_mem_type == 2 || ept_mem_type == 3 || 5597 if (ept_mem_type == 2 || ept_mem_type == 3 ||
@@ -5676,12 +5706,85 @@ out:
5676 return ret; 5706 return ret;
5677} 5707}
5678 5708
5709static int __grow_ple_window(int val)
5710{
5711 if (ple_window_grow < 1)
5712 return ple_window;
5713
5714 val = min(val, ple_window_actual_max);
5715
5716 if (ple_window_grow < ple_window)
5717 val *= ple_window_grow;
5718 else
5719 val += ple_window_grow;
5720
5721 return val;
5722}
5723
5724static int __shrink_ple_window(int val, int modifier, int minimum)
5725{
5726 if (modifier < 1)
5727 return ple_window;
5728
5729 if (modifier < ple_window)
5730 val /= modifier;
5731 else
5732 val -= modifier;
5733
5734 return max(val, minimum);
5735}
5736
5737static void grow_ple_window(struct kvm_vcpu *vcpu)
5738{
5739 struct vcpu_vmx *vmx = to_vmx(vcpu);
5740 int old = vmx->ple_window;
5741
5742 vmx->ple_window = __grow_ple_window(old);
5743
5744 if (vmx->ple_window != old)
5745 vmx->ple_window_dirty = true;
5746
5747 trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
5748}
5749
5750static void shrink_ple_window(struct kvm_vcpu *vcpu)
5751{
5752 struct vcpu_vmx *vmx = to_vmx(vcpu);
5753 int old = vmx->ple_window;
5754
5755 vmx->ple_window = __shrink_ple_window(old,
5756 ple_window_shrink, ple_window);
5757
5758 if (vmx->ple_window != old)
5759 vmx->ple_window_dirty = true;
5760
5761 trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
5762}
5763
5764/*
5765 * ple_window_actual_max is computed to be one grow_ple_window() below
5766 * ple_window_max. (See __grow_ple_window for the reason.)
5767 * This prevents overflows, because ple_window_max is int.
 5768 * ple_window_max is effectively rounded down to a multiple of ple_window_grow in
5769 * this process.
5770 * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
5771 */
5772static void update_ple_window_actual_max(void)
5773{
5774 ple_window_actual_max =
5775 __shrink_ple_window(max(ple_window_max, ple_window),
5776 ple_window_grow, INT_MIN);
5777}
5778
5679/* 5779/*
5680 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE 5780 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
5681 * exiting, so only get here on cpu with PAUSE-Loop-Exiting. 5781 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
5682 */ 5782 */
5683static int handle_pause(struct kvm_vcpu *vcpu) 5783static int handle_pause(struct kvm_vcpu *vcpu)
5684{ 5784{
5785 if (ple_gap)
5786 grow_ple_window(vcpu);
5787
5685 skip_emulated_instruction(vcpu); 5788 skip_emulated_instruction(vcpu);
5686 kvm_vcpu_on_spin(vcpu); 5789 kvm_vcpu_on_spin(vcpu);
5687 5790
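Not part of the patch: a standalone sketch of the dynamic PLE window arithmetic added above, using the module defaults as example values. The modifier is applied multiplicatively when it is smaller than ple_window and additively otherwise; the grow path is first capped at ple_window_actual_max so the int can never overflow, and a shrink modifier below 1 simply resets the window to ple_window.

#include <assert.h>
#include <limits.h>

static int ple_window = 4096;			/* KVM_VMX_DEFAULT_PLE_WINDOW      */
static int ple_window_grow = 2;			/* KVM_VMX_DEFAULT_PLE_WINDOW_GROW */
static int ple_window_actual_max = INT_MAX / 2;	/* one grow below ple_window_max   */

static int min_int(int a, int b) { return a < b ? a : b; }
static int max_int(int a, int b) { return a > b ? a : b; }

static int grow_window(int val)
{
	if (ple_window_grow < 1)
		return ple_window;

	val = min_int(val, ple_window_actual_max);	/* pre-cap so the grow cannot overflow */

	if (ple_window_grow < ple_window)
		val *= ple_window_grow;			/* small modifier: multiplicative */
	else
		val += ple_window_grow;			/* large modifier: additive       */
	return val;
}

static int shrink_window(int val, int modifier, int minimum)
{
	if (modifier < 1)
		return ple_window;			/* shrink disabled: reset to ple_window */

	if (modifier < ple_window)
		val /= modifier;
	else
		val -= modifier;
	return max_int(val, minimum);
}

int main(void)
{
	int w = ple_window;

	w = grow_window(w);				/* default x2: 4096 -> 8192          */
	assert(w == 8192);
	assert(grow_window(INT_MAX) <= INT_MAX);	/* the cap keeps the result in range */
	assert(shrink_window(w, 0, ple_window) == ple_window);	/* default shrink resets     */
	return 0;
}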
@@ -6146,7 +6249,11 @@ static void free_nested(struct vcpu_vmx *vmx)
6146 /* Unpin physical memory we referred to in current vmcs02 */ 6249 /* Unpin physical memory we referred to in current vmcs02 */
6147 if (vmx->nested.apic_access_page) { 6250 if (vmx->nested.apic_access_page) {
6148 nested_release_page(vmx->nested.apic_access_page); 6251 nested_release_page(vmx->nested.apic_access_page);
6149 vmx->nested.apic_access_page = 0; 6252 vmx->nested.apic_access_page = NULL;
6253 }
6254 if (vmx->nested.virtual_apic_page) {
6255 nested_release_page(vmx->nested.virtual_apic_page);
6256 vmx->nested.virtual_apic_page = NULL;
6150 } 6257 }
6151 6258
6152 nested_free_all_saved_vmcss(vmx); 6259 nested_free_all_saved_vmcss(vmx);
@@ -6617,7 +6724,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6617 switch (type) { 6724 switch (type) {
6618 case VMX_EPT_EXTENT_GLOBAL: 6725 case VMX_EPT_EXTENT_GLOBAL:
6619 kvm_mmu_sync_roots(vcpu); 6726 kvm_mmu_sync_roots(vcpu);
6620 kvm_mmu_flush_tlb(vcpu); 6727 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
6621 nested_vmx_succeed(vcpu); 6728 nested_vmx_succeed(vcpu);
6622 break; 6729 break;
6623 default: 6730 default:
@@ -6892,6 +6999,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6892 case EXIT_REASON_TASK_SWITCH: 6999 case EXIT_REASON_TASK_SWITCH:
6893 return 1; 7000 return 1;
6894 case EXIT_REASON_CPUID: 7001 case EXIT_REASON_CPUID:
7002 if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
7003 return 0;
6895 return 1; 7004 return 1;
6896 case EXIT_REASON_HLT: 7005 case EXIT_REASON_HLT:
6897 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); 7006 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
@@ -6936,7 +7045,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6936 case EXIT_REASON_MCE_DURING_VMENTRY: 7045 case EXIT_REASON_MCE_DURING_VMENTRY:
6937 return 0; 7046 return 0;
6938 case EXIT_REASON_TPR_BELOW_THRESHOLD: 7047 case EXIT_REASON_TPR_BELOW_THRESHOLD:
6939 return 1; 7048 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
6940 case EXIT_REASON_APIC_ACCESS: 7049 case EXIT_REASON_APIC_ACCESS:
6941 return nested_cpu_has2(vmcs12, 7050 return nested_cpu_has2(vmcs12,
6942 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); 7051 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
@@ -7057,6 +7166,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
7057 7166
7058static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 7167static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
7059{ 7168{
7169 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7170
7171 if (is_guest_mode(vcpu) &&
7172 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
7173 return;
7174
7060 if (irr == -1 || tpr < irr) { 7175 if (irr == -1 || tpr < irr) {
7061 vmcs_write32(TPR_THRESHOLD, 0); 7176 vmcs_write32(TPR_THRESHOLD, 0);
7062 return; 7177 return;
@@ -7094,6 +7209,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
7094 vmx_set_msr_bitmap(vcpu); 7209 vmx_set_msr_bitmap(vcpu);
7095} 7210}
7096 7211
7212static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
7213{
7214 struct vcpu_vmx *vmx = to_vmx(vcpu);
7215
7216 /*
7217 * Currently we do not handle the nested case where L2 has an
7218 * APIC access page of its own; that page is still pinned.
7219 * Hence, we skip the case where the VCPU is in guest mode _and_
7220 * L1 prepared an APIC access page for L2.
7221 *
7222 * For the case where L1 and L2 share the same APIC access page
7223 * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
7224 * in the vmcs12), this function will only update either the vmcs01
7225 * or the vmcs02. If the former, the vmcs02 will be updated by
7226 * prepare_vmcs02. If the latter, the vmcs01 will be updated in
7227 * the next L2->L1 exit.
7228 */
7229 if (!is_guest_mode(vcpu) ||
7230 !nested_cpu_has2(vmx->nested.current_vmcs12,
7231 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
7232 vmcs_write64(APIC_ACCESS_ADDR, hpa);
7233}
7234
7097static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) 7235static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
7098{ 7236{
7099 u16 status; 7237 u16 status;
@@ -7387,6 +7525,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7387 if (vmx->emulation_required) 7525 if (vmx->emulation_required)
7388 return; 7526 return;
7389 7527
7528 if (vmx->ple_window_dirty) {
7529 vmx->ple_window_dirty = false;
7530 vmcs_write32(PLE_WINDOW, vmx->ple_window);
7531 }
7532
7390 if (vmx->nested.sync_shadow_vmcs) { 7533 if (vmx->nested.sync_shadow_vmcs) {
7391 copy_vmcs12_to_shadow(vmx); 7534 copy_vmcs12_to_shadow(vmx);
7392 vmx->nested.sync_shadow_vmcs = false; 7535 vmx->nested.sync_shadow_vmcs = false;
@@ -7642,10 +7785,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
7642 if (!kvm->arch.ept_identity_map_addr) 7785 if (!kvm->arch.ept_identity_map_addr)
7643 kvm->arch.ept_identity_map_addr = 7786 kvm->arch.ept_identity_map_addr =
7644 VMX_EPT_IDENTITY_PAGETABLE_ADDR; 7787 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
7645 err = -ENOMEM; 7788 err = init_rmode_identity_map(kvm);
7646 if (alloc_identity_pagetable(kvm) != 0) 7789 if (err)
7647 goto free_vmcs;
7648 if (!init_rmode_identity_map(kvm))
7649 goto free_vmcs; 7790 goto free_vmcs;
7650 } 7791 }
7651 7792
@@ -7824,6 +7965,55 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7824 kvm_inject_page_fault(vcpu, fault); 7965 kvm_inject_page_fault(vcpu, fault);
7825} 7966}
7826 7967
7968static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
7969 struct vmcs12 *vmcs12)
7970{
7971 struct vcpu_vmx *vmx = to_vmx(vcpu);
7972
7973 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
7974 /* TODO: Also verify bits beyond physical address width are 0 */
7975 if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
7976 return false;
7977
7978 /*
7979 * Translate L1 physical address to host physical
7980 * address for vmcs02. Keep the page pinned, so this
7981 * physical address remains valid. We keep a reference
7982 * to it so we can release it later.
7983 */
7984 if (vmx->nested.apic_access_page) /* shouldn't happen */
7985 nested_release_page(vmx->nested.apic_access_page);
7986 vmx->nested.apic_access_page =
7987 nested_get_page(vcpu, vmcs12->apic_access_addr);
7988 }
7989
7990 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
7991 /* TODO: Also verify bits beyond physical address width are 0 */
7992 if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
7993 return false;
7994
7995 if (vmx->nested.virtual_apic_page) /* shouldn't happen */
7996 nested_release_page(vmx->nested.virtual_apic_page);
7997 vmx->nested.virtual_apic_page =
7998 nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
7999
8000 /*
8001 * Failing the vm entry is _not_ what the processor does
8002 * but it's basically the only possibility we have.
8003 * We could still enter the guest if CR8 load exits are
8004 * enabled, CR8 store exits are enabled, and virtualize APIC
8005 * access is disabled; in this case the processor would never
8006 * use the TPR shadow and we could simply clear the bit from
8007 * the execution control. But such a configuration is useless,
8008 * so let's keep the code simple.
8009 */
8010 if (!vmx->nested.virtual_apic_page)
8011 return false;
8012 }
8013
8014 return true;
8015}
8016
7827static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) 8017static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
7828{ 8018{
7829 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; 8019 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
@@ -7849,7 +8039,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
7849/* 8039/*
7850 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 8040 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
7851 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 8041 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
7852 * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2 8042 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
7853 * guest in a way that will both be appropriate to L1's requests, and our 8043 * guest in a way that will both be appropriate to L1's requests, and our
7854 * needs. In addition to modifying the active vmcs (which is vmcs02), this 8044 * needs. In addition to modifying the active vmcs (which is vmcs02), this
7855 * function also has additional necessary side-effects, like setting various 8045 * function also has additional necessary side-effects, like setting various
@@ -7970,16 +8160,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7970 8160
7971 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { 8161 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
7972 /* 8162 /*
7973 * Translate L1 physical address to host physical
7974 * address for vmcs02. Keep the page pinned, so this
7975 * physical address remains valid. We keep a reference
7976 * to it so we can release it later.
7977 */
7978 if (vmx->nested.apic_access_page) /* shouldn't happen */
7979 nested_release_page(vmx->nested.apic_access_page);
7980 vmx->nested.apic_access_page =
7981 nested_get_page(vcpu, vmcs12->apic_access_addr);
7982 /*
7983 * If translation failed, no matter: This feature asks 8163 * If translation failed, no matter: This feature asks
7984 * to exit when accessing the given address, and if it 8164 * to exit when accessing the given address, and if it
7985 * can never be accessed, this feature won't do 8165 * can never be accessed, this feature won't do
@@ -7994,8 +8174,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7994 } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { 8174 } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
7995 exec_control |= 8175 exec_control |=
7996 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 8176 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
7997 vmcs_write64(APIC_ACCESS_ADDR, 8177 kvm_vcpu_reload_apic_access_page(vcpu);
7998 page_to_phys(vcpu->kvm->arch.apic_access_page));
7999 } 8178 }
8000 8179
8001 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 8180 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -8024,6 +8203,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8024 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; 8203 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
8025 exec_control &= ~CPU_BASED_TPR_SHADOW; 8204 exec_control &= ~CPU_BASED_TPR_SHADOW;
8026 exec_control |= vmcs12->cpu_based_vm_exec_control; 8205 exec_control |= vmcs12->cpu_based_vm_exec_control;
8206
8207 if (exec_control & CPU_BASED_TPR_SHADOW) {
8208 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
8209 page_to_phys(vmx->nested.virtual_apic_page));
8210 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
8211 }
8212
8027 /* 8213 /*
8028 * Merging of IO and MSR bitmaps not currently supported. 8214 * Merging of IO and MSR bitmaps not currently supported.
8029 * Rather, exit every time. 8215 * Rather, exit every time.
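
As the hunk above shows, prepare_vmcs02 builds the L2 primary execution controls by starting from L0's (vmcs01) settings, clearing the bits L0 leaves to L1's choice (TPR shadow, virtual-NMI pending), and OR-ing in what vmcs12 requested; the virtual-APIC page pinned earlier is then written into vmcs02 only if the merged control still has TPR shadow set. A stand-alone sketch of that merge, with invented bit names rather than the real VMCS encodings:

#include <stdint.h>
#include <stdio.h>

/* Illustrative control bits only -- not the real VMCS encodings. */
#define CTL_TPR_SHADOW   (1u << 21)
#define CTL_VNMI_PENDING (1u << 22)
#define CTL_HLT_EXITING  (1u << 7)

static uint32_t merge_exec_control(uint32_t l0_ctl, uint32_t l1_ctl)
{
        uint32_t ctl = l0_ctl;

        /* L0 defers these two decisions to whatever L1 asked for. */
        ctl &= ~(CTL_TPR_SHADOW | CTL_VNMI_PENDING);
        /* Everything L1 wants goes on top of L0's baseline. */
        ctl |= l1_ctl;
        return ctl;
}

int main(void)
{
        uint32_t merged = merge_exec_control(CTL_TPR_SHADOW | CTL_HLT_EXITING,
                                             CTL_VNMI_PENDING);

        printf("merged=%#x\n", (unsigned)merged); /* HLT + VNMI, no TPR shadow */
        return 0;
}
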
@@ -8185,8 +8371,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8185 return 1; 8371 return 1;
8186 } 8372 }
8187 8373
8188 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && 8374 if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
8189 !PAGE_ALIGNED(vmcs12->apic_access_addr)) {
8190 /*TODO: Also verify bits beyond physical address width are 0*/ 8375 /*TODO: Also verify bits beyond physical address width are 0*/
8191 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8376 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8192 return 1; 8377 return 1;
@@ -8790,10 +8975,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8790 /* Unpin physical memory we referred to in vmcs02 */ 8975 /* Unpin physical memory we referred to in vmcs02 */
8791 if (vmx->nested.apic_access_page) { 8976 if (vmx->nested.apic_access_page) {
8792 nested_release_page(vmx->nested.apic_access_page); 8977 nested_release_page(vmx->nested.apic_access_page);
8793 vmx->nested.apic_access_page = 0; 8978 vmx->nested.apic_access_page = NULL;
8979 }
8980 if (vmx->nested.virtual_apic_page) {
8981 nested_release_page(vmx->nested.virtual_apic_page);
8982 vmx->nested.virtual_apic_page = NULL;
8794 } 8983 }
8795 8984
8796 /* 8985 /*
8986 * We are now running in L2, mmu_notifier will force to reload the
8987 * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
8988 */
8989 kvm_vcpu_reload_apic_access_page(vcpu);
8990
8991 /*
8797 * Exiting from L2 to L1, we're now back to L1 which thinks it just 8992 * Exiting from L2 to L1, we're now back to L1 which thinks it just
8798 * finished a VMLAUNCH or VMRESUME instruction, so we need to set the 8993 * finished a VMLAUNCH or VMRESUME instruction, so we need to set the
8799 * success or failure flag accordingly. 8994 * success or failure flag accordingly.
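
The nested_vmx_vmexit hunk above drops the references taken at nested VM entry and resets the pointers to NULL (rather than 0) so a stale pin can never be reused, then reloads the APIC access address for L1. A toy model of that pin/unpin discipline, with a fake refcount standing in for the real struct page accounting:

#include <assert.h>
#include <stddef.h>

/* Toy stand-in for a pinned guest page with a reference count. */
struct pinned_page { int refcount; };

static void pin(struct pinned_page **slot, struct pinned_page *pg)
{
        if (*slot)              /* shouldn't happen: drop a stale pin */
                (*slot)->refcount--;
        pg->refcount++;
        *slot = pg;
}

static void unpin(struct pinned_page **slot)
{
        if (*slot) {
                (*slot)->refcount--;
                *slot = NULL;   /* NULL, not 0, for pointers */
        }
}

int main(void)
{
        struct pinned_page page = { .refcount = 1 };
        struct pinned_page *apic_access_page = NULL;

        pin(&apic_access_page, &page);   /* on nested VM entry */
        unpin(&apic_access_page);        /* on nested VM exit  */
        assert(page.refcount == 1 && apic_access_page == NULL);
        return 0;
}
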
@@ -8846,6 +9041,12 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
8846 return X86EMUL_CONTINUE; 9041 return X86EMUL_CONTINUE;
8847} 9042}
8848 9043
9044static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
9045{
9046 if (ple_gap)
9047 shrink_ple_window(vcpu);
9048}
9049
8849static struct kvm_x86_ops vmx_x86_ops = { 9050static struct kvm_x86_ops vmx_x86_ops = {
8850 .cpu_has_kvm_support = cpu_has_kvm_support, 9051 .cpu_has_kvm_support = cpu_has_kvm_support,
8851 .disabled_by_bios = vmx_disabled_by_bios, 9052 .disabled_by_bios = vmx_disabled_by_bios,
@@ -8890,7 +9091,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
8890 .cache_reg = vmx_cache_reg, 9091 .cache_reg = vmx_cache_reg,
8891 .get_rflags = vmx_get_rflags, 9092 .get_rflags = vmx_get_rflags,
8892 .set_rflags = vmx_set_rflags, 9093 .set_rflags = vmx_set_rflags,
8893 .fpu_activate = vmx_fpu_activate,
8894 .fpu_deactivate = vmx_fpu_deactivate, 9094 .fpu_deactivate = vmx_fpu_deactivate,
8895 9095
8896 .tlb_flush = vmx_flush_tlb, 9096 .tlb_flush = vmx_flush_tlb,
@@ -8913,6 +9113,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
8913 .enable_irq_window = enable_irq_window, 9113 .enable_irq_window = enable_irq_window,
8914 .update_cr8_intercept = update_cr8_intercept, 9114 .update_cr8_intercept = update_cr8_intercept,
8915 .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, 9115 .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
9116 .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
8916 .vm_has_apicv = vmx_vm_has_apicv, 9117 .vm_has_apicv = vmx_vm_has_apicv,
8917 .load_eoi_exitmap = vmx_load_eoi_exitmap, 9118 .load_eoi_exitmap = vmx_load_eoi_exitmap,
8918 .hwapic_irr_update = vmx_hwapic_irr_update, 9119 .hwapic_irr_update = vmx_hwapic_irr_update,
@@ -8951,6 +9152,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
8951 .mpx_supported = vmx_mpx_supported, 9152 .mpx_supported = vmx_mpx_supported,
8952 9153
8953 .check_nested_events = vmx_check_nested_events, 9154 .check_nested_events = vmx_check_nested_events,
9155
9156 .sched_in = vmx_sched_in,
8954}; 9157};
8955 9158
8956static int __init vmx_init(void) 9159static int __init vmx_init(void)
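
The new vmx_sched_in hook shrinks the PLE window whenever a vCPU is scheduled back in; the matching grow side and the actual modifier/ceiling module parameters live elsewhere in this series and are not shown here. Purely as an illustration of a clamped grow/shrink scheme (all numbers invented):

#include <stdio.h>

/* Illustrative numbers only; the real values are vmx module parameters. */
static unsigned int ple_window     = 4096;      /* current window        */
static unsigned int ple_window_min = 4096;      /* never shrink below    */
static unsigned int ple_window_max = 1u << 20;  /* never grow above      */

static void grow_window(void)                   /* e.g. on a PLE VM exit */
{
        ple_window = ple_window * 2 > ple_window_max ? ple_window_max
                                                     : ple_window * 2;
}

static void shrink_window(void)                 /* e.g. from sched_in    */
{
        ple_window = ple_window / 2 < ple_window_min ? ple_window_min
                                                     : ple_window / 2;
}

int main(void)
{
        grow_window();
        grow_window();
        shrink_window();
        printf("ple_window=%u\n", ple_window);  /* 8192 */
        return 0;
}
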
@@ -9065,6 +9268,8 @@ static int __init vmx_init(void)
9065 } else 9268 } else
9066 kvm_disable_tdp(); 9269 kvm_disable_tdp();
9067 9270
9271 update_ple_window_actual_max();
9272
9068 return 0; 9273 return 0;
9069 9274
9070out7: 9275out7:
@@ -9098,7 +9303,7 @@ static void __exit vmx_exit(void)
9098 free_page((unsigned long)vmx_vmread_bitmap); 9303 free_page((unsigned long)vmx_vmread_bitmap);
9099 9304
9100#ifdef CONFIG_KEXEC 9305#ifdef CONFIG_KEXEC
9101 rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); 9306 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
9102 synchronize_rcu(); 9307 synchronize_rcu();
9103#endif 9308#endif
9104 9309
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f1e22d3b286..5430e4b0af29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
246} 246}
247EXPORT_SYMBOL_GPL(kvm_set_shared_msr); 247EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
248 248
249static void drop_user_return_notifiers(void *ignore) 249static void drop_user_return_notifiers(void)
250{ 250{
251 unsigned int cpu = smp_processor_id(); 251 unsigned int cpu = smp_processor_id();
252 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); 252 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
408} 408}
409EXPORT_SYMBOL_GPL(kvm_inject_page_fault); 409EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
410 410
411void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) 411static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
412{ 412{
413 if (mmu_is_nested(vcpu) && !fault->nested_page_fault) 413 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
414 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); 414 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
415 else 415 else
416 vcpu->arch.mmu.inject_page_fault(vcpu, fault); 416 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
417
418 return fault->nested_page_fault;
417} 419}
418 420
419void kvm_inject_nmi(struct kvm_vcpu *vcpu) 421void kvm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -457,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
457 gfn_t ngfn, void *data, int offset, int len, 459 gfn_t ngfn, void *data, int offset, int len,
458 u32 access) 460 u32 access)
459{ 461{
462 struct x86_exception exception;
460 gfn_t real_gfn; 463 gfn_t real_gfn;
461 gpa_t ngpa; 464 gpa_t ngpa;
462 465
463 ngpa = gfn_to_gpa(ngfn); 466 ngpa = gfn_to_gpa(ngfn);
464 real_gfn = mmu->translate_gpa(vcpu, ngpa, access); 467 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
465 if (real_gfn == UNMAPPED_GVA) 468 if (real_gfn == UNMAPPED_GVA)
466 return -EFAULT; 469 return -EFAULT;
467 470
@@ -726,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
726{ 729{
727 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { 730 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
728 kvm_mmu_sync_roots(vcpu); 731 kvm_mmu_sync_roots(vcpu);
729 kvm_mmu_flush_tlb(vcpu); 732 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
730 return 0; 733 return 0;
731 } 734 }
732 735
@@ -1518,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
1518 pvclock_update_vm_gtod_copy(kvm); 1521 pvclock_update_vm_gtod_copy(kvm);
1519 1522
1520 kvm_for_each_vcpu(i, vcpu, kvm) 1523 kvm_for_each_vcpu(i, vcpu, kvm)
1521 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 1524 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1522 1525
1523 /* guest entries allowed */ 1526 /* guest entries allowed */
1524 kvm_for_each_vcpu(i, vcpu, kvm) 1527 kvm_for_each_vcpu(i, vcpu, kvm)
@@ -1661,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work)
1661 struct kvm_vcpu *vcpu; 1664 struct kvm_vcpu *vcpu;
1662 1665
1663 kvm_for_each_vcpu(i, vcpu, kvm) { 1666 kvm_for_each_vcpu(i, vcpu, kvm) {
1664 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 1667 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1665 kvm_vcpu_kick(vcpu); 1668 kvm_vcpu_kick(vcpu);
1666 } 1669 }
1667} 1670}
@@ -1670,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1670{ 1673{
1671 struct kvm *kvm = v->kvm; 1674 struct kvm *kvm = v->kvm;
1672 1675
1673 set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests); 1676 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1674 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 1677 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
1675 KVMCLOCK_UPDATE_DELAY); 1678 KVMCLOCK_UPDATE_DELAY);
1676} 1679}
@@ -1723,9 +1726,10 @@ static bool valid_mtrr_type(unsigned t)
1723 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ 1726 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
1724} 1727}
1725 1728
1726static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1729bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1727{ 1730{
1728 int i; 1731 int i;
1732 u64 mask;
1729 1733
1730 if (!msr_mtrr_valid(msr)) 1734 if (!msr_mtrr_valid(msr))
1731 return false; 1735 return false;
@@ -1747,14 +1751,31 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1747 } 1751 }
1748 1752
1749 /* variable MTRRs */ 1753 /* variable MTRRs */
1750 return valid_mtrr_type(data & 0xff); 1754 WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
1755
1756 mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
1757 if ((msr & 1) == 0) {
1758 /* MTRR base */
1759 if (!valid_mtrr_type(data & 0xff))
1760 return false;
1761 mask |= 0xf00;
1762 } else
1763 /* MTRR mask */
1764 mask |= 0x7ff;
1765 if (data & mask) {
1766 kvm_inject_gp(vcpu, 0);
1767 return false;
1768 }
1769
1770 return true;
1751} 1771}
1772EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
1752 1773
1753static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1774static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1754{ 1775{
1755 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 1776 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1756 1777
1757 if (!mtrr_valid(vcpu, msr, data)) 1778 if (!kvm_mtrr_valid(vcpu, msr, data))
1758 return 1; 1779 return 1;
1759 1780
1760 if (msr == MSR_MTRRdefType) { 1781 if (msr == MSR_MTRRdefType) {
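
kvm_mtrr_valid above now also injects #GP for writes that set reserved bits of a variable-range MTRR: everything at or above the guest's physical-address width, plus bits 11:8 of a PhysBase register and bits 10:0 of a PhysMask register. A compilable sketch of the same mask construction:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Reserved-bit mask for a variable MTRR MSR, mirroring the checks above.
 * "is_base" distinguishes MTRRphysBasen (even MSRs) from MTRRphysMaskn. */
static uint64_t mtrr_reserved_mask(unsigned int maxphyaddr, bool is_base)
{
        uint64_t mask = ~0ULL << maxphyaddr;    /* bits >= MAXPHYADDR */

        if (is_base)
                mask |= 0xf00;                  /* bits 11:8 of PhysBase */
        else
                mask |= 0x7ff;                  /* bits 10:0 of PhysMask */
        return mask;
}

int main(void)
{
        uint64_t base = 0x00000000fee00006ULL;  /* base address + type WB(6) */
        uint64_t bad  = base | (1ULL << 45);    /* above a 40-bit width      */

        printf("%d\n", !(base & mtrr_reserved_mask(40, true)));  /* 1: ok  */
        printf("%d\n", !(bad  & mtrr_reserved_mask(40, true)));  /* 0: #GP */
        return 0;
}
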
@@ -1805,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1805 break; 1826 break;
1806 default: 1827 default:
1807 if (msr >= MSR_IA32_MC0_CTL && 1828 if (msr >= MSR_IA32_MC0_CTL &&
1808 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 1829 msr < MSR_IA32_MCx_CTL(bank_num)) {
1809 u32 offset = msr - MSR_IA32_MC0_CTL; 1830 u32 offset = msr - MSR_IA32_MC0_CTL;
1810 /* only 0 or all 1s can be written to IA32_MCi_CTL 1831 /* only 0 or all 1s can be written to IA32_MCi_CTL
1811 * some Linux kernels though clear bit 10 in bank 4 to 1832 * some Linux kernels though clear bit 10 in bank 4 to
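
The MSR_IA32_MCx_CTL(bank) conversions rely on the machine-check banks being laid out four MSRs apart starting at MSR_IA32_MC0_CTL (architecturally 0x400): CTL, STATUS, ADDR, MISC per bank. A one-liner sketch of that arithmetic (the 0x400 base is the architectural value, not something this patch defines):

#include <stdio.h>

#define MC0_CTL        0x400u                   /* architectural MC bank 0 base  */
#define MCx_CTL(bank)  (MC0_CTL + 4u * (bank))  /* CTL/STATUS/ADDR/MISC per bank */

int main(void)
{
        unsigned int bank_num = 32;  /* e.g. KVM_MAX_MCE_BANKS */

        /* Valid bank MSRs form the half-open range [MC0_CTL, MCx_CTL(bank_num)),
         * which is what the rewritten switch cases above express. */
        printf("first=%#x last=%#x\n", MC0_CTL, MCx_CTL(bank_num) - 1);
        return 0;
}
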
@@ -2164,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2164 2185
2165 case MSR_IA32_MCG_CTL: 2186 case MSR_IA32_MCG_CTL:
2166 case MSR_IA32_MCG_STATUS: 2187 case MSR_IA32_MCG_STATUS:
2167 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 2188 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2168 return set_msr_mce(vcpu, msr, data); 2189 return set_msr_mce(vcpu, msr, data);
2169 2190
2170 /* Performance counters are not protected by a CPUID bit, 2191 /* Performance counters are not protected by a CPUID bit,
@@ -2330,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2330 break; 2351 break;
2331 default: 2352 default:
2332 if (msr >= MSR_IA32_MC0_CTL && 2353 if (msr >= MSR_IA32_MC0_CTL &&
2333 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 2354 msr < MSR_IA32_MCx_CTL(bank_num)) {
2334 u32 offset = msr - MSR_IA32_MC0_CTL; 2355 u32 offset = msr - MSR_IA32_MC0_CTL;
2335 data = vcpu->arch.mce_banks[offset]; 2356 data = vcpu->arch.mce_banks[offset];
2336 break; 2357 break;
@@ -2419,7 +2440,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2419 case MSR_K7_HWCR: 2440 case MSR_K7_HWCR:
2420 case MSR_VM_HSAVE_PA: 2441 case MSR_VM_HSAVE_PA:
2421 case MSR_K7_EVNTSEL0: 2442 case MSR_K7_EVNTSEL0:
2443 case MSR_K7_EVNTSEL1:
2444 case MSR_K7_EVNTSEL2:
2445 case MSR_K7_EVNTSEL3:
2422 case MSR_K7_PERFCTR0: 2446 case MSR_K7_PERFCTR0:
2447 case MSR_K7_PERFCTR1:
2448 case MSR_K7_PERFCTR2:
2449 case MSR_K7_PERFCTR3:
2423 case MSR_K8_INT_PENDING_MSG: 2450 case MSR_K8_INT_PENDING_MSG:
2424 case MSR_AMD64_NB_CFG: 2451 case MSR_AMD64_NB_CFG:
2425 case MSR_FAM10H_MMIO_CONF_BASE: 2452 case MSR_FAM10H_MMIO_CONF_BASE:
@@ -2505,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2505 case MSR_IA32_MCG_CAP: 2532 case MSR_IA32_MCG_CAP:
2506 case MSR_IA32_MCG_CTL: 2533 case MSR_IA32_MCG_CTL:
2507 case MSR_IA32_MCG_STATUS: 2534 case MSR_IA32_MCG_STATUS:
2508 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 2535 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2509 return get_msr_mce(vcpu, msr, pdata); 2536 return get_msr_mce(vcpu, msr, pdata);
2510 case MSR_K7_CLK_CTL: 2537 case MSR_K7_CLK_CTL:
2511 /* 2538 /*
@@ -2823,7 +2850,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2823 if (unlikely(vcpu->arch.tsc_offset_adjustment)) { 2850 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2824 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); 2851 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2825 vcpu->arch.tsc_offset_adjustment = 0; 2852 vcpu->arch.tsc_offset_adjustment = 0;
2826 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 2853 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2827 } 2854 }
2828 2855
2829 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { 2856 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
@@ -4040,16 +4067,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
4040 kvm_x86_ops->get_segment(vcpu, var, seg); 4067 kvm_x86_ops->get_segment(vcpu, var, seg);
4041} 4068}
4042 4069
4043gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 4070gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
4071 struct x86_exception *exception)
4044{ 4072{
4045 gpa_t t_gpa; 4073 gpa_t t_gpa;
4046 struct x86_exception exception;
4047 4074
4048 BUG_ON(!mmu_is_nested(vcpu)); 4075 BUG_ON(!mmu_is_nested(vcpu));
4049 4076
4050 /* NPT walks are always user-walks */ 4077 /* NPT walks are always user-walks */
4051 access |= PFERR_USER_MASK; 4078 access |= PFERR_USER_MASK;
4052 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); 4079 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
4053 4080
4054 return t_gpa; 4081 return t_gpa;
4055} 4082}
@@ -4906,16 +4933,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4906 } 4933 }
4907} 4934}
4908 4935
4909static void inject_emulated_exception(struct kvm_vcpu *vcpu) 4936static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
4910{ 4937{
4911 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 4938 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4912 if (ctxt->exception.vector == PF_VECTOR) 4939 if (ctxt->exception.vector == PF_VECTOR)
4913 kvm_propagate_fault(vcpu, &ctxt->exception); 4940 return kvm_propagate_fault(vcpu, &ctxt->exception);
4914 else if (ctxt->exception.error_code_valid) 4941
4942 if (ctxt->exception.error_code_valid)
4915 kvm_queue_exception_e(vcpu, ctxt->exception.vector, 4943 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4916 ctxt->exception.error_code); 4944 ctxt->exception.error_code);
4917 else 4945 else
4918 kvm_queue_exception(vcpu, ctxt->exception.vector); 4946 kvm_queue_exception(vcpu, ctxt->exception.vector);
4947 return false;
4919} 4948}
4920 4949
4921static void init_emulate_ctxt(struct kvm_vcpu *vcpu) 4950static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -4972,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4972 5001
4973 ++vcpu->stat.insn_emulation_fail; 5002 ++vcpu->stat.insn_emulation_fail;
4974 trace_kvm_emulate_insn_failed(vcpu); 5003 trace_kvm_emulate_insn_failed(vcpu);
4975 if (!is_guest_mode(vcpu)) { 5004 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
4976 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 5005 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4977 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 5006 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4978 vcpu->run->internal.ndata = 0; 5007 vcpu->run->internal.ndata = 0;
@@ -5224,6 +5253,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5224 5253
5225 ctxt->interruptibility = 0; 5254 ctxt->interruptibility = 0;
5226 ctxt->have_exception = false; 5255 ctxt->have_exception = false;
5256 ctxt->exception.vector = -1;
5227 ctxt->perm_ok = false; 5257 ctxt->perm_ok = false;
5228 5258
5229 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; 5259 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
@@ -5276,8 +5306,9 @@ restart:
5276 } 5306 }
5277 5307
5278 if (ctxt->have_exception) { 5308 if (ctxt->have_exception) {
5279 inject_emulated_exception(vcpu);
5280 r = EMULATE_DONE; 5309 r = EMULATE_DONE;
5310 if (inject_emulated_exception(vcpu))
5311 return r;
5281 } else if (vcpu->arch.pio.count) { 5312 } else if (vcpu->arch.pio.count) {
5282 if (!vcpu->arch.pio.in) { 5313 if (!vcpu->arch.pio.in) {
5283 /* FIXME: return into emulator if single-stepping. */ 5314 /* FIXME: return into emulator if single-stepping. */
@@ -5545,7 +5576,7 @@ static void kvm_set_mmio_spte_mask(void)
5545 * entry to generate page fault with PFER.RSV = 1. 5576 * entry to generate page fault with PFER.RSV = 1.
5546 */ 5577 */
5547 /* Mask the reserved physical address bits. */ 5578 /* Mask the reserved physical address bits. */
5548 mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr; 5579 mask = rsvd_bits(maxphyaddr, 51);
5549 5580
5550 /* Bit 62 is always reserved for 32bit host. */ 5581 /* Bit 62 is always reserved for 32bit host. */
5551 mask |= 0x3ull << 62; 5582 mask |= 0x3ull << 62;
@@ -5576,7 +5607,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
5576 spin_lock(&kvm_lock); 5607 spin_lock(&kvm_lock);
5577 list_for_each_entry(kvm, &vm_list, vm_list) 5608 list_for_each_entry(kvm, &vm_list, vm_list)
5578 kvm_for_each_vcpu(i, vcpu, kvm) 5609 kvm_for_each_vcpu(i, vcpu, kvm)
5579 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); 5610 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
5580 atomic_set(&kvm_guest_has_master_clock, 0); 5611 atomic_set(&kvm_guest_has_master_clock, 0);
5581 spin_unlock(&kvm_lock); 5612 spin_unlock(&kvm_lock);
5582} 5613}
@@ -5989,6 +6020,44 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5989 kvm_apic_update_tmr(vcpu, tmr); 6020 kvm_apic_update_tmr(vcpu, tmr);
5990} 6021}
5991 6022
6023static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
6024{
6025 ++vcpu->stat.tlb_flush;
6026 kvm_x86_ops->tlb_flush(vcpu);
6027}
6028
6029void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
6030{
6031 struct page *page = NULL;
6032
6033 if (!irqchip_in_kernel(vcpu->kvm))
6034 return;
6035
6036 if (!kvm_x86_ops->set_apic_access_page_addr)
6037 return;
6038
6039 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
6040 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
6041
6042 /*
6043 * Do not pin apic access page in memory, the MMU notifier
6044 * will call us again if it is migrated or swapped out.
6045 */
6046 put_page(page);
6047}
6048EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
6049
6050void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
6051 unsigned long address)
6052{
6053 /*
6054 * The physical address of apic access page is stored in the VMCS.
6055 * Update it when it becomes invalid.
6056 */
6057 if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
6058 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
6059}
6060
5992/* 6061/*
5993 * Returns 1 to let __vcpu_run() continue the guest execution loop without 6062 * Returns 1 to let __vcpu_run() continue the guest execution loop without
5994 * exiting to the userspace. Otherwise, the value will be returned to the 6063 * exiting to the userspace. Otherwise, the value will be returned to the
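
kvm_vcpu_reload_apic_access_page deliberately drops its page reference, so the MMU notifier is free to migrate or swap the APIC access page; when that happens, kvm_arch_mmu_notifier_invalidate_page broadcasts KVM_REQ_APIC_PAGE_RELOAD and each vCPU re-resolves the address before its next entry. A stand-alone sketch of that request/re-resolve handshake (types and names invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define REQ_APIC_PAGE_RELOAD  (1u << 0)        /* illustrative request bit */

struct toy_vcpu {
        atomic_uint requests;
        unsigned long apic_access_hpa;         /* what the VMCS would hold */
};

static void make_request(struct toy_vcpu *v, unsigned int req)
{
        atomic_fetch_or(&v->requests, req);    /* notifier side */
}

static bool check_request(struct toy_vcpu *v, unsigned int req)
{
        return atomic_fetch_and(&v->requests, ~req) & req;   /* vcpu side */
}

int main(void)
{
        struct toy_vcpu vcpu = { .apic_access_hpa = 0x1000 };

        make_request(&vcpu, REQ_APIC_PAGE_RELOAD);     /* page migrated     */
        if (check_request(&vcpu, REQ_APIC_PAGE_RELOAD))
                vcpu.apic_access_hpa = 0x2000;         /* re-resolve gfn->hpa */
        printf("hpa=%#lx\n", vcpu.apic_access_hpa);
        return 0;
}
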
@@ -6018,7 +6087,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6018 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) 6087 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
6019 kvm_mmu_sync_roots(vcpu); 6088 kvm_mmu_sync_roots(vcpu);
6020 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) 6089 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
6021 kvm_x86_ops->tlb_flush(vcpu); 6090 kvm_vcpu_flush_tlb(vcpu);
6022 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { 6091 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
6023 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; 6092 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
6024 r = 0; 6093 r = 0;
@@ -6049,6 +6118,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6049 kvm_deliver_pmi(vcpu); 6118 kvm_deliver_pmi(vcpu);
6050 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) 6119 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
6051 vcpu_scan_ioapic(vcpu); 6120 vcpu_scan_ioapic(vcpu);
6121 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
6122 kvm_vcpu_reload_apic_access_page(vcpu);
6052 } 6123 }
6053 6124
6054 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 6125 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -6934,7 +7005,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
6934 kvm_rip_write(vcpu, 0); 7005 kvm_rip_write(vcpu, 0);
6935} 7006}
6936 7007
6937int kvm_arch_hardware_enable(void *garbage) 7008int kvm_arch_hardware_enable(void)
6938{ 7009{
6939 struct kvm *kvm; 7010 struct kvm *kvm;
6940 struct kvm_vcpu *vcpu; 7011 struct kvm_vcpu *vcpu;
@@ -6945,7 +7016,7 @@ int kvm_arch_hardware_enable(void *garbage)
6945 bool stable, backwards_tsc = false; 7016 bool stable, backwards_tsc = false;
6946 7017
6947 kvm_shared_msr_cpu_online(); 7018 kvm_shared_msr_cpu_online();
6948 ret = kvm_x86_ops->hardware_enable(garbage); 7019 ret = kvm_x86_ops->hardware_enable();
6949 if (ret != 0) 7020 if (ret != 0)
6950 return ret; 7021 return ret;
6951 7022
@@ -6954,7 +7025,7 @@ int kvm_arch_hardware_enable(void *garbage)
6954 list_for_each_entry(kvm, &vm_list, vm_list) { 7025 list_for_each_entry(kvm, &vm_list, vm_list) {
6955 kvm_for_each_vcpu(i, vcpu, kvm) { 7026 kvm_for_each_vcpu(i, vcpu, kvm) {
6956 if (!stable && vcpu->cpu == smp_processor_id()) 7027 if (!stable && vcpu->cpu == smp_processor_id())
6957 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); 7028 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6958 if (stable && vcpu->arch.last_host_tsc > local_tsc) { 7029 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6959 backwards_tsc = true; 7030 backwards_tsc = true;
6960 if (vcpu->arch.last_host_tsc > max_tsc) 7031 if (vcpu->arch.last_host_tsc > max_tsc)
@@ -7008,8 +7079,7 @@ int kvm_arch_hardware_enable(void *garbage)
7008 kvm_for_each_vcpu(i, vcpu, kvm) { 7079 kvm_for_each_vcpu(i, vcpu, kvm) {
7009 vcpu->arch.tsc_offset_adjustment += delta_cyc; 7080 vcpu->arch.tsc_offset_adjustment += delta_cyc;
7010 vcpu->arch.last_host_tsc = local_tsc; 7081 vcpu->arch.last_host_tsc = local_tsc;
7011 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, 7082 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7012 &vcpu->requests);
7013 } 7083 }
7014 7084
7015 /* 7085 /*
@@ -7026,10 +7096,10 @@ int kvm_arch_hardware_enable(void *garbage)
7026 return 0; 7096 return 0;
7027} 7097}
7028 7098
7029void kvm_arch_hardware_disable(void *garbage) 7099void kvm_arch_hardware_disable(void)
7030{ 7100{
7031 kvm_x86_ops->hardware_disable(garbage); 7101 kvm_x86_ops->hardware_disable();
7032 drop_user_return_notifiers(garbage); 7102 drop_user_return_notifiers();
7033} 7103}
7034 7104
7035int kvm_arch_hardware_setup(void) 7105int kvm_arch_hardware_setup(void)
@@ -7146,6 +7216,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
7146 static_key_slow_dec(&kvm_no_apic_vcpu); 7216 static_key_slow_dec(&kvm_no_apic_vcpu);
7147} 7217}
7148 7218
7219void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
7220{
7221 kvm_x86_ops->sched_in(vcpu, cpu);
7222}
7223
7149int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 7224int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7150{ 7225{
7151 if (type) 7226 if (type)
@@ -7237,10 +7312,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
7237 kfree(kvm->arch.vpic); 7312 kfree(kvm->arch.vpic);
7238 kfree(kvm->arch.vioapic); 7313 kfree(kvm->arch.vioapic);
7239 kvm_free_vcpus(kvm); 7314 kvm_free_vcpus(kvm);
7240 if (kvm->arch.apic_access_page)
7241 put_page(kvm->arch.apic_access_page);
7242 if (kvm->arch.ept_identity_pagetable)
7243 put_page(kvm->arch.ept_identity_pagetable);
7244 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7315 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7245} 7316}
7246 7317
@@ -7643,3 +7714,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
7643EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 7714EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7644EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); 7715EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7645EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); 7716EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
7717EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 306a1b77581f..7cb9c45a5fe0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
88 vcpu->arch.mmio_gva = gva & PAGE_MASK; 88 vcpu->arch.mmio_gva = gva & PAGE_MASK;
89 vcpu->arch.access = access; 89 vcpu->arch.access = access;
90 vcpu->arch.mmio_gfn = gfn; 90 vcpu->arch.mmio_gfn = gfn;
91 vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
92}
93
94static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
95{
96 return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation;
91} 97}
92 98
93/* 99/*
94 * Clear the mmio cache info for the given gva, 100 * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we
95 * specially, if gva is ~0ul, we clear all mmio cache info. 101 * clear all mmio cache info.
96 */ 102 */
103#define MMIO_GVA_ANY (~(gva_t)0)
104
97static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) 105static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
98{ 106{
99 if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) 107 if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
100 return; 108 return;
101 109
102 vcpu->arch.mmio_gva = 0; 110 vcpu->arch.mmio_gva = 0;
@@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
104 112
105static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) 113static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
106{ 114{
107 if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) 115 if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva &&
116 vcpu->arch.mmio_gva == (gva & PAGE_MASK))
108 return true; 117 return true;
109 118
110 return false; 119 return false;
@@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
112 121
113static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) 122static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
114{ 123{
115 if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) 124 if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn &&
125 vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
116 return true; 126 return true;
117 127
118 return false; 128 return false;
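
The mmio_gen field added above lets the per-vCPU MMIO cache be invalidated lazily: instead of clearing every vCPU's cached gva/gfn when memslots change, the memslot generation is bumped and stale entries simply stop matching. A minimal sketch of that idea (structure and names are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mmio_cache {
        uint64_t gfn;
        uint64_t gen;      /* generation the entry was cached under */
};

static uint64_t memslots_generation = 1;

static void cache_mmio(struct mmio_cache *c, uint64_t gfn)
{
        c->gfn = gfn;
        c->gen = memslots_generation;
}

static bool cache_match(const struct mmio_cache *c, uint64_t gfn)
{
        /* A generation mismatch means memslots changed since we cached. */
        return c->gen == memslots_generation && c->gfn == gfn;
}

int main(void)
{
        struct mmio_cache c = { 0 };

        cache_mmio(&c, 0xfee00);
        printf("%d\n", cache_match(&c, 0xfee00));  /* 1: still valid       */
        memslots_generation++;                     /* memslot update       */
        printf("%d\n", cache_match(&c, 0xfee00));  /* 0: silently dropped  */
        return 0;
}
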
@@ -149,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
149 gva_t addr, void *val, unsigned int bytes, 159 gva_t addr, void *val, unsigned int bytes,
150 struct x86_exception *exception); 160 struct x86_exception *exception);
151 161
162bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
163
152#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ 164#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
153 | XSTATE_BNDREGS | XSTATE_BNDCSR) 165 | XSTATE_BNDREGS | XSTATE_BNDCSR)
154extern u64 host_xcr0; 166extern u64 host_xcr0;
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 5f578e850fc5..90d734bbf467 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn,
402 402
403static int mn_clear_flush_young(struct mmu_notifier *mn, 403static int mn_clear_flush_young(struct mmu_notifier *mn,
404 struct mm_struct *mm, 404 struct mm_struct *mm,
405 unsigned long address) 405 unsigned long start,
406 unsigned long end)
406{ 407{
407 __mn_flush_page(mn, address); 408 for (; start < end; start += PAGE_SIZE)
409 __mn_flush_page(mn, start);
408 410
409 return 0; 411 return 0;
410} 412}
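
With clear_flush_young now taking a [start, end) range, a secondary MMU that maps at a finer granularity than the primary MMU walks the range page by page, exactly as mn_clear_flush_young does above. A stand-alone sketch of that loop (the per-page hook is a made-up stand-in for __mn_flush_page):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Hypothetical per-page flush hook standing in for __mn_flush_page(). */
static void flush_one_page(unsigned long addr)
{
        printf("flush %#lx\n", addr);
}

static int clear_flush_young_range(unsigned long start, unsigned long end)
{
        for (; start < end; start += PAGE_SIZE)
                flush_one_page(start);
        return 0;   /* "not young" in this toy version */
}

int main(void)
{
        /* A 3-page range produces exactly three per-page flushes. */
        clear_flush_young_range(0x10000, 0x10000 + 3 * PAGE_SIZE);
        return 0;
}
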
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c121bb65..2f2aac8448a4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,26 +25,25 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/types.h> 26#include <linux/types.h>
27 27
28#define VGIC_NR_IRQS 256 28#define VGIC_NR_IRQS_LEGACY 256
29#define VGIC_NR_SGIS 16 29#define VGIC_NR_SGIS 16
30#define VGIC_NR_PPIS 16 30#define VGIC_NR_PPIS 16
31#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) 31#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
32#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
33#define VGIC_MAX_CPUS KVM_MAX_VCPUS
34 32
35#define VGIC_V2_MAX_LRS (1 << 6) 33#define VGIC_V2_MAX_LRS (1 << 6)
36#define VGIC_V3_MAX_LRS 16 34#define VGIC_V3_MAX_LRS 16
35#define VGIC_MAX_IRQS 1024
37 36
38/* Sanity checks... */ 37/* Sanity checks... */
39#if (VGIC_MAX_CPUS > 8) 38#if (KVM_MAX_VCPUS > 8)
40#error Invalid number of CPU interfaces 39#error Invalid number of CPU interfaces
41#endif 40#endif
42 41
43#if (VGIC_NR_IRQS & 31) 42#if (VGIC_NR_IRQS_LEGACY & 31)
44#error "VGIC_NR_IRQS must be a multiple of 32" 43#error "VGIC_NR_IRQS must be a multiple of 32"
45#endif 44#endif
46 45
47#if (VGIC_NR_IRQS > 1024) 46#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
48#error "VGIC_NR_IRQS must be <= 1024" 47#error "VGIC_NR_IRQS must be <= 1024"
49#endif 48#endif
50 49
@@ -54,19 +53,33 @@
54 * - a bunch of shared interrupts (SPI) 53 * - a bunch of shared interrupts (SPI)
55 */ 54 */
56struct vgic_bitmap { 55struct vgic_bitmap {
57 union { 56 /*
58 u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; 57 * - One UL per VCPU for private interrupts (assumes UL is at
59 DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); 58 * least 32 bits)
60 } percpu[VGIC_MAX_CPUS]; 59 * - As many UL as necessary for shared interrupts.
61 union { 60 *
62 u32 reg[VGIC_NR_SHARED_IRQS / 32]; 61 * The private interrupts are accessed via the "private"
63 DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); 62 * field, one UL per vcpu (the state for vcpu n is in
64 } shared; 63 * private[n]). The shared interrupts are accessed via the
64 * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
65 */
66 unsigned long *private;
67 unsigned long *shared;
65}; 68};
66 69
67struct vgic_bytemap { 70struct vgic_bytemap {
68 u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; 71 /*
69 u32 shared[VGIC_NR_SHARED_IRQS / 4]; 72 * - 8 u32 per VCPU for private interrupts
73 * - As many u32 as necessary for shared interrupts.
74 *
75 * The private interrupts are accessed via the "private"
76 * field, (the state for vcpu n is in private[n*8] to
77 * private[n*8 + 7]). The shared interrupts are accessed via
78 * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
79 * shared[(n-32)/4] word).
80 */
81 u32 *private;
82 u32 *shared;
70}; 83};
71 84
72struct kvm_vcpu; 85struct kvm_vcpu;
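
The reworked vgic_bitmap keeps one unsigned long of private (SGI/PPI) state per VCPU and a single dynamically sized bitmap for the shared SPIs, as the comment above describes: IRQs below 32 are per-VCPU, and IRQ n >= 32 lives at bit n-32 of the shared map. A small sketch of that addressing rule with invented names:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_PRIVATE_IRQS 32   /* 16 SGIs + 16 PPIs */

struct toy_vgic_bitmap {
        unsigned long *private;   /* one word per VCPU */
        unsigned long *shared;    /* one bit per SPI   */
};

static bool irq_is_set(const struct toy_vgic_bitmap *b, int vcpu, int irq)
{
        size_t longbits = 8 * sizeof(unsigned long);

        if (irq < NR_PRIVATE_IRQS)
                return b->private[vcpu] & (1UL << irq);
        return b->shared[(irq - NR_PRIVATE_IRQS) / longbits] &
               (1UL << ((irq - NR_PRIVATE_IRQS) % longbits));
}

int main(void)
{
        int nr_cpus = 4, nr_irqs = 256;
        size_t longbits = 8 * sizeof(unsigned long);
        struct toy_vgic_bitmap b = {
                .private = calloc(nr_cpus, sizeof(unsigned long)),
                .shared  = calloc((nr_irqs - NR_PRIVATE_IRQS + longbits - 1) / longbits,
                                  sizeof(unsigned long)),
        };

        b.private[1] |= 1UL << 27;   /* PPI 27 pending on VCPU 1 only */
        printf("%d %d\n", irq_is_set(&b, 1, 27), irq_is_set(&b, 0, 27));
        free(b.private);
        free(b.shared);
        return 0;
}
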
@@ -127,6 +140,9 @@ struct vgic_dist {
127 bool in_kernel; 140 bool in_kernel;
128 bool ready; 141 bool ready;
129 142
143 int nr_cpus;
144 int nr_irqs;
145
130 /* Virtual control interface mapping */ 146 /* Virtual control interface mapping */
131 void __iomem *vctrl_base; 147 void __iomem *vctrl_base;
132 148
@@ -140,11 +156,25 @@ struct vgic_dist {
140 /* Interrupt enabled (one bit per IRQ) */ 156 /* Interrupt enabled (one bit per IRQ) */
141 struct vgic_bitmap irq_enabled; 157 struct vgic_bitmap irq_enabled;
142 158
143 /* Interrupt 'pin' level */ 159 /* Level-triggered interrupt external input is asserted */
144 struct vgic_bitmap irq_state; 160 struct vgic_bitmap irq_level;
145 161
146 /* Level-triggered interrupt in progress */ 162 /*
147 struct vgic_bitmap irq_active; 163 * Interrupt state is pending on the distributor
164 */
165 struct vgic_bitmap irq_pending;
166
167 /*
168 * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
169 * interrupts. Essentially holds the state of the flip-flop in
170 * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
171 * Once set, it is only cleared for level-triggered interrupts on
172 * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
173 */
174 struct vgic_bitmap irq_soft_pend;
175
176 /* Level-triggered interrupt queued on VCPU interface */
177 struct vgic_bitmap irq_queued;
148 178
149 /* Interrupt priority. Not used yet. */ 179 /* Interrupt priority. Not used yet. */
150 struct vgic_bytemap irq_priority; 180 struct vgic_bytemap irq_priority;
@@ -152,15 +182,36 @@ struct vgic_dist {
152 /* Level/edge triggered */ 182 /* Level/edge triggered */
153 struct vgic_bitmap irq_cfg; 183 struct vgic_bitmap irq_cfg;
154 184
155 /* Source CPU per SGI and target CPU */ 185 /*
156 u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; 186 * Source CPU per SGI and target CPU:
157 187 *
158 /* Target CPU for each IRQ */ 188 * Each byte represent a SGI observable on a VCPU, each bit of
159 u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; 189 * this byte indicating if the corresponding VCPU has
160 struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; 190 * generated this interrupt. This is a GICv2 feature only.
191 *
192 * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
193 * the SGIs observable on VCPUn.
194 */
195 u8 *irq_sgi_sources;
196
197 /*
198 * Target CPU for each SPI:
199 *
200 * Array of available SPI, each byte indicating the target
201 * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32].
202 */
203 u8 *irq_spi_cpu;
204
205 /*
206 * Reverse lookup of irq_spi_cpu for faster compute pending:
207 *
208 * Array of bitmaps, one per VCPU, describing if IRQn is
209 * routed to a particular VCPU.
210 */
211 struct vgic_bitmap *irq_spi_target;
161 212
162 /* Bitmap indicating which CPU has something pending */ 213 /* Bitmap indicating which CPU has something pending */
163 unsigned long irq_pending_on_cpu; 214 unsigned long *irq_pending_on_cpu;
164#endif 215#endif
165}; 216};
166 217
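
irq_sgi_sources is now a flat byte array: byte vcpu*16 + sgi records, one bit per source VCPU, who raised SGI number sgi towards that VCPU, as the comment above spells out. A tiny sketch of the indexing (names invented):

#include <stdio.h>
#include <string.h>

#define NR_SGIS 16

/* byte [vcpu*16 + sgi] holds one bit per source VCPU, as described above */
static unsigned char irq_sgi_sources[8 * NR_SGIS];

static void send_sgi(int target_vcpu, int sgi, int source_vcpu)
{
        irq_sgi_sources[target_vcpu * NR_SGIS + sgi] |= 1u << source_vcpu;
}

int main(void)
{
        memset(irq_sgi_sources, 0, sizeof(irq_sgi_sources));
        send_sgi(2 /* target */, 5 /* SGI */, 0 /* from VCPU0 */);
        printf("%#x\n", (unsigned)irq_sgi_sources[2 * NR_SGIS + 5]);   /* 0x1 */
        return 0;
}
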
@@ -190,11 +241,11 @@ struct vgic_v3_cpu_if {
190struct vgic_cpu { 241struct vgic_cpu {
191#ifdef CONFIG_KVM_ARM_VGIC 242#ifdef CONFIG_KVM_ARM_VGIC
192 /* per IRQ to LR mapping */ 243 /* per IRQ to LR mapping */
193 u8 vgic_irq_lr_map[VGIC_NR_IRQS]; 244 u8 *vgic_irq_lr_map;
194 245
195 /* Pending interrupts on this VCPU */ 246 /* Pending interrupts on this VCPU */
196 DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); 247 DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
197 DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); 248 unsigned long *pending_shared;
198 249
199 /* Bitmap of used/free list registers */ 250 /* Bitmap of used/free list registers */
200 DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); 251 DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
@@ -225,7 +276,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
225int kvm_vgic_hyp_init(void); 276int kvm_vgic_hyp_init(void);
226int kvm_vgic_init(struct kvm *kvm); 277int kvm_vgic_init(struct kvm *kvm);
227int kvm_vgic_create(struct kvm *kvm); 278int kvm_vgic_create(struct kvm *kvm);
228int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); 279void kvm_vgic_destroy(struct kvm *kvm);
280void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
229void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); 281void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
230void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); 282void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
231int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 283int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4c33b34fe3f..28be31f49250 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,12 +136,11 @@ static inline bool is_error_page(struct page *page)
136#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 136#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
137#define KVM_REQ_ENABLE_IBS 23 137#define KVM_REQ_ENABLE_IBS 23
138#define KVM_REQ_DISABLE_IBS 24 138#define KVM_REQ_DISABLE_IBS 24
139#define KVM_REQ_APIC_PAGE_RELOAD 25
139 140
140#define KVM_USERSPACE_IRQ_SOURCE_ID 0 141#define KVM_USERSPACE_IRQ_SOURCE_ID 0
141#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 142#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
142 143
143struct kvm;
144struct kvm_vcpu;
145extern struct kmem_cache *kvm_vcpu_cache; 144extern struct kmem_cache *kvm_vcpu_cache;
146 145
147extern spinlock_t kvm_lock; 146extern spinlock_t kvm_lock;
@@ -200,6 +199,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
200int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); 199int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
201#endif 200#endif
202 201
202/*
203 * Carry out a gup that requires IO. Allow the mm to relinquish the mmap
204 * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL
205 * controls whether we retry the gup one more time to completion in that case.
206 * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp
207 * handler.
208 */
209int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
210 unsigned long addr, bool write_fault,
211 struct page **pagep);
212
203enum { 213enum {
204 OUTSIDE_GUEST_MODE, 214 OUTSIDE_GUEST_MODE,
205 IN_GUEST_MODE, 215 IN_GUEST_MODE,
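
The kvm_get_user_page_io comment above pairs with the new FOLL_TRIED flag in the mm/gup.c hunk further down: a first fault attempt may bail out instead of sleeping on I/O while holding the mmap semaphore, and the caller then retries once, telling the fault path it is the second pass. A toy model of that two-pass shape only, with invented names and flags (not the real gup API):

#include <stdbool.h>
#include <stdio.h>

enum fault_flags { TRY_NOWAIT = 1, TRIED = 2 };   /* invented stand-ins */

/* Pretend the page needs I/O the first time it is touched. */
static bool fault_in(unsigned long addr, int flags, int *io_left)
{
        (void)addr;
        if (*io_left && (flags & TRY_NOWAIT)) {
                (*io_left)--;       /* would have slept; bail out instead */
                return false;
        }
        return true;                /* second pass: wait for the I/O */
}

static bool get_page_two_pass(unsigned long addr)
{
        int io_left = 1;

        if (fault_in(addr, TRY_NOWAIT, &io_left))
                return true;
        /* drop the lock, let the I/O make progress, then retry exactly once */
        return fault_in(addr, TRIED, &io_left);
}

int main(void)
{
        printf("%d\n", get_page_two_pass(0x1000));   /* 1 */
        return 0;
}
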
@@ -325,8 +335,6 @@ struct kvm_kernel_irq_routing_entry {
325 struct hlist_node link; 335 struct hlist_node link;
326}; 336};
327 337
328struct kvm_irq_routing_table;
329
330#ifndef KVM_PRIVATE_MEM_SLOTS 338#ifndef KVM_PRIVATE_MEM_SLOTS
331#define KVM_PRIVATE_MEM_SLOTS 0 339#define KVM_PRIVATE_MEM_SLOTS 0
332#endif 340#endif
@@ -528,6 +536,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
528unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); 536unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
529unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); 537unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
530unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); 538unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
539unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
540 bool *writable);
531void kvm_release_page_clean(struct page *page); 541void kvm_release_page_clean(struct page *page);
532void kvm_release_page_dirty(struct page *page); 542void kvm_release_page_dirty(struct page *page);
533void kvm_set_page_accessed(struct page *page); 543void kvm_set_page_accessed(struct page *page);
@@ -579,6 +589,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
579void kvm_reload_remote_mmus(struct kvm *kvm); 589void kvm_reload_remote_mmus(struct kvm *kvm);
580void kvm_make_mclock_inprogress_request(struct kvm *kvm); 590void kvm_make_mclock_inprogress_request(struct kvm *kvm);
581void kvm_make_scan_ioapic_request(struct kvm *kvm); 591void kvm_make_scan_ioapic_request(struct kvm *kvm);
592bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
582 593
583long kvm_arch_dev_ioctl(struct file *filp, 594long kvm_arch_dev_ioctl(struct file *filp,
584 unsigned int ioctl, unsigned long arg); 595 unsigned int ioctl, unsigned long arg);
@@ -624,6 +635,8 @@ void kvm_arch_exit(void);
624int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); 635int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
625void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); 636void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
626 637
638void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
639
627void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); 640void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
628void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 641void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
629void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); 642void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
@@ -632,8 +645,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
632int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); 645int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
633void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); 646void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
634 647
635int kvm_arch_hardware_enable(void *garbage); 648int kvm_arch_hardware_enable(void);
636void kvm_arch_hardware_disable(void *garbage); 649void kvm_arch_hardware_disable(void);
637int kvm_arch_hardware_setup(void); 650int kvm_arch_hardware_setup(void);
638void kvm_arch_hardware_unsetup(void); 651void kvm_arch_hardware_unsetup(void);
639void kvm_arch_check_processor_compat(void *rtn); 652void kvm_arch_check_processor_compat(void *rtn);
@@ -1034,8 +1047,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
1034 1047
1035extern bool kvm_rebooting; 1048extern bool kvm_rebooting;
1036 1049
1037struct kvm_device_ops;
1038
1039struct kvm_device { 1050struct kvm_device {
1040 struct kvm_device_ops *ops; 1051 struct kvm_device_ops *ops;
1041 struct kvm *kvm; 1052 struct kvm *kvm;
@@ -1068,12 +1079,10 @@ struct kvm_device_ops {
1068void kvm_device_get(struct kvm_device *dev); 1079void kvm_device_get(struct kvm_device *dev);
1069void kvm_device_put(struct kvm_device *dev); 1080void kvm_device_put(struct kvm_device *dev);
1070struct kvm_device *kvm_device_from_filp(struct file *filp); 1081struct kvm_device *kvm_device_from_filp(struct file *filp);
1082int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
1071 1083
1072extern struct kvm_device_ops kvm_mpic_ops; 1084extern struct kvm_device_ops kvm_mpic_ops;
1073extern struct kvm_device_ops kvm_xics_ops; 1085extern struct kvm_device_ops kvm_xics_ops;
1074extern struct kvm_device_ops kvm_vfio_ops;
1075extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
1076extern struct kvm_device_ops kvm_flic_ops;
1077 1086
1078#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1087#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1079 1088
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc95..b606bb689a3e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
17#ifndef __KVM_TYPES_H__ 17#ifndef __KVM_TYPES_H__
18#define __KVM_TYPES_H__ 18#define __KVM_TYPES_H__
19 19
20struct kvm;
21struct kvm_async_pf;
22struct kvm_device_ops;
23struct kvm_interrupt;
24struct kvm_irq_routing_table;
25struct kvm_memory_slot;
26struct kvm_one_reg;
27struct kvm_run;
28struct kvm_userspace_memory_region;
29struct kvm_vcpu;
30struct kvm_vcpu_init;
31
32enum kvm_mr_change;
33
20#include <asm/types.h> 34#include <asm/types.h>
21 35
22/* 36/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8981cc882ed2..0f4196a0bc20 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
1985#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ 1985#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
1986#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ 1986#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
1987#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ 1987#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
1988#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
1988 1989
1989typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, 1990typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
1990 void *data); 1991 void *data);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 27288692241e..88787bb4b3b9 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -57,10 +57,13 @@ struct mmu_notifier_ops {
57 * pte. This way the VM will provide proper aging to the 57 * pte. This way the VM will provide proper aging to the
58 * accesses to the page through the secondary MMUs and not 58 * accesses to the page through the secondary MMUs and not
59 * only to the ones through the Linux pte. 59 * only to the ones through the Linux pte.
60 * Start-end is necessary in case the secondary MMU is mapping the page
61 * at a smaller granularity than the primary MMU.
60 */ 62 */
61 int (*clear_flush_young)(struct mmu_notifier *mn, 63 int (*clear_flush_young)(struct mmu_notifier *mn,
62 struct mm_struct *mm, 64 struct mm_struct *mm,
63 unsigned long address); 65 unsigned long start,
66 unsigned long end);
64 67
65 /* 68 /*
66 * test_young is called to check the young/accessed bitflag in 69 * test_young is called to check the young/accessed bitflag in
@@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
175extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); 178extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
176extern void __mmu_notifier_release(struct mm_struct *mm); 179extern void __mmu_notifier_release(struct mm_struct *mm);
177extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 180extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
178 unsigned long address); 181 unsigned long start,
182 unsigned long end);
179extern int __mmu_notifier_test_young(struct mm_struct *mm, 183extern int __mmu_notifier_test_young(struct mm_struct *mm,
180 unsigned long address); 184 unsigned long address);
181extern void __mmu_notifier_change_pte(struct mm_struct *mm, 185extern void __mmu_notifier_change_pte(struct mm_struct *mm,
@@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
194} 198}
195 199
196static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, 200static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
197 unsigned long address) 201 unsigned long start,
202 unsigned long end)
198{ 203{
199 if (mm_has_notifiers(mm)) 204 if (mm_has_notifiers(mm))
200 return __mmu_notifier_clear_flush_young(mm, address); 205 return __mmu_notifier_clear_flush_young(mm, start, end);
201 return 0; 206 return 0;
202} 207}
203 208
@@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
255 unsigned long ___address = __address; \ 260 unsigned long ___address = __address; \
256 __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ 261 __young = ptep_clear_flush_young(___vma, ___address, __ptep); \
257 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ 262 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
258 ___address); \ 263 ___address, \
264 ___address + \
265 PAGE_SIZE); \
259 __young; \ 266 __young; \
260}) 267})
261 268
@@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
266 unsigned long ___address = __address; \ 273 unsigned long ___address = __address; \
267 __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ 274 __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \
268 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ 275 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
269 ___address); \ 276 ___address, \
277 ___address + \
278 PMD_SIZE); \
270 __young; \ 279 __young; \
271}) 280})
272 281
@@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
301} 310}
302 311
303static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, 312static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
304 unsigned long address) 313 unsigned long start,
314 unsigned long end)
305{ 315{
306 return 0; 316 return 0;
307} 317}
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 908925ace776..6edf1f2028cd 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq,
95 __entry->coalesced ? " (coalesced)" : "") 95 __entry->coalesced ? " (coalesced)" : "")
96); 96);
97 97
98TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
99 TP_PROTO(__u64 e),
100 TP_ARGS(e),
101
102 TP_STRUCT__entry(
103 __field( __u64, e )
104 ),
105
106 TP_fast_assign(
107 __entry->e = e;
108 ),
109
110 TP_printk("dst %x vec=%u (%s|%s|%s%s)",
111 (u8)(__entry->e >> 56), (u8)__entry->e,
112 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
113 (__entry->e & (1<<11)) ? "logical" : "physical",
114 (__entry->e & (1<<15)) ? "level" : "edge",
115 (__entry->e & (1<<16)) ? "|masked" : "")
116);
117
98TRACE_EVENT(kvm_msi_set_irq, 118TRACE_EVENT(kvm_msi_set_irq,
99 TP_PROTO(__u64 address, __u64 data), 119 TP_PROTO(__u64 address, __u64 data),
100 TP_ARGS(address, data), 120 TP_ARGS(address, data),
@@ -205,24 +225,26 @@ TRACE_EVENT(kvm_fpu,
205); 225);
206 226
207TRACE_EVENT(kvm_age_page, 227TRACE_EVENT(kvm_age_page,
208 TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), 228 TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref),
209 TP_ARGS(hva, slot, ref), 229 TP_ARGS(gfn, level, slot, ref),
210 230
211 TP_STRUCT__entry( 231 TP_STRUCT__entry(
212 __field( u64, hva ) 232 __field( u64, hva )
213 __field( u64, gfn ) 233 __field( u64, gfn )
234 __field( u8, level )
214 __field( u8, referenced ) 235 __field( u8, referenced )
215 ), 236 ),
216 237
217 TP_fast_assign( 238 TP_fast_assign(
218 __entry->hva = hva; 239 __entry->gfn = gfn;
219 __entry->gfn = 240 __entry->level = level;
220 slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); 241 __entry->hva = ((gfn - slot->base_gfn) <<
242 PAGE_SHIFT) + slot->userspace_addr;
221 __entry->referenced = ref; 243 __entry->referenced = ref;
222 ), 244 ),
223 245
224 TP_printk("hva %llx gfn %llx %s", 246 TP_printk("hva %llx gfn %llx level %u %s",
225 __entry->hva, __entry->gfn, 247 __entry->hva, __entry->gfn, __entry->level,
226 __entry->referenced ? "YOUNG" : "OLD") 248 __entry->referenced ? "YOUNG" : "OLD")
227); 249);
228 250
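
The reworked kvm_age_page tracepoint receives a gfn and recovers the hva from the memslot, inverting the old hva-to-gfn computation: hva = ((gfn - base_gfn) << PAGE_SHIFT) + userspace_addr. A quick check that the two conversions round-trip (toy memslot, arbitrary addresses):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct toy_memslot {
        uint64_t base_gfn;
        uint64_t userspace_addr;
};

static uint64_t slot_hva_to_gfn(const struct toy_memslot *s, uint64_t hva)
{
        return s->base_gfn + ((hva - s->userspace_addr) >> PAGE_SHIFT);
}

static uint64_t slot_gfn_to_hva(const struct toy_memslot *s, uint64_t gfn)
{
        return ((gfn - s->base_gfn) << PAGE_SHIFT) + s->userspace_addr;
}

int main(void)
{
        struct toy_memslot slot = {
                .base_gfn = 0x100,
                .userspace_addr = 0x7f0000000000ULL,
        };
        uint64_t hva = 0x7f0000042000ULL;

        /* hva -> gfn -> hva lands back on the original address */
        printf("%d\n", slot_gfn_to_hva(&slot, slot_hva_to_gfn(&slot, hva)) == hva);
        return 0;
}
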
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cf3a2ff440e4..60768822b140 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -654,9 +654,7 @@ struct kvm_ppc_smmu_info {
654#endif 654#endif
655/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ 655/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
656#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 656#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
657#ifdef __KVM_HAVE_USER_NMI
658#define KVM_CAP_USER_NMI 22 657#define KVM_CAP_USER_NMI 22
659#endif
660#ifdef __KVM_HAVE_GUEST_DEBUG 658#ifdef __KVM_HAVE_GUEST_DEBUG
661#define KVM_CAP_SET_GUEST_DEBUG 23 659#define KVM_CAP_SET_GUEST_DEBUG 23
662#endif 660#endif
@@ -738,9 +736,7 @@ struct kvm_ppc_smmu_info {
738#define KVM_CAP_PPC_GET_SMMU_INFO 78 736#define KVM_CAP_PPC_GET_SMMU_INFO 78
739#define KVM_CAP_S390_COW 79 737#define KVM_CAP_S390_COW 79
740#define KVM_CAP_PPC_ALLOC_HTAB 80 738#define KVM_CAP_PPC_ALLOC_HTAB 80
741#ifdef __KVM_HAVE_READONLY_MEM
742#define KVM_CAP_READONLY_MEM 81 739#define KVM_CAP_READONLY_MEM 81
743#endif
744#define KVM_CAP_IRQFD_RESAMPLE 82 740#define KVM_CAP_IRQFD_RESAMPLE 82
745#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 741#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
746#define KVM_CAP_PPC_HTAB_FD 84 742#define KVM_CAP_PPC_HTAB_FD 84
@@ -947,15 +943,25 @@ struct kvm_device_attr {
947 __u64 addr; /* userspace address of attr data */ 943 __u64 addr; /* userspace address of attr data */
948}; 944};
949 945
950#define KVM_DEV_TYPE_FSL_MPIC_20 1
951#define KVM_DEV_TYPE_FSL_MPIC_42 2
952#define KVM_DEV_TYPE_XICS 3
953#define KVM_DEV_TYPE_VFIO 4
954#define KVM_DEV_VFIO_GROUP 1 946#define KVM_DEV_VFIO_GROUP 1
955#define KVM_DEV_VFIO_GROUP_ADD 1 947#define KVM_DEV_VFIO_GROUP_ADD 1
956#define KVM_DEV_VFIO_GROUP_DEL 2 948#define KVM_DEV_VFIO_GROUP_DEL 2
957#define KVM_DEV_TYPE_ARM_VGIC_V2 5 949
958#define KVM_DEV_TYPE_FLIC 6 950enum kvm_device_type {
951 KVM_DEV_TYPE_FSL_MPIC_20 = 1,
952#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20
953 KVM_DEV_TYPE_FSL_MPIC_42,
954#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42
955 KVM_DEV_TYPE_XICS,
956#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS
957 KVM_DEV_TYPE_VFIO,
958#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO
959 KVM_DEV_TYPE_ARM_VGIC_V2,
960#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
961 KVM_DEV_TYPE_FLIC,
962#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
963 KVM_DEV_TYPE_MAX,
964};
959 965
960/* 966/*
961 * ioctls for VM fds 967 * ioctls for VM fds
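
Turning the KVM_DEV_TYPE_* constants into an enum while keeping a #define for each name preserves source compatibility: userspace that tests "#ifdef KVM_DEV_TYPE_VFIO" keeps building, and the kernel gains KVM_DEV_TYPE_MAX for sizing a device-ops table. The same pattern in miniature, with toy names:

#include <stdio.h>

enum toy_device_type {
        TOY_DEV_TYPE_MPIC = 1,
#define TOY_DEV_TYPE_MPIC TOY_DEV_TYPE_MPIC   /* keeps #ifdef checks working */
        TOY_DEV_TYPE_VFIO,
#define TOY_DEV_TYPE_VFIO TOY_DEV_TYPE_VFIO
        TOY_DEV_TYPE_MAX,
};

static const char *names[TOY_DEV_TYPE_MAX] = {   /* MAX sizes the table */
        [TOY_DEV_TYPE_MPIC] = "mpic",
        [TOY_DEV_TYPE_VFIO] = "vfio",
};

int main(void)
{
#ifdef TOY_DEV_TYPE_VFIO
        printf("%s\n", names[TOY_DEV_TYPE_VFIO]);
#endif
        return 0;
}
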
@@ -1093,7 +1099,7 @@ struct kvm_s390_ucas_mapping {
1093#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) 1099#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
1094#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) 1100#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
1095#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) 1101#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
1096/* Available with KVM_CAP_NMI */ 1102/* Available with KVM_CAP_USER_NMI */
1097#define KVM_NMI _IO(KVMIO, 0x9a) 1103#define KVM_NMI _IO(KVMIO, 0x9a)
1098/* Available with KVM_CAP_SET_GUEST_DEBUG */ 1104/* Available with KVM_CAP_SET_GUEST_DEBUG */
1099#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) 1105#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
diff --git a/mm/gup.c b/mm/gup.c
index 91d044b1600d..af7ea3e0826b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
281 fault_flags |= FAULT_FLAG_ALLOW_RETRY; 281 fault_flags |= FAULT_FLAG_ALLOW_RETRY;
282 if (*flags & FOLL_NOWAIT) 282 if (*flags & FOLL_NOWAIT)
283 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; 283 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
284 if (*flags & FOLL_TRIED) {
285 VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
286 fault_flags |= FAULT_FLAG_TRIED;
287 }
284 288
285 ret = handle_mm_fault(mm, vma, address, fault_flags); 289 ret = handle_mm_fault(mm, vma, address, fault_flags);
286 if (ret & VM_FAULT_ERROR) { 290 if (ret & VM_FAULT_ERROR) {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 950813b1eb36..2c8da9825fe3 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm)
107 * existed or not. 107 * existed or not.
108 */ 108 */
109int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 109int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
110 unsigned long address) 110 unsigned long start,
111 unsigned long end)
111{ 112{
112 struct mmu_notifier *mn; 113 struct mmu_notifier *mn;
113 int young = 0, id; 114 int young = 0, id;
@@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
115 id = srcu_read_lock(&srcu); 116 id = srcu_read_lock(&srcu);
116 hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { 117 hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
117 if (mn->ops->clear_flush_young) 118 if (mn->ops->clear_flush_young)
118 young |= mn->ops->clear_flush_young(mn, mm, address); 119 young |= mn->ops->clear_flush_young(mn, mm, start, end);
119 } 120 }
120 srcu_read_unlock(&srcu, id); 121 srcu_read_unlock(&srcu, id);
121 122
diff --git a/mm/rmap.c b/mm/rmap.c
index 3e8491c504f8..bc74e0012809 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1355 continue; /* don't unmap */ 1355 continue; /* don't unmap */
1356 } 1356 }
1357 1357
1358 if (ptep_clear_flush_young_notify(vma, address, pte)) 1358 /*
1359 * No need for _notify because we're within an
1360 * mmu_notifier_invalidate_range_ {start|end} scope.
1361 */
1362 if (ptep_clear_flush_young(vma, address, pte))
1359 continue; 1363 continue;
1360 1364
1361 /* Nuke the page table entry. */ 1365 /* Nuke the page table entry. */
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..862967852d5a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@
36 * How the whole thing works (courtesy of Christoffer Dall): 36 * How the whole thing works (courtesy of Christoffer Dall):
37 * 37 *
38 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if 38 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
39 * something is pending 39 * something is pending on the CPU interface.
40 * - VGIC pending interrupts are stored on the vgic.irq_state vgic 40 * - Interrupts that are pending on the distributor are stored on the
41 * bitmap (this bitmap is updated by both user land ioctls and guest 41 * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
42 * mmio ops, and other in-kernel peripherals such as the 42 * ioctls and guest mmio ops, and other in-kernel peripherals such as the
43 * arch. timers) and indicate the 'wire' state. 43 * arch. timers).
44 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is 44 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
45 * recalculated 45 * recalculated
46 * - To calculate the oracle, we need info for each cpu from 46 * - To calculate the oracle, we need info for each cpu from
47 * compute_pending_for_cpu, which considers: 47 * compute_pending_for_cpu, which considers:
48 * - PPI: dist->irq_state & dist->irq_enable 48 * - PPI: dist->irq_pending & dist->irq_enable
49 * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target 49 * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
50 * - irq_spi_target is a 'formatted' version of the GICD_ICFGR 50 * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
51 * registers, stored on each vcpu. We only keep one bit of 51 * registers, stored on each vcpu. We only keep one bit of
52 * information per interrupt, making sure that only one vcpu can 52 * information per interrupt, making sure that only one vcpu can
53 * accept the interrupt. 53 * accept the interrupt.
54 * - If any of the above state changes, we must recalculate the oracle.
54 * - The same is true when injecting an interrupt, except that we only 55 * - The same is true when injecting an interrupt, except that we only
55 * consider a single interrupt at a time. The irq_spi_cpu array 56 * consider a single interrupt at a time. The irq_spi_cpu array
56 * contains the target CPU for each SPI. 57 * contains the target CPU for each SPI.
@@ -60,13 +61,18 @@
60 * the 'line' again. This is achieved as such: 61 * the 'line' again. This is achieved as such:
61 * 62 *
62 * - When a level interrupt is moved onto a vcpu, the corresponding 63 * - When a level interrupt is moved onto a vcpu, the corresponding
63 * bit in irq_active is set. As long as this bit is set, the line 64 * bit in irq_queued is set. As long as this bit is set, the line
64 * will be ignored for further interrupts. The interrupt is injected 65 * will be ignored for further interrupts. The interrupt is injected
65 * into the vcpu with the GICH_LR_EOI bit set (generate a 66 * into the vcpu with the GICH_LR_EOI bit set (generate a
66 * maintenance interrupt on EOI). 67 * maintenance interrupt on EOI).
67 * - When the interrupt is EOIed, the maintenance interrupt fires, 68 * - When the interrupt is EOIed, the maintenance interrupt fires,
68 * and clears the corresponding bit in irq_active. This allow the 69 * and clears the corresponding bit in irq_queued. This allows the
69 * interrupt line to be sampled again. 70 * interrupt line to be sampled again.
71 * - Note that level-triggered interrupts can also be set to pending from
72 * writes to GICD_ISPENDRn and lowering the external input line does not
73 * cause the interrupt to become inactive in such a situation.
74 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
75 * inactive as long as the external input line is held high.
70 */ 76 */
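To restate the pending/level semantics described in the comment above, here is a throwaway userspace model (plain u32 bitmaps and invented demo_* names, none of the kernel's structures): a GICD_ICPENDRn-style clear does not stick while the input line is still high, and the per-vcpu oracle is simply pending & enabled.

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    /* Distributor state for one vcpu's 32 private IRQs, as plain u32 bitmaps. */
    struct demo_dist {
            uint32_t pending;       /* latched pending state              */
            uint32_t enabled;       /* enable bits (GICD_ISENABLERn view) */
            uint32_t level;         /* current state of the input lines   */
    };

    /* The oracle: anything pending *and* enabled for this vcpu? */
    static bool demo_pending_on_cpu(const struct demo_dist *d)
    {
            return (d->pending & d->enabled) != 0;
    }

    /* GICD_ICPENDRn write: clear pending, but level-active lines stay pending. */
    static void demo_clear_pending(struct demo_dist *d, uint32_t mask)
    {
            d->pending &= ~mask;
            d->pending |= d->level;
    }

    int main(void)
    {
            struct demo_dist d = { .pending = 1u << 27, .enabled = 1u << 27,
                                   .level = 1u << 27 }; /* PPI held high */

            demo_clear_pending(&d, 1u << 27);
            printf("still pending after ICPENDR write: %d\n",
                   demo_pending_on_cpu(&d));

            d.level = 0;                            /* line lowered...        */
            demo_clear_pending(&d, 1u << 27);       /* ...now the clear sticks */
            printf("pending after line drops: %d\n", demo_pending_on_cpu(&d));
            return 0;
    }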
71 77
72#define VGIC_ADDR_UNDEF (-1) 78#define VGIC_ADDR_UNDEF (-1)
@@ -89,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
89static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 95static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
90static void vgic_update_state(struct kvm *kvm); 96static void vgic_update_state(struct kvm *kvm);
91static void vgic_kick_vcpus(struct kvm *kvm); 97static void vgic_kick_vcpus(struct kvm *kvm);
98static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
92static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); 99static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
93static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 100static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
94static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); 101static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
@@ -99,10 +106,8 @@ static const struct vgic_ops *vgic_ops;
99static const struct vgic_params *vgic; 106static const struct vgic_params *vgic;
100 107
101/* 108/*
102 * struct vgic_bitmap contains unions that provide two views of 109 * struct vgic_bitmap contains a bitmap made of unsigned longs, but
103 * the same data. In one case it is an array of registers of 110 * extracts u32s out of them.
104 * u32's, and in the other case it is a bitmap of unsigned
105 * longs.
106 * 111 *
107 * This does not work on 64-bit BE systems, because the bitmap access 112 * This does not work on 64-bit BE systems, because the bitmap access
108 * will store two consecutive 32-bit words with the higher-addressed 113 * will store two consecutive 32-bit words with the higher-addressed
@@ -118,23 +123,45 @@ static const struct vgic_params *vgic;
118#define REG_OFFSET_SWIZZLE 0 123#define REG_OFFSET_SWIZZLE 0
119#endif 124#endif
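The word-ordering problem the swizzle compensates for is easy to see outside the kernel: set bit 0 of an unsigned long and check which 32-bit word it lands in. This is a pure endianness demo and makes no claim about the kernel macros beyond what the comment already states.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
            unsigned long bitmap = 1UL;     /* bit 0 set */
            uint32_t words[2] = { 0, 0 };
            int swizzle;

            memcpy(words, &bitmap, sizeof(bitmap));

            /* On 64-bit big-endian, bit 0 shows up in the *second* u32 word,
             * so "register word 0" must be read from index 0 ^ 1. */
            swizzle = (sizeof(long) == 8 && words[0] == 0) ? 1 : 0;

            printf("swizzle = %d, register word 0 = 0x%08x\n",
                   swizzle, words[0 ^ swizzle]);
            return 0;
    }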
120 125
126static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
127{
128 int nr_longs;
129
130 nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
131
132 b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
133 if (!b->private)
134 return -ENOMEM;
135
136 b->shared = b->private + nr_cpus;
137
138 return 0;
139}
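A rough standalone check of the size computation used by vgic_init_bitmap above, with BITS_TO_LONGS re-derived locally and VGIC_NR_PRIVATE_IRQS assumed to be 32: one private long per vcpu, followed by the shared SPI bitmap.

    #include <stdio.h>

    #define BITS_PER_LONG           (8 * sizeof(long))
    #define BITS_TO_LONGS(n)        (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)
    #define VGIC_NR_PRIVATE_IRQS    32

    int main(void)
    {
            int nr_cpus = 4, nr_irqs = 256;

            /* One long of private IRQs per vcpu, then the shared SPI bitmap. */
            unsigned long nr_longs =
                    nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);

            printf("allocate %lu longs\n", nr_longs);
            return 0;
    }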
140
141static void vgic_free_bitmap(struct vgic_bitmap *b)
142{
143 kfree(b->private);
144 b->private = NULL;
145 b->shared = NULL;
146}
147
121static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, 148static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
122 int cpuid, u32 offset) 149 int cpuid, u32 offset)
123{ 150{
124 offset >>= 2; 151 offset >>= 2;
125 if (!offset) 152 if (!offset)
126 return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); 153 return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
127 else 154 else
128 return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); 155 return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
129} 156}
130 157
131static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, 158static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
132 int cpuid, int irq) 159 int cpuid, int irq)
133{ 160{
134 if (irq < VGIC_NR_PRIVATE_IRQS) 161 if (irq < VGIC_NR_PRIVATE_IRQS)
135 return test_bit(irq, x->percpu[cpuid].reg_ul); 162 return test_bit(irq, x->private + cpuid);
136 163
137 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); 164 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
138} 165}
139 166
140static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, 167static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
@@ -143,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
143 unsigned long *reg; 170 unsigned long *reg;
144 171
145 if (irq < VGIC_NR_PRIVATE_IRQS) { 172 if (irq < VGIC_NR_PRIVATE_IRQS) {
146 reg = x->percpu[cpuid].reg_ul; 173 reg = x->private + cpuid;
147 } else { 174 } else {
148 reg = x->shared.reg_ul; 175 reg = x->shared;
149 irq -= VGIC_NR_PRIVATE_IRQS; 176 irq -= VGIC_NR_PRIVATE_IRQS;
150 } 177 }
151 178
@@ -157,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
157 184
158static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) 185static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
159{ 186{
160 if (unlikely(cpuid >= VGIC_MAX_CPUS)) 187 return x->private + cpuid;
161 return NULL;
162 return x->percpu[cpuid].reg_ul;
163} 188}
164 189
165static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) 190static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
166{ 191{
167 return x->shared.reg_ul; 192 return x->shared;
193}
194
195static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
196{
197 int size;
198
199 size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
200 size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
201
202 x->private = kzalloc(size, GFP_KERNEL);
203 if (!x->private)
204 return -ENOMEM;
205
206 x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
207 return 0;
208}
209
210static void vgic_free_bytemap(struct vgic_bytemap *b)
211{
212 kfree(b->private);
213 b->private = NULL;
214 b->shared = NULL;
168} 215}
169 216
170static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) 217static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
171{ 218{
172 offset >>= 2; 219 u32 *reg;
173 BUG_ON(offset > (VGIC_NR_IRQS / 4)); 220
174 if (offset < 8) 221 if (offset < VGIC_NR_PRIVATE_IRQS) {
175 return x->percpu[cpuid] + offset; 222 reg = x->private;
176 else 223 offset += cpuid * VGIC_NR_PRIVATE_IRQS;
177 return x->shared + offset - 8; 224 } else {
225 reg = x->shared;
226 offset -= VGIC_NR_PRIVATE_IRQS;
227 }
228
229 return reg + (offset / sizeof(u32));
178} 230}
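For the bytemap, the flat layout is nr_cpus blocks of 32 private bytes followed by the shared bytes; a throwaway calculation (illustrative names, 32 private IRQs assumed) of where a given per-IRQ byte such as a GICD_IPRIORITYRn entry ends up:

    #include <stdio.h>

    #define VGIC_NR_PRIVATE_IRQS    32

    /* Flat byte index of IRQ 'irq' for vcpu 'cpuid' in a bytemap laid out
     * as: [nr_cpus * 32 private bytes][shared bytes]. */
    static int demo_bytemap_index(int nr_cpus, int cpuid, int irq)
    {
            if (irq < VGIC_NR_PRIVATE_IRQS)
                    return cpuid * VGIC_NR_PRIVATE_IRQS + irq;
            return nr_cpus * VGIC_NR_PRIVATE_IRQS + (irq - VGIC_NR_PRIVATE_IRQS);
    }

    int main(void)
    {
            printf("vcpu1, PPI 27    -> byte %d\n", demo_bytemap_index(4, 1, 27));
            printf("any vcpu, SPI 64 -> byte %d\n", demo_bytemap_index(4, 0, 64));
            return 0;
    }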
179 231
180#define VGIC_CFG_LEVEL 0 232#define VGIC_CFG_LEVEL 0
@@ -196,46 +248,81 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
196 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); 248 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
197} 249}
198 250
199static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) 251static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
252{
253 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
254
255 return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
256}
257
258static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
259{
260 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
261
262 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
263}
264
265static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
266{
267 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
268
269 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
270}
271
272static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
200{ 273{
201 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 274 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
202 275
203 return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); 276 return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
204} 277}
205 278
206static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) 279static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
207{ 280{
208 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 281 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
209 282
210 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); 283 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
211} 284}
212 285
213static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) 286static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
214{ 287{
215 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 288 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
216 289
217 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); 290 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
291}
292
293static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
294{
295 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
296
297 return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
298}
299
300static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
301{
302 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
303
304 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
218} 305}
219 306
220static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) 307static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
221{ 308{
222 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 309 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
223 310
224 return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); 311 return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
225} 312}
226 313
227static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) 314static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
228{ 315{
229 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 316 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
230 317
231 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); 318 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
232} 319}
233 320
234static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) 321static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
235{ 322{
236 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 323 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
237 324
238 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); 325 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
239} 326}
240 327
241static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) 328static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -256,6 +343,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
256 vcpu->arch.vgic_cpu.pending_shared); 343 vcpu->arch.vgic_cpu.pending_shared);
257} 344}
258 345
346static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
347{
348 return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
349}
350
259static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) 351static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
260{ 352{
261 return le32_to_cpu(*((u32 *)mmio->data)) & mask; 353 return le32_to_cpu(*((u32 *)mmio->data)) & mask;
@@ -347,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
347 439
348 case 4: /* GICD_TYPER */ 440 case 4: /* GICD_TYPER */
349 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; 441 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
350 reg |= (VGIC_NR_IRQS >> 5) - 1; 442 reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
351 vgic_reg_access(mmio, &reg, word_offset, 443 vgic_reg_access(mmio, &reg, word_offset,
352 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); 444 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
353 break; 445 break;
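As a worked example of the GICD_TYPER encoding computed above: with 4 online vcpus and nr_irqs = 128 the guest reads (4 - 1) << 5 | (128 / 32 - 1) = 0x63. A tiny self-contained check (field layout as in the GICv2 spec; no kernel code involved):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            int online_vcpus = 4, nr_irqs = 128;
            uint32_t typer = ((uint32_t)(online_vcpus - 1) << 5) |
                             ((nr_irqs >> 5) - 1);

            printf("GICD_TYPER = 0x%08x (CPUNumber=%u, ITLinesNumber=%u)\n",
                   typer, (typer >> 5) & 0x7, typer & 0x1f);
            return 0;
    }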
@@ -409,11 +501,33 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
409 struct kvm_exit_mmio *mmio, 501 struct kvm_exit_mmio *mmio,
410 phys_addr_t offset) 502 phys_addr_t offset)
411{ 503{
412 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, 504 u32 *reg, orig;
413 vcpu->vcpu_id, offset); 505 u32 level_mask;
506 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
507
508 reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
509 level_mask = (~(*reg));
510
511 /* Mark both level and edge triggered irqs as pending */
512 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
513 orig = *reg;
414 vgic_reg_access(mmio, reg, offset, 514 vgic_reg_access(mmio, reg, offset,
415 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); 515 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
516
416 if (mmio->is_write) { 517 if (mmio->is_write) {
518 /* Set the soft-pending flag only for level-triggered irqs */
519 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
520 vcpu->vcpu_id, offset);
521 vgic_reg_access(mmio, reg, offset,
522 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
523 *reg &= level_mask;
524
525 /* Ignore writes to SGIs */
526 if (offset < 2) {
527 *reg &= ~0xffff;
528 *reg |= orig & 0xffff;
529 }
530
417 vgic_update_state(vcpu->kvm); 531 vgic_update_state(vcpu->kvm);
418 return true; 532 return true;
419 } 533 }
@@ -425,11 +539,34 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
425 struct kvm_exit_mmio *mmio, 539 struct kvm_exit_mmio *mmio,
426 phys_addr_t offset) 540 phys_addr_t offset)
427{ 541{
428 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, 542 u32 *level_active;
429 vcpu->vcpu_id, offset); 543 u32 *reg, orig;
544 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
545
546 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
547 orig = *reg;
430 vgic_reg_access(mmio, reg, offset, 548 vgic_reg_access(mmio, reg, offset,
431 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); 549 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
432 if (mmio->is_write) { 550 if (mmio->is_write) {
 551 /* Re-set level-triggered interrupts whose input line is still active */
552 level_active = vgic_bitmap_get_reg(&dist->irq_level,
553 vcpu->vcpu_id, offset);
554 reg = vgic_bitmap_get_reg(&dist->irq_pending,
555 vcpu->vcpu_id, offset);
556 *reg |= *level_active;
557
558 /* Ignore writes to SGIs */
559 if (offset < 2) {
560 *reg &= ~0xffff;
561 *reg |= orig & 0xffff;
562 }
563
564 /* Clear soft-pending flags */
565 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
566 vcpu->vcpu_id, offset);
567 vgic_reg_access(mmio, reg, offset,
568 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
569
433 vgic_update_state(vcpu->kvm); 570 vgic_update_state(vcpu->kvm);
434 return true; 571 return true;
435 } 572 }
@@ -651,9 +788,9 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
651 * is fine, then we are only setting a few bits that were 788 * is fine, then we are only setting a few bits that were
652 * already set. 789 * already set.
653 */ 790 */
654 vgic_dist_irq_set(vcpu, lr.irq); 791 vgic_dist_irq_set_pending(vcpu, lr.irq);
655 if (lr.irq < VGIC_NR_SGIS) 792 if (lr.irq < VGIC_NR_SGIS)
656 dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; 793 *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
657 lr.state &= ~LR_STATE_PENDING; 794 lr.state &= ~LR_STATE_PENDING;
658 vgic_set_lr(vcpu, i, lr); 795 vgic_set_lr(vcpu, i, lr);
659 796
@@ -662,8 +799,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
662 * active), then the LR does not hold any useful info and can 799 * active), then the LR does not hold any useful info and can
663 * be marked as free for other use. 800 * be marked as free for other use.
664 */ 801 */
665 if (!(lr.state & LR_STATE_MASK)) 802 if (!(lr.state & LR_STATE_MASK)) {
666 vgic_retire_lr(i, lr.irq, vcpu); 803 vgic_retire_lr(i, lr.irq, vcpu);
804 vgic_irq_clear_queued(vcpu, lr.irq);
805 }
667 806
668 /* Finally update the VGIC state. */ 807 /* Finally update the VGIC state. */
669 vgic_update_state(vcpu->kvm); 808 vgic_update_state(vcpu->kvm);
@@ -677,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
677{ 816{
678 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 817 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
679 int sgi; 818 int sgi;
680 int min_sgi = (offset & ~0x3) * 4; 819 int min_sgi = (offset & ~0x3);
681 int max_sgi = min_sgi + 3; 820 int max_sgi = min_sgi + 3;
682 int vcpu_id = vcpu->vcpu_id; 821 int vcpu_id = vcpu->vcpu_id;
683 u32 reg = 0; 822 u32 reg = 0;
@@ -685,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
685 /* Copy source SGIs from distributor side */ 824 /* Copy source SGIs from distributor side */
686 for (sgi = min_sgi; sgi <= max_sgi; sgi++) { 825 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
687 int shift = 8 * (sgi - min_sgi); 826 int shift = 8 * (sgi - min_sgi);
688 reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; 827 reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
689 } 828 }
690 829
691 mmio_data_write(mmio, ~0, reg); 830 mmio_data_write(mmio, ~0, reg);
@@ -698,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
698{ 837{
699 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 838 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
700 int sgi; 839 int sgi;
701 int min_sgi = (offset & ~0x3) * 4; 840 int min_sgi = (offset & ~0x3);
702 int max_sgi = min_sgi + 3; 841 int max_sgi = min_sgi + 3;
703 int vcpu_id = vcpu->vcpu_id; 842 int vcpu_id = vcpu->vcpu_id;
704 u32 reg; 843 u32 reg;
@@ -709,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
709 /* Clear pending SGIs on the distributor */ 848 /* Clear pending SGIs on the distributor */
710 for (sgi = min_sgi; sgi <= max_sgi; sgi++) { 849 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
711 u8 mask = reg >> (8 * (sgi - min_sgi)); 850 u8 mask = reg >> (8 * (sgi - min_sgi));
851 u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
712 if (set) { 852 if (set) {
713 if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) 853 if ((*src & mask) != mask)
714 updated = true; 854 updated = true;
715 dist->irq_sgi_sources[vcpu_id][sgi] |= mask; 855 *src |= mask;
716 } else { 856 } else {
717 if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) 857 if (*src & mask)
718 updated = true; 858 updated = true;
719 dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; 859 *src &= ~mask;
720 } 860 }
721 } 861 }
722 862
@@ -755,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
755struct mmio_range { 895struct mmio_range {
756 phys_addr_t base; 896 phys_addr_t base;
757 unsigned long len; 897 unsigned long len;
898 int bits_per_irq;
758 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, 899 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
759 phys_addr_t offset); 900 phys_addr_t offset);
760}; 901};
@@ -763,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
763 { 904 {
764 .base = GIC_DIST_CTRL, 905 .base = GIC_DIST_CTRL,
765 .len = 12, 906 .len = 12,
907 .bits_per_irq = 0,
766 .handle_mmio = handle_mmio_misc, 908 .handle_mmio = handle_mmio_misc,
767 }, 909 },
768 { 910 {
769 .base = GIC_DIST_IGROUP, 911 .base = GIC_DIST_IGROUP,
770 .len = VGIC_NR_IRQS / 8, 912 .len = VGIC_MAX_IRQS / 8,
913 .bits_per_irq = 1,
771 .handle_mmio = handle_mmio_raz_wi, 914 .handle_mmio = handle_mmio_raz_wi,
772 }, 915 },
773 { 916 {
774 .base = GIC_DIST_ENABLE_SET, 917 .base = GIC_DIST_ENABLE_SET,
775 .len = VGIC_NR_IRQS / 8, 918 .len = VGIC_MAX_IRQS / 8,
919 .bits_per_irq = 1,
776 .handle_mmio = handle_mmio_set_enable_reg, 920 .handle_mmio = handle_mmio_set_enable_reg,
777 }, 921 },
778 { 922 {
779 .base = GIC_DIST_ENABLE_CLEAR, 923 .base = GIC_DIST_ENABLE_CLEAR,
780 .len = VGIC_NR_IRQS / 8, 924 .len = VGIC_MAX_IRQS / 8,
925 .bits_per_irq = 1,
781 .handle_mmio = handle_mmio_clear_enable_reg, 926 .handle_mmio = handle_mmio_clear_enable_reg,
782 }, 927 },
783 { 928 {
784 .base = GIC_DIST_PENDING_SET, 929 .base = GIC_DIST_PENDING_SET,
785 .len = VGIC_NR_IRQS / 8, 930 .len = VGIC_MAX_IRQS / 8,
931 .bits_per_irq = 1,
786 .handle_mmio = handle_mmio_set_pending_reg, 932 .handle_mmio = handle_mmio_set_pending_reg,
787 }, 933 },
788 { 934 {
789 .base = GIC_DIST_PENDING_CLEAR, 935 .base = GIC_DIST_PENDING_CLEAR,
790 .len = VGIC_NR_IRQS / 8, 936 .len = VGIC_MAX_IRQS / 8,
937 .bits_per_irq = 1,
791 .handle_mmio = handle_mmio_clear_pending_reg, 938 .handle_mmio = handle_mmio_clear_pending_reg,
792 }, 939 },
793 { 940 {
794 .base = GIC_DIST_ACTIVE_SET, 941 .base = GIC_DIST_ACTIVE_SET,
795 .len = VGIC_NR_IRQS / 8, 942 .len = VGIC_MAX_IRQS / 8,
943 .bits_per_irq = 1,
796 .handle_mmio = handle_mmio_raz_wi, 944 .handle_mmio = handle_mmio_raz_wi,
797 }, 945 },
798 { 946 {
799 .base = GIC_DIST_ACTIVE_CLEAR, 947 .base = GIC_DIST_ACTIVE_CLEAR,
800 .len = VGIC_NR_IRQS / 8, 948 .len = VGIC_MAX_IRQS / 8,
949 .bits_per_irq = 1,
801 .handle_mmio = handle_mmio_raz_wi, 950 .handle_mmio = handle_mmio_raz_wi,
802 }, 951 },
803 { 952 {
804 .base = GIC_DIST_PRI, 953 .base = GIC_DIST_PRI,
805 .len = VGIC_NR_IRQS, 954 .len = VGIC_MAX_IRQS,
955 .bits_per_irq = 8,
806 .handle_mmio = handle_mmio_priority_reg, 956 .handle_mmio = handle_mmio_priority_reg,
807 }, 957 },
808 { 958 {
809 .base = GIC_DIST_TARGET, 959 .base = GIC_DIST_TARGET,
810 .len = VGIC_NR_IRQS, 960 .len = VGIC_MAX_IRQS,
961 .bits_per_irq = 8,
811 .handle_mmio = handle_mmio_target_reg, 962 .handle_mmio = handle_mmio_target_reg,
812 }, 963 },
813 { 964 {
814 .base = GIC_DIST_CONFIG, 965 .base = GIC_DIST_CONFIG,
815 .len = VGIC_NR_IRQS / 4, 966 .len = VGIC_MAX_IRQS / 4,
967 .bits_per_irq = 2,
816 .handle_mmio = handle_mmio_cfg_reg, 968 .handle_mmio = handle_mmio_cfg_reg,
817 }, 969 },
818 { 970 {
@@ -850,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
850 return NULL; 1002 return NULL;
851} 1003}
852 1004
1005static bool vgic_validate_access(const struct vgic_dist *dist,
1006 const struct mmio_range *range,
1007 unsigned long offset)
1008{
1009 int irq;
1010
1011 if (!range->bits_per_irq)
1012 return true; /* Not an irq-based access */
1013
1014 irq = offset * 8 / range->bits_per_irq;
1015 if (irq >= dist->nr_irqs)
1016 return false;
1017
1018 return true;
1019}
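Restating the range check above in isolation: a byte offset into an irq-indexed register block touches IRQ offset * 8 / bits_per_irq, and anything at or beyond the configured nr_irqs is treated as RAZ/WI. Illustrative values only:

    #include <stdio.h>
    #include <stdbool.h>

    /* Does a byte offset into an irq-indexed register block stay within
     * the configured number of interrupts? */
    static bool demo_validate_access(int nr_irqs, int bits_per_irq,
                                     unsigned long offset)
    {
            if (!bits_per_irq)
                    return true;            /* not an irq-indexed register */
            return (int)(offset * 8 / bits_per_irq) < nr_irqs;
    }

    int main(void)
    {
            /* 128 IRQs configured: enable registers (1 bit/irq) span 16 bytes,
             * priority registers (8 bits/irq) span 128 bytes. */
            printf("ISENABLER offset 0x0c:  %s\n",
                   demo_validate_access(128, 1, 0x0c) ? "valid" : "RAZ/WI");
            printf("IPRIORITYR offset 0x90: %s\n",
                   demo_validate_access(128, 8, 0x90) ? "valid" : "RAZ/WI");
            return 0;
    }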
1020
853/** 1021/**
854 * vgic_handle_mmio - handle an in-kernel MMIO access 1022 * vgic_handle_mmio - handle an in-kernel MMIO access
855 * @vcpu: pointer to the vcpu performing the access 1023 * @vcpu: pointer to the vcpu performing the access
@@ -889,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
889 1057
890 spin_lock(&vcpu->kvm->arch.vgic.lock); 1058 spin_lock(&vcpu->kvm->arch.vgic.lock);
891 offset = mmio->phys_addr - range->base - base; 1059 offset = mmio->phys_addr - range->base - base;
892 updated_state = range->handle_mmio(vcpu, mmio, offset); 1060 if (vgic_validate_access(dist, range, offset)) {
1061 updated_state = range->handle_mmio(vcpu, mmio, offset);
1062 } else {
1063 vgic_reg_access(mmio, NULL, offset,
1064 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
1065 updated_state = false;
1066 }
893 spin_unlock(&vcpu->kvm->arch.vgic.lock); 1067 spin_unlock(&vcpu->kvm->arch.vgic.lock);
894 kvm_prepare_mmio(run, mmio); 1068 kvm_prepare_mmio(run, mmio);
895 kvm_handle_mmio_return(vcpu, run); 1069 kvm_handle_mmio_return(vcpu, run);
@@ -900,6 +1074,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
900 return true; 1074 return true;
901} 1075}
902 1076
1077static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
1078{
1079 return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
1080}
1081
903static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) 1082static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
904{ 1083{
905 struct kvm *kvm = vcpu->kvm; 1084 struct kvm *kvm = vcpu->kvm;
@@ -932,8 +1111,8 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
932 kvm_for_each_vcpu(c, vcpu, kvm) { 1111 kvm_for_each_vcpu(c, vcpu, kvm) {
933 if (target_cpus & 1) { 1112 if (target_cpus & 1) {
934 /* Flag the SGI as pending */ 1113 /* Flag the SGI as pending */
935 vgic_dist_irq_set(vcpu, sgi); 1114 vgic_dist_irq_set_pending(vcpu, sgi);
936 dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; 1115 *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
937 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); 1116 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
938 } 1117 }
939 1118
@@ -941,32 +1120,38 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
941 } 1120 }
942} 1121}
943 1122
1123static int vgic_nr_shared_irqs(struct vgic_dist *dist)
1124{
1125 return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
1126}
1127
944static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) 1128static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
945{ 1129{
946 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1130 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
947 unsigned long *pending, *enabled, *pend_percpu, *pend_shared; 1131 unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
948 unsigned long pending_private, pending_shared; 1132 unsigned long pending_private, pending_shared;
1133 int nr_shared = vgic_nr_shared_irqs(dist);
949 int vcpu_id; 1134 int vcpu_id;
950 1135
951 vcpu_id = vcpu->vcpu_id; 1136 vcpu_id = vcpu->vcpu_id;
952 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; 1137 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
953 pend_shared = vcpu->arch.vgic_cpu.pending_shared; 1138 pend_shared = vcpu->arch.vgic_cpu.pending_shared;
954 1139
955 pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); 1140 pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
956 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); 1141 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
957 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); 1142 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
958 1143
959 pending = vgic_bitmap_get_shared_map(&dist->irq_state); 1144 pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
960 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); 1145 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
961 bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); 1146 bitmap_and(pend_shared, pending, enabled, nr_shared);
962 bitmap_and(pend_shared, pend_shared, 1147 bitmap_and(pend_shared, pend_shared,
963 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), 1148 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
964 VGIC_NR_SHARED_IRQS); 1149 nr_shared);
965 1150
966 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); 1151 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
967 pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); 1152 pending_shared = find_first_bit(pend_shared, nr_shared);
968 return (pending_private < VGIC_NR_PRIVATE_IRQS || 1153 return (pending_private < VGIC_NR_PRIVATE_IRQS ||
969 pending_shared < VGIC_NR_SHARED_IRQS); 1154 pending_shared < vgic_nr_shared_irqs(dist));
970} 1155}
971 1156
972/* 1157/*
@@ -980,14 +1165,14 @@ static void vgic_update_state(struct kvm *kvm)
980 int c; 1165 int c;
981 1166
982 if (!dist->enabled) { 1167 if (!dist->enabled) {
983 set_bit(0, &dist->irq_pending_on_cpu); 1168 set_bit(0, dist->irq_pending_on_cpu);
984 return; 1169 return;
985 } 1170 }
986 1171
987 kvm_for_each_vcpu(c, vcpu, kvm) { 1172 kvm_for_each_vcpu(c, vcpu, kvm) {
988 if (compute_pending_for_cpu(vcpu)) { 1173 if (compute_pending_for_cpu(vcpu)) {
989 pr_debug("CPU%d has pending interrupts\n", c); 1174 pr_debug("CPU%d has pending interrupts\n", c);
990 set_bit(c, &dist->irq_pending_on_cpu); 1175 set_bit(c, dist->irq_pending_on_cpu);
991 } 1176 }
992 } 1177 }
993} 1178}
@@ -1079,8 +1264,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1079 1264
1080 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { 1265 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
1081 vgic_retire_lr(lr, vlr.irq, vcpu); 1266 vgic_retire_lr(lr, vlr.irq, vcpu);
1082 if (vgic_irq_is_active(vcpu, vlr.irq)) 1267 if (vgic_irq_is_queued(vcpu, vlr.irq))
1083 vgic_irq_clear_active(vcpu, vlr.irq); 1268 vgic_irq_clear_queued(vcpu, vlr.irq);
1084 } 1269 }
1085 } 1270 }
1086} 1271}
@@ -1092,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1092static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) 1277static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1093{ 1278{
1094 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1279 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1280 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1095 struct vgic_lr vlr; 1281 struct vgic_lr vlr;
1096 int lr; 1282 int lr;
1097 1283
1098 /* Sanitize the input... */ 1284 /* Sanitize the input... */
1099 BUG_ON(sgi_source_id & ~7); 1285 BUG_ON(sgi_source_id & ~7);
1100 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); 1286 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
1101 BUG_ON(irq >= VGIC_NR_IRQS); 1287 BUG_ON(irq >= dist->nr_irqs);
1102 1288
1103 kvm_debug("Queue IRQ%d\n", irq); 1289 kvm_debug("Queue IRQ%d\n", irq);
1104 1290
@@ -1144,14 +1330,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1144 int vcpu_id = vcpu->vcpu_id; 1330 int vcpu_id = vcpu->vcpu_id;
1145 int c; 1331 int c;
1146 1332
1147 sources = dist->irq_sgi_sources[vcpu_id][irq]; 1333 sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
1148 1334
1149 for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { 1335 for_each_set_bit(c, &sources, dist->nr_cpus) {
1150 if (vgic_queue_irq(vcpu, c, irq)) 1336 if (vgic_queue_irq(vcpu, c, irq))
1151 clear_bit(c, &sources); 1337 clear_bit(c, &sources);
1152 } 1338 }
1153 1339
1154 dist->irq_sgi_sources[vcpu_id][irq] = sources; 1340 *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
1155 1341
1156 /* 1342 /*
1157 * If the sources bitmap has been cleared it means that we 1343 * If the sources bitmap has been cleared it means that we
@@ -1160,7 +1346,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1160 * our emulated gic and can get rid of them. 1346 * our emulated gic and can get rid of them.
1161 */ 1347 */
1162 if (!sources) { 1348 if (!sources) {
1163 vgic_dist_irq_clear(vcpu, irq); 1349 vgic_dist_irq_clear_pending(vcpu, irq);
1164 vgic_cpu_irq_clear(vcpu, irq); 1350 vgic_cpu_irq_clear(vcpu, irq);
1165 return true; 1351 return true;
1166 } 1352 }
@@ -1170,15 +1356,15 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1170 1356
1171static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) 1357static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1172{ 1358{
1173 if (vgic_irq_is_active(vcpu, irq)) 1359 if (!vgic_can_sample_irq(vcpu, irq))
1174 return true; /* level interrupt, already queued */ 1360 return true; /* level interrupt, already queued */
1175 1361
1176 if (vgic_queue_irq(vcpu, 0, irq)) { 1362 if (vgic_queue_irq(vcpu, 0, irq)) {
1177 if (vgic_irq_is_edge(vcpu, irq)) { 1363 if (vgic_irq_is_edge(vcpu, irq)) {
1178 vgic_dist_irq_clear(vcpu, irq); 1364 vgic_dist_irq_clear_pending(vcpu, irq);
1179 vgic_cpu_irq_clear(vcpu, irq); 1365 vgic_cpu_irq_clear(vcpu, irq);
1180 } else { 1366 } else {
1181 vgic_irq_set_active(vcpu, irq); 1367 vgic_irq_set_queued(vcpu, irq);
1182 } 1368 }
1183 1369
1184 return true; 1370 return true;
@@ -1223,7 +1409,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1223 } 1409 }
1224 1410
1225 /* SPIs */ 1411 /* SPIs */
1226 for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { 1412 for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
1227 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) 1413 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
1228 overflow = 1; 1414 overflow = 1;
1229 } 1415 }
@@ -1239,7 +1425,7 @@ epilog:
1239 * us. Claim we don't have anything pending. We'll 1425 * us. Claim we don't have anything pending. We'll
1240 * adjust that if needed while exiting. 1426 * adjust that if needed while exiting.
1241 */ 1427 */
1242 clear_bit(vcpu_id, &dist->irq_pending_on_cpu); 1428 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
1243 } 1429 }
1244} 1430}
1245 1431
@@ -1261,17 +1447,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1261 1447
1262 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { 1448 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1263 struct vgic_lr vlr = vgic_get_lr(vcpu, lr); 1449 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1450 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1264 1451
1265 vgic_irq_clear_active(vcpu, vlr.irq); 1452 vgic_irq_clear_queued(vcpu, vlr.irq);
1266 WARN_ON(vlr.state & LR_STATE_MASK); 1453 WARN_ON(vlr.state & LR_STATE_MASK);
1267 vlr.state = 0; 1454 vlr.state = 0;
1268 vgic_set_lr(vcpu, lr, vlr); 1455 vgic_set_lr(vcpu, lr, vlr);
1269 1456
1457 /*
 1458			 * If the IRQ was EOIed it was also ACKed and we
 1459			 * therefore assume we can clear the soft-pending
 1460			 * state (should it have been set) for this interrupt.
1461 *
1462 * Note: if the IRQ soft pending state was set after
1463 * the IRQ was acked, it actually shouldn't be
1464 * cleared, but we have no way of knowing that unless
1465 * we start trapping ACKs when the soft-pending state
1466 * is set.
1467 */
1468 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1469
1270 /* Any additional pending interrupt? */ 1470 /* Any additional pending interrupt? */
1271 if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { 1471 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1272 vgic_cpu_irq_set(vcpu, vlr.irq); 1472 vgic_cpu_irq_set(vcpu, vlr.irq);
1273 level_pending = true; 1473 level_pending = true;
1274 } else { 1474 } else {
1475 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1275 vgic_cpu_irq_clear(vcpu, vlr.irq); 1476 vgic_cpu_irq_clear(vcpu, vlr.irq);
1276 } 1477 }
1277 1478
@@ -1315,14 +1516,14 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1315 1516
1316 vlr = vgic_get_lr(vcpu, lr); 1517 vlr = vgic_get_lr(vcpu, lr);
1317 1518
1318 BUG_ON(vlr.irq >= VGIC_NR_IRQS); 1519 BUG_ON(vlr.irq >= dist->nr_irqs);
1319 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; 1520 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
1320 } 1521 }
1321 1522
1322 /* Check if we still have something up our sleeve... */ 1523 /* Check if we still have something up our sleeve... */
1323 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); 1524 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1324 if (level_pending || pending < vgic->nr_lr) 1525 if (level_pending || pending < vgic->nr_lr)
1325 set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); 1526 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1326} 1527}
1327 1528
1328void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) 1529void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1356,7 +1557,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1356 if (!irqchip_in_kernel(vcpu->kvm)) 1557 if (!irqchip_in_kernel(vcpu->kvm))
1357 return 0; 1558 return 0;
1358 1559
1359 return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); 1560 return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1360} 1561}
1361 1562
1362static void vgic_kick_vcpus(struct kvm *kvm) 1563static void vgic_kick_vcpus(struct kvm *kvm)
@@ -1376,34 +1577,36 @@ static void vgic_kick_vcpus(struct kvm *kvm)
1376 1577
1377static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) 1578static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1378{ 1579{
1379 int is_edge = vgic_irq_is_edge(vcpu, irq); 1580 int edge_triggered = vgic_irq_is_edge(vcpu, irq);
1380 int state = vgic_dist_irq_is_pending(vcpu, irq);
1381 1581
1382 /* 1582 /*
1383 * Only inject an interrupt if: 1583 * Only inject an interrupt if:
1384 * - edge triggered and we have a rising edge 1584 * - edge triggered and we have a rising edge
1385 * - level triggered and we change level 1585 * - level triggered and we change level
1386 */ 1586 */
1387 if (is_edge) 1587 if (edge_triggered) {
1588 int state = vgic_dist_irq_is_pending(vcpu, irq);
1388 return level > state; 1589 return level > state;
1389 else 1590 } else {
1591 int state = vgic_dist_irq_get_level(vcpu, irq);
1390 return level != state; 1592 return level != state;
1593 }
1391} 1594}
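The injection rule spelled out in the comment above (edge: only a rising edge is latched; level: only a change of line state matters), as a self-contained check with made-up prior state:

    #include <stdio.h>
    #include <stdbool.h>

    /* Should a new 'level' value on an interrupt line be injected, given the
     * previously recorded state?  Mirrors the rule in the comment above. */
    static bool demo_validate_injection(bool edge_triggered, int old_state,
                                        int new_level)
    {
            if (edge_triggered)
                    return new_level > old_state;   /* rising edge only    */
            return new_level != old_state;          /* any change of level */
    }

    int main(void)
    {
            printf("edge,  0 -> 1: %d\n", demo_validate_injection(true, 0, 1));
            printf("edge,  1 -> 1: %d\n", demo_validate_injection(true, 1, 1));
            printf("level, 1 -> 1: %d\n", demo_validate_injection(false, 1, 1));
            printf("level, 1 -> 0: %d\n", demo_validate_injection(false, 1, 0));
            return 0;
    }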
1392 1595
1393static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, 1596static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1394 unsigned int irq_num, bool level) 1597 unsigned int irq_num, bool level)
1395{ 1598{
1396 struct vgic_dist *dist = &kvm->arch.vgic; 1599 struct vgic_dist *dist = &kvm->arch.vgic;
1397 struct kvm_vcpu *vcpu; 1600 struct kvm_vcpu *vcpu;
1398 int is_edge, is_level; 1601 int edge_triggered, level_triggered;
1399 int enabled; 1602 int enabled;
1400 bool ret = true; 1603 bool ret = true;
1401 1604
1402 spin_lock(&dist->lock); 1605 spin_lock(&dist->lock);
1403 1606
1404 vcpu = kvm_get_vcpu(kvm, cpuid); 1607 vcpu = kvm_get_vcpu(kvm, cpuid);
1405 is_edge = vgic_irq_is_edge(vcpu, irq_num); 1608 edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
1406 is_level = !is_edge; 1609 level_triggered = !edge_triggered;
1407 1610
1408 if (!vgic_validate_injection(vcpu, irq_num, level)) { 1611 if (!vgic_validate_injection(vcpu, irq_num, level)) {
1409 ret = false; 1612 ret = false;
@@ -1417,10 +1620,19 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1417 1620
1418 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); 1621 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1419 1622
1420 if (level) 1623 if (level) {
1421 vgic_dist_irq_set(vcpu, irq_num); 1624 if (level_triggered)
1422 else 1625 vgic_dist_irq_set_level(vcpu, irq_num);
1423 vgic_dist_irq_clear(vcpu, irq_num); 1626 vgic_dist_irq_set_pending(vcpu, irq_num);
1627 } else {
1628 if (level_triggered) {
1629 vgic_dist_irq_clear_level(vcpu, irq_num);
1630 if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
1631 vgic_dist_irq_clear_pending(vcpu, irq_num);
1632 } else {
1633 vgic_dist_irq_clear_pending(vcpu, irq_num);
1634 }
1635 }
1424 1636
1425 enabled = vgic_irq_is_enabled(vcpu, irq_num); 1637 enabled = vgic_irq_is_enabled(vcpu, irq_num);
1426 1638
@@ -1429,7 +1641,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1429 goto out; 1641 goto out;
1430 } 1642 }
1431 1643
1432 if (is_level && vgic_irq_is_active(vcpu, irq_num)) { 1644 if (!vgic_can_sample_irq(vcpu, irq_num)) {
1433 /* 1645 /*
1434 * Level interrupt in progress, will be picked up 1646 * Level interrupt in progress, will be picked up
1435 * when EOId. 1647 * when EOId.
@@ -1440,7 +1652,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1440 1652
1441 if (level) { 1653 if (level) {
1442 vgic_cpu_irq_set(vcpu, irq_num); 1654 vgic_cpu_irq_set(vcpu, irq_num);
1443 set_bit(cpuid, &dist->irq_pending_on_cpu); 1655 set_bit(cpuid, dist->irq_pending_on_cpu);
1444 } 1656 }
1445 1657
1446out: 1658out:
@@ -1466,7 +1678,8 @@ out:
1466int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1678int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1467 bool level) 1679 bool level)
1468{ 1680{
1469 if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) 1681 if (likely(vgic_initialized(kvm)) &&
1682 vgic_update_irq_pending(kvm, cpuid, irq_num, level))
1470 vgic_kick_vcpus(kvm); 1683 vgic_kick_vcpus(kvm);
1471 1684
1472 return 0; 1685 return 0;
@@ -1483,6 +1696,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1483 return IRQ_HANDLED; 1696 return IRQ_HANDLED;
1484} 1697}
1485 1698
1699void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1700{
1701 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1702
1703 kfree(vgic_cpu->pending_shared);
1704 kfree(vgic_cpu->vgic_irq_lr_map);
1705 vgic_cpu->pending_shared = NULL;
1706 vgic_cpu->vgic_irq_lr_map = NULL;
1707}
1708
1709static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1710{
1711 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1712
1713 int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
1714 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1715 vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
1716
1717 if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
1718 kvm_vgic_vcpu_destroy(vcpu);
1719 return -ENOMEM;
1720 }
1721
1722 return 0;
1723}
1724
1486/** 1725/**
1487 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state 1726 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
1488 * @vcpu: pointer to the vcpu struct 1727 * @vcpu: pointer to the vcpu struct
@@ -1490,16 +1729,13 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1490 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to 1729 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
1491 * this vcpu and enable the VGIC for this VCPU 1730 * this vcpu and enable the VGIC for this VCPU
1492 */ 1731 */
1493int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) 1732static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1494{ 1733{
1495 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1734 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1496 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1735 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1497 int i; 1736 int i;
1498 1737
1499 if (vcpu->vcpu_id >= VGIC_MAX_CPUS) 1738 for (i = 0; i < dist->nr_irqs; i++) {
1500 return -EBUSY;
1501
1502 for (i = 0; i < VGIC_NR_IRQS; i++) {
1503 if (i < VGIC_NR_PPIS) 1739 if (i < VGIC_NR_PPIS)
1504 vgic_bitmap_set_irq_val(&dist->irq_enabled, 1740 vgic_bitmap_set_irq_val(&dist->irq_enabled,
1505 vcpu->vcpu_id, i, 1); 1741 vcpu->vcpu_id, i, 1);
@@ -1518,84 +1754,112 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1518 vgic_cpu->nr_lr = vgic->nr_lr; 1754 vgic_cpu->nr_lr = vgic->nr_lr;
1519 1755
1520 vgic_enable(vcpu); 1756 vgic_enable(vcpu);
1521
1522 return 0;
1523} 1757}
1524 1758
1525static void vgic_init_maintenance_interrupt(void *info) 1759void kvm_vgic_destroy(struct kvm *kvm)
1526{ 1760{
1527 enable_percpu_irq(vgic->maint_irq, 0); 1761 struct vgic_dist *dist = &kvm->arch.vgic;
1762 struct kvm_vcpu *vcpu;
1763 int i;
1764
1765 kvm_for_each_vcpu(i, vcpu, kvm)
1766 kvm_vgic_vcpu_destroy(vcpu);
1767
1768 vgic_free_bitmap(&dist->irq_enabled);
1769 vgic_free_bitmap(&dist->irq_level);
1770 vgic_free_bitmap(&dist->irq_pending);
1771 vgic_free_bitmap(&dist->irq_soft_pend);
1772 vgic_free_bitmap(&dist->irq_queued);
1773 vgic_free_bitmap(&dist->irq_cfg);
1774 vgic_free_bytemap(&dist->irq_priority);
1775 if (dist->irq_spi_target) {
1776 for (i = 0; i < dist->nr_cpus; i++)
1777 vgic_free_bitmap(&dist->irq_spi_target[i]);
1778 }
1779 kfree(dist->irq_sgi_sources);
1780 kfree(dist->irq_spi_cpu);
1781 kfree(dist->irq_spi_target);
1782 kfree(dist->irq_pending_on_cpu);
1783 dist->irq_sgi_sources = NULL;
1784 dist->irq_spi_cpu = NULL;
1785 dist->irq_spi_target = NULL;
1786 dist->irq_pending_on_cpu = NULL;
1528} 1787}
1529 1788
1530static int vgic_cpu_notify(struct notifier_block *self, 1789/*
1531 unsigned long action, void *cpu) 1790 * Allocate and initialize the various data structures. Must be called
1791 * with kvm->lock held!
1792 */
1793static int vgic_init_maps(struct kvm *kvm)
1532{ 1794{
1533 switch (action) { 1795 struct vgic_dist *dist = &kvm->arch.vgic;
1534 case CPU_STARTING: 1796 struct kvm_vcpu *vcpu;
1535 case CPU_STARTING_FROZEN: 1797 int nr_cpus, nr_irqs;
1536 vgic_init_maintenance_interrupt(NULL); 1798 int ret, i;
1537 break;
1538 case CPU_DYING:
1539 case CPU_DYING_FROZEN:
1540 disable_percpu_irq(vgic->maint_irq);
1541 break;
1542 }
1543 1799
1544 return NOTIFY_OK; 1800 if (dist->nr_cpus) /* Already allocated */
1545} 1801 return 0;
1546 1802
1547static struct notifier_block vgic_cpu_nb = { 1803 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
1548 .notifier_call = vgic_cpu_notify, 1804 if (!nr_cpus) /* No vcpus? Can't be good... */
1549}; 1805 return -EINVAL;
1550 1806
1551static const struct of_device_id vgic_ids[] = { 1807 /*
1552 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, 1808 * If nobody configured the number of interrupts, use the
1553 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, 1809 * legacy one.
1554 {}, 1810 */
1555}; 1811 if (!dist->nr_irqs)
1812 dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
1556 1813
1557int kvm_vgic_hyp_init(void) 1814 nr_irqs = dist->nr_irqs;
1558{
1559 const struct of_device_id *matched_id;
1560 int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
1561 const struct vgic_params **);
1562 struct device_node *vgic_node;
1563 int ret;
1564 1815
1565 vgic_node = of_find_matching_node_and_match(NULL, 1816 ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
1566 vgic_ids, &matched_id); 1817 ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
1567 if (!vgic_node) { 1818 ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
1568 kvm_err("error: no compatible GIC node found\n"); 1819 ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
1569 return -ENODEV; 1820 ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
1570 } 1821 ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
1822 ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
1571 1823
1572 vgic_probe = matched_id->data;
1573 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
1574 if (ret) 1824 if (ret)
1575 return ret; 1825 goto out;
1576 1826
1577 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, 1827 dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
1578 "vgic", kvm_get_running_vcpus()); 1828 dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
1579 if (ret) { 1829 dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
1580 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); 1830 GFP_KERNEL);
1581 return ret; 1831 dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
1832 GFP_KERNEL);
1833 if (!dist->irq_sgi_sources ||
1834 !dist->irq_spi_cpu ||
1835 !dist->irq_spi_target ||
1836 !dist->irq_pending_on_cpu) {
1837 ret = -ENOMEM;
1838 goto out;
1582 } 1839 }
1583 1840
1584 ret = __register_cpu_notifier(&vgic_cpu_nb); 1841 for (i = 0; i < nr_cpus; i++)
1585 if (ret) { 1842 ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
1586 kvm_err("Cannot register vgic CPU notifier\n"); 1843 nr_cpus, nr_irqs);
1587 goto out_free_irq;
1588 }
1589 1844
1590 /* Callback into for arch code for setup */ 1845 if (ret)
1591 vgic_arch_setup(vgic); 1846 goto out;
1592 1847
1593 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); 1848 kvm_for_each_vcpu(i, vcpu, kvm) {
1849 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
1850 if (ret) {
1851 kvm_err("VGIC: Failed to allocate vcpu memory\n");
1852 break;
1853 }
1854 }
1594 1855
1595 return 0; 1856 for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
1857 vgic_set_target_reg(kvm, 0, i);
1858
1859out:
1860 if (ret)
1861 kvm_vgic_destroy(kvm);
1596 1862
1597out_free_irq:
1598 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
1599 return ret; 1863 return ret;
1600} 1864}
1601 1865
@@ -1610,6 +1874,7 @@ out_free_irq:
1610 */ 1874 */
1611int kvm_vgic_init(struct kvm *kvm) 1875int kvm_vgic_init(struct kvm *kvm)
1612{ 1876{
1877 struct kvm_vcpu *vcpu;
1613 int ret = 0, i; 1878 int ret = 0, i;
1614 1879
1615 if (!irqchip_in_kernel(kvm)) 1880 if (!irqchip_in_kernel(kvm))
@@ -1627,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm)
1627 goto out; 1892 goto out;
1628 } 1893 }
1629 1894
1895 ret = vgic_init_maps(kvm);
1896 if (ret) {
1897 kvm_err("Unable to allocate maps\n");
1898 goto out;
1899 }
1900
1630 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, 1901 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
1631 vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); 1902 vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
1632 if (ret) { 1903 if (ret) {
@@ -1634,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm)
1634 goto out; 1905 goto out;
1635 } 1906 }
1636 1907
1637 for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) 1908 kvm_for_each_vcpu(i, vcpu, kvm)
1638 vgic_set_target_reg(kvm, 0, i); 1909 kvm_vgic_vcpu_init(vcpu);
1639 1910
1640 kvm->arch.vgic.ready = true; 1911 kvm->arch.vgic.ready = true;
1641out: 1912out:
1913 if (ret)
1914 kvm_vgic_destroy(kvm);
1642 mutex_unlock(&kvm->lock); 1915 mutex_unlock(&kvm->lock);
1643 return ret; 1916 return ret;
1644} 1917}
@@ -1690,7 +1963,7 @@ out:
1690 return ret; 1963 return ret;
1691} 1964}
1692 1965
1693static bool vgic_ioaddr_overlap(struct kvm *kvm) 1966static int vgic_ioaddr_overlap(struct kvm *kvm)
1694{ 1967{
1695 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; 1968 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
1696 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; 1969 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
@@ -1879,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
1879 2152
1880 mutex_lock(&dev->kvm->lock); 2153 mutex_lock(&dev->kvm->lock);
1881 2154
2155 ret = vgic_init_maps(dev->kvm);
2156 if (ret)
2157 goto out;
2158
1882 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { 2159 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
1883 ret = -EINVAL; 2160 ret = -EINVAL;
1884 goto out; 2161 goto out;
@@ -1976,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1976 2253
1977 return vgic_attr_regs_access(dev, attr, &reg, true); 2254 return vgic_attr_regs_access(dev, attr, &reg, true);
1978 } 2255 }
2256 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2257 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2258 u32 val;
2259 int ret = 0;
2260
2261 if (get_user(val, uaddr))
2262 return -EFAULT;
2263
2264 /*
2265 * We require:
2266 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
2267 * - at most 1024 interrupts
2268 * - a multiple of 32 interrupts
2269 */
2270 if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
2271 val > VGIC_MAX_IRQS ||
2272 (val & 31))
2273 return -EINVAL;
2274
2275 mutex_lock(&dev->kvm->lock);
2276
2277 if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
2278 ret = -EBUSY;
2279 else
2280 dev->kvm->arch.vgic.nr_irqs = val;
2281
2282 mutex_unlock(&dev->kvm->lock);
2283
2284 return ret;
2285 }
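For completeness, userspace would drive this new attribute roughly as sketched below: write a u32 through KVM_SET_DEVICE_ATTR on the vgic device fd, group KVM_DEV_ARM_VGIC_GRP_NR_IRQS, before the VGIC is initialized. This is a hedged sketch, not a tested client: it assumes an arm/arm64 build where <linux/kvm.h> pulls in the arm uapi definitions, and vgic_fd is assumed to come from KVM_CREATE_DEVICE.

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Configure the number of vgic interrupts: a multiple of 32,
     * at least 64 and at most 1024, per the checks in vgic_set_attr above. */
    static int demo_set_nr_irqs(int vgic_fd, uint32_t nr_irqs)
    {
            struct kvm_device_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS;
            attr.attr  = 0;
            attr.addr  = (uint64_t)(unsigned long)&nr_irqs;

            return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
    }

Once any vcpu has run, or once the maps have been allocated, the setter returns -EBUSY, which is why this has to happen early in VM setup.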
1979 2286
1980 } 2287 }
1981 2288
@@ -2012,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2012 r = put_user(reg, uaddr); 2319 r = put_user(reg, uaddr);
2013 break; 2320 break;
2014 } 2321 }
2322 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2323 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2324 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
2325 break;
2326 }
2015 2327
2016 } 2328 }
2017 2329
@@ -2048,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2048 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: 2360 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
2049 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; 2361 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2050 return vgic_has_attr_regs(vgic_cpu_ranges, offset); 2362 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
2363 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
2364 return 0;
2051 } 2365 }
2052 return -ENXIO; 2366 return -ENXIO;
2053} 2367}
@@ -2062,7 +2376,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
2062 return kvm_vgic_create(dev->kvm); 2376 return kvm_vgic_create(dev->kvm);
2063} 2377}
2064 2378
2065struct kvm_device_ops kvm_arm_vgic_v2_ops = { 2379static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2066 .name = "kvm-arm-vgic", 2380 .name = "kvm-arm-vgic",
2067 .create = vgic_create, 2381 .create = vgic_create,
2068 .destroy = vgic_destroy, 2382 .destroy = vgic_destroy,
@@ -2070,3 +2384,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2070 .get_attr = vgic_get_attr, 2384 .get_attr = vgic_get_attr,
2071 .has_attr = vgic_has_attr, 2385 .has_attr = vgic_has_attr,
2072}; 2386};
2387
2388static void vgic_init_maintenance_interrupt(void *info)
2389{
2390 enable_percpu_irq(vgic->maint_irq, 0);
2391}
2392
2393static int vgic_cpu_notify(struct notifier_block *self,
2394 unsigned long action, void *cpu)
2395{
2396 switch (action) {
2397 case CPU_STARTING:
2398 case CPU_STARTING_FROZEN:
2399 vgic_init_maintenance_interrupt(NULL);
2400 break;
2401 case CPU_DYING:
2402 case CPU_DYING_FROZEN:
2403 disable_percpu_irq(vgic->maint_irq);
2404 break;
2405 }
2406
2407 return NOTIFY_OK;
2408}
2409
2410static struct notifier_block vgic_cpu_nb = {
2411 .notifier_call = vgic_cpu_notify,
2412};
2413
2414static const struct of_device_id vgic_ids[] = {
2415 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
2416 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
2417 {},
2418};
2419
2420int kvm_vgic_hyp_init(void)
2421{
2422 const struct of_device_id *matched_id;
 2423	const int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
2424 const struct vgic_params **);
2425 struct device_node *vgic_node;
2426 int ret;
2427
2428 vgic_node = of_find_matching_node_and_match(NULL,
2429 vgic_ids, &matched_id);
2430 if (!vgic_node) {
2431 kvm_err("error: no compatible GIC node found\n");
2432 return -ENODEV;
2433 }
2434
2435 vgic_probe = matched_id->data;
2436 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
2437 if (ret)
2438 return ret;
2439
2440 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2441 "vgic", kvm_get_running_vcpus());
2442 if (ret) {
2443 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
2444 return ret;
2445 }
2446
2447 ret = __register_cpu_notifier(&vgic_cpu_nb);
2448 if (ret) {
2449 kvm_err("Cannot register vgic CPU notifier\n");
2450 goto out_free_irq;
2451 }
2452
 2453	/* Callback into arch code for setup */
2454 vgic_arch_setup(vgic);
2455
2456 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2457
2458 return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
2459 KVM_DEV_TYPE_ARM_VGIC_V2);
2460
2461out_free_irq:
2462 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
2463 return ret;
2464}
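
kvm_vgic_hyp_init() above requests the maintenance interrupt as a per-CPU IRQ with kvm_get_running_vcpus() as the percpu dev_id, and the CPU notifier re-enables it on CPUs that come online later, because a per-CPU IRQ stays disabled on each CPU until that CPU calls enable_percpu_irq(). The sketch below illustrates that pattern with a hypothetical counter and handler; the real vgic_maintenance_handler lives earlier in vgic.c and is not shown in this hunk.

#include <linux/interrupt.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_maint_hits);

/* The handler runs on the CPU that took the interrupt and receives that
 * CPU's slot of the percpu dev_id passed to request_percpu_irq(). */
static irqreturn_t example_maint_handler(int irq, void *dev_id)
{
        unsigned long *hits = dev_id;   /* this CPU's example_maint_hits */

        (*hits)++;
        return IRQ_HANDLED;
}

static int example_maint_setup(unsigned int irq)
{
        int ret;

        ret = request_percpu_irq(irq, example_maint_handler,
                                 "example-maint", &example_maint_hits);
        if (ret)
                return ret;

        /* Must be repeated per CPU, e.g. from a CPU_STARTING notifier. */
        enable_percpu_irq(irq, 0);
        return 0;
}
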
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..5ff7f7f2689a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
80 80
81 might_sleep(); 81 might_sleep();
82 82
83 down_read(&mm->mmap_sem); 83 kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
84 get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
85 up_read(&mm->mmap_sem);
86 kvm_async_page_present_sync(vcpu, apf); 84 kvm_async_page_present_sync(vcpu, apf);
87 85
88 spin_lock(&vcpu->async_pf.lock); 86 spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 3c5981c87c3f..b0fb390943c6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,9 @@
36#include <linux/seqlock.h> 36#include <linux/seqlock.h>
37#include <trace/events/kvm.h> 37#include <trace/events/kvm.h>
38 38
39#include "irq.h" 39#ifdef __KVM_HAVE_IOAPIC
40#include "ioapic.h"
41#endif
40#include "iodev.h" 42#include "iodev.h"
41 43
42#ifdef CONFIG_HAVE_KVM_IRQFD 44#ifdef CONFIG_HAVE_KVM_IRQFD
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e8ce34c9db32..0ba4057d271b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
405 spin_unlock(&ioapic->lock); 405 spin_unlock(&ioapic->lock);
406} 406}
407 407
408static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
409{
410 int i;
411 struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
412 eoi_inject.work);
413 spin_lock(&ioapic->lock);
414 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
415 union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
416
417 if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
418 continue;
419
420 if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
421 ioapic_service(ioapic, i, false);
422 }
423 spin_unlock(&ioapic->lock);
424}
425
426#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
427
408static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, 428static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
409 struct kvm_ioapic *ioapic, int vector, int trigger_mode) 429 struct kvm_ioapic *ioapic, int vector, int trigger_mode)
410{ 430{
@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
435 455
436 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); 456 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
437 ent->fields.remote_irr = 0; 457 ent->fields.remote_irr = 0;
438 if (ioapic->irr & (1 << i)) 458 if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
439 ioapic_service(ioapic, i, false); 459 ++ioapic->irq_eoi[i];
460 if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
461 /*
462 * Real hardware does not deliver the interrupt
463 * immediately during eoi broadcast, and this
464 * lets a buggy guest make slow progress
465 * even if it does not correctly handle a
466 * level-triggered interrupt. Emulate this
467 * behavior if we detect an interrupt storm.
468 */
469 schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
470 ioapic->irq_eoi[i] = 0;
471 trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
472 } else {
473 ioapic_service(ioapic, i, false);
474 }
475 } else {
476 ioapic->irq_eoi[i] = 0;
477 }
440 } 478 }
441} 479}
442 480
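
The hunk above counts how many times in a row an EOI for a level-triggered pin finds the line still asserted; once the count reaches IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT (10000), re-injection is deferred by HZ / 100 jiffies (about 10 ms) through the eoi_inject delayed work, so a guest that never deasserts the line can still make progress. A generic sketch of that throttle shape, with hypothetical names:

#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#define EXAMPLE_STORM_THRESHOLD 10000   /* mirrors IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT */

/* Returns true when the interrupt should be serviced immediately, false
 * when it was deferred because a storm was detected. */
static bool example_storm_throttle(u32 *successive, struct delayed_work *deferred)
{
        if (++(*successive) < EXAMPLE_STORM_THRESHOLD)
                return true;

        *successive = 0;
        schedule_delayed_work(deferred, HZ / 100);      /* ~10 ms back-off */
        return false;
}
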
@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
565{ 603{
566 int i; 604 int i;
567 605
606 cancel_delayed_work_sync(&ioapic->eoi_inject);
568 for (i = 0; i < IOAPIC_NUM_PINS; i++) 607 for (i = 0; i < IOAPIC_NUM_PINS; i++)
569 ioapic->redirtbl[i].fields.mask = 1; 608 ioapic->redirtbl[i].fields.mask = 1;
570 ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; 609 ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
571 ioapic->ioregsel = 0; 610 ioapic->ioregsel = 0;
572 ioapic->irr = 0; 611 ioapic->irr = 0;
573 ioapic->id = 0; 612 ioapic->id = 0;
 613	memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi));
574 rtc_irq_eoi_tracking_reset(ioapic); 614 rtc_irq_eoi_tracking_reset(ioapic);
575 update_handled_vectors(ioapic); 615 update_handled_vectors(ioapic);
576} 616}
@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm)
589 if (!ioapic) 629 if (!ioapic)
590 return -ENOMEM; 630 return -ENOMEM;
591 spin_lock_init(&ioapic->lock); 631 spin_lock_init(&ioapic->lock);
632 INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
592 kvm->arch.vioapic = ioapic; 633 kvm->arch.vioapic = ioapic;
593 kvm_ioapic_reset(ioapic); 634 kvm_ioapic_reset(ioapic);
594 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); 635 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
609{ 650{
610 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 651 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
611 652
653 cancel_delayed_work_sync(&ioapic->eoi_inject);
612 if (ioapic) { 654 if (ioapic) {
613 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); 655 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
614 kvm->arch.vioapic = NULL; 656 kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 90d43e95dcf8..e23b70634f1e 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -59,6 +59,8 @@ struct kvm_ioapic {
59 spinlock_t lock; 59 spinlock_t lock;
60 DECLARE_BITMAP(handled_vectors, 256); 60 DECLARE_BITMAP(handled_vectors, 256);
61 struct rtc_status rtc_status; 61 struct rtc_status rtc_status;
62 struct delayed_work eoi_inject;
63 u32 irq_eoi[IOAPIC_NUM_PINS];
62}; 64};
63 65
64#ifdef DEBUG 66#ifdef DEBUG
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 95519bc959ed..384eaa7b02fa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -52,11 +52,13 @@
52 52
53#include <asm/processor.h> 53#include <asm/processor.h>
54#include <asm/io.h> 54#include <asm/io.h>
55#include <asm/ioctl.h>
55#include <asm/uaccess.h> 56#include <asm/uaccess.h>
56#include <asm/pgtable.h> 57#include <asm/pgtable.h>
57 58
58#include "coalesced_mmio.h" 59#include "coalesced_mmio.h"
59#include "async_pf.h" 60#include "async_pf.h"
61#include "vfio.h"
60 62
61#define CREATE_TRACE_POINTS 63#define CREATE_TRACE_POINTS
62#include <trace/events/kvm.h> 64#include <trace/events/kvm.h>
@@ -95,8 +97,6 @@ static int hardware_enable_all(void);
95static void hardware_disable_all(void); 97static void hardware_disable_all(void);
96 98
97static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 99static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
98static void update_memslots(struct kvm_memslots *slots,
99 struct kvm_memory_slot *new, u64 last_generation);
100 100
101static void kvm_release_pfn_dirty(pfn_t pfn); 101static void kvm_release_pfn_dirty(pfn_t pfn);
102static void mark_page_dirty_in_slot(struct kvm *kvm, 102static void mark_page_dirty_in_slot(struct kvm *kvm,
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
129 struct pid *oldpid = vcpu->pid; 129 struct pid *oldpid = vcpu->pid;
130 struct pid *newpid = get_task_pid(current, PIDTYPE_PID); 130 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
131 rcu_assign_pointer(vcpu->pid, newpid); 131 rcu_assign_pointer(vcpu->pid, newpid);
132 synchronize_rcu(); 132 if (oldpid)
133 synchronize_rcu();
133 put_pid(oldpid); 134 put_pid(oldpid);
134 } 135 }
135 cpu = get_cpu(); 136 cpu = get_cpu();
@@ -152,7 +153,7 @@ static void ack_flush(void *_completed)
152{ 153{
153} 154}
154 155
155static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) 156bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
156{ 157{
157 int i, cpu, me; 158 int i, cpu, me;
158 cpumask_var_t cpus; 159 cpumask_var_t cpus;
@@ -189,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
189 long dirty_count = kvm->tlbs_dirty; 190 long dirty_count = kvm->tlbs_dirty;
190 191
191 smp_mb(); 192 smp_mb();
192 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) 193 if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
193 ++kvm->stat.remote_tlb_flush; 194 ++kvm->stat.remote_tlb_flush;
194 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); 195 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
195} 196}
@@ -197,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
197 198
198void kvm_reload_remote_mmus(struct kvm *kvm) 199void kvm_reload_remote_mmus(struct kvm *kvm)
199{ 200{
200 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); 201 kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
201} 202}
202 203
203void kvm_make_mclock_inprogress_request(struct kvm *kvm) 204void kvm_make_mclock_inprogress_request(struct kvm *kvm)
204{ 205{
205 make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); 206 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
206} 207}
207 208
208void kvm_make_scan_ioapic_request(struct kvm *kvm) 209void kvm_make_scan_ioapic_request(struct kvm *kvm)
209{ 210{
210 make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); 211 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
211} 212}
212 213
213int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 214int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -295,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
295 kvm_flush_remote_tlbs(kvm); 296 kvm_flush_remote_tlbs(kvm);
296 297
297 spin_unlock(&kvm->mmu_lock); 298 spin_unlock(&kvm->mmu_lock);
299
300 kvm_arch_mmu_notifier_invalidate_page(kvm, address);
301
298 srcu_read_unlock(&kvm->srcu, idx); 302 srcu_read_unlock(&kvm->srcu, idx);
299} 303}
300 304
@@ -368,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
368 372
369static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, 373static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
370 struct mm_struct *mm, 374 struct mm_struct *mm,
371 unsigned long address) 375 unsigned long start,
376 unsigned long end)
372{ 377{
373 struct kvm *kvm = mmu_notifier_to_kvm(mn); 378 struct kvm *kvm = mmu_notifier_to_kvm(mn);
374 int young, idx; 379 int young, idx;
@@ -376,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
376 idx = srcu_read_lock(&kvm->srcu); 381 idx = srcu_read_lock(&kvm->srcu);
377 spin_lock(&kvm->mmu_lock); 382 spin_lock(&kvm->mmu_lock);
378 383
379 young = kvm_age_hva(kvm, address); 384 young = kvm_age_hva(kvm, start, end);
380 if (young) 385 if (young)
381 kvm_flush_remote_tlbs(kvm); 386 kvm_flush_remote_tlbs(kvm);
382 387
@@ -476,6 +481,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
476 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 481 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
477 if (!kvm->memslots) 482 if (!kvm->memslots)
478 goto out_err_no_srcu; 483 goto out_err_no_srcu;
484
485 /*
 486	 * Init kvm generation close to the maximum to easily exercise
 487	 * the code that handles generation number wrap-around.
488 */
489 kvm->memslots->generation = -150;
490
479 kvm_init_memslots_id(kvm); 491 kvm_init_memslots_id(kvm);
480 if (init_srcu_struct(&kvm->srcu)) 492 if (init_srcu_struct(&kvm->srcu))
481 goto out_err_no_srcu; 493 goto out_err_no_srcu;
@@ -687,8 +699,7 @@ static void sort_memslots(struct kvm_memslots *slots)
687} 699}
688 700
689static void update_memslots(struct kvm_memslots *slots, 701static void update_memslots(struct kvm_memslots *slots,
690 struct kvm_memory_slot *new, 702 struct kvm_memory_slot *new)
691 u64 last_generation)
692{ 703{
693 if (new) { 704 if (new) {
694 int id = new->id; 705 int id = new->id;
@@ -699,15 +710,13 @@ static void update_memslots(struct kvm_memslots *slots,
699 if (new->npages != npages) 710 if (new->npages != npages)
700 sort_memslots(slots); 711 sort_memslots(slots);
701 } 712 }
702
703 slots->generation = last_generation + 1;
704} 713}
705 714
706static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) 715static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
707{ 716{
708 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; 717 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
709 718
710#ifdef KVM_CAP_READONLY_MEM 719#ifdef __KVM_HAVE_READONLY_MEM
711 valid_flags |= KVM_MEM_READONLY; 720 valid_flags |= KVM_MEM_READONLY;
712#endif 721#endif
713 722
@@ -722,10 +731,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
722{ 731{
723 struct kvm_memslots *old_memslots = kvm->memslots; 732 struct kvm_memslots *old_memslots = kvm->memslots;
724 733
725 update_memslots(slots, new, kvm->memslots->generation); 734 /*
735 * Set the low bit in the generation, which disables SPTE caching
736 * until the end of synchronize_srcu_expedited.
737 */
738 WARN_ON(old_memslots->generation & 1);
739 slots->generation = old_memslots->generation + 1;
740
741 update_memslots(slots, new);
726 rcu_assign_pointer(kvm->memslots, slots); 742 rcu_assign_pointer(kvm->memslots, slots);
727 synchronize_srcu_expedited(&kvm->srcu); 743 synchronize_srcu_expedited(&kvm->srcu);
728 744
745 /*
746 * Increment the new memslot generation a second time. This prevents
747 * vm exits that race with memslot updates from caching a memslot
748 * generation that will (potentially) be valid forever.
749 */
750 slots->generation++;
751
729 kvm_arch_memslots_updated(kvm); 752 kvm_arch_memslots_updated(kvm);
730 753
731 return old_memslots; 754 return old_memslots;
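
Between the two increments above the generation is odd, which is the signal that an update is in flight: anything that caches state keyed by the memslot generation (such as the x86 MMIO SPTE cache) must refuse to cache while the low bit is set, otherwise a value captured during the update window could stay valid forever. A hedged sketch of the consumer-side rule, with illustrative names:

#include <linux/types.h>

/* Odd generation: install_new_memslots() is between its two increments,
 * so nothing derived from the memslots may be cached right now. */
static bool example_memslot_gen_cacheable(u64 gen)
{
        return !(gen & 1);
}

static bool example_cached_gen_valid(u64 cached_gen, u64 current_gen)
{
        return example_memslot_gen_cacheable(current_gen) &&
               cached_gen == current_gen;
}
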
@@ -776,7 +799,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
776 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 799 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
777 npages = mem->memory_size >> PAGE_SHIFT; 800 npages = mem->memory_size >> PAGE_SHIFT;
778 801
779 r = -EINVAL;
780 if (npages > KVM_MEM_MAX_NR_PAGES) 802 if (npages > KVM_MEM_MAX_NR_PAGES)
781 goto out; 803 goto out;
782 804
@@ -790,7 +812,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
790 new.npages = npages; 812 new.npages = npages;
791 new.flags = mem->flags; 813 new.flags = mem->flags;
792 814
793 r = -EINVAL;
794 if (npages) { 815 if (npages) {
795 if (!old.npages) 816 if (!old.npages)
796 change = KVM_MR_CREATE; 817 change = KVM_MR_CREATE;
@@ -846,7 +867,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
846 } 867 }
847 868
848 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { 869 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
849 r = -ENOMEM;
850 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), 870 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
851 GFP_KERNEL); 871 GFP_KERNEL);
852 if (!slots) 872 if (!slots)
@@ -1075,9 +1095,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
1075 * If writable is set to false, the hva returned by this function is only 1095 * If writable is set to false, the hva returned by this function is only
1076 * allowed to be read. 1096 * allowed to be read.
1077 */ 1097 */
1078unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) 1098unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
1099 gfn_t gfn, bool *writable)
1079{ 1100{
1080 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1081 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); 1101 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
1082 1102
1083 if (!kvm_is_error_hva(hva) && writable) 1103 if (!kvm_is_error_hva(hva) && writable)
@@ -1086,6 +1106,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
1086 return hva; 1106 return hva;
1087} 1107}
1088 1108
1109unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
1110{
1111 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1112
1113 return gfn_to_hva_memslot_prot(slot, gfn, writable);
1114}
1115
1089static int kvm_read_hva(void *data, void __user *hva, int len) 1116static int kvm_read_hva(void *data, void __user *hva, int len)
1090{ 1117{
1091 return __copy_from_user(data, hva, len); 1118 return __copy_from_user(data, hva, len);
@@ -1107,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
1107 return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); 1134 return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
1108} 1135}
1109 1136
1137int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
1138 unsigned long addr, bool write_fault,
1139 struct page **pagep)
1140{
1141 int npages;
1142 int locked = 1;
1143 int flags = FOLL_TOUCH | FOLL_HWPOISON |
1144 (pagep ? FOLL_GET : 0) |
1145 (write_fault ? FOLL_WRITE : 0);
1146
1147 /*
1148 * If retrying the fault, we get here *not* having allowed the filemap
1149 * to wait on the page lock. We should now allow waiting on the IO with
1150 * the mmap semaphore released.
1151 */
1152 down_read(&mm->mmap_sem);
1153 npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
1154 &locked);
1155 if (!locked) {
1156 VM_BUG_ON(npages);
1157
1158 if (!pagep)
1159 return 0;
1160
1161 /*
1162 * The previous call has now waited on the IO. Now we can
 1163		 * retry and complete. Pass FOLL_TRIED so we do not
 1164		 * reschedule async IO (see e.g. filemap_fault).
1165 */
1166 down_read(&mm->mmap_sem);
1167 npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
1168 pagep, NULL, NULL);
1169 }
1170 up_read(&mm->mmap_sem);
1171 return npages;
1172}
1173
1110static inline int check_user_page_hwpoison(unsigned long addr) 1174static inline int check_user_page_hwpoison(unsigned long addr)
1111{ 1175{
1112 int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; 1176 int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
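
kvm_get_user_page_io() above takes mmap_sem itself; when __get_user_pages() has to wait for IO it comes back with locked set to 0 and the semaphore already dropped, and the second call retries with FOLL_TRIED so the fault can complete without re-queueing async IO. A hedged caller sketch follows (the helper name and error handling are illustrative, and the includes assume the new declaration is visible via linux/kvm_host.h):

#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/sched.h>

/* Resolve one guest HVA, letting mmap_sem be released while IO completes.
 * A non-NULL pagep requests FOLL_GET, so the reference must be dropped;
 * async_pf_execute() above passes NULL and only wants the fault resolved. */
static int example_prefault_hva(struct mm_struct *mm, unsigned long hva,
                                bool write)
{
        struct page *page;

        if (kvm_get_user_page_io(current, mm, hva, write, &page) != 1)
                return -EFAULT;

        put_page(page);
        return 0;
}
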
@@ -1169,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
1169 npages = get_user_page_nowait(current, current->mm, 1233 npages = get_user_page_nowait(current, current->mm,
1170 addr, write_fault, page); 1234 addr, write_fault, page);
1171 up_read(&current->mm->mmap_sem); 1235 up_read(&current->mm->mmap_sem);
1172 } else 1236 } else {
1173 npages = get_user_pages_fast(addr, 1, write_fault, 1237 /*
1174 page); 1238 * By now we have tried gup_fast, and possibly async_pf, and we
1239 * are certainly not atomic. Time to retry the gup, allowing
1240 * mmap semaphore to be relinquished in the case of IO.
1241 */
1242 npages = kvm_get_user_page_io(current, current->mm, addr,
1243 write_fault, page);
1244 }
1175 if (npages != 1) 1245 if (npages != 1)
1176 return npages; 1246 return npages;
1177 1247
@@ -1768,8 +1838,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
1768 bool eligible; 1838 bool eligible;
1769 1839
1770 eligible = !vcpu->spin_loop.in_spin_loop || 1840 eligible = !vcpu->spin_loop.in_spin_loop ||
1771 (vcpu->spin_loop.in_spin_loop && 1841 vcpu->spin_loop.dy_eligible;
1772 vcpu->spin_loop.dy_eligible);
1773 1842
1774 if (vcpu->spin_loop.in_spin_loop) 1843 if (vcpu->spin_loop.in_spin_loop)
1775 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); 1844 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
@@ -1975,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
1975 if (vcpu->kvm->mm != current->mm) 2044 if (vcpu->kvm->mm != current->mm)
1976 return -EIO; 2045 return -EIO;
1977 2046
2047 if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
2048 return -EINVAL;
2049
1978#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) 2050#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
1979 /* 2051 /*
1980 * Special cases: vcpu ioctls that are asynchronous to vcpu execution, 2052 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
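
The check added at the top of kvm_vcpu_ioctl() above rejects any ioctl whose type byte is not KVMIO (0xAE) before doing any further work, so ioctl numbers belonging to other subsystems that merely happen to reach a vcpu fd fail fast with -EINVAL. A tiny userspace illustration of what _IOC_TYPE() extracts:

#include <assert.h>
#include <linux/kvm.h>

int main(void)
{
        /* All KVM ioctls encode KVMIO (0xAE) in the _IOC_TYPE field. */
        assert(_IOC_TYPE(KVM_GET_API_VERSION) == KVMIO);
        assert(_IOC_TYPE(KVM_RUN) == KVMIO);
        return 0;
}
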
@@ -2259,6 +2331,29 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
2259 return filp->private_data; 2331 return filp->private_data;
2260} 2332}
2261 2333
2334static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
2335#ifdef CONFIG_KVM_MPIC
2336 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
2337 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
2338#endif
2339
2340#ifdef CONFIG_KVM_XICS
2341 [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
2342#endif
2343};
2344
2345int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
2346{
2347 if (type >= ARRAY_SIZE(kvm_device_ops_table))
2348 return -ENOSPC;
2349
2350 if (kvm_device_ops_table[type] != NULL)
2351 return -EEXIST;
2352
2353 kvm_device_ops_table[type] = ops;
2354 return 0;
2355}
2356
2262static int kvm_ioctl_create_device(struct kvm *kvm, 2357static int kvm_ioctl_create_device(struct kvm *kvm,
2263 struct kvm_create_device *cd) 2358 struct kvm_create_device *cd)
2264{ 2359{
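
With kvm_device_ops_table and kvm_register_device_ops() above, device backends publish their ops at init time instead of being enumerated in kvm_ioctl_create_device(); the vgic change earlier in this diff and kvm_vfio_ops_init() below are the in-tree callers. A hypothetical backend-side sketch (the ops, names, and the type argument are illustrative only):

#include <linux/kvm_host.h>

static int example_dev_create(struct kvm_device *dev, u32 type)
{
        return 0;       /* a real backend would allocate its state here */
}

static struct kvm_device_ops example_dev_ops = {
        .name   = "kvm-example",
        .create = example_dev_create,
};

/* 'type' would be the backend's own KVM_DEV_TYPE_* value; registration
 * fails with -EEXIST if that slot is already claimed. */
static int example_dev_register(u32 type)
{
        return kvm_register_device_ops(&example_dev_ops, type);
}
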
@@ -2267,36 +2362,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2267 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; 2362 bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
2268 int ret; 2363 int ret;
2269 2364
2270 switch (cd->type) { 2365 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
2271#ifdef CONFIG_KVM_MPIC 2366 return -ENODEV;
2272 case KVM_DEV_TYPE_FSL_MPIC_20: 2367
2273 case KVM_DEV_TYPE_FSL_MPIC_42: 2368 ops = kvm_device_ops_table[cd->type];
2274 ops = &kvm_mpic_ops; 2369 if (ops == NULL)
2275 break;
2276#endif
2277#ifdef CONFIG_KVM_XICS
2278 case KVM_DEV_TYPE_XICS:
2279 ops = &kvm_xics_ops;
2280 break;
2281#endif
2282#ifdef CONFIG_KVM_VFIO
2283 case KVM_DEV_TYPE_VFIO:
2284 ops = &kvm_vfio_ops;
2285 break;
2286#endif
2287#ifdef CONFIG_KVM_ARM_VGIC
2288 case KVM_DEV_TYPE_ARM_VGIC_V2:
2289 ops = &kvm_arm_vgic_v2_ops;
2290 break;
2291#endif
2292#ifdef CONFIG_S390
2293 case KVM_DEV_TYPE_FLIC:
2294 ops = &kvm_flic_ops;
2295 break;
2296#endif
2297 default:
2298 return -ENODEV; 2370 return -ENODEV;
2299 }
2300 2371
2301 if (test) 2372 if (test)
2302 return 0; 2373 return 0;
@@ -2611,7 +2682,6 @@ static long kvm_dev_ioctl(struct file *filp,
2611 2682
2612 switch (ioctl) { 2683 switch (ioctl) {
2613 case KVM_GET_API_VERSION: 2684 case KVM_GET_API_VERSION:
2614 r = -EINVAL;
2615 if (arg) 2685 if (arg)
2616 goto out; 2686 goto out;
2617 r = KVM_API_VERSION; 2687 r = KVM_API_VERSION;
@@ -2623,7 +2693,6 @@ static long kvm_dev_ioctl(struct file *filp,
2623 r = kvm_vm_ioctl_check_extension_generic(NULL, arg); 2693 r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
2624 break; 2694 break;
2625 case KVM_GET_VCPU_MMAP_SIZE: 2695 case KVM_GET_VCPU_MMAP_SIZE:
2626 r = -EINVAL;
2627 if (arg) 2696 if (arg)
2628 goto out; 2697 goto out;
2629 r = PAGE_SIZE; /* struct kvm_run */ 2698 r = PAGE_SIZE; /* struct kvm_run */
@@ -2668,7 +2737,7 @@ static void hardware_enable_nolock(void *junk)
2668 2737
2669 cpumask_set_cpu(cpu, cpus_hardware_enabled); 2738 cpumask_set_cpu(cpu, cpus_hardware_enabled);
2670 2739
2671 r = kvm_arch_hardware_enable(NULL); 2740 r = kvm_arch_hardware_enable();
2672 2741
2673 if (r) { 2742 if (r) {
2674 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2743 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2693,7 +2762,7 @@ static void hardware_disable_nolock(void *junk)
2693 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 2762 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
2694 return; 2763 return;
2695 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2764 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
2696 kvm_arch_hardware_disable(NULL); 2765 kvm_arch_hardware_disable();
2697} 2766}
2698 2767
2699static void hardware_disable(void) 2768static void hardware_disable(void)
@@ -3123,6 +3192,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3123 if (vcpu->preempted) 3192 if (vcpu->preempted)
3124 vcpu->preempted = false; 3193 vcpu->preempted = false;
3125 3194
3195 kvm_arch_sched_in(vcpu, cpu);
3196
3126 kvm_arch_vcpu_load(vcpu, cpu); 3197 kvm_arch_vcpu_load(vcpu, cpu);
3127} 3198}
3128 3199
@@ -3214,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
3214 goto out_undebugfs; 3285 goto out_undebugfs;
3215 } 3286 }
3216 3287
3288 r = kvm_vfio_ops_init();
3289 WARN_ON(r);
3290
3217 return 0; 3291 return 0;
3218 3292
3219out_undebugfs: 3293out_undebugfs:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f935c7..281e7cf2b8e5 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/uaccess.h> 19#include <linux/uaccess.h>
20#include <linux/vfio.h> 20#include <linux/vfio.h>
21#include "vfio.h"
21 22
22struct kvm_vfio_group { 23struct kvm_vfio_group {
23 struct list_head node; 24 struct list_head node;
@@ -246,6 +247,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
246 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ 247 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
247} 248}
248 249
250static int kvm_vfio_create(struct kvm_device *dev, u32 type);
251
252static struct kvm_device_ops kvm_vfio_ops = {
253 .name = "kvm-vfio",
254 .create = kvm_vfio_create,
255 .destroy = kvm_vfio_destroy,
256 .set_attr = kvm_vfio_set_attr,
257 .has_attr = kvm_vfio_has_attr,
258};
259
249static int kvm_vfio_create(struct kvm_device *dev, u32 type) 260static int kvm_vfio_create(struct kvm_device *dev, u32 type)
250{ 261{
251 struct kvm_device *tmp; 262 struct kvm_device *tmp;
@@ -268,10 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
268 return 0; 279 return 0;
269} 280}
270 281
271struct kvm_device_ops kvm_vfio_ops = { 282int kvm_vfio_ops_init(void)
272 .name = "kvm-vfio", 283{
273 .create = kvm_vfio_create, 284 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
274 .destroy = kvm_vfio_destroy, 285}
275 .set_attr = kvm_vfio_set_attr,
276 .has_attr = kvm_vfio_has_attr,
277};
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
new file mode 100644
index 000000000000..92eac75d6b62
--- /dev/null
+++ b/virt/kvm/vfio.h
@@ -0,0 +1,13 @@
1#ifndef __KVM_VFIO_H
2#define __KVM_VFIO_H
3
4#ifdef CONFIG_KVM_VFIO
5int kvm_vfio_ops_init(void);
6#else
7static inline int kvm_vfio_ops_init(void)
8{
9 return 0;
10}
11#endif
12
13#endif