101 files changed, 3339 insertions, 1822 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index beae3fde075e..7610eaa4d491 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -1901,6 +1901,8 @@ registers, find a list below: | |||
1901 | PPC | KVM_REG_PPC_ARCH_COMPAT | 32 | 1901 | PPC | KVM_REG_PPC_ARCH_COMPAT | 32 |
1902 | PPC | KVM_REG_PPC_DABRX | 32 | 1902 | PPC | KVM_REG_PPC_DABRX | 32 |
1903 | PPC | KVM_REG_PPC_WORT | 64 | 1903 | PPC | KVM_REG_PPC_WORT | 64 |
1904 | PPC | KVM_REG_PPC_SPRG9 | 64 | ||
1905 | PPC | KVM_REG_PPC_DBSR | 32 | ||
1904 | PPC | KVM_REG_PPC_TM_GPR0 | 64 | 1906 | PPC | KVM_REG_PPC_TM_GPR0 | 64 |
1905 | ... | 1907 | ... |
1906 | PPC | KVM_REG_PPC_TM_GPR31 | 64 | 1908 | PPC | KVM_REG_PPC_TM_GPR31 | 64 |
@@ -2565,6 +2567,120 @@ associated with the service will be forgotten, and subsequent RTAS | |||
2565 | calls by the guest for that service will be passed to userspace to be | 2567 | calls by the guest for that service will be passed to userspace to be |
2566 | handled. | 2568 | handled. |
2567 | 2569 | ||
2570 | 4.87 KVM_SET_GUEST_DEBUG | ||
2571 | |||
2572 | Capability: KVM_CAP_SET_GUEST_DEBUG | ||
2573 | Architectures: x86, s390, ppc | ||
2574 | Type: vcpu ioctl | ||
2575 | Parameters: struct kvm_guest_debug (in) | ||
2576 | Returns: 0 on success; -1 on error | ||
2577 | |||
2578 | struct kvm_guest_debug { | ||
2579 | __u32 control; | ||
2580 | __u32 pad; | ||
2581 | struct kvm_guest_debug_arch arch; | ||
2582 | }; | ||
2583 | |||
2584 | Set up the processor-specific debug registers and configure the vcpu | ||
2585 | for handling guest debug events. There are two parts to the structure: | ||
2586 | the first, a control bitfield, indicates the type of debug events to | ||
2587 | handle when running. Common control bits are: | ||
2588 | |||
2589 | - KVM_GUESTDBG_ENABLE: guest debugging is enabled | ||
2590 | - KVM_GUESTDBG_SINGLESTEP: the next run should single-step | ||
2591 | |||
2592 | The top 16 bits of the control field are architecture specific control | ||
2593 | flags which can include the following: | ||
2594 | |||
2595 | - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86] | ||
2596 | - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390] | ||
2597 | - KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86] | ||
2598 | - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86] | ||
2599 | - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390] | ||
2600 | |||
2601 | For example, KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints | ||
2602 | are enabled in memory, so we need to ensure breakpoint exceptions are | ||
2603 | correctly trapped and the KVM run loop exits at the breakpoint rather | ||
2604 | than running off into the normal guest vector. For KVM_GUESTDBG_USE_HW_BP | ||
2605 | we need to ensure the guest vCPU's architecture-specific registers are | ||
2606 | updated to the correct (supplied) values. | ||
2607 | |||
2608 | The second part of the structure is architecture specific and | ||
2609 | typically contains a set of debug registers. | ||
2610 | |||
2611 | When debug events occur, the main run loop exits with the reason | ||
2612 | KVM_EXIT_DEBUG, and the kvm_debug_exit_arch part of the kvm_run | ||
2613 | structure contains architecture-specific debug information. | ||
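
For illustration, a minimal userspace sketch of this ioctl, assuming an
already-open vcpu file descriptor 'vcpu_fd' (the fd and helper names are
hypothetical, not part of this patch):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Enable single-stepping; the next KVM_RUN then exits back to
     * userspace with exit_reason == KVM_EXIT_DEBUG. */
    static int enable_singlestep(int vcpu_fd)
    {
            struct kvm_guest_debug dbg = {
                    .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
            };
            return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }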
2614 | |||
2615 | 4.88 KVM_GET_EMULATED_CPUID | ||
2616 | |||
2617 | Capability: KVM_CAP_EXT_EMUL_CPUID | ||
2618 | Architectures: x86 | ||
2619 | Type: system ioctl | ||
2620 | Parameters: struct kvm_cpuid2 (in/out) | ||
2621 | Returns: 0 on success, -1 on error | ||
2622 | |||
2623 | struct kvm_cpuid2 { | ||
2624 | __u32 nent; | ||
2625 | __u32 flags; | ||
2626 | struct kvm_cpuid_entry2 entries[0]; | ||
2627 | }; | ||
2628 | |||
2629 | The member 'flags' is used for passing flags from userspace. | ||
2630 | |||
2631 | #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) | ||
2632 | #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) | ||
2633 | #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) | ||
2634 | |||
2635 | struct kvm_cpuid_entry2 { | ||
2636 | __u32 function; | ||
2637 | __u32 index; | ||
2638 | __u32 flags; | ||
2639 | __u32 eax; | ||
2640 | __u32 ebx; | ||
2641 | __u32 ecx; | ||
2642 | __u32 edx; | ||
2643 | __u32 padding[3]; | ||
2644 | }; | ||
2645 | |||
2646 | This ioctl returns x86 cpuid features which are emulated by | ||
2647 | kvm. Userspace can use the information returned by this ioctl to query | ||
2648 | which features are emulated by kvm instead of being present natively. | ||
2649 | |||
2650 | Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 | ||
2651 | structure with the 'nent' field indicating the number of entries in | ||
2652 | the variable-size array 'entries'. If the number of entries is too low | ||
2653 | to describe the cpu capabilities, an error (E2BIG) is returned. If the | ||
2654 | number is too high, the 'nent' field is adjusted and an error (ENOMEM) | ||
2655 | is returned. If the number is just right, the 'nent' field is adjusted | ||
2656 | to the number of valid entries in the 'entries' array, which is then | ||
2657 | filled. | ||
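
A hedged userspace sketch of this probe-and-retry protocol (the name
'sys_fd' for an open /dev/kvm descriptor and the helper name are
assumptions, not part of this patch):

    #include <stdlib.h>
    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static struct kvm_cpuid2 *get_emulated_cpuid(int sys_fd)
    {
            __u32 nent = 8;
            struct kvm_cpuid2 *cpuid;

            for (;;) {
                    cpuid = calloc(1, sizeof(*cpuid) +
                                      nent * sizeof(struct kvm_cpuid_entry2));
                    if (!cpuid)
                            return NULL;
                    cpuid->nent = nent;
                    /* on success, nent is adjusted to the valid count */
                    if (ioctl(sys_fd, KVM_GET_EMULATED_CPUID, cpuid) == 0)
                            return cpuid;
                    free(cpuid);
                    if (errno != E2BIG)
                            return NULL;
                    nent *= 2;      /* buffer too small: retry with more room */
            }
    }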
2658 | |||
2659 | The entries returned are the set CPUID bits of the respective features | ||
2660 | which kvm emulates, as returned by the CPUID instruction, with unknown | ||
2661 | or unsupported feature bits cleared. | ||
2662 | |||
2663 | Features like x2apic, for example, may not be present in the host cpu | ||
2664 | but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be | ||
2665 | emulated efficiently; they are therefore not included here. | ||
2666 | |||
2667 | The fields in each entry are defined as follows: | ||
2668 | |||
2669 | function: the eax value used to obtain the entry | ||
2670 | index: the ecx value used to obtain the entry (for entries that are | ||
2671 | affected by ecx) | ||
2672 | flags: an OR of zero or more of the following: | ||
2673 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX: | ||
2674 | if the index field is valid | ||
2675 | KVM_CPUID_FLAG_STATEFUL_FUNC: | ||
2676 | if cpuid for this function returns different values for successive | ||
2677 | invocations; there will be several entries with the same function, | ||
2678 | all with this flag set | ||
2679 | KVM_CPUID_FLAG_STATE_READ_NEXT: | ||
2680 | for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is | ||
2681 | the first entry to be read by a cpu | ||
2682 | eax, ebx, ecx, edx: the values returned by the cpuid instruction for | ||
2683 | this function/index combination | ||
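
Putting the flag semantics together, a small lookup helper one might
write on top of the returned array (a sketch; the helper name is an
assumption):

    /* Find the entry matching function/index, honouring
     * KVM_CPUID_FLAG_SIGNIFCANT_INDEX (index is ignored when clear). */
    static struct kvm_cpuid_entry2 *
    find_entry(struct kvm_cpuid2 *cpuid, __u32 function, __u32 index)
    {
            __u32 i;

            for (i = 0; i < cpuid->nent; i++) {
                    struct kvm_cpuid_entry2 *e = &cpuid->entries[i];

                    if (e->function != function)
                            continue;
                    if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) ||
                        e->index == index)
                            return e;
            }
            return NULL;
    }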
2568 | 2684 | ||
2569 | 5. The kvm_run structure | 2685 | 5. The kvm_run structure |
2570 | ------------------------ | 2686 | ------------------------ |
@@ -2861,78 +2977,12 @@ kvm_valid_regs for specific bits. These bits are architecture specific | |||
2861 | and usually define the validity of a group of registers. (e.g. one bit | 2977 | and usually define the validity of a group of registers. (e.g. one bit |
2862 | for general purpose registers) | 2978 | for general purpose registers) |
2863 | 2979 | ||
2864 | }; | 2980 | Please note that the kernel is allowed to use the kvm_run structure as the |
2865 | 2981 | primary storage for certain register types. Therefore, the kernel may use the | |
2982 | values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set. | ||
2866 | 2983 | ||
2867 | 4.81 KVM_GET_EMULATED_CPUID | ||
2868 | |||
2869 | Capability: KVM_CAP_EXT_EMUL_CPUID | ||
2870 | Architectures: x86 | ||
2871 | Type: system ioctl | ||
2872 | Parameters: struct kvm_cpuid2 (in/out) | ||
2873 | Returns: 0 on success, -1 on error | ||
2874 | |||
2875 | struct kvm_cpuid2 { | ||
2876 | __u32 nent; | ||
2877 | __u32 flags; | ||
2878 | struct kvm_cpuid_entry2 entries[0]; | ||
2879 | }; | 2984 | }; |
2880 | 2985 | ||
2881 | The member 'flags' is used for passing flags from userspace. | ||
2882 | |||
2883 | #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) | ||
2884 | #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) | ||
2885 | #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) | ||
2886 | |||
2887 | struct kvm_cpuid_entry2 { | ||
2888 | __u32 function; | ||
2889 | __u32 index; | ||
2890 | __u32 flags; | ||
2891 | __u32 eax; | ||
2892 | __u32 ebx; | ||
2893 | __u32 ecx; | ||
2894 | __u32 edx; | ||
2895 | __u32 padding[3]; | ||
2896 | }; | ||
2897 | |||
2898 | This ioctl returns x86 cpuid features which are emulated by | ||
2899 | kvm. Userspace can use the information returned by this ioctl to query | ||
2900 | which features are emulated by kvm instead of being present natively. | ||
2901 | |||
2902 | Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 | ||
2903 | structure with the 'nent' field indicating the number of entries in | ||
2904 | the variable-size array 'entries'. If the number of entries is too low | ||
2905 | to describe the cpu capabilities, an error (E2BIG) is returned. If the | ||
2906 | number is too high, the 'nent' field is adjusted and an error (ENOMEM) | ||
2907 | is returned. If the number is just right, the 'nent' field is adjusted | ||
2908 | to the number of valid entries in the 'entries' array, which is then | ||
2909 | filled. | ||
2910 | |||
2911 | The entries returned are the set CPUID bits of the respective features | ||
2912 | which kvm emulates, as returned by the CPUID instruction, with unknown | ||
2913 | or unsupported feature bits cleared. | ||
2914 | |||
2915 | Features like x2apic, for example, may not be present in the host cpu | ||
2916 | but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be | ||
2917 | emulated efficiently; they are therefore not included here. | ||
2918 | |||
2919 | The fields in each entry are defined as follows: | ||
2920 | |||
2921 | function: the eax value used to obtain the entry | ||
2922 | index: the ecx value used to obtain the entry (for entries that are | ||
2923 | affected by ecx) | ||
2924 | flags: an OR of zero or more of the following: | ||
2925 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX: | ||
2926 | if the index field is valid | ||
2927 | KVM_CPUID_FLAG_STATEFUL_FUNC: | ||
2928 | if cpuid for this function returns different values for successive | ||
2929 | invocations; there will be several entries with the same function, | ||
2930 | all with this flag set | ||
2931 | KVM_CPUID_FLAG_STATE_READ_NEXT: | ||
2932 | for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is | ||
2933 | the first entry to be read by a cpu | ||
2934 | eax, ebx, ecx, edx: the values returned by the cpuid instruction for | ||
2935 | this function/index combination | ||
2936 | 2986 | ||
2937 | 2987 | ||
2938 | 6. Capabilities that can be enabled on vCPUs | 2988 | 6. Capabilities that can be enabled on vCPUs |
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 7f4e91b1316b..df8b0c7540b6 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt | |||
@@ -71,3 +71,13 @@ Groups: | |||
71 | Errors: | 71 | Errors: |
72 | -ENODEV: Getting or setting this register is not yet supported | 72 | -ENODEV: Getting or setting this register is not yet supported |
73 | -EBUSY: One or more VCPUs are running | 73 | -EBUSY: One or more VCPUs are running |
74 | |||
75 | KVM_DEV_ARM_VGIC_GRP_NR_IRQS | ||
76 | Attributes: | ||
77 | A value describing the number of interrupts (SGI, PPI and SPI) for | ||
78 | this GIC instance, ranging from 64 to 1024, in increments of 32. | ||
79 | |||
80 | Errors: | ||
81 | -EINVAL: Value set is out of the expected range | ||
81 | -EBUSY: Value has already been set, or GIC has already been initialized | ||
83 | with default values. | ||
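
As an illustration, setting this attribute from userspace would look
roughly like the following sketch, assuming 'vgic_fd' is a device fd
returned by KVM_CREATE_DEVICE (the fd and helper names are assumptions):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Must be called before the GIC is initialized and before any
     * VCPU runs; nr_irqs must be 64..1024 in increments of 32. */
    static int vgic_set_nr_irqs(int vgic_fd, __u32 nr_irqs)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
                    .attr  = 0,
                    .addr  = (__u64)(unsigned long)&nr_irqs,
            };
            return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
    }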
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 290894176142..53838d9c6295 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt | |||
@@ -425,6 +425,20 @@ fault through the slow path. | |||
425 | Since only 19 bits are used to store generation-number on mmio spte, all | 425 | Since only 19 bits are used to store generation-number on mmio spte, all |
426 | pages are zapped when there is an overflow. | 426 | pages are zapped when there is an overflow. |
427 | 427 | ||
428 | Unfortunately, a single memory access might access kvm_memslots(kvm) multiple | ||
429 | times, the last one happening when the generation number is retrieved and | ||
430 | stored into the MMIO spte. Thus, the MMIO spte might be created based on | ||
431 | out-of-date information, but with an up-to-date generation number. | ||
432 | |||
433 | To avoid this, the generation number is incremented again after synchronize_srcu | ||
434 | returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a | ||
435 | memslot update, while some SRCU readers might be using the old copy. We do not | ||
436 | want to use an MMIO spte created with an odd generation number, and we can do | ||
437 | this without losing a bit in the MMIO spte. The low bit of the generation | ||
438 | is not stored in the MMIO spte, and presumed zero when it is extracted out of the | ||
439 | spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1, | ||
440 | the next access to the spte will always be a cache miss. | ||
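
The check this buys can be pictured with a small sketch (illustrative
only, not the kernel's exact code):

    /* The spte stores the generation with the low bit dropped, so a spte
     * created during a memslot update (odd generation) never compares
     * equal once the update completes and the generation is bumped. */
    static bool mmio_spte_gen_is_current(u64 spte_gen, u64 slots_gen)
    {
            return spte_gen == (slots_gen & ~1ULL);
    }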
441 | |||
428 | 442 | ||
429 | Further reading | 443 | Further reading |
430 | =============== | 444 | =============== |
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 69b746955fca..b9db269c6e61 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h | |||
@@ -149,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) | |||
149 | 149 | ||
150 | static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) | 150 | static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) |
151 | { | 151 | { |
152 | return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; | ||
153 | } | ||
154 | |||
155 | static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) | ||
156 | { | ||
152 | return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; | 157 | return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; |
153 | } | 158 | } |
154 | 159 | ||
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..53036e21756b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h | |||
@@ -19,6 +19,8 @@ | |||
19 | #ifndef __ARM_KVM_HOST_H__ | 19 | #ifndef __ARM_KVM_HOST_H__ |
20 | #define __ARM_KVM_HOST_H__ | 20 | #define __ARM_KVM_HOST_H__ |
21 | 21 | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/kvm_types.h> | ||
22 | #include <asm/kvm.h> | 24 | #include <asm/kvm.h> |
23 | #include <asm/kvm_asm.h> | 25 | #include <asm/kvm_asm.h> |
24 | #include <asm/kvm_mmio.h> | 26 | #include <asm/kvm_mmio.h> |
@@ -40,9 +42,8 @@ | |||
40 | 42 | ||
41 | #include <kvm/arm_vgic.h> | 43 | #include <kvm/arm_vgic.h> |
42 | 44 | ||
43 | struct kvm_vcpu; | ||
44 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); | 45 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); |
45 | int kvm_target_cpu(void); | 46 | int __attribute_const__ kvm_target_cpu(void); |
46 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | 47 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); |
47 | void kvm_reset_coprocs(struct kvm_vcpu *vcpu); | 48 | void kvm_reset_coprocs(struct kvm_vcpu *vcpu); |
48 | 49 | ||
@@ -149,20 +150,17 @@ struct kvm_vcpu_stat { | |||
149 | u32 halt_wakeup; | 150 | u32 halt_wakeup; |
150 | }; | 151 | }; |
151 | 152 | ||
152 | struct kvm_vcpu_init; | ||
153 | int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, | 153 | int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, |
154 | const struct kvm_vcpu_init *init); | 154 | const struct kvm_vcpu_init *init); |
155 | int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); | 155 | int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); |
156 | unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); | 156 | unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); |
157 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); | 157 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); |
158 | struct kvm_one_reg; | ||
159 | int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); | 158 | int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); |
160 | int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); | 159 | int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); |
161 | u64 kvm_call_hyp(void *hypfn, ...); | 160 | u64 kvm_call_hyp(void *hypfn, ...); |
162 | void force_vm_exit(const cpumask_t *mask); | 161 | void force_vm_exit(const cpumask_t *mask); |
163 | 162 | ||
164 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 163 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
165 | struct kvm; | ||
166 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 164 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
167 | int kvm_unmap_hva_range(struct kvm *kvm, | 165 | int kvm_unmap_hva_range(struct kvm *kvm, |
168 | unsigned long start, unsigned long end); | 166 | unsigned long start, unsigned long end); |
@@ -172,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); | |||
172 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); | 170 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); |
173 | 171 | ||
174 | /* We do not have shadow page tables, hence the empty hooks */ | 172 | /* We do not have shadow page tables, hence the empty hooks */ |
175 | static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 173 | static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, |
174 | unsigned long end) | ||
176 | { | 175 | { |
177 | return 0; | 176 | return 0; |
178 | } | 177 | } |
@@ -182,12 +181,16 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | |||
182 | return 0; | 181 | return 0; |
183 | } | 182 | } |
184 | 183 | ||
184 | static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | ||
185 | unsigned long address) | ||
186 | { | ||
187 | } | ||
188 | |||
185 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); | 189 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); |
186 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); | 190 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); |
187 | 191 | ||
188 | int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); | 192 | int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); |
189 | unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); | 193 | unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); |
190 | struct kvm_one_reg; | ||
191 | int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); | 194 | int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); |
192 | int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); | 195 | int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); |
193 | 196 | ||
@@ -233,4 +236,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) | |||
233 | int kvm_perf_init(void); | 236 | int kvm_perf_init(void); |
234 | int kvm_perf_teardown(void); | 237 | int kvm_perf_teardown(void); |
235 | 238 | ||
239 | static inline void kvm_arch_hardware_disable(void) {} | ||
240 | static inline void kvm_arch_hardware_unsetup(void) {} | ||
241 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | ||
242 | static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | ||
243 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | ||
244 | |||
236 | #endif /* __ARM_KVM_HOST_H__ */ | 245 | #endif /* __ARM_KVM_HOST_H__ */ |
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f5f72f..3f688b458143 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h | |||
@@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) | |||
78 | flush_pmd_entry(pte); | 78 | flush_pmd_entry(pte); |
79 | } | 79 | } |
80 | 80 | ||
81 | static inline bool kvm_is_write_fault(unsigned long hsr) | ||
82 | { | ||
83 | unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; | ||
84 | if (hsr_ec == HSR_EC_IABT) | ||
85 | return false; | ||
86 | else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) | ||
87 | return false; | ||
88 | else | ||
89 | return true; | ||
90 | } | ||
91 | |||
92 | static inline void kvm_clean_pgd(pgd_t *pgd) | 81 | static inline void kvm_clean_pgd(pgd_t *pgd) |
93 | { | 82 | { |
94 | clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); | 83 | clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); |
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index e6ebdd3471e5..09ee408c1a67 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #define __KVM_HAVE_GUEST_DEBUG | 26 | #define __KVM_HAVE_GUEST_DEBUG |
27 | #define __KVM_HAVE_IRQ_LINE | 27 | #define __KVM_HAVE_IRQ_LINE |
28 | #define __KVM_HAVE_READONLY_MEM | ||
28 | 29 | ||
29 | #define KVM_REG_SIZE(id) \ | 30 | #define KVM_REG_SIZE(id) \ |
30 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) | 31 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) |
@@ -173,6 +174,7 @@ struct kvm_arch_memory_slot { | |||
173 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) | 174 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
174 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 | 175 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 |
175 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) | 176 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) |
177 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 | ||
176 | 178 | ||
177 | /* KVM_IRQ_LINE irq field index values */ | 179 | /* KVM_IRQ_LINE irq field index values */ |
178 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 | 180 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 |
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cdf8ba2..779605122f32 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c | |||
@@ -82,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) | |||
82 | /** | 82 | /** |
83 | * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. | 83 | * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. |
84 | */ | 84 | */ |
85 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) | 85 | struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) |
86 | { | 86 | { |
87 | return &kvm_arm_running_vcpu; | 87 | return &kvm_arm_running_vcpu; |
88 | } | 88 | } |
89 | 89 | ||
90 | int kvm_arch_hardware_enable(void *garbage) | 90 | int kvm_arch_hardware_enable(void) |
91 | { | 91 | { |
92 | return 0; | 92 | return 0; |
93 | } | 93 | } |
@@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | |||
97 | return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; | 97 | return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; |
98 | } | 98 | } |
99 | 99 | ||
100 | void kvm_arch_hardware_disable(void *garbage) | ||
101 | { | ||
102 | } | ||
103 | |||
104 | int kvm_arch_hardware_setup(void) | 100 | int kvm_arch_hardware_setup(void) |
105 | { | 101 | { |
106 | return 0; | 102 | return 0; |
107 | } | 103 | } |
108 | 104 | ||
109 | void kvm_arch_hardware_unsetup(void) | ||
110 | { | ||
111 | } | ||
112 | |||
113 | void kvm_arch_check_processor_compat(void *rtn) | 105 | void kvm_arch_check_processor_compat(void *rtn) |
114 | { | 106 | { |
115 | *(int *)rtn = 0; | 107 | *(int *)rtn = 0; |
116 | } | 108 | } |
117 | 109 | ||
118 | void kvm_arch_sync_events(struct kvm *kvm) | ||
119 | { | ||
120 | } | ||
121 | 110 | ||
122 | /** | 111 | /** |
123 | * kvm_arch_init_vm - initializes a VM data structure | 112 | * kvm_arch_init_vm - initializes a VM data structure |
@@ -172,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
172 | kvm->vcpus[i] = NULL; | 161 | kvm->vcpus[i] = NULL; |
173 | } | 162 | } |
174 | } | 163 | } |
164 | |||
165 | kvm_vgic_destroy(kvm); | ||
175 | } | 166 | } |
176 | 167 | ||
177 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | 168 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) |
@@ -188,6 +179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
188 | case KVM_CAP_ONE_REG: | 179 | case KVM_CAP_ONE_REG: |
189 | case KVM_CAP_ARM_PSCI: | 180 | case KVM_CAP_ARM_PSCI: |
190 | case KVM_CAP_ARM_PSCI_0_2: | 181 | case KVM_CAP_ARM_PSCI_0_2: |
182 | case KVM_CAP_READONLY_MEM: | ||
191 | r = 1; | 183 | r = 1; |
192 | break; | 184 | break; |
193 | case KVM_CAP_COALESCED_MMIO: | 185 | case KVM_CAP_COALESCED_MMIO: |
@@ -253,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
253 | { | 245 | { |
254 | kvm_mmu_free_memory_caches(vcpu); | 246 | kvm_mmu_free_memory_caches(vcpu); |
255 | kvm_timer_vcpu_terminate(vcpu); | 247 | kvm_timer_vcpu_terminate(vcpu); |
248 | kvm_vgic_vcpu_destroy(vcpu); | ||
256 | kmem_cache_free(kvm_vcpu_cache, vcpu); | 249 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
257 | } | 250 | } |
258 | 251 | ||
@@ -268,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
268 | 261 | ||
269 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 262 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
270 | { | 263 | { |
271 | int ret; | ||
272 | |||
273 | /* Force users to call KVM_ARM_VCPU_INIT */ | 264 | /* Force users to call KVM_ARM_VCPU_INIT */ |
274 | vcpu->arch.target = -1; | 265 | vcpu->arch.target = -1; |
275 | 266 | ||
276 | /* Set up VGIC */ | ||
277 | ret = kvm_vgic_vcpu_init(vcpu); | ||
278 | if (ret) | ||
279 | return ret; | ||
280 | |||
281 | /* Set up the timer */ | 267 | /* Set up the timer */ |
282 | kvm_timer_vcpu_init(vcpu); | 268 | kvm_timer_vcpu_init(vcpu); |
283 | 269 | ||
284 | return 0; | 270 | return 0; |
285 | } | 271 | } |
286 | 272 | ||
287 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
288 | { | ||
289 | } | ||
290 | |||
291 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 273 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
292 | { | 274 | { |
293 | vcpu->cpu = cpu; | 275 | vcpu->cpu = cpu; |
@@ -428,9 +410,9 @@ static void update_vttbr(struct kvm *kvm) | |||
428 | 410 | ||
429 | /* update vttbr to be used with the new vmid */ | 411 | /* update vttbr to be used with the new vmid */ |
430 | pgd_phys = virt_to_phys(kvm->arch.pgd); | 412 | pgd_phys = virt_to_phys(kvm->arch.pgd); |
413 | BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); | ||
431 | vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; | 414 | vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; |
432 | kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; | 415 | kvm->arch.vttbr = pgd_phys | vmid; |
433 | kvm->arch.vttbr |= vmid; | ||
434 | 416 | ||
435 | spin_unlock(&kvm_vmid_lock); | 417 | spin_unlock(&kvm_vmid_lock); |
436 | } | 418 | } |
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 37a0fe1bb9bb..7928dbdf2102 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c | |||
@@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val) | |||
791 | u32 level, ctype; | 791 | u32 level, ctype; |
792 | 792 | ||
793 | if (val >= CSSELR_MAX) | 793 | if (val >= CSSELR_MAX) |
794 | return -ENOENT; | 794 | return false; |
795 | 795 | ||
796 | /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ | 796 | /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ |
797 | level = (val >> 1); | 797 | level = (val >> 1); |
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 813e49258690..cc0b78769bd8 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c | |||
@@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
163 | 163 | ||
164 | ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); | 164 | ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); |
165 | if (ret != 0) | 165 | if (ret != 0) |
166 | return ret; | 166 | return -EFAULT; |
167 | 167 | ||
168 | return kvm_arm_timer_set_reg(vcpu, reg->id, val); | 168 | return kvm_arm_timer_set_reg(vcpu, reg->id, val); |
169 | } | 169 | } |
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994bf347..eea03069161b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c | |||
@@ -746,22 +746,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) | |||
746 | return false; | 746 | return false; |
747 | } | 747 | } |
748 | 748 | ||
749 | static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) | ||
750 | { | ||
751 | if (kvm_vcpu_trap_is_iabt(vcpu)) | ||
752 | return false; | ||
753 | |||
754 | return kvm_vcpu_dabt_iswrite(vcpu); | ||
755 | } | ||
756 | |||
749 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | 757 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, |
750 | struct kvm_memory_slot *memslot, | 758 | struct kvm_memory_slot *memslot, unsigned long hva, |
751 | unsigned long fault_status) | 759 | unsigned long fault_status) |
752 | { | 760 | { |
753 | int ret; | 761 | int ret; |
754 | bool write_fault, writable, hugetlb = false, force_pte = false; | 762 | bool write_fault, writable, hugetlb = false, force_pte = false; |
755 | unsigned long mmu_seq; | 763 | unsigned long mmu_seq; |
756 | gfn_t gfn = fault_ipa >> PAGE_SHIFT; | 764 | gfn_t gfn = fault_ipa >> PAGE_SHIFT; |
757 | unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); | ||
758 | struct kvm *kvm = vcpu->kvm; | 765 | struct kvm *kvm = vcpu->kvm; |
759 | struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; | 766 | struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; |
760 | struct vm_area_struct *vma; | 767 | struct vm_area_struct *vma; |
761 | pfn_t pfn; | 768 | pfn_t pfn; |
762 | pgprot_t mem_type = PAGE_S2; | 769 | pgprot_t mem_type = PAGE_S2; |
763 | 770 | ||
764 | write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); | 771 | write_fault = kvm_is_write_fault(vcpu); |
765 | if (fault_status == FSC_PERM && !write_fault) { | 772 | if (fault_status == FSC_PERM && !write_fault) { |
766 | kvm_err("Unexpected L2 read permission error\n"); | 773 | kvm_err("Unexpected L2 read permission error\n"); |
767 | return -EFAULT; | 774 | return -EFAULT; |
@@ -863,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
863 | unsigned long fault_status; | 870 | unsigned long fault_status; |
864 | phys_addr_t fault_ipa; | 871 | phys_addr_t fault_ipa; |
865 | struct kvm_memory_slot *memslot; | 872 | struct kvm_memory_slot *memslot; |
866 | bool is_iabt; | 873 | unsigned long hva; |
874 | bool is_iabt, write_fault, writable; | ||
867 | gfn_t gfn; | 875 | gfn_t gfn; |
868 | int ret, idx; | 876 | int ret, idx; |
869 | 877 | ||
@@ -874,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
874 | kvm_vcpu_get_hfar(vcpu), fault_ipa); | 882 | kvm_vcpu_get_hfar(vcpu), fault_ipa); |
875 | 883 | ||
876 | /* Check the stage-2 fault is trans. fault or write fault */ | 884 | /* Check the stage-2 fault is trans. fault or write fault */ |
877 | fault_status = kvm_vcpu_trap_get_fault(vcpu); | 885 | fault_status = kvm_vcpu_trap_get_fault_type(vcpu); |
878 | if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { | 886 | if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { |
879 | kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", | 887 | kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", |
880 | kvm_vcpu_trap_get_class(vcpu), fault_status); | 888 | kvm_vcpu_trap_get_class(vcpu), |
889 | (unsigned long)kvm_vcpu_trap_get_fault(vcpu), | ||
890 | (unsigned long)kvm_vcpu_get_hsr(vcpu)); | ||
881 | return -EFAULT; | 891 | return -EFAULT; |
882 | } | 892 | } |
883 | 893 | ||
884 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 894 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
885 | 895 | ||
886 | gfn = fault_ipa >> PAGE_SHIFT; | 896 | gfn = fault_ipa >> PAGE_SHIFT; |
887 | if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { | 897 | memslot = gfn_to_memslot(vcpu->kvm, gfn); |
898 | hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); | ||
899 | write_fault = kvm_is_write_fault(vcpu); | ||
900 | if (kvm_is_error_hva(hva) || (write_fault && !writable)) { | ||
888 | if (is_iabt) { | 901 | if (is_iabt) { |
889 | /* Prefetch Abort on I/O address */ | 902 | /* Prefetch Abort on I/O address */ |
890 | kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); | 903 | kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); |
@@ -892,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
892 | goto out_unlock; | 905 | goto out_unlock; |
893 | } | 906 | } |
894 | 907 | ||
895 | if (fault_status != FSC_FAULT) { | ||
896 | kvm_err("Unsupported fault status on io memory: %#lx\n", | ||
897 | fault_status); | ||
898 | ret = -EFAULT; | ||
899 | goto out_unlock; | ||
900 | } | ||
901 | |||
902 | /* | 908 | /* |
903 | * The IPA is reported as [MAX:12], so we need to | 909 | * The IPA is reported as [MAX:12], so we need to |
904 | * complement it with the bottom 12 bits from the | 910 | * complement it with the bottom 12 bits from the |
@@ -910,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
910 | goto out_unlock; | 916 | goto out_unlock; |
911 | } | 917 | } |
912 | 918 | ||
913 | memslot = gfn_to_memslot(vcpu->kvm, gfn); | 919 | ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); |
914 | |||
915 | ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); | ||
916 | if (ret == 0) | 920 | if (ret == 0) |
917 | ret = 1; | 921 | ret = 1; |
918 | out_unlock: | 922 | out_unlock: |
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cc83520459ed..7fd3e27e3ccc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h | |||
@@ -122,6 +122,17 @@ | |||
122 | #define VTCR_EL2_T0SZ_MASK 0x3f | 122 | #define VTCR_EL2_T0SZ_MASK 0x3f |
123 | #define VTCR_EL2_T0SZ_40B 24 | 123 | #define VTCR_EL2_T0SZ_40B 24 |
124 | 124 | ||
125 | /* | ||
126 | * We configure the Stage-2 page tables to always restrict the IPA space to be | ||
127 | * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are | ||
128 | * not known to exist and will break with this configuration. | ||
129 | * | ||
130 | * Note that when using 4K pages, we concatenate two first level page tables | ||
131 | * together. | ||
132 | * | ||
133 | * The magic numbers used for VTTBR_X in this patch can be found in Tables | ||
134 | * D4-23 and D4-25 in ARM DDI 0487A.b. | ||
135 | */ | ||
125 | #ifdef CONFIG_ARM64_64K_PAGES | 136 | #ifdef CONFIG_ARM64_64K_PAGES |
126 | /* | 137 | /* |
127 | * Stage2 translation configuration: | 138 | * Stage2 translation configuration: |
@@ -149,7 +160,7 @@ | |||
149 | #endif | 160 | #endif |
150 | 161 | ||
151 | #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) | 162 | #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) |
152 | #define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) | 163 | #define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) |
153 | #define VTTBR_VMID_SHIFT (48LLU) | 164 | #define VTTBR_VMID_SHIFT (48LLU) |
154 | #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) | 165 | #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) |
155 | 166 | ||
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fdc3e21abd8d..5674a55b5518 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h | |||
@@ -174,6 +174,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) | |||
174 | 174 | ||
175 | static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) | 175 | static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) |
176 | { | 176 | { |
177 | return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; | ||
178 | } | ||
179 | |||
180 | static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) | ||
181 | { | ||
177 | return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; | 182 | return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; |
178 | } | 183 | } |
179 | 184 | ||
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..2012c4ba8d67 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h | |||
@@ -22,6 +22,8 @@ | |||
22 | #ifndef __ARM64_KVM_HOST_H__ | 22 | #ifndef __ARM64_KVM_HOST_H__ |
23 | #define __ARM64_KVM_HOST_H__ | 23 | #define __ARM64_KVM_HOST_H__ |
24 | 24 | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/kvm_types.h> | ||
25 | #include <asm/kvm.h> | 27 | #include <asm/kvm.h> |
26 | #include <asm/kvm_asm.h> | 28 | #include <asm/kvm_asm.h> |
27 | #include <asm/kvm_mmio.h> | 29 | #include <asm/kvm_mmio.h> |
@@ -41,8 +43,7 @@ | |||
41 | 43 | ||
42 | #define KVM_VCPU_MAX_FEATURES 3 | 44 | #define KVM_VCPU_MAX_FEATURES 3 |
43 | 45 | ||
44 | struct kvm_vcpu; | 46 | int __attribute_const__ kvm_target_cpu(void); |
45 | int kvm_target_cpu(void); | ||
46 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | 47 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); |
47 | int kvm_arch_dev_ioctl_check_extension(long ext); | 48 | int kvm_arch_dev_ioctl_check_extension(long ext); |
48 | 49 | ||
@@ -164,25 +165,23 @@ struct kvm_vcpu_stat { | |||
164 | u32 halt_wakeup; | 165 | u32 halt_wakeup; |
165 | }; | 166 | }; |
166 | 167 | ||
167 | struct kvm_vcpu_init; | ||
168 | int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, | 168 | int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, |
169 | const struct kvm_vcpu_init *init); | 169 | const struct kvm_vcpu_init *init); |
170 | int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); | 170 | int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); |
171 | unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); | 171 | unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); |
172 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); | 172 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); |
173 | struct kvm_one_reg; | ||
174 | int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); | 173 | int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); |
175 | int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); | 174 | int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); |
176 | 175 | ||
177 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 176 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
178 | struct kvm; | ||
179 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 177 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
180 | int kvm_unmap_hva_range(struct kvm *kvm, | 178 | int kvm_unmap_hva_range(struct kvm *kvm, |
181 | unsigned long start, unsigned long end); | 179 | unsigned long start, unsigned long end); |
182 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 180 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
183 | 181 | ||
184 | /* We do not have shadow page tables, hence the empty hooks */ | 182 | /* We do not have shadow page tables, hence the empty hooks */ |
185 | static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 183 | static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, |
184 | unsigned long end) | ||
186 | { | 185 | { |
187 | return 0; | 186 | return 0; |
188 | } | 187 | } |
@@ -192,8 +191,13 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | |||
192 | return 0; | 191 | return 0; |
193 | } | 192 | } |
194 | 193 | ||
194 | static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | ||
195 | unsigned long address) | ||
196 | { | ||
197 | } | ||
198 | |||
195 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); | 199 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void); |
196 | struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); | 200 | struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); |
197 | 201 | ||
198 | u64 kvm_call_hyp(void *hypfn, ...); | 202 | u64 kvm_call_hyp(void *hypfn, ...); |
199 | 203 | ||
@@ -244,4 +248,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) | |||
244 | } | 248 | } |
245 | } | 249 | } |
246 | 250 | ||
251 | static inline void kvm_arch_hardware_disable(void) {} | ||
252 | static inline void kvm_arch_hardware_unsetup(void) {} | ||
253 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | ||
254 | static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | ||
255 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | ||
256 | |||
247 | #endif /* __ARM64_KVM_HOST_H__ */ | 257 | #endif /* __ARM64_KVM_HOST_H__ */ |
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8e138c7c53ac..a030d163840b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h | |||
@@ -59,10 +59,9 @@ | |||
59 | #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) | 59 | #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) |
60 | 60 | ||
61 | /* | 61 | /* |
62 | * Align KVM with the kernel's view of physical memory. Should be | 62 | * We currently only support a 40bit IPA. |
63 | * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. | ||
64 | */ | 63 | */ |
65 | #define KVM_PHYS_SHIFT PHYS_MASK_SHIFT | 64 | #define KVM_PHYS_SHIFT (40) |
66 | #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) | 65 | #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) |
67 | #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) | 66 | #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) |
68 | 67 | ||
@@ -93,19 +92,6 @@ void kvm_clear_hyp_idmap(void); | |||
93 | #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) | 92 | #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) |
94 | #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) | 93 | #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) |
95 | 94 | ||
96 | static inline bool kvm_is_write_fault(unsigned long esr) | ||
97 | { | ||
98 | unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; | ||
99 | |||
100 | if (esr_ec == ESR_EL2_EC_IABT) | ||
101 | return false; | ||
102 | |||
103 | if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) | ||
104 | return false; | ||
105 | |||
106 | return true; | ||
107 | } | ||
108 | |||
109 | static inline void kvm_clean_pgd(pgd_t *pgd) {} | 95 | static inline void kvm_clean_pgd(pgd_t *pgd) {} |
110 | static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} | 96 | static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} |
111 | static inline void kvm_clean_pte(pte_t *pte) {} | 97 | static inline void kvm_clean_pte(pte_t *pte) {} |
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e633ff8cdec8..8e38878c87c6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #define __KVM_HAVE_GUEST_DEBUG | 38 | #define __KVM_HAVE_GUEST_DEBUG |
39 | #define __KVM_HAVE_IRQ_LINE | 39 | #define __KVM_HAVE_IRQ_LINE |
40 | #define __KVM_HAVE_READONLY_MEM | ||
40 | 41 | ||
41 | #define KVM_REG_SIZE(id) \ | 42 | #define KVM_REG_SIZE(id) \ |
42 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) | 43 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) |
@@ -159,6 +160,7 @@ struct kvm_arch_memory_slot { | |||
159 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) | 160 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
160 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 | 161 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 |
161 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) | 162 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) |
163 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 | ||
162 | 164 | ||
163 | /* KVM_IRQ_LINE irq field index values */ | 165 | /* KVM_IRQ_LINE irq field index values */ |
164 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 | 166 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 |
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8d1ec2887a26..76794692c20b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c | |||
@@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
174 | 174 | ||
175 | ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); | 175 | ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); |
176 | if (ret != 0) | 176 | if (ret != 0) |
177 | return ret; | 177 | return -EFAULT; |
178 | 178 | ||
179 | return kvm_arm_timer_set_reg(vcpu, reg->id, val); | 179 | return kvm_arm_timer_set_reg(vcpu, reg->id, val); |
180 | } | 180 | } |
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5805e7c4a4dd..4cc3b719208e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c | |||
@@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val) | |||
1218 | u32 level, ctype; | 1218 | u32 level, ctype; |
1219 | 1219 | ||
1220 | if (val >= CSSELR_MAX) | 1220 | if (val >= CSSELR_MAX) |
1221 | return -ENOENT; | 1221 | return false; |
1222 | 1222 | ||
1223 | /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ | 1223 | /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ |
1224 | level = (val >> 1); | 1224 | level = (val >> 1); |
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index db95f570705f..4729752b7256 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h | |||
@@ -234,9 +234,6 @@ struct kvm_vm_data { | |||
234 | #define KVM_REQ_PTC_G 32 | 234 | #define KVM_REQ_PTC_G 32 |
235 | #define KVM_REQ_RESUME 33 | 235 | #define KVM_REQ_RESUME 33 |
236 | 236 | ||
237 | struct kvm; | ||
238 | struct kvm_vcpu; | ||
239 | |||
240 | struct kvm_mmio_req { | 237 | struct kvm_mmio_req { |
241 | uint64_t addr; /* physical address */ | 238 | uint64_t addr; /* physical address */ |
242 | uint64_t size; /* size in bytes */ | 239 | uint64_t size; /* size in bytes */ |
@@ -595,6 +592,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu); | |||
595 | struct kvm *kvm_arch_alloc_vm(void); | 592 | struct kvm *kvm_arch_alloc_vm(void); |
596 | void kvm_arch_free_vm(struct kvm *kvm); | 593 | void kvm_arch_free_vm(struct kvm *kvm); |
597 | 594 | ||
595 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | ||
596 | static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} | ||
597 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} | ||
598 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | ||
599 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | ||
600 | static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} | ||
601 | static inline void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
602 | struct kvm_userspace_memory_region *mem, | ||
603 | const struct kvm_memory_slot *old, | ||
604 | enum kvm_mr_change change) {} | ||
605 | static inline void kvm_arch_hardware_unsetup(void) {} | ||
606 | |||
598 | #endif /* __ASSEMBLY__*/ | 607 | #endif /* __ASSEMBLY__*/ |
599 | 608 | ||
600 | #endif | 609 | #endif |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0729ba6acddf..ec6b9acb6bea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) | |||
125 | 125 | ||
126 | static DEFINE_SPINLOCK(vp_lock); | 126 | static DEFINE_SPINLOCK(vp_lock); |
127 | 127 | ||
128 | int kvm_arch_hardware_enable(void *garbage) | 128 | int kvm_arch_hardware_enable(void) |
129 | { | 129 | { |
130 | long status; | 130 | long status; |
131 | long tmp_base; | 131 | long tmp_base; |
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
160 | return 0; | 160 | return 0; |
161 | } | 161 | } |
162 | 162 | ||
163 | void kvm_arch_hardware_disable(void *garbage) | 163 | void kvm_arch_hardware_disable(void) |
164 | { | 164 | { |
165 | 165 | ||
166 | long status; | 166 | long status; |
@@ -1364,10 +1364,6 @@ static void kvm_release_vm_pages(struct kvm *kvm) | |||
1364 | } | 1364 | } |
1365 | } | 1365 | } |
1366 | 1366 | ||
1367 | void kvm_arch_sync_events(struct kvm *kvm) | ||
1368 | { | ||
1369 | } | ||
1370 | |||
1371 | void kvm_arch_destroy_vm(struct kvm *kvm) | 1367 | void kvm_arch_destroy_vm(struct kvm *kvm) |
1372 | { | 1368 | { |
1373 | kvm_iommu_unmap_guest(kvm); | 1369 | kvm_iommu_unmap_guest(kvm); |
@@ -1376,10 +1372,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
1376 | kvm_release_vm_pages(kvm); | 1372 | kvm_release_vm_pages(kvm); |
1377 | } | 1373 | } |
1378 | 1374 | ||
1379 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | ||
1380 | { | ||
1381 | } | ||
1382 | |||
1383 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1375 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1384 | { | 1376 | { |
1385 | if (cpu != vcpu->cpu) { | 1377 | if (cpu != vcpu->cpu) { |
@@ -1468,7 +1460,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
1468 | kfree(vcpu->arch.apic); | 1460 | kfree(vcpu->arch.apic); |
1469 | } | 1461 | } |
1470 | 1462 | ||
1471 | |||
1472 | long kvm_arch_vcpu_ioctl(struct file *filp, | 1463 | long kvm_arch_vcpu_ioctl(struct file *filp, |
1473 | unsigned int ioctl, unsigned long arg) | 1464 | unsigned int ioctl, unsigned long arg) |
1474 | { | 1465 | { |
@@ -1551,21 +1542,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) | |||
1551 | return VM_FAULT_SIGBUS; | 1542 | return VM_FAULT_SIGBUS; |
1552 | } | 1543 | } |
1553 | 1544 | ||
1554 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | ||
1555 | struct kvm_memory_slot *dont) | ||
1556 | { | ||
1557 | } | ||
1558 | |||
1559 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 1545 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
1560 | unsigned long npages) | 1546 | unsigned long npages) |
1561 | { | 1547 | { |
1562 | return 0; | 1548 | return 0; |
1563 | } | 1549 | } |
1564 | 1550 | ||
1565 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
1566 | { | ||
1567 | } | ||
1568 | |||
1569 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1551 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1570 | struct kvm_memory_slot *memslot, | 1552 | struct kvm_memory_slot *memslot, |
1571 | struct kvm_userspace_memory_region *mem, | 1553 | struct kvm_userspace_memory_region *mem, |
@@ -1597,14 +1579,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
1597 | return 0; | 1579 | return 0; |
1598 | } | 1580 | } |
1599 | 1581 | ||
1600 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
1601 | struct kvm_userspace_memory_region *mem, | ||
1602 | const struct kvm_memory_slot *old, | ||
1603 | enum kvm_mr_change change) | ||
1604 | { | ||
1605 | return; | ||
1606 | } | ||
1607 | |||
1608 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 1582 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
1609 | { | 1583 | { |
1610 | kvm_flush_remote_tlbs(kvm); | 1584 | kvm_flush_remote_tlbs(kvm); |
@@ -1853,10 +1827,6 @@ int kvm_arch_hardware_setup(void) | |||
1853 | return 0; | 1827 | return 0; |
1854 | } | 1828 | } |
1855 | 1829 | ||
1856 | void kvm_arch_hardware_unsetup(void) | ||
1857 | { | ||
1858 | } | ||
1859 | |||
1860 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | 1830 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) |
1861 | { | 1831 | { |
1862 | return __apic_accept_irq(vcpu, irq->vector); | 1832 | return __apic_accept_irq(vcpu, irq->vector); |
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 7a3fc67bd7f9..f2c249796ea8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h | |||
@@ -96,11 +96,6 @@ | |||
96 | #define CAUSEB_DC 27 | 96 | #define CAUSEB_DC 27 |
97 | #define CAUSEF_DC (_ULCAST_(1) << 27) | 97 | #define CAUSEF_DC (_ULCAST_(1) << 27) |
98 | 98 | ||
99 | struct kvm; | ||
100 | struct kvm_run; | ||
101 | struct kvm_vcpu; | ||
102 | struct kvm_interrupt; | ||
103 | |||
104 | extern atomic_t kvm_mips_instance; | 99 | extern atomic_t kvm_mips_instance; |
105 | extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); | 100 | extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); |
106 | extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); | 101 | extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); |
@@ -767,5 +762,16 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, | |||
767 | extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); | 762 | extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); |
768 | extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); | 763 | extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); |
769 | 764 | ||
765 | static inline void kvm_arch_hardware_disable(void) {} | ||
766 | static inline void kvm_arch_hardware_unsetup(void) {} | ||
767 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | ||
768 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | ||
769 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | ||
770 | static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} | ||
771 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | ||
772 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | ||
773 | struct kvm_memory_slot *slot) {} | ||
774 | static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | ||
775 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | ||
770 | 776 | ||
771 | #endif /* __MIPS_KVM_HOST_H__ */ | 777 | #endif /* __MIPS_KVM_HOST_H__ */ |
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd7114147ae7..e3b21e51ff7e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c | |||
@@ -77,24 +77,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | |||
77 | return 1; | 77 | return 1; |
78 | } | 78 | } |
79 | 79 | ||
80 | int kvm_arch_hardware_enable(void *garbage) | 80 | int kvm_arch_hardware_enable(void) |
81 | { | 81 | { |
82 | return 0; | 82 | return 0; |
83 | } | 83 | } |
84 | 84 | ||
85 | void kvm_arch_hardware_disable(void *garbage) | ||
86 | { | ||
87 | } | ||
88 | |||
89 | int kvm_arch_hardware_setup(void) | 85 | int kvm_arch_hardware_setup(void) |
90 | { | 86 | { |
91 | return 0; | 87 | return 0; |
92 | } | 88 | } |
93 | 89 | ||
94 | void kvm_arch_hardware_unsetup(void) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | void kvm_arch_check_processor_compat(void *rtn) | 90 | void kvm_arch_check_processor_compat(void *rtn) |
99 | { | 91 | { |
100 | *(int *)rtn = 0; | 92 | *(int *)rtn = 0; |
@@ -163,10 +155,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm) | |||
163 | mutex_unlock(&kvm->lock); | 155 | mutex_unlock(&kvm->lock); |
164 | } | 156 | } |
165 | 157 | ||
166 | void kvm_arch_sync_events(struct kvm *kvm) | ||
167 | { | ||
168 | } | ||
169 | |||
170 | static void kvm_mips_uninit_tlbs(void *arg) | 158 | static void kvm_mips_uninit_tlbs(void *arg) |
171 | { | 159 | { |
172 | /* Restore wired count */ | 160 | /* Restore wired count */ |
@@ -194,21 +182,12 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, | |||
194 | return -ENOIOCTLCMD; | 182 | return -ENOIOCTLCMD; |
195 | } | 183 | } |
196 | 184 | ||
197 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | ||
198 | struct kvm_memory_slot *dont) | ||
199 | { | ||
200 | } | ||
201 | |||
202 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 185 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
203 | unsigned long npages) | 186 | unsigned long npages) |
204 | { | 187 | { |
205 | return 0; | 188 | return 0; |
206 | } | 189 | } |
207 | 190 | ||
208 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
209 | { | ||
210 | } | ||
211 | |||
212 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 191 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
213 | struct kvm_memory_slot *memslot, | 192 | struct kvm_memory_slot *memslot, |
214 | struct kvm_userspace_memory_region *mem, | 193 | struct kvm_userspace_memory_region *mem, |
@@ -254,19 +233,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
254 | } | 233 | } |
255 | } | 234 | } |
256 | 235 | ||
257 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | ||
258 | { | ||
259 | } | ||
260 | |||
261 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | ||
262 | struct kvm_memory_slot *slot) | ||
263 | { | ||
264 | } | ||
265 | |||
266 | void kvm_arch_flush_shadow(struct kvm *kvm) | ||
267 | { | ||
268 | } | ||
269 | |||
270 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | 236 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) |
271 | { | 237 | { |
272 | int err, size, offset; | 238 | int err, size, offset; |
@@ -998,10 +964,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
998 | return 0; | 964 | return 0; |
999 | } | 965 | } |
1000 | 966 | ||
1001 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
1002 | { | ||
1003 | } | ||
1004 | |||
1005 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 967 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
1006 | struct kvm_translation *tr) | 968 | struct kvm_translation *tr) |
1007 | { | 969 | { |
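The MIPS hunks above all serve one cleanup: no-op kvm_arch_*() hooks move out of arch/mips/kvm/mips.c and become static inlines in the arch header (as added at the top of this section), and the unused void *garbage parameter is dropped from the hardware enable/disable hooks. A minimal sketch of the before/after pattern, assuming the usual kvm_host.h surroundings:

	/* Before: an out-of-line no-op that every arch had to carry. */
	void kvm_arch_sync_events(struct kvm *kvm)
	{
	}

	/* After: a static inline in the arch's kvm_host.h, so calls
	 * compile away entirely on archs that don't need the hook. */
	static inline void kvm_arch_sync_events(struct kvm *kvm) {}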
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 465dfcb82c92..5bca220bbb60 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h | |||
@@ -53,17 +53,17 @@ | |||
53 | #define BOOKE_INTERRUPT_DEBUG 15 | 53 | #define BOOKE_INTERRUPT_DEBUG 15 |
54 | 54 | ||
55 | /* E500 */ | 55 | /* E500 */ |
56 | #define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32 | 56 | #ifdef CONFIG_SPE_POSSIBLE |
57 | #define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33 | 57 | #define BOOKE_INTERRUPT_SPE_UNAVAIL 32 |
58 | /* | 58 | #define BOOKE_INTERRUPT_SPE_FP_DATA 33 |
59 | * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines | ||
60 | */ | ||
61 | #define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL | ||
62 | #define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST | ||
63 | #define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL | ||
64 | #define BOOKE_INTERRUPT_ALTIVEC_ASSIST \ | ||
65 | BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST | ||
66 | #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 | 59 | #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 |
60 | #endif | ||
61 | |||
62 | #ifdef CONFIG_PPC_E500MC | ||
63 | #define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32 | ||
64 | #define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33 | ||
65 | #endif | ||
66 | |||
67 | #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 | 67 | #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 |
68 | #define BOOKE_INTERRUPT_DOORBELL 36 | 68 | #define BOOKE_INTERRUPT_DOORBELL 36 |
69 | #define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 | 69 | #define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 |
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index f7aa5cc395c4..3286f0d6a86c 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h | |||
@@ -23,15 +23,16 @@ | |||
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
25 | 25 | ||
26 | /* LPIDs we support with this build -- runtime limit may be lower */ | 26 | /* |
27 | * Number of available lpids. Only the low-order 6 bits of the LPID register are | ||
28 | * implemented on e500mc+ cores. | ||
29 | */ | ||
27 | #define KVMPPC_NR_LPIDS 64 | 30 | #define KVMPPC_NR_LPIDS 64 |
28 | 31 | ||
29 | #define KVMPPC_INST_EHPRIV 0x7c00021c | 32 | #define KVMPPC_INST_EHPRIV 0x7c00021c |
30 | #define EHPRIV_OC_SHIFT 11 | 33 | #define EHPRIV_OC_SHIFT 11 |
31 | /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ | 34 | /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ |
32 | #define EHPRIV_OC_DEBUG 1 | 35 | #define EHPRIV_OC_DEBUG 1 |
33 | #define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ | ||
34 | (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) | ||
35 | 36 | ||
36 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 37 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) |
37 | { | 38 | { |
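The reworded comment makes the constant derivable: with only the low-order 6 bits of the LPID register implemented, there are 2^6 = 64 usable lpid values. A hypothetical helper (not from the patch) showing the masking this implies:

	#define LPID_IMPL_BITS	6
	#define NR_LPIDS	(1 << LPID_IMPL_BITS)	/* == KVMPPC_NR_LPIDS == 64 */

	/* Keep only the implemented low-order bits of an lpid. */
	static inline u32 lpid_sanitize(u32 lpid)
	{
		return lpid & (NR_LPIDS - 1);
	}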
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98d9dd50d063..047855619cc4 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -53,14 +53,18 @@ | |||
53 | 53 | ||
54 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 54 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
55 | 55 | ||
56 | struct kvm; | ||
57 | extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 56 | extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
58 | extern int kvm_unmap_hva_range(struct kvm *kvm, | 57 | extern int kvm_unmap_hva_range(struct kvm *kvm, |
59 | unsigned long start, unsigned long end); | 58 | unsigned long start, unsigned long end); |
60 | extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 59 | extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); |
61 | extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 60 | extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
62 | extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 61 | extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
63 | 62 | ||
63 | static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | ||
64 | unsigned long address) | ||
65 | { | ||
66 | } | ||
67 | |||
64 | #define HPTEG_CACHE_NUM (1 << 15) | 68 | #define HPTEG_CACHE_NUM (1 << 15) |
65 | #define HPTEG_HASH_BITS_PTE 13 | 69 | #define HPTEG_HASH_BITS_PTE 13 |
66 | #define HPTEG_HASH_BITS_PTE_LONG 12 | 70 | #define HPTEG_HASH_BITS_PTE_LONG 12 |
@@ -76,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | |||
76 | /* Physical Address Mask - allowed range of real mode RAM access */ | 80 | /* Physical Address Mask - allowed range of real mode RAM access */ |
77 | #define KVM_PAM 0x0fffffffffffffffULL | 81 | #define KVM_PAM 0x0fffffffffffffffULL |
78 | 82 | ||
79 | struct kvm; | ||
80 | struct kvm_run; | ||
81 | struct kvm_vcpu; | ||
82 | |||
83 | struct lppaca; | 83 | struct lppaca; |
84 | struct slb_shadow; | 84 | struct slb_shadow; |
85 | struct dtl_entry; | 85 | struct dtl_entry; |
@@ -144,6 +144,7 @@ enum kvm_exit_types { | |||
144 | EMULATED_TLBWE_EXITS, | 144 | EMULATED_TLBWE_EXITS, |
145 | EMULATED_RFI_EXITS, | 145 | EMULATED_RFI_EXITS, |
146 | EMULATED_RFCI_EXITS, | 146 | EMULATED_RFCI_EXITS, |
147 | EMULATED_RFDI_EXITS, | ||
147 | DEC_EXITS, | 148 | DEC_EXITS, |
148 | EXT_INTR_EXITS, | 149 | EXT_INTR_EXITS, |
149 | HALT_WAKEUP, | 150 | HALT_WAKEUP, |
@@ -589,8 +590,6 @@ struct kvm_vcpu_arch { | |||
589 | u32 crit_save; | 590 | u32 crit_save; |
590 | /* guest debug registers*/ | 591 | /* guest debug registers*/ |
591 | struct debug_reg dbg_reg; | 592 | struct debug_reg dbg_reg; |
592 | /* hardware visible debug registers when in guest state */ | ||
593 | struct debug_reg shadow_dbg_reg; | ||
594 | #endif | 593 | #endif |
595 | gpa_t paddr_accessed; | 594 | gpa_t paddr_accessed; |
596 | gva_t vaddr_accessed; | 595 | gva_t vaddr_accessed; |
@@ -612,7 +611,6 @@ struct kvm_vcpu_arch { | |||
612 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | 611 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ |
613 | 612 | ||
614 | struct hrtimer dec_timer; | 613 | struct hrtimer dec_timer; |
615 | struct tasklet_struct tasklet; | ||
616 | u64 dec_jiffies; | 614 | u64 dec_jiffies; |
617 | u64 dec_expires; | 615 | u64 dec_expires; |
618 | unsigned long pending_exceptions; | 616 | unsigned long pending_exceptions; |
@@ -687,4 +685,12 @@ struct kvm_vcpu_arch { | |||
687 | #define __KVM_HAVE_ARCH_WQP | 685 | #define __KVM_HAVE_ARCH_WQP |
688 | #define __KVM_HAVE_CREATE_DEVICE | 686 | #define __KVM_HAVE_CREATE_DEVICE |
689 | 687 | ||
688 | static inline void kvm_arch_hardware_disable(void) {} | ||
689 | static inline void kvm_arch_hardware_unsetup(void) {} | ||
690 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | ||
691 | static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} | ||
692 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | ||
693 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | ||
694 | static inline void kvm_arch_exit(void) {} | ||
695 | |||
690 | #endif /* __POWERPC_KVM_HOST_H__ */ | 696 | #endif /* __POWERPC_KVM_HOST_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..a6dcdb6d13c1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -38,6 +38,12 @@ | |||
38 | #include <asm/paca.h> | 38 | #include <asm/paca.h> |
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | /* | ||
42 | * KVMPPC_INST_SW_BREAKPOINT is the debug instruction | ||
43 | * used to support software breakpoints. | ||
44 | */ | ||
45 | #define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00 | ||
46 | |||
41 | enum emulation_result { | 47 | enum emulation_result { |
42 | EMULATE_DONE, /* no further processing */ | 48 | EMULATE_DONE, /* no further processing */ |
43 | EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ | 49 | EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ |
@@ -89,7 +95,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); | |||
89 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); | 95 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); |
90 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); | 96 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); |
91 | extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); | 97 | extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); |
92 | extern void kvmppc_decrementer_func(unsigned long data); | 98 | extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); |
93 | extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); | 99 | extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); |
94 | extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); | 100 | extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); |
95 | extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); | 101 | extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); |
@@ -206,6 +212,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, | |||
206 | extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); | 212 | extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); |
207 | extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); | 213 | extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); |
208 | 214 | ||
215 | void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu); | ||
216 | void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu); | ||
217 | |||
209 | union kvmppc_one_reg { | 218 | union kvmppc_one_reg { |
210 | u32 wval; | 219 | u32 wval; |
211 | u64 dval; | 220 | u64 dval; |
@@ -243,7 +252,7 @@ struct kvmppc_ops { | |||
243 | int (*unmap_hva)(struct kvm *kvm, unsigned long hva); | 252 | int (*unmap_hva)(struct kvm *kvm, unsigned long hva); |
244 | int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, | 253 | int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, |
245 | unsigned long end); | 254 | unsigned long end); |
246 | int (*age_hva)(struct kvm *kvm, unsigned long hva); | 255 | int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); |
247 | int (*test_age_hva)(struct kvm *kvm, unsigned long hva); | 256 | int (*test_age_hva)(struct kvm *kvm, unsigned long hva); |
248 | void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); | 257 | void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); |
249 | void (*mmu_destroy)(struct kvm_vcpu *vcpu); | 258 | void (*mmu_destroy)(struct kvm_vcpu *vcpu); |
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 1d653308a33c..16547efa2d5a 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h | |||
@@ -319,6 +319,8 @@ | |||
319 | * DBSR bits which have conflicting definitions on true Book E versus IBM 40x. | 319 | * DBSR bits which have conflicting definitions on true Book E versus IBM 40x. |
320 | */ | 320 | */ |
321 | #ifdef CONFIG_BOOKE | 321 | #ifdef CONFIG_BOOKE |
322 | #define DBSR_IDE 0x80000000 /* Imprecise Debug Event */ | ||
323 | #define DBSR_MRR 0x30000000 /* Most Recent Reset */ | ||
322 | #define DBSR_IC 0x08000000 /* Instruction Completion */ | 324 | #define DBSR_IC 0x08000000 /* Instruction Completion */ |
323 | #define DBSR_BT 0x04000000 /* Branch Taken */ | 325 | #define DBSR_BT 0x04000000 /* Branch Taken */ |
324 | #define DBSR_IRPT 0x02000000 /* Exception Debug Event */ | 326 | #define DBSR_IRPT 0x02000000 /* Exception Debug Event */ |
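DBSR_IDE and DBSR_MRR join the existing Book E debug-status bits. As a hedged usage sketch (the usual Book E idiom, not code from this patch): DBSR bits are write-one-to-clear, so a handler snapshots the register and writes the same value back to acknowledge exactly the events it saw:

	u32 dbsr = mfspr(SPRN_DBSR);	/* snapshot pending debug events */

	mtspr(SPRN_DBSR, dbsr);		/* w1c: clear exactly what we read */
	if (dbsr & DBSR_IDE) {
		/* imprecise (delayed) debug event */
	}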
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index e0e49dbb145d..ab4d4732c492 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -476,6 +476,11 @@ struct kvm_get_htab_header { | |||
476 | 476 | ||
477 | /* FP and vector status/control registers */ | 477 | /* FP and vector status/control registers */ |
478 | #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) | 478 | #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) |
479 | /* | ||
480 | * VSCR register is documented as a 32-bit register in the ISA, but it can | ||
481 | * only be accessed via a vector register. Expose VSCR as a 32-bit register | ||
482 | * even though the kernel represents it as a 128-bit vector. | ||
483 | */ | ||
479 | #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) | 484 | #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) |
480 | 485 | ||
481 | /* Virtual processor areas */ | 486 | /* Virtual processor areas */ |
@@ -557,6 +562,7 @@ struct kvm_get_htab_header { | |||
557 | #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) | 562 | #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) |
558 | #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) | 563 | #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) |
559 | #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) | 564 | #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) |
565 | #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) | ||
560 | 566 | ||
561 | /* Transactional Memory checkpointed state: | 567 | /* Transactional Memory checkpointed state: |
562 | * This is all GPRs, all VSX regs and a subset of SPRs | 568 | * This is all GPRs, all VSX regs and a subset of SPRs |
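With KVM_REG_PPC_DBSR exported as a 32-bit one_reg, userspace can read the guest's debug status through the generic one-reg ioctl. A hedged userspace sketch, where vcpu_fd is an assumed, already-created vcpu file descriptor:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int read_dbsr(int vcpu_fd, uint32_t *dbsr)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_DBSR,
			.addr = (uintptr_t)dbsr,	/* kernel writes the value here */
		};
		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}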
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 4f1393d20079..dddba3e94260 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S | |||
@@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle) | |||
91 | 91 | ||
92 | blr | 92 | blr |
93 | 93 | ||
94 | #ifdef CONFIG_PPC_E500MC | ||
94 | _GLOBAL(__setup_cpu_e6500) | 95 | _GLOBAL(__setup_cpu_e6500) |
95 | mflr r6 | 96 | mflr r6 |
96 | #ifdef CONFIG_PPC64 | 97 | #ifdef CONFIG_PPC64 |
@@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500) | |||
107 | bl __setup_cpu_e5500 | 108 | bl __setup_cpu_e5500 |
108 | mtlr r6 | 109 | mtlr r6 |
109 | blr | 110 | blr |
111 | #endif /* CONFIG_PPC_E500MC */ | ||
110 | 112 | ||
111 | #ifdef CONFIG_PPC32 | 113 | #ifdef CONFIG_PPC32 |
114 | #ifdef CONFIG_E200 | ||
112 | _GLOBAL(__setup_cpu_e200) | 115 | _GLOBAL(__setup_cpu_e200) |
113 | /* enable dedicated debug exception handling resources (Debug APU) */ | 116 | /* enable dedicated debug exception handling resources (Debug APU) */ |
114 | mfspr r3,SPRN_HID0 | 117 | mfspr r3,SPRN_HID0 |
115 | ori r3,r3,HID0_DAPUEN@l | 118 | ori r3,r3,HID0_DAPUEN@l |
116 | mtspr SPRN_HID0,r3 | 119 | mtspr SPRN_HID0,r3 |
117 | b __setup_e200_ivors | 120 | b __setup_e200_ivors |
121 | #endif /* CONFIG_E200 */ | ||
122 | |||
123 | #ifdef CONFIG_E500 | ||
124 | #ifndef CONFIG_PPC_E500MC | ||
118 | _GLOBAL(__setup_cpu_e500v1) | 125 | _GLOBAL(__setup_cpu_e500v1) |
119 | _GLOBAL(__setup_cpu_e500v2) | 126 | _GLOBAL(__setup_cpu_e500v2) |
120 | mflr r4 | 127 | mflr r4 |
@@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2) | |||
129 | #endif | 136 | #endif |
130 | mtlr r4 | 137 | mtlr r4 |
131 | blr | 138 | blr |
139 | #else /* CONFIG_PPC_E500MC */ | ||
132 | _GLOBAL(__setup_cpu_e500mc) | 140 | _GLOBAL(__setup_cpu_e500mc) |
133 | _GLOBAL(__setup_cpu_e5500) | 141 | _GLOBAL(__setup_cpu_e5500) |
134 | mflr r5 | 142 | mflr r5 |
@@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500) | |||
159 | 2: | 167 | 2: |
160 | mtlr r5 | 168 | mtlr r5 |
161 | blr | 169 | blr |
162 | #endif | 170 | #endif /* CONFIG_PPC_E500MC */ |
171 | #endif /* CONFIG_E500 */ | ||
172 | #endif /* CONFIG_PPC32 */ | ||
163 | 173 | ||
164 | #ifdef CONFIG_PPC_BOOK3E_64 | 174 | #ifdef CONFIG_PPC_BOOK3E_64 |
165 | _GLOBAL(__restore_cpu_e6500) | 175 | _GLOBAL(__restore_cpu_e6500) |
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9b6dcaaec1a3..808405906336 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c | |||
@@ -1961,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = { | |||
1961 | #endif /* CONFIG_PPC32 */ | 1961 | #endif /* CONFIG_PPC32 */ |
1962 | #ifdef CONFIG_E500 | 1962 | #ifdef CONFIG_E500 |
1963 | #ifdef CONFIG_PPC32 | 1963 | #ifdef CONFIG_PPC32 |
1964 | #ifndef CONFIG_PPC_E500MC | ||
1964 | { /* e500 */ | 1965 | { /* e500 */ |
1965 | .pvr_mask = 0xffff0000, | 1966 | .pvr_mask = 0xffff0000, |
1966 | .pvr_value = 0x80200000, | 1967 | .pvr_value = 0x80200000, |
@@ -2000,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = { | |||
2000 | .machine_check = machine_check_e500, | 2001 | .machine_check = machine_check_e500, |
2001 | .platform = "ppc8548", | 2002 | .platform = "ppc8548", |
2002 | }, | 2003 | }, |
2004 | #else | ||
2003 | { /* e500mc */ | 2005 | { /* e500mc */ |
2004 | .pvr_mask = 0xffff0000, | 2006 | .pvr_mask = 0xffff0000, |
2005 | .pvr_value = 0x80230000, | 2007 | .pvr_value = 0x80230000, |
@@ -2018,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = { | |||
2018 | .machine_check = machine_check_e500mc, | 2020 | .machine_check = machine_check_e500mc, |
2019 | .platform = "ppce500mc", | 2021 | .platform = "ppce500mc", |
2020 | }, | 2022 | }, |
2023 | #endif /* CONFIG_PPC_E500MC */ | ||
2021 | #endif /* CONFIG_PPC32 */ | 2024 | #endif /* CONFIG_PPC32 */ |
2025 | #ifdef CONFIG_PPC_E500MC | ||
2022 | { /* e5500 */ | 2026 | { /* e5500 */ |
2023 | .pvr_mask = 0xffff0000, | 2027 | .pvr_mask = 0xffff0000, |
2024 | .pvr_value = 0x80240000, | 2028 | .pvr_value = 0x80240000, |
@@ -2062,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = { | |||
2062 | .machine_check = machine_check_e500mc, | 2066 | .machine_check = machine_check_e500mc, |
2063 | .platform = "ppce6500", | 2067 | .platform = "ppce6500", |
2064 | }, | 2068 | }, |
2069 | #endif /* CONFIG_PPC_E500MC */ | ||
2065 | #ifdef CONFIG_PPC32 | 2070 | #ifdef CONFIG_PPC32 |
2066 | { /* default match */ | 2071 | { /* default match */ |
2067 | .pvr_mask = 0x00000000, | 2072 | .pvr_mask = 0x00000000, |
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index bb9cac6c8051..3e68d1c69718 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S | |||
@@ -635,7 +635,7 @@ interrupt_end_book3e: | |||
635 | 635 | ||
636 | /* Altivec Unavailable Interrupt */ | 636 | /* Altivec Unavailable Interrupt */ |
637 | START_EXCEPTION(altivec_unavailable); | 637 | START_EXCEPTION(altivec_unavailable); |
638 | NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL, | 638 | NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, |
639 | PROLOG_ADDITION_NONE) | 639 | PROLOG_ADDITION_NONE) |
640 | /* we can probably do a shorter exception entry for that one... */ | 640 | /* we can probably do a shorter exception entry for that one... */ |
641 | EXCEPTION_COMMON(0x200) | 641 | EXCEPTION_COMMON(0x200) |
@@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) | |||
658 | /* AltiVec Assist */ | 658 | /* AltiVec Assist */ |
659 | START_EXCEPTION(altivec_assist); | 659 | START_EXCEPTION(altivec_assist); |
660 | NORMAL_EXCEPTION_PROLOG(0x220, | 660 | NORMAL_EXCEPTION_PROLOG(0x220, |
661 | BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST, | 661 | BOOKE_INTERRUPT_ALTIVEC_ASSIST, |
662 | PROLOG_ADDITION_NONE) | 662 | PROLOG_ADDITION_NONE) |
663 | EXCEPTION_COMMON(0x220) | 663 | EXCEPTION_COMMON(0x220) |
664 | INTS_DISABLE | 664 | INTS_DISABLE |
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b497188a94a1..fffd1f96bb1d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S | |||
@@ -613,34 +613,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) | |||
613 | mfspr r10, SPRN_SPRG_RSCRATCH0 | 613 | mfspr r10, SPRN_SPRG_RSCRATCH0 |
614 | b InstructionStorage | 614 | b InstructionStorage |
615 | 615 | ||
616 | /* Define SPE handlers for e200 and e500v2 */ | ||
616 | #ifdef CONFIG_SPE | 617 | #ifdef CONFIG_SPE |
617 | /* SPE Unavailable */ | 618 | /* SPE Unavailable */ |
618 | START_EXCEPTION(SPEUnavailable) | 619 | START_EXCEPTION(SPEUnavailable) |
619 | NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL) | 620 | NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) |
620 | beq 1f | 621 | beq 1f |
621 | bl load_up_spe | 622 | bl load_up_spe |
622 | b fast_exception_return | 623 | b fast_exception_return |
623 | 1: addi r3,r1,STACK_FRAME_OVERHEAD | 624 | 1: addi r3,r1,STACK_FRAME_OVERHEAD |
624 | EXC_XFER_EE_LITE(0x2010, KernelSPE) | 625 | EXC_XFER_EE_LITE(0x2010, KernelSPE) |
625 | #else | 626 | #elif defined(CONFIG_SPE_POSSIBLE) |
626 | EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ | 627 | EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ |
627 | unknown_exception, EXC_XFER_EE) | 628 | unknown_exception, EXC_XFER_EE) |
628 | #endif /* CONFIG_SPE */ | 629 | #endif /* CONFIG_SPE_POSSIBLE */ |
629 | 630 | ||
630 | /* SPE Floating Point Data */ | 631 | /* SPE Floating Point Data */ |
631 | #ifdef CONFIG_SPE | 632 | #ifdef CONFIG_SPE |
632 | EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, | 633 | EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, |
633 | SPEFloatingPointException, EXC_XFER_EE) | 634 | SPEFloatingPointException, EXC_XFER_EE) |
634 | 635 | ||
635 | /* SPE Floating Point Round */ | 636 | /* SPE Floating Point Round */ |
636 | EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ | 637 | EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ |
637 | SPEFloatingPointRoundException, EXC_XFER_EE) | 638 | SPEFloatingPointRoundException, EXC_XFER_EE) |
638 | #else | 639 | #elif defined(CONFIG_SPE_POSSIBLE) |
639 | EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, | 640 | EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, |
640 | unknown_exception, EXC_XFER_EE) | 641 | unknown_exception, EXC_XFER_EE) |
641 | EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ | 642 | EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ |
642 | unknown_exception, EXC_XFER_EE) | 643 | unknown_exception, EXC_XFER_EE) |
643 | #endif /* CONFIG_SPE */ | 644 | #endif /* CONFIG_SPE_POSSIBLE */ |
645 | |||
644 | 646 | ||
645 | /* Performance Monitor */ | 647 | /* Performance Monitor */ |
646 | EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ | 648 | EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ |
@@ -947,6 +949,7 @@ get_phys_addr: | |||
947 | * Global functions | 949 | * Global functions |
948 | */ | 950 | */ |
949 | 951 | ||
952 | #ifdef CONFIG_E200 | ||
950 | /* Adjust or setup IVORs for e200 */ | 953 | /* Adjust or setup IVORs for e200 */ |
951 | _GLOBAL(__setup_e200_ivors) | 954 | _GLOBAL(__setup_e200_ivors) |
952 | li r3,DebugDebug@l | 955 | li r3,DebugDebug@l |
@@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors) | |||
959 | mtspr SPRN_IVOR34,r3 | 962 | mtspr SPRN_IVOR34,r3 |
960 | sync | 963 | sync |
961 | blr | 964 | blr |
965 | #endif | ||
962 | 966 | ||
967 | #ifdef CONFIG_E500 | ||
968 | #ifndef CONFIG_PPC_E500MC | ||
963 | /* Adjust or setup IVORs for e500v1/v2 */ | 969 | /* Adjust or setup IVORs for e500v1/v2 */ |
964 | _GLOBAL(__setup_e500_ivors) | 970 | _GLOBAL(__setup_e500_ivors) |
965 | li r3,DebugCrit@l | 971 | li r3,DebugCrit@l |
@@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors) | |||
974 | mtspr SPRN_IVOR35,r3 | 980 | mtspr SPRN_IVOR35,r3 |
975 | sync | 981 | sync |
976 | blr | 982 | blr |
977 | 983 | #else | |
978 | /* Adjust or setup IVORs for e500mc */ | 984 | /* Adjust or setup IVORs for e500mc */ |
979 | _GLOBAL(__setup_e500mc_ivors) | 985 | _GLOBAL(__setup_e500mc_ivors) |
980 | li r3,DebugDebug@l | 986 | li r3,DebugDebug@l |
@@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors) | |||
1000 | mtspr SPRN_IVOR41,r3 | 1006 | mtspr SPRN_IVOR41,r3 |
1001 | sync | 1007 | sync |
1002 | blr | 1008 | blr |
1009 | #endif /* CONFIG_PPC_E500MC */ | ||
1010 | #endif /* CONFIG_E500 */ | ||
1003 | 1011 | ||
1004 | #ifdef CONFIG_SPE | 1012 | #ifdef CONFIG_SPE |
1005 | /* | 1013 | /* |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index dd03f6b299ba..b32db4b95361 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
535 | return -ENOTSUPP; | 535 | return -ENOTSUPP; |
536 | } | 536 | } |
537 | 537 | ||
538 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 538 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, |
539 | union kvmppc_one_reg *val) | ||
539 | { | 540 | { |
540 | int r; | 541 | int r = 0; |
541 | union kvmppc_one_reg val; | ||
542 | int size; | ||
543 | long int i; | 542 | long int i; |
544 | 543 | ||
545 | size = one_reg_size(reg->id); | 544 | r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); |
546 | if (size > sizeof(val)) | ||
547 | return -EINVAL; | ||
548 | |||
549 | r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); | ||
550 | if (r == -EINVAL) { | 545 | if (r == -EINVAL) { |
551 | r = 0; | 546 | r = 0; |
552 | switch (reg->id) { | 547 | switch (id) { |
553 | case KVM_REG_PPC_DAR: | 548 | case KVM_REG_PPC_DAR: |
554 | val = get_reg_val(reg->id, kvmppc_get_dar(vcpu)); | 549 | *val = get_reg_val(id, kvmppc_get_dar(vcpu)); |
555 | break; | 550 | break; |
556 | case KVM_REG_PPC_DSISR: | 551 | case KVM_REG_PPC_DSISR: |
557 | val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu)); | 552 | *val = get_reg_val(id, kvmppc_get_dsisr(vcpu)); |
558 | break; | 553 | break; |
559 | case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: | 554 | case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: |
560 | i = reg->id - KVM_REG_PPC_FPR0; | 555 | i = id - KVM_REG_PPC_FPR0; |
561 | val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); | 556 | *val = get_reg_val(id, VCPU_FPR(vcpu, i)); |
562 | break; | 557 | break; |
563 | case KVM_REG_PPC_FPSCR: | 558 | case KVM_REG_PPC_FPSCR: |
564 | val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); | 559 | *val = get_reg_val(id, vcpu->arch.fp.fpscr); |
565 | break; | ||
566 | #ifdef CONFIG_ALTIVEC | ||
567 | case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: | ||
568 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
569 | r = -ENXIO; | ||
570 | break; | ||
571 | } | ||
572 | val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; | ||
573 | break; | ||
574 | case KVM_REG_PPC_VSCR: | ||
575 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
576 | r = -ENXIO; | ||
577 | break; | ||
578 | } | ||
579 | val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); | ||
580 | break; | 560 | break; |
581 | case KVM_REG_PPC_VRSAVE: | ||
582 | val = get_reg_val(reg->id, vcpu->arch.vrsave); | ||
583 | break; | ||
584 | #endif /* CONFIG_ALTIVEC */ | ||
585 | #ifdef CONFIG_VSX | 561 | #ifdef CONFIG_VSX |
586 | case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: | 562 | case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: |
587 | if (cpu_has_feature(CPU_FTR_VSX)) { | 563 | if (cpu_has_feature(CPU_FTR_VSX)) { |
588 | long int i = reg->id - KVM_REG_PPC_VSR0; | 564 | i = id - KVM_REG_PPC_VSR0; |
589 | val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; | 565 | val->vsxval[0] = vcpu->arch.fp.fpr[i][0]; |
590 | val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; | 566 | val->vsxval[1] = vcpu->arch.fp.fpr[i][1]; |
591 | } else { | 567 | } else { |
592 | r = -ENXIO; | 568 | r = -ENXIO; |
593 | } | 569 | } |
594 | break; | 570 | break; |
595 | #endif /* CONFIG_VSX */ | 571 | #endif /* CONFIG_VSX */ |
596 | case KVM_REG_PPC_DEBUG_INST: { | 572 | case KVM_REG_PPC_DEBUG_INST: |
597 | u32 opcode = INS_TW; | 573 | *val = get_reg_val(id, INS_TW); |
598 | r = copy_to_user((u32 __user *)(long)reg->addr, | ||
599 | &opcode, sizeof(u32)); | ||
600 | break; | 574 | break; |
601 | } | ||
602 | #ifdef CONFIG_KVM_XICS | 575 | #ifdef CONFIG_KVM_XICS |
603 | case KVM_REG_PPC_ICP_STATE: | 576 | case KVM_REG_PPC_ICP_STATE: |
604 | if (!vcpu->arch.icp) { | 577 | if (!vcpu->arch.icp) { |
605 | r = -ENXIO; | 578 | r = -ENXIO; |
606 | break; | 579 | break; |
607 | } | 580 | } |
608 | val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); | 581 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); |
609 | break; | 582 | break; |
610 | #endif /* CONFIG_KVM_XICS */ | 583 | #endif /* CONFIG_KVM_XICS */ |
611 | case KVM_REG_PPC_FSCR: | 584 | case KVM_REG_PPC_FSCR: |
612 | val = get_reg_val(reg->id, vcpu->arch.fscr); | 585 | *val = get_reg_val(id, vcpu->arch.fscr); |
613 | break; | 586 | break; |
614 | case KVM_REG_PPC_TAR: | 587 | case KVM_REG_PPC_TAR: |
615 | val = get_reg_val(reg->id, vcpu->arch.tar); | 588 | *val = get_reg_val(id, vcpu->arch.tar); |
616 | break; | 589 | break; |
617 | case KVM_REG_PPC_EBBHR: | 590 | case KVM_REG_PPC_EBBHR: |
618 | val = get_reg_val(reg->id, vcpu->arch.ebbhr); | 591 | *val = get_reg_val(id, vcpu->arch.ebbhr); |
619 | break; | 592 | break; |
620 | case KVM_REG_PPC_EBBRR: | 593 | case KVM_REG_PPC_EBBRR: |
621 | val = get_reg_val(reg->id, vcpu->arch.ebbrr); | 594 | *val = get_reg_val(id, vcpu->arch.ebbrr); |
622 | break; | 595 | break; |
623 | case KVM_REG_PPC_BESCR: | 596 | case KVM_REG_PPC_BESCR: |
624 | val = get_reg_val(reg->id, vcpu->arch.bescr); | 597 | *val = get_reg_val(id, vcpu->arch.bescr); |
625 | break; | 598 | break; |
626 | case KVM_REG_PPC_VTB: | 599 | case KVM_REG_PPC_VTB: |
627 | val = get_reg_val(reg->id, vcpu->arch.vtb); | 600 | *val = get_reg_val(id, vcpu->arch.vtb); |
628 | break; | 601 | break; |
629 | case KVM_REG_PPC_IC: | 602 | case KVM_REG_PPC_IC: |
630 | val = get_reg_val(reg->id, vcpu->arch.ic); | 603 | *val = get_reg_val(id, vcpu->arch.ic); |
631 | break; | 604 | break; |
632 | default: | 605 | default: |
633 | r = -EINVAL; | 606 | r = -EINVAL; |
634 | break; | 607 | break; |
635 | } | 608 | } |
636 | } | 609 | } |
637 | if (r) | ||
638 | return r; | ||
639 | |||
640 | if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) | ||
641 | r = -EFAULT; | ||
642 | 610 | ||
643 | return r; | 611 | return r; |
644 | } | 612 | } |
645 | 613 | ||
646 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 614 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, |
615 | union kvmppc_one_reg *val) | ||
647 | { | 616 | { |
648 | int r; | 617 | int r = 0; |
649 | union kvmppc_one_reg val; | ||
650 | int size; | ||
651 | long int i; | 618 | long int i; |
652 | 619 | ||
653 | size = one_reg_size(reg->id); | 620 | r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); |
654 | if (size > sizeof(val)) | ||
655 | return -EINVAL; | ||
656 | |||
657 | if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) | ||
658 | return -EFAULT; | ||
659 | |||
660 | r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); | ||
661 | if (r == -EINVAL) { | 621 | if (r == -EINVAL) { |
662 | r = 0; | 622 | r = 0; |
663 | switch (reg->id) { | 623 | switch (id) { |
664 | case KVM_REG_PPC_DAR: | 624 | case KVM_REG_PPC_DAR: |
665 | kvmppc_set_dar(vcpu, set_reg_val(reg->id, val)); | 625 | kvmppc_set_dar(vcpu, set_reg_val(id, *val)); |
666 | break; | 626 | break; |
667 | case KVM_REG_PPC_DSISR: | 627 | case KVM_REG_PPC_DSISR: |
668 | kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val)); | 628 | kvmppc_set_dsisr(vcpu, set_reg_val(id, *val)); |
669 | break; | 629 | break; |
670 | case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: | 630 | case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: |
671 | i = reg->id - KVM_REG_PPC_FPR0; | 631 | i = id - KVM_REG_PPC_FPR0; |
672 | VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); | 632 | VCPU_FPR(vcpu, i) = set_reg_val(id, *val); |
673 | break; | 633 | break; |
674 | case KVM_REG_PPC_FPSCR: | 634 | case KVM_REG_PPC_FPSCR: |
675 | vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); | 635 | vcpu->arch.fp.fpscr = set_reg_val(id, *val); |
676 | break; | ||
677 | #ifdef CONFIG_ALTIVEC | ||
678 | case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: | ||
679 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
680 | r = -ENXIO; | ||
681 | break; | ||
682 | } | ||
683 | vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; | ||
684 | break; | ||
685 | case KVM_REG_PPC_VSCR: | ||
686 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
687 | r = -ENXIO; | ||
688 | break; | ||
689 | } | ||
690 | vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); | ||
691 | break; | ||
692 | case KVM_REG_PPC_VRSAVE: | ||
693 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
694 | r = -ENXIO; | ||
695 | break; | ||
696 | } | ||
697 | vcpu->arch.vrsave = set_reg_val(reg->id, val); | ||
698 | break; | 636 | break; |
699 | #endif /* CONFIG_ALTIVEC */ | ||
700 | #ifdef CONFIG_VSX | 637 | #ifdef CONFIG_VSX |
701 | case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: | 638 | case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: |
702 | if (cpu_has_feature(CPU_FTR_VSX)) { | 639 | if (cpu_has_feature(CPU_FTR_VSX)) { |
703 | long int i = reg->id - KVM_REG_PPC_VSR0; | 640 | i = id - KVM_REG_PPC_VSR0; |
704 | vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; | 641 | vcpu->arch.fp.fpr[i][0] = val->vsxval[0]; |
705 | vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; | 642 | vcpu->arch.fp.fpr[i][1] = val->vsxval[1]; |
706 | } else { | 643 | } else { |
707 | r = -ENXIO; | 644 | r = -ENXIO; |
708 | } | 645 | } |
@@ -715,29 +652,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | |||
715 | break; | 652 | break; |
716 | } | 653 | } |
717 | r = kvmppc_xics_set_icp(vcpu, | 654 | r = kvmppc_xics_set_icp(vcpu, |
718 | set_reg_val(reg->id, val)); | 655 | set_reg_val(id, *val)); |
719 | break; | 656 | break; |
720 | #endif /* CONFIG_KVM_XICS */ | 657 | #endif /* CONFIG_KVM_XICS */ |
721 | case KVM_REG_PPC_FSCR: | 658 | case KVM_REG_PPC_FSCR: |
722 | vcpu->arch.fscr = set_reg_val(reg->id, val); | 659 | vcpu->arch.fscr = set_reg_val(id, *val); |
723 | break; | 660 | break; |
724 | case KVM_REG_PPC_TAR: | 661 | case KVM_REG_PPC_TAR: |
725 | vcpu->arch.tar = set_reg_val(reg->id, val); | 662 | vcpu->arch.tar = set_reg_val(id, *val); |
726 | break; | 663 | break; |
727 | case KVM_REG_PPC_EBBHR: | 664 | case KVM_REG_PPC_EBBHR: |
728 | vcpu->arch.ebbhr = set_reg_val(reg->id, val); | 665 | vcpu->arch.ebbhr = set_reg_val(id, *val); |
729 | break; | 666 | break; |
730 | case KVM_REG_PPC_EBBRR: | 667 | case KVM_REG_PPC_EBBRR: |
731 | vcpu->arch.ebbrr = set_reg_val(reg->id, val); | 668 | vcpu->arch.ebbrr = set_reg_val(id, *val); |
732 | break; | 669 | break; |
733 | case KVM_REG_PPC_BESCR: | 670 | case KVM_REG_PPC_BESCR: |
734 | vcpu->arch.bescr = set_reg_val(reg->id, val); | 671 | vcpu->arch.bescr = set_reg_val(id, *val); |
735 | break; | 672 | break; |
736 | case KVM_REG_PPC_VTB: | 673 | case KVM_REG_PPC_VTB: |
737 | vcpu->arch.vtb = set_reg_val(reg->id, val); | 674 | vcpu->arch.vtb = set_reg_val(id, *val); |
738 | break; | 675 | break; |
739 | case KVM_REG_PPC_IC: | 676 | case KVM_REG_PPC_IC: |
740 | vcpu->arch.ic = set_reg_val(reg->id, val); | 677 | vcpu->arch.ic = set_reg_val(id, *val); |
741 | break; | 678 | break; |
742 | default: | 679 | default: |
743 | r = -EINVAL; | 680 | r = -EINVAL; |
@@ -778,13 +715,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
778 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | 715 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, |
779 | struct kvm_guest_debug *dbg) | 716 | struct kvm_guest_debug *dbg) |
780 | { | 717 | { |
781 | return -EINVAL; | 718 | vcpu->guest_debug = dbg->control; |
719 | return 0; | ||
782 | } | 720 | } |
783 | 721 | ||
784 | void kvmppc_decrementer_func(unsigned long data) | 722 | void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) |
785 | { | 723 | { |
786 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | ||
787 | |||
788 | kvmppc_core_queue_dec(vcpu); | 724 | kvmppc_core_queue_dec(vcpu); |
789 | kvm_vcpu_kick(vcpu); | 725 | kvm_vcpu_kick(vcpu); |
790 | } | 726 | } |
@@ -851,9 +787,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | |||
851 | return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); | 787 | return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); |
852 | } | 788 | } |
853 | 789 | ||
854 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 790 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) |
855 | { | 791 | { |
856 | return kvm->arch.kvm_ops->age_hva(kvm, hva); | 792 | return kvm->arch.kvm_ops->age_hva(kvm, start, end); |
857 | } | 793 | } |
858 | 794 | ||
859 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | 795 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) |
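The refactor above strips the one_reg_size() checks and copy_{to,from}_user() calls out of the Book3S paths, so kvmppc_get_one_reg()/kvmppc_set_one_reg() now traffic purely in union kvmppc_one_reg. Presumably a single generic wrapper now performs the user copy once for every register id; reconstructed from the deleted lines (a sketch, not quoted from the patch), it would look roughly like:

	int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
	{
		union kvmppc_one_reg val;
		int size = one_reg_size(reg->id);
		int r;

		if (size > sizeof(val))
			return -EINVAL;

		r = kvmppc_get_one_reg(vcpu, reg->id, &val);	/* fills only the union */
		if (r)
			return r;

		/* one user copy, shared by every register id */
		if (copy_to_user((void __user *)(unsigned long)reg->addr, &val, size))
			return -EFAULT;
		return 0;
	}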
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4bf956cf94d6..d2b3ec088b8c 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h | |||
@@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, | |||
17 | extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); | 17 | extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); |
18 | extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, | 18 | extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, |
19 | unsigned long end); | 19 | unsigned long end); |
20 | extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); | 20 | extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, |
21 | unsigned long end); | ||
21 | extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); | 22 | extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); |
22 | extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); | 23 | extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); |
23 | 24 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 79294c4c5015..d40770248b6a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1002 | return ret; | 1002 | return ret; |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) | 1005 | int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) |
1006 | { | 1006 | { |
1007 | if (!kvm->arch.using_mmu_notifiers) | 1007 | if (!kvm->arch.using_mmu_notifiers) |
1008 | return 0; | 1008 | return 0; |
1009 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | 1009 | return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); |
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1012 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 27cced9c7249..e63587d30b70 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) | |||
725 | return kvmppc_hcall_impl_hv_realmode(cmd); | 725 | return kvmppc_hcall_impl_hv_realmode(cmd); |
726 | } | 726 | } |
727 | 727 | ||
728 | static int kvmppc_emulate_debug_inst(struct kvm_run *run, | ||
729 | struct kvm_vcpu *vcpu) | ||
730 | { | ||
731 | u32 last_inst; | ||
732 | |||
733 | if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != | ||
734 | EMULATE_DONE) { | ||
735 | /* | ||
736 | * Fetch failed, so return to guest and | ||
737 | * try executing it again. | ||
738 | */ | ||
739 | return RESUME_GUEST; | ||
740 | } | ||
741 | |||
742 | if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { | ||
743 | run->exit_reason = KVM_EXIT_DEBUG; | ||
744 | run->debug.arch.address = kvmppc_get_pc(vcpu); | ||
745 | return RESUME_HOST; | ||
746 | } else { | ||
747 | kvmppc_core_queue_program(vcpu, SRR1_PROGILL); | ||
748 | return RESUME_GUEST; | ||
749 | } | ||
750 | } | ||
751 | |||
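kvmppc_emulate_debug_inst() is the in-kernel half of HV software breakpoints: the planted opcode faults as an emulation assist, and a match on KVMPPC_INST_SW_BREAKPOINT exits to userspace with KVM_EXIT_DEBUG. The userspace half would look roughly like this hedged sketch, where vcpu_fd and gpa_to_hva() are assumed helpers:

	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
	};
	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);

	/* Plant the architected breakpoint opcode at the target. */
	uint32_t *insn = gpa_to_hva(bp_gpa);
	uint32_t saved = *insn;		/* restore this when the bp fires */
	*insn = 0x00dddd00;		/* KVMPPC_INST_SW_BREAKPOINT */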
728 | static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, | 752 | static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, |
729 | struct task_struct *tsk) | 753 | struct task_struct *tsk) |
730 | { | 754 | { |
@@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
807 | break; | 831 | break; |
808 | /* | 832 | /* |
809 | * This occurs if the guest executes an illegal instruction. | 833 | * This occurs if the guest executes an illegal instruction. |
810 | * We just generate a program interrupt to the guest, since | 834 | * If the guest debug is disabled, generate a program interrupt |
811 | * we don't emulate any guest instructions at this stage. | 835 | * to the guest. If guest debug is enabled, we need to check |
836 | * whether the instruction is a software breakpoint instruction. | ||
837 | * Return to the guest or the host accordingly. | ||
812 | */ | 838 | */ |
813 | case BOOK3S_INTERRUPT_H_EMUL_ASSIST: | 839 | case BOOK3S_INTERRUPT_H_EMUL_ASSIST: |
814 | kvmppc_core_queue_program(vcpu, SRR1_PROGILL); | 840 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { |
815 | r = RESUME_GUEST; | 841 | r = kvmppc_emulate_debug_inst(run, vcpu); |
842 | } else { | ||
843 | kvmppc_core_queue_program(vcpu, SRR1_PROGILL); | ||
844 | r = RESUME_GUEST; | ||
845 | } | ||
816 | break; | 846 | break; |
817 | /* | 847 | /* |
818 | * This occurs if the guest (kernel or userspace), does something that | 848 | * This occurs if the guest (kernel or userspace), does something that |
@@ -856,7 +886,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, | |||
856 | { | 886 | { |
857 | int i, j; | 887 | int i, j; |
858 | 888 | ||
859 | kvmppc_set_pvr_hv(vcpu, sregs->pvr); | 889 | /* Only accept the same PVR as the host's, since we can't spoof it */ |
890 | if (sregs->pvr != vcpu->arch.pvr) | ||
891 | return -EINVAL; | ||
860 | 892 | ||
861 | j = 0; | 893 | j = 0; |
862 | for (i = 0; i < vcpu->arch.slb_nr; i++) { | 894 | for (i = 0; i < vcpu->arch.slb_nr; i++) { |
@@ -922,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, | |||
922 | long int i; | 954 | long int i; |
923 | 955 | ||
924 | switch (id) { | 956 | switch (id) { |
957 | case KVM_REG_PPC_DEBUG_INST: | ||
958 | *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); | ||
959 | break; | ||
925 | case KVM_REG_PPC_HIOR: | 960 | case KVM_REG_PPC_HIOR: |
926 | *val = get_reg_val(id, 0); | 961 | *val = get_reg_val(id, 0); |
927 | break; | 962 | break; |
@@ -1489,7 +1524,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, | |||
1489 | static int kvmppc_grab_hwthread(int cpu) | 1524 | static int kvmppc_grab_hwthread(int cpu) |
1490 | { | 1525 | { |
1491 | struct paca_struct *tpaca; | 1526 | struct paca_struct *tpaca; |
1492 | long timeout = 1000; | 1527 | long timeout = 10000; |
1493 | 1528 | ||
1494 | tpaca = &paca[cpu]; | 1529 | tpaca = &paca[cpu]; |
1495 | 1530 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b9615ba5b083..4fdc27c80f4c 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -163,6 +163,12 @@ void __init kvm_cma_reserve(void) | |||
163 | unsigned long align_size; | 163 | unsigned long align_size; |
164 | struct memblock_region *reg; | 164 | struct memblock_region *reg; |
165 | phys_addr_t selected_size = 0; | 165 | phys_addr_t selected_size = 0; |
166 | |||
167 | /* | ||
168 | * We only need the CMA reservation when running in HV mode. | ||
169 | */ | ||
170 | if (!cpu_has_feature(CPU_FTR_HVMODE)) | ||
171 | return; | ||
166 | /* | 172 | /* |
167 | * We cannot use memblock_phys_mem_size() here, because | 173 | * We cannot use memblock_phys_mem_size() here, because |
168 | * memblock_analyze() has not been called yet. | 174 | * memblock_analyze() has not been called yet. |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f0c4db7704c3..edb2ccdbb2ba 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -355,6 +355,7 @@ kvmppc_hv_entry: | |||
355 | * MSR = ~IR|DR | 355 | * MSR = ~IR|DR |
356 | * R13 = PACA | 356 | * R13 = PACA |
357 | * R1 = host R1 | 357 | * R1 = host R1 |
358 | * R2 = TOC | ||
358 | * all other volatile GPRS = free | 359 | * all other volatile GPRS = free |
359 | */ | 360 | */ |
360 | mflr r0 | 361 | mflr r0 |
@@ -503,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
503 | toc_tlbie_lock: | 504 | toc_tlbie_lock: |
504 | .tc native_tlbie_lock[TC],native_tlbie_lock | 505 | .tc native_tlbie_lock[TC],native_tlbie_lock |
505 | .previous | 506 | .previous |
506 | ld r3,toc_tlbie_lock@toc(2) | 507 | ld r3,toc_tlbie_lock@toc(r2) |
507 | #ifdef __BIG_ENDIAN__ | 508 | #ifdef __BIG_ENDIAN__ |
508 | lwz r8,PACA_LOCK_TOKEN(r13) | 509 | lwz r8,PACA_LOCK_TOKEN(r13) |
509 | #else | 510 | #else |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..cf2eb16846d1 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, | |||
295 | return 0; | 295 | return 0; |
296 | } | 296 | } |
297 | 297 | ||
298 | static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) | 298 | static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, |
299 | unsigned long end) | ||
299 | { | 300 | { |
300 | /* XXX could be more clever ;) */ | 301 | /* XXX could be more clever ;) */ |
301 | return 0; | 302 | return 0; |
@@ -1319,6 +1320,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, | |||
1319 | int r = 0; | 1320 | int r = 0; |
1320 | 1321 | ||
1321 | switch (id) { | 1322 | switch (id) { |
1323 | case KVM_REG_PPC_DEBUG_INST: | ||
1324 | *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); | ||
1325 | break; | ||
1322 | case KVM_REG_PPC_HIOR: | 1326 | case KVM_REG_PPC_HIOR: |
1323 | *val = get_reg_val(id, to_book3s(vcpu)->hior); | 1327 | *val = get_reg_val(id, to_book3s(vcpu)->hior); |
1324 | break; | 1328 | break; |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b4c89fa6f109..9b55dec2d6cc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) | |||
124 | } | 124 | } |
125 | #endif | 125 | #endif |
126 | 126 | ||
127 | /* | ||
128 | * Load up guest vcpu FP state if it's needed. | ||
129 | * It also sets MSR_FP in the thread so the host knows | ||
130 | * we're holding the FPU, letting the host save the guest | ||
131 | * vcpu FP state if another thread needs to use the FPU. | ||
132 | * This simulates an FP-unavailable fault. | ||
133 | * | ||
134 | * Must be called with preemption disabled. | ||
135 | */ | ||
136 | static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) | ||
137 | { | ||
138 | #ifdef CONFIG_PPC_FPU | ||
139 | if (!(current->thread.regs->msr & MSR_FP)) { | ||
140 | enable_kernel_fp(); | ||
141 | load_fp_state(&vcpu->arch.fp); | ||
142 | current->thread.fp_save_area = &vcpu->arch.fp; | ||
143 | current->thread.regs->msr |= MSR_FP; | ||
144 | } | ||
145 | #endif | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Save guest vcpu FP state back into the thread struct. | ||
150 | * Must be called with preemption disabled. | ||
151 | */ | ||
152 | static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) | ||
153 | { | ||
154 | #ifdef CONFIG_PPC_FPU | ||
155 | if (current->thread.regs->msr & MSR_FP) | ||
156 | giveup_fpu(current); | ||
157 | current->thread.fp_save_area = NULL; | ||
158 | #endif | ||
159 | } | ||
160 | |||
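Both helpers state a preemption contract rather than enforcing one, since the real call sites in kvmppc_vcpu_run() already sit inside the existing hard-IRQ/preempt discipline. As an illustrative pairing only (an assumption about usage, not code from the patch):

	preempt_disable();
	kvmppc_load_guest_fp(vcpu);	/* vcpu->arch.fp -> FPU, MSR_FP set */
	/* ... enter and run the guest ... */
	kvmppc_save_guest_fp(vcpu);	/* giveup_fpu() drains the FPU back to vcpu->arch.fp */
	preempt_enable();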
127 | static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) | 161 | static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) |
128 | { | 162 | { |
129 | #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) | 163 | #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) |
@@ -134,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) | |||
134 | #endif | 168 | #endif |
135 | } | 169 | } |
136 | 170 | ||
171 | /* | ||
172 | * Simulate an AltiVec-unavailable fault to load guest state | ||
173 | * from the thread into the AltiVec unit. | ||
174 | * Must be called with preemption disabled. | ||
175 | */ | ||
176 | static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu) | ||
177 | { | ||
178 | #ifdef CONFIG_ALTIVEC | ||
179 | if (cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
180 | if (!(current->thread.regs->msr & MSR_VEC)) { | ||
181 | enable_kernel_altivec(); | ||
182 | load_vr_state(&vcpu->arch.vr); | ||
183 | current->thread.vr_save_area = &vcpu->arch.vr; | ||
184 | current->thread.regs->msr |= MSR_VEC; | ||
185 | } | ||
186 | } | ||
187 | #endif | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Save guest vcpu AltiVec state back into the thread struct. | ||
192 | * Must be called with preemption disabled. | ||
193 | */ | ||
194 | static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu) | ||
195 | { | ||
196 | #ifdef CONFIG_ALTIVEC | ||
197 | if (cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
198 | if (current->thread.regs->msr & MSR_VEC) | ||
199 | giveup_altivec(current); | ||
200 | current->thread.vr_save_area = NULL; | ||
201 | } | ||
202 | #endif | ||
203 | } | ||
204 | |||
137 | static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) | 205 | static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) |
138 | { | 206 | { |
139 | /* Synchronize guest's desire to get debug interrupts into shadow MSR */ | 207 | /* Synchronize guest's desire to get debug interrupts into shadow MSR */ |
@@ -267,6 +335,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) | |||
267 | clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); | 335 | clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); |
268 | } | 336 | } |
269 | 337 | ||
338 | void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu) | ||
339 | { | ||
340 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG); | ||
341 | } | ||
342 | |||
343 | void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu) | ||
344 | { | ||
345 | clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions); | ||
346 | } | ||
347 | |||
270 | static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) | 348 | static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) |
271 | { | 349 | { |
272 | kvmppc_set_srr0(vcpu, srr0); | 350 | kvmppc_set_srr0(vcpu, srr0); |
@@ -341,9 +419,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
341 | case BOOKE_IRQPRIO_ITLB_MISS: | 419 | case BOOKE_IRQPRIO_ITLB_MISS: |
342 | case BOOKE_IRQPRIO_SYSCALL: | 420 | case BOOKE_IRQPRIO_SYSCALL: |
343 | case BOOKE_IRQPRIO_FP_UNAVAIL: | 421 | case BOOKE_IRQPRIO_FP_UNAVAIL: |
422 | #ifdef CONFIG_SPE_POSSIBLE | ||
344 | case BOOKE_IRQPRIO_SPE_UNAVAIL: | 423 | case BOOKE_IRQPRIO_SPE_UNAVAIL: |
345 | case BOOKE_IRQPRIO_SPE_FP_DATA: | 424 | case BOOKE_IRQPRIO_SPE_FP_DATA: |
346 | case BOOKE_IRQPRIO_SPE_FP_ROUND: | 425 | case BOOKE_IRQPRIO_SPE_FP_ROUND: |
426 | #endif | ||
427 | #ifdef CONFIG_ALTIVEC | ||
428 | case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL: | ||
429 | case BOOKE_IRQPRIO_ALTIVEC_ASSIST: | ||
430 | #endif | ||
347 | case BOOKE_IRQPRIO_AP_UNAVAIL: | 431 | case BOOKE_IRQPRIO_AP_UNAVAIL: |
348 | allowed = 1; | 432 | allowed = 1; |
349 | msr_mask = MSR_CE | MSR_ME | MSR_DE; | 433 | msr_mask = MSR_CE | MSR_ME | MSR_DE; |
@@ -377,7 +461,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
377 | allowed = vcpu->arch.shared->msr & MSR_DE; | 461 | allowed = vcpu->arch.shared->msr & MSR_DE; |
378 | allowed = allowed && !crit; | 462 | allowed = allowed && !crit; |
379 | msr_mask = MSR_ME; | 463 | msr_mask = MSR_ME; |
380 | int_class = INT_CLASS_CRIT; | 464 | if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) |
465 | int_class = INT_CLASS_DBG; | ||
466 | else | ||
467 | int_class = INT_CLASS_CRIT; | ||
468 | |||
381 | break; | 469 | break; |
382 | } | 470 | } |
383 | 471 | ||
@@ -654,20 +742,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
654 | 742 | ||
655 | /* | 743 | /* |
656 | * Since we can't trap on MSR_FP in GS-mode, we consider the guest | 744 | * Since we can't trap on MSR_FP in GS-mode, we consider the guest |
657 | * as always using the FPU. Kernel usage of FP (via | 745 | * as always using the FPU. |
658 | * enable_kernel_fp()) in this thread must not occur while | ||
659 | * vcpu->fpu_active is set. | ||
660 | */ | 746 | */ |
661 | vcpu->fpu_active = 1; | ||
662 | |||
663 | kvmppc_load_guest_fp(vcpu); | 747 | kvmppc_load_guest_fp(vcpu); |
664 | #endif | 748 | #endif |
665 | 749 | ||
750 | #ifdef CONFIG_ALTIVEC | ||
751 | /* Save userspace AltiVec state in stack */ | ||
752 | if (cpu_has_feature(CPU_FTR_ALTIVEC)) | ||
753 | enable_kernel_altivec(); | ||
754 | /* | ||
755 | * Since we can't trap on MSR_VEC in GS-mode, we consider the guest | ||
756 | * as always using AltiVec. | ||
757 | */ | ||
758 | kvmppc_load_guest_altivec(vcpu); | ||
759 | #endif | ||
760 | |||
666 | /* Switch to guest debug context */ | 761 | /* Switch to guest debug context */ |
667 | debug = vcpu->arch.shadow_dbg_reg; | 762 | debug = vcpu->arch.dbg_reg; |
668 | switch_booke_debug_regs(&debug); | 763 | switch_booke_debug_regs(&debug); |
669 | debug = current->thread.debug; | 764 | debug = current->thread.debug; |
670 | current->thread.debug = vcpu->arch.shadow_dbg_reg; | 765 | current->thread.debug = vcpu->arch.dbg_reg; |
671 | 766 | ||
672 | vcpu->arch.pgdir = current->mm->pgd; | 767 | vcpu->arch.pgdir = current->mm->pgd; |
673 | kvmppc_fix_ee_before_entry(); | 768 | kvmppc_fix_ee_before_entry(); |
@@ -683,8 +778,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
683 | 778 | ||
684 | #ifdef CONFIG_PPC_FPU | 779 | #ifdef CONFIG_PPC_FPU |
685 | kvmppc_save_guest_fp(vcpu); | 780 | kvmppc_save_guest_fp(vcpu); |
781 | #endif | ||
686 | 782 | ||
687 | vcpu->fpu_active = 0; | 783 | #ifdef CONFIG_ALTIVEC |
784 | kvmppc_save_guest_altivec(vcpu); | ||
688 | #endif | 785 | #endif |
689 | 786 | ||
690 | out: | 787 | out: |
@@ -728,9 +825,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
728 | 825 | ||
729 | static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) | 826 | static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) |
730 | { | 827 | { |
731 | struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); | 828 | struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); |
732 | u32 dbsr = vcpu->arch.dbsr; | 829 | u32 dbsr = vcpu->arch.dbsr; |
733 | 830 | ||
831 | if (vcpu->guest_debug == 0) { | ||
832 | /* | ||
833 | * Debug resources belong to Guest. | ||
834 | * Imprecise debug event is not injected | ||
835 | */ | ||
836 | if (dbsr & DBSR_IDE) { | ||
837 | dbsr &= ~DBSR_IDE; | ||
838 | if (!dbsr) | ||
839 | return RESUME_GUEST; | ||
840 | } | ||
841 | |||
842 | if (dbsr && (vcpu->arch.shared->msr & MSR_DE) && | ||
843 | (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM)) | ||
844 | kvmppc_core_queue_debug(vcpu); | ||
845 | |||
846 | /* Inject a program interrupt if trap debug is not allowed */ | ||
847 | if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE)) | ||
848 | kvmppc_core_queue_program(vcpu, ESR_PTR); | ||
849 | |||
850 | return RESUME_GUEST; | ||
851 | } | ||
852 | |||
853 | /* | ||
854 | * Debug resources are owned by userspace. | ||
855 | * Clear the guest DBSR (vcpu->arch.dbsr). | ||
856 | */ | ||
857 | vcpu->arch.dbsr = 0; | ||
734 | run->debug.arch.status = 0; | 858 | run->debug.arch.status = 0; |
735 | run->debug.arch.address = vcpu->arch.pc; | 859 | run->debug.arch.address = vcpu->arch.pc; |
736 | 860 | ||
@@ -868,7 +992,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
868 | case BOOKE_INTERRUPT_DATA_STORAGE: | 992 | case BOOKE_INTERRUPT_DATA_STORAGE: |
869 | case BOOKE_INTERRUPT_DTLB_MISS: | 993 | case BOOKE_INTERRUPT_DTLB_MISS: |
870 | case BOOKE_INTERRUPT_HV_PRIV: | 994 | case BOOKE_INTERRUPT_HV_PRIV: |
871 | emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); | 995 | emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); |
996 | break; | ||
997 | case BOOKE_INTERRUPT_PROGRAM: | ||
998 | /* SW breakpoints arrive as illegal instructions on HV */ | ||
999 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | ||
1000 | emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); | ||
872 | break; | 1001 | break; |
873 | default: | 1002 | default: |
874 | break; | 1003 | break; |
@@ -947,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
947 | break; | 1076 | break; |
948 | 1077 | ||
949 | case BOOKE_INTERRUPT_PROGRAM: | 1078 | case BOOKE_INTERRUPT_PROGRAM: |
1079 | if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) && | ||
1080 | (last_inst == KVMPPC_INST_SW_BREAKPOINT)) { | ||
1081 | /* | ||
1082 | * We are here because of an SW breakpoint instruction, | ||
1083 | * so let's return to the host to handle it. | ||
1084 | */ | ||
1085 | r = kvmppc_handle_debug(run, vcpu); | ||
1086 | run->exit_reason = KVM_EXIT_DEBUG; | ||
1087 | kvmppc_account_exit(vcpu, DEBUG_EXITS); | ||
1088 | break; | ||
1089 | } | ||
1090 | |||
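The round trip userspace sees from this path follows the standard KVM debug and one-reg ABI; a hedged sketch (vcpu_fd and the guest memory mapping are assumed to already exist, error checking elided):

	#include <linux/kvm.h>
	#include <stdint.h>
	#include <sys/ioctl.h>

	/* Plant a software breakpoint over one guest instruction. */
	static void arm_sw_breakpoint(int vcpu_fd, uint32_t *guest_insn)
	{
		uint32_t bp_inst;
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_DEBUG_INST,
			.addr = (uintptr_t)&bp_inst,
		};
		struct kvm_guest_debug dbg = {
			.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
		};

		ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);     /* opcode to plant    */
		ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg); /* route BPs to host  */
		*guest_insn = bp_inst;                     /* patch guest memory */
		/* KVM_RUN then returns with exit_reason == KVM_EXIT_DEBUG and
		 * run->debug.arch.address pointing at the breakpoint. */
	}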
950 | if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { | 1091 | if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { |
951 | /* | 1092 | /* |
952 | * Program traps generated by user-level software must | 1093 | * Program traps generated by user-level software must |
@@ -991,7 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
991 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); | 1132 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); |
992 | r = RESUME_GUEST; | 1133 | r = RESUME_GUEST; |
993 | break; | 1134 | break; |
994 | #else | 1135 | #elif defined(CONFIG_SPE_POSSIBLE) |
995 | case BOOKE_INTERRUPT_SPE_UNAVAIL: | 1136 | case BOOKE_INTERRUPT_SPE_UNAVAIL: |
996 | /* | 1137 | /* |
997 | * Guest wants SPE, but host kernel doesn't support it. Send | 1138 | * Guest wants SPE, but host kernel doesn't support it. Send |
@@ -1012,6 +1153,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1012 | run->hw.hardware_exit_reason = exit_nr; | 1153 | run->hw.hardware_exit_reason = exit_nr; |
1013 | r = RESUME_HOST; | 1154 | r = RESUME_HOST; |
1014 | break; | 1155 | break; |
1156 | #endif /* CONFIG_SPE_POSSIBLE */ | ||
1157 | |||
1158 | /* | ||
1159 | * On cores with the Vector category, KVM is loaded only if CONFIG_ALTIVEC is set; | ||
1160 | * see kvmppc_core_check_processor_compat(). | ||
1161 | */ | ||
1162 | #ifdef CONFIG_ALTIVEC | ||
1163 | case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL: | ||
1164 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL); | ||
1165 | r = RESUME_GUEST; | ||
1166 | break; | ||
1167 | |||
1168 | case BOOKE_INTERRUPT_ALTIVEC_ASSIST: | ||
1169 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST); | ||
1170 | r = RESUME_GUEST; | ||
1171 | break; | ||
1015 | #endif | 1172 | #endif |
1016 | 1173 | ||
1017 | case BOOKE_INTERRUPT_DATA_STORAGE: | 1174 | case BOOKE_INTERRUPT_DATA_STORAGE: |
@@ -1188,6 +1345,8 @@ out: | |||
1188 | else { | 1345 | else { |
1189 | /* interrupts now hard-disabled */ | 1346 | /* interrupts now hard-disabled */ |
1190 | kvmppc_fix_ee_before_entry(); | 1347 | kvmppc_fix_ee_before_entry(); |
1348 | kvmppc_load_guest_fp(vcpu); | ||
1349 | kvmppc_load_guest_altivec(vcpu); | ||
1191 | } | 1350 | } |
1192 | } | 1351 | } |
1193 | 1352 | ||
@@ -1243,6 +1402,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) | |||
1243 | setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, | 1402 | setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, |
1244 | (unsigned long)vcpu); | 1403 | (unsigned long)vcpu); |
1245 | 1404 | ||
1405 | /* | ||
1406 | * Clear DBSR.MRR to avoid a guest debug interrupt, as | ||
1407 | * this bit is of interest only to the host. | ||
1408 | */ | ||
1409 | mtspr(SPRN_DBSR, DBSR_MRR); | ||
1246 | return 0; | 1410 | return 0; |
1247 | } | 1411 | } |
1248 | 1412 | ||
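For context: DBSR on Book E parts is write-one-to-clear, so the mtspr() above clears only the MRR (most-recent-reset) field and leaves other status bits intact; kvmppc_clear_dbsr() in booke.h (visible below) relies on the same property to wipe the whole register:

	mtspr(SPRN_DBSR, DBSR_MRR);          /* clear just DBSR[MRR]          */
	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));  /* clear every bit currently set */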
@@ -1457,144 +1621,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
1457 | return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); | 1621 | return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); |
1458 | } | 1622 | } |
1459 | 1623 | ||
1460 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 1624 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, |
1625 | union kvmppc_one_reg *val) | ||
1461 | { | 1626 | { |
1462 | int r = 0; | 1627 | int r = 0; |
1463 | union kvmppc_one_reg val; | ||
1464 | int size; | ||
1465 | |||
1466 | size = one_reg_size(reg->id); | ||
1467 | if (size > sizeof(val)) | ||
1468 | return -EINVAL; | ||
1469 | 1628 | ||
1470 | switch (reg->id) { | 1629 | switch (id) { |
1471 | case KVM_REG_PPC_IAC1: | 1630 | case KVM_REG_PPC_IAC1: |
1472 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); | 1631 | *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1); |
1473 | break; | 1632 | break; |
1474 | case KVM_REG_PPC_IAC2: | 1633 | case KVM_REG_PPC_IAC2: |
1475 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); | 1634 | *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2); |
1476 | break; | 1635 | break; |
1477 | #if CONFIG_PPC_ADV_DEBUG_IACS > 2 | 1636 | #if CONFIG_PPC_ADV_DEBUG_IACS > 2 |
1478 | case KVM_REG_PPC_IAC3: | 1637 | case KVM_REG_PPC_IAC3: |
1479 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); | 1638 | *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3); |
1480 | break; | 1639 | break; |
1481 | case KVM_REG_PPC_IAC4: | 1640 | case KVM_REG_PPC_IAC4: |
1482 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); | 1641 | *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4); |
1483 | break; | 1642 | break; |
1484 | #endif | 1643 | #endif |
1485 | case KVM_REG_PPC_DAC1: | 1644 | case KVM_REG_PPC_DAC1: |
1486 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); | 1645 | *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1); |
1487 | break; | 1646 | break; |
1488 | case KVM_REG_PPC_DAC2: | 1647 | case KVM_REG_PPC_DAC2: |
1489 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); | 1648 | *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2); |
1490 | break; | 1649 | break; |
1491 | case KVM_REG_PPC_EPR: { | 1650 | case KVM_REG_PPC_EPR: { |
1492 | u32 epr = kvmppc_get_epr(vcpu); | 1651 | u32 epr = kvmppc_get_epr(vcpu); |
1493 | val = get_reg_val(reg->id, epr); | 1652 | *val = get_reg_val(id, epr); |
1494 | break; | 1653 | break; |
1495 | } | 1654 | } |
1496 | #if defined(CONFIG_64BIT) | 1655 | #if defined(CONFIG_64BIT) |
1497 | case KVM_REG_PPC_EPCR: | 1656 | case KVM_REG_PPC_EPCR: |
1498 | val = get_reg_val(reg->id, vcpu->arch.epcr); | 1657 | *val = get_reg_val(id, vcpu->arch.epcr); |
1499 | break; | 1658 | break; |
1500 | #endif | 1659 | #endif |
1501 | case KVM_REG_PPC_TCR: | 1660 | case KVM_REG_PPC_TCR: |
1502 | val = get_reg_val(reg->id, vcpu->arch.tcr); | 1661 | *val = get_reg_val(id, vcpu->arch.tcr); |
1503 | break; | 1662 | break; |
1504 | case KVM_REG_PPC_TSR: | 1663 | case KVM_REG_PPC_TSR: |
1505 | val = get_reg_val(reg->id, vcpu->arch.tsr); | 1664 | *val = get_reg_val(id, vcpu->arch.tsr); |
1506 | break; | 1665 | break; |
1507 | case KVM_REG_PPC_DEBUG_INST: | 1666 | case KVM_REG_PPC_DEBUG_INST: |
1508 | val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); | 1667 | *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); |
1509 | break; | 1668 | break; |
1510 | case KVM_REG_PPC_VRSAVE: | 1669 | case KVM_REG_PPC_VRSAVE: |
1511 | val = get_reg_val(reg->id, vcpu->arch.vrsave); | 1670 | *val = get_reg_val(id, vcpu->arch.vrsave); |
1512 | break; | 1671 | break; |
1513 | default: | 1672 | default: |
1514 | r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); | 1673 | r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); |
1515 | break; | 1674 | break; |
1516 | } | 1675 | } |
1517 | 1676 | ||
1518 | if (r) | ||
1519 | return r; | ||
1520 | |||
1521 | if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) | ||
1522 | r = -EFAULT; | ||
1523 | |||
1524 | return r; | 1677 | return r; |
1525 | } | 1678 | } |
1526 | 1679 | ||
1527 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 1680 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, |
1681 | union kvmppc_one_reg *val) | ||
1528 | { | 1682 | { |
1529 | int r = 0; | 1683 | int r = 0; |
1530 | union kvmppc_one_reg val; | ||
1531 | int size; | ||
1532 | 1684 | ||
1533 | size = one_reg_size(reg->id); | 1685 | switch (id) { |
1534 | if (size > sizeof(val)) | ||
1535 | return -EINVAL; | ||
1536 | |||
1537 | if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) | ||
1538 | return -EFAULT; | ||
1539 | |||
1540 | switch (reg->id) { | ||
1541 | case KVM_REG_PPC_IAC1: | 1686 | case KVM_REG_PPC_IAC1: |
1542 | vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); | 1687 | vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val); |
1543 | break; | 1688 | break; |
1544 | case KVM_REG_PPC_IAC2: | 1689 | case KVM_REG_PPC_IAC2: |
1545 | vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); | 1690 | vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val); |
1546 | break; | 1691 | break; |
1547 | #if CONFIG_PPC_ADV_DEBUG_IACS > 2 | 1692 | #if CONFIG_PPC_ADV_DEBUG_IACS > 2 |
1548 | case KVM_REG_PPC_IAC3: | 1693 | case KVM_REG_PPC_IAC3: |
1549 | vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); | 1694 | vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val); |
1550 | break; | 1695 | break; |
1551 | case KVM_REG_PPC_IAC4: | 1696 | case KVM_REG_PPC_IAC4: |
1552 | vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); | 1697 | vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val); |
1553 | break; | 1698 | break; |
1554 | #endif | 1699 | #endif |
1555 | case KVM_REG_PPC_DAC1: | 1700 | case KVM_REG_PPC_DAC1: |
1556 | vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); | 1701 | vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val); |
1557 | break; | 1702 | break; |
1558 | case KVM_REG_PPC_DAC2: | 1703 | case KVM_REG_PPC_DAC2: |
1559 | vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); | 1704 | vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val); |
1560 | break; | 1705 | break; |
1561 | case KVM_REG_PPC_EPR: { | 1706 | case KVM_REG_PPC_EPR: { |
1562 | u32 new_epr = set_reg_val(reg->id, val); | 1707 | u32 new_epr = set_reg_val(id, *val); |
1563 | kvmppc_set_epr(vcpu, new_epr); | 1708 | kvmppc_set_epr(vcpu, new_epr); |
1564 | break; | 1709 | break; |
1565 | } | 1710 | } |
1566 | #if defined(CONFIG_64BIT) | 1711 | #if defined(CONFIG_64BIT) |
1567 | case KVM_REG_PPC_EPCR: { | 1712 | case KVM_REG_PPC_EPCR: { |
1568 | u32 new_epcr = set_reg_val(reg->id, val); | 1713 | u32 new_epcr = set_reg_val(id, *val); |
1569 | kvmppc_set_epcr(vcpu, new_epcr); | 1714 | kvmppc_set_epcr(vcpu, new_epcr); |
1570 | break; | 1715 | break; |
1571 | } | 1716 | } |
1572 | #endif | 1717 | #endif |
1573 | case KVM_REG_PPC_OR_TSR: { | 1718 | case KVM_REG_PPC_OR_TSR: { |
1574 | u32 tsr_bits = set_reg_val(reg->id, val); | 1719 | u32 tsr_bits = set_reg_val(id, *val); |
1575 | kvmppc_set_tsr_bits(vcpu, tsr_bits); | 1720 | kvmppc_set_tsr_bits(vcpu, tsr_bits); |
1576 | break; | 1721 | break; |
1577 | } | 1722 | } |
1578 | case KVM_REG_PPC_CLEAR_TSR: { | 1723 | case KVM_REG_PPC_CLEAR_TSR: { |
1579 | u32 tsr_bits = set_reg_val(reg->id, val); | 1724 | u32 tsr_bits = set_reg_val(id, *val); |
1580 | kvmppc_clr_tsr_bits(vcpu, tsr_bits); | 1725 | kvmppc_clr_tsr_bits(vcpu, tsr_bits); |
1581 | break; | 1726 | break; |
1582 | } | 1727 | } |
1583 | case KVM_REG_PPC_TSR: { | 1728 | case KVM_REG_PPC_TSR: { |
1584 | u32 tsr = set_reg_val(reg->id, val); | 1729 | u32 tsr = set_reg_val(id, *val); |
1585 | kvmppc_set_tsr(vcpu, tsr); | 1730 | kvmppc_set_tsr(vcpu, tsr); |
1586 | break; | 1731 | break; |
1587 | } | 1732 | } |
1588 | case KVM_REG_PPC_TCR: { | 1733 | case KVM_REG_PPC_TCR: { |
1589 | u32 tcr = set_reg_val(reg->id, val); | 1734 | u32 tcr = set_reg_val(id, *val); |
1590 | kvmppc_set_tcr(vcpu, tcr); | 1735 | kvmppc_set_tcr(vcpu, tcr); |
1591 | break; | 1736 | break; |
1592 | } | 1737 | } |
1593 | case KVM_REG_PPC_VRSAVE: | 1738 | case KVM_REG_PPC_VRSAVE: |
1594 | vcpu->arch.vrsave = set_reg_val(reg->id, val); | 1739 | vcpu->arch.vrsave = set_reg_val(id, *val); |
1595 | break; | 1740 | break; |
1596 | default: | 1741 | default: |
1597 | r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); | 1742 | r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); |
1598 | break; | 1743 | break; |
1599 | } | 1744 | } |
1600 | 1745 | ||
@@ -1694,10 +1839,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) | |||
1694 | update_timer_ints(vcpu); | 1839 | update_timer_ints(vcpu); |
1695 | } | 1840 | } |
1696 | 1841 | ||
1697 | void kvmppc_decrementer_func(unsigned long data) | 1842 | void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) |
1698 | { | 1843 | { |
1699 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | ||
1700 | |||
1701 | if (vcpu->arch.tcr & TCR_ARE) { | 1844 | if (vcpu->arch.tcr & TCR_ARE) { |
1702 | vcpu->arch.dec = vcpu->arch.decar; | 1845 | vcpu->arch.dec = vcpu->arch.decar; |
1703 | kvmppc_emulate_dec(vcpu); | 1846 | kvmppc_emulate_dec(vcpu); |
@@ -1842,7 +1985,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
1842 | int n, b = 0, w = 0; | 1985 | int n, b = 0, w = 0; |
1843 | 1986 | ||
1844 | if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { | 1987 | if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { |
1845 | vcpu->arch.shadow_dbg_reg.dbcr0 = 0; | 1988 | vcpu->arch.dbg_reg.dbcr0 = 0; |
1846 | vcpu->guest_debug = 0; | 1989 | vcpu->guest_debug = 0; |
1847 | kvm_guest_protect_msr(vcpu, MSR_DE, false); | 1990 | kvm_guest_protect_msr(vcpu, MSR_DE, false); |
1848 | return 0; | 1991 | return 0; |
@@ -1850,15 +1993,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
1850 | 1993 | ||
1851 | kvm_guest_protect_msr(vcpu, MSR_DE, true); | 1994 | kvm_guest_protect_msr(vcpu, MSR_DE, true); |
1852 | vcpu->guest_debug = dbg->control; | 1995 | vcpu->guest_debug = dbg->control; |
1853 | vcpu->arch.shadow_dbg_reg.dbcr0 = 0; | 1996 | vcpu->arch.dbg_reg.dbcr0 = 0; |
1854 | /* Set DBCR0_EDM in guest visible DBCR0 register. */ | ||
1855 | vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; | ||
1856 | 1997 | ||
1857 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 1998 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
1858 | vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; | 1999 | vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; |
1859 | 2000 | ||
1860 | /* Code below handles only HW breakpoints */ | 2001 | /* Code below handles only HW breakpoints */ |
1861 | dbg_reg = &(vcpu->arch.shadow_dbg_reg); | 2002 | dbg_reg = &(vcpu->arch.dbg_reg); |
1862 | 2003 | ||
1863 | #ifdef CONFIG_KVM_BOOKE_HV | 2004 | #ifdef CONFIG_KVM_BOOKE_HV |
1864 | /* | 2005 | /* |
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index f753543c56fa..22ba08ea68e9 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h | |||
@@ -32,9 +32,15 @@ | |||
32 | #define BOOKE_IRQPRIO_ALIGNMENT 2 | 32 | #define BOOKE_IRQPRIO_ALIGNMENT 2 |
33 | #define BOOKE_IRQPRIO_PROGRAM 3 | 33 | #define BOOKE_IRQPRIO_PROGRAM 3 |
34 | #define BOOKE_IRQPRIO_FP_UNAVAIL 4 | 34 | #define BOOKE_IRQPRIO_FP_UNAVAIL 4 |
35 | #ifdef CONFIG_SPE_POSSIBLE | ||
35 | #define BOOKE_IRQPRIO_SPE_UNAVAIL 5 | 36 | #define BOOKE_IRQPRIO_SPE_UNAVAIL 5 |
36 | #define BOOKE_IRQPRIO_SPE_FP_DATA 6 | 37 | #define BOOKE_IRQPRIO_SPE_FP_DATA 6 |
37 | #define BOOKE_IRQPRIO_SPE_FP_ROUND 7 | 38 | #define BOOKE_IRQPRIO_SPE_FP_ROUND 7 |
39 | #endif | ||
40 | #ifdef CONFIG_PPC_E500MC | ||
41 | #define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5 | ||
42 | #define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6 | ||
43 | #endif | ||
38 | #define BOOKE_IRQPRIO_SYSCALL 8 | 44 | #define BOOKE_IRQPRIO_SYSCALL 8 |
39 | #define BOOKE_IRQPRIO_AP_UNAVAIL 9 | 45 | #define BOOKE_IRQPRIO_AP_UNAVAIL 9 |
40 | #define BOOKE_IRQPRIO_DTLB_MISS 10 | 46 | #define BOOKE_IRQPRIO_DTLB_MISS 10 |
@@ -116,40 +122,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, | |||
116 | extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, | 122 | extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, |
117 | ulong *spr_val); | 123 | ulong *spr_val); |
118 | 124 | ||
119 | /* | ||
120 | * Load up guest vcpu FP state if it's needed. | ||
121 | * It also set the MSR_FP in thread so that host know | ||
122 | * we're holding FPU, and then host can help to save | ||
123 | * guest vcpu FP state if other threads require to use FPU. | ||
124 | * This simulates an FP unavailable fault. | ||
125 | * | ||
126 | * It requires to be called with preemption disabled. | ||
127 | */ | ||
128 | static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) | ||
129 | { | ||
130 | #ifdef CONFIG_PPC_FPU | ||
131 | if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { | ||
132 | enable_kernel_fp(); | ||
133 | load_fp_state(&vcpu->arch.fp); | ||
134 | current->thread.fp_save_area = &vcpu->arch.fp; | ||
135 | current->thread.regs->msr |= MSR_FP; | ||
136 | } | ||
137 | #endif | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Save guest vcpu FP state into thread. | ||
142 | * It requires to be called with preemption disabled. | ||
143 | */ | ||
144 | static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) | ||
145 | { | ||
146 | #ifdef CONFIG_PPC_FPU | ||
147 | if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) | ||
148 | giveup_fpu(current); | ||
149 | current->thread.fp_save_area = NULL; | ||
150 | #endif | ||
151 | } | ||
152 | |||
153 | static inline void kvmppc_clear_dbsr(void) | 125 | static inline void kvmppc_clear_dbsr(void) |
154 | { | 126 | { |
155 | mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); | 127 | mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); |
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 28c158881d23..a82f64502de1 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #define OP_19_XOP_RFI 50 | 26 | #define OP_19_XOP_RFI 50 |
27 | #define OP_19_XOP_RFCI 51 | 27 | #define OP_19_XOP_RFCI 51 |
28 | #define OP_19_XOP_RFDI 39 | ||
28 | 29 | ||
29 | #define OP_31_XOP_MFMSR 83 | 30 | #define OP_31_XOP_MFMSR 83 |
30 | #define OP_31_XOP_WRTEE 131 | 31 | #define OP_31_XOP_WRTEE 131 |
@@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) | |||
37 | kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); | 38 | kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); |
38 | } | 39 | } |
39 | 40 | ||
41 | static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu) | ||
42 | { | ||
43 | vcpu->arch.pc = vcpu->arch.dsrr0; | ||
44 | kvmppc_set_msr(vcpu, vcpu->arch.dsrr1); | ||
45 | } | ||
46 | |||
40 | static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) | 47 | static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) |
41 | { | 48 | { |
42 | vcpu->arch.pc = vcpu->arch.csrr0; | 49 | vcpu->arch.pc = vcpu->arch.csrr0; |
@@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
65 | *advance = 0; | 72 | *advance = 0; |
66 | break; | 73 | break; |
67 | 74 | ||
75 | case OP_19_XOP_RFDI: | ||
76 | kvmppc_emul_rfdi(vcpu); | ||
77 | kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS); | ||
78 | *advance = 0; | ||
79 | break; | ||
80 | |||
68 | default: | 81 | default: |
69 | emulated = EMULATE_FAIL; | 82 | emulated = EMULATE_FAIL; |
70 | break; | 83 | break; |
@@ -118,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
118 | int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) | 131 | int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) |
119 | { | 132 | { |
120 | int emulated = EMULATE_DONE; | 133 | int emulated = EMULATE_DONE; |
134 | bool debug_inst = false; | ||
121 | 135 | ||
122 | switch (sprn) { | 136 | switch (sprn) { |
123 | case SPRN_DEAR: | 137 | case SPRN_DEAR: |
@@ -132,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) | |||
132 | case SPRN_CSRR1: | 146 | case SPRN_CSRR1: |
133 | vcpu->arch.csrr1 = spr_val; | 147 | vcpu->arch.csrr1 = spr_val; |
134 | break; | 148 | break; |
149 | case SPRN_DSRR0: | ||
150 | vcpu->arch.dsrr0 = spr_val; | ||
151 | break; | ||
152 | case SPRN_DSRR1: | ||
153 | vcpu->arch.dsrr1 = spr_val; | ||
154 | break; | ||
155 | case SPRN_IAC1: | ||
156 | /* | ||
157 | * If userspace is debugging the guest then the guest | ||
158 | * cannot access debug registers. | ||
159 | */ | ||
160 | if (vcpu->guest_debug) | ||
161 | break; | ||
162 | |||
163 | debug_inst = true; | ||
164 | vcpu->arch.dbg_reg.iac1 = spr_val; | ||
165 | break; | ||
166 | case SPRN_IAC2: | ||
167 | /* | ||
168 | * If userspace is debugging the guest then the guest | ||
169 | * cannot access debug registers. | ||
170 | */ | ||
171 | if (vcpu->guest_debug) | ||
172 | break; | ||
173 | |||
174 | debug_inst = true; | ||
175 | vcpu->arch.dbg_reg.iac2 = spr_val; | ||
176 | break; | ||
177 | #if CONFIG_PPC_ADV_DEBUG_IACS > 2 | ||
178 | case SPRN_IAC3: | ||
179 | /* | ||
180 | * If userspace is debugging the guest then the guest | ||
181 | * cannot access debug registers. | ||
182 | */ | ||
183 | if (vcpu->guest_debug) | ||
184 | break; | ||
185 | |||
186 | debug_inst = true; | ||
187 | vcpu->arch.dbg_reg.iac3 = spr_val; | ||
188 | break; | ||
189 | case SPRN_IAC4: | ||
190 | /* | ||
191 | * If userspace is debugging the guest then the guest | ||
192 | * cannot access debug registers. | ||
193 | */ | ||
194 | if (vcpu->guest_debug) | ||
195 | break; | ||
196 | |||
197 | debug_inst = true; | ||
198 | vcpu->arch.dbg_reg.iac4 = spr_val; | ||
199 | break; | ||
200 | #endif | ||
201 | case SPRN_DAC1: | ||
202 | /* | ||
203 | * If userspace is debugging the guest then the guest | ||
204 | * cannot access debug registers. | ||
205 | */ | ||
206 | if (vcpu->guest_debug) | ||
207 | break; | ||
208 | |||
209 | debug_inst = true; | ||
210 | vcpu->arch.dbg_reg.dac1 = spr_val; | ||
211 | break; | ||
212 | case SPRN_DAC2: | ||
213 | /* | ||
214 | * If userspace is debugging the guest then the guest | ||
215 | * cannot access debug registers. | ||
216 | */ | ||
217 | if (vcpu->guest_debug) | ||
218 | break; | ||
219 | |||
220 | debug_inst = true; | ||
221 | vcpu->arch.dbg_reg.dac2 = spr_val; | ||
222 | break; | ||
135 | case SPRN_DBCR0: | 223 | case SPRN_DBCR0: |
224 | /* | ||
225 | * If userspace is debugging the guest then the guest | ||
226 | * cannot access debug registers. | ||
227 | */ | ||
228 | if (vcpu->guest_debug) | ||
229 | break; | ||
230 | |||
231 | debug_inst = true; | ||
232 | spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE | | ||
233 | DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | | ||
234 | DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W); | ||
235 | |||
136 | vcpu->arch.dbg_reg.dbcr0 = spr_val; | 236 | vcpu->arch.dbg_reg.dbcr0 = spr_val; |
137 | break; | 237 | break; |
138 | case SPRN_DBCR1: | 238 | case SPRN_DBCR1: |
239 | /* | ||
240 | * If userspace is debugging the guest then the guest | ||
241 | * cannot access debug registers. | ||
242 | */ | ||
243 | if (vcpu->guest_debug) | ||
244 | break; | ||
245 | |||
246 | debug_inst = true; | ||
139 | vcpu->arch.dbg_reg.dbcr1 = spr_val; | 247 | vcpu->arch.dbg_reg.dbcr1 = spr_val; |
140 | break; | 248 | break; |
249 | case SPRN_DBCR2: | ||
250 | /* | ||
251 | * If userspace is debugging the guest then the guest | ||
252 | * cannot access debug registers. | ||
253 | */ | ||
254 | if (vcpu->guest_debug) | ||
255 | break; | ||
256 | |||
257 | debug_inst = true; | ||
258 | vcpu->arch.dbg_reg.dbcr2 = spr_val; | ||
259 | break; | ||
141 | case SPRN_DBSR: | 260 | case SPRN_DBSR: |
261 | /* | ||
262 | * If userspace is debugging the guest then the guest | ||
263 | * cannot access debug registers. | ||
264 | */ | ||
265 | if (vcpu->guest_debug) | ||
266 | break; | ||
267 | |||
142 | vcpu->arch.dbsr &= ~spr_val; | 268 | vcpu->arch.dbsr &= ~spr_val; |
269 | if (!(vcpu->arch.dbsr & ~DBSR_IDE)) | ||
270 | kvmppc_core_dequeue_debug(vcpu); | ||
143 | break; | 271 | break; |
144 | case SPRN_TSR: | 272 | case SPRN_TSR: |
145 | kvmppc_clr_tsr_bits(vcpu, spr_val); | 273 | kvmppc_clr_tsr_bits(vcpu, spr_val); |
@@ -252,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) | |||
252 | emulated = EMULATE_FAIL; | 380 | emulated = EMULATE_FAIL; |
253 | } | 381 | } |
254 | 382 | ||
383 | if (debug_inst) { | ||
384 | current->thread.debug = vcpu->arch.dbg_reg; | ||
385 | switch_booke_debug_regs(&vcpu->arch.dbg_reg); | ||
386 | } | ||
255 | return emulated; | 387 | return emulated; |
256 | } | 388 | } |
257 | 389 | ||
@@ -278,12 +410,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
278 | case SPRN_CSRR1: | 410 | case SPRN_CSRR1: |
279 | *spr_val = vcpu->arch.csrr1; | 411 | *spr_val = vcpu->arch.csrr1; |
280 | break; | 412 | break; |
413 | case SPRN_DSRR0: | ||
414 | *spr_val = vcpu->arch.dsrr0; | ||
415 | break; | ||
416 | case SPRN_DSRR1: | ||
417 | *spr_val = vcpu->arch.dsrr1; | ||
418 | break; | ||
419 | case SPRN_IAC1: | ||
420 | *spr_val = vcpu->arch.dbg_reg.iac1; | ||
421 | break; | ||
422 | case SPRN_IAC2: | ||
423 | *spr_val = vcpu->arch.dbg_reg.iac2; | ||
424 | break; | ||
425 | #if CONFIG_PPC_ADV_DEBUG_IACS > 2 | ||
426 | case SPRN_IAC3: | ||
427 | *spr_val = vcpu->arch.dbg_reg.iac3; | ||
428 | break; | ||
429 | case SPRN_IAC4: | ||
430 | *spr_val = vcpu->arch.dbg_reg.iac4; | ||
431 | break; | ||
432 | #endif | ||
433 | case SPRN_DAC1: | ||
434 | *spr_val = vcpu->arch.dbg_reg.dac1; | ||
435 | break; | ||
436 | case SPRN_DAC2: | ||
437 | *spr_val = vcpu->arch.dbg_reg.dac2; | ||
438 | break; | ||
281 | case SPRN_DBCR0: | 439 | case SPRN_DBCR0: |
282 | *spr_val = vcpu->arch.dbg_reg.dbcr0; | 440 | *spr_val = vcpu->arch.dbg_reg.dbcr0; |
441 | if (vcpu->guest_debug) | ||
442 | *spr_val = *spr_val | DBCR0_EDM; | ||
283 | break; | 443 | break; |
284 | case SPRN_DBCR1: | 444 | case SPRN_DBCR1: |
285 | *spr_val = vcpu->arch.dbg_reg.dbcr1; | 445 | *spr_val = vcpu->arch.dbg_reg.dbcr1; |
286 | break; | 446 | break; |
447 | case SPRN_DBCR2: | ||
448 | *spr_val = vcpu->arch.dbg_reg.dbcr2; | ||
449 | break; | ||
287 | case SPRN_DBSR: | 450 | case SPRN_DBSR: |
288 | *spr_val = vcpu->arch.dbsr; | 451 | *spr_val = vcpu->arch.dbsr; |
289 | break; | 452 | break; |
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e9fa56a911fd..81bd8a07aa51 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S | |||
@@ -238,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \ | |||
238 | kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ | 238 | kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ |
239 | SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) | 239 | SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) |
240 | kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ | 240 | kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ |
241 | SPRN_SRR0, SPRN_SRR1,NEED_ESR | 241 | SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) |
242 | kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ | 242 | kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ |
243 | SPRN_SRR0, SPRN_SRR1, 0 | 243 | SPRN_SRR0, SPRN_SRR1, 0 |
244 | kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ | 244 | kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ |
@@ -256,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \ | |||
256 | SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) | 256 | SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) |
257 | kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ | 257 | kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ |
258 | SPRN_SRR0, SPRN_SRR1, 0 | 258 | SPRN_SRR0, SPRN_SRR1, 0 |
259 | kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \ | 259 | kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \ |
260 | SPRN_SRR0, SPRN_SRR1, 0 | 260 | SPRN_SRR0, SPRN_SRR1, 0 |
261 | kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \ | 261 | kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \ |
262 | SPRN_SRR0, SPRN_SRR1, 0 | ||
263 | kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \ | ||
264 | SPRN_SRR0, SPRN_SRR1, 0 | 262 | SPRN_SRR0, SPRN_SRR1, 0 |
265 | kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ | 263 | kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ |
266 | SPRN_SRR0, SPRN_SRR1, 0 | 264 | SPRN_SRR0, SPRN_SRR1, 0 |
@@ -350,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR | |||
350 | kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 | 348 | kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 |
351 | kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ | 349 | kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ |
352 | SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) | 350 | SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) |
353 | kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR | 351 | kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) |
354 | kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 | 352 | kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 |
355 | kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 | 353 | kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 |
356 | kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 | 354 | kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 |
@@ -361,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ | |||
361 | kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ | 359 | kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ |
362 | SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) | 360 | SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) |
363 | kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 | 361 | kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 |
364 | kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 | ||
365 | kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 | ||
366 | kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 | ||
367 | kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 | 362 | kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 |
368 | kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 | 363 | kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 |
369 | kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ | 364 | kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ |
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index a326178bdea5..72920bed3ac6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
23 | #include <asm/mmu-book3e.h> | 23 | #include <asm/mmu-book3e.h> |
24 | #include <asm/tlb.h> | 24 | #include <asm/tlb.h> |
25 | #include <asm/cputhreads.h> | ||
25 | 26 | ||
26 | enum vcpu_ftr { | 27 | enum vcpu_ftr { |
27 | VCPU_FTR_MMU_V2 | 28 | VCPU_FTR_MMU_V2 |
@@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); | |||
289 | #define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) | 290 | #define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) |
290 | #define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) | 291 | #define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) |
291 | #define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) | 292 | #define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) |
293 | |||
294 | /* | ||
295 | * These functions should be called with preemption disabled | ||
296 | * and the returned value is valid only in that context | ||
297 | */ | ||
298 | static inline int get_thread_specific_lpid(int vm_lpid) | ||
299 | { | ||
300 | int vcpu_lpid = vm_lpid; | ||
301 | |||
302 | if (threads_per_core == 2) | ||
303 | vcpu_lpid |= smp_processor_id() & 1; | ||
304 | |||
305 | return vcpu_lpid; | ||
306 | } | ||
307 | |||
308 | static inline int get_lpid(struct kvm_vcpu *vcpu) | ||
309 | { | ||
310 | return get_thread_specific_lpid(vcpu->kvm->arch.lpid); | ||
311 | } | ||
292 | #else | 312 | #else |
293 | unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, | 313 | unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, |
294 | struct kvm_book3e_206_tlb_entry *gtlbe); | 314 | struct kvm_book3e_206_tlb_entry *gtlbe); |
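A worked example of the per-thread lpid mapping defined above, assuming the even-lpid allocation scheme added to e500mc.c later in this patch (this is also why the result is only stable while preemption is off):

	/* threads_per_core == 2, the VM owns lpid 2 (always even per VM) */
	preempt_disable();
	lpid = get_thread_specific_lpid(2);
	/* on thread 0: smp_processor_id() & 1 == 0  ->  lpid == 2
	 * on thread 1: smp_processor_id() & 1 == 1  ->  lpid == 3 */
	preempt_enable();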
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index c99c40e9182a..ce7291c79f6c 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c | |||
@@ -259,6 +259,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va | |||
259 | break; | 259 | break; |
260 | 260 | ||
261 | /* extra exceptions */ | 261 | /* extra exceptions */ |
262 | #ifdef CONFIG_SPE_POSSIBLE | ||
262 | case SPRN_IVOR32: | 263 | case SPRN_IVOR32: |
263 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; | 264 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; |
264 | break; | 265 | break; |
@@ -268,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va | |||
268 | case SPRN_IVOR34: | 269 | case SPRN_IVOR34: |
269 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; | 270 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; |
270 | break; | 271 | break; |
272 | #endif | ||
273 | #ifdef CONFIG_ALTIVEC | ||
274 | case SPRN_IVOR32: | ||
275 | vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val; | ||
276 | break; | ||
277 | case SPRN_IVOR33: | ||
278 | vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val; | ||
279 | break; | ||
280 | #endif | ||
271 | case SPRN_IVOR35: | 281 | case SPRN_IVOR35: |
272 | vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; | 282 | vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; |
273 | break; | 283 | break; |
@@ -381,6 +391,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v | |||
381 | break; | 391 | break; |
382 | 392 | ||
383 | /* extra exceptions */ | 393 | /* extra exceptions */ |
394 | #ifdef CONFIG_SPE_POSSIBLE | ||
384 | case SPRN_IVOR32: | 395 | case SPRN_IVOR32: |
385 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; | 396 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; |
386 | break; | 397 | break; |
@@ -390,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v | |||
390 | case SPRN_IVOR34: | 401 | case SPRN_IVOR34: |
391 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; | 402 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; |
392 | break; | 403 | break; |
404 | #endif | ||
405 | #ifdef CONFIG_ALTIVEC | ||
406 | case SPRN_IVOR32: | ||
407 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL]; | ||
408 | break; | ||
409 | case SPRN_IVOR33: | ||
410 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST]; | ||
411 | break; | ||
412 | #endif | ||
393 | case SPRN_IVOR35: | 413 | case SPRN_IVOR35: |
394 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; | 414 | *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; |
395 | break; | 415 | break; |
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 08f14bb57897..769778f855b0 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c | |||
@@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) | |||
69 | * writing shadow tlb entry to host TLB | 69 | * writing shadow tlb entry to host TLB |
70 | */ | 70 | */ |
71 | static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, | 71 | static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, |
72 | uint32_t mas0) | 72 | uint32_t mas0, |
73 | uint32_t lpid) | ||
73 | { | 74 | { |
74 | unsigned long flags; | 75 | unsigned long flags; |
75 | 76 | ||
@@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, | |||
80 | mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); | 81 | mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); |
81 | mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); | 82 | mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); |
82 | #ifdef CONFIG_KVM_BOOKE_HV | 83 | #ifdef CONFIG_KVM_BOOKE_HV |
83 | mtspr(SPRN_MAS8, stlbe->mas8); | 84 | mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid)); |
84 | #endif | 85 | #endif |
85 | asm volatile("isync; tlbwe" : : : "memory"); | 86 | asm volatile("isync; tlbwe" : : : "memory"); |
86 | 87 | ||
@@ -129,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
129 | 130 | ||
130 | if (tlbsel == 0) { | 131 | if (tlbsel == 0) { |
131 | mas0 = get_host_mas0(stlbe->mas2); | 132 | mas0 = get_host_mas0(stlbe->mas2); |
132 | __write_host_tlbe(stlbe, mas0); | 133 | __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid); |
133 | } else { | 134 | } else { |
134 | __write_host_tlbe(stlbe, | 135 | __write_host_tlbe(stlbe, |
135 | MAS0_TLBSEL(1) | | 136 | MAS0_TLBSEL(1) | |
136 | MAS0_ESEL(to_htlb1_esel(sesel))); | 137 | MAS0_ESEL(to_htlb1_esel(sesel)), |
138 | vcpu_e500->vcpu.kvm->arch.lpid); | ||
137 | } | 139 | } |
138 | } | 140 | } |
139 | 141 | ||
@@ -176,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) | |||
176 | MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; | 178 | MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; |
177 | magic.mas8 = 0; | 179 | magic.mas8 = 0; |
178 | 180 | ||
179 | __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); | 181 | __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0); |
180 | preempt_enable(); | 182 | preempt_enable(); |
181 | } | 183 | } |
182 | #endif | 184 | #endif |
@@ -317,10 +319,6 @@ static void kvmppc_e500_setup_stlbe( | |||
317 | stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); | 319 | stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); |
318 | stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | | 320 | stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | |
319 | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); | 321 | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); |
320 | |||
321 | #ifdef CONFIG_KVM_BOOKE_HV | ||
322 | stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; | ||
323 | #endif | ||
324 | } | 322 | } |
325 | 323 | ||
326 | static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | 324 | static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, |
@@ -633,7 +631,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, | |||
633 | 631 | ||
634 | local_irq_save(flags); | 632 | local_irq_save(flags); |
635 | mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); | 633 | mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); |
636 | mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); | 634 | mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu)); |
637 | asm volatile("tlbsx 0, %[geaddr]\n" : : | 635 | asm volatile("tlbsx 0, %[geaddr]\n" : : |
638 | [geaddr] "r" (geaddr)); | 636 | [geaddr] "r" (geaddr)); |
639 | mtspr(SPRN_MAS5, 0); | 637 | mtspr(SPRN_MAS5, 0); |
@@ -732,7 +730,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | |||
732 | return 0; | 730 | return 0; |
733 | } | 731 | } |
734 | 732 | ||
735 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 733 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) |
736 | { | 734 | { |
737 | /* XXX could be more clever ;) */ | 735 | /* XXX could be more clever ;) */ |
738 | return 0; | 736 | return 0; |
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 164bad2a19bf..2fdc8722e324 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c | |||
@@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) | |||
48 | return; | 48 | return; |
49 | } | 49 | } |
50 | 50 | ||
51 | 51 | preempt_disable(); | |
52 | tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; | 52 | tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id; |
53 | mb(); | 53 | mb(); |
54 | ppc_msgsnd(dbell_type, 0, tag); | 54 | ppc_msgsnd(dbell_type, 0, tag); |
55 | preempt_enable(); | ||
55 | } | 56 | } |
56 | 57 | ||
57 | /* gtlbe must not be mapped by more than one host tlb entry */ | 58 | /* gtlbe must not be mapped by more than one host tlb entry */ |
@@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
60 | { | 61 | { |
61 | unsigned int tid, ts; | 62 | unsigned int tid, ts; |
62 | gva_t eaddr; | 63 | gva_t eaddr; |
63 | u32 val, lpid; | 64 | u32 val; |
64 | unsigned long flags; | 65 | unsigned long flags; |
65 | 66 | ||
66 | ts = get_tlb_ts(gtlbe); | 67 | ts = get_tlb_ts(gtlbe); |
67 | tid = get_tlb_tid(gtlbe); | 68 | tid = get_tlb_tid(gtlbe); |
68 | lpid = vcpu_e500->vcpu.kvm->arch.lpid; | ||
69 | 69 | ||
70 | /* We search the host TLB to invalidate its shadow TLB entry */ | 70 | /* We search the host TLB to invalidate its shadow TLB entry */ |
71 | val = (tid << 16) | ts; | 71 | val = (tid << 16) | ts; |
@@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
74 | local_irq_save(flags); | 74 | local_irq_save(flags); |
75 | 75 | ||
76 | mtspr(SPRN_MAS6, val); | 76 | mtspr(SPRN_MAS6, val); |
77 | mtspr(SPRN_MAS5, MAS5_SGS | lpid); | 77 | mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); |
78 | 78 | ||
79 | asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); | 79 | asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); |
80 | val = mfspr(SPRN_MAS1); | 80 | val = mfspr(SPRN_MAS1); |
@@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
95 | unsigned long flags; | 95 | unsigned long flags; |
96 | 96 | ||
97 | local_irq_save(flags); | 97 | local_irq_save(flags); |
98 | mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); | 98 | mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); |
99 | asm volatile("tlbilxlpid"); | 99 | asm volatile("tlbilxlpid"); |
100 | mtspr(SPRN_MAS5, 0); | 100 | mtspr(SPRN_MAS5, 0); |
101 | local_irq_restore(flags); | 101 | local_irq_restore(flags); |
@@ -110,6 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) | |||
110 | { | 110 | { |
111 | } | 111 | } |
112 | 112 | ||
113 | /* We use two lpids per VM */ | ||
113 | static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); | 114 | static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); |
114 | 115 | ||
115 | static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) | 116 | static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) |
@@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) | |||
118 | 119 | ||
119 | kvmppc_booke_vcpu_load(vcpu, cpu); | 120 | kvmppc_booke_vcpu_load(vcpu, cpu); |
120 | 121 | ||
121 | mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); | 122 | mtspr(SPRN_LPID, get_lpid(vcpu)); |
122 | mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); | 123 | mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); |
123 | mtspr(SPRN_GPIR, vcpu->vcpu_id); | 124 | mtspr(SPRN_GPIR, vcpu->vcpu_id); |
124 | mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); | 125 | mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); |
126 | vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT); | ||
127 | vcpu->arch.epsc = vcpu->arch.eplc; | ||
125 | mtspr(SPRN_EPLC, vcpu->arch.eplc); | 128 | mtspr(SPRN_EPLC, vcpu->arch.eplc); |
126 | mtspr(SPRN_EPSC, vcpu->arch.epsc); | 129 | mtspr(SPRN_EPSC, vcpu->arch.epsc); |
127 | 130 | ||
@@ -141,12 +144,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) | |||
141 | mtspr(SPRN_GESR, vcpu->arch.shared->esr); | 144 | mtspr(SPRN_GESR, vcpu->arch.shared->esr); |
142 | 145 | ||
143 | if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || | 146 | if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || |
144 | __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { | 147 | __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { |
145 | kvmppc_e500_tlbil_all(vcpu_e500); | 148 | kvmppc_e500_tlbil_all(vcpu_e500); |
146 | __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; | 149 | __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; |
147 | } | 150 | } |
148 | |||
149 | kvmppc_load_guest_fp(vcpu); | ||
150 | } | 151 | } |
151 | 152 | ||
152 | static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) | 153 | static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) |
@@ -179,6 +180,16 @@ int kvmppc_core_check_processor_compat(void) | |||
179 | r = 0; | 180 | r = 0; |
180 | else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) | 181 | else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) |
181 | r = 0; | 182 | r = 0; |
183 | #ifdef CONFIG_ALTIVEC | ||
184 | /* | ||
185 | * Since guests have the privilege to enable AltiVec, we need AltiVec | ||
186 | * support in the host to save/restore their context. | ||
187 | * Don't use CPU_FTR_ALTIVEC to identify cores with an AltiVec unit | ||
188 | * because it's cleared in the absence of CONFIG_ALTIVEC! | ||
189 | */ | ||
190 | else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) | ||
191 | r = 0; | ||
192 | #endif | ||
182 | else | 193 | else |
183 | r = -ENOTSUPP; | 194 | r = -ENOTSUPP; |
184 | 195 | ||
@@ -194,9 +205,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) | |||
194 | #ifdef CONFIG_64BIT | 205 | #ifdef CONFIG_64BIT |
195 | vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; | 206 | vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; |
196 | #endif | 207 | #endif |
197 | vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; | 208 | vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP; |
198 | vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); | ||
199 | vcpu->arch.epsc = vcpu->arch.eplc; | ||
200 | 209 | ||
201 | vcpu->arch.pvr = mfspr(SPRN_PVR); | 210 | vcpu->arch.pvr = mfspr(SPRN_PVR); |
202 | vcpu_e500->svr = mfspr(SPRN_SVR); | 211 | vcpu_e500->svr = mfspr(SPRN_SVR); |
@@ -356,13 +365,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) | |||
356 | if (lpid < 0) | 365 | if (lpid < 0) |
357 | return lpid; | 366 | return lpid; |
358 | 367 | ||
368 | /* | ||
369 | * Use two lpids per VM on cores with two threads, like the e6500. Use | ||
370 | * even numbers to speed up vcpu lpid computation with consecutive lpids | ||
371 | * per VM: vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on. | ||
372 | */ | ||
373 | if (threads_per_core == 2) | ||
374 | lpid <<= 1; | ||
375 | |||
359 | kvm->arch.lpid = lpid; | 376 | kvm->arch.lpid = lpid; |
360 | return 0; | 377 | return 0; |
361 | } | 378 | } |
362 | 379 | ||
363 | static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) | 380 | static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) |
364 | { | 381 | { |
365 | kvmppc_free_lpid(kvm->arch.lpid); | 382 | int lpid = kvm->arch.lpid; |
383 | |||
384 | if (threads_per_core == 2) | ||
385 | lpid >>= 1; | ||
386 | |||
387 | kvmppc_free_lpid(lpid); | ||
366 | } | 388 | } |
367 | 389 | ||
368 | static struct kvmppc_ops kvm_ops_e500mc = { | 390 | static struct kvmppc_ops kvm_ops_e500mc = { |
@@ -390,7 +412,13 @@ static int __init kvmppc_e500mc_init(void) | |||
390 | if (r) | 412 | if (r) |
391 | goto err_out; | 413 | goto err_out; |
392 | 414 | ||
393 | kvmppc_init_lpid(64); | 415 | /* |
416 | * Use two lpids per VM on dual-threaded processors like the e6500 | ||
417 | * to work around the lack of a tlb write conditional instruction. | ||
418 | * Expose half the number of available hardware lpids to the lpid | ||
419 | * allocator. | ||
420 | */ | ||
421 | kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core); | ||
394 | kvmppc_claim_lpid(0); /* host */ | 422 | kvmppc_claim_lpid(0); /* host */ |
395 | 423 | ||
396 | r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); | 424 | r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); |
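Putting the init, create and destroy hunks together, the lpid lifecycle on a dual-threaded core works out as follows (a worked example assuming KVMPPC_NR_LPIDS is 64):

	kvmppc_init_lpid(64 / 2);   /* allocator manages ids 0..31           */
	/* create:  allocator returns id 1 -> kvm->arch.lpid = 1 << 1 = 2    */
	/* run:     vcpus use lpid 2 on thread 0, lpid 3 on thread 1         */
	/* destroy: 2 >> 1 = 1 goes back to the allocator                    */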
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index e96b50d0bdab..5cc2e7af3a7b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -219,7 +219,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
219 | /* this default type might be overwritten by subcategories */ | 219 | /* this default type might be overwritten by subcategories */ |
220 | kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); | 220 | kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); |
221 | 221 | ||
222 | emulated = kvmppc_get_last_inst(vcpu, false, &inst); | 222 | emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); |
223 | if (emulated != EMULATE_DONE) | 223 | if (emulated != EMULATE_DONE) |
224 | return emulated; | 224 | return emulated; |
225 | 225 | ||
@@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
274 | } | 274 | } |
275 | break; | 275 | break; |
276 | 276 | ||
277 | case 0: | ||
278 | /* | ||
279 | * Instruction with primary opcode 0. According to the PowerISA, | ||
280 | * these are illegal instructions. | ||
281 | */ | ||
282 | if (inst == KVMPPC_INST_SW_BREAKPOINT) { | ||
283 | run->exit_reason = KVM_EXIT_DEBUG; | ||
284 | run->debug.arch.address = kvmppc_get_pc(vcpu); | ||
285 | emulated = EMULATE_EXIT_USER; | ||
286 | advance = 0; | ||
287 | } else | ||
288 | emulated = EMULATE_FAIL; | ||
289 | |||
290 | break; | ||
291 | |||
277 | default: | 292 | default: |
278 | emulated = EMULATE_FAIL; | 293 | emulated = EMULATE_FAIL; |
279 | } | 294 | } |
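The primary opcode of a PowerPC instruction is its top six bits, so the "case 0:" arm above only ever sees words whose bits 0-5 are zero; the breakpoint opcode is carved out of that reserved space (a sketch — the exact KVMPPC_INST_SW_BREAKPOINT encoding is an assumption here, not taken from this patch):

	#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00   /* assumed encoding */

	u32 primary_op = inst >> 26;   /* bits 0..5 in IBM numbering */
	/* 0x00dddd00 >> 26 == 0, so the word reaches "case 0:", and any
	 * other opcode-0 word falls through to EMULATE_FAIL. */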
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 0de4ffa175a9..6d3c0ee1d744 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c | |||
@@ -58,7 +58,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
58 | /* this default type might be overwritten by subcategories */ | 58 | /* this default type might be overwritten by subcategories */ |
59 | kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); | 59 | kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); |
60 | 60 | ||
61 | emulated = kvmppc_get_last_inst(vcpu, false, &inst); | 61 | emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); |
62 | if (emulated != EMULATE_DONE) | 62 | if (emulated != EMULATE_DONE) |
63 | return emulated; | 63 | return emulated; |
64 | 64 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c79284b58be..c1f8f53cd312 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -294,7 +294,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
294 | { | 294 | { |
295 | u32 last_inst; | 295 | u32 last_inst; |
296 | 296 | ||
297 | kvmppc_get_last_inst(vcpu, false, &last_inst); | 297 | kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); |
298 | /* XXX Deliver Program interrupt to guest. */ | 298 | /* XXX Deliver Program interrupt to guest. */ |
299 | pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); | 299 | pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); |
300 | r = RESUME_HOST; | 300 | r = RESUME_HOST; |
@@ -384,24 +384,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, | |||
384 | } | 384 | } |
385 | EXPORT_SYMBOL_GPL(kvmppc_ld); | 385 | EXPORT_SYMBOL_GPL(kvmppc_ld); |
386 | 386 | ||
387 | int kvm_arch_hardware_enable(void *garbage) | 387 | int kvm_arch_hardware_enable(void) |
388 | { | 388 | { |
389 | return 0; | 389 | return 0; |
390 | } | 390 | } |
391 | 391 | ||
392 | void kvm_arch_hardware_disable(void *garbage) | ||
393 | { | ||
394 | } | ||
395 | |||
396 | int kvm_arch_hardware_setup(void) | 392 | int kvm_arch_hardware_setup(void) |
397 | { | 393 | { |
398 | return 0; | 394 | return 0; |
399 | } | 395 | } |
400 | 396 | ||
401 | void kvm_arch_hardware_unsetup(void) | ||
402 | { | ||
403 | } | ||
404 | |||
405 | void kvm_arch_check_processor_compat(void *rtn) | 397 | void kvm_arch_check_processor_compat(void *rtn) |
406 | { | 398 | { |
407 | *(int *)rtn = kvmppc_core_check_processor_compat(); | 399 | *(int *)rtn = kvmppc_core_check_processor_compat(); |
@@ -462,10 +454,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
462 | module_put(kvm->arch.kvm_ops->owner); | 454 | module_put(kvm->arch.kvm_ops->owner); |
463 | } | 455 | } |
464 | 456 | ||
465 | void kvm_arch_sync_events(struct kvm *kvm) | ||
466 | { | ||
467 | } | ||
468 | |||
469 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | 457 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) |
470 | { | 458 | { |
471 | int r; | 459 | int r; |
@@ -608,10 +596,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
608 | return kvmppc_core_create_memslot(kvm, slot, npages); | 596 | return kvmppc_core_create_memslot(kvm, slot, npages); |
609 | } | 597 | } |
610 | 598 | ||
611 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
612 | { | ||
613 | } | ||
614 | |||
615 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 599 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
616 | struct kvm_memory_slot *memslot, | 600 | struct kvm_memory_slot *memslot, |
617 | struct kvm_userspace_memory_region *mem, | 601 | struct kvm_userspace_memory_region *mem, |
@@ -628,10 +612,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
628 | kvmppc_core_commit_memory_region(kvm, mem, old); | 612 | kvmppc_core_commit_memory_region(kvm, mem, old); |
629 | } | 613 | } |
630 | 614 | ||
631 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | ||
632 | { | ||
633 | } | ||
634 | |||
635 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 615 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
636 | struct kvm_memory_slot *slot) | 616 | struct kvm_memory_slot *slot) |
637 | { | 617 | { |
@@ -658,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
658 | { | 638 | { |
659 | /* Make sure we're not using the vcpu anymore */ | 639 | /* Make sure we're not using the vcpu anymore */ |
660 | hrtimer_cancel(&vcpu->arch.dec_timer); | 640 | hrtimer_cancel(&vcpu->arch.dec_timer); |
661 | tasklet_kill(&vcpu->arch.tasklet); | ||
662 | 641 | ||
663 | kvmppc_remove_vcpu_debugfs(vcpu); | 642 | kvmppc_remove_vcpu_debugfs(vcpu); |
664 | 643 | ||
@@ -684,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
684 | return kvmppc_core_pending_dec(vcpu); | 663 | return kvmppc_core_pending_dec(vcpu); |
685 | } | 664 | } |
686 | 665 | ||
687 | /* | ||
688 | * low level hrtimer wake routine. Because this runs in hardirq context | ||
689 | * we schedule a tasklet to do the real work. | ||
690 | */ | ||
691 | enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) | 666 | enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) |
692 | { | 667 | { |
693 | struct kvm_vcpu *vcpu; | 668 | struct kvm_vcpu *vcpu; |
694 | 669 | ||
695 | vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); | 670 | vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); |
696 | tasklet_schedule(&vcpu->arch.tasklet); | 671 | kvmppc_decrementer_func(vcpu); |
697 | 672 | ||
698 | return HRTIMER_NORESTART; | 673 | return HRTIMER_NORESTART; |
699 | } | 674 | } |
@@ -703,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
703 | int ret; | 678 | int ret; |
704 | 679 | ||
705 | hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 680 | hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
706 | tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); | ||
707 | vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; | 681 | vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; |
708 | vcpu->arch.dec_expires = ~(u64)0; | 682 | vcpu->arch.dec_expires = ~(u64)0; |
709 | 683 | ||
@@ -927,6 +901,103 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
927 | } | 901 | } |
928 | EXPORT_SYMBOL_GPL(kvmppc_handle_store); | 902 | EXPORT_SYMBOL_GPL(kvmppc_handle_store); |
929 | 903 | ||
904 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
905 | { | ||
906 | int r = 0; | ||
907 | union kvmppc_one_reg val; | ||
908 | int size; | ||
909 | |||
910 | size = one_reg_size(reg->id); | ||
911 | if (size > sizeof(val)) | ||
912 | return -EINVAL; | ||
913 | |||
914 | r = kvmppc_get_one_reg(vcpu, reg->id, &val); | ||
915 | if (r == -EINVAL) { | ||
916 | r = 0; | ||
917 | switch (reg->id) { | ||
918 | #ifdef CONFIG_ALTIVEC | ||
919 | case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: | ||
920 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
921 | r = -ENXIO; | ||
922 | break; | ||
923 | } | ||
924 | val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; | ||
925 | break; | ||
926 | case KVM_REG_PPC_VSCR: | ||
927 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
928 | r = -ENXIO; | ||
929 | break; | ||
930 | } | ||
931 | val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); | ||
932 | break; | ||
933 | case KVM_REG_PPC_VRSAVE: | ||
934 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
935 | r = -ENXIO; | ||
936 | break; | ||
937 | } | ||
938 | val = get_reg_val(reg->id, vcpu->arch.vrsave); | ||
939 | break; | ||
940 | #endif /* CONFIG_ALTIVEC */ | ||
941 | default: | ||
942 | r = -EINVAL; | ||
943 | break; | ||
944 | } | ||
945 | } | ||
946 | |||
947 | if (r) | ||
948 | return r; | ||
949 | |||
950 | if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) | ||
951 | r = -EFAULT; | ||
952 | |||
953 | return r; | ||
954 | } | ||
955 | |||
956 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
957 | { | ||
958 | int r; | ||
959 | union kvmppc_one_reg val; | ||
960 | int size; | ||
961 | |||
962 | size = one_reg_size(reg->id); | ||
963 | if (size > sizeof(val)) | ||
964 | return -EINVAL; | ||
965 | |||
966 | if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) | ||
967 | return -EFAULT; | ||
968 | |||
969 | r = kvmppc_set_one_reg(vcpu, reg->id, &val); | ||
970 | if (r == -EINVAL) { | ||
971 | r = 0; | ||
972 | switch (reg->id) { | ||
973 | #ifdef CONFIG_ALTIVEC | ||
974 | case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: | ||
975 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
976 | r = -ENXIO; | ||
977 | break; | ||
978 | } | ||
979 | vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; | ||
980 | break; | ||
981 | case KVM_REG_PPC_VSCR: | ||
982 | if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { | ||
983 | r = -ENXIO; | ||
984 | break; | ||
985 | } | ||
986 | vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); | ||
987 | break; | ||
988 | case KVM_REG_PPC_VRSAVE: | ||
989 | vcpu->arch.vrsave = set_reg_val(reg->id, val); | ||
990 | break; | ||
991 | #endif /* CONFIG_ALTIVEC */ | ||
992 | default: | ||
993 | r = -EINVAL; | ||
994 | break; | ||
995 | } | ||
996 | } | ||
997 | |||
998 | return r; | ||
999 | } | ||
1000 | |||
930 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | 1001 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
931 | { | 1002 | { |
932 | int r; | 1003 | int r; |
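The new kvm_vcpu_ioctl_get_one_reg()/kvm_vcpu_ioctl_set_one_reg() above hoist the AltiVec ONE_REG cases into common powerpc code: the backend is tried first via kvmppc_get_one_reg()/kvmppc_set_one_reg(), and only an -EINVAL fallthrough lands in the switch. From userspace both paths are driven by the KVM_GET_ONE_REG/KVM_SET_ONE_REG vcpu ioctls, which take a register id plus a user buffer whose size is encoded in the id. A hedged sketch of the caller side — error handling elided, vcpu_fd assumed to be an open vcpu file descriptor:

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <sys/ioctl.h>

    /* Read one 64-bit register; the id encodes which register and its size. */
    static int get_reg_u64(int vcpu_fd, uint64_t reg_id, uint64_t *out)
    {
            struct kvm_one_reg reg = {
                    .id   = reg_id,
                    .addr = (uintptr_t)out, /* kernel copy_to_user()s into this */
            };

            return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }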
@@ -1343,9 +1414,4 @@ int kvm_arch_init(void *opaque) | |||
1343 | return 0; | 1414 | return 0; |
1344 | } | 1415 | } |
1345 | 1416 | ||
1346 | void kvm_arch_exit(void) | ||
1347 | { | ||
1348 | |||
1349 | } | ||
1350 | |||
1351 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); | 1417 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); |
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e8bc40869cbd..7d9ee3d8c618 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype | |||
@@ -303,9 +303,13 @@ config PPC_ICSWX_USE_SIGILL | |||
303 | 303 | ||
304 | If in doubt, say N here. | 304 | If in doubt, say N here. |
305 | 305 | ||
306 | config SPE_POSSIBLE | ||
307 | def_bool y | ||
308 | depends on E200 || (E500 && !PPC_E500MC) | ||
309 | |||
306 | config SPE | 310 | config SPE |
307 | bool "SPE Support" | 311 | bool "SPE Support" |
308 | depends on E200 || (E500 && !PPC_E500MC) | 312 | depends on SPE_POSSIBLE |
309 | default y | 313 | default y |
310 | ---help--- | 314 | ---help--- |
311 | This option enables kernel support for the Signal Processing | 315 | This option enables kernel support for the Signal Processing |
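The new SPE_POSSIBLE symbol is the usual Kconfig split between "this core family can have SPE at all" and "build SPE support": a non-interactive def_bool captures the hardware condition once, so SPE itself — and, presumably, other code in this series such as KVM's e500 support — can depend on or test that single symbol instead of re-deriving the E200/E500 expression, including in configurations where CONFIG_SPE itself is switched off.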
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 773bef7614d8..2175f911a73a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -13,8 +13,11 @@ | |||
13 | 13 | ||
14 | #ifndef ASM_KVM_HOST_H | 14 | #ifndef ASM_KVM_HOST_H |
15 | #define ASM_KVM_HOST_H | 15 | #define ASM_KVM_HOST_H |
16 | |||
17 | #include <linux/types.h> | ||
16 | #include <linux/hrtimer.h> | 18 | #include <linux/hrtimer.h> |
17 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
20 | #include <linux/kvm_types.h> | ||
18 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
19 | #include <linux/kvm.h> | 22 | #include <linux/kvm.h> |
20 | #include <asm/debug.h> | 23 | #include <asm/debug.h> |
@@ -154,7 +157,9 @@ struct kvm_s390_sie_block { | |||
154 | __u8 armid; /* 0x00e3 */ | 157 | __u8 armid; /* 0x00e3 */ |
155 | __u8 reservede4[4]; /* 0x00e4 */ | 158 | __u8 reservede4[4]; /* 0x00e4 */ |
156 | __u64 tecmc; /* 0x00e8 */ | 159 | __u64 tecmc; /* 0x00e8 */ |
157 | __u8 reservedf0[16]; /* 0x00f0 */ | 160 | __u8 reservedf0[12]; /* 0x00f0 */ |
161 | #define CRYCB_FORMAT1 0x00000001 | ||
162 | __u32 crycbd; /* 0x00fc */ | ||
158 | __u64 gcr[16]; /* 0x0100 */ | 163 | __u64 gcr[16]; /* 0x0100 */ |
159 | __u64 gbea; /* 0x0180 */ | 164 | __u64 gbea; /* 0x0180 */ |
160 | __u8 reserved188[24]; /* 0x0188 */ | 165 | __u8 reserved188[24]; /* 0x0188 */ |
@@ -187,6 +192,7 @@ struct kvm_vcpu_stat { | |||
187 | u32 exit_stop_request; | 192 | u32 exit_stop_request; |
188 | u32 exit_validity; | 193 | u32 exit_validity; |
189 | u32 exit_instruction; | 194 | u32 exit_instruction; |
195 | u32 halt_wakeup; | ||
190 | u32 instruction_lctl; | 196 | u32 instruction_lctl; |
191 | u32 instruction_lctlg; | 197 | u32 instruction_lctlg; |
192 | u32 instruction_stctl; | 198 | u32 instruction_stctl; |
@@ -407,6 +413,15 @@ struct s390_io_adapter { | |||
407 | #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) | 413 | #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) |
408 | #define MAX_S390_ADAPTER_MAPS 256 | 414 | #define MAX_S390_ADAPTER_MAPS 256 |
409 | 415 | ||
416 | struct kvm_s390_crypto { | ||
417 | struct kvm_s390_crypto_cb *crycb; | ||
418 | __u32 crycbd; | ||
419 | }; | ||
420 | |||
421 | struct kvm_s390_crypto_cb { | ||
422 | __u8 reserved00[128]; /* 0x0000 */ | ||
423 | }; | ||
424 | |||
410 | struct kvm_arch{ | 425 | struct kvm_arch{ |
411 | struct sca_block *sca; | 426 | struct sca_block *sca; |
412 | debug_info_t *dbf; | 427 | debug_info_t *dbf; |
@@ -420,6 +435,7 @@ struct kvm_arch{ | |||
420 | struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; | 435 | struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; |
421 | wait_queue_head_t ipte_wq; | 436 | wait_queue_head_t ipte_wq; |
422 | spinlock_t start_stop_lock; | 437 | spinlock_t start_stop_lock; |
438 | struct kvm_s390_crypto crypto; | ||
423 | }; | 439 | }; |
424 | 440 | ||
425 | #define KVM_HVA_ERR_BAD (-1UL) | 441 | #define KVM_HVA_ERR_BAD (-1UL) |
@@ -431,8 +447,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) | |||
431 | } | 447 | } |
432 | 448 | ||
433 | #define ASYNC_PF_PER_VCPU 64 | 449 | #define ASYNC_PF_PER_VCPU 64 |
434 | struct kvm_vcpu; | ||
435 | struct kvm_async_pf; | ||
436 | struct kvm_arch_async_pf { | 450 | struct kvm_arch_async_pf { |
437 | unsigned long pfault_token; | 451 | unsigned long pfault_token; |
438 | }; | 452 | }; |
@@ -450,4 +464,18 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | |||
450 | 464 | ||
451 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); | 465 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); |
452 | extern char sie_exit; | 466 | extern char sie_exit; |
467 | |||
468 | static inline void kvm_arch_hardware_disable(void) {} | ||
469 | static inline void kvm_arch_check_processor_compat(void *rtn) {} | ||
470 | static inline void kvm_arch_exit(void) {} | ||
471 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | ||
472 | static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | ||
473 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | ||
474 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | ||
475 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | ||
476 | static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} | ||
477 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | ||
478 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | ||
479 | struct kvm_memory_slot *slot) {} | ||
480 | |||
453 | #endif | 481 | #endif |
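The block of static inlines added at the end of the header replaces empty out-of-line definitions that s390 previously carried in kvm-s390.c (several are deleted later in this patch). The idiom lets generic KVM code call every arch hook unconditionally while an architecture with nothing to do pays neither a function call nor any dead text. A sketch of the shape — the kvm_main.c body shown is simplified, not the exact upstream code:

    /* arch/s390/include/asm/kvm_host.h: nothing to do on this arch */
    static inline void kvm_arch_sync_events(struct kvm *kvm) {}

    /* virt/kvm/kvm_main.c: generic code stays unconditional */
    static void kvm_destroy_vm(struct kvm *kvm)
    {
            kvm_arch_sync_events(kvm);      /* compiles away entirely here */
            /* ... common teardown ... */
    }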
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 9e18a61d3df3..d39a31c3cdf2 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h | |||
@@ -18,9 +18,9 @@ | |||
18 | unsigned long *crst_table_alloc(struct mm_struct *); | 18 | unsigned long *crst_table_alloc(struct mm_struct *); |
19 | void crst_table_free(struct mm_struct *, unsigned long *); | 19 | void crst_table_free(struct mm_struct *, unsigned long *); |
20 | 20 | ||
21 | unsigned long *page_table_alloc(struct mm_struct *, unsigned long); | 21 | unsigned long *page_table_alloc(struct mm_struct *); |
22 | void page_table_free(struct mm_struct *, unsigned long *); | 22 | void page_table_free(struct mm_struct *, unsigned long *); |
23 | void page_table_free_rcu(struct mmu_gather *, unsigned long *); | 23 | void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); |
24 | 24 | ||
25 | void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, | 25 | void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, |
26 | bool init_skey); | 26 | bool init_skey); |
@@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm, | |||
145 | /* | 145 | /* |
146 | * page table entry allocation/free routines. | 146 | * page table entry allocation/free routines. |
147 | */ | 147 | */ |
148 | #define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) | 148 | #define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) |
149 | #define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) | 149 | #define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) |
150 | 150 | ||
151 | #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) | 151 | #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) |
152 | #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) | 152 | #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) |
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5efb2fe186e7..b7054356cc98 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
31 | #include <linux/mm_types.h> | 31 | #include <linux/mm_types.h> |
32 | #include <linux/page-flags.h> | 32 | #include <linux/page-flags.h> |
33 | #include <linux/radix-tree.h> | ||
33 | #include <asm/bug.h> | 34 | #include <asm/bug.h> |
34 | #include <asm/page.h> | 35 | #include <asm/page.h> |
35 | 36 | ||
@@ -789,82 +790,67 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) | |||
789 | 790 | ||
790 | /** | 791 | /** |
791 | * struct gmap_struct - guest address space | 792 | * struct gmap_struct - guest address space |
793 | * @crst_list: list of all crst tables used in the guest address space | ||
792 | * @mm: pointer to the parent mm_struct | 794 | * @mm: pointer to the parent mm_struct |
795 | * @guest_to_host: radix tree with guest to host address translation | ||
796 | * @host_to_guest: radix tree with pointer to segment table entries | ||
797 | * @guest_table_lock: spinlock to protect all entries in the guest page table | ||
793 | * @table: pointer to the page directory | 798 | * @table: pointer to the page directory |
794 | * @asce: address space control element for gmap page table | 799 | * @asce: address space control element for gmap page table |
795 | * @crst_list: list of all crst tables used in the guest address space | ||
796 | * @pfault_enabled: defines if pfaults are applicable for the guest | 800 | * @pfault_enabled: defines if pfaults are applicable for the guest |
797 | */ | 801 | */ |
798 | struct gmap { | 802 | struct gmap { |
799 | struct list_head list; | 803 | struct list_head list; |
804 | struct list_head crst_list; | ||
800 | struct mm_struct *mm; | 805 | struct mm_struct *mm; |
806 | struct radix_tree_root guest_to_host; | ||
807 | struct radix_tree_root host_to_guest; | ||
808 | spinlock_t guest_table_lock; | ||
801 | unsigned long *table; | 809 | unsigned long *table; |
802 | unsigned long asce; | 810 | unsigned long asce; |
811 | unsigned long asce_end; | ||
803 | void *private; | 812 | void *private; |
804 | struct list_head crst_list; | ||
805 | bool pfault_enabled; | 813 | bool pfault_enabled; |
806 | }; | 814 | }; |
807 | 815 | ||
808 | /** | 816 | /** |
809 | * struct gmap_rmap - reverse mapping for segment table entries | ||
810 | * @gmap: pointer to the gmap_struct | ||
811 | * @entry: pointer to a segment table entry | ||
812 | * @vmaddr: virtual address in the guest address space | ||
813 | */ | ||
814 | struct gmap_rmap { | ||
815 | struct list_head list; | ||
816 | struct gmap *gmap; | ||
817 | unsigned long *entry; | ||
818 | unsigned long vmaddr; | ||
819 | }; | ||
820 | |||
821 | /** | ||
822 | * struct gmap_pgtable - gmap information attached to a page table | ||
823 | * @vmaddr: address of the 1MB segment in the process virtual memory | ||
824 | * @mapper: list of segment table entries mapping a page table | ||
825 | */ | ||
826 | struct gmap_pgtable { | ||
827 | unsigned long vmaddr; | ||
828 | struct list_head mapper; | ||
829 | }; | ||
830 | |||
831 | /** | ||
832 | * struct gmap_notifier - notify function block for page invalidation | 817 | * struct gmap_notifier - notify function block for page invalidation |
833 | * @notifier_call: address of callback function | 818 | * @notifier_call: address of callback function |
834 | */ | 819 | */ |
835 | struct gmap_notifier { | 820 | struct gmap_notifier { |
836 | struct list_head list; | 821 | struct list_head list; |
837 | void (*notifier_call)(struct gmap *gmap, unsigned long address); | 822 | void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); |
838 | }; | 823 | }; |
839 | 824 | ||
840 | struct gmap *gmap_alloc(struct mm_struct *mm); | 825 | struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); |
841 | void gmap_free(struct gmap *gmap); | 826 | void gmap_free(struct gmap *gmap); |
842 | void gmap_enable(struct gmap *gmap); | 827 | void gmap_enable(struct gmap *gmap); |
843 | void gmap_disable(struct gmap *gmap); | 828 | void gmap_disable(struct gmap *gmap); |
844 | int gmap_map_segment(struct gmap *gmap, unsigned long from, | 829 | int gmap_map_segment(struct gmap *gmap, unsigned long from, |
845 | unsigned long to, unsigned long len); | 830 | unsigned long to, unsigned long len); |
846 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); | 831 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); |
847 | unsigned long __gmap_translate(unsigned long address, struct gmap *); | 832 | unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); |
848 | unsigned long gmap_translate(unsigned long address, struct gmap *); | 833 | unsigned long gmap_translate(struct gmap *, unsigned long gaddr); |
849 | unsigned long __gmap_fault(unsigned long address, struct gmap *); | 834 | int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); |
850 | unsigned long gmap_fault(unsigned long address, struct gmap *); | 835 | int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); |
851 | void gmap_discard(unsigned long from, unsigned long to, struct gmap *); | 836 | void gmap_discard(struct gmap *, unsigned long from, unsigned long to); |
852 | void __gmap_zap(unsigned long address, struct gmap *); | 837 | void __gmap_zap(struct gmap *, unsigned long gaddr); |
853 | bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); | 838 | bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); |
854 | 839 | ||
855 | 840 | ||
856 | void gmap_register_ipte_notifier(struct gmap_notifier *); | 841 | void gmap_register_ipte_notifier(struct gmap_notifier *); |
857 | void gmap_unregister_ipte_notifier(struct gmap_notifier *); | 842 | void gmap_unregister_ipte_notifier(struct gmap_notifier *); |
858 | int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); | 843 | int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); |
859 | void gmap_do_ipte_notify(struct mm_struct *, pte_t *); | 844 | void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); |
860 | 845 | ||
861 | static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, | 846 | static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, |
847 | unsigned long addr, | ||
862 | pte_t *ptep, pgste_t pgste) | 848 | pte_t *ptep, pgste_t pgste) |
863 | { | 849 | { |
864 | #ifdef CONFIG_PGSTE | 850 | #ifdef CONFIG_PGSTE |
865 | if (pgste_val(pgste) & PGSTE_IN_BIT) { | 851 | if (pgste_val(pgste) & PGSTE_IN_BIT) { |
866 | pgste_val(pgste) &= ~PGSTE_IN_BIT; | 852 | pgste_val(pgste) &= ~PGSTE_IN_BIT; |
867 | gmap_do_ipte_notify(mm, ptep); | 853 | gmap_do_ipte_notify(mm, addr, ptep); |
868 | } | 854 | } |
869 | #endif | 855 | #endif |
870 | return pgste; | 856 | return pgste; |
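The reworked struct gmap drops the gmap_rmap/gmap_pgtable list walking in favour of two radix trees: guest_to_host maps guest addresses to host vmaddrs, host_to_guest maps back to segment-table entries, and both are serialized by guest_table_lock. A hedged sketch of that lookup pattern — the key granularity and value encoding below are illustrative, not the exact gmap code:

    #include <linux/radix-tree.h>
    #include <linux/spinlock.h>

    static RADIX_TREE(g2h, GFP_ATOMIC);     /* segment index -> host vmaddr */
    static DEFINE_SPINLOCK(g2h_lock);

    static int remember_segment(unsigned long gaddr, unsigned long vmaddr)
    {
            int rc;

            /* vmaddr is 1 MB aligned, so its low bits never collide with
             * the radix tree's internal entry tagging */
            spin_lock(&g2h_lock);
            rc = radix_tree_insert(&g2h, gaddr >> 20, (void *) vmaddr);
            spin_unlock(&g2h_lock);
            return rc;      /* -EEXIST if the segment is already mapped */
    }

    static unsigned long lookup_segment(unsigned long gaddr)
    {
            unsigned long vmaddr;

            spin_lock(&g2h_lock);
            vmaddr = (unsigned long) radix_tree_lookup(&g2h, gaddr >> 20);
            spin_unlock(&g2h_lock);
            return vmaddr;  /* 0 if nothing is mapped at gaddr */
    }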
@@ -1110,7 +1096,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, | |||
1110 | pgste_val(pgste) &= ~PGSTE_UC_BIT; | 1096 | pgste_val(pgste) &= ~PGSTE_UC_BIT; |
1111 | pte = *ptep; | 1097 | pte = *ptep; |
1112 | if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { | 1098 | if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { |
1113 | pgste = pgste_ipte_notify(mm, ptep, pgste); | 1099 | pgste = pgste_ipte_notify(mm, addr, ptep, pgste); |
1114 | __ptep_ipte(addr, ptep); | 1100 | __ptep_ipte(addr, ptep); |
1115 | if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) | 1101 | if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) |
1116 | pte_val(pte) |= _PAGE_PROTECT; | 1102 | pte_val(pte) |= _PAGE_PROTECT; |
@@ -1132,7 +1118,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
1132 | 1118 | ||
1133 | if (mm_has_pgste(vma->vm_mm)) { | 1119 | if (mm_has_pgste(vma->vm_mm)) { |
1134 | pgste = pgste_get_lock(ptep); | 1120 | pgste = pgste_get_lock(ptep); |
1135 | pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); | 1121 | pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); |
1136 | } | 1122 | } |
1137 | 1123 | ||
1138 | oldpte = pte = *ptep; | 1124 | oldpte = pte = *ptep; |
@@ -1179,7 +1165,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, | |||
1179 | 1165 | ||
1180 | if (mm_has_pgste(mm)) { | 1166 | if (mm_has_pgste(mm)) { |
1181 | pgste = pgste_get_lock(ptep); | 1167 | pgste = pgste_get_lock(ptep); |
1182 | pgste = pgste_ipte_notify(mm, ptep, pgste); | 1168 | pgste = pgste_ipte_notify(mm, address, ptep, pgste); |
1183 | } | 1169 | } |
1184 | 1170 | ||
1185 | pte = *ptep; | 1171 | pte = *ptep; |
@@ -1203,7 +1189,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, | |||
1203 | 1189 | ||
1204 | if (mm_has_pgste(mm)) { | 1190 | if (mm_has_pgste(mm)) { |
1205 | pgste = pgste_get_lock(ptep); | 1191 | pgste = pgste_get_lock(ptep); |
1206 | pgste_ipte_notify(mm, ptep, pgste); | 1192 | pgste_ipte_notify(mm, address, ptep, pgste); |
1207 | } | 1193 | } |
1208 | 1194 | ||
1209 | pte = *ptep; | 1195 | pte = *ptep; |
@@ -1240,7 +1226,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, | |||
1240 | 1226 | ||
1241 | if (mm_has_pgste(vma->vm_mm)) { | 1227 | if (mm_has_pgste(vma->vm_mm)) { |
1242 | pgste = pgste_get_lock(ptep); | 1228 | pgste = pgste_get_lock(ptep); |
1243 | pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); | 1229 | pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); |
1244 | } | 1230 | } |
1245 | 1231 | ||
1246 | pte = *ptep; | 1232 | pte = *ptep; |
@@ -1274,7 +1260,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, | |||
1274 | 1260 | ||
1275 | if (!full && mm_has_pgste(mm)) { | 1261 | if (!full && mm_has_pgste(mm)) { |
1276 | pgste = pgste_get_lock(ptep); | 1262 | pgste = pgste_get_lock(ptep); |
1277 | pgste = pgste_ipte_notify(mm, ptep, pgste); | 1263 | pgste = pgste_ipte_notify(mm, address, ptep, pgste); |
1278 | } | 1264 | } |
1279 | 1265 | ||
1280 | pte = *ptep; | 1266 | pte = *ptep; |
@@ -1299,7 +1285,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, | |||
1299 | if (pte_write(pte)) { | 1285 | if (pte_write(pte)) { |
1300 | if (mm_has_pgste(mm)) { | 1286 | if (mm_has_pgste(mm)) { |
1301 | pgste = pgste_get_lock(ptep); | 1287 | pgste = pgste_get_lock(ptep); |
1302 | pgste = pgste_ipte_notify(mm, ptep, pgste); | 1288 | pgste = pgste_ipte_notify(mm, address, ptep, pgste); |
1303 | } | 1289 | } |
1304 | 1290 | ||
1305 | ptep_flush_lazy(mm, address, ptep); | 1291 | ptep_flush_lazy(mm, address, ptep); |
@@ -1325,7 +1311,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, | |||
1325 | return 0; | 1311 | return 0; |
1326 | if (mm_has_pgste(vma->vm_mm)) { | 1312 | if (mm_has_pgste(vma->vm_mm)) { |
1327 | pgste = pgste_get_lock(ptep); | 1313 | pgste = pgste_get_lock(ptep); |
1328 | pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); | 1314 | pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); |
1329 | } | 1315 | } |
1330 | 1316 | ||
1331 | ptep_flush_direct(vma->vm_mm, address, ptep); | 1317 | ptep_flush_direct(vma->vm_mm, address, ptep); |
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index a25f09fbaf36..572c59949004 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h | |||
@@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) | |||
105 | static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, | 105 | static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, |
106 | unsigned long address) | 106 | unsigned long address) |
107 | { | 107 | { |
108 | page_table_free_rcu(tlb, (unsigned long *) pte); | 108 | page_table_free_rcu(tlb, (unsigned long *) pte, address); |
109 | } | 109 | } |
110 | 110 | ||
111 | /* | 111 | /* |
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 0fc26430a1e5..48eda3ab4944 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h | |||
@@ -111,12 +111,22 @@ struct kvm_guest_debug_arch { | |||
111 | #define KVM_SYNC_GPRS (1UL << 1) | 111 | #define KVM_SYNC_GPRS (1UL << 1) |
112 | #define KVM_SYNC_ACRS (1UL << 2) | 112 | #define KVM_SYNC_ACRS (1UL << 2) |
113 | #define KVM_SYNC_CRS (1UL << 3) | 113 | #define KVM_SYNC_CRS (1UL << 3) |
114 | #define KVM_SYNC_ARCH0 (1UL << 4) | ||
115 | #define KVM_SYNC_PFAULT (1UL << 5) | ||
114 | /* definition of registers in kvm_run */ | 116 | /* definition of registers in kvm_run */ |
115 | struct kvm_sync_regs { | 117 | struct kvm_sync_regs { |
116 | __u64 prefix; /* prefix register */ | 118 | __u64 prefix; /* prefix register */ |
117 | __u64 gprs[16]; /* general purpose registers */ | 119 | __u64 gprs[16]; /* general purpose registers */ |
118 | __u32 acrs[16]; /* access registers */ | 120 | __u32 acrs[16]; /* access registers */ |
119 | __u64 crs[16]; /* control registers */ | 121 | __u64 crs[16]; /* control registers */ |
122 | __u64 todpr; /* tod programmable register [ARCH0] */ | ||
123 | __u64 cputm; /* cpu timer [ARCH0] */ | ||
124 | __u64 ckc; /* clock comparator [ARCH0] */ | ||
125 | __u64 pp; /* program parameter [ARCH0] */ | ||
126 | __u64 gbea; /* guest breaking-event address [ARCH0] */ | ||
127 | __u64 pft; /* pfault token [PFAULT] */ | ||
128 | __u64 pfs; /* pfault select [PFAULT] */ | ||
129 | __u64 pfc; /* pfault compare [PFAULT] */ | ||
120 | }; | 130 | }; |
121 | 131 | ||
122 | #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) | 132 | #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) |
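KVM_SYNC_ARCH0 and KVM_SYNC_PFAULT extend the kvm_run synced-register protocol: the kernel advertises the blocks it mirrors in kvm_run->kvm_valid_regs, and userspace marks what it changed in kvm_run->kvm_dirty_regs before KVM_RUN, avoiding a GET/SET ioctl pair per register block. A hedged usage sketch — run is assumed to be the mmap()ed kvm_run of a vcpu fd (vcpu_fd), and new_ckc a value computed by the VMM:

    /* update the clock comparator without a SET_ONE_REG round trip */
    run->s.regs.ckc = new_ckc;
    run->kvm_dirty_regs |= KVM_SYNC_ARCH0;  /* consumed by sync_regs() */
    ioctl(vcpu_fd, KVM_RUN, 0);
    /* on return, store_regs() has refreshed run->s.regs.* for reading */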
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 59bd8f991b98..9254afff250c 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c | |||
@@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) | |||
28 | start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; | 28 | start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; |
29 | end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; | 29 | end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; |
30 | 30 | ||
31 | if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end | 31 | if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end |
32 | || start < 2 * PAGE_SIZE) | 32 | || start < 2 * PAGE_SIZE) |
33 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 33 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
34 | 34 | ||
35 | VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); | 35 | VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); |
36 | vcpu->stat.diagnose_10++; | 36 | vcpu->stat.diagnose_10++; |
37 | 37 | ||
38 | /* we checked for start > end above */ | 38 | /* |
39 | if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { | 39 | * We checked for start >= end above, so let's check for the |
40 | gmap_discard(start, end, vcpu->arch.gmap); | 40 | * fast path (no prefix swap page involved) |
41 | */ | ||
42 | if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) { | ||
43 | gmap_discard(vcpu->arch.gmap, start, end); | ||
41 | } else { | 44 | } else { |
42 | if (start < prefix) | 45 | /* |
43 | gmap_discard(start, prefix, vcpu->arch.gmap); | 46 | * This is the slow path. gmap_discard will check for start, |
44 | if (end >= prefix) | 47 | * so let's split this into before prefix, prefix, after |
45 | gmap_discard(prefix + 2 * PAGE_SIZE, | 48 | * prefix and let gmap_discard make some of these calls |
46 | end, vcpu->arch.gmap); | 49 | * NOPs. |
50 | */ | ||
51 | gmap_discard(vcpu->arch.gmap, start, prefix); | ||
52 | if (start <= prefix) | ||
53 | gmap_discard(vcpu->arch.gmap, 0, 4096); | ||
54 | if (end > prefix + 4096) | ||
55 | gmap_discard(vcpu->arch.gmap, 4096, 8192); | ||
56 | gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); | ||
47 | } | 57 | } |
48 | return 0; | 58 | return 0; |
49 | } | 59 | } |
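The rewritten slow path deals with the s390 prefix area: the two pages at the guest's prefix address are swapped with absolute pages 0 and 4096, so a discard that overlaps the prefix range has to touch those absolute pages instead, and gmap_discard() treating an empty or inverted range as a no-op keeps the call sites branch-free. An illustration of the decomposition — discard() is a hypothetical stand-in for gmap_discard(), and the fast-path check is assumed to have already ruled out non-overlapping ranges:

    static void discard_around_prefix(unsigned long start, unsigned long end,
                                      unsigned long prefix)
    {
            discard(start, prefix);         /* below the prefix; may be empty */
            if (start <= prefix)
                    discard(0, 4096);       /* first prefix page -> absolute 0 */
            if (end > prefix + 4096)
                    discard(4096, 8192);    /* second prefix page -> absolute 4K */
            discard(prefix + 2 * 4096, end);/* above the prefix; may be empty */
    }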
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4653ac6e182b..0f961a1c64b3 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c | |||
@@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) | |||
254 | new = old = ACCESS_ONCE(*ic); | 254 | new = old = ACCESS_ONCE(*ic); |
255 | new.k = 0; | 255 | new.k = 0; |
256 | } while (cmpxchg(&ic->val, old.val, new.val) != old.val); | 256 | } while (cmpxchg(&ic->val, old.val, new.val) != old.val); |
257 | if (!ipte_lock_count) | 257 | wake_up(&vcpu->kvm->arch.ipte_wq); |
258 | wake_up(&vcpu->kvm->arch.ipte_wq); | ||
259 | out: | 258 | out: |
260 | mutex_unlock(&ipte_mutex); | 259 | mutex_unlock(&ipte_mutex); |
261 | } | 260 | } |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f4c819bfc193..a39838457f01 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -26,8 +26,9 @@ | |||
26 | #define IOINT_SSID_MASK 0x00030000 | 26 | #define IOINT_SSID_MASK 0x00030000 |
27 | #define IOINT_CSSID_MASK 0x03fc0000 | 27 | #define IOINT_CSSID_MASK 0x03fc0000 |
28 | #define IOINT_AI_MASK 0x04000000 | 28 | #define IOINT_AI_MASK 0x04000000 |
29 | #define PFAULT_INIT 0x0600 | ||
29 | 30 | ||
30 | static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); | 31 | static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu); |
31 | 32 | ||
32 | static int is_ioint(u64 type) | 33 | static int is_ioint(u64 type) |
33 | { | 34 | { |
@@ -76,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word) | |||
76 | return (0x80 >> isc) << 24; | 77 | return (0x80 >> isc) << 24; |
77 | } | 78 | } |
78 | 79 | ||
79 | static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, | 80 | static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, |
80 | struct kvm_s390_interrupt_info *inti) | 81 | struct kvm_s390_interrupt_info *inti) |
81 | { | 82 | { |
82 | switch (inti->type) { | 83 | switch (inti->type) { |
@@ -85,6 +86,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, | |||
85 | return 0; | 86 | return 0; |
86 | if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) | 87 | if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) |
87 | return 1; | 88 | return 1; |
89 | return 0; | ||
88 | case KVM_S390_INT_EMERGENCY: | 90 | case KVM_S390_INT_EMERGENCY: |
89 | if (psw_extint_disabled(vcpu)) | 91 | if (psw_extint_disabled(vcpu)) |
90 | return 0; | 92 | return 0; |
@@ -205,11 +207,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, | |||
205 | } | 207 | } |
206 | } | 208 | } |
207 | 209 | ||
208 | static int __deliver_prog_irq(struct kvm_vcpu *vcpu, | 210 | static u16 get_ilc(struct kvm_vcpu *vcpu) |
209 | struct kvm_s390_pgm_info *pgm_info) | ||
210 | { | 211 | { |
211 | const unsigned short table[] = { 2, 4, 4, 6 }; | 212 | const unsigned short table[] = { 2, 4, 4, 6 }; |
213 | |||
214 | switch (vcpu->arch.sie_block->icptcode) { | ||
215 | case ICPT_INST: | ||
216 | case ICPT_INSTPROGI: | ||
217 | case ICPT_OPEREXC: | ||
218 | case ICPT_PARTEXEC: | ||
219 | case ICPT_IOINST: | ||
220 | /* last instruction only stored for these icptcodes */ | ||
221 | return table[vcpu->arch.sie_block->ipa >> 14]; | ||
222 | case ICPT_PROGI: | ||
223 | return vcpu->arch.sie_block->pgmilc; | ||
224 | default: | ||
225 | return 0; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, | ||
230 | struct kvm_s390_pgm_info *pgm_info) | ||
231 | { | ||
212 | int rc = 0; | 232 | int rc = 0; |
233 | u16 ilc = get_ilc(vcpu); | ||
213 | 234 | ||
214 | switch (pgm_info->code & ~PGM_PER) { | 235 | switch (pgm_info->code & ~PGM_PER) { |
215 | case PGM_AFX_TRANSLATION: | 236 | case PGM_AFX_TRANSLATION: |
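get_ilc() hoists the instruction-length computation that the next hunk deletes from __deliver_prog_irq(). The { 2, 4, 4, 6 } table indexed by ipa >> 14 follows the s390 opcode encoding: the two high-order bits of the first instruction halfword give the length — 00 is 2 bytes, 01 and 10 are 4 bytes, 11 is 6 bytes. For example SVC (opcode 0x0a) starts with 00 and is 2 bytes long, while the 0xe3xx RXY instructions start with 11 and are 6 bytes; for ICPT_PROGI intercepts the hardware already stored the length in pgmilc, so the table is not needed there.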
@@ -276,25 +297,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu, | |||
276 | (u8 *) __LC_PER_ACCESS_ID); | 297 | (u8 *) __LC_PER_ACCESS_ID); |
277 | } | 298 | } |
278 | 299 | ||
279 | switch (vcpu->arch.sie_block->icptcode) { | 300 | rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); |
280 | case ICPT_INST: | ||
281 | case ICPT_INSTPROGI: | ||
282 | case ICPT_OPEREXC: | ||
283 | case ICPT_PARTEXEC: | ||
284 | case ICPT_IOINST: | ||
285 | /* last instruction only stored for these icptcodes */ | ||
286 | rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14], | ||
287 | (u16 *) __LC_PGM_ILC); | ||
288 | break; | ||
289 | case ICPT_PROGI: | ||
290 | rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc, | ||
291 | (u16 *) __LC_PGM_ILC); | ||
292 | break; | ||
293 | default: | ||
294 | rc |= put_guest_lc(vcpu, 0, | ||
295 | (u16 *) __LC_PGM_ILC); | ||
296 | } | ||
297 | |||
298 | rc |= put_guest_lc(vcpu, pgm_info->code, | 301 | rc |= put_guest_lc(vcpu, pgm_info->code, |
299 | (u16 *)__LC_PGM_INT_CODE); | 302 | (u16 *)__LC_PGM_INT_CODE); |
300 | rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, | 303 | rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, |
@@ -305,7 +308,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu, | |||
305 | return rc; | 308 | return rc; |
306 | } | 309 | } |
307 | 310 | ||
308 | static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | 311 | static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, |
309 | struct kvm_s390_interrupt_info *inti) | 312 | struct kvm_s390_interrupt_info *inti) |
310 | { | 313 | { |
311 | const unsigned short table[] = { 2, 4, 4, 6 }; | 314 | const unsigned short table[] = { 2, 4, 4, 6 }; |
@@ -343,7 +346,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
343 | case KVM_S390_INT_CLOCK_COMP: | 346 | case KVM_S390_INT_CLOCK_COMP: |
344 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 347 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
345 | inti->ext.ext_params, 0); | 348 | inti->ext.ext_params, 0); |
346 | deliver_ckc_interrupt(vcpu); | 349 | rc = deliver_ckc_interrupt(vcpu); |
347 | break; | 350 | break; |
348 | case KVM_S390_INT_CPU_TIMER: | 351 | case KVM_S390_INT_CPU_TIMER: |
349 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 352 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
@@ -376,8 +379,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
376 | case KVM_S390_INT_PFAULT_INIT: | 379 | case KVM_S390_INT_PFAULT_INIT: |
377 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, | 380 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, |
378 | inti->ext.ext_params2); | 381 | inti->ext.ext_params2); |
379 | rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); | 382 | rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, |
380 | rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); | 383 | (u16 *) __LC_EXT_INT_CODE); |
384 | rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); | ||
381 | rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, | 385 | rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, |
382 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 386 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
383 | rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, | 387 | rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, |
@@ -501,14 +505,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
501 | default: | 505 | default: |
502 | BUG(); | 506 | BUG(); |
503 | } | 507 | } |
504 | if (rc) { | 508 | |
505 | printk("kvm: The guest lowcore is not mapped during interrupt " | 509 | return rc; |
506 | "delivery, killing userspace\n"); | ||
507 | do_exit(SIGKILL); | ||
508 | } | ||
509 | } | 510 | } |
510 | 511 | ||
511 | static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) | 512 | static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu) |
512 | { | 513 | { |
513 | int rc; | 514 | int rc; |
514 | 515 | ||
@@ -518,11 +519,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) | |||
518 | rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, | 519 | rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, |
519 | &vcpu->arch.sie_block->gpsw, | 520 | &vcpu->arch.sie_block->gpsw, |
520 | sizeof(psw_t)); | 521 | sizeof(psw_t)); |
521 | if (rc) { | 522 | return rc; |
522 | printk("kvm: The guest lowcore is not mapped during interrupt " | ||
523 | "delivery, killing userspace\n"); | ||
524 | do_exit(SIGKILL); | ||
525 | } | ||
526 | } | 523 | } |
527 | 524 | ||
528 | /* Check whether SIGP interpretation facility has an external call pending */ | 525 | /* Check whether SIGP interpretation facility has an external call pending */ |
@@ -629,6 +626,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) | |||
629 | */ | 626 | */ |
630 | vcpu->preempted = true; | 627 | vcpu->preempted = true; |
631 | wake_up_interruptible(&vcpu->wq); | 628 | wake_up_interruptible(&vcpu->wq); |
629 | vcpu->stat.halt_wakeup++; | ||
632 | } | 630 | } |
633 | } | 631 | } |
634 | 632 | ||
@@ -661,12 +659,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) | |||
661 | &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); | 659 | &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); |
662 | } | 660 | } |
663 | 661 | ||
664 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | 662 | int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) |
665 | { | 663 | { |
666 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 664 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
667 | struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; | 665 | struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; |
668 | struct kvm_s390_interrupt_info *n, *inti = NULL; | 666 | struct kvm_s390_interrupt_info *n, *inti = NULL; |
669 | int deliver; | 667 | int deliver; |
668 | int rc = 0; | ||
670 | 669 | ||
671 | __reset_intercept_indicators(vcpu); | 670 | __reset_intercept_indicators(vcpu); |
672 | if (atomic_read(&li->active)) { | 671 | if (atomic_read(&li->active)) { |
@@ -685,16 +684,16 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
685 | atomic_set(&li->active, 0); | 684 | atomic_set(&li->active, 0); |
686 | spin_unlock(&li->lock); | 685 | spin_unlock(&li->lock); |
687 | if (deliver) { | 686 | if (deliver) { |
688 | __do_deliver_interrupt(vcpu, inti); | 687 | rc = __do_deliver_interrupt(vcpu, inti); |
689 | kfree(inti); | 688 | kfree(inti); |
690 | } | 689 | } |
691 | } while (deliver); | 690 | } while (!rc && deliver); |
692 | } | 691 | } |
693 | 692 | ||
694 | if (kvm_cpu_has_pending_timer(vcpu)) | 693 | if (!rc && kvm_cpu_has_pending_timer(vcpu)) |
695 | deliver_ckc_interrupt(vcpu); | 694 | rc = deliver_ckc_interrupt(vcpu); |
696 | 695 | ||
697 | if (atomic_read(&fi->active)) { | 696 | if (!rc && atomic_read(&fi->active)) { |
698 | do { | 697 | do { |
699 | deliver = 0; | 698 | deliver = 0; |
700 | spin_lock(&fi->lock); | 699 | spin_lock(&fi->lock); |
@@ -711,67 +710,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
711 | atomic_set(&fi->active, 0); | 710 | atomic_set(&fi->active, 0); |
712 | spin_unlock(&fi->lock); | 711 | spin_unlock(&fi->lock); |
713 | if (deliver) { | 712 | if (deliver) { |
714 | __do_deliver_interrupt(vcpu, inti); | 713 | rc = __do_deliver_interrupt(vcpu, inti); |
715 | kfree(inti); | ||
716 | } | ||
717 | } while (deliver); | ||
718 | } | ||
719 | } | ||
720 | |||
721 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) | ||
722 | { | ||
723 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||
724 | struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; | ||
725 | struct kvm_s390_interrupt_info *n, *inti = NULL; | ||
726 | int deliver; | ||
727 | |||
728 | __reset_intercept_indicators(vcpu); | ||
729 | if (atomic_read(&li->active)) { | ||
730 | do { | ||
731 | deliver = 0; | ||
732 | spin_lock(&li->lock); | ||
733 | list_for_each_entry_safe(inti, n, &li->list, list) { | ||
734 | if ((inti->type == KVM_S390_MCHK) && | ||
735 | __interrupt_is_deliverable(vcpu, inti)) { | ||
736 | list_del(&inti->list); | ||
737 | deliver = 1; | ||
738 | break; | ||
739 | } | ||
740 | __set_intercept_indicator(vcpu, inti); | ||
741 | } | ||
742 | if (list_empty(&li->list)) | ||
743 | atomic_set(&li->active, 0); | ||
744 | spin_unlock(&li->lock); | ||
745 | if (deliver) { | ||
746 | __do_deliver_interrupt(vcpu, inti); | ||
747 | kfree(inti); | 714 | kfree(inti); |
748 | } | 715 | } |
749 | } while (deliver); | 716 | } while (!rc && deliver); |
750 | } | 717 | } |
751 | 718 | ||
752 | if (atomic_read(&fi->active)) { | 719 | return rc; |
753 | do { | ||
754 | deliver = 0; | ||
755 | spin_lock(&fi->lock); | ||
756 | list_for_each_entry_safe(inti, n, &fi->list, list) { | ||
757 | if ((inti->type == KVM_S390_MCHK) && | ||
758 | __interrupt_is_deliverable(vcpu, inti)) { | ||
759 | list_del(&inti->list); | ||
760 | fi->irq_count--; | ||
761 | deliver = 1; | ||
762 | break; | ||
763 | } | ||
764 | __set_intercept_indicator(vcpu, inti); | ||
765 | } | ||
766 | if (list_empty(&fi->list)) | ||
767 | atomic_set(&fi->active, 0); | ||
768 | spin_unlock(&fi->lock); | ||
769 | if (deliver) { | ||
770 | __do_deliver_interrupt(vcpu, inti); | ||
771 | kfree(inti); | ||
772 | } | ||
773 | } while (deliver); | ||
774 | } | ||
775 | } | 720 | } |
776 | 721 | ||
777 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) | 722 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) |
@@ -1048,7 +993,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | |||
1048 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, | 993 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, |
1049 | s390int->parm64, 2); | 994 | s390int->parm64, 2); |
1050 | 995 | ||
1051 | mutex_lock(&vcpu->kvm->lock); | ||
1052 | li = &vcpu->arch.local_int; | 996 | li = &vcpu->arch.local_int; |
1053 | spin_lock(&li->lock); | 997 | spin_lock(&li->lock); |
1054 | if (inti->type == KVM_S390_PROGRAM_INT) | 998 | if (inti->type == KVM_S390_PROGRAM_INT) |
@@ -1060,7 +1004,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | |||
1060 | li->action_bits |= ACTION_STOP_ON_STOP; | 1004 | li->action_bits |= ACTION_STOP_ON_STOP; |
1061 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 1005 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
1062 | spin_unlock(&li->lock); | 1006 | spin_unlock(&li->lock); |
1063 | mutex_unlock(&vcpu->kvm->lock); | ||
1064 | kvm_s390_vcpu_wakeup(vcpu); | 1007 | kvm_s390_vcpu_wakeup(vcpu); |
1065 | return 0; | 1008 | return 0; |
1066 | } | 1009 | } |
@@ -1300,7 +1243,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) | |||
1300 | } | 1243 | } |
1301 | INIT_LIST_HEAD(&map->list); | 1244 | INIT_LIST_HEAD(&map->list); |
1302 | map->guest_addr = addr; | 1245 | map->guest_addr = addr; |
1303 | map->addr = gmap_translate(addr, kvm->arch.gmap); | 1246 | map->addr = gmap_translate(kvm->arch.gmap, addr); |
1304 | if (map->addr == -EFAULT) { | 1247 | if (map->addr == -EFAULT) { |
1305 | ret = -EFAULT; | 1248 | ret = -EFAULT; |
1306 | goto out; | 1249 | goto out; |
@@ -1410,7 +1353,6 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
1410 | r = enqueue_floating_irq(dev, attr); | 1353 | r = enqueue_floating_irq(dev, attr); |
1411 | break; | 1354 | break; |
1412 | case KVM_DEV_FLIC_CLEAR_IRQS: | 1355 | case KVM_DEV_FLIC_CLEAR_IRQS: |
1413 | r = 0; | ||
1414 | kvm_s390_clear_float_irqs(dev->kvm); | 1356 | kvm_s390_clear_float_irqs(dev->kvm); |
1415 | break; | 1357 | break; |
1416 | case KVM_DEV_FLIC_APF_ENABLE: | 1358 | case KVM_DEV_FLIC_APF_ENABLE: |
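The running theme of the interrupt.c changes is turning void delivery helpers into int __must_check ones: instead of killing userspace with do_exit(SIGKILL) when the guest lowcore cannot be written, the error is now propagated through the delivery loops up to vcpu_pre_run(). __must_check expands to the compiler's warn_unused_result attribute, so dropping an rc on the floor becomes a build warning. A minimal sketch:

    #include <linux/compiler.h>     /* __must_check */

    static int __must_check deliver(void)
    {
            return -EFAULT;         /* e.g. guest lowcore not mapped */
    }

    static int caller(void)
    {
            deliver();              /* warning: ignoring return value */
            return deliver();       /* fine: rc handed to our caller */
    }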
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 81b0e11521e4..55aade49b6d1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
50 | { "exit_instruction", VCPU_STAT(exit_instruction) }, | 50 | { "exit_instruction", VCPU_STAT(exit_instruction) }, |
51 | { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, | 51 | { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, |
52 | { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, | 52 | { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, |
53 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | ||
53 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, | 54 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, |
54 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, | 55 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, |
55 | { "instruction_stctl", VCPU_STAT(instruction_stctl) }, | 56 | { "instruction_stctl", VCPU_STAT(instruction_stctl) }, |
@@ -100,16 +101,12 @@ int test_vfacility(unsigned long nr) | |||
100 | } | 101 | } |
101 | 102 | ||
102 | /* Section: not file related */ | 103 | /* Section: not file related */ |
103 | int kvm_arch_hardware_enable(void *garbage) | 104 | int kvm_arch_hardware_enable(void) |
104 | { | 105 | { |
105 | /* every s390 is virtualization enabled ;-) */ | 106 | /* every s390 is virtualization enabled ;-) */ |
106 | return 0; | 107 | return 0; |
107 | } | 108 | } |
108 | 109 | ||
109 | void kvm_arch_hardware_disable(void *garbage) | ||
110 | { | ||
111 | } | ||
112 | |||
113 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); | 110 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); |
114 | 111 | ||
115 | int kvm_arch_hardware_setup(void) | 112 | int kvm_arch_hardware_setup(void) |
@@ -124,17 +121,10 @@ void kvm_arch_hardware_unsetup(void) | |||
124 | gmap_unregister_ipte_notifier(&gmap_notifier); | 121 | gmap_unregister_ipte_notifier(&gmap_notifier); |
125 | } | 122 | } |
126 | 123 | ||
127 | void kvm_arch_check_processor_compat(void *rtn) | ||
128 | { | ||
129 | } | ||
130 | |||
131 | int kvm_arch_init(void *opaque) | 124 | int kvm_arch_init(void *opaque) |
132 | { | 125 | { |
133 | return 0; | 126 | /* Register floating interrupt controller interface. */ |
134 | } | 127 | return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); |
135 | |||
136 | void kvm_arch_exit(void) | ||
137 | { | ||
138 | } | 128 | } |
139 | 129 | ||
140 | /* Section: device related */ | 130 | /* Section: device related */ |
@@ -404,6 +394,22 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
404 | return r; | 394 | return r; |
405 | } | 395 | } |
406 | 396 | ||
397 | static int kvm_s390_crypto_init(struct kvm *kvm) | ||
398 | { | ||
399 | if (!test_vfacility(76)) | ||
400 | return 0; | ||
401 | |||
402 | kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), | ||
403 | GFP_KERNEL | GFP_DMA); | ||
404 | if (!kvm->arch.crypto.crycb) | ||
405 | return -ENOMEM; | ||
406 | |||
407 | kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb | | ||
408 | CRYCB_FORMAT1; | ||
409 | |||
410 | return 0; | ||
411 | } | ||
412 | |||
407 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | 413 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
408 | { | 414 | { |
409 | int rc; | 415 | int rc; |
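kvm_s390_crypto_init() is a no-op unless facility bit 76 is installed. The crypto control block must be allocated GFP_DMA because the SIE block's crycbd field is only 32 bits wide and packs the block origin together with format flags (CRYCB_FORMAT1) in its low bits, so the block has to live below 2 GB; the matching kfree() appears both on the out_crypto unwind path and in kvm_arch_destroy_vm() further down.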
@@ -441,6 +447,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
441 | if (!kvm->arch.dbf) | 447 | if (!kvm->arch.dbf) |
442 | goto out_nodbf; | 448 | goto out_nodbf; |
443 | 449 | ||
450 | if (kvm_s390_crypto_init(kvm) < 0) | ||
451 | goto out_crypto; | ||
452 | |||
444 | spin_lock_init(&kvm->arch.float_int.lock); | 453 | spin_lock_init(&kvm->arch.float_int.lock); |
445 | INIT_LIST_HEAD(&kvm->arch.float_int.list); | 454 | INIT_LIST_HEAD(&kvm->arch.float_int.list); |
446 | init_waitqueue_head(&kvm->arch.ipte_wq); | 455 | init_waitqueue_head(&kvm->arch.ipte_wq); |
@@ -451,7 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
451 | if (type & KVM_VM_S390_UCONTROL) { | 460 | if (type & KVM_VM_S390_UCONTROL) { |
452 | kvm->arch.gmap = NULL; | 461 | kvm->arch.gmap = NULL; |
453 | } else { | 462 | } else { |
454 | kvm->arch.gmap = gmap_alloc(current->mm); | 463 | kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); |
455 | if (!kvm->arch.gmap) | 464 | if (!kvm->arch.gmap) |
456 | goto out_nogmap; | 465 | goto out_nogmap; |
457 | kvm->arch.gmap->private = kvm; | 466 | kvm->arch.gmap->private = kvm; |
@@ -465,6 +474,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
465 | 474 | ||
466 | return 0; | 475 | return 0; |
467 | out_nogmap: | 476 | out_nogmap: |
477 | kfree(kvm->arch.crypto.crycb); | ||
478 | out_crypto: | ||
468 | debug_unregister(kvm->arch.dbf); | 479 | debug_unregister(kvm->arch.dbf); |
469 | out_nodbf: | 480 | out_nodbf: |
470 | free_page((unsigned long)(kvm->arch.sca)); | 481 | free_page((unsigned long)(kvm->arch.sca)); |
@@ -514,15 +525,12 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
514 | mutex_unlock(&kvm->lock); | 525 | mutex_unlock(&kvm->lock); |
515 | } | 526 | } |
516 | 527 | ||
517 | void kvm_arch_sync_events(struct kvm *kvm) | ||
518 | { | ||
519 | } | ||
520 | |||
521 | void kvm_arch_destroy_vm(struct kvm *kvm) | 528 | void kvm_arch_destroy_vm(struct kvm *kvm) |
522 | { | 529 | { |
523 | kvm_free_vcpus(kvm); | 530 | kvm_free_vcpus(kvm); |
524 | free_page((unsigned long)(kvm->arch.sca)); | 531 | free_page((unsigned long)(kvm->arch.sca)); |
525 | debug_unregister(kvm->arch.dbf); | 532 | debug_unregister(kvm->arch.dbf); |
533 | kfree(kvm->arch.crypto.crycb); | ||
526 | if (!kvm_is_ucontrol(kvm)) | 534 | if (!kvm_is_ucontrol(kvm)) |
527 | gmap_free(kvm->arch.gmap); | 535 | gmap_free(kvm->arch.gmap); |
528 | kvm_s390_destroy_adapters(kvm); | 536 | kvm_s390_destroy_adapters(kvm); |
@@ -535,7 +543,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
535 | vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; | 543 | vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; |
536 | kvm_clear_async_pf_completion_queue(vcpu); | 544 | kvm_clear_async_pf_completion_queue(vcpu); |
537 | if (kvm_is_ucontrol(vcpu->kvm)) { | 545 | if (kvm_is_ucontrol(vcpu->kvm)) { |
538 | vcpu->arch.gmap = gmap_alloc(current->mm); | 546 | vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); |
539 | if (!vcpu->arch.gmap) | 547 | if (!vcpu->arch.gmap) |
540 | return -ENOMEM; | 548 | return -ENOMEM; |
541 | vcpu->arch.gmap->private = vcpu->kvm; | 549 | vcpu->arch.gmap->private = vcpu->kvm; |
@@ -546,15 +554,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
546 | vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | | 554 | vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | |
547 | KVM_SYNC_GPRS | | 555 | KVM_SYNC_GPRS | |
548 | KVM_SYNC_ACRS | | 556 | KVM_SYNC_ACRS | |
549 | KVM_SYNC_CRS; | 557 | KVM_SYNC_CRS | |
558 | KVM_SYNC_ARCH0 | | ||
559 | KVM_SYNC_PFAULT; | ||
550 | return 0; | 560 | return 0; |
551 | } | 561 | } |
552 | 562 | ||
553 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
554 | { | ||
555 | /* Nothing todo */ | ||
556 | } | ||
557 | |||
558 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 563 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
559 | { | 564 | { |
560 | save_fp_ctl(&vcpu->arch.host_fpregs.fpc); | 565 | save_fp_ctl(&vcpu->arch.host_fpregs.fpc); |
@@ -607,6 +612,14 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
607 | return 0; | 612 | return 0; |
608 | } | 613 | } |
609 | 614 | ||
615 | static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) | ||
616 | { | ||
617 | if (!test_vfacility(76)) | ||
618 | return; | ||
619 | |||
620 | vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; | ||
621 | } | ||
622 | |||
610 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) | 623 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) |
611 | { | 624 | { |
612 | free_page(vcpu->arch.sie_block->cbrlo); | 625 | free_page(vcpu->arch.sie_block->cbrlo); |
@@ -653,6 +666,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
653 | vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; | 666 | vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; |
654 | get_cpu_id(&vcpu->arch.cpu_id); | 667 | get_cpu_id(&vcpu->arch.cpu_id); |
655 | vcpu->arch.cpu_id.version = 0xff; | 668 | vcpu->arch.cpu_id.version = 0xff; |
669 | |||
670 | kvm_s390_vcpu_crypto_setup(vcpu); | ||
671 | |||
656 | return rc; | 672 | return rc; |
657 | } | 673 | } |
658 | 674 | ||
@@ -1049,6 +1065,11 @@ retry: | |||
1049 | goto retry; | 1065 | goto retry; |
1050 | } | 1066 | } |
1051 | 1067 | ||
1068 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { | ||
1069 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
1070 | goto retry; | ||
1071 | } | ||
1072 | |||
1052 | if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { | 1073 | if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { |
1053 | if (!ibs_enabled(vcpu)) { | 1074 | if (!ibs_enabled(vcpu)) { |
1054 | trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); | 1075 | trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); |
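Routing TLB flushes through KVM_REQ_TLB_FLUSH — instead of poking ihcpu directly, as the kvm_s390_vcpu_start hunk below used to — moves the actual flush to a point where the vcpu is known to be outside SIE: requests can be raised from any context and are consumed here before reentry. A hedged sketch of the producer/consumer pattern:

    /* producer: any context, possibly another cpu */
    kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

    /* consumer: the vcpu thread, before (re)entering SIE */
    if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
            vcpu->arch.sie_block->ihcpu = 0xffff;   /* purge guest TLB on entry */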
@@ -1085,18 +1106,8 @@ retry: | |||
1085 | */ | 1106 | */ |
1086 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) | 1107 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) |
1087 | { | 1108 | { |
1088 | struct mm_struct *mm = current->mm; | 1109 | return gmap_fault(vcpu->arch.gmap, gpa, |
1089 | hva_t hva; | 1110 | writable ? FAULT_FLAG_WRITE : 0); |
1090 | long rc; | ||
1091 | |||
1092 | hva = gmap_fault(gpa, vcpu->arch.gmap); | ||
1093 | if (IS_ERR_VALUE(hva)) | ||
1094 | return (long)hva; | ||
1095 | down_read(&mm->mmap_sem); | ||
1096 | rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL); | ||
1097 | up_read(&mm->mmap_sem); | ||
1098 | |||
1099 | return rc < 0 ? rc : 0; | ||
1100 | } | 1111 | } |
1101 | 1112 | ||
1102 | static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, | 1113 | static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, |
@@ -1191,8 +1202,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) | |||
1191 | if (test_cpu_flag(CIF_MCCK_PENDING)) | 1202 | if (test_cpu_flag(CIF_MCCK_PENDING)) |
1192 | s390_handle_mcck(); | 1203 | s390_handle_mcck(); |
1193 | 1204 | ||
1194 | if (!kvm_is_ucontrol(vcpu->kvm)) | 1205 | if (!kvm_is_ucontrol(vcpu->kvm)) { |
1195 | kvm_s390_deliver_pending_interrupts(vcpu); | 1206 | rc = kvm_s390_deliver_pending_interrupts(vcpu); |
1207 | if (rc) | ||
1208 | return rc; | ||
1209 | } | ||
1196 | 1210 | ||
1197 | rc = kvm_s390_handle_requests(vcpu); | 1211 | rc = kvm_s390_handle_requests(vcpu); |
1198 | if (rc) | 1212 | if (rc) |
@@ -1296,6 +1310,48 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
1296 | return rc; | 1310 | return rc; |
1297 | } | 1311 | } |
1298 | 1312 | ||
1313 | static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
1314 | { | ||
1315 | vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; | ||
1316 | vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; | ||
1317 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) | ||
1318 | kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); | ||
1319 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { | ||
1320 | memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); | ||
1321 | /* some control register changes require a tlb flush */ | ||
1322 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
1323 | } | ||
1324 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { | ||
1325 | vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; | ||
1326 | vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; | ||
1327 | vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; | ||
1328 | vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; | ||
1329 | vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; | ||
1330 | } | ||
1331 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { | ||
1332 | vcpu->arch.pfault_token = kvm_run->s.regs.pft; | ||
1333 | vcpu->arch.pfault_select = kvm_run->s.regs.pfs; | ||
1334 | vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; | ||
1335 | } | ||
1336 | kvm_run->kvm_dirty_regs = 0; | ||
1337 | } | ||
1338 | |||
1339 | static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
1340 | { | ||
1341 | kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; | ||
1342 | kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; | ||
1343 | kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); | ||
1344 | memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); | ||
1345 | kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; | ||
1346 | kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; | ||
1347 | kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; | ||
1348 | kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; | ||
1349 | kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; | ||
1350 | kvm_run->s.regs.pft = vcpu->arch.pfault_token; | ||
1351 | kvm_run->s.regs.pfs = vcpu->arch.pfault_select; | ||
1352 | kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; | ||
1353 | } | ||
1354 | |||
1299 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1355 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1300 | { | 1356 | { |
1301 | int rc; | 1357 | int rc; |
@@ -1317,17 +1373,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1317 | return -EINVAL; | 1373 | return -EINVAL; |
1318 | } | 1374 | } |
1319 | 1375 | ||
1320 | vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; | 1376 | sync_regs(vcpu, kvm_run); |
1321 | vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; | ||
1322 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { | ||
1323 | kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; | ||
1324 | kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); | ||
1325 | } | ||
1326 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { | ||
1327 | kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; | ||
1328 | memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); | ||
1329 | kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); | ||
1330 | } | ||
1331 | 1377 | ||
1332 | might_fault(); | 1378 | might_fault(); |
1333 | rc = __vcpu_run(vcpu); | 1379 | rc = __vcpu_run(vcpu); |
@@ -1357,10 +1403,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1357 | rc = 0; | 1403 | rc = 0; |
1358 | } | 1404 | } |
1359 | 1405 | ||
1360 | kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; | 1406 | store_regs(vcpu, kvm_run); |
1361 | kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; | ||
1362 | kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); | ||
1363 | memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); | ||
1364 | 1407 | ||
1365 | if (vcpu->sigset_active) | 1408 | if (vcpu->sigset_active) |
1366 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1409 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
@@ -1489,7 +1532,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) | |||
1489 | * Another VCPU might have used IBS while we were offline. | 1532 | * Another VCPU might have used IBS while we were offline. |
1490 | * Let's play safe and flush the VCPU at startup. | 1533 | * Let's play safe and flush the VCPU at startup. |
1491 | */ | 1534 | */ |
1492 | vcpu->arch.sie_block->ihcpu = 0xffff; | 1535 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
1493 | spin_unlock(&vcpu->kvm->arch.start_stop_lock); | 1536 | spin_unlock(&vcpu->kvm->arch.start_stop_lock); |
1494 | return; | 1537 | return; |
1495 | } | 1538 | } |
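[Annotation] Instead of poking ihcpu directly, the flush is now recorded with kvm_make_request() and performed at the next guest entry, which also coalesces several back-to-back requests into a single flush. A rough standalone model of the request-bit idiom (the names echo KVM's but this is only a sketch):

    #include <stdatomic.h>
    #include <stdio.h>

    #define REQ_TLB_FLUSH 0   /* illustrative request number */

    struct vcpu { atomic_ulong requests; };

    static void make_request(struct vcpu *v, int req)
    {
        atomic_fetch_or(&v->requests, 1UL << req);
    }

    /* consumed exactly once, right before (re)entering the guest */
    static int check_request(struct vcpu *v, int req)
    {
        unsigned long mask = 1UL << req;
        return (atomic_fetch_and(&v->requests, ~mask) & mask) != 0;
    }

    int main(void)
    {
        struct vcpu v = { .requests = 0 };
        make_request(&v, REQ_TLB_FLUSH);   /* e.g. prefix changed */
        make_request(&v, REQ_TLB_FLUSH);   /* second request coalesces */
        if (check_request(&v, REQ_TLB_FLUSH))
            puts("flush TLB before guest entry");   /* fires once */
        return 0;
    }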
@@ -1644,9 +1687,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1644 | } | 1687 | } |
1645 | #endif | 1688 | #endif |
1646 | case KVM_S390_VCPU_FAULT: { | 1689 | case KVM_S390_VCPU_FAULT: { |
1647 | r = gmap_fault(arg, vcpu->arch.gmap); | 1690 | r = gmap_fault(vcpu->arch.gmap, arg, 0); |
1648 | if (!IS_ERR_VALUE(r)) | ||
1649 | r = 0; | ||
1650 | break; | 1691 | break; |
1651 | } | 1692 | } |
1652 | case KVM_ENABLE_CAP: | 1693 | case KVM_ENABLE_CAP: |
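[Annotation] gmap_fault() now reports a plain 0/-errno instead of an address that callers had to screen with IS_ERR_VALUE(), which is what lets the two-line fixup after the call go away. A toy comparison of the two return conventions (both helpers below are hypothetical):

    #include <errno.h>
    #include <stdio.h>

    /* old style: value or negative errno folded into one unsigned long */
    #define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-4095)

    static unsigned long lookup_old(int ok) { return ok ? 0x1000 : (unsigned long)-EFAULT; }

    static int lookup_new(int ok, unsigned long *out)
    {
        if (!ok)
            return -EFAULT;
        *out = 0x1000;
        return 0;       /* callers just propagate the int */
    }

    int main(void)
    {
        unsigned long v = lookup_old(0);
        int r = IS_ERR_VALUE(v) ? (int)v : 0;   /* the screening the new API avoids */
        unsigned long out;
        int r2 = lookup_new(1, &out);
        printf("old rc=%d new rc=%d out=%#lx\n", r, r2, out);
        return 0;
    }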
@@ -1677,21 +1718,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) | |||
1677 | return VM_FAULT_SIGBUS; | 1718 | return VM_FAULT_SIGBUS; |
1678 | } | 1719 | } |
1679 | 1720 | ||
1680 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | ||
1681 | struct kvm_memory_slot *dont) | ||
1682 | { | ||
1683 | } | ||
1684 | |||
1685 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 1721 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
1686 | unsigned long npages) | 1722 | unsigned long npages) |
1687 | { | 1723 | { |
1688 | return 0; | 1724 | return 0; |
1689 | } | 1725 | } |
1690 | 1726 | ||
1691 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
1692 | { | ||
1693 | } | ||
1694 | |||
1695 | /* Section: memory related */ | 1727 | /* Section: memory related */ |
1696 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1728 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1697 | struct kvm_memory_slot *memslot, | 1729 | struct kvm_memory_slot *memslot, |
@@ -1737,15 +1769,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
1737 | return; | 1769 | return; |
1738 | } | 1770 | } |
1739 | 1771 | ||
1740 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | ||
1741 | { | ||
1742 | } | ||
1743 | |||
1744 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | ||
1745 | struct kvm_memory_slot *slot) | ||
1746 | { | ||
1747 | } | ||
1748 | |||
1749 | static int __init kvm_s390_init(void) | 1772 | static int __init kvm_s390_init(void) |
1750 | { | 1773 | { |
1751 | int ret; | 1774 | int ret; |
@@ -1764,7 +1787,7 @@ static int __init kvm_s390_init(void) | |||
1764 | return -ENOMEM; | 1787 | return -ENOMEM; |
1765 | } | 1788 | } |
1766 | memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); | 1789 | memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); |
1767 | vfacilities[0] &= 0xff82fff3f4fc2000UL; | 1790 | vfacilities[0] &= 0xff82fffbf47c2000UL; |
1768 | vfacilities[1] &= 0x005c000000000000UL; | 1791 | vfacilities[1] &= 0x005c000000000000UL; |
1769 | return 0; | 1792 | return 0; |
1770 | } | 1793 | } |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 3862fa2cefe0..244d02303182 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu) | |||
70 | static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) | 70 | static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) |
71 | { | 71 | { |
72 | vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; | 72 | vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; |
73 | vcpu->arch.sie_block->ihcpu = 0xffff; | 73 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
74 | kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); | 74 | kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); |
75 | } | 75 | } |
76 | 76 | ||
@@ -138,8 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) | |||
138 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); | 138 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); |
139 | void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); | 139 | void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); |
140 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); | 140 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); |
141 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); | 141 | int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); |
142 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); | ||
143 | void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); | 142 | void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); |
144 | void kvm_s390_clear_float_irqs(struct kvm *kvm); | 143 | void kvm_s390_clear_float_irqs(struct kvm *kvm); |
145 | int __must_check kvm_s390_inject_vm(struct kvm *kvm, | 144 | int __must_check kvm_s390_inject_vm(struct kvm *kvm, |
@@ -228,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | |||
228 | int psw_extint_disabled(struct kvm_vcpu *vcpu); | 227 | int psw_extint_disabled(struct kvm_vcpu *vcpu); |
229 | void kvm_s390_destroy_adapters(struct kvm *kvm); | 228 | void kvm_s390_destroy_adapters(struct kvm *kvm); |
230 | int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); | 229 | int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); |
230 | extern struct kvm_device_ops kvm_flic_ops; | ||
231 | 231 | ||
232 | /* implemented in guestdbg.c */ | 232 | /* implemented in guestdbg.c */ |
233 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); | 233 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f89c1cd67751..72bb2dd8b9cd 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
352 | return 0; | 352 | return 0; |
353 | } | 353 | } |
354 | 354 | ||
355 | static void handle_new_psw(struct kvm_vcpu *vcpu) | ||
356 | { | ||
357 | /* Check whether the new psw is enabled for machine checks. */ | ||
358 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) | ||
359 | kvm_s390_deliver_pending_machine_checks(vcpu); | ||
360 | } | ||
361 | |||
362 | #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) | 355 | #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) |
363 | #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL | 356 | #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL |
364 | #define PSW_ADDR_24 0x0000000000ffffffUL | 357 | #define PSW_ADDR_24 0x0000000000ffffffUL |
@@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) | |||
405 | gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; | 398 | gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; |
406 | if (!is_valid_psw(gpsw)) | 399 | if (!is_valid_psw(gpsw)) |
407 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 400 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
408 | handle_new_psw(vcpu); | ||
409 | return 0; | 401 | return 0; |
410 | } | 402 | } |
411 | 403 | ||
@@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) | |||
427 | vcpu->arch.sie_block->gpsw = new_psw; | 419 | vcpu->arch.sie_block->gpsw = new_psw; |
428 | if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) | 420 | if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) |
429 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 421 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
430 | handle_new_psw(vcpu); | ||
431 | return 0; | 422 | return 0; |
432 | } | 423 | } |
433 | 424 | ||
@@ -738,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) | |||
738 | /* invalid entry */ | 729 | /* invalid entry */ |
739 | break; | 730 | break; |
740 | /* try to free backing */ | 731 | /* try to free backing */ |
741 | __gmap_zap(cbrle, gmap); | 732 | __gmap_zap(gmap, cbrle); |
742 | } | 733 | } |
743 | up_read(&gmap->mm->mmap_sem); | 734 | up_read(&gmap->mm->mmap_sem); |
744 | if (i < entries) | 735 | if (i < entries) |
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 3f3b35403d0a..a2b81d6ce8a5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access) | |||
442 | down_read(&mm->mmap_sem); | 442 | down_read(&mm->mmap_sem); |
443 | 443 | ||
444 | #ifdef CONFIG_PGSTE | 444 | #ifdef CONFIG_PGSTE |
445 | gmap = (struct gmap *) | 445 | gmap = (current->flags & PF_VCPU) ? |
446 | ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); | 446 | (struct gmap *) S390_lowcore.gmap : NULL; |
447 | if (gmap) { | 447 | if (gmap) { |
448 | address = __gmap_fault(address, gmap); | 448 | current->thread.gmap_addr = address; |
449 | address = __gmap_translate(gmap, address); | ||
449 | if (address == -EFAULT) { | 450 | if (address == -EFAULT) { |
450 | fault = VM_FAULT_BADMAP; | 451 | fault = VM_FAULT_BADMAP; |
451 | goto out_up; | 452 | goto out_up; |
452 | } | 453 | } |
453 | if (address == -ENOMEM) { | ||
454 | fault = VM_FAULT_OOM; | ||
455 | goto out_up; | ||
456 | } | ||
457 | if (gmap->pfault_enabled) | 454 | if (gmap->pfault_enabled) |
458 | flags |= FAULT_FLAG_RETRY_NOWAIT; | 455 | flags |= FAULT_FLAG_RETRY_NOWAIT; |
459 | } | 456 | } |
@@ -530,6 +527,20 @@ retry: | |||
530 | goto retry; | 527 | goto retry; |
531 | } | 528 | } |
532 | } | 529 | } |
530 | #ifdef CONFIG_PGSTE | ||
531 | if (gmap) { | ||
532 | address = __gmap_link(gmap, current->thread.gmap_addr, | ||
533 | address); | ||
534 | if (address == -EFAULT) { | ||
535 | fault = VM_FAULT_BADMAP; | ||
536 | goto out_up; | ||
537 | } | ||
538 | if (address == -ENOMEM) { | ||
539 | fault = VM_FAULT_OOM; | ||
540 | goto out_up; | ||
541 | } | ||
542 | } | ||
543 | #endif | ||
533 | fault = 0; | 544 | fault = 0; |
534 | out_up: | 545 | out_up: |
535 | up_read(&mm->mmap_sem); | 546 | up_read(&mm->mmap_sem); |
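[Annotation] The guest fault path in do_exception() is thus split around the ordinary fault handling: __gmap_translate() resolves the guest address to a host address up front, and __gmap_link() connects the freshly faulted-in page table afterwards. The control flow reduced to stubs (all three helpers below stand in for the real ones):

    #include <errno.h>
    #include <stdio.h>

    static long translate(unsigned long gaddr) { return 0x70000000L | (gaddr & 0xfffff); }
    static int handle_host_fault(unsigned long vmaddr) { (void)vmaddr; return 0; }
    static int link_tables(unsigned long gaddr, unsigned long vmaddr) { (void)gaddr; (void)vmaddr; return 0; }

    static int resolve_guest_fault(unsigned long gaddr)
    {
        long vmaddr = translate(gaddr);       /* phase 1: guest -> host address */
        if (vmaddr < 0)
            return -EFAULT;
        int rc = handle_host_fault(vmaddr);   /* ordinary mm fault handling */
        if (rc)
            return rc;
        return link_tables(gaddr, vmaddr);    /* phase 2: connect the page table */
    }

    int main(void)
    {
        printf("rc=%d\n", resolve_guest_fault(0x12345678));
        return 0;
    }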
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5404a6261db9..296b61a4af59 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | |||
145 | /** | 145 | /** |
146 | * gmap_alloc - allocate a guest address space | 146 | * gmap_alloc - allocate a guest address space |
147 | * @mm: pointer to the parent mm_struct | 147 | * @mm: pointer to the parent mm_struct |
148 | * @limit: maximum size of the gmap address space | ||
148 | * | 149 | * |
149 | * Returns a guest address space structure. | 150 | * Returns a guest address space structure. |
150 | */ | 151 | */ |
151 | struct gmap *gmap_alloc(struct mm_struct *mm) | 152 | struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) |
152 | { | 153 | { |
153 | struct gmap *gmap; | 154 | struct gmap *gmap; |
154 | struct page *page; | 155 | struct page *page; |
155 | unsigned long *table; | 156 | unsigned long *table; |
156 | 157 | unsigned long etype, atype; | |
158 | |||
159 | if (limit < (1UL << 31)) { | ||
160 | limit = (1UL << 31) - 1; | ||
161 | atype = _ASCE_TYPE_SEGMENT; | ||
162 | etype = _SEGMENT_ENTRY_EMPTY; | ||
163 | } else if (limit < (1UL << 42)) { | ||
164 | limit = (1UL << 42) - 1; | ||
165 | atype = _ASCE_TYPE_REGION3; | ||
166 | etype = _REGION3_ENTRY_EMPTY; | ||
167 | } else if (limit < (1UL << 53)) { | ||
168 | limit = (1UL << 53) - 1; | ||
169 | atype = _ASCE_TYPE_REGION2; | ||
170 | etype = _REGION2_ENTRY_EMPTY; | ||
171 | } else { | ||
172 | limit = -1UL; | ||
173 | atype = _ASCE_TYPE_REGION1; | ||
174 | etype = _REGION1_ENTRY_EMPTY; | ||
175 | } | ||
157 | gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); | 176 | gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); |
158 | if (!gmap) | 177 | if (!gmap) |
159 | goto out; | 178 | goto out; |
160 | INIT_LIST_HEAD(&gmap->crst_list); | 179 | INIT_LIST_HEAD(&gmap->crst_list); |
180 | INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); | ||
181 | INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); | ||
182 | spin_lock_init(&gmap->guest_table_lock); | ||
161 | gmap->mm = mm; | 183 | gmap->mm = mm; |
162 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 184 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); |
163 | if (!page) | 185 | if (!page) |
164 | goto out_free; | 186 | goto out_free; |
187 | page->index = 0; | ||
165 | list_add(&page->lru, &gmap->crst_list); | 188 | list_add(&page->lru, &gmap->crst_list); |
166 | table = (unsigned long *) page_to_phys(page); | 189 | table = (unsigned long *) page_to_phys(page); |
167 | crst_table_init(table, _REGION1_ENTRY_EMPTY); | 190 | crst_table_init(table, etype); |
168 | gmap->table = table; | 191 | gmap->table = table; |
169 | gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | | 192 | gmap->asce = atype | _ASCE_TABLE_LENGTH | |
170 | _ASCE_USER_BITS | __pa(table); | 193 | _ASCE_USER_BITS | __pa(table); |
194 | gmap->asce_end = limit; | ||
195 | down_write(&mm->mmap_sem); | ||
171 | list_add(&gmap->list, &mm->context.gmap_list); | 196 | list_add(&gmap->list, &mm->context.gmap_list); |
197 | up_write(&mm->mmap_sem); | ||
172 | return gmap; | 198 | return gmap; |
173 | 199 | ||
174 | out_free: | 200 | out_free: |
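[Annotation] gmap_alloc() now sizes the page-table hierarchy to the requested limit rather than always building a full region-1 tree: up to 2 GB a bare segment table suffices, then region-3 up to 4 TB, region-2 up to 8 PB, and region-1 beyond that. The rounding ladder in isolation (64-bit longs assumed):

    #include <stdio.h>

    static const char *asce_type_for(unsigned long *limit)
    {
        if (*limit < (1UL << 31)) {
            *limit = (1UL << 31) - 1;    /* 2 GB: one level shallower each step */
            return "segment";
        } else if (*limit < (1UL << 42)) {
            *limit = (1UL << 42) - 1;    /* 4 TB */
            return "region3";
        } else if (*limit < (1UL << 53)) {
            *limit = (1UL << 53) - 1;    /* 8 PB */
            return "region2";
        }
        *limit = -1UL;
        return "region1";
    }

    int main(void)
    {
        unsigned long sizes[] = { 1UL << 30, 1UL << 40, 1UL << 50, -1UL };
        for (int i = 0; i < 4; i++) {
            unsigned long l = sizes[i];
            const char *t = asce_type_for(&l);
            printf("limit %#lx -> %s table, rounded to %#lx\n", sizes[i], t, l);
        }
        return 0;
    }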
@@ -178,36 +204,38 @@ out: | |||
178 | } | 204 | } |
179 | EXPORT_SYMBOL_GPL(gmap_alloc); | 205 | EXPORT_SYMBOL_GPL(gmap_alloc); |
180 | 206 | ||
181 | static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) | ||
182 | { | ||
183 | struct gmap_pgtable *mp; | ||
184 | struct gmap_rmap *rmap; | ||
185 | struct page *page; | ||
186 | |||
187 | if (*table & _SEGMENT_ENTRY_INVALID) | ||
188 | return 0; | ||
189 | page = pfn_to_page(*table >> PAGE_SHIFT); | ||
190 | mp = (struct gmap_pgtable *) page->index; | ||
191 | list_for_each_entry(rmap, &mp->mapper, list) { | ||
192 | if (rmap->entry != table) | ||
193 | continue; | ||
194 | list_del(&rmap->list); | ||
195 | kfree(rmap); | ||
196 | break; | ||
197 | } | ||
198 | *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; | ||
199 | return 1; | ||
200 | } | ||
201 | |||
202 | static void gmap_flush_tlb(struct gmap *gmap) | 207 | static void gmap_flush_tlb(struct gmap *gmap) |
203 | { | 208 | { |
204 | if (MACHINE_HAS_IDTE) | 209 | if (MACHINE_HAS_IDTE) |
205 | __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | | 210 | __tlb_flush_asce(gmap->mm, gmap->asce); |
206 | _ASCE_TYPE_REGION1); | ||
207 | else | 211 | else |
208 | __tlb_flush_global(); | 212 | __tlb_flush_global(); |
209 | } | 213 | } |
210 | 214 | ||
215 | static void gmap_radix_tree_free(struct radix_tree_root *root) | ||
216 | { | ||
217 | struct radix_tree_iter iter; | ||
218 | unsigned long indices[16]; | ||
219 | unsigned long index; | ||
220 | void **slot; | ||
221 | int i, nr; | ||
222 | |||
223 | /* A radix tree is freed by deleting all of its entries */ | ||
224 | index = 0; | ||
225 | do { | ||
226 | nr = 0; | ||
227 | radix_tree_for_each_slot(slot, root, &iter, index) { | ||
228 | indices[nr] = iter.index; | ||
229 | if (++nr == 16) | ||
230 | break; | ||
231 | } | ||
232 | for (i = 0; i < nr; i++) { | ||
233 | index = indices[i]; | ||
234 | radix_tree_delete(root, index); | ||
235 | } | ||
236 | } while (nr > 0); | ||
237 | } | ||
238 | |||
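[Annotation] Deleting entries while iterating a radix tree is not safe, so gmap_radix_tree_free() gathers up to 16 indices per pass, deletes them, and resumes the scan from the last deleted index until a pass finds nothing. The same batching shape against a trivial array-backed stand-in for the tree:

    #include <stdio.h>

    #define NSLOTS 64

    static int present[NSLOTS];

    static void tree_free(void)
    {
        unsigned long indices[16], index = 0;
        int i, nr;

        do {
            nr = 0;
            /* gather up to 16 live indices starting at 'index' */
            for (unsigned long j = index; j < NSLOTS && nr < 16; j++)
                if (present[j])
                    indices[nr++] = j;
            /* delete them outside the scan, remembering where to resume */
            for (i = 0; i < nr; i++) {
                index = indices[i];
                present[index] = 0;
            }
        } while (nr > 0);
    }

    int main(void)
    {
        for (int i = 0; i < NSLOTS; i += 3)
            present[i] = 1;
        tree_free();
        int live = 0;
        for (int i = 0; i < NSLOTS; i++)
            live += present[i];
        printf("entries left: %d\n", live);   /* 0 */
        return 0;
    }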
211 | /** | 239 | /** |
212 | * gmap_free - free a guest address space | 240 | * gmap_free - free a guest address space |
213 | * @gmap: pointer to the guest address space structure | 241 | * @gmap: pointer to the guest address space structure |
@@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap) | |||
215 | void gmap_free(struct gmap *gmap) | 243 | void gmap_free(struct gmap *gmap) |
216 | { | 244 | { |
217 | struct page *page, *next; | 245 | struct page *page, *next; |
218 | unsigned long *table; | ||
219 | int i; | ||
220 | |||
221 | 246 | ||
222 | /* Flush tlb. */ | 247 | /* Flush tlb. */ |
223 | if (MACHINE_HAS_IDTE) | 248 | if (MACHINE_HAS_IDTE) |
224 | __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | | 249 | __tlb_flush_asce(gmap->mm, gmap->asce); |
225 | _ASCE_TYPE_REGION1); | ||
226 | else | 250 | else |
227 | __tlb_flush_global(); | 251 | __tlb_flush_global(); |
228 | 252 | ||
229 | /* Free all segment & region tables. */ | 253 | /* Free all segment & region tables. */ |
230 | down_read(&gmap->mm->mmap_sem); | 254 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) |
231 | spin_lock(&gmap->mm->page_table_lock); | ||
232 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { | ||
233 | table = (unsigned long *) page_to_phys(page); | ||
234 | if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) | ||
235 | /* Remove gmap rmap structures for segment table. */ | ||
236 | for (i = 0; i < PTRS_PER_PMD; i++, table++) | ||
237 | gmap_unlink_segment(gmap, table); | ||
238 | __free_pages(page, ALLOC_ORDER); | 255 | __free_pages(page, ALLOC_ORDER); |
239 | } | 256 | gmap_radix_tree_free(&gmap->guest_to_host); |
240 | spin_unlock(&gmap->mm->page_table_lock); | 257 | gmap_radix_tree_free(&gmap->host_to_guest); |
241 | up_read(&gmap->mm->mmap_sem); | 258 | down_write(&gmap->mm->mmap_sem); |
242 | list_del(&gmap->list); | 259 | list_del(&gmap->list); |
260 | up_write(&gmap->mm->mmap_sem); | ||
243 | kfree(gmap); | 261 | kfree(gmap); |
244 | } | 262 | } |
245 | EXPORT_SYMBOL_GPL(gmap_free); | 263 | EXPORT_SYMBOL_GPL(gmap_free); |
@@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable); | |||
267 | /* | 285 | /* |
268 | * gmap_alloc_table is assumed to be called with mmap_sem held | 286 | * gmap_alloc_table is assumed to be called with mmap_sem held |
269 | */ | 287 | */ |
270 | static int gmap_alloc_table(struct gmap *gmap, | 288 | static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, |
271 | unsigned long *table, unsigned long init) | 289 | unsigned long init, unsigned long gaddr) |
272 | __releases(&gmap->mm->page_table_lock) | ||
273 | __acquires(&gmap->mm->page_table_lock) | ||
274 | { | 290 | { |
275 | struct page *page; | 291 | struct page *page; |
276 | unsigned long *new; | 292 | unsigned long *new; |
277 | 293 | ||
278 | /* since we don't free the gmap table until gmap_free we can unlock */ | 294 | /* since we don't free the gmap table until gmap_free we can unlock */ |
279 | spin_unlock(&gmap->mm->page_table_lock); | ||
280 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 295 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); |
281 | spin_lock(&gmap->mm->page_table_lock); | ||
282 | if (!page) | 296 | if (!page) |
283 | return -ENOMEM; | 297 | return -ENOMEM; |
284 | new = (unsigned long *) page_to_phys(page); | 298 | new = (unsigned long *) page_to_phys(page); |
285 | crst_table_init(new, init); | 299 | crst_table_init(new, init); |
300 | spin_lock(&gmap->mm->page_table_lock); | ||
286 | if (*table & _REGION_ENTRY_INVALID) { | 301 | if (*table & _REGION_ENTRY_INVALID) { |
287 | list_add(&page->lru, &gmap->crst_list); | 302 | list_add(&page->lru, &gmap->crst_list); |
288 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | | 303 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | |
289 | (*table & _REGION_ENTRY_TYPE_MASK); | 304 | (*table & _REGION_ENTRY_TYPE_MASK); |
290 | } else | 305 | page->index = gaddr; |
306 | page = NULL; | ||
307 | } | ||
308 | spin_unlock(&gmap->mm->page_table_lock); | ||
309 | if (page) | ||
291 | __free_pages(page, ALLOC_ORDER); | 310 | __free_pages(page, ALLOC_ORDER); |
292 | return 0; | 311 | return 0; |
293 | } | 312 | } |
294 | 313 | ||
295 | /** | 314 | /** |
315 | * __gmap_segment_gaddr - find virtual address from segment pointer | ||
316 | * @entry: pointer to a segment table entry in the guest address space | ||
317 | * | ||
318 | * Returns the virtual address in the guest address space for the segment | ||
319 | */ | ||
320 | static unsigned long __gmap_segment_gaddr(unsigned long *entry) | ||
321 | { | ||
322 | struct page *page; | ||
323 | unsigned long offset; | ||
324 | |||
325 | offset = (unsigned long) entry / sizeof(unsigned long); | ||
326 | offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; | ||
327 | page = pmd_to_page((pmd_t *) entry); | ||
328 | return page->index + offset; | ||
329 | } | ||
330 | |||
331 | /** | ||
332 | * __gmap_unlink_by_vmaddr - unlink a single segment via a host address | ||
333 | * @gmap: pointer to the guest address space structure | ||
334 | * @vmaddr: address in the host process address space | ||
335 | * | ||
336 | * Returns 1 if a TLB flush is required | ||
337 | */ | ||
338 | static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) | ||
339 | { | ||
340 | unsigned long *entry; | ||
341 | int flush = 0; | ||
342 | |||
343 | spin_lock(&gmap->guest_table_lock); | ||
344 | entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); | ||
345 | if (entry) { | ||
346 | flush = (*entry != _SEGMENT_ENTRY_INVALID); | ||
347 | *entry = _SEGMENT_ENTRY_INVALID; | ||
348 | } | ||
349 | spin_unlock(&gmap->guest_table_lock); | ||
350 | return flush; | ||
351 | } | ||
352 | |||
353 | /** | ||
354 | * __gmap_unmap_by_gaddr - unmap a single segment via a guest address | ||
355 | * @gmap: pointer to the guest address space structure | ||
356 | * @gaddr: address in the guest address space | ||
357 | * | ||
358 | * Returns 1 if a TLB flush is required | ||
359 | */ | ||
360 | static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) | ||
361 | { | ||
362 | unsigned long vmaddr; | ||
363 | |||
364 | vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, | ||
365 | gaddr >> PMD_SHIFT); | ||
366 | return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; | ||
367 | } | ||
368 | |||
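[Annotation] These two helpers are the heart of the rework: the per-page-table rmap lists are gone, replaced by two radix trees per gmap, guest_to_host keyed by gaddr >> PMD_SHIFT and host_to_guest keyed by vmaddr >> PMD_SHIFT, so a segment can be unlinked starting from either address. A scaled-down model with flat arrays standing in for the trees (sizes and the 1 MB segment granularity are illustrative):

    #include <stdio.h>

    #define SEG_SHIFT 20               /* plays PMD_SHIFT, 1 MB segments */
    #define NSEG 256

    static unsigned long guest_to_host[NSEG];   /* gseg -> host address  */
    static unsigned long *host_to_guest[NSEG];  /* hseg -> segment entry */
    static unsigned long seg_table[NSEG];       /* the gmap segment 'table' */

    static void map(unsigned long gaddr, unsigned long vmaddr)
    {
        guest_to_host[gaddr >> SEG_SHIFT] = vmaddr;
        host_to_guest[vmaddr >> SEG_SHIFT] = &seg_table[gaddr >> SEG_SHIFT];
        seg_table[gaddr >> SEG_SHIFT] = vmaddr;   /* stand-in for the pmd value */
    }

    /* mirror of __gmap_unlink_by_vmaddr: invalidate via the host key */
    static int unlink_by_vmaddr(unsigned long vmaddr)
    {
        unsigned long *entry = host_to_guest[vmaddr >> SEG_SHIFT];
        int flush = 0;

        if (entry) {
            host_to_guest[vmaddr >> SEG_SHIFT] = NULL;
            flush = (*entry != 0);   /* 0 plays _SEGMENT_ENTRY_INVALID here */
            *entry = 0;
        }
        return flush;                /* 1 means a TLB flush is required */
    }

    /* mirror of __gmap_unmap_by_gaddr: hop through guest_to_host first */
    static int unmap_by_gaddr(unsigned long gaddr)
    {
        unsigned long vmaddr = guest_to_host[gaddr >> SEG_SHIFT];
        guest_to_host[gaddr >> SEG_SHIFT] = 0;
        return vmaddr ? unlink_by_vmaddr(vmaddr) : 0;
    }

    int main(void)
    {
        map(0x100000, 0x300000);
        printf("flush=%d\n", unmap_by_gaddr(0x100000));  /* 1 */
        return 0;
    }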
369 | /** | ||
296 | * gmap_unmap_segment - unmap segment from the guest address space | 370 | * gmap_unmap_segment - unmap segment from the guest address space |
297 | * @gmap: pointer to the guest address space structure | 371 | * @gmap: pointer to the guest address space structure |
298 | * @addr: address in the guest address space | 372 | * @to: address in the guest address space |
299 | * @len: length of the memory area to unmap | 373 | * @len: length of the memory area to unmap |
300 | * | 374 | * |
301 | * Returns 0 if the unmap succeeded, -EINVAL if not. | 375 | * Returns 0 if the unmap succeeded, -EINVAL if not. |
302 | */ | 376 | */ |
303 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) | 377 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) |
304 | { | 378 | { |
305 | unsigned long *table; | ||
306 | unsigned long off; | 379 | unsigned long off; |
307 | int flush; | 380 | int flush; |
308 | 381 | ||
@@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) | |||
312 | return -EINVAL; | 385 | return -EINVAL; |
313 | 386 | ||
314 | flush = 0; | 387 | flush = 0; |
315 | down_read(&gmap->mm->mmap_sem); | 388 | down_write(&gmap->mm->mmap_sem); |
316 | spin_lock(&gmap->mm->page_table_lock); | 389 | for (off = 0; off < len; off += PMD_SIZE) |
317 | for (off = 0; off < len; off += PMD_SIZE) { | 390 | flush |= __gmap_unmap_by_gaddr(gmap, to + off); |
318 | /* Walk the guest addr space page table */ | 391 | up_write(&gmap->mm->mmap_sem); |
319 | table = gmap->table + (((to + off) >> 53) & 0x7ff); | ||
320 | if (*table & _REGION_ENTRY_INVALID) | ||
321 | goto out; | ||
322 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
323 | table = table + (((to + off) >> 42) & 0x7ff); | ||
324 | if (*table & _REGION_ENTRY_INVALID) | ||
325 | goto out; | ||
326 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
327 | table = table + (((to + off) >> 31) & 0x7ff); | ||
328 | if (*table & _REGION_ENTRY_INVALID) | ||
329 | goto out; | ||
330 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
331 | table = table + (((to + off) >> 20) & 0x7ff); | ||
332 | |||
333 | /* Clear segment table entry in guest address space. */ | ||
334 | flush |= gmap_unlink_segment(gmap, table); | ||
335 | *table = _SEGMENT_ENTRY_INVALID; | ||
336 | } | ||
337 | out: | ||
338 | spin_unlock(&gmap->mm->page_table_lock); | ||
339 | up_read(&gmap->mm->mmap_sem); | ||
340 | if (flush) | 392 | if (flush) |
341 | gmap_flush_tlb(gmap); | 393 | gmap_flush_tlb(gmap); |
342 | return 0; | 394 | return 0; |
@@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment); | |||
348 | * @gmap: pointer to the guest address space structure | 400 | * @gmap: pointer to the guest address space structure |
349 | * @from: source address in the parent address space | 401 | * @from: source address in the parent address space |
350 | * @to: target address in the guest address space | 402 | * @to: target address in the guest address space |
403 | * @len: length of the memory area to map | ||
351 | * | 404 | * |
352 | * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. | 405 | * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. |
353 | */ | 406 | */ |
354 | int gmap_map_segment(struct gmap *gmap, unsigned long from, | 407 | int gmap_map_segment(struct gmap *gmap, unsigned long from, |
355 | unsigned long to, unsigned long len) | 408 | unsigned long to, unsigned long len) |
356 | { | 409 | { |
357 | unsigned long *table; | ||
358 | unsigned long off; | 410 | unsigned long off; |
359 | int flush; | 411 | int flush; |
360 | 412 | ||
361 | if ((from | to | len) & (PMD_SIZE - 1)) | 413 | if ((from | to | len) & (PMD_SIZE - 1)) |
362 | return -EINVAL; | 414 | return -EINVAL; |
363 | if (len == 0 || from + len > TASK_MAX_SIZE || | 415 | if (len == 0 || from + len < from || to + len < to || |
364 | from + len < from || to + len < to) | 416 | from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) |
365 | return -EINVAL; | 417 | return -EINVAL; |
366 | 418 | ||
367 | flush = 0; | 419 | flush = 0; |
368 | down_read(&gmap->mm->mmap_sem); | 420 | down_write(&gmap->mm->mmap_sem); |
369 | spin_lock(&gmap->mm->page_table_lock); | ||
370 | for (off = 0; off < len; off += PMD_SIZE) { | 421 | for (off = 0; off < len; off += PMD_SIZE) { |
371 | /* Walk the gmap address space page table */ | 422 | /* Remove old translation */ |
372 | table = gmap->table + (((to + off) >> 53) & 0x7ff); | 423 | flush |= __gmap_unmap_by_gaddr(gmap, to + off); |
373 | if ((*table & _REGION_ENTRY_INVALID) && | 424 | /* Store new translation */ |
374 | gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) | 425 | if (radix_tree_insert(&gmap->guest_to_host, |
375 | goto out_unmap; | 426 | (to + off) >> PMD_SHIFT, |
376 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 427 | (void *) from + off)) |
377 | table = table + (((to + off) >> 42) & 0x7ff); | 428 | break; |
378 | if ((*table & _REGION_ENTRY_INVALID) && | ||
379 | gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) | ||
380 | goto out_unmap; | ||
381 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
382 | table = table + (((to + off) >> 31) & 0x7ff); | ||
383 | if ((*table & _REGION_ENTRY_INVALID) && | ||
384 | gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) | ||
385 | goto out_unmap; | ||
386 | table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); | ||
387 | table = table + (((to + off) >> 20) & 0x7ff); | ||
388 | |||
389 | /* Store 'from' address in an invalid segment table entry. */ | ||
390 | flush |= gmap_unlink_segment(gmap, table); | ||
391 | *table = (from + off) | (_SEGMENT_ENTRY_INVALID | | ||
392 | _SEGMENT_ENTRY_PROTECT); | ||
393 | } | 429 | } |
394 | spin_unlock(&gmap->mm->page_table_lock); | 430 | up_write(&gmap->mm->mmap_sem); |
395 | up_read(&gmap->mm->mmap_sem); | ||
396 | if (flush) | 431 | if (flush) |
397 | gmap_flush_tlb(gmap); | 432 | gmap_flush_tlb(gmap); |
398 | return 0; | 433 | if (off >= len) |
399 | 434 | return 0; | |
400 | out_unmap: | ||
401 | spin_unlock(&gmap->mm->page_table_lock); | ||
402 | up_read(&gmap->mm->mmap_sem); | ||
403 | gmap_unmap_segment(gmap, to, len); | 435 | gmap_unmap_segment(gmap, to, len); |
404 | return -ENOMEM; | 436 | return -ENOMEM; |
405 | } | 437 | } |
406 | EXPORT_SYMBOL_GPL(gmap_map_segment); | 438 | EXPORT_SYMBOL_GPL(gmap_map_segment); |
407 | 439 | ||
408 | static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) | ||
409 | { | ||
410 | unsigned long *table; | ||
411 | |||
412 | table = gmap->table + ((address >> 53) & 0x7ff); | ||
413 | if (unlikely(*table & _REGION_ENTRY_INVALID)) | ||
414 | return ERR_PTR(-EFAULT); | ||
415 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
416 | table = table + ((address >> 42) & 0x7ff); | ||
417 | if (unlikely(*table & _REGION_ENTRY_INVALID)) | ||
418 | return ERR_PTR(-EFAULT); | ||
419 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
420 | table = table + ((address >> 31) & 0x7ff); | ||
421 | if (unlikely(*table & _REGION_ENTRY_INVALID)) | ||
422 | return ERR_PTR(-EFAULT); | ||
423 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
424 | table = table + ((address >> 20) & 0x7ff); | ||
425 | return table; | ||
426 | } | ||
427 | |||
428 | /** | 440 | /** |
429 | * __gmap_translate - translate a guest address to a user space address | 441 | * __gmap_translate - translate a guest address to a user space address |
430 | * @address: guest address | ||
431 | * @gmap: pointer to guest mapping meta data structure | 442 | * @gmap: pointer to guest mapping meta data structure |
443 | * @gaddr: guest address | ||
432 | * | 444 | * |
433 | * Returns user space address which corresponds to the guest address or | 445 | * Returns user space address which corresponds to the guest address or |
434 | * -EFAULT if no such mapping exists. | 446 | * -EFAULT if no such mapping exists. |
@@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) | |||
436 | * The mmap_sem of the mm that belongs to the address space must be held | 448 | * The mmap_sem of the mm that belongs to the address space must be held |
437 | * when this function gets called. | 449 | * when this function gets called. |
438 | */ | 450 | */ |
439 | unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) | 451 | unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) |
440 | { | 452 | { |
441 | unsigned long *segment_ptr, vmaddr, segment; | 453 | unsigned long vmaddr; |
442 | struct gmap_pgtable *mp; | ||
443 | struct page *page; | ||
444 | 454 | ||
445 | current->thread.gmap_addr = address; | 455 | vmaddr = (unsigned long) |
446 | segment_ptr = gmap_table_walk(address, gmap); | 456 | radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); |
447 | if (IS_ERR(segment_ptr)) | 457 | return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; |
448 | return PTR_ERR(segment_ptr); | ||
449 | /* Convert the gmap address to an mm address. */ | ||
450 | segment = *segment_ptr; | ||
451 | if (!(segment & _SEGMENT_ENTRY_INVALID)) { | ||
452 | page = pfn_to_page(segment >> PAGE_SHIFT); | ||
453 | mp = (struct gmap_pgtable *) page->index; | ||
454 | return mp->vmaddr | (address & ~PMD_MASK); | ||
455 | } else if (segment & _SEGMENT_ENTRY_PROTECT) { | ||
456 | vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; | ||
457 | return vmaddr | (address & ~PMD_MASK); | ||
458 | } | ||
459 | return -EFAULT; | ||
460 | } | 458 | } |
461 | EXPORT_SYMBOL_GPL(__gmap_translate); | 459 | EXPORT_SYMBOL_GPL(__gmap_translate); |
462 | 460 | ||
463 | /** | 461 | /** |
464 | * gmap_translate - translate a guest address to a user space address | 462 | * gmap_translate - translate a guest address to a user space address |
465 | * @address: guest address | ||
466 | * @gmap: pointer to guest mapping meta data structure | 463 | * @gmap: pointer to guest mapping meta data structure |
464 | * @gaddr: guest address | ||
467 | * | 465 | * |
468 | * Returns user space address which corresponds to the guest address or | 466 | * Returns user space address which corresponds to the guest address or |
469 | * -EFAULT if no such mapping exists. | 467 | * -EFAULT if no such mapping exists. |
470 | * This function does not establish potentially missing page table entries. | 468 | * This function does not establish potentially missing page table entries. |
471 | */ | 469 | */ |
472 | unsigned long gmap_translate(unsigned long address, struct gmap *gmap) | 470 | unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) |
473 | { | 471 | { |
474 | unsigned long rc; | 472 | unsigned long rc; |
475 | 473 | ||
476 | down_read(&gmap->mm->mmap_sem); | 474 | down_read(&gmap->mm->mmap_sem); |
477 | rc = __gmap_translate(address, gmap); | 475 | rc = __gmap_translate(gmap, gaddr); |
478 | up_read(&gmap->mm->mmap_sem); | 476 | up_read(&gmap->mm->mmap_sem); |
479 | return rc; | 477 | return rc; |
480 | } | 478 | } |
481 | EXPORT_SYMBOL_GPL(gmap_translate); | 479 | EXPORT_SYMBOL_GPL(gmap_translate); |
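[Annotation] Translation collapses to one lookup plus bit surgery: the tree returns the host segment base, and the low bits of the guest address carry over unchanged. The arithmetic on its own (PMD_SHIFT of 20 matches the 1 MB s390 segment; the base address is made up):

    #include <stdio.h>

    #define PMD_SHIFT 20
    #define PMD_MASK  (~((1UL << PMD_SHIFT) - 1))

    int main(void)
    {
        unsigned long gaddr  = 0x12345678;
        unsigned long vmbase = 0x70100000UL;   /* what the tree lookup would return */
        unsigned long vmaddr = vmbase | (gaddr & ~PMD_MASK);
        printf("gaddr %#lx -> vmaddr %#lx\n", gaddr, vmaddr);
        return 0;
    }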
482 | 480 | ||
483 | static int gmap_connect_pgtable(unsigned long address, unsigned long segment, | 481 | /** |
484 | unsigned long *segment_ptr, struct gmap *gmap) | 482 | * gmap_unlink - disconnect a page table from the gmap shadow tables |
483 | * @mm: pointer to the parent mm_struct | ||
484 | * @table: pointer to the host page table | ||
485 | * @vmaddr: vm address associated with the host page table | ||
486 | */ | ||
487 | static void gmap_unlink(struct mm_struct *mm, unsigned long *table, | ||
488 | unsigned long vmaddr) | ||
489 | { | ||
490 | struct gmap *gmap; | ||
491 | int flush; | ||
492 | |||
493 | list_for_each_entry(gmap, &mm->context.gmap_list, list) { | ||
494 | flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); | ||
495 | if (flush) | ||
496 | gmap_flush_tlb(gmap); | ||
497 | } | ||
498 | } | ||
499 | |||
500 | /** | ||
501 | * __gmap_link - set up shadow page tables to connect a host to a guest address | ||
502 | * @gmap: pointer to guest mapping meta data structure | ||
503 | * @gaddr: guest address | ||
504 | * @vmaddr: vm address | ||
505 | * | ||
506 | * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT | ||
507 | * if the vm address is already mapped to a different guest segment. | ||
508 | * The mmap_sem of the mm that belongs to the address space must be held | ||
509 | * when this function gets called. | ||
510 | */ | ||
511 | int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) | ||
485 | { | 512 | { |
486 | unsigned long vmaddr; | ||
487 | struct vm_area_struct *vma; | ||
488 | struct gmap_pgtable *mp; | ||
489 | struct gmap_rmap *rmap; | ||
490 | struct mm_struct *mm; | 513 | struct mm_struct *mm; |
491 | struct page *page; | 514 | unsigned long *table; |
515 | spinlock_t *ptl; | ||
492 | pgd_t *pgd; | 516 | pgd_t *pgd; |
493 | pud_t *pud; | 517 | pud_t *pud; |
494 | pmd_t *pmd; | 518 | pmd_t *pmd; |
519 | int rc; | ||
495 | 520 | ||
496 | mm = gmap->mm; | 521 | /* Create higher level tables in the gmap page table */ |
497 | vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; | 522 | table = gmap->table; |
498 | vma = find_vma(mm, vmaddr); | 523 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { |
499 | if (!vma || vma->vm_start > vmaddr) | 524 | table += (gaddr >> 53) & 0x7ff; |
500 | return -EFAULT; | 525 | if ((*table & _REGION_ENTRY_INVALID) && |
526 | gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, | ||
527 | gaddr & 0xffe0000000000000)) | ||
528 | return -ENOMEM; | ||
529 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
530 | } | ||
531 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { | ||
532 | table += (gaddr >> 42) & 0x7ff; | ||
533 | if ((*table & _REGION_ENTRY_INVALID) && | ||
534 | gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, | ||
535 | gaddr & 0xfffffc0000000000)) | ||
536 | return -ENOMEM; | ||
537 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
538 | } | ||
539 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { | ||
540 | table += (gaddr >> 31) & 0x7ff; | ||
541 | if ((*table & _REGION_ENTRY_INVALID) && | ||
542 | gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, | ||
543 | gaddr & 0xffffffff80000000)) | ||
544 | return -ENOMEM; | ||
545 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
546 | } | ||
547 | table += (gaddr >> 20) & 0x7ff; | ||
501 | /* Walk the parent mm page table */ | 548 | /* Walk the parent mm page table */ |
549 | mm = gmap->mm; | ||
502 | pgd = pgd_offset(mm, vmaddr); | 550 | pgd = pgd_offset(mm, vmaddr); |
503 | pud = pud_alloc(mm, pgd, vmaddr); | 551 | VM_BUG_ON(pgd_none(*pgd)); |
504 | if (!pud) | 552 | pud = pud_offset(pgd, vmaddr); |
505 | return -ENOMEM; | 553 | VM_BUG_ON(pud_none(*pud)); |
506 | pmd = pmd_alloc(mm, pud, vmaddr); | 554 | pmd = pmd_offset(pud, vmaddr); |
507 | if (!pmd) | 555 | VM_BUG_ON(pmd_none(*pmd)); |
508 | return -ENOMEM; | ||
509 | if (!pmd_present(*pmd) && | ||
510 | __pte_alloc(mm, vma, pmd, vmaddr)) | ||
511 | return -ENOMEM; | ||
512 | /* large pmds cannot yet be handled */ | 556 | /* large pmds cannot yet be handled */ |
513 | if (pmd_large(*pmd)) | 557 | if (pmd_large(*pmd)) |
514 | return -EFAULT; | 558 | return -EFAULT; |
515 | /* pmd now points to a valid segment table entry. */ | ||
516 | rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); | ||
517 | if (!rmap) | ||
518 | return -ENOMEM; | ||
519 | /* Link gmap segment table entry location to page table. */ | 559 | /* Link gmap segment table entry location to page table. */ |
520 | page = pmd_page(*pmd); | 560 | rc = radix_tree_preload(GFP_KERNEL); |
521 | mp = (struct gmap_pgtable *) page->index; | 561 | if (rc) |
522 | rmap->gmap = gmap; | 562 | return rc; |
523 | rmap->entry = segment_ptr; | 563 | ptl = pmd_lock(mm, pmd); |
524 | rmap->vmaddr = address & PMD_MASK; | 564 | spin_lock(&gmap->guest_table_lock); |
525 | spin_lock(&mm->page_table_lock); | 565 | if (*table == _SEGMENT_ENTRY_INVALID) { |
526 | if (*segment_ptr == segment) { | 566 | rc = radix_tree_insert(&gmap->host_to_guest, |
527 | list_add(&rmap->list, &mp->mapper); | 567 | vmaddr >> PMD_SHIFT, table); |
528 | /* Set gmap segment table entry to page table. */ | 568 | if (!rc) |
529 | *segment_ptr = pmd_val(*pmd) & PAGE_MASK; | 569 | *table = pmd_val(*pmd); |
530 | rmap = NULL; | 570 | } else |
531 | } | 571 | rc = 0; |
532 | spin_unlock(&mm->page_table_lock); | 572 | spin_unlock(&gmap->guest_table_lock); |
533 | kfree(rmap); | 573 | spin_unlock(ptl); |
534 | return 0; | 574 | radix_tree_preload_end(); |
535 | } | 575 | return rc; |
536 | |||
537 | static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) | ||
538 | { | ||
539 | struct gmap_rmap *rmap, *next; | ||
540 | struct gmap_pgtable *mp; | ||
541 | struct page *page; | ||
542 | int flush; | ||
543 | |||
544 | flush = 0; | ||
545 | spin_lock(&mm->page_table_lock); | ||
546 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
547 | mp = (struct gmap_pgtable *) page->index; | ||
548 | list_for_each_entry_safe(rmap, next, &mp->mapper, list) { | ||
549 | *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | | ||
550 | _SEGMENT_ENTRY_PROTECT); | ||
551 | list_del(&rmap->list); | ||
552 | kfree(rmap); | ||
553 | flush = 1; | ||
554 | } | ||
555 | spin_unlock(&mm->page_table_lock); | ||
556 | if (flush) | ||
557 | __tlb_flush_global(); | ||
558 | } | 576 | } |
559 | 577 | ||
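[Annotation] __gmap_link() descends from whatever level the ASCE starts at, allocating any missing region table on the way down, and only then stores the host pmd into the segment entry under the pmd lock and guest_table_lock. The lazy-descent half of that, modeled with tiny heap-allocated tables (levels, shifts, and table sizes are arbitrary here):

    #include <stdio.h>
    #include <stdlib.h>

    #define ENTRIES 8          /* tiny tables instead of 2048-entry CRSTs */

    struct table { struct table *slot[ENTRIES]; };

    /* walk 'levels' levels, creating any table that is still missing */
    static struct table *walk_alloc(struct table *top, unsigned long gaddr,
                                    int levels, const int shift[])
    {
        struct table *t = top;
        for (int l = 0; l < levels; l++) {
            unsigned idx = (gaddr >> shift[l]) & (ENTRIES - 1);
            if (!t->slot[idx]) {
                t->slot[idx] = calloc(1, sizeof(struct table));
                if (!t->slot[idx])
                    return NULL;               /* -ENOMEM in the original */
            }
            t = t->slot[idx];
        }
        return t;   /* the table holding the final (segment) entry */
    }

    int main(void)
    {
        struct table top = { { 0 } };
        static const int shift[] = { 9, 6, 3 };  /* illustrative per-level shifts */
        struct table *seg = walk_alloc(&top, 0x123, 3, shift);
        printf("segment table at %p\n", (void *)seg);   /* tables leak: demo only */
        return 0;
    }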
560 | /* | 578 | /** |
561 | * this function is assumed to be called with mmap_sem held | 579 | * gmap_fault - resolve a fault on a guest address |
580 | * @gmap: pointer to guest mapping meta data structure | ||
581 | * @gaddr: guest address | ||
582 | * @fault_flags: flags to pass down to handle_mm_fault() | ||
583 | * | ||
584 | * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT | ||
585 | * if the vm address is already mapped to a different guest segment. | ||
562 | */ | 586 | */ |
563 | unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) | 587 | int gmap_fault(struct gmap *gmap, unsigned long gaddr, |
588 | unsigned int fault_flags) | ||
564 | { | 589 | { |
565 | unsigned long *segment_ptr, segment; | 590 | unsigned long vmaddr; |
566 | struct gmap_pgtable *mp; | ||
567 | struct page *page; | ||
568 | int rc; | 591 | int rc; |
569 | 592 | ||
570 | current->thread.gmap_addr = address; | ||
571 | segment_ptr = gmap_table_walk(address, gmap); | ||
572 | if (IS_ERR(segment_ptr)) | ||
573 | return -EFAULT; | ||
574 | /* Convert the gmap address to an mm address. */ | ||
575 | while (1) { | ||
576 | segment = *segment_ptr; | ||
577 | if (!(segment & _SEGMENT_ENTRY_INVALID)) { | ||
578 | /* Page table is present */ | ||
579 | page = pfn_to_page(segment >> PAGE_SHIFT); | ||
580 | mp = (struct gmap_pgtable *) page->index; | ||
581 | return mp->vmaddr | (address & ~PMD_MASK); | ||
582 | } | ||
583 | if (!(segment & _SEGMENT_ENTRY_PROTECT)) | ||
584 | /* Nothing mapped in the gmap address space. */ | ||
585 | break; | ||
586 | rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); | ||
587 | if (rc) | ||
588 | return rc; | ||
589 | } | ||
590 | return -EFAULT; | ||
591 | } | ||
592 | |||
593 | unsigned long gmap_fault(unsigned long address, struct gmap *gmap) | ||
594 | { | ||
595 | unsigned long rc; | ||
596 | |||
597 | down_read(&gmap->mm->mmap_sem); | 593 | down_read(&gmap->mm->mmap_sem); |
598 | rc = __gmap_fault(address, gmap); | 594 | vmaddr = __gmap_translate(gmap, gaddr); |
595 | if (IS_ERR_VALUE(vmaddr)) { | ||
596 | rc = vmaddr; | ||
597 | goto out_up; | ||
598 | } | ||
599 | if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) { | ||
600 | rc = -EFAULT; | ||
601 | goto out_up; | ||
602 | } | ||
603 | rc = __gmap_link(gmap, gaddr, vmaddr); | ||
604 | out_up: | ||
599 | up_read(&gmap->mm->mmap_sem); | 605 | up_read(&gmap->mm->mmap_sem); |
600 | |||
601 | return rc; | 606 | return rc; |
602 | } | 607 | } |
603 | EXPORT_SYMBOL_GPL(gmap_fault); | 608 | EXPORT_SYMBOL_GPL(gmap_fault); |
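[Annotation] gmap_fault() strings the three steps together under mmap_sem and routes every failure through a single unlock site, the usual goto-unwind shape. Its skeleton with the lock and all three steps stubbed out (a sketch, not the kernel code):

    #include <errno.h>
    #include <stdio.h>
    #include <pthread.h>

    static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

    static long translate(unsigned long g) { return g ? 0x4000 : -EFAULT; }
    static int fixup(unsigned long v) { (void)v; return 0; }
    static int link_seg(unsigned long g, unsigned long v) { (void)g; (void)v; return 0; }

    static int fault(unsigned long gaddr)
    {
        long vmaddr;
        int rc;

        pthread_rwlock_rdlock(&mmap_sem);
        vmaddr = translate(gaddr);
        if (vmaddr < 0) {
            rc = (int)vmaddr;
            goto out_up;               /* one unlock site for all exits */
        }
        if (fixup(vmaddr)) {
            rc = -EFAULT;
            goto out_up;
        }
        rc = link_seg(gaddr, vmaddr);
    out_up:
        pthread_rwlock_unlock(&mmap_sem);
        return rc;
    }

    int main(void)
    {
        printf("rc=%d rc=%d\n", fault(0x1000), fault(0));
        return 0;
    }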
@@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) | |||
617 | free_swap_and_cache(entry); | 622 | free_swap_and_cache(entry); |
618 | } | 623 | } |
619 | 624 | ||
620 | /** | 625 | /* |
621 | * The mm->mmap_sem lock must be held | 626 | * this function is assumed to be called with mmap_sem held |
622 | */ | 627 | */ |
623 | static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) | 628 | void __gmap_zap(struct gmap *gmap, unsigned long gaddr) |
624 | { | 629 | { |
625 | unsigned long ptev, pgstev; | 630 | unsigned long vmaddr, ptev, pgstev; |
631 | pte_t *ptep, pte; | ||
626 | spinlock_t *ptl; | 632 | spinlock_t *ptl; |
627 | pgste_t pgste; | 633 | pgste_t pgste; |
628 | pte_t *ptep, pte; | ||
629 | 634 | ||
630 | ptep = get_locked_pte(mm, address, &ptl); | 635 | /* Find the vm address for the guest address */ |
636 | vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, | ||
637 | gaddr >> PMD_SHIFT); | ||
638 | if (!vmaddr) | ||
639 | return; | ||
640 | vmaddr |= gaddr & ~PMD_MASK; | ||
641 | /* Get pointer to the page table entry */ | ||
642 | ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); | ||
631 | if (unlikely(!ptep)) | 643 | if (unlikely(!ptep)) |
632 | return; | 644 | return; |
633 | pte = *ptep; | 645 | pte = *ptep; |
@@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) | |||
639 | ptev = pte_val(pte); | 651 | ptev = pte_val(pte); |
640 | if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || | 652 | if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || |
641 | ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { | 653 | ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { |
642 | gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); | 654 | gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); |
643 | pte_clear(mm, address, ptep); | 655 | pte_clear(gmap->mm, vmaddr, ptep); |
644 | } | 656 | } |
645 | pgste_set_unlock(ptep, pgste); | 657 | pgste_set_unlock(ptep, pgste); |
646 | out_pte: | 658 | out_pte: |
647 | pte_unmap_unlock(*ptep, ptl); | 659 | pte_unmap_unlock(*ptep, ptl); |
648 | } | 660 | } |
649 | |||
650 | /* | ||
651 | * this function is assumed to be called with mmap_sem held | ||
652 | */ | ||
653 | void __gmap_zap(unsigned long address, struct gmap *gmap) | ||
654 | { | ||
655 | unsigned long *table, *segment_ptr; | ||
656 | unsigned long segment, pgstev, ptev; | ||
657 | struct gmap_pgtable *mp; | ||
658 | struct page *page; | ||
659 | |||
660 | segment_ptr = gmap_table_walk(address, gmap); | ||
661 | if (IS_ERR(segment_ptr)) | ||
662 | return; | ||
663 | segment = *segment_ptr; | ||
664 | if (segment & _SEGMENT_ENTRY_INVALID) | ||
665 | return; | ||
666 | page = pfn_to_page(segment >> PAGE_SHIFT); | ||
667 | mp = (struct gmap_pgtable *) page->index; | ||
668 | address = mp->vmaddr | (address & ~PMD_MASK); | ||
669 | /* Page table is present */ | ||
670 | table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); | ||
671 | table = table + ((address >> 12) & 0xff); | ||
672 | pgstev = table[PTRS_PER_PTE]; | ||
673 | ptev = table[0]; | ||
674 | /* quick check, checked again with locks held */ | ||
675 | if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || | ||
676 | ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) | ||
677 | gmap_zap_unused(gmap->mm, address); | ||
678 | } | ||
679 | EXPORT_SYMBOL_GPL(__gmap_zap); | 661 | EXPORT_SYMBOL_GPL(__gmap_zap); |
680 | 662 | ||
681 | void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) | 663 | void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) |
682 | { | 664 | { |
683 | 665 | unsigned long gaddr, vmaddr, size; | |
684 | unsigned long *table, address, size; | ||
685 | struct vm_area_struct *vma; | 666 | struct vm_area_struct *vma; |
686 | struct gmap_pgtable *mp; | ||
687 | struct page *page; | ||
688 | 667 | ||
689 | down_read(&gmap->mm->mmap_sem); | 668 | down_read(&gmap->mm->mmap_sem); |
690 | address = from; | 669 | for (gaddr = from; gaddr < to; |
691 | while (address < to) { | 670 | gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { |
692 | /* Walk the gmap address space page table */ | 671 | /* Find the vm address for the guest address */ |
693 | table = gmap->table + ((address >> 53) & 0x7ff); | 672 | vmaddr = (unsigned long) |
694 | if (unlikely(*table & _REGION_ENTRY_INVALID)) { | 673 | radix_tree_lookup(&gmap->guest_to_host, |
695 | address = (address + PMD_SIZE) & PMD_MASK; | 674 | gaddr >> PMD_SHIFT); |
696 | continue; | 675 | if (!vmaddr) |
697 | } | ||
698 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
699 | table = table + ((address >> 42) & 0x7ff); | ||
700 | if (unlikely(*table & _REGION_ENTRY_INVALID)) { | ||
701 | address = (address + PMD_SIZE) & PMD_MASK; | ||
702 | continue; | 676 | continue; |
703 | } | 677 | vmaddr |= gaddr & ~PMD_MASK; |
704 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 678 | /* Find vma in the parent mm */ |
705 | table = table + ((address >> 31) & 0x7ff); | 679 | vma = find_vma(gmap->mm, vmaddr); |
706 | if (unlikely(*table & _REGION_ENTRY_INVALID)) { | 680 | size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); |
707 | address = (address + PMD_SIZE) & PMD_MASK; | 681 | zap_page_range(vma, vmaddr, size, NULL); |
708 | continue; | ||
709 | } | ||
710 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
711 | table = table + ((address >> 20) & 0x7ff); | ||
712 | if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { | ||
713 | address = (address + PMD_SIZE) & PMD_MASK; | ||
714 | continue; | ||
715 | } | ||
716 | page = pfn_to_page(*table >> PAGE_SHIFT); | ||
717 | mp = (struct gmap_pgtable *) page->index; | ||
718 | vma = find_vma(gmap->mm, mp->vmaddr); | ||
719 | size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); | ||
720 | zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), | ||
721 | size, NULL); | ||
722 | address = (address + PMD_SIZE) & PMD_MASK; | ||
723 | } | 682 | } |
724 | up_read(&gmap->mm->mmap_sem); | 683 | up_read(&gmap->mm->mmap_sem); |
725 | } | 684 | } |
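[Annotation] gmap_discard() advances a segment at a time, and the size passed to zap_page_range() is clamped to what remains of both the range and the current 1 MB segment, so the first and last chunks may be partial. The clamping by itself:

    #include <stdio.h>

    #define PMD_SIZE (1UL << 20)
    #define PMD_MASK (~(PMD_SIZE - 1))

    static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }

    int main(void)
    {
        unsigned long from = 0x123000, to = 0x345000, gaddr;

        for (gaddr = from; gaddr < to; gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
            unsigned long size = min_ul(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
            printf("zap %#lx + %#lx\n", gaddr, size);
        }
        return 0;
    }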
@@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); | |||
755 | /** | 714 | /** |
756 | * gmap_ipte_notify - mark a range of ptes for invalidation notification | 715 | * gmap_ipte_notify - mark a range of ptes for invalidation notification |
757 | * @gmap: pointer to guest mapping meta data structure | 716 | * @gmap: pointer to guest mapping meta data structure |
758 | * @start: virtual address in the guest address space | 717 | * @gaddr: virtual address in the guest address space |
759 | * @len: size of area | 718 | * @len: size of area |
760 | * | 719 | * |
761 | * Returns 0 if for each page in the given range a gmap mapping exists and | 720 | * Returns 0 if for each page in the given range a gmap mapping exists and |
@@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); | |||
763 | * for one or more pages -EFAULT is returned. If no memory could be allocated | 722 | * for one or more pages -EFAULT is returned. If no memory could be allocated |
764 | * -ENOMEM is returned. This function establishes missing page table entries. | 723 | * -ENOMEM is returned. This function establishes missing page table entries. |
765 | */ | 724 | */ |
766 | int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) | 725 | int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) |
767 | { | 726 | { |
768 | unsigned long addr; | 727 | unsigned long addr; |
769 | spinlock_t *ptl; | 728 | spinlock_t *ptl; |
@@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) | |||
771 | pgste_t pgste; | 730 | pgste_t pgste; |
772 | int rc = 0; | 731 | int rc = 0; |
773 | 732 | ||
774 | if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) | 733 | if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) |
775 | return -EINVAL; | 734 | return -EINVAL; |
776 | down_read(&gmap->mm->mmap_sem); | 735 | down_read(&gmap->mm->mmap_sem); |
777 | while (len) { | 736 | while (len) { |
778 | /* Convert gmap address and connect the page tables */ | 737 | /* Convert gmap address and connect the page tables */ |
779 | addr = __gmap_fault(start, gmap); | 738 | addr = __gmap_translate(gmap, gaddr); |
780 | if (IS_ERR_VALUE(addr)) { | 739 | if (IS_ERR_VALUE(addr)) { |
781 | rc = addr; | 740 | rc = addr; |
782 | break; | 741 | break; |
@@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) | |||
786 | rc = -EFAULT; | 745 | rc = -EFAULT; |
787 | break; | 746 | break; |
788 | } | 747 | } |
748 | rc = __gmap_link(gmap, gaddr, addr); | ||
749 | if (rc) | ||
750 | break; | ||
789 | /* Walk the process page table, lock and get pte pointer */ | 751 | /* Walk the process page table, lock and get pte pointer */ |
790 | ptep = get_locked_pte(gmap->mm, addr, &ptl); | 752 | ptep = get_locked_pte(gmap->mm, addr, &ptl); |
791 | if (unlikely(!ptep)) | 753 | if (unlikely(!ptep)) |
@@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) | |||
796 | pgste = pgste_get_lock(ptep); | 758 | pgste = pgste_get_lock(ptep); |
797 | pgste_val(pgste) |= PGSTE_IN_BIT; | 759 | pgste_val(pgste) |= PGSTE_IN_BIT; |
798 | pgste_set_unlock(ptep, pgste); | 760 | pgste_set_unlock(ptep, pgste); |
799 | start += PAGE_SIZE; | 761 | gaddr += PAGE_SIZE; |
800 | len -= PAGE_SIZE; | 762 | len -= PAGE_SIZE; |
801 | } | 763 | } |
802 | spin_unlock(ptl); | 764 | spin_unlock(ptl); |
@@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify); | |||
809 | /** | 771 | /** |
810 | * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. | 772 | * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. |
811 | * @mm: pointer to the process mm_struct | 773 | * @mm: pointer to the process mm_struct |
774 | * @vmaddr: virtual address in the process address space | ||
812 | * @pte: pointer to the page table entry | 775 | * @pte: pointer to the page table entry |
813 | * | 776 | * |
814 | * This function is assumed to be called with the page table lock held | 777 | * This function is assumed to be called with the page table lock held |
815 | * for the pte to notify. | 778 | * for the pte to notify. |
816 | */ | 779 | */ |
817 | void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) | 780 | void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) |
818 | { | 781 | { |
819 | unsigned long segment_offset; | 782 | unsigned long offset, gaddr; |
783 | unsigned long *table; | ||
820 | struct gmap_notifier *nb; | 784 | struct gmap_notifier *nb; |
821 | struct gmap_pgtable *mp; | 785 | struct gmap *gmap; |
822 | struct gmap_rmap *rmap; | ||
823 | struct page *page; | ||
824 | 786 | ||
825 | segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); | 787 | offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); |
826 | segment_offset = segment_offset * (4096 / sizeof(pte_t)); | 788 | offset = offset * (4096 / sizeof(pte_t)); |
827 | page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); | ||
828 | mp = (struct gmap_pgtable *) page->index; | ||
829 | spin_lock(&gmap_notifier_lock); | 789 | spin_lock(&gmap_notifier_lock); |
830 | list_for_each_entry(rmap, &mp->mapper, list) { | 790 | list_for_each_entry(gmap, &mm->context.gmap_list, list) { |
791 | table = radix_tree_lookup(&gmap->host_to_guest, | ||
792 | vmaddr >> PMD_SHIFT); | ||
793 | if (!table) | ||
794 | continue; | ||
795 | gaddr = __gmap_segment_gaddr(table) + offset; | ||
831 | list_for_each_entry(nb, &gmap_notifier_list, list) | 796 | list_for_each_entry(nb, &gmap_notifier_list, list) |
832 | nb->notifier_call(rmap->gmap, | 797 | nb->notifier_call(gmap, gaddr); |
833 | rmap->vmaddr + segment_offset); | ||
834 | } | 798 | } |
835 | spin_unlock(&gmap_notifier_lock); | 799 | spin_unlock(&gmap_notifier_lock); |
836 | } | 800 | } |
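[Annotation] With the rmap gone, the notifier recomputes the guest address from the pte pointer alone: the pte's byte offset within its 2 KB page table (256 entries of 8 bytes), scaled by 4096/sizeof(pte_t), gives the byte offset within the 1 MB segment, and __gmap_segment_gaddr() supplies the segment base. The arithmetic checked standalone (addresses are made up):

    #include <stdio.h>

    #define PTE_SIZE 8UL   /* sizeof(pte_t) on s390 */

    int main(void)
    {
        unsigned long table = 0x1000;                 /* start of a 2 KB page table */
        unsigned long pte   = table + 42 * PTE_SIZE;  /* entry 42 */

        unsigned long offset = pte & (255 * PTE_SIZE);   /* byte offset in the table */
        offset = offset * (4096 / PTE_SIZE);             /* -> guest byte offset */

        unsigned long seg_gaddr = 0x100000;   /* from __gmap_segment_gaddr() */
        printf("gaddr = %#lx (entry 42 -> page 42)\n", seg_gaddr + offset);
        return 0;
    }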
@@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page) | |||
841 | return atomic_read(&page->_mapcount) == 0; | 805 | return atomic_read(&page->_mapcount) == 0; |
842 | } | 806 | } |
843 | 807 | ||
844 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 808 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) |
845 | unsigned long vmaddr) | ||
846 | { | 809 | { |
847 | struct page *page; | 810 | struct page *page; |
848 | unsigned long *table; | 811 | unsigned long *table; |
849 | struct gmap_pgtable *mp; | ||
850 | 812 | ||
851 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | 813 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); |
852 | if (!page) | 814 | if (!page) |
853 | return NULL; | 815 | return NULL; |
854 | mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); | ||
855 | if (!mp) { | ||
856 | __free_page(page); | ||
857 | return NULL; | ||
858 | } | ||
859 | if (!pgtable_page_ctor(page)) { | 816 | if (!pgtable_page_ctor(page)) { |
860 | kfree(mp); | ||
861 | __free_page(page); | 817 | __free_page(page); |
862 | return NULL; | 818 | return NULL; |
863 | } | 819 | } |
864 | mp->vmaddr = vmaddr & PMD_MASK; | ||
865 | INIT_LIST_HEAD(&mp->mapper); | ||
866 | page->index = (unsigned long) mp; | ||
867 | atomic_set(&page->_mapcount, 0); | 820 | atomic_set(&page->_mapcount, 0); |
868 | table = (unsigned long *) page_to_phys(page); | 821 | table = (unsigned long *) page_to_phys(page); |
869 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); | 822 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); |
@@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | |||
874 | static inline void page_table_free_pgste(unsigned long *table) | 827 | static inline void page_table_free_pgste(unsigned long *table) |
875 | { | 828 | { |
876 | struct page *page; | 829 | struct page *page; |
877 | struct gmap_pgtable *mp; | ||
878 | 830 | ||
879 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 831 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
880 | mp = (struct gmap_pgtable *) page->index; | ||
881 | BUG_ON(!list_empty(&mp->mapper)); | ||
882 | pgtable_page_dtor(page); | 832 | pgtable_page_dtor(page); |
883 | atomic_set(&page->_mapcount, -1); | 833 | atomic_set(&page->_mapcount, -1); |
884 | kfree(mp); | ||
885 | __free_page(page); | 834 | __free_page(page); |
886 | } | 835 | } |
887 | 836 | ||
@@ -994,13 +943,13 @@ retry: | |||
994 | } | 943 | } |
995 | if (!(pte_val(*ptep) & _PAGE_INVALID) && | 944 | if (!(pte_val(*ptep) & _PAGE_INVALID) && |
996 | (pte_val(*ptep) & _PAGE_PROTECT)) { | 945 | (pte_val(*ptep) & _PAGE_PROTECT)) { |
997 | pte_unmap_unlock(*ptep, ptl); | 946 | pte_unmap_unlock(*ptep, ptl); |
998 | if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { | 947 | if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { |
999 | up_read(&mm->mmap_sem); | 948 | up_read(&mm->mmap_sem); |
1000 | return -EFAULT; | 949 | return -EFAULT; |
1001 | } | ||
1002 | goto retry; | ||
1003 | } | 950 | } |
951 | goto retry; | ||
952 | } | ||
1004 | 953 | ||
1005 | new = old = pgste_get_lock(ptep); | 954 | new = old = pgste_get_lock(ptep); |
1006 | pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | | 955 | pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | |
@@ -1038,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page) | |||
1038 | return 0; | 987 | return 0; |
1039 | } | 988 | } |
1040 | 989 | ||
1041 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 990 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) |
1042 | unsigned long vmaddr) | ||
1043 | { | 991 | { |
1044 | return NULL; | 992 | return NULL; |
1045 | } | 993 | } |
@@ -1053,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table) | |||
1053 | { | 1001 | { |
1054 | } | 1002 | } |
1055 | 1003 | ||
1056 | static inline void gmap_disconnect_pgtable(struct mm_struct *mm, | 1004 | static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, |
1057 | unsigned long *table) | 1005 | unsigned long vmaddr) |
1058 | { | 1006 | { |
1059 | } | 1007 | } |
1060 | 1008 | ||
@@ -1074,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) | |||
1074 | /* | 1022 | /* |
1075 | * page table entry allocation/free routines. | 1023 | * page table entry allocation/free routines. |
1076 | */ | 1024 | */ |
1077 | unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) | 1025 | unsigned long *page_table_alloc(struct mm_struct *mm) |
1078 | { | 1026 | { |
1079 | unsigned long *uninitialized_var(table); | 1027 | unsigned long *uninitialized_var(table); |
1080 | struct page *uninitialized_var(page); | 1028 | struct page *uninitialized_var(page); |
1081 | unsigned int mask, bit; | 1029 | unsigned int mask, bit; |
1082 | 1030 | ||
1083 | if (mm_has_pgste(mm)) | 1031 | if (mm_has_pgste(mm)) |
1084 | return page_table_alloc_pgste(mm, vmaddr); | 1032 | return page_table_alloc_pgste(mm); |
1085 | /* Allocate fragments of a 4K page as 1K/2K page table */ | 1033 | /* Allocate fragments of a 4K page as 1K/2K page table */ |
1086 | spin_lock_bh(&mm->context.list_lock); | 1034 | spin_lock_bh(&mm->context.list_lock); |
1087 | mask = FRAG_MASK; | 1035 | mask = FRAG_MASK; |
@@ -1123,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
1123 | unsigned int bit, mask; | 1071 | unsigned int bit, mask; |
1124 | 1072 | ||
1125 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 1073 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
1126 | if (page_table_with_pgste(page)) { | 1074 | if (page_table_with_pgste(page)) |
1127 | gmap_disconnect_pgtable(mm, table); | ||
1128 | return page_table_free_pgste(table); | 1075 | return page_table_free_pgste(table); |
1129 | } | ||
1130 | /* Free 1K/2K page table fragment of a 4K page */ | 1076 | /* Free 1K/2K page table fragment of a 4K page */ |
1131 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); | 1077 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); |
1132 | spin_lock_bh(&mm->context.list_lock); | 1078 | spin_lock_bh(&mm->context.list_lock); |
@@ -1158,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit) | |||
1158 | } | 1104 | } |
1159 | } | 1105 | } |
1160 | 1106 | ||
1161 | void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) | 1107 | void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, |
1108 | unsigned long vmaddr) | ||
1162 | { | 1109 | { |
1163 | struct mm_struct *mm; | 1110 | struct mm_struct *mm; |
1164 | struct page *page; | 1111 | struct page *page; |
@@ -1167,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) | |||
1167 | mm = tlb->mm; | 1114 | mm = tlb->mm; |
1168 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 1115 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
1169 | if (page_table_with_pgste(page)) { | 1116 | if (page_table_with_pgste(page)) { |
1170 | gmap_disconnect_pgtable(mm, table); | 1117 | gmap_unlink(mm, table, vmaddr); |
1171 | table = (unsigned long *) (__pa(table) | FRAG_MASK); | 1118 | table = (unsigned long *) (__pa(table) | FRAG_MASK); |
1172 | tlb_remove_table(tlb, table); | 1119 | tlb_remove_table(tlb, table); |
1173 | return; | 1120 | return; |
@@ -1303,7 +1250,7 @@ again: | |||
1303 | if (page_table_with_pgste(page)) | 1250 | if (page_table_with_pgste(page)) |
1304 | continue; | 1251 | continue; |
1305 | /* Allocate new page table with pgstes */ | 1252 | /* Allocate new page table with pgstes */ |
1306 | new = page_table_alloc_pgste(mm, addr); | 1253 | new = page_table_alloc_pgste(mm); |
1307 | if (!new) | 1254 | if (!new) |
1308 | return -ENOMEM; | 1255 | return -ENOMEM; |
1309 | 1256 | ||
@@ -1318,7 +1265,7 @@ again: | |||
1318 | /* Establish new table */ | 1265 | /* Establish new table */ |
1319 | pmd_populate(mm, pmd, (pte_t *) new); | 1266 | pmd_populate(mm, pmd, (pte_t *) new); |
1320 | /* Free old table with rcu, there might be a walker! */ | 1267 | /* Free old table with rcu, there might be a walker! */ |
1321 | page_table_free_rcu(tlb, table); | 1268 | page_table_free_rcu(tlb, table, addr); |
1322 | new = NULL; | 1269 | new = NULL; |
1323 | } | 1270 | } |
1324 | spin_unlock(ptl); | 1271 | spin_unlock(ptl); |
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fe9012a49aa5..fdbd7888cb07 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c | |||
@@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) | |||
65 | pte_t *pte; | 65 | pte_t *pte; |
66 | 66 | ||
67 | if (slab_is_available()) | 67 | if (slab_is_available()) |
68 | pte = (pte_t *) page_table_alloc(&init_mm, address); | 68 | pte = (pte_t *) page_table_alloc(&init_mm); |
69 | else | 69 | else |
70 | pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), | 70 | pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), |
71 | PTRS_PER_PTE * sizeof(pte_t)); | 71 | PTRS_PER_PTE * sizeof(pte_t)); |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 516903b98e06..094292a63e74 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -202,6 +202,7 @@ | |||
202 | #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ | 202 | #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ |
203 | #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ | 203 | #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ |
204 | #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ | 204 | #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ |
205 | #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ | ||
205 | 206 | ||
206 | 207 | ||
207 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | 208 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7c492ed9087b..7d603a71ab3a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) | |||
99 | 99 | ||
100 | #define ASYNC_PF_PER_VCPU 64 | 100 | #define ASYNC_PF_PER_VCPU 64 |
101 | 101 | ||
102 | struct kvm_vcpu; | ||
103 | struct kvm; | ||
104 | struct kvm_async_pf; | ||
105 | |||
106 | enum kvm_reg { | 102 | enum kvm_reg { |
107 | VCPU_REGS_RAX = 0, | 103 | VCPU_REGS_RAX = 0, |
108 | VCPU_REGS_RCX = 1, | 104 | VCPU_REGS_RCX = 1, |
@@ -266,7 +262,8 @@ struct kvm_mmu { | |||
266 | struct x86_exception *fault); | 262 | struct x86_exception *fault); |
267 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, | 263 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, |
268 | struct x86_exception *exception); | 264 | struct x86_exception *exception); |
269 | gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); | 265 | gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, |
266 | struct x86_exception *exception); | ||
270 | int (*sync_page)(struct kvm_vcpu *vcpu, | 267 | int (*sync_page)(struct kvm_vcpu *vcpu, |
271 | struct kvm_mmu_page *sp); | 268 | struct kvm_mmu_page *sp); |
272 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 269 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
@@ -481,6 +478,7 @@ struct kvm_vcpu_arch { | |||
481 | u64 mmio_gva; | 478 | u64 mmio_gva; |
482 | unsigned access; | 479 | unsigned access; |
483 | gfn_t mmio_gfn; | 480 | gfn_t mmio_gfn; |
481 | u64 mmio_gen; | ||
484 | 482 | ||
485 | struct kvm_pmu pmu; | 483 | struct kvm_pmu pmu; |
486 | 484 | ||
@@ -576,11 +574,10 @@ struct kvm_arch { | |||
576 | struct kvm_apic_map *apic_map; | 574 | struct kvm_apic_map *apic_map; |
577 | 575 | ||
578 | unsigned int tss_addr; | 576 | unsigned int tss_addr; |
579 | struct page *apic_access_page; | 577 | bool apic_access_page_done; |
580 | 578 | ||
581 | gpa_t wall_clock; | 579 | gpa_t wall_clock; |
582 | 580 | ||
583 | struct page *ept_identity_pagetable; | ||
584 | bool ept_identity_pagetable_done; | 581 | bool ept_identity_pagetable_done; |
585 | gpa_t ept_identity_map_addr; | 582 | gpa_t ept_identity_map_addr; |
586 | 583 | ||
@@ -665,8 +662,8 @@ struct msr_data { | |||
665 | struct kvm_x86_ops { | 662 | struct kvm_x86_ops { |
666 | int (*cpu_has_kvm_support)(void); /* __init */ | 663 | int (*cpu_has_kvm_support)(void); /* __init */ |
667 | int (*disabled_by_bios)(void); /* __init */ | 664 | int (*disabled_by_bios)(void); /* __init */ |
668 | int (*hardware_enable)(void *dummy); | 665 | int (*hardware_enable)(void); |
669 | void (*hardware_disable)(void *dummy); | 666 | void (*hardware_disable)(void); |
670 | void (*check_processor_compatibility)(void *rtn); | 667 | void (*check_processor_compatibility)(void *rtn); |
671 | int (*hardware_setup)(void); /* __init */ | 668 | int (*hardware_setup)(void); /* __init */ |
672 | void (*hardware_unsetup)(void); /* __exit */ | 669 | void (*hardware_unsetup)(void); /* __exit */ |
@@ -710,7 +707,6 @@ struct kvm_x86_ops { | |||
710 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 707 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
711 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 708 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
712 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 709 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
713 | void (*fpu_activate)(struct kvm_vcpu *vcpu); | ||
714 | void (*fpu_deactivate)(struct kvm_vcpu *vcpu); | 710 | void (*fpu_deactivate)(struct kvm_vcpu *vcpu); |
715 | 711 | ||
716 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 712 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
@@ -740,6 +736,7 @@ struct kvm_x86_ops { | |||
740 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); | 736 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); |
741 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); | 737 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
742 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | 738 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); |
739 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); | ||
743 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 740 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
744 | void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | 741 | void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
745 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 742 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
@@ -772,6 +769,8 @@ struct kvm_x86_ops { | |||
772 | bool (*mpx_supported)(void); | 769 | bool (*mpx_supported)(void); |
773 | 770 | ||
774 | int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); | 771 | int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); |
772 | |||
773 | void (*sched_in)(struct kvm_vcpu *kvm, int cpu); | ||
775 | }; | 774 | }; |
776 | 775 | ||
777 | struct kvm_arch_async_pf { | 776 | struct kvm_arch_async_pf { |
@@ -895,7 +894,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); | |||
895 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 894 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
896 | gfn_t gfn, void *data, int offset, int len, | 895 | gfn_t gfn, void *data, int offset, int len, |
897 | u32 access); | 896 | u32 access); |
898 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); | ||
899 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 897 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
900 | 898 | ||
901 | static inline int __kvm_irq_line_state(unsigned long *irq_state, | 899 | static inline int __kvm_irq_line_state(unsigned long *irq_state, |
@@ -917,7 +915,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); | |||
917 | 915 | ||
918 | int fx_init(struct kvm_vcpu *vcpu); | 916 | int fx_init(struct kvm_vcpu *vcpu); |
919 | 917 | ||
920 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | ||
921 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 918 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
922 | const u8 *new, int bytes); | 919 | const u8 *new, int bytes); |
923 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); | 920 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
@@ -926,7 +923,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | |||
926 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 923 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
927 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 924 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
928 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 925 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
929 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); | 926 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, |
927 | struct x86_exception *exception); | ||
930 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, | 928 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
931 | struct x86_exception *exception); | 929 | struct x86_exception *exception); |
932 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, | 930 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
@@ -946,7 +944,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); | |||
946 | void kvm_enable_tdp(void); | 944 | void kvm_enable_tdp(void); |
947 | void kvm_disable_tdp(void); | 945 | void kvm_disable_tdp(void); |
948 | 946 | ||
949 | static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | 947 | static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, |
948 | struct x86_exception *exception) | ||
950 | { | 949 | { |
951 | return gpa; | 950 | return gpa; |
952 | } | 951 | } |
@@ -1037,7 +1036,7 @@ asmlinkage void kvm_spurious_fault(void); | |||
1037 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 1036 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
1038 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 1037 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
1039 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); | 1038 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); |
1040 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 1039 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); |
1041 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 1040 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
1042 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 1041 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
1043 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | 1042 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); |
@@ -1046,6 +1045,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | |||
1046 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 1045 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
1047 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 1046 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
1048 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu); | 1047 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu); |
1048 | void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); | ||
1049 | void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | ||
1050 | unsigned long address); | ||
1049 | 1051 | ||
1050 | void kvm_define_shared_msr(unsigned index, u32 msr); | 1052 | void kvm_define_shared_msr(unsigned index, u32 msr); |
1051 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 1053 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e43465b..e62cf897f781 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_KVM_PARA_H | 2 | #define _ASM_X86_KVM_PARA_H |
3 | 3 | ||
4 | #include <asm/processor.h> | 4 | #include <asm/processor.h> |
5 | #include <asm/alternative.h> | ||
5 | #include <uapi/asm/kvm_para.h> | 6 | #include <uapi/asm/kvm_para.h> |
6 | 7 | ||
7 | extern void kvmclock_init(void); | 8 | extern void kvmclock_init(void); |
@@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) | |||
16 | } | 17 | } |
17 | #endif /* CONFIG_KVM_GUEST */ | 18 | #endif /* CONFIG_KVM_GUEST */ |
18 | 19 | ||
19 | /* This instruction is vmcall. On non-VT architectures, it will generate a | 20 | #ifdef CONFIG_DEBUG_RODATA |
20 | * trap that we will then rewrite to the appropriate instruction. | 21 | #define KVM_HYPERCALL \ |
22 | ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) | ||
23 | #else | ||
24 | /* On AMD processors, vmcall will generate a trap that we will | ||
25 | * then rewrite to the appropriate instruction. | ||
21 | */ | 26 | */ |
22 | #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" | 27 | #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" |
28 | #endif | ||
23 | 29 | ||
24 | /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall | 30 | /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall |
25 | * instruction. The hypervisor may replace it with something else but only the | 31 | * instruction. The hypervisor may replace it with something else but only the |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60e5497681f5..813d29d00a17 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
525 | } | 525 | } |
526 | #endif | 526 | #endif |
527 | 527 | ||
528 | /* | ||
529 | * This is only needed to tell the kernel whether to use VMCALL | ||
530 | * and VMMCALL. VMMCALL is never executed except under virt, so | ||
531 | * we can set it unconditionally. | ||
532 | */ | ||
533 | set_cpu_cap(c, X86_FEATURE_VMMCALL); | ||
534 | |||
528 | /* F16h erratum 793, CVE-2013-6885 */ | 535 | /* F16h erratum 793, CVE-2013-6885 */ |
529 | if (c->x86 == 0x16 && c->x86_model <= 0xf) | 536 | if (c->x86 == 0x16 && c->x86_model <= 0xf) |
530 | msr_set_bit(MSR_AMD64_LS_CFG, 15); | 537 | msr_set_bit(MSR_AMD64_LS_CFG, 15); |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 38a0afe83c6b..976e3a57f9ea 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void) | |||
53 | return xcr0; | 53 | return xcr0; |
54 | } | 54 | } |
55 | 55 | ||
56 | void kvm_update_cpuid(struct kvm_vcpu *vcpu) | 56 | int kvm_update_cpuid(struct kvm_vcpu *vcpu) |
57 | { | 57 | { |
58 | struct kvm_cpuid_entry2 *best; | 58 | struct kvm_cpuid_entry2 *best; |
59 | struct kvm_lapic *apic = vcpu->arch.apic; | 59 | struct kvm_lapic *apic = vcpu->arch.apic; |
60 | 60 | ||
61 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 61 | best = kvm_find_cpuid_entry(vcpu, 1, 0); |
62 | if (!best) | 62 | if (!best) |
63 | return; | 63 | return 0; |
64 | 64 | ||
65 | /* Update OSXSAVE bit */ | 65 | /* Update OSXSAVE bit */ |
66 | if (cpu_has_xsave && best->function == 0x1) { | 66 | if (cpu_has_xsave && best->function == 0x1) { |
@@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
88 | xstate_required_size(vcpu->arch.xcr0); | 88 | xstate_required_size(vcpu->arch.xcr0); |
89 | } | 89 | } |
90 | 90 | ||
91 | /* | ||
92 | * The existing code assumes virtual address is 48-bit in the canonical | ||
93 | * address checks; exit if it is ever changed. | ||
94 | */ | ||
95 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
96 | if (best && ((best->eax & 0xff00) >> 8) != 48 && | ||
97 | ((best->eax & 0xff00) >> 8) != 0) | ||
98 | return -EINVAL; | ||
99 | |||
91 | kvm_pmu_cpuid_update(vcpu); | 100 | kvm_pmu_cpuid_update(vcpu); |
101 | return 0; | ||
92 | } | 102 | } |
93 | 103 | ||
94 | static int is_efer_nx(void) | 104 | static int is_efer_nx(void) |
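The 48-bit check added above reads the guest's advertised address widths from CPUID leaf 0x80000008: EAX bits 7:0 give the physical width and bits 15:8 the virtual (linear) width. The same leaf can be probed on the host with GCC/Clang's <cpuid.h> helper:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
        return 1; /* leaf not supported */
    printf("physical address bits: %u\n", eax & 0xff);
    printf("virtual address bits:  %u\n", (eax >> 8) & 0xff); /* 48 on most x86-64 */
    return 0;
}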
@@ -112,8 +122,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | |||
112 | break; | 122 | break; |
113 | } | 123 | } |
114 | } | 124 | } |
115 | if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { | 125 | if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) { |
116 | entry->edx &= ~(1 << 20); | 126 | entry->edx &= ~bit(X86_FEATURE_NX); |
117 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | 127 | printk(KERN_INFO "kvm: guest NX capability removed\n"); |
118 | } | 128 | } |
119 | } | 129 | } |
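The bit() conversion above works because feature numbers encode word * 32 + bit and bit() keeps only the bit part. A quick standalone check; the constant mirrors the kernel's encoding and bit() stands in for KVM's helper:

#include <stdio.h>

#define X86_FEATURE_NX (1*32 + 20) /* word 1 = CPUID 0x80000001 EDX, bit 20 */
#define bit(x) (1U << ((x) & 31))

int main(void)
{
    /* 52 & 31 == 20, so bit(X86_FEATURE_NX) is the old magic 1 << 20 */
    printf("bit(X86_FEATURE_NX) = %#x\n", bit(X86_FEATURE_NX));
    printf("1U << 20            = %#x\n", 1U << 20);
    return 0;
}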
@@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
151 | } | 161 | } |
152 | vcpu->arch.cpuid_nent = cpuid->nent; | 162 | vcpu->arch.cpuid_nent = cpuid->nent; |
153 | cpuid_fix_nx_cap(vcpu); | 163 | cpuid_fix_nx_cap(vcpu); |
154 | r = 0; | ||
155 | kvm_apic_set_version(vcpu); | 164 | kvm_apic_set_version(vcpu); |
156 | kvm_x86_ops->cpuid_update(vcpu); | 165 | kvm_x86_ops->cpuid_update(vcpu); |
157 | kvm_update_cpuid(vcpu); | 166 | r = kvm_update_cpuid(vcpu); |
158 | 167 | ||
159 | out_free: | 168 | out_free: |
160 | vfree(cpuid_entries); | 169 | vfree(cpuid_entries); |
@@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
178 | vcpu->arch.cpuid_nent = cpuid->nent; | 187 | vcpu->arch.cpuid_nent = cpuid->nent; |
179 | kvm_apic_set_version(vcpu); | 188 | kvm_apic_set_version(vcpu); |
180 | kvm_x86_ops->cpuid_update(vcpu); | 189 | kvm_x86_ops->cpuid_update(vcpu); |
181 | kvm_update_cpuid(vcpu); | 190 | r = kvm_update_cpuid(vcpu); |
182 | return 0; | ||
183 | |||
184 | out: | 191 | out: |
185 | return r; | 192 | return r; |
186 | } | 193 | } |
@@ -767,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | |||
767 | if (!best) | 774 | if (!best) |
768 | best = check_cpuid_limit(vcpu, function, index); | 775 | best = check_cpuid_limit(vcpu, function, index); |
769 | 776 | ||
777 | /* | ||
778 | * Perfmon not yet supported for L2 guest. | ||
779 | */ | ||
780 | if (is_guest_mode(vcpu) && function == 0xa) | ||
781 | best = NULL; | ||
782 | |||
770 | if (best) { | 783 | if (best) { |
771 | *eax = best->eax; | 784 | *eax = best->eax; |
772 | *ebx = best->ebx; | 785 | *ebx = best->ebx; |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index a5380590ab0e..4452eedfaedd 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #include "x86.h" | 4 | #include "x86.h" |
5 | 5 | ||
6 | void kvm_update_cpuid(struct kvm_vcpu *vcpu); | 6 | int kvm_update_cpuid(struct kvm_vcpu *vcpu); |
7 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | 7 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, |
8 | u32 function, u32 index); | 8 | u32 function, u32 index); |
9 | int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, | 9 | int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, |
@@ -88,6 +88,14 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) | |||
88 | return best && (best->ecx & bit(X86_FEATURE_X2APIC)); | 88 | return best && (best->ecx & bit(X86_FEATURE_X2APIC)); |
89 | } | 89 | } |
90 | 90 | ||
91 | static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) | ||
92 | { | ||
93 | struct kvm_cpuid_entry2 *best; | ||
94 | |||
95 | best = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
96 | return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; | ||
97 | } | ||
98 | |||
91 | static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) | 99 | static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) |
92 | { | 100 | { |
93 | struct kvm_cpuid_entry2 *best; | 101 | struct kvm_cpuid_entry2 *best; |
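guest_cpuid_is_amd() needs only EBX of leaf 0 because CPUID returns the vendor string in EBX, EDX, ECX order, so EBX alone ("Auth" = 0x68747541) identifies AMD. A host-side demonstration of the same ordering:

#include <stdio.h>
#include <string.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    char vendor[13];

    __get_cpuid(0, &eax, &ebx, &ecx, &edx);
    memcpy(vendor + 0, &ebx, 4); /* "Auth" on AMD: ebx == 0x68747541 */
    memcpy(vendor + 4, &edx, 4); /* "enti" */
    memcpy(vendor + 8, &ecx, 4); /* "cAMD" */
    vendor[12] = '\0';
    printf("vendor: %s\n", vendor);
    return 0;
}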
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 03954f7900f5..a46207a05835 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) | |||
527 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | 527 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, |
528 | u32 error, bool valid) | 528 | u32 error, bool valid) |
529 | { | 529 | { |
530 | WARN_ON(vec > 0x1f); | ||
530 | ctxt->exception.vector = vec; | 531 | ctxt->exception.vector = vec; |
531 | ctxt->exception.error_code = error; | 532 | ctxt->exception.error_code = error; |
532 | ctxt->exception.error_code_valid = valid; | 533 | ctxt->exception.error_code_valid = valid; |
@@ -1468,7 +1469,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1468 | return ret; | 1469 | return ret; |
1469 | 1470 | ||
1470 | err_code = selector & 0xfffc; | 1471 | err_code = selector & 0xfffc; |
1471 | err_vec = GP_VECTOR; | 1472 | err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR; |
1472 | 1473 | ||
1473 | /* can't load system descriptor into segment selector */ | 1474 | /* can't load system descriptor into segment selector */ |
1474 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | 1475 | if (seg <= VCPU_SREG_GS && !seg_desc.s) |
@@ -1503,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1503 | if (rpl > cpl || dpl != cpl) | 1504 | if (rpl > cpl || dpl != cpl) |
1504 | goto exception; | 1505 | goto exception; |
1505 | } | 1506 | } |
1507 | /* in long-mode d/b must be clear if l is set */ | ||
1508 | if (seg_desc.d && seg_desc.l) { | ||
1509 | u64 efer = 0; | ||
1510 | |||
1511 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
1512 | if (efer & EFER_LMA) | ||
1513 | goto exception; | ||
1514 | } | ||
1515 | |||
1506 | /* CS(RPL) <- CPL */ | 1516 | /* CS(RPL) <- CPL */ |
1507 | selector = (selector & 0xfffc) | cpl; | 1517 | selector = (selector & 0xfffc) | cpl; |
1508 | break; | 1518 | break; |
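The new L/D check enforces that a 64-bit code segment (L=1) must have D/B=0; L=1 together with D=1 is reserved in IA-32e mode. A small decode of those two flags from a descriptor's high dword, assuming the standard x86 layout (L is bit 21, D/B is bit 22):

#include <stdio.h>
#include <stdint.h>

static int valid_long_mode_cs(uint32_t desc_hi)
{
    int l = (desc_hi >> 21) & 1; /* 64-bit code segment */
    int d = (desc_hi >> 22) & 1; /* default operand size */

    return !(l && d);
}

int main(void)
{
    printf("L=1,D=0 -> %s\n", valid_long_mode_cs(1u << 21) ? "ok" : "#GP");
    printf("L=1,D=1 -> %s\n",
           valid_long_mode_cs((1u << 21) | (1u << 22)) ? "ok" : "#GP");
    return 0;
}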
@@ -1549,8 +1559,7 @@ load: | |||
1549 | ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); | 1559 | ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); |
1550 | return X86EMUL_CONTINUE; | 1560 | return X86EMUL_CONTINUE; |
1551 | exception: | 1561 | exception: |
1552 | emulate_exception(ctxt, err_vec, err_code, true); | 1562 | return emulate_exception(ctxt, err_vec, err_code, true); |
1553 | return X86EMUL_PROPAGATE_FAULT; | ||
1554 | } | 1563 | } |
1555 | 1564 | ||
1556 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1565 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
@@ -2723,8 +2732,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2723 | if (!next_tss_desc.p || | 2732 | if (!next_tss_desc.p || |
2724 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | 2733 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || |
2725 | desc_limit < 0x2b)) { | 2734 | desc_limit < 0x2b)) { |
2726 | emulate_ts(ctxt, tss_selector & 0xfffc); | 2735 | return emulate_ts(ctxt, tss_selector & 0xfffc); |
2727 | return X86EMUL_PROPAGATE_FAULT; | ||
2728 | } | 2736 | } |
2729 | 2737 | ||
2730 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 2738 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
@@ -3016,7 +3024,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) | |||
3016 | ctxt->dst.val = swab64(ctxt->src.val); | 3024 | ctxt->dst.val = swab64(ctxt->src.val); |
3017 | break; | 3025 | break; |
3018 | default: | 3026 | default: |
3019 | return X86EMUL_PROPAGATE_FAULT; | 3027 | BUG(); |
3020 | } | 3028 | } |
3021 | return X86EMUL_CONTINUE; | 3029 | return X86EMUL_CONTINUE; |
3022 | } | 3030 | } |
@@ -3140,12 +3148,8 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) | |||
3140 | 3148 | ||
3141 | static int em_vmcall(struct x86_emulate_ctxt *ctxt) | 3149 | static int em_vmcall(struct x86_emulate_ctxt *ctxt) |
3142 | { | 3150 | { |
3143 | int rc; | 3151 | int rc = ctxt->ops->fix_hypercall(ctxt); |
3144 | |||
3145 | if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1) | ||
3146 | return X86EMUL_UNHANDLEABLE; | ||
3147 | 3152 | ||
3148 | rc = ctxt->ops->fix_hypercall(ctxt); | ||
3149 | if (rc != X86EMUL_CONTINUE) | 3153 | if (rc != X86EMUL_CONTINUE) |
3150 | return rc; | 3154 | return rc; |
3151 | 3155 | ||
@@ -3563,6 +3567,12 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3563 | F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ | 3567 | F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
3564 | F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) | 3568 | F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) |
3565 | 3569 | ||
3570 | static const struct opcode group7_rm0[] = { | ||
3571 | N, | ||
3572 | I(SrcNone | Priv | EmulateOnUD, em_vmcall), | ||
3573 | N, N, N, N, N, N, | ||
3574 | }; | ||
3575 | |||
3566 | static const struct opcode group7_rm1[] = { | 3576 | static const struct opcode group7_rm1[] = { |
3567 | DI(SrcNone | Priv, monitor), | 3577 | DI(SrcNone | Priv, monitor), |
3568 | DI(SrcNone | Priv, mwait), | 3578 | DI(SrcNone | Priv, mwait), |
@@ -3656,7 +3666,7 @@ static const struct group_dual group7 = { { | |||
3656 | II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), | 3666 | II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), |
3657 | II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), | 3667 | II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), |
3658 | }, { | 3668 | }, { |
3659 | I(SrcNone | Priv | EmulateOnUD, em_vmcall), | 3669 | EXT(0, group7_rm0), |
3660 | EXT(0, group7_rm1), | 3670 | EXT(0, group7_rm1), |
3661 | N, EXT(0, group7_rm3), | 3671 | N, EXT(0, group7_rm3), |
3662 | II(SrcNone | DstMem | Mov, em_smsw, smsw), N, | 3672 | II(SrcNone | DstMem | Mov, em_smsw, smsw), N, |
@@ -3687,14 +3697,18 @@ static const struct gprefix pfx_0f_6f_0f_7f = { | |||
3687 | I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), | 3697 | I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), |
3688 | }; | 3698 | }; |
3689 | 3699 | ||
3690 | static const struct gprefix pfx_vmovntpx = { | 3700 | static const struct gprefix pfx_0f_2b = { |
3691 | I(0, em_mov), N, N, N, | 3701 | I(0, em_mov), I(0, em_mov), N, N, |
3692 | }; | 3702 | }; |
3693 | 3703 | ||
3694 | static const struct gprefix pfx_0f_28_0f_29 = { | 3704 | static const struct gprefix pfx_0f_28_0f_29 = { |
3695 | I(Aligned, em_mov), I(Aligned, em_mov), N, N, | 3705 | I(Aligned, em_mov), I(Aligned, em_mov), N, N, |
3696 | }; | 3706 | }; |
3697 | 3707 | ||
3708 | static const struct gprefix pfx_0f_e7 = { | ||
3709 | N, I(Sse, em_mov), N, N, | ||
3710 | }; | ||
3711 | |||
3698 | static const struct escape escape_d9 = { { | 3712 | static const struct escape escape_d9 = { { |
3699 | N, N, N, N, N, N, N, I(DstMem, em_fnstcw), | 3713 | N, N, N, N, N, N, N, I(DstMem, em_fnstcw), |
3700 | }, { | 3714 | }, { |
@@ -3901,7 +3915,7 @@ static const struct opcode twobyte_table[256] = { | |||
3901 | N, N, N, N, | 3915 | N, N, N, N, |
3902 | GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), | 3916 | GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), |
3903 | GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), | 3917 | GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), |
3904 | N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), | 3918 | N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b), |
3905 | N, N, N, N, | 3919 | N, N, N, N, |
3906 | /* 0x30 - 0x3F */ | 3920 | /* 0x30 - 0x3F */ |
3907 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), | 3921 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), |
@@ -3965,7 +3979,8 @@ static const struct opcode twobyte_table[256] = { | |||
3965 | /* 0xD0 - 0xDF */ | 3979 | /* 0xD0 - 0xDF */ |
3966 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3980 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, |
3967 | /* 0xE0 - 0xEF */ | 3981 | /* 0xE0 - 0xEF */ |
3968 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3982 | N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7), |
3983 | N, N, N, N, N, N, N, N, | ||
3969 | /* 0xF0 - 0xFF */ | 3984 | /* 0xF0 - 0xFF */ |
3970 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N | 3985 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N |
3971 | }; | 3986 | }; |
@@ -4829,8 +4844,10 @@ writeback: | |||
4829 | ctxt->eip = ctxt->_eip; | 4844 | ctxt->eip = ctxt->_eip; |
4830 | 4845 | ||
4831 | done: | 4846 | done: |
4832 | if (rc == X86EMUL_PROPAGATE_FAULT) | 4847 | if (rc == X86EMUL_PROPAGATE_FAULT) { |
4848 | WARN_ON(ctxt->exception.vector > 0x1f); | ||
4833 | ctxt->have_exception = true; | 4849 | ctxt->have_exception = true; |
4850 | } | ||
4834 | if (rc == X86EMUL_INTERCEPTED) | 4851 | if (rc == X86EMUL_INTERCEPTED) |
4835 | return EMULATION_INTERCEPTED; | 4852 | return EMULATION_INTERCEPTED; |
4836 | 4853 | ||
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 08e8a899e005..b8345dd41b25 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) | |||
112 | struct static_key_deferred apic_hw_disabled __read_mostly; | 112 | struct static_key_deferred apic_hw_disabled __read_mostly; |
113 | struct static_key_deferred apic_sw_disabled __read_mostly; | 113 | struct static_key_deferred apic_sw_disabled __read_mostly; |
114 | 114 | ||
115 | static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) | ||
116 | { | ||
117 | if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { | ||
118 | if (val & APIC_SPIV_APIC_ENABLED) | ||
119 | static_key_slow_dec_deferred(&apic_sw_disabled); | ||
120 | else | ||
121 | static_key_slow_inc(&apic_sw_disabled.key); | ||
122 | } | ||
123 | apic_set_reg(apic, APIC_SPIV, val); | ||
124 | } | ||
125 | |||
126 | static inline int apic_enabled(struct kvm_lapic *apic) | 115 | static inline int apic_enabled(struct kvm_lapic *apic) |
127 | { | 116 | { |
128 | return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); | 117 | return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); |
@@ -210,6 +199,20 @@ out: | |||
210 | kvm_vcpu_request_scan_ioapic(kvm); | 199 | kvm_vcpu_request_scan_ioapic(kvm); |
211 | } | 200 | } |
212 | 201 | ||
202 | static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) | ||
203 | { | ||
204 | u32 prev = kvm_apic_get_reg(apic, APIC_SPIV); | ||
205 | |||
206 | apic_set_reg(apic, APIC_SPIV, val); | ||
207 | if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) { | ||
208 | if (val & APIC_SPIV_APIC_ENABLED) { | ||
209 | static_key_slow_dec_deferred(&apic_sw_disabled); | ||
210 | recalculate_apic_map(apic->vcpu->kvm); | ||
211 | } else | ||
212 | static_key_slow_inc(&apic_sw_disabled.key); | ||
213 | } | ||
214 | } | ||
215 | |||
213 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) | 216 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) |
214 | { | 217 | { |
215 | apic_set_reg(apic, APIC_ID, id << 24); | 218 | apic_set_reg(apic, APIC_ID, id << 24); |
@@ -706,6 +709,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
706 | int result = 0; | 709 | int result = 0; |
707 | struct kvm_vcpu *vcpu = apic->vcpu; | 710 | struct kvm_vcpu *vcpu = apic->vcpu; |
708 | 711 | ||
712 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | ||
713 | trig_mode, vector); | ||
709 | switch (delivery_mode) { | 714 | switch (delivery_mode) { |
710 | case APIC_DM_LOWEST: | 715 | case APIC_DM_LOWEST: |
711 | vcpu->arch.apic_arb_prio++; | 716 | vcpu->arch.apic_arb_prio++; |
@@ -727,8 +732,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
727 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 732 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
728 | kvm_vcpu_kick(vcpu); | 733 | kvm_vcpu_kick(vcpu); |
729 | } | 734 | } |
730 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | ||
731 | trig_mode, vector, false); | ||
732 | break; | 735 | break; |
733 | 736 | ||
734 | case APIC_DM_REMRD: | 737 | case APIC_DM_REMRD: |
@@ -1352,6 +1355,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) | |||
1352 | return; | 1355 | return; |
1353 | 1356 | ||
1354 | hrtimer_cancel(&apic->lapic_timer.timer); | 1357 | hrtimer_cancel(&apic->lapic_timer.timer); |
1358 | /* Inject here so clearing tscdeadline won't override new value */ | ||
1359 | if (apic_has_pending_timer(vcpu)) | ||
1360 | kvm_inject_apic_timer_irqs(vcpu); | ||
1355 | apic->lapic_timer.tscdeadline = data; | 1361 | apic->lapic_timer.tscdeadline = data; |
1356 | start_apic_timer(apic); | 1362 | start_apic_timer(apic); |
1357 | } | 1363 | } |
@@ -1639,6 +1645,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
1639 | 1645 | ||
1640 | if (atomic_read(&apic->lapic_timer.pending) > 0) { | 1646 | if (atomic_read(&apic->lapic_timer.pending) > 0) { |
1641 | kvm_apic_local_deliver(apic, APIC_LVTT); | 1647 | kvm_apic_local_deliver(apic, APIC_LVTT); |
1648 | if (apic_lvtt_tscdeadline(apic)) | ||
1649 | apic->lapic_timer.tscdeadline = 0; | ||
1642 | atomic_set(&apic->lapic_timer.pending, 0); | 1650 | atomic_set(&apic->lapic_timer.pending, 0); |
1643 | } | 1651 | } |
1644 | } | 1652 | } |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..3201e93ebd07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) | |||
199 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | 199 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); |
200 | 200 | ||
201 | /* | 201 | /* |
202 | * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, | 202 | * the low bit of the generation number is always presumed to be zero. |
203 | * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation | 203 | * This disables mmio caching during memslot updates. The concept is |
204 | * number. | 204 | * similar to a seqcount but instead of retrying the access we just punt |
205 | * and ignore the cache. | ||
206 | * | ||
207 | * spte bits 3-11 are used as bits 1-9 of the generation number, | ||
208 | * the bits 52-61 are used as bits 10-19 of the generation number. | ||
205 | */ | 209 | */ |
206 | #define MMIO_SPTE_GEN_LOW_SHIFT 3 | 210 | #define MMIO_SPTE_GEN_LOW_SHIFT 2 |
207 | #define MMIO_SPTE_GEN_HIGH_SHIFT 52 | 211 | #define MMIO_SPTE_GEN_HIGH_SHIFT 52 |
208 | 212 | ||
209 | #define MMIO_GEN_SHIFT 19 | 213 | #define MMIO_GEN_SHIFT 20 |
210 | #define MMIO_GEN_LOW_SHIFT 9 | 214 | #define MMIO_GEN_LOW_SHIFT 10 |
211 | #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) | 215 | #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) |
212 | #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) | 216 | #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) |
213 | #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) | 217 | #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) |
214 | 218 | ||
@@ -236,12 +240,7 @@ static unsigned int get_mmio_spte_generation(u64 spte) | |||
236 | 240 | ||
237 | static unsigned int kvm_current_mmio_generation(struct kvm *kvm) | 241 | static unsigned int kvm_current_mmio_generation(struct kvm *kvm) |
238 | { | 242 | { |
239 | /* | 243 | return kvm_memslots(kvm)->generation & MMIO_GEN_MASK; |
240 | * Init kvm generation close to MMIO_MAX_GEN to easily test the | ||
241 | * code of handling generation number wrap-around. | ||
242 | */ | ||
243 | return (kvm_memslots(kvm)->generation + | ||
244 | MMIO_MAX_GEN - 150) & MMIO_GEN_MASK; | ||
245 | } | 244 | } |
246 | 245 | ||
247 | static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, | 246 | static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, |
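The layout can be sanity-checked in isolation: a 20-bit generation whose low bit is zero round-trips through spte bits 3-11 and 52-61. A standalone sketch of the pack/unpack pair, constants copied from the hunk above (the in-kernel versions additionally mix in shadow_mmio_mask, omitted here):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

#define MMIO_SPTE_GEN_LOW_SHIFT  2
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
#define MMIO_GEN_SHIFT           20
#define MMIO_GEN_LOW_SHIFT       10
#define MMIO_GEN_LOW_MASK        ((1 << MMIO_GEN_LOW_SHIFT) - 2)
#define MMIO_GEN_MASK            ((1 << MMIO_GEN_SHIFT) - 1)

static uint64_t pack_gen(unsigned int gen)
{
    uint64_t mask;

    mask  = (uint64_t)(gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
    mask |= ((uint64_t)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
    return mask;
}

static unsigned int unpack_gen(uint64_t spte)
{
    unsigned int gen;

    gen  = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
    gen |= (unsigned int)((spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT);
    return gen;
}

int main(void)
{
    /* the low bit is presumed zero, so only even generations round-trip */
    for (unsigned int gen = 0; gen <= MMIO_GEN_MASK; gen += 2)
        assert(unpack_gen(pack_gen(gen)) == gen);
    printf("all even 20-bit generations round-trip\n");
    return 0;
}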
@@ -296,11 +295,6 @@ static bool check_mmio_spte(struct kvm *kvm, u64 spte) | |||
296 | return likely(kvm_gen == spte_gen); | 295 | return likely(kvm_gen == spte_gen); |
297 | } | 296 | } |
298 | 297 | ||
299 | static inline u64 rsvd_bits(int s, int e) | ||
300 | { | ||
301 | return ((1ULL << (e - s + 1)) - 1) << s; | ||
302 | } | ||
303 | |||
304 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 298 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
305 | u64 dirty_mask, u64 nx_mask, u64 x_mask) | 299 | u64 dirty_mask, u64 nx_mask, u64 x_mask) |
306 | { | 300 | { |
@@ -1180,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | |||
1180 | * Write-protect on the specified @sptep, @pt_protect indicates whether | 1174 | * Write-protect on the specified @sptep, @pt_protect indicates whether |
1181 | * spte write-protection is caused by protecting shadow page table. | 1175 | * spte write-protection is caused by protecting shadow page table. |
1182 | * | 1176 | * |
1183 | * Note: write protection is difference between drity logging and spte | 1177 | * Note: write protection is difference between dirty logging and spte |
1184 | * protection: | 1178 | * protection: |
1185 | * - for dirty logging, the spte can be set to writable at anytime if | 1179 | * - for dirty logging, the spte can be set to writable at anytime if |
1186 | * its dirty bitmap is properly set. | 1180 | * its dirty bitmap is properly set. |
@@ -1268,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1268 | } | 1262 | } |
1269 | 1263 | ||
1270 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1264 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1271 | struct kvm_memory_slot *slot, unsigned long data) | 1265 | struct kvm_memory_slot *slot, gfn_t gfn, int level, |
1266 | unsigned long data) | ||
1272 | { | 1267 | { |
1273 | u64 *sptep; | 1268 | u64 *sptep; |
1274 | struct rmap_iterator iter; | 1269 | struct rmap_iterator iter; |
@@ -1276,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1276 | 1271 | ||
1277 | while ((sptep = rmap_get_first(*rmapp, &iter))) { | 1272 | while ((sptep = rmap_get_first(*rmapp, &iter))) { |
1278 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | 1273 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); |
1279 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); | 1274 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n", |
1275 | sptep, *sptep, gfn, level); | ||
1280 | 1276 | ||
1281 | drop_spte(kvm, sptep); | 1277 | drop_spte(kvm, sptep); |
1282 | need_tlb_flush = 1; | 1278 | need_tlb_flush = 1; |
@@ -1286,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1286 | } | 1282 | } |
1287 | 1283 | ||
1288 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1284 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1289 | struct kvm_memory_slot *slot, unsigned long data) | 1285 | struct kvm_memory_slot *slot, gfn_t gfn, int level, |
1286 | unsigned long data) | ||
1290 | { | 1287 | { |
1291 | u64 *sptep; | 1288 | u64 *sptep; |
1292 | struct rmap_iterator iter; | 1289 | struct rmap_iterator iter; |
@@ -1300,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1300 | 1297 | ||
1301 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | 1298 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { |
1302 | BUG_ON(!is_shadow_present_pte(*sptep)); | 1299 | BUG_ON(!is_shadow_present_pte(*sptep)); |
1303 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); | 1300 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", |
1301 | sptep, *sptep, gfn, level); | ||
1304 | 1302 | ||
1305 | need_flush = 1; | 1303 | need_flush = 1; |
1306 | 1304 | ||
@@ -1334,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, | |||
1334 | int (*handler)(struct kvm *kvm, | 1332 | int (*handler)(struct kvm *kvm, |
1335 | unsigned long *rmapp, | 1333 | unsigned long *rmapp, |
1336 | struct kvm_memory_slot *slot, | 1334 | struct kvm_memory_slot *slot, |
1335 | gfn_t gfn, | ||
1336 | int level, | ||
1337 | unsigned long data)) | 1337 | unsigned long data)) |
1338 | { | 1338 | { |
1339 | int j; | 1339 | int j; |
@@ -1363,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, | |||
1363 | j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { | 1363 | j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { |
1364 | unsigned long idx, idx_end; | 1364 | unsigned long idx, idx_end; |
1365 | unsigned long *rmapp; | 1365 | unsigned long *rmapp; |
1366 | gfn_t gfn = gfn_start; | ||
1366 | 1367 | ||
1367 | /* | 1368 | /* |
1368 | * {idx(page_j) | page_j intersects with | 1369 | * {idx(page_j) | page_j intersects with |
@@ -1373,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm, | |||
1373 | 1374 | ||
1374 | rmapp = __gfn_to_rmap(gfn_start, j, memslot); | 1375 | rmapp = __gfn_to_rmap(gfn_start, j, memslot); |
1375 | 1376 | ||
1376 | for (; idx <= idx_end; ++idx) | 1377 | for (; idx <= idx_end; |
1377 | ret |= handler(kvm, rmapp++, memslot, data); | 1378 | ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j))) |
1379 | ret |= handler(kvm, rmapp++, memslot, | ||
1380 | gfn, j, data); | ||
1378 | } | 1381 | } |
1379 | } | 1382 | } |
1380 | 1383 | ||
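The new ++idx step advances gfn by the number of base pages one rmap slot covers at level j, which grows by a factor of 512 per level. Assuming KVM_HPAGE_GFN_SHIFT matches the kernel's ((x) - PT_PAGE_TABLE_LEVEL) * 9 with PT_PAGE_TABLE_LEVEL == 1, the strides work out as:

#include <stdio.h>

#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)

int main(void)
{
    for (int level = 1; level <= 3; level++)
        printf("level %d: gfn stride %lu (%lu KiB per slot)\n", level,
               1UL << KVM_HPAGE_GFN_SHIFT(level),
               (1UL << KVM_HPAGE_GFN_SHIFT(level)) * 4);
    return 0;
}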
@@ -1385,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
1385 | unsigned long data, | 1388 | unsigned long data, |
1386 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | 1389 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, |
1387 | struct kvm_memory_slot *slot, | 1390 | struct kvm_memory_slot *slot, |
1391 | gfn_t gfn, int level, | ||
1388 | unsigned long data)) | 1392 | unsigned long data)) |
1389 | { | 1393 | { |
1390 | return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); | 1394 | return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); |
@@ -1406,24 +1410,14 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | |||
1406 | } | 1410 | } |
1407 | 1411 | ||
1408 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1412 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1409 | struct kvm_memory_slot *slot, unsigned long data) | 1413 | struct kvm_memory_slot *slot, gfn_t gfn, int level, |
1414 | unsigned long data) | ||
1410 | { | 1415 | { |
1411 | u64 *sptep; | 1416 | u64 *sptep; |
1412 | struct rmap_iterator uninitialized_var(iter); | 1417 | struct rmap_iterator uninitialized_var(iter); |
1413 | int young = 0; | 1418 | int young = 0; |
1414 | 1419 | ||
1415 | /* | 1420 | BUG_ON(!shadow_accessed_mask); |
1416 | * In case of absence of EPT Access and Dirty Bits supports, | ||
1417 | * emulate the accessed bit for EPT, by checking if this page has | ||
1418 | * an EPT mapping, and clearing it if it does. On the next access, | ||
1419 | * a new EPT mapping will be established. | ||
1420 | * This has some overhead, but not as much as the cost of swapping | ||
1421 | * out actively used pages or breaking up actively used hugepages. | ||
1422 | */ | ||
1423 | if (!shadow_accessed_mask) { | ||
1424 | young = kvm_unmap_rmapp(kvm, rmapp, slot, data); | ||
1425 | goto out; | ||
1426 | } | ||
1427 | 1421 | ||
1428 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 1422 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
1429 | sptep = rmap_get_next(&iter)) { | 1423 | sptep = rmap_get_next(&iter)) { |
@@ -1435,14 +1429,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1435 | (unsigned long *)sptep); | 1429 | (unsigned long *)sptep); |
1436 | } | 1430 | } |
1437 | } | 1431 | } |
1438 | out: | 1432 | trace_kvm_age_page(gfn, level, slot, young); |
1439 | /* @data has hva passed to kvm_age_hva(). */ | ||
1440 | trace_kvm_age_page(data, slot, young); | ||
1441 | return young; | 1433 | return young; |
1442 | } | 1434 | } |
1443 | 1435 | ||
1444 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1436 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1445 | struct kvm_memory_slot *slot, unsigned long data) | 1437 | struct kvm_memory_slot *slot, gfn_t gfn, |
1438 | int level, unsigned long data) | ||
1446 | { | 1439 | { |
1447 | u64 *sptep; | 1440 | u64 *sptep; |
1448 | struct rmap_iterator iter; | 1441 | struct rmap_iterator iter; |
@@ -1480,13 +1473,33 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
1480 | 1473 | ||
1481 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 1474 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
1482 | 1475 | ||
1483 | kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); | 1476 | kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); |
1484 | kvm_flush_remote_tlbs(vcpu->kvm); | 1477 | kvm_flush_remote_tlbs(vcpu->kvm); |
1485 | } | 1478 | } |
1486 | 1479 | ||
1487 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 1480 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) |
1488 | { | 1481 | { |
1489 | return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); | 1482 | /* |
1483 | * In case of absence of EPT Access and Dirty Bits supports, | ||
1484 | * emulate the accessed bit for EPT, by checking if this page has | ||
1485 | * an EPT mapping, and clearing it if it does. On the next access, | ||
1486 | * a new EPT mapping will be established. | ||
1487 | * This has some overhead, but not as much as the cost of swapping | ||
1488 | * out actively used pages or breaking up actively used hugepages. | ||
1489 | */ | ||
1490 | if (!shadow_accessed_mask) { | ||
1491 | /* | ||
1492 | * We are holding the kvm->mmu_lock, and we are blowing up | ||
1493 | * shadow PTEs. MMU notifier consumers need to be kept at bay. | ||
1494 | * This is correct as long as we don't decouple the mmu_lock | ||
1495 | * protected regions (like invalidate_range_start|end does). | ||
1496 | */ | ||
1497 | kvm->mmu_notifier_seq++; | ||
1498 | return kvm_handle_hva_range(kvm, start, end, 0, | ||
1499 | kvm_unmap_rmapp); | ||
1500 | } | ||
1501 | |||
1502 | return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); | ||
1490 | } | 1503 | } |
1491 | 1504 | ||
1492 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | 1505 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) |
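Without a hardware accessed bit, "young" is inferred by zapping the mapping and seeing whether it gets faulted back in before the next aging pass, which is why kvm_age_hva() falls back to kvm_unmap_rmapp() above. A toy model of that protocol (plain C, nothing kernel-specific; mapped[] stands in for the EPT):

#include <stdio.h>
#include <stdbool.h>

#define NPAGES 4

static bool mapped[NPAGES];

static void touch(int i)
{
    if (!mapped[i])
        mapped[i] = true; /* the "EPT violation" rebuilds the mapping */
}

static bool age(int i)
{
    bool young = mapped[i]; /* mapped again => accessed since last pass */

    mapped[i] = false;      /* zap so the next access faults */
    return young;
}

int main(void)
{
    touch(0);
    printf("page0 young: %d\n", age(0)); /* 1: touched since last zap */
    printf("page0 young: %d\n", age(0)); /* 0: untouched since */
    return 0;
}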
@@ -1749,7 +1762,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1749 | return 1; | 1762 | return 1; |
1750 | } | 1763 | } |
1751 | 1764 | ||
1752 | kvm_mmu_flush_tlb(vcpu); | 1765 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
1753 | return 0; | 1766 | return 0; |
1754 | } | 1767 | } |
1755 | 1768 | ||
@@ -1802,7 +1815,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
1802 | 1815 | ||
1803 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 1816 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
1804 | if (flush) | 1817 | if (flush) |
1805 | kvm_mmu_flush_tlb(vcpu); | 1818 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
1806 | } | 1819 | } |
1807 | 1820 | ||
1808 | struct mmu_page_path { | 1821 | struct mmu_page_path { |
@@ -2536,7 +2549,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2536 | true, host_writable)) { | 2549 | true, host_writable)) { |
2537 | if (write_fault) | 2550 | if (write_fault) |
2538 | *emulate = 1; | 2551 | *emulate = 1; |
2539 | kvm_mmu_flush_tlb(vcpu); | 2552 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
2540 | } | 2553 | } |
2541 | 2554 | ||
2542 | if (unlikely(is_mmio_spte(*sptep) && emulate)) | 2555 | if (unlikely(is_mmio_spte(*sptep) && emulate)) |
@@ -3163,7 +3176,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
3163 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3176 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
3164 | return; | 3177 | return; |
3165 | 3178 | ||
3166 | vcpu_clear_mmio_info(vcpu, ~0ul); | 3179 | vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); |
3167 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | 3180 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); |
3168 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 3181 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { |
3169 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3182 | hpa_t root = vcpu->arch.mmu.root_hpa; |
@@ -3206,7 +3219,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
3206 | { | 3219 | { |
3207 | if (exception) | 3220 | if (exception) |
3208 | exception->error_code = 0; | 3221 | exception->error_code = 0; |
3209 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); | 3222 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); |
3210 | } | 3223 | } |
3211 | 3224 | ||
3212 | static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3225 | static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) |
@@ -3450,13 +3463,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, | |||
3450 | context->nx = false; | 3463 | context->nx = false; |
3451 | } | 3464 | } |
3452 | 3465 | ||
3453 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | ||
3454 | { | ||
3455 | ++vcpu->stat.tlb_flush; | ||
3456 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
3457 | } | ||
3458 | EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); | ||
3459 | |||
3460 | void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) | 3466 | void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) |
3461 | { | 3467 | { |
3462 | mmu_free_roots(vcpu); | 3468 | mmu_free_roots(vcpu); |
@@ -3518,6 +3524,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3518 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 3524 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
3519 | u64 exb_bit_rsvd = 0; | 3525 | u64 exb_bit_rsvd = 0; |
3520 | u64 gbpages_bit_rsvd = 0; | 3526 | u64 gbpages_bit_rsvd = 0; |
3527 | u64 nonleaf_bit8_rsvd = 0; | ||
3521 | 3528 | ||
3522 | context->bad_mt_xwr = 0; | 3529 | context->bad_mt_xwr = 0; |
3523 | 3530 | ||
@@ -3525,6 +3532,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3525 | exb_bit_rsvd = rsvd_bits(63, 63); | 3532 | exb_bit_rsvd = rsvd_bits(63, 63); |
3526 | if (!guest_cpuid_has_gbpages(vcpu)) | 3533 | if (!guest_cpuid_has_gbpages(vcpu)) |
3527 | gbpages_bit_rsvd = rsvd_bits(7, 7); | 3534 | gbpages_bit_rsvd = rsvd_bits(7, 7); |
3535 | |||
3536 | /* | ||
3537 | * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for | ||
3538 | * leaf entries) on AMD CPUs only. | ||
3539 | */ | ||
3540 | if (guest_cpuid_is_amd(vcpu)) | ||
3541 | nonleaf_bit8_rsvd = rsvd_bits(8, 8); | ||
3542 | |||
3528 | switch (context->root_level) { | 3543 | switch (context->root_level) { |
3529 | case PT32_ROOT_LEVEL: | 3544 | case PT32_ROOT_LEVEL: |
3530 | /* no rsvd bits for 2 level 4K page table entries */ | 3545 | /* no rsvd bits for 2 level 4K page table entries */ |
@@ -3559,9 +3574,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3559 | break; | 3574 | break; |
3560 | case PT64_ROOT_LEVEL: | 3575 | case PT64_ROOT_LEVEL: |
3561 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 3576 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
3562 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7); | 3577 | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); |
3563 | context->rsvd_bits_mask[0][2] = exb_bit_rsvd | | 3578 | context->rsvd_bits_mask[0][2] = exb_bit_rsvd | |
3564 | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); | 3579 | nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); |
3565 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | | 3580 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | |
3566 | rsvd_bits(maxphyaddr, 51); | 3581 | rsvd_bits(maxphyaddr, 51); |
3567 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | | 3582 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | |
@@ -3962,7 +3977,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, | |||
3962 | if (remote_flush) | 3977 | if (remote_flush) |
3963 | kvm_flush_remote_tlbs(vcpu->kvm); | 3978 | kvm_flush_remote_tlbs(vcpu->kvm); |
3964 | else if (local_flush) | 3979 | else if (local_flush) |
3965 | kvm_mmu_flush_tlb(vcpu); | 3980 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
3966 | } | 3981 | } |
3967 | 3982 | ||
3968 | static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | 3983 | static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, |
@@ -4223,7 +4238,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | |||
4223 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | 4238 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) |
4224 | { | 4239 | { |
4225 | vcpu->arch.mmu.invlpg(vcpu, gva); | 4240 | vcpu->arch.mmu.invlpg(vcpu, gva); |
4226 | kvm_mmu_flush_tlb(vcpu); | 4241 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
4227 | ++vcpu->stat.invlpg; | 4242 | ++vcpu->stat.invlpg; |
4228 | } | 4243 | } |
4229 | EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); | 4244 | EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); |
@@ -4433,7 +4448,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) | |||
4433 | * The very rare case: if the generation-number is round, | 4448 | * The very rare case: if the generation-number is round, |
4434 | * zap all shadow pages. | 4449 | * zap all shadow pages. |
4435 | */ | 4450 | */ |
4436 | if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { | 4451 | if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { |
4437 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); | 4452 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); |
4438 | kvm_mmu_invalidate_zap_all_pages(kvm); | 4453 | kvm_mmu_invalidate_zap_all_pages(kvm); |
4439 | } | 4454 | } |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index b982112d2ca5..bde8ee725754 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -56,6 +56,11 @@ | |||
56 | #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) | 56 | #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) |
57 | #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) | 57 | #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) |
58 | 58 | ||
59 | static inline u64 rsvd_bits(int s, int e) | ||
60 | { | ||
61 | return ((1ULL << (e - s + 1)) - 1) << s; | ||
62 | } | ||
63 | |||
59 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); | 64 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); |
60 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); | 65 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); |
61 | 66 | ||
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 410776528265..806d58e3c320 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -298,8 +298,7 @@ retry_walk: | |||
298 | } | 298 | } |
299 | #endif | 299 | #endif |
300 | walker->max_level = walker->level; | 300 | walker->max_level = walker->level; |
301 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 301 | ASSERT(!is_long_mode(vcpu) && is_pae(vcpu)); |
302 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); | ||
303 | 302 | ||
304 | accessed_dirty = PT_GUEST_ACCESSED_MASK; | 303 | accessed_dirty = PT_GUEST_ACCESSED_MASK; |
305 | pt_access = pte_access = ACC_ALL; | 304 | pt_access = pte_access = ACC_ALL; |
@@ -321,9 +320,22 @@ retry_walk: | |||
321 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 320 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
322 | 321 | ||
323 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), | 322 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), |
324 | PFERR_USER_MASK|PFERR_WRITE_MASK); | 323 | PFERR_USER_MASK|PFERR_WRITE_MASK, |
324 | &walker->fault); | ||
325 | |||
326 | /* | ||
327 | 	 * FIXME: This can happen if emulation (for an INS/OUTS | ||
328 | * instruction) triggers a nested page fault. The exit | ||
329 | * qualification / exit info field will incorrectly have | ||
330 | * "guest page access" as the nested page fault's cause, | ||
331 | * instead of "guest page structure access". To fix this, | ||
332 | * the x86_exception struct should be augmented with enough | ||
333 | * information to fix the exit_qualification or exit_info_1 | ||
334 | * fields. | ||
335 | */ | ||
325 | if (unlikely(real_gfn == UNMAPPED_GVA)) | 336 | if (unlikely(real_gfn == UNMAPPED_GVA)) |
326 | goto error; | 337 | return 0; |
338 | |||
327 | real_gfn = gpa_to_gfn(real_gfn); | 339 | real_gfn = gpa_to_gfn(real_gfn); |
328 | 340 | ||
329 | host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, | 341 | host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, |
@@ -364,7 +376,7 @@ retry_walk: | |||
364 | if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) | 376 | if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) |
365 | gfn += pse36_gfn_delta(pte); | 377 | gfn += pse36_gfn_delta(pte); |
366 | 378 | ||
367 | real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); | 379 | real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault); |
368 | if (real_gpa == UNMAPPED_GVA) | 380 | if (real_gpa == UNMAPPED_GVA) |
369 | return 0; | 381 | return 0; |
370 | 382 | ||
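
All of the walker changes above thread a struct x86_exception through the translate_gpa() callback, so a failed nested translation can report its cause instead of only returning UNMAPPED_GVA. Reconstructed from the call sites (the declaration itself is not in these hunks), the member in struct kvm_mmu now reads approximately:

    gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                           struct x86_exception *exception);
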
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 3dd6accb64ec..8e6b7d869d2f 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/kvm_host.h> | 16 | #include <linux/kvm_host.h> |
17 | #include <linux/perf_event.h> | 17 | #include <linux/perf_event.h> |
18 | #include <asm/perf_event.h> | ||
18 | #include "x86.h" | 19 | #include "x86.h" |
19 | #include "cpuid.h" | 20 | #include "cpuid.h" |
20 | #include "lapic.h" | 21 | #include "lapic.h" |
@@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
463 | { | 464 | { |
464 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 465 | struct kvm_pmu *pmu = &vcpu->arch.pmu; |
465 | struct kvm_cpuid_entry2 *entry; | 466 | struct kvm_cpuid_entry2 *entry; |
466 | unsigned bitmap_len; | 467 | union cpuid10_eax eax; |
468 | union cpuid10_edx edx; | ||
467 | 469 | ||
468 | pmu->nr_arch_gp_counters = 0; | 470 | pmu->nr_arch_gp_counters = 0; |
469 | pmu->nr_arch_fixed_counters = 0; | 471 | pmu->nr_arch_fixed_counters = 0; |
@@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
475 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | 477 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); |
476 | if (!entry) | 478 | if (!entry) |
477 | return; | 479 | return; |
480 | eax.full = entry->eax; | ||
481 | edx.full = entry->edx; | ||
478 | 482 | ||
479 | pmu->version = entry->eax & 0xff; | 483 | pmu->version = eax.split.version_id; |
480 | if (!pmu->version) | 484 | if (!pmu->version) |
481 | return; | 485 | return; |
482 | 486 | ||
483 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, | 487 | pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, |
484 | INTEL_PMC_MAX_GENERIC); | 488 | INTEL_PMC_MAX_GENERIC); |
485 | pmu->counter_bitmask[KVM_PMC_GP] = | 489 | pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; |
486 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; | 490 | pmu->available_event_types = ~entry->ebx & |
487 | bitmap_len = (entry->eax >> 24) & 0xff; | 491 | ((1ull << eax.split.mask_length) - 1); |
488 | pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); | ||
489 | 492 | ||
490 | if (pmu->version == 1) { | 493 | if (pmu->version == 1) { |
491 | pmu->nr_arch_fixed_counters = 0; | 494 | pmu->nr_arch_fixed_counters = 0; |
492 | } else { | 495 | } else { |
493 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | 496 | pmu->nr_arch_fixed_counters = |
497 | min_t(int, edx.split.num_counters_fixed, | ||
494 | INTEL_PMC_MAX_FIXED); | 498 | INTEL_PMC_MAX_FIXED); |
495 | pmu->counter_bitmask[KVM_PMC_FIXED] = | 499 | pmu->counter_bitmask[KVM_PMC_FIXED] = |
496 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | 500 | ((u64)1 << edx.split.bit_width_fixed) - 1; |
497 | } | 501 | } |
498 | 502 | ||
499 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | 503 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | |
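
The unions replace the open-coded shifts, and their bit layout is fully determined by the old code being deleted (version = eax & 0xff, counters = (eax >> 8) & 0xff, and so on). As a reference sketch of what asm/perf_event.h provides:

    /* Reconstructed layout; the real definitions live in asm/perf_event.h. */
    union cpuid10_eax {
        struct {
            unsigned int version_id:8;    /* was  entry->eax        & 0xff */
            unsigned int num_counters:8;  /* was (entry->eax >>  8) & 0xff */
            unsigned int bit_width:8;     /* was (entry->eax >> 16) & 0xff */
            unsigned int mask_length:8;   /* was (entry->eax >> 24) & 0xff */
        } split;
        unsigned int full;
    };

    union cpuid10_edx {
        struct {
            unsigned int num_counters_fixed:5;  /* was  entry->edx       & 0x1f */
            unsigned int bit_width_fixed:8;     /* was (entry->edx >> 5) & 0xff */
            unsigned int reserved:19;
        } split;
        unsigned int full;
    };
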
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ddf742768ecf..f7f6a4a157a6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -622,7 +622,7 @@ static int has_svm(void) | |||
622 | return 1; | 622 | return 1; |
623 | } | 623 | } |
624 | 624 | ||
625 | static void svm_hardware_disable(void *garbage) | 625 | static void svm_hardware_disable(void) |
626 | { | 626 | { |
627 | /* Make sure we clean up behind us */ | 627 | /* Make sure we clean up behind us */ |
628 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) | 628 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) |
@@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage) | |||
633 | amd_pmu_disable_virt(); | 633 | amd_pmu_disable_virt(); |
634 | } | 634 | } |
635 | 635 | ||
636 | static int svm_hardware_enable(void *garbage) | 636 | static int svm_hardware_enable(void) |
637 | { | 637 | { |
638 | 638 | ||
639 | struct svm_cpu_data *sd; | 639 | struct svm_cpu_data *sd; |
@@ -1257,7 +1257,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1257 | svm->asid_generation = 0; | 1257 | svm->asid_generation = 0; |
1258 | init_vmcb(svm); | 1258 | init_vmcb(svm); |
1259 | 1259 | ||
1260 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 1260 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | |
1261 | MSR_IA32_APICBASE_ENABLE; | ||
1261 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 1262 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
1262 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1263 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1263 | 1264 | ||
@@ -1974,10 +1975,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, | |||
1974 | { | 1975 | { |
1975 | struct vcpu_svm *svm = to_svm(vcpu); | 1976 | struct vcpu_svm *svm = to_svm(vcpu); |
1976 | 1977 | ||
1977 | svm->vmcb->control.exit_code = SVM_EXIT_NPF; | 1978 | if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { |
1978 | svm->vmcb->control.exit_code_hi = 0; | 1979 | /* |
1979 | svm->vmcb->control.exit_info_1 = fault->error_code; | 1980 | * TODO: track the cause of the nested page fault, and |
1980 | svm->vmcb->control.exit_info_2 = fault->address; | 1981 | * correctly fill in the high bits of exit_info_1. |
1982 | */ | ||
1983 | svm->vmcb->control.exit_code = SVM_EXIT_NPF; | ||
1984 | svm->vmcb->control.exit_code_hi = 0; | ||
1985 | svm->vmcb->control.exit_info_1 = (1ULL << 32); | ||
1986 | svm->vmcb->control.exit_info_2 = fault->address; | ||
1987 | } | ||
1988 | |||
1989 | svm->vmcb->control.exit_info_1 &= ~0xffffffffULL; | ||
1990 | svm->vmcb->control.exit_info_1 |= fault->error_code; | ||
1991 | |||
1992 | /* | ||
1993 | * The present bit is always zero for page structure faults on real | ||
1994 | * hardware. | ||
1995 | */ | ||
1996 | if (svm->vmcb->control.exit_info_1 & (2ULL << 32)) | ||
1997 | svm->vmcb->control.exit_info_1 &= ~1; | ||
1981 | 1998 | ||
1982 | nested_svm_vmexit(svm); | 1999 | nested_svm_vmexit(svm); |
1983 | } | 2000 | } |
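
The packing above mirrors the #NPF EXITINFO1 layout described in the AMD APM vol. 2. As a sketch (the macro names here are illustrative, not from the kernel):

    /*
     * Nested page fault EXITINFO1:
     *   bits 31:0  #PF-style error code of the nested fault
     *   bit  32    fault on the guest's final physical address
     *              (the default assumed by the branch above)
     *   bit  33    fault on a guest page-table access; real hardware
     *              reports the present bit as zero in this case
     */
    #define NPF_EI1_ERROR_CODE  0xffffffffULL
    #define NPF_EI1_FINAL_GPA   (1ULL << 32)
    #define NPF_EI1_GPT_ACCESS  (2ULL << 32)
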
@@ -3031,7 +3048,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) | |||
3031 | return 0; | 3048 | return 0; |
3032 | } | 3049 | } |
3033 | 3050 | ||
3034 | u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) | 3051 | static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) |
3035 | { | 3052 | { |
3036 | struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); | 3053 | struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); |
3037 | return vmcb->control.tsc_offset + | 3054 | return vmcb->control.tsc_offset + |
@@ -4305,6 +4322,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) | |||
4305 | local_irq_enable(); | 4322 | local_irq_enable(); |
4306 | } | 4323 | } |
4307 | 4324 | ||
4325 | static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) | ||
4326 | { | ||
4327 | } | ||
4328 | |||
4308 | static struct kvm_x86_ops svm_x86_ops = { | 4329 | static struct kvm_x86_ops svm_x86_ops = { |
4309 | .cpu_has_kvm_support = has_svm, | 4330 | .cpu_has_kvm_support = has_svm, |
4310 | .disabled_by_bios = is_disabled, | 4331 | .disabled_by_bios = is_disabled, |
@@ -4349,7 +4370,6 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4349 | .cache_reg = svm_cache_reg, | 4370 | .cache_reg = svm_cache_reg, |
4350 | .get_rflags = svm_get_rflags, | 4371 | .get_rflags = svm_get_rflags, |
4351 | .set_rflags = svm_set_rflags, | 4372 | .set_rflags = svm_set_rflags, |
4352 | .fpu_activate = svm_fpu_activate, | ||
4353 | .fpu_deactivate = svm_fpu_deactivate, | 4373 | .fpu_deactivate = svm_fpu_deactivate, |
4354 | 4374 | ||
4355 | .tlb_flush = svm_flush_tlb, | 4375 | .tlb_flush = svm_flush_tlb, |
@@ -4406,6 +4426,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4406 | 4426 | ||
4407 | .check_intercept = svm_check_intercept, | 4427 | .check_intercept = svm_check_intercept, |
4408 | .handle_external_intr = svm_handle_external_intr, | 4428 | .handle_external_intr = svm_handle_external_intr, |
4429 | |||
4430 | .sched_in = svm_sched_in, | ||
4409 | }; | 4431 | }; |
4410 | 4432 | ||
4411 | static int __init svm_init(void) | 4433 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index e850a7d332be..6b06ab8748dd 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -415,15 +415,14 @@ TRACE_EVENT(kvm_apic_ipi, | |||
415 | ); | 415 | ); |
416 | 416 | ||
417 | TRACE_EVENT(kvm_apic_accept_irq, | 417 | TRACE_EVENT(kvm_apic_accept_irq, |
418 | TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced), | 418 | TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), |
419 | TP_ARGS(apicid, dm, tm, vec, coalesced), | 419 | TP_ARGS(apicid, dm, tm, vec), |
420 | 420 | ||
421 | TP_STRUCT__entry( | 421 | TP_STRUCT__entry( |
422 | __field( __u32, apicid ) | 422 | __field( __u32, apicid ) |
423 | __field( __u16, dm ) | 423 | __field( __u16, dm ) |
424 | __field( __u8, tm ) | 424 | __field( __u8, tm ) |
425 | __field( __u8, vec ) | 425 | __field( __u8, vec ) |
426 | __field( bool, coalesced ) | ||
427 | ), | 426 | ), |
428 | 427 | ||
429 | TP_fast_assign( | 428 | TP_fast_assign( |
@@ -431,14 +430,12 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
431 | __entry->dm = dm; | 430 | __entry->dm = dm; |
432 | __entry->tm = tm; | 431 | __entry->tm = tm; |
433 | __entry->vec = vec; | 432 | __entry->vec = vec; |
434 | __entry->coalesced = coalesced; | ||
435 | ), | 433 | ), |
436 | 434 | ||
437 | TP_printk("apicid %x vec %u (%s|%s)%s", | 435 | TP_printk("apicid %x vec %u (%s|%s)", |
438 | __entry->apicid, __entry->vec, | 436 | __entry->apicid, __entry->vec, |
439 | __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), | 437 | __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), |
440 | __entry->tm ? "level" : "edge", | 438 | __entry->tm ? "level" : "edge") |
441 | __entry->coalesced ? " (coalesced)" : "") | ||
442 | ); | 439 | ); |
443 | 440 | ||
444 | TRACE_EVENT(kvm_eoi, | 441 | TRACE_EVENT(kvm_eoi, |
@@ -850,6 +847,36 @@ TRACE_EVENT(kvm_track_tsc, | |||
850 | 847 | ||
851 | #endif /* CONFIG_X86_64 */ | 848 | #endif /* CONFIG_X86_64 */ |
852 | 849 | ||
850 | TRACE_EVENT(kvm_ple_window, | ||
851 | TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), | ||
852 | TP_ARGS(grow, vcpu_id, new, old), | ||
853 | |||
854 | TP_STRUCT__entry( | ||
855 | __field( bool, grow ) | ||
856 | __field( unsigned int, vcpu_id ) | ||
857 | __field( int, new ) | ||
858 | __field( int, old ) | ||
859 | ), | ||
860 | |||
861 | TP_fast_assign( | ||
862 | __entry->grow = grow; | ||
863 | __entry->vcpu_id = vcpu_id; | ||
864 | __entry->new = new; | ||
865 | __entry->old = old; | ||
866 | ), | ||
867 | |||
868 | TP_printk("vcpu %u: ple_window %d (%s %d)", | ||
869 | __entry->vcpu_id, | ||
870 | __entry->new, | ||
871 | __entry->grow ? "grow" : "shrink", | ||
872 | __entry->old) | ||
873 | ); | ||
874 | |||
875 | #define trace_kvm_ple_window_grow(vcpu_id, new, old) \ | ||
876 | trace_kvm_ple_window(true, vcpu_id, new, old) | ||
877 | #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ | ||
878 | trace_kvm_ple_window(false, vcpu_id, new, old) | ||
879 | |||
853 | #endif /* _TRACE_KVM_H */ | 880 | #endif /* _TRACE_KVM_H */ |
854 | 881 | ||
855 | #undef TRACE_INCLUDE_PATH | 882 | #undef TRACE_INCLUDE_PATH |
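
With the two wrapper macros, both grow and shrink funnel into the single kvm_ple_window event. Given the TP_printk format above, a grow from 4096 to 8192 on vcpu 0 would render in the trace buffer roughly as:

    kvm_ple_window: vcpu 0: ple_window 8192 (grow 4096)
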
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe11cf124a1..04fa1b8298c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO); | |||
125 | * Time is measured based on a counter that runs at the same rate as the TSC, | 125 | * Time is measured based on a counter that runs at the same rate as the TSC, |
126 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | 126 | * refer SDM volume 3b section 21.6.13 & 22.1.3. |
127 | */ | 127 | */ |
128 | #define KVM_VMX_DEFAULT_PLE_GAP 128 | 128 | #define KVM_VMX_DEFAULT_PLE_GAP 128 |
129 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | 129 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 |
130 | #define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2 | ||
131 | #define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0 | ||
132 | #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \ | ||
133 | INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW | ||
134 | |||
130 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | 135 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; |
131 | module_param(ple_gap, int, S_IRUGO); | 136 | module_param(ple_gap, int, S_IRUGO); |
132 | 137 | ||
133 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 138 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
134 | module_param(ple_window, int, S_IRUGO); | 139 | module_param(ple_window, int, S_IRUGO); |
135 | 140 | ||
141 | /* Default doubles the per-vcpu window on every PAUSE exit. */ | ||
142 | static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW; | ||
143 | module_param(ple_window_grow, int, S_IRUGO); | ||
144 | |||
145 | /* Default resets the per-vcpu window to ple_window on every shrink (sched-in). */ | ||
146 | static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK; | ||
147 | module_param(ple_window_shrink, int, S_IRUGO); | ||
148 | |||
149 | /* Default is to compute the maximum so we can never overflow. */ | ||
150 | static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; | ||
151 | static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; | ||
152 | module_param(ple_window_max, int, S_IRUGO); | ||
153 | |||
136 | extern const ulong vmx_return; | 154 | extern const ulong vmx_return; |
137 | 155 | ||
138 | #define NR_AUTOLOAD_MSRS 8 | 156 | #define NR_AUTOLOAD_MSRS 8 |
@@ -379,6 +397,7 @@ struct nested_vmx { | |||
379 | * we must keep them pinned while L2 runs. | 397 | * we must keep them pinned while L2 runs. |
380 | */ | 398 | */ |
381 | struct page *apic_access_page; | 399 | struct page *apic_access_page; |
400 | struct page *virtual_apic_page; | ||
382 | u64 msr_ia32_feature_control; | 401 | u64 msr_ia32_feature_control; |
383 | 402 | ||
384 | struct hrtimer preemption_timer; | 403 | struct hrtimer preemption_timer; |
@@ -484,6 +503,10 @@ struct vcpu_vmx { | |||
484 | 503 | ||
485 | /* Support for a guest hypervisor (nested VMX) */ | 504 | /* Support for a guest hypervisor (nested VMX) */ |
486 | struct nested_vmx nested; | 505 | struct nested_vmx nested; |
506 | |||
507 | /* Dynamic PLE window. */ | ||
508 | int ple_window; | ||
509 | bool ple_window_dirty; | ||
487 | }; | 510 | }; |
488 | 511 | ||
489 | enum segment_cache_field { | 512 | enum segment_cache_field { |
@@ -533,6 +556,7 @@ static int max_shadow_read_only_fields = | |||
533 | ARRAY_SIZE(shadow_read_only_fields); | 556 | ARRAY_SIZE(shadow_read_only_fields); |
534 | 557 | ||
535 | static unsigned long shadow_read_write_fields[] = { | 558 | static unsigned long shadow_read_write_fields[] = { |
559 | TPR_THRESHOLD, | ||
536 | GUEST_RIP, | 560 | GUEST_RIP, |
537 | GUEST_RSP, | 561 | GUEST_RSP, |
538 | GUEST_CR0, | 562 | GUEST_CR0, |
@@ -743,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); | |||
743 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); | 767 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); |
744 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); | 768 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); |
745 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); | 769 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); |
770 | static int alloc_identity_pagetable(struct kvm *kvm); | ||
746 | 771 | ||
747 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 772 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
748 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 773 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -2135,7 +2160,7 @@ static u64 guest_read_tsc(void) | |||
2135 | * Like guest_read_tsc, but always returns L1's notion of the timestamp | 2160 | * Like guest_read_tsc, but always returns L1's notion of the timestamp |
2136 | * counter, even if a nested guest (L2) is currently running. | 2161 | * counter, even if a nested guest (L2) is currently running. |
2137 | */ | 2162 | */ |
2138 | u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) | 2163 | static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) |
2139 | { | 2164 | { |
2140 | u64 tsc_offset; | 2165 | u64 tsc_offset; |
2141 | 2166 | ||
@@ -2330,7 +2355,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2330 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 2355 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
2331 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 2356 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
2332 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | | 2357 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | |
2333 | CPU_BASED_PAUSE_EXITING | | 2358 | CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW | |
2334 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 2359 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
2335 | /* | 2360 | /* |
2336 | * We can allow some features even when not supported by the | 2361 | * We can allow some features even when not supported by the |
@@ -2601,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2601 | break; | 2626 | break; |
2602 | case MSR_IA32_CR_PAT: | 2627 | case MSR_IA32_CR_PAT: |
2603 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | 2628 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
2629 | if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) | ||
2630 | return 1; | ||
2604 | vmcs_write64(GUEST_IA32_PAT, data); | 2631 | vmcs_write64(GUEST_IA32_PAT, data); |
2605 | vcpu->arch.pat = data; | 2632 | vcpu->arch.pat = data; |
2606 | break; | 2633 | break; |
@@ -2704,7 +2731,7 @@ static void kvm_cpu_vmxon(u64 addr) | |||
2704 | : "memory", "cc"); | 2731 | : "memory", "cc"); |
2705 | } | 2732 | } |
2706 | 2733 | ||
2707 | static int hardware_enable(void *garbage) | 2734 | static int hardware_enable(void) |
2708 | { | 2735 | { |
2709 | int cpu = raw_smp_processor_id(); | 2736 | int cpu = raw_smp_processor_id(); |
2710 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 2737 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
@@ -2768,7 +2795,7 @@ static void kvm_cpu_vmxoff(void) | |||
2768 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | 2795 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
2769 | } | 2796 | } |
2770 | 2797 | ||
2771 | static void hardware_disable(void *garbage) | 2798 | static void hardware_disable(void) |
2772 | { | 2799 | { |
2773 | if (vmm_exclusive) { | 2800 | if (vmm_exclusive) { |
2774 | vmclear_local_loaded_vmcss(); | 2801 | vmclear_local_loaded_vmcss(); |
@@ -3107,9 +3134,17 @@ static __init int hardware_setup(void) | |||
3107 | if (!cpu_has_vmx_unrestricted_guest()) | 3134 | if (!cpu_has_vmx_unrestricted_guest()) |
3108 | enable_unrestricted_guest = 0; | 3135 | enable_unrestricted_guest = 0; |
3109 | 3136 | ||
3110 | if (!cpu_has_vmx_flexpriority()) | 3137 | if (!cpu_has_vmx_flexpriority()) { |
3111 | flexpriority_enabled = 0; | 3138 | flexpriority_enabled = 0; |
3112 | 3139 | ||
3140 | /* | ||
3141 | 		 * set_apic_access_page_addr() is used to reload the APIC | ||
3142 | 		 * access page upon invalidation. No need to do anything if | ||
3143 | 		 * the processor does not have the APIC_ACCESS_ADDR VMCS field. | ||
3144 | */ | ||
3145 | kvm_x86_ops->set_apic_access_page_addr = NULL; | ||
3146 | } | ||
3147 | |||
3113 | if (!cpu_has_vmx_tpr_shadow()) | 3148 | if (!cpu_has_vmx_tpr_shadow()) |
3114 | kvm_x86_ops->update_cr8_intercept = NULL; | 3149 | kvm_x86_ops->update_cr8_intercept = NULL; |
3115 | 3150 | ||
@@ -3905,7 +3940,7 @@ static int init_rmode_tss(struct kvm *kvm) | |||
3905 | { | 3940 | { |
3906 | gfn_t fn; | 3941 | gfn_t fn; |
3907 | u16 data = 0; | 3942 | u16 data = 0; |
3908 | int r, idx, ret = 0; | 3943 | int idx, r; |
3909 | 3944 | ||
3910 | idx = srcu_read_lock(&kvm->srcu); | 3945 | idx = srcu_read_lock(&kvm->srcu); |
3911 | fn = kvm->arch.tss_addr >> PAGE_SHIFT; | 3946 | fn = kvm->arch.tss_addr >> PAGE_SHIFT; |
@@ -3927,32 +3962,32 @@ static int init_rmode_tss(struct kvm *kvm) | |||
3927 | r = kvm_write_guest_page(kvm, fn, &data, | 3962 | r = kvm_write_guest_page(kvm, fn, &data, |
3928 | RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, | 3963 | RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, |
3929 | sizeof(u8)); | 3964 | sizeof(u8)); |
3930 | if (r < 0) | ||
3931 | goto out; | ||
3932 | |||
3933 | ret = 1; | ||
3934 | out: | 3965 | out: |
3935 | srcu_read_unlock(&kvm->srcu, idx); | 3966 | srcu_read_unlock(&kvm->srcu, idx); |
3936 | return ret; | 3967 | return r; |
3937 | } | 3968 | } |
3938 | 3969 | ||
3939 | static int init_rmode_identity_map(struct kvm *kvm) | 3970 | static int init_rmode_identity_map(struct kvm *kvm) |
3940 | { | 3971 | { |
3941 | int i, idx, r, ret; | 3972 | int i, idx, r = 0; |
3942 | pfn_t identity_map_pfn; | 3973 | pfn_t identity_map_pfn; |
3943 | u32 tmp; | 3974 | u32 tmp; |
3944 | 3975 | ||
3945 | if (!enable_ept) | 3976 | if (!enable_ept) |
3946 | return 1; | ||
3947 | if (unlikely(!kvm->arch.ept_identity_pagetable)) { | ||
3948 | printk(KERN_ERR "EPT: identity-mapping pagetable " | ||
3949 | "haven't been allocated!\n"); | ||
3950 | return 0; | 3977 | return 0; |
3951 | } | 3978 | |
3979 | /* Protect kvm->arch.ept_identity_pagetable_done. */ | ||
3980 | mutex_lock(&kvm->slots_lock); | ||
3981 | |||
3952 | if (likely(kvm->arch.ept_identity_pagetable_done)) | 3982 | if (likely(kvm->arch.ept_identity_pagetable_done)) |
3953 | return 1; | 3983 | goto out2; |
3954 | ret = 0; | 3984 | |
3955 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; | 3985 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; |
3986 | |||
3987 | r = alloc_identity_pagetable(kvm); | ||
3988 | if (r < 0) | ||
3989 | goto out2; | ||
3990 | |||
3956 | idx = srcu_read_lock(&kvm->srcu); | 3991 | idx = srcu_read_lock(&kvm->srcu); |
3957 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); | 3992 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); |
3958 | if (r < 0) | 3993 | if (r < 0) |
@@ -3967,10 +4002,13 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
3967 | goto out; | 4002 | goto out; |
3968 | } | 4003 | } |
3969 | kvm->arch.ept_identity_pagetable_done = true; | 4004 | kvm->arch.ept_identity_pagetable_done = true; |
3970 | ret = 1; | 4005 | |
3971 | out: | 4006 | out: |
3972 | srcu_read_unlock(&kvm->srcu, idx); | 4007 | srcu_read_unlock(&kvm->srcu, idx); |
3973 | return ret; | 4008 | |
4009 | out2: | ||
4010 | mutex_unlock(&kvm->slots_lock); | ||
4011 | return r; | ||
3974 | } | 4012 | } |
3975 | 4013 | ||
3976 | static void seg_setup(int seg) | 4014 | static void seg_setup(int seg) |
@@ -3995,23 +4033,28 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
3995 | int r = 0; | 4033 | int r = 0; |
3996 | 4034 | ||
3997 | mutex_lock(&kvm->slots_lock); | 4035 | mutex_lock(&kvm->slots_lock); |
3998 | if (kvm->arch.apic_access_page) | 4036 | if (kvm->arch.apic_access_page_done) |
3999 | goto out; | 4037 | goto out; |
4000 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 4038 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
4001 | kvm_userspace_mem.flags = 0; | 4039 | kvm_userspace_mem.flags = 0; |
4002 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; | 4040 | kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE; |
4003 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 4041 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
4004 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); | 4042 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
4005 | if (r) | 4043 | if (r) |
4006 | goto out; | 4044 | goto out; |
4007 | 4045 | ||
4008 | page = gfn_to_page(kvm, 0xfee00); | 4046 | page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); |
4009 | if (is_error_page(page)) { | 4047 | if (is_error_page(page)) { |
4010 | r = -EFAULT; | 4048 | r = -EFAULT; |
4011 | goto out; | 4049 | goto out; |
4012 | } | 4050 | } |
4013 | 4051 | ||
4014 | kvm->arch.apic_access_page = page; | 4052 | /* |
4053 | * Do not pin the page in memory, so that memory hot-unplug | ||
4054 | * is able to migrate it. | ||
4055 | */ | ||
4056 | put_page(page); | ||
4057 | kvm->arch.apic_access_page_done = true; | ||
4015 | out: | 4058 | out: |
4016 | mutex_unlock(&kvm->slots_lock); | 4059 | mutex_unlock(&kvm->slots_lock); |
4017 | return r; | 4060 | return r; |
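
The put_page() here is the heart of the change: the APIC access page is no longer pinned, and a later mmu_notifier invalidation re-resolves it. The reload handler comes from elsewhere in this series, so the following is only an assumed, simplified sketch of the x86.c side:

    /* Sketch (assumed): on an mmu_notifier invalidation of the APIC access
     * page, each vcpu re-resolves the gfn and rewrites APIC_ACCESS_ADDR
     * instead of relying on a permanently pinned page.
     */
    static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
    {
        struct page *page;

        if (!kvm_x86_ops->set_apic_access_page_addr)
            return;

        page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
        kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
        put_page(page);	/* again: do not pin */
    }
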
@@ -4019,31 +4062,20 @@ out: | |||
4019 | 4062 | ||
4020 | static int alloc_identity_pagetable(struct kvm *kvm) | 4063 | static int alloc_identity_pagetable(struct kvm *kvm) |
4021 | { | 4064 | { |
4022 | struct page *page; | 4065 | /* Called with kvm->slots_lock held. */ |
4066 | |||
4023 | struct kvm_userspace_memory_region kvm_userspace_mem; | 4067 | struct kvm_userspace_memory_region kvm_userspace_mem; |
4024 | int r = 0; | 4068 | int r = 0; |
4025 | 4069 | ||
4026 | mutex_lock(&kvm->slots_lock); | 4070 | BUG_ON(kvm->arch.ept_identity_pagetable_done); |
4027 | if (kvm->arch.ept_identity_pagetable) | 4071 | |
4028 | goto out; | ||
4029 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | 4072 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; |
4030 | kvm_userspace_mem.flags = 0; | 4073 | kvm_userspace_mem.flags = 0; |
4031 | kvm_userspace_mem.guest_phys_addr = | 4074 | kvm_userspace_mem.guest_phys_addr = |
4032 | kvm->arch.ept_identity_map_addr; | 4075 | kvm->arch.ept_identity_map_addr; |
4033 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 4076 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
4034 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); | 4077 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
4035 | if (r) | ||
4036 | goto out; | ||
4037 | |||
4038 | page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); | ||
4039 | if (is_error_page(page)) { | ||
4040 | r = -EFAULT; | ||
4041 | goto out; | ||
4042 | } | ||
4043 | 4078 | ||
4044 | kvm->arch.ept_identity_pagetable = page; | ||
4045 | out: | ||
4046 | mutex_unlock(&kvm->slots_lock); | ||
4047 | return r; | 4079 | return r; |
4048 | } | 4080 | } |
4049 | 4081 | ||
@@ -4402,7 +4434,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4402 | 4434 | ||
4403 | if (ple_gap) { | 4435 | if (ple_gap) { |
4404 | vmcs_write32(PLE_GAP, ple_gap); | 4436 | vmcs_write32(PLE_GAP, ple_gap); |
4405 | vmcs_write32(PLE_WINDOW, ple_window); | 4437 | vmx->ple_window = ple_window; |
4438 | vmx->ple_window_dirty = true; | ||
4406 | } | 4439 | } |
4407 | 4440 | ||
4408 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); | 4441 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); |
@@ -4477,7 +4510,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4477 | 4510 | ||
4478 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 4511 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
4479 | kvm_set_cr8(&vmx->vcpu, 0); | 4512 | kvm_set_cr8(&vmx->vcpu, 0); |
4480 | apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 4513 | apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; |
4481 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4514 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) |
4482 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; | 4515 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; |
4483 | apic_base_msr.host_initiated = true; | 4516 | apic_base_msr.host_initiated = true; |
@@ -4537,9 +4570,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4537 | vmcs_write32(TPR_THRESHOLD, 0); | 4570 | vmcs_write32(TPR_THRESHOLD, 0); |
4538 | } | 4571 | } |
4539 | 4572 | ||
4540 | if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) | 4573 | kvm_vcpu_reload_apic_access_page(vcpu); |
4541 | vmcs_write64(APIC_ACCESS_ADDR, | ||
4542 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); | ||
4543 | 4574 | ||
4544 | if (vmx_vm_has_apicv(vcpu->kvm)) | 4575 | if (vmx_vm_has_apicv(vcpu->kvm)) |
4545 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); | 4576 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); |
@@ -4729,10 +4760,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
4729 | if (ret) | 4760 | if (ret) |
4730 | return ret; | 4761 | return ret; |
4731 | kvm->arch.tss_addr = addr; | 4762 | kvm->arch.tss_addr = addr; |
4732 | if (!init_rmode_tss(kvm)) | 4763 | return init_rmode_tss(kvm); |
4733 | return -ENOMEM; | ||
4734 | |||
4735 | return 0; | ||
4736 | } | 4764 | } |
4737 | 4765 | ||
4738 | static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) | 4766 | static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) |
@@ -5521,17 +5549,18 @@ static u64 ept_rsvd_mask(u64 spte, int level) | |||
5521 | for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) | 5549 | for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) |
5522 | mask |= (1ULL << i); | 5550 | mask |= (1ULL << i); |
5523 | 5551 | ||
5524 | if (level > 2) | 5552 | if (level == 4) |
5525 | /* bits 7:3 reserved */ | 5553 | /* bits 7:3 reserved */ |
5526 | mask |= 0xf8; | 5554 | mask |= 0xf8; |
5527 | else if (level == 2) { | 5555 | else if (spte & (1ULL << 7)) |
5528 | if (spte & (1ULL << 7)) | 5556 | /* |
5529 | /* 2MB ref, bits 20:12 reserved */ | 5557 | * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively, |
5530 | mask |= 0x1ff000; | 5558 | * level == 1 if the hypervisor is using the ignored bit 7. |
5531 | else | 5559 | */ |
5532 | /* bits 6:3 reserved */ | 5560 | mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE; |
5533 | mask |= 0x78; | 5561 | else if (level > 1) |
5534 | } | 5562 | /* bits 6:3 reserved */ |
5563 | mask |= 0x78; | ||
5535 | 5564 | ||
5536 | return mask; | 5565 | return mask; |
5537 | } | 5566 | } |
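
The new large-page branch derives the reserved low bits of the frame from the paging level instead of hard-coding them. A quick standalone check of the formula (not part of the patch):

    #include <assert.h>

    #define PAGE_SIZE 4096ULL

    int main(void)
    {
        /* level == 2: 2MB page, bits 20:12 reserved (the old 0x1ff000) */
        assert(((PAGE_SIZE << ((2 - 1) * 9)) - PAGE_SIZE) == 0x1ff000ULL);
        /* level == 3: 1GB page, bits 29:12 reserved */
        assert(((PAGE_SIZE << ((3 - 1) * 9)) - PAGE_SIZE) == 0x3ffff000ULL);
        return 0;
    }
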
@@ -5561,7 +5590,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | |||
5561 | WARN_ON(1); | 5590 | WARN_ON(1); |
5562 | } | 5591 | } |
5563 | 5592 | ||
5564 | if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) { | 5593 | /* bits 5:3 are _not_ reserved for large page or leaf page */ |
5594 | if ((rsvd_bits & 0x38) == 0) { | ||
5565 | u64 ept_mem_type = (spte & 0x38) >> 3; | 5595 | u64 ept_mem_type = (spte & 0x38) >> 3; |
5566 | 5596 | ||
5567 | if (ept_mem_type == 2 || ept_mem_type == 3 || | 5597 | if (ept_mem_type == 2 || ept_mem_type == 3 || |
@@ -5676,12 +5706,85 @@ out: | |||
5676 | return ret; | 5706 | return ret; |
5677 | } | 5707 | } |
5678 | 5708 | ||
5709 | static int __grow_ple_window(int val) | ||
5710 | { | ||
5711 | if (ple_window_grow < 1) | ||
5712 | return ple_window; | ||
5713 | |||
5714 | val = min(val, ple_window_actual_max); | ||
5715 | |||
5716 | if (ple_window_grow < ple_window) | ||
5717 | val *= ple_window_grow; | ||
5718 | else | ||
5719 | val += ple_window_grow; | ||
5720 | |||
5721 | return val; | ||
5722 | } | ||
5723 | |||
5724 | static int __shrink_ple_window(int val, int modifier, int minimum) | ||
5725 | { | ||
5726 | if (modifier < 1) | ||
5727 | return ple_window; | ||
5728 | |||
5729 | if (modifier < ple_window) | ||
5730 | val /= modifier; | ||
5731 | else | ||
5732 | val -= modifier; | ||
5733 | |||
5734 | return max(val, minimum); | ||
5735 | } | ||
5736 | |||
5737 | static void grow_ple_window(struct kvm_vcpu *vcpu) | ||
5738 | { | ||
5739 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5740 | int old = vmx->ple_window; | ||
5741 | |||
5742 | vmx->ple_window = __grow_ple_window(old); | ||
5743 | |||
5744 | if (vmx->ple_window != old) | ||
5745 | vmx->ple_window_dirty = true; | ||
5746 | |||
5747 | trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old); | ||
5748 | } | ||
5749 | |||
5750 | static void shrink_ple_window(struct kvm_vcpu *vcpu) | ||
5751 | { | ||
5752 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5753 | int old = vmx->ple_window; | ||
5754 | |||
5755 | vmx->ple_window = __shrink_ple_window(old, | ||
5756 | ple_window_shrink, ple_window); | ||
5757 | |||
5758 | if (vmx->ple_window != old) | ||
5759 | vmx->ple_window_dirty = true; | ||
5760 | |||
5761 | trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old); | ||
5762 | } | ||
5763 | |||
5764 | /* | ||
5765 | * ple_window_actual_max is computed to be one grow_ple_window() below | ||
5766 | * ple_window_max. (See __grow_ple_window for the reason.) | ||
5767 | * This prevents overflows, because ple_window_max is int. | ||
5768 | 	 * ple_window_max is effectively rounded down to a multiple of | ||
5769 | 	 * ple_window_grow in this process. | ||
5770 | * ple_window_max is also prevented from setting vmx->ple_window < ple_window. | ||
5771 | */ | ||
5772 | static void update_ple_window_actual_max(void) | ||
5773 | { | ||
5774 | ple_window_actual_max = | ||
5775 | __shrink_ple_window(max(ple_window_max, ple_window), | ||
5776 | ple_window_grow, INT_MIN); | ||
5777 | } | ||
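
To see the policy in numbers: with the defaults (ple_window = 4096, ple_window_grow = 2, ple_window_shrink = 0), every PAUSE exit doubles the window and every sched-in resets it to ple_window. A standalone model of __grow_ple_window (a sketch, not the kernel code):

    #include <stdio.h>

    static int grow(int val, int window_grow, int actual_max, int window)
    {
        if (window_grow < 1)
            return window;
        if (val > actual_max)
            val = actual_max;
        if (window_grow < window)
            val *= window_grow;	/* small modifier: multiplicative */
        else
            val += window_grow;	/* large modifier: additive */
        return val;
    }

    int main(void)
    {
        int w = 4096;
        for (int i = 0; i < 4; i++) {
            printf("%d\n", w);	/* prints 4096 8192 16384 32768 */
            w = grow(w, 2, 1 << 30, 4096);
        }
        return 0;
    }
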
5778 | |||
5679 | /* | 5779 | /* |
5680 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE | 5780 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE |
5681 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. | 5781 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. |
5682 | */ | 5782 | */ |
5683 | static int handle_pause(struct kvm_vcpu *vcpu) | 5783 | static int handle_pause(struct kvm_vcpu *vcpu) |
5684 | { | 5784 | { |
5785 | if (ple_gap) | ||
5786 | grow_ple_window(vcpu); | ||
5787 | |||
5685 | skip_emulated_instruction(vcpu); | 5788 | skip_emulated_instruction(vcpu); |
5686 | kvm_vcpu_on_spin(vcpu); | 5789 | kvm_vcpu_on_spin(vcpu); |
5687 | 5790 | ||
@@ -6146,7 +6249,11 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
6146 | /* Unpin physical memory we referred to in current vmcs02 */ | 6249 | /* Unpin physical memory we referred to in current vmcs02 */ |
6147 | if (vmx->nested.apic_access_page) { | 6250 | if (vmx->nested.apic_access_page) { |
6148 | nested_release_page(vmx->nested.apic_access_page); | 6251 | nested_release_page(vmx->nested.apic_access_page); |
6149 | vmx->nested.apic_access_page = 0; | 6252 | vmx->nested.apic_access_page = NULL; |
6253 | } | ||
6254 | if (vmx->nested.virtual_apic_page) { | ||
6255 | nested_release_page(vmx->nested.virtual_apic_page); | ||
6256 | vmx->nested.virtual_apic_page = NULL; | ||
6150 | } | 6257 | } |
6151 | 6258 | ||
6152 | nested_free_all_saved_vmcss(vmx); | 6259 | nested_free_all_saved_vmcss(vmx); |
@@ -6617,7 +6724,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
6617 | switch (type) { | 6724 | switch (type) { |
6618 | case VMX_EPT_EXTENT_GLOBAL: | 6725 | case VMX_EPT_EXTENT_GLOBAL: |
6619 | kvm_mmu_sync_roots(vcpu); | 6726 | kvm_mmu_sync_roots(vcpu); |
6620 | kvm_mmu_flush_tlb(vcpu); | 6727 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
6621 | nested_vmx_succeed(vcpu); | 6728 | nested_vmx_succeed(vcpu); |
6622 | break; | 6729 | break; |
6623 | default: | 6730 | default: |
@@ -6892,6 +6999,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6892 | case EXIT_REASON_TASK_SWITCH: | 6999 | case EXIT_REASON_TASK_SWITCH: |
6893 | return 1; | 7000 | return 1; |
6894 | case EXIT_REASON_CPUID: | 7001 | case EXIT_REASON_CPUID: |
7002 | if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) | ||
7003 | return 0; | ||
6895 | return 1; | 7004 | return 1; |
6896 | case EXIT_REASON_HLT: | 7005 | case EXIT_REASON_HLT: |
6897 | return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); | 7006 | return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); |
@@ -6936,7 +7045,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6936 | case EXIT_REASON_MCE_DURING_VMENTRY: | 7045 | case EXIT_REASON_MCE_DURING_VMENTRY: |
6937 | return 0; | 7046 | return 0; |
6938 | case EXIT_REASON_TPR_BELOW_THRESHOLD: | 7047 | case EXIT_REASON_TPR_BELOW_THRESHOLD: |
6939 | return 1; | 7048 | return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); |
6940 | case EXIT_REASON_APIC_ACCESS: | 7049 | case EXIT_REASON_APIC_ACCESS: |
6941 | return nested_cpu_has2(vmcs12, | 7050 | return nested_cpu_has2(vmcs12, |
6942 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 7051 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
@@ -7057,6 +7166,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
7057 | 7166 | ||
7058 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | 7167 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) |
7059 | { | 7168 | { |
7169 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
7170 | |||
7171 | if (is_guest_mode(vcpu) && | ||
7172 | nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) | ||
7173 | return; | ||
7174 | |||
7060 | if (irr == -1 || tpr < irr) { | 7175 | if (irr == -1 || tpr < irr) { |
7061 | vmcs_write32(TPR_THRESHOLD, 0); | 7176 | vmcs_write32(TPR_THRESHOLD, 0); |
7062 | return; | 7177 | return; |
@@ -7094,6 +7209,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | |||
7094 | vmx_set_msr_bitmap(vcpu); | 7209 | vmx_set_msr_bitmap(vcpu); |
7095 | } | 7210 | } |
7096 | 7211 | ||
7212 | static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) | ||
7213 | { | ||
7214 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
7215 | |||
7216 | /* | ||
7217 | * Currently we do not handle the nested case where L2 has an | ||
7218 | * APIC access page of its own; that page is still pinned. | ||
7219 | * Hence, we skip the case where the VCPU is in guest mode _and_ | ||
7220 | * L1 prepared an APIC access page for L2. | ||
7221 | * | ||
7222 | * For the case where L1 and L2 share the same APIC access page | ||
7223 | * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear | ||
7224 | * in the vmcs12), this function will only update either the vmcs01 | ||
7225 | * or the vmcs02. If the former, the vmcs02 will be updated by | ||
7226 | * prepare_vmcs02. If the latter, the vmcs01 will be updated in | ||
7227 | * the next L2->L1 exit. | ||
7228 | */ | ||
7229 | if (!is_guest_mode(vcpu) || | ||
7230 | !nested_cpu_has2(vmx->nested.current_vmcs12, | ||
7231 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | ||
7232 | vmcs_write64(APIC_ACCESS_ADDR, hpa); | ||
7233 | } | ||
7234 | |||
7097 | static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) | 7235 | static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) |
7098 | { | 7236 | { |
7099 | u16 status; | 7237 | u16 status; |
@@ -7387,6 +7525,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
7387 | if (vmx->emulation_required) | 7525 | if (vmx->emulation_required) |
7388 | return; | 7526 | return; |
7389 | 7527 | ||
7528 | if (vmx->ple_window_dirty) { | ||
7529 | vmx->ple_window_dirty = false; | ||
7530 | vmcs_write32(PLE_WINDOW, vmx->ple_window); | ||
7531 | } | ||
7532 | |||
7390 | if (vmx->nested.sync_shadow_vmcs) { | 7533 | if (vmx->nested.sync_shadow_vmcs) { |
7391 | copy_vmcs12_to_shadow(vmx); | 7534 | copy_vmcs12_to_shadow(vmx); |
7392 | vmx->nested.sync_shadow_vmcs = false; | 7535 | vmx->nested.sync_shadow_vmcs = false; |
@@ -7642,10 +7785,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
7642 | if (!kvm->arch.ept_identity_map_addr) | 7785 | if (!kvm->arch.ept_identity_map_addr) |
7643 | kvm->arch.ept_identity_map_addr = | 7786 | kvm->arch.ept_identity_map_addr = |
7644 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 7787 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; |
7645 | err = -ENOMEM; | 7788 | err = init_rmode_identity_map(kvm); |
7646 | if (alloc_identity_pagetable(kvm) != 0) | 7789 | if (err) |
7647 | goto free_vmcs; | ||
7648 | if (!init_rmode_identity_map(kvm)) | ||
7649 | goto free_vmcs; | 7790 | goto free_vmcs; |
7650 | } | 7791 | } |
7651 | 7792 | ||
@@ -7824,6 +7965,55 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | |||
7824 | kvm_inject_page_fault(vcpu, fault); | 7965 | kvm_inject_page_fault(vcpu, fault); |
7825 | } | 7966 | } |
7826 | 7967 | ||
7968 | static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | ||
7969 | struct vmcs12 *vmcs12) | ||
7970 | { | ||
7971 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
7972 | |||
7973 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | ||
7974 | /* TODO: Also verify bits beyond physical address width are 0 */ | ||
7975 | if (!PAGE_ALIGNED(vmcs12->apic_access_addr)) | ||
7976 | return false; | ||
7977 | |||
7978 | /* | ||
7979 | * Translate L1 physical address to host physical | ||
7980 | * address for vmcs02. Keep the page pinned, so this | ||
7981 | * physical address remains valid. We keep a reference | ||
7982 | * to it so we can release it later. | ||
7983 | */ | ||
7984 | if (vmx->nested.apic_access_page) /* shouldn't happen */ | ||
7985 | nested_release_page(vmx->nested.apic_access_page); | ||
7986 | vmx->nested.apic_access_page = | ||
7987 | nested_get_page(vcpu, vmcs12->apic_access_addr); | ||
7988 | } | ||
7989 | |||
7990 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | ||
7991 | /* TODO: Also verify bits beyond physical address width are 0 */ | ||
7992 | if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr)) | ||
7993 | return false; | ||
7994 | |||
7995 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ | ||
7996 | nested_release_page(vmx->nested.virtual_apic_page); | ||
7997 | vmx->nested.virtual_apic_page = | ||
7998 | nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); | ||
7999 | |||
8000 | /* | ||
8001 | * Failing the vm entry is _not_ what the processor does | ||
8002 | * but it's basically the only possibility we have. | ||
8003 | * We could still enter the guest if CR8 load exits are | ||
8004 | * enabled, CR8 store exits are enabled, and virtualize APIC | ||
8005 | * access is disabled; in this case the processor would never | ||
8006 | * use the TPR shadow and we could simply clear the bit from | ||
8007 | * the execution control. But such a configuration is useless, | ||
8008 | * so let's keep the code simple. | ||
8009 | */ | ||
8010 | if (!vmx->nested.virtual_apic_page) | ||
8011 | return false; | ||
8012 | } | ||
8013 | |||
8014 | return true; | ||
8015 | } | ||
8016 | |||
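
nested_get_vmcs12_pages() centralizes the page validation and pinning that used to sit inline in the VM-entry path; as the hunk further below shows, the entry check now reduces to:

    if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
        /*TODO: Also verify bits beyond physical address width are 0*/
        nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
        return 1;
    }
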
7827 | static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) | 8017 | static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) |
7828 | { | 8018 | { |
7829 | u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; | 8019 | u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; |
@@ -7849,7 +8039,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) | |||
7849 | /* | 8039 | /* |
7850 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | 8040 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested |
7851 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | 8041 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it |
7852 | * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2 | 8042 | * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 |
7853 | * guest in a way that will both be appropriate to L1's requests, and our | 8043 | * guest in a way that will both be appropriate to L1's requests, and our |
7854 | * needs. In addition to modifying the active vmcs (which is vmcs02), this | 8044 | * needs. In addition to modifying the active vmcs (which is vmcs02), this |
7855 | * function also has additional necessary side-effects, like setting various | 8045 | * function also has additional necessary side-effects, like setting various |
@@ -7970,16 +8160,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7970 | 8160 | ||
7971 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { | 8161 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { |
7972 | /* | 8162 | /* |
7973 | * Translate L1 physical address to host physical | ||
7974 | * address for vmcs02. Keep the page pinned, so this | ||
7975 | * physical address remains valid. We keep a reference | ||
7976 | * to it so we can release it later. | ||
7977 | */ | ||
7978 | if (vmx->nested.apic_access_page) /* shouldn't happen */ | ||
7979 | nested_release_page(vmx->nested.apic_access_page); | ||
7980 | vmx->nested.apic_access_page = | ||
7981 | nested_get_page(vcpu, vmcs12->apic_access_addr); | ||
7982 | /* | ||
7983 | * If translation failed, no matter: This feature asks | 8163 | * If translation failed, no matter: This feature asks |
7984 | * to exit when accessing the given address, and if it | 8164 | * to exit when accessing the given address, and if it |
7985 | * can never be accessed, this feature won't do | 8165 | * can never be accessed, this feature won't do |
@@ -7994,8 +8174,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7994 | } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { | 8174 | } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { |
7995 | exec_control |= | 8175 | exec_control |= |
7996 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 8176 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
7997 | vmcs_write64(APIC_ACCESS_ADDR, | 8177 | kvm_vcpu_reload_apic_access_page(vcpu); |
7998 | page_to_phys(vcpu->kvm->arch.apic_access_page)); | ||
7999 | } | 8178 | } |
8000 | 8179 | ||
8001 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 8180 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
@@ -8024,6 +8203,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8024 | exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; | 8203 | exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; |
8025 | exec_control &= ~CPU_BASED_TPR_SHADOW; | 8204 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
8026 | exec_control |= vmcs12->cpu_based_vm_exec_control; | 8205 | exec_control |= vmcs12->cpu_based_vm_exec_control; |
8206 | |||
8207 | if (exec_control & CPU_BASED_TPR_SHADOW) { | ||
8208 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | ||
8209 | page_to_phys(vmx->nested.virtual_apic_page)); | ||
8210 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); | ||
8211 | } | ||
8212 | |||
8027 | /* | 8213 | /* |
8028 | * Merging of IO and MSR bitmaps not currently supported. | 8214 | * Merging of IO and MSR bitmaps not currently supported. |
8029 | * Rather, exit every time. | 8215 | * Rather, exit every time. |
@@ -8185,8 +8371,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8185 | return 1; | 8371 | return 1; |
8186 | } | 8372 | } |
8187 | 8373 | ||
8188 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && | 8374 | if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { |
8189 | !PAGE_ALIGNED(vmcs12->apic_access_addr)) { | ||
8190 | /*TODO: Also verify bits beyond physical address width are 0*/ | 8375 | /*TODO: Also verify bits beyond physical address width are 0*/ |
8191 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 8376 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8192 | return 1; | 8377 | return 1; |
@@ -8790,10 +8975,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
8790 | /* Unpin physical memory we referred to in vmcs02 */ | 8975 | /* Unpin physical memory we referred to in vmcs02 */ |
8791 | if (vmx->nested.apic_access_page) { | 8976 | if (vmx->nested.apic_access_page) { |
8792 | nested_release_page(vmx->nested.apic_access_page); | 8977 | nested_release_page(vmx->nested.apic_access_page); |
8793 | vmx->nested.apic_access_page = 0; | 8978 | vmx->nested.apic_access_page = NULL; |
8979 | } | ||
8980 | if (vmx->nested.virtual_apic_page) { | ||
8981 | nested_release_page(vmx->nested.virtual_apic_page); | ||
8982 | vmx->nested.virtual_apic_page = NULL; | ||
8794 | } | 8983 | } |
8795 | 8984 | ||
8796 | /* | 8985 | /* |
8986 | 	 * While running in L2, an mmu_notifier may have forced a reload of | ||
8987 | 	 * the page's hpa for the L2 vmcs; reload it for L1 before entering L1. | ||
8988 | */ | ||
8989 | kvm_vcpu_reload_apic_access_page(vcpu); | ||
8990 | |||
8991 | /* | ||
8797 | * Exiting from L2 to L1, we're now back to L1 which thinks it just | 8992 | * Exiting from L2 to L1, we're now back to L1 which thinks it just |
8798 | * finished a VMLAUNCH or VMRESUME instruction, so we need to set the | 8993 | * finished a VMLAUNCH or VMRESUME instruction, so we need to set the |
8799 | * success or failure flag accordingly. | 8994 | * success or failure flag accordingly. |
@@ -8846,6 +9041,12 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, | |||
8846 | return X86EMUL_CONTINUE; | 9041 | return X86EMUL_CONTINUE; |
8847 | } | 9042 | } |
8848 | 9043 | ||
9044 | static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) | ||
9045 | { | ||
9046 | if (ple_gap) | ||
9047 | shrink_ple_window(vcpu); | ||
9048 | } | ||
9049 | |||
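
The empty svm_sched_in() earlier and this vmx_sched_in() fill in the new .sched_in hook: the PLE window grows on PAUSE exits (handle_pause above) and shrinks whenever the vcpu is scheduled back in. The generic plumbing that invokes the hook is assumed to look roughly like:

    /* Assumed shape of the arch callback wired up by this series: */
    void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
    {
        kvm_x86_ops->sched_in(vcpu, cpu);
    }
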
8849 | static struct kvm_x86_ops vmx_x86_ops = { | 9050 | static struct kvm_x86_ops vmx_x86_ops = { |
8850 | .cpu_has_kvm_support = cpu_has_kvm_support, | 9051 | .cpu_has_kvm_support = cpu_has_kvm_support, |
8851 | .disabled_by_bios = vmx_disabled_by_bios, | 9052 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -8890,7 +9091,6 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
8890 | .cache_reg = vmx_cache_reg, | 9091 | .cache_reg = vmx_cache_reg, |
8891 | .get_rflags = vmx_get_rflags, | 9092 | .get_rflags = vmx_get_rflags, |
8892 | .set_rflags = vmx_set_rflags, | 9093 | .set_rflags = vmx_set_rflags, |
8893 | .fpu_activate = vmx_fpu_activate, | ||
8894 | .fpu_deactivate = vmx_fpu_deactivate, | 9094 | .fpu_deactivate = vmx_fpu_deactivate, |
8895 | 9095 | ||
8896 | .tlb_flush = vmx_flush_tlb, | 9096 | .tlb_flush = vmx_flush_tlb, |
@@ -8913,6 +9113,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
8913 | .enable_irq_window = enable_irq_window, | 9113 | .enable_irq_window = enable_irq_window, |
8914 | .update_cr8_intercept = update_cr8_intercept, | 9114 | .update_cr8_intercept = update_cr8_intercept, |
8915 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, | 9115 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, |
9116 | .set_apic_access_page_addr = vmx_set_apic_access_page_addr, | ||
8916 | .vm_has_apicv = vmx_vm_has_apicv, | 9117 | .vm_has_apicv = vmx_vm_has_apicv, |
8917 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 9118 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
8918 | .hwapic_irr_update = vmx_hwapic_irr_update, | 9119 | .hwapic_irr_update = vmx_hwapic_irr_update, |
@@ -8951,6 +9152,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
8951 | .mpx_supported = vmx_mpx_supported, | 9152 | .mpx_supported = vmx_mpx_supported, |
8952 | 9153 | ||
8953 | .check_nested_events = vmx_check_nested_events, | 9154 | .check_nested_events = vmx_check_nested_events, |
9155 | |||
9156 | .sched_in = vmx_sched_in, | ||
8954 | }; | 9157 | }; |
8955 | 9158 | ||
8956 | static int __init vmx_init(void) | 9159 | static int __init vmx_init(void) |
@@ -9065,6 +9268,8 @@ static int __init vmx_init(void) | |||
9065 | } else | 9268 | } else |
9066 | kvm_disable_tdp(); | 9269 | kvm_disable_tdp(); |
9067 | 9270 | ||
9271 | update_ple_window_actual_max(); | ||
9272 | |||
9068 | return 0; | 9273 | return 0; |
9069 | 9274 | ||
9070 | out7: | 9275 | out7: |
@@ -9098,7 +9303,7 @@ static void __exit vmx_exit(void) | |||
9098 | free_page((unsigned long)vmx_vmread_bitmap); | 9303 | free_page((unsigned long)vmx_vmread_bitmap); |
9099 | 9304 | ||
9100 | #ifdef CONFIG_KEXEC | 9305 | #ifdef CONFIG_KEXEC |
9101 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); | 9306 | RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); |
9102 | synchronize_rcu(); | 9307 | synchronize_rcu(); |
9103 | #endif | 9308 | #endif |
9104 | 9309 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f1e22d3b286..5430e4b0af29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | |||
246 | } | 246 | } |
247 | EXPORT_SYMBOL_GPL(kvm_set_shared_msr); | 247 | EXPORT_SYMBOL_GPL(kvm_set_shared_msr); |
248 | 248 | ||
249 | static void drop_user_return_notifiers(void *ignore) | 249 | static void drop_user_return_notifiers(void) |
250 | { | 250 | { |
251 | unsigned int cpu = smp_processor_id(); | 251 | unsigned int cpu = smp_processor_id(); |
252 | struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); | 252 | struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); |
@@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
408 | } | 408 | } |
409 | EXPORT_SYMBOL_GPL(kvm_inject_page_fault); | 409 | EXPORT_SYMBOL_GPL(kvm_inject_page_fault); |
410 | 410 | ||
411 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | 411 | static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) |
412 | { | 412 | { |
413 | if (mmu_is_nested(vcpu) && !fault->nested_page_fault) | 413 | if (mmu_is_nested(vcpu) && !fault->nested_page_fault) |
414 | vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); | 414 | vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); |
415 | else | 415 | else |
416 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); | 416 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); |
417 | |||
418 | return fault->nested_page_fault; | ||
417 | } | 419 | } |
418 | 420 | ||
419 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 421 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -457,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
457 | gfn_t ngfn, void *data, int offset, int len, | 459 | gfn_t ngfn, void *data, int offset, int len, |
458 | u32 access) | 460 | u32 access) |
459 | { | 461 | { |
462 | struct x86_exception exception; | ||
460 | gfn_t real_gfn; | 463 | gfn_t real_gfn; |
461 | gpa_t ngpa; | 464 | gpa_t ngpa; |
462 | 465 | ||
463 | ngpa = gfn_to_gpa(ngfn); | 466 | ngpa = gfn_to_gpa(ngfn); |
464 | real_gfn = mmu->translate_gpa(vcpu, ngpa, access); | 467 | real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception); |
465 | if (real_gfn == UNMAPPED_GVA) | 468 | if (real_gfn == UNMAPPED_GVA) |
466 | return -EFAULT; | 469 | return -EFAULT; |
467 | 470 | ||
@@ -726,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
726 | { | 729 | { |
727 | if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { | 730 | if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { |
728 | kvm_mmu_sync_roots(vcpu); | 731 | kvm_mmu_sync_roots(vcpu); |
729 | kvm_mmu_flush_tlb(vcpu); | 732 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
730 | return 0; | 733 | return 0; |
731 | } | 734 | } |
732 | 735 | ||
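Replacing the direct kvm_mmu_flush_tlb() call with a KVM_REQ_TLB_FLUSH request defers the flush to the next vcpu_enter_guest() pass, where this patch routes it through the new kvm_vcpu_flush_tlb() so the flush is also counted in vcpu->stat. The request helpers used throughout this file are thin bitops wrappers, roughly (per include/linux/kvm_host.h of this era):

	static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
	{
		set_bit(req, &vcpu->requests);
	}

	static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
	{
		if (test_bit(req, &vcpu->requests)) {
			clear_bit(req, &vcpu->requests);
			return true;
		}
		return false;
	}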
@@ -1518,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
1518 | pvclock_update_vm_gtod_copy(kvm); | 1521 | pvclock_update_vm_gtod_copy(kvm); |
1519 | 1522 | ||
1520 | kvm_for_each_vcpu(i, vcpu, kvm) | 1523 | kvm_for_each_vcpu(i, vcpu, kvm) |
1521 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | 1524 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
1522 | 1525 | ||
1523 | /* guest entries allowed */ | 1526 | /* guest entries allowed */ |
1524 | kvm_for_each_vcpu(i, vcpu, kvm) | 1527 | kvm_for_each_vcpu(i, vcpu, kvm) |
@@ -1661,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work) | |||
1661 | struct kvm_vcpu *vcpu; | 1664 | struct kvm_vcpu *vcpu; |
1662 | 1665 | ||
1663 | kvm_for_each_vcpu(i, vcpu, kvm) { | 1666 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1664 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | 1667 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
1665 | kvm_vcpu_kick(vcpu); | 1668 | kvm_vcpu_kick(vcpu); |
1666 | } | 1669 | } |
1667 | } | 1670 | } |
@@ -1670,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) | |||
1670 | { | 1673 | { |
1671 | struct kvm *kvm = v->kvm; | 1674 | struct kvm *kvm = v->kvm; |
1672 | 1675 | ||
1673 | set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests); | 1676 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); |
1674 | schedule_delayed_work(&kvm->arch.kvmclock_update_work, | 1677 | schedule_delayed_work(&kvm->arch.kvmclock_update_work, |
1675 | KVMCLOCK_UPDATE_DELAY); | 1678 | KVMCLOCK_UPDATE_DELAY); |
1676 | } | 1679 | } |
@@ -1723,9 +1726,10 @@ static bool valid_mtrr_type(unsigned t) | |||
1723 | return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ | 1726 | return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ |
1724 | } | 1727 | } |
1725 | 1728 | ||
1726 | static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1729 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1727 | { | 1730 | { |
1728 | int i; | 1731 | int i; |
1732 | u64 mask; | ||
1729 | 1733 | ||
1730 | if (!msr_mtrr_valid(msr)) | 1734 | if (!msr_mtrr_valid(msr)) |
1731 | return false; | 1735 | return false; |
@@ -1747,14 +1751,31 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1747 | } | 1751 | } |
1748 | 1752 | ||
1749 | /* variable MTRRs */ | 1753 | /* variable MTRRs */ |
1750 | return valid_mtrr_type(data & 0xff); | 1754 | WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); |
1755 | |||
1756 | mask = (~0ULL) << cpuid_maxphyaddr(vcpu); | ||
1757 | if ((msr & 1) == 0) { | ||
1758 | /* MTRR base */ | ||
1759 | if (!valid_mtrr_type(data & 0xff)) | ||
1760 | return false; | ||
1761 | mask |= 0xf00; | ||
1762 | } else | ||
1763 | /* MTRR mask */ | ||
1764 | mask |= 0x7ff; | ||
1765 | if (data & mask) { | ||
1766 | kvm_inject_gp(vcpu, 0); | ||
1767 | return false; | ||
1768 | } | ||
1769 | |||
1770 | return true; | ||
1751 | } | 1771 | } |
1772 | EXPORT_SYMBOL_GPL(kvm_mtrr_valid); | ||
1752 | 1773 | ||
1753 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1774 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1754 | { | 1775 | { |
1755 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | 1776 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; |
1756 | 1777 | ||
1757 | if (!mtrr_valid(vcpu, msr, data)) | 1778 | if (!kvm_mtrr_valid(vcpu, msr, data)) |
1758 | return 1; | 1779 | return 1; |
1759 | 1780 | ||
1760 | if (msr == MSR_MTRRdefType) { | 1781 | if (msr == MSR_MTRRdefType) { |
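The rewritten variable-MTRR check rejects any write that sets bits the CPU treats as reserved, instead of only validating the memory type. A worked example of the mask construction, assuming cpuid_maxphyaddr(vcpu) == 40:

	/*
	 *   mask = ~0ULL << 40              = 0xffffff0000000000
	 *   base MSR (even): mask |= 0xf00 -> bits 63:40 and 11:8 reserved;
	 *                    bits 7:0 must also encode a valid memory type
	 *   mask MSR (odd):  mask |= 0x7ff -> bits 63:40 and 10:0 reserved
	 *                    (bit 11, the V/valid bit, stays writable)
	 *
	 * Any write with (data & mask) != 0 injects #GP(0) into the guest.
	 */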
@@ -1805,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1805 | break; | 1826 | break; |
1806 | default: | 1827 | default: |
1807 | if (msr >= MSR_IA32_MC0_CTL && | 1828 | if (msr >= MSR_IA32_MC0_CTL && |
1808 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { | 1829 | msr < MSR_IA32_MCx_CTL(bank_num)) { |
1809 | u32 offset = msr - MSR_IA32_MC0_CTL; | 1830 | u32 offset = msr - MSR_IA32_MC0_CTL; |
1810 | /* only 0 or all 1s can be written to IA32_MCi_CTL | 1831 | /* only 0 or all 1s can be written to IA32_MCi_CTL |
1811 | * some Linux kernels though clear bit 10 in bank 4 to | 1832 | * some Linux kernels though clear bit 10 in bank 4 to |
@@ -2164,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2164 | 2185 | ||
2165 | case MSR_IA32_MCG_CTL: | 2186 | case MSR_IA32_MCG_CTL: |
2166 | case MSR_IA32_MCG_STATUS: | 2187 | case MSR_IA32_MCG_STATUS: |
2167 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 2188 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: |
2168 | return set_msr_mce(vcpu, msr, data); | 2189 | return set_msr_mce(vcpu, msr, data); |
2169 | 2190 | ||
2170 | /* Performance counters are not protected by a CPUID bit, | 2191 | /* Performance counters are not protected by a CPUID bit, |
@@ -2330,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2330 | break; | 2351 | break; |
2331 | default: | 2352 | default: |
2332 | if (msr >= MSR_IA32_MC0_CTL && | 2353 | if (msr >= MSR_IA32_MC0_CTL && |
2333 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { | 2354 | msr < MSR_IA32_MCx_CTL(bank_num)) { |
2334 | u32 offset = msr - MSR_IA32_MC0_CTL; | 2355 | u32 offset = msr - MSR_IA32_MC0_CTL; |
2335 | data = vcpu->arch.mce_banks[offset]; | 2356 | data = vcpu->arch.mce_banks[offset]; |
2336 | break; | 2357 | break; |
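The open-coded MSR_IA32_MC0_CTL + 4 * bank bounds are replaced with the MSR_IA32_MCx_CTL() macro. Each machine-check bank owns four consecutive MSRs (CTL, STATUS, ADDR, MISC), so the macro in asm/msr-index.h expands to:

	#define MSR_IA32_MCx_CTL(x)	(MSR_IA32_MC0_CTL + 4*(x))

which makes MSR_IA32_MCx_CTL(bank_num) the first MSR past the last valid bank, hence the exclusive < comparison above and the ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1 case-range end.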
@@ -2419,7 +2440,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2419 | case MSR_K7_HWCR: | 2440 | case MSR_K7_HWCR: |
2420 | case MSR_VM_HSAVE_PA: | 2441 | case MSR_VM_HSAVE_PA: |
2421 | case MSR_K7_EVNTSEL0: | 2442 | case MSR_K7_EVNTSEL0: |
2443 | case MSR_K7_EVNTSEL1: | ||
2444 | case MSR_K7_EVNTSEL2: | ||
2445 | case MSR_K7_EVNTSEL3: | ||
2422 | case MSR_K7_PERFCTR0: | 2446 | case MSR_K7_PERFCTR0: |
2447 | case MSR_K7_PERFCTR1: | ||
2448 | case MSR_K7_PERFCTR2: | ||
2449 | case MSR_K7_PERFCTR3: | ||
2423 | case MSR_K8_INT_PENDING_MSG: | 2450 | case MSR_K8_INT_PENDING_MSG: |
2424 | case MSR_AMD64_NB_CFG: | 2451 | case MSR_AMD64_NB_CFG: |
2425 | case MSR_FAM10H_MMIO_CONF_BASE: | 2452 | case MSR_FAM10H_MMIO_CONF_BASE: |
@@ -2505,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2505 | case MSR_IA32_MCG_CAP: | 2532 | case MSR_IA32_MCG_CAP: |
2506 | case MSR_IA32_MCG_CTL: | 2533 | case MSR_IA32_MCG_CTL: |
2507 | case MSR_IA32_MCG_STATUS: | 2534 | case MSR_IA32_MCG_STATUS: |
2508 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 2535 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: |
2509 | return get_msr_mce(vcpu, msr, pdata); | 2536 | return get_msr_mce(vcpu, msr, pdata); |
2510 | case MSR_K7_CLK_CTL: | 2537 | case MSR_K7_CLK_CTL: |
2511 | /* | 2538 | /* |
@@ -2823,7 +2850,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2823 | if (unlikely(vcpu->arch.tsc_offset_adjustment)) { | 2850 | if (unlikely(vcpu->arch.tsc_offset_adjustment)) { |
2824 | adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); | 2851 | adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); |
2825 | vcpu->arch.tsc_offset_adjustment = 0; | 2852 | vcpu->arch.tsc_offset_adjustment = 0; |
2826 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | 2853 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
2827 | } | 2854 | } |
2828 | 2855 | ||
2829 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | 2856 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { |
@@ -4040,16 +4067,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, | |||
4040 | kvm_x86_ops->get_segment(vcpu, var, seg); | 4067 | kvm_x86_ops->get_segment(vcpu, var, seg); |
4041 | } | 4068 | } |
4042 | 4069 | ||
4043 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | 4070 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, |
4071 | struct x86_exception *exception) | ||
4044 | { | 4072 | { |
4045 | gpa_t t_gpa; | 4073 | gpa_t t_gpa; |
4046 | struct x86_exception exception; | ||
4047 | 4074 | ||
4048 | BUG_ON(!mmu_is_nested(vcpu)); | 4075 | BUG_ON(!mmu_is_nested(vcpu)); |
4049 | 4076 | ||
4050 | /* NPT walks are always user-walks */ | 4077 | /* NPT walks are always user-walks */ |
4051 | access |= PFERR_USER_MASK; | 4078 | access |= PFERR_USER_MASK; |
4052 | t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); | 4079 | t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception); |
4053 | 4080 | ||
4054 | return t_gpa; | 4081 | return t_gpa; |
4055 | } | 4082 | } |
@@ -4906,16 +4933,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | |||
4906 | } | 4933 | } |
4907 | } | 4934 | } |
4908 | 4935 | ||
4909 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | 4936 | static bool inject_emulated_exception(struct kvm_vcpu *vcpu) |
4910 | { | 4937 | { |
4911 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4938 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4912 | if (ctxt->exception.vector == PF_VECTOR) | 4939 | if (ctxt->exception.vector == PF_VECTOR) |
4913 | kvm_propagate_fault(vcpu, &ctxt->exception); | 4940 | return kvm_propagate_fault(vcpu, &ctxt->exception); |
4914 | else if (ctxt->exception.error_code_valid) | 4941 | |
4942 | if (ctxt->exception.error_code_valid) | ||
4915 | kvm_queue_exception_e(vcpu, ctxt->exception.vector, | 4943 | kvm_queue_exception_e(vcpu, ctxt->exception.vector, |
4916 | ctxt->exception.error_code); | 4944 | ctxt->exception.error_code); |
4917 | else | 4945 | else |
4918 | kvm_queue_exception(vcpu, ctxt->exception.vector); | 4946 | kvm_queue_exception(vcpu, ctxt->exception.vector); |
4947 | return false; | ||
4919 | } | 4948 | } |
4920 | 4949 | ||
4921 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | 4950 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) |
@@ -4972,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
4972 | 5001 | ||
4973 | ++vcpu->stat.insn_emulation_fail; | 5002 | ++vcpu->stat.insn_emulation_fail; |
4974 | trace_kvm_emulate_insn_failed(vcpu); | 5003 | trace_kvm_emulate_insn_failed(vcpu); |
4975 | if (!is_guest_mode(vcpu)) { | 5004 | if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { |
4976 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 5005 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
4977 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 5006 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
4978 | vcpu->run->internal.ndata = 0; | 5007 | vcpu->run->internal.ndata = 0; |
@@ -5224,6 +5253,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
5224 | 5253 | ||
5225 | ctxt->interruptibility = 0; | 5254 | ctxt->interruptibility = 0; |
5226 | ctxt->have_exception = false; | 5255 | ctxt->have_exception = false; |
5256 | ctxt->exception.vector = -1; | ||
5227 | ctxt->perm_ok = false; | 5257 | ctxt->perm_ok = false; |
5228 | 5258 | ||
5229 | ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; | 5259 | ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; |
@@ -5276,8 +5306,9 @@ restart: | |||
5276 | } | 5306 | } |
5277 | 5307 | ||
5278 | if (ctxt->have_exception) { | 5308 | if (ctxt->have_exception) { |
5279 | inject_emulated_exception(vcpu); | ||
5280 | r = EMULATE_DONE; | 5309 | r = EMULATE_DONE; |
5310 | if (inject_emulated_exception(vcpu)) | ||
5311 | return r; | ||
5281 | } else if (vcpu->arch.pio.count) { | 5312 | } else if (vcpu->arch.pio.count) { |
5282 | if (!vcpu->arch.pio.in) { | 5313 | if (!vcpu->arch.pio.in) { |
5283 | /* FIXME: return into emulator if single-stepping. */ | 5314 | /* FIXME: return into emulator if single-stepping. */ |
@@ -5545,7 +5576,7 @@ static void kvm_set_mmio_spte_mask(void) | |||
5545 | * entry to generate page fault with PFER.RSV = 1. | 5576 | * entry to generate page fault with PFER.RSV = 1. |
5546 | */ | 5577 | */ |
5547 | /* Mask the reserved physical address bits. */ | 5578 | /* Mask the reserved physical address bits. */ |
5548 | mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr; | 5579 | mask = rsvd_bits(maxphyaddr, 51); |
5549 | 5580 | ||
5550 | /* Bit 62 is always reserved for 32bit host. */ | 5581 | /* Bit 62 is always reserved for 32bit host. */ |
5551 | mask |= 0x3ull << 62; | 5582 | mask |= 0x3ull << 62; |
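rsvd_bits() is the MMU's existing helper for building a run of set bits from bit s through bit e inclusive, so the replacement is behavior-preserving (per arch/x86/kvm/mmu.h):

	static inline u64 rsvd_bits(int s, int e)
	{
		return ((1ULL << (e - s + 1)) - 1) << s;
	}

	/* rsvd_bits(maxphyaddr, 51) with maxphyaddr == 40 yields
	 * 0x000fff0000000000, i.e. bits 40..51 set. */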
@@ -5576,7 +5607,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work) | |||
5576 | spin_lock(&kvm_lock); | 5607 | spin_lock(&kvm_lock); |
5577 | list_for_each_entry(kvm, &vm_list, vm_list) | 5608 | list_for_each_entry(kvm, &vm_list, vm_list) |
5578 | kvm_for_each_vcpu(i, vcpu, kvm) | 5609 | kvm_for_each_vcpu(i, vcpu, kvm) |
5579 | set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); | 5610 | kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); |
5580 | atomic_set(&kvm_guest_has_master_clock, 0); | 5611 | atomic_set(&kvm_guest_has_master_clock, 0); |
5581 | spin_unlock(&kvm_lock); | 5612 | spin_unlock(&kvm_lock); |
5582 | } | 5613 | } |
@@ -5989,6 +6020,44 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
5989 | kvm_apic_update_tmr(vcpu, tmr); | 6020 | kvm_apic_update_tmr(vcpu, tmr); |
5990 | } | 6021 | } |
5991 | 6022 | ||
6023 | static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) | ||
6024 | { | ||
6025 | ++vcpu->stat.tlb_flush; | ||
6026 | kvm_x86_ops->tlb_flush(vcpu); | ||
6027 | } | ||
6028 | |||
6029 | void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) | ||
6030 | { | ||
6031 | struct page *page = NULL; | ||
6032 | |||
6033 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
6034 | return; | ||
6035 | |||
6036 | if (!kvm_x86_ops->set_apic_access_page_addr) | ||
6037 | return; | ||
6038 | |||
6039 | page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); | ||
6040 | kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); | ||
6041 | |||
6042 | /* | ||
6043 | * Do not pin apic access page in memory, the MMU notifier | ||
6044 | * will call us again if it is migrated or swapped out. | ||
6045 | */ | ||
6046 | put_page(page); | ||
6047 | } | ||
6048 | EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); | ||
6049 | |||
6050 | void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | ||
6051 | unsigned long address) | ||
6052 | { | ||
6053 | /* | ||
6054 | * The physical address of apic access page is stored in the VMCS. | ||
6055 | * Update it when it becomes invalid. | ||
6056 | */ | ||
6057 | if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) | ||
6058 | kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); | ||
6059 | } | ||
6060 | |||
5992 | /* | 6061 | /* |
5993 | * Returns 1 to let __vcpu_run() continue the guest execution loop without | 6062 | * Returns 1 to let __vcpu_run() continue the guest execution loop without |
5994 | * exiting to the userspace. Otherwise, the value will be returned to the | 6063 | * exiting to the userspace. Otherwise, the value will be returned to the |
@@ -6018,7 +6087,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6018 | if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) | 6087 | if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) |
6019 | kvm_mmu_sync_roots(vcpu); | 6088 | kvm_mmu_sync_roots(vcpu); |
6020 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) | 6089 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) |
6021 | kvm_x86_ops->tlb_flush(vcpu); | 6090 | kvm_vcpu_flush_tlb(vcpu); |
6022 | if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { | 6091 | if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { |
6023 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; | 6092 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
6024 | r = 0; | 6093 | r = 0; |
@@ -6049,6 +6118,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6049 | kvm_deliver_pmi(vcpu); | 6118 | kvm_deliver_pmi(vcpu); |
6050 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) | 6119 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
6051 | vcpu_scan_ioapic(vcpu); | 6120 | vcpu_scan_ioapic(vcpu); |
6121 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) | ||
6122 | kvm_vcpu_reload_apic_access_page(vcpu); | ||
6052 | } | 6123 | } |
6053 | 6124 | ||
6054 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 6125 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
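These two request checks complete the APIC-access-page unpinning scheme added above: kvm_arch_mmu_notifier_invalidate_page() raises KVM_REQ_APIC_PAGE_RELOAD on every vCPU when the backing page moves, and each vCPU re-resolves the gfn and rewrites the VMCS pointer before re-entering the guest. On the VMX side, the hook wired up at the top of this patch is essentially one VMCS write; a sketch (the real body lives in the vmx.c half of this series):

	static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu,
						  hpa_t hpa)
	{
		/* Re-point the APIC access mapping at the new host page;
		 * no pinning needed, the MMU notifier re-arms the reload. */
		vmcs_write64(APIC_ACCESS_ADDR, hpa);
	}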
@@ -6934,7 +7005,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) | |||
6934 | kvm_rip_write(vcpu, 0); | 7005 | kvm_rip_write(vcpu, 0); |
6935 | } | 7006 | } |
6936 | 7007 | ||
6937 | int kvm_arch_hardware_enable(void *garbage) | 7008 | int kvm_arch_hardware_enable(void) |
6938 | { | 7009 | { |
6939 | struct kvm *kvm; | 7010 | struct kvm *kvm; |
6940 | struct kvm_vcpu *vcpu; | 7011 | struct kvm_vcpu *vcpu; |
@@ -6945,7 +7016,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
6945 | bool stable, backwards_tsc = false; | 7016 | bool stable, backwards_tsc = false; |
6946 | 7017 | ||
6947 | kvm_shared_msr_cpu_online(); | 7018 | kvm_shared_msr_cpu_online(); |
6948 | ret = kvm_x86_ops->hardware_enable(garbage); | 7019 | ret = kvm_x86_ops->hardware_enable(); |
6949 | if (ret != 0) | 7020 | if (ret != 0) |
6950 | return ret; | 7021 | return ret; |
6951 | 7022 | ||
@@ -6954,7 +7025,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
6954 | list_for_each_entry(kvm, &vm_list, vm_list) { | 7025 | list_for_each_entry(kvm, &vm_list, vm_list) { |
6955 | kvm_for_each_vcpu(i, vcpu, kvm) { | 7026 | kvm_for_each_vcpu(i, vcpu, kvm) { |
6956 | if (!stable && vcpu->cpu == smp_processor_id()) | 7027 | if (!stable && vcpu->cpu == smp_processor_id()) |
6957 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | 7028 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
6958 | if (stable && vcpu->arch.last_host_tsc > local_tsc) { | 7029 | if (stable && vcpu->arch.last_host_tsc > local_tsc) { |
6959 | backwards_tsc = true; | 7030 | backwards_tsc = true; |
6960 | if (vcpu->arch.last_host_tsc > max_tsc) | 7031 | if (vcpu->arch.last_host_tsc > max_tsc) |
@@ -7008,8 +7079,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
7008 | kvm_for_each_vcpu(i, vcpu, kvm) { | 7079 | kvm_for_each_vcpu(i, vcpu, kvm) { |
7009 | vcpu->arch.tsc_offset_adjustment += delta_cyc; | 7080 | vcpu->arch.tsc_offset_adjustment += delta_cyc; |
7010 | vcpu->arch.last_host_tsc = local_tsc; | 7081 | vcpu->arch.last_host_tsc = local_tsc; |
7011 | set_bit(KVM_REQ_MASTERCLOCK_UPDATE, | 7082 | kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); |
7012 | &vcpu->requests); | ||
7013 | } | 7083 | } |
7014 | 7084 | ||
7015 | /* | 7085 | /* |
@@ -7026,10 +7096,10 @@ int kvm_arch_hardware_enable(void *garbage) | |||
7026 | return 0; | 7096 | return 0; |
7027 | } | 7097 | } |
7028 | 7098 | ||
7029 | void kvm_arch_hardware_disable(void *garbage) | 7099 | void kvm_arch_hardware_disable(void) |
7030 | { | 7100 | { |
7031 | kvm_x86_ops->hardware_disable(garbage); | 7101 | kvm_x86_ops->hardware_disable(); |
7032 | drop_user_return_notifiers(garbage); | 7102 | drop_user_return_notifiers(); |
7033 | } | 7103 | } |
7034 | 7104 | ||
7035 | int kvm_arch_hardware_setup(void) | 7105 | int kvm_arch_hardware_setup(void) |
@@ -7146,6 +7216,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
7146 | static_key_slow_dec(&kvm_no_apic_vcpu); | 7216 | static_key_slow_dec(&kvm_no_apic_vcpu); |
7147 | } | 7217 | } |
7148 | 7218 | ||
7219 | void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) | ||
7220 | { | ||
7221 | kvm_x86_ops->sched_in(vcpu, cpu); | ||
7222 | } | ||
7223 | |||
7149 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | 7224 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
7150 | { | 7225 | { |
7151 | if (type) | 7226 | if (type) |
@@ -7237,10 +7312,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
7237 | kfree(kvm->arch.vpic); | 7312 | kfree(kvm->arch.vpic); |
7238 | kfree(kvm->arch.vioapic); | 7313 | kfree(kvm->arch.vioapic); |
7239 | kvm_free_vcpus(kvm); | 7314 | kvm_free_vcpus(kvm); |
7240 | if (kvm->arch.apic_access_page) | ||
7241 | put_page(kvm->arch.apic_access_page); | ||
7242 | if (kvm->arch.ept_identity_pagetable) | ||
7243 | put_page(kvm->arch.ept_identity_pagetable); | ||
7244 | kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); | 7315 | kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); |
7245 | } | 7316 | } |
7246 | 7317 | ||
@@ -7643,3 +7714,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | |||
7643 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 7714 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
7644 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | 7715 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); |
7645 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); | 7716 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); |
7717 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 306a1b77581f..7cb9c45a5fe0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | |||
88 | vcpu->arch.mmio_gva = gva & PAGE_MASK; | 88 | vcpu->arch.mmio_gva = gva & PAGE_MASK; |
89 | vcpu->arch.access = access; | 89 | vcpu->arch.access = access; |
90 | vcpu->arch.mmio_gfn = gfn; | 90 | vcpu->arch.mmio_gfn = gfn; |
91 | vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; | ||
92 | } | ||
93 | |||
94 | static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) | ||
95 | { | ||
96 | return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation; | ||
91 | } | 97 | } |
92 | 98 | ||
93 | /* | 99 | /* |
94 | * Clear the mmio cache info for the given gva, | 100 | * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we |
95 | * specially, if gva is ~0ul, we clear all mmio cache info. | 101 | * clear all mmio cache info. |
96 | */ | 102 | */ |
103 | #define MMIO_GVA_ANY (~(gva_t)0) | ||
104 | |||
97 | static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) | 105 | static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) |
98 | { | 106 | { |
99 | if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) | 107 | if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) |
100 | return; | 108 | return; |
101 | 109 | ||
102 | vcpu->arch.mmio_gva = 0; | 110 | vcpu->arch.mmio_gva = 0; |
@@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) | |||
104 | 112 | ||
105 | static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) | 113 | static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) |
106 | { | 114 | { |
107 | if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) | 115 | if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva && |
116 | vcpu->arch.mmio_gva == (gva & PAGE_MASK)) | ||
108 | return true; | 117 | return true; |
109 | 118 | ||
110 | return false; | 119 | return false; |
@@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) | |||
112 | 121 | ||
113 | static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) | 122 | static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) |
114 | { | 123 | { |
115 | if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) | 124 | if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn && |
125 | vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) | ||
116 | return true; | 126 | return true; |
117 | 127 | ||
118 | return false; | 128 | return false; |
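Stamping the cached gva->gfn MMIO translation with the memslot generation means any memslot update implicitly invalidates every vCPU's cache, with no need to iterate over vCPUs. The fast path this protects looks roughly like x86.c's vcpu_mmio_gva_to_gpa():

	/* Only trust the cached translation if it was recorded under the
	 * current memslot generation (and the access bits still match). */
	if (vcpu_match_mmio_gva(vcpu, gva)) {
		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT;
		*gpa |= gva & ~PAGE_MASK;
		return 1;	/* known MMIO: skip the page-table walk */
	}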
@@ -149,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
149 | gva_t addr, void *val, unsigned int bytes, | 159 | gva_t addr, void *val, unsigned int bytes, |
150 | struct x86_exception *exception); | 160 | struct x86_exception *exception); |
151 | 161 | ||
162 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
163 | |||
152 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | 164 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ |
153 | | XSTATE_BNDREGS | XSTATE_BNDCSR) | 165 | | XSTATE_BNDREGS | XSTATE_BNDCSR) |
154 | extern u64 host_xcr0; | 166 | extern u64 host_xcr0; |
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 5f578e850fc5..90d734bbf467 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c | |||
@@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn, | |||
402 | 402 | ||
403 | static int mn_clear_flush_young(struct mmu_notifier *mn, | 403 | static int mn_clear_flush_young(struct mmu_notifier *mn, |
404 | struct mm_struct *mm, | 404 | struct mm_struct *mm, |
405 | unsigned long address) | 405 | unsigned long start, |
406 | unsigned long end) | ||
406 | { | 407 | { |
407 | __mn_flush_page(mn, address); | 408 | for (; start < end; start += PAGE_SIZE) |
409 | __mn_flush_page(mn, start); | ||
408 | 410 | ||
409 | return 0; | 411 | return 0; |
410 | } | 412 | } |
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 35b0c121bb65..2f2aac8448a4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
@@ -25,26 +25,25 @@ | |||
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | #include <linux/types.h> | 26 | #include <linux/types.h> |
27 | 27 | ||
28 | #define VGIC_NR_IRQS 256 | 28 | #define VGIC_NR_IRQS_LEGACY 256 |
29 | #define VGIC_NR_SGIS 16 | 29 | #define VGIC_NR_SGIS 16 |
30 | #define VGIC_NR_PPIS 16 | 30 | #define VGIC_NR_PPIS 16 |
31 | #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) | 31 | #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) |
32 | #define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) | ||
33 | #define VGIC_MAX_CPUS KVM_MAX_VCPUS | ||
34 | 32 | ||
35 | #define VGIC_V2_MAX_LRS (1 << 6) | 33 | #define VGIC_V2_MAX_LRS (1 << 6) |
36 | #define VGIC_V3_MAX_LRS 16 | 34 | #define VGIC_V3_MAX_LRS 16 |
35 | #define VGIC_MAX_IRQS 1024 | ||
37 | 36 | ||
38 | /* Sanity checks... */ | 37 | /* Sanity checks... */ |
39 | #if (VGIC_MAX_CPUS > 8) | 38 | #if (KVM_MAX_VCPUS > 8) |
40 | #error Invalid number of CPU interfaces | 39 | #error Invalid number of CPU interfaces |
41 | #endif | 40 | #endif |
42 | 41 | ||
43 | #if (VGIC_NR_IRQS & 31) | 42 | #if (VGIC_NR_IRQS_LEGACY & 31) |
44 | #error "VGIC_NR_IRQS must be a multiple of 32" | 43 | #error "VGIC_NR_IRQS must be a multiple of 32" |
45 | #endif | 44 | #endif |
46 | 45 | ||
47 | #if (VGIC_NR_IRQS > 1024) | 46 | #if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS) |
48 | #error "VGIC_NR_IRQS must be <= 1024" | 47 | #error "VGIC_NR_IRQS must be <= 1024" |
49 | #endif | 48 | #endif |
50 | 49 | ||
@@ -54,19 +53,33 @@ | |||
54 | * - a bunch of shared interrupts (SPI) | 53 | * - a bunch of shared interrupts (SPI) |
55 | */ | 54 | */ |
56 | struct vgic_bitmap { | 55 | struct vgic_bitmap { |
57 | union { | 56 | /* |
58 | u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; | 57 | * - One UL per VCPU for private interrupts (assumes UL is at |
59 | DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); | 58 | * least 32 bits) |
60 | } percpu[VGIC_MAX_CPUS]; | 59 | * - As many UL as necessary for shared interrupts. |
61 | union { | 60 | * |
62 | u32 reg[VGIC_NR_SHARED_IRQS / 32]; | 61 | * The private interrupts are accessed via the "private" |
63 | DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); | 62 | * field, one UL per vcpu (the state for vcpu n is in |
64 | } shared; | 63 | * private[n]). The shared interrupts are accessed via the |
64 | * "shared" pointer (IRQn state is at bit n-32 in the bitmap). | ||
65 | */ | ||
66 | unsigned long *private; | ||
67 | unsigned long *shared; | ||
65 | }; | 68 | }; |
66 | 69 | ||
67 | struct vgic_bytemap { | 70 | struct vgic_bytemap { |
68 | u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; | 71 | /* |
69 | u32 shared[VGIC_NR_SHARED_IRQS / 4]; | 72 | * - 8 u32 per VCPU for private interrupts |
73 | * - As many u32 as necessary for shared interrupts. | ||
74 | * | ||
75 | * The private interrupts are accessed via the "private" | ||
76 | * field, (the state for vcpu n is in private[n*8] to | ||
77 | * private[n*8 + 7]). The shared interrupts are accessed via | ||
78 | * the "shared" pointer (IRQn state is at byte (n-32)%4 of the | ||
79 | * shared[(n-32)/4] word). | ||
80 | */ | ||
81 | u32 *private; | ||
82 | u32 *shared; | ||
70 | }; | 83 | }; |
71 | 84 | ||
72 | struct kvm_vcpu; | 85 | struct kvm_vcpu; |
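With the fixed-size unions replaced by dynamically sized private/shared pointers, every accessor has to pick the right bitmap from the IRQ number. The lookup the comments describe, roughly as implemented in vgic.c:

	static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
					   int cpuid, int irq)
	{
		if (irq < VGIC_NR_PRIVATE_IRQS)
			return test_bit(irq, x->private + cpuid);

		return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
	}

E.g. the state of IRQ 45 for any vCPU lands at bit 13 of the shared bitmap.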
@@ -127,6 +140,9 @@ struct vgic_dist { | |||
127 | bool in_kernel; | 140 | bool in_kernel; |
128 | bool ready; | 141 | bool ready; |
129 | 142 | ||
143 | int nr_cpus; | ||
144 | int nr_irqs; | ||
145 | |||
130 | /* Virtual control interface mapping */ | 146 | /* Virtual control interface mapping */ |
131 | void __iomem *vctrl_base; | 147 | void __iomem *vctrl_base; |
132 | 148 | ||
@@ -140,11 +156,25 @@ struct vgic_dist { | |||
140 | /* Interrupt enabled (one bit per IRQ) */ | 156 | /* Interrupt enabled (one bit per IRQ) */ |
141 | struct vgic_bitmap irq_enabled; | 157 | struct vgic_bitmap irq_enabled; |
142 | 158 | ||
143 | /* Interrupt 'pin' level */ | 159 | /* Level-triggered interrupt external input is asserted */ |
144 | struct vgic_bitmap irq_state; | 160 | struct vgic_bitmap irq_level; |
145 | 161 | ||
146 | /* Level-triggered interrupt in progress */ | 162 | /* |
147 | struct vgic_bitmap irq_active; | 163 | * Interrupt state is pending on the distributor |
164 | */ | ||
165 | struct vgic_bitmap irq_pending; | ||
166 | |||
167 | /* | ||
168 | * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered | ||
169 | * interrupts. Essentially holds the state of the flip-flop in | ||
170 | * Figure 4-10 on page 4-101 in ARM IHI 0048B.b. | ||
171 | * Once set, it is only cleared for level-triggered interrupts on | ||
172 | * guest ACKs (when we queue it) or writes to GICD_ICPENDRn. | ||
173 | */ | ||
174 | struct vgic_bitmap irq_soft_pend; | ||
175 | |||
176 | /* Level-triggered interrupt queued on VCPU interface */ | ||
177 | struct vgic_bitmap irq_queued; | ||
148 | 178 | ||
149 | /* Interrupt priority. Not used yet. */ | 179 | /* Interrupt priority. Not used yet. */ |
150 | struct vgic_bytemap irq_priority; | 180 | struct vgic_bytemap irq_priority; |
@@ -152,15 +182,36 @@ struct vgic_dist { | |||
152 | /* Level/edge triggered */ | 182 | /* Level/edge triggered */ |
153 | struct vgic_bitmap irq_cfg; | 183 | struct vgic_bitmap irq_cfg; |
154 | 184 | ||
155 | /* Source CPU per SGI and target CPU */ | 185 | /* |
156 | u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; | 186 | * Source CPU per SGI and target CPU: |
157 | 187 | * | |
158 | /* Target CPU for each IRQ */ | 188 | * Each byte represent a SGI observable on a VCPU, each bit of |
159 | u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; | 189 | * this byte indicating if the corresponding VCPU has |
160 | struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; | 190 | * generated this interrupt. This is a GICv2 feature only. |
191 | * | ||
192 | * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are | ||
193 | * the SGIs observable on VCPUn. | ||
194 | */ | ||
195 | u8 *irq_sgi_sources; | ||
196 | |||
197 | /* | ||
198 | * Target CPU for each SPI: | ||
199 | * | ||
200 | * Array of available SPI, each byte indicating the target | ||
201 | * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32]. | ||
202 | */ | ||
203 | u8 *irq_spi_cpu; | ||
204 | |||
205 | /* | ||
206 | * Reverse lookup of irq_spi_cpu for faster compute pending: | ||
207 | * | ||
208 | * Array of bitmaps, one per VCPU, describing if IRQn is | ||
209 | * routed to a particular VCPU. | ||
210 | */ | ||
211 | struct vgic_bitmap *irq_spi_target; | ||
161 | 212 | ||
162 | /* Bitmap indicating which CPU has something pending */ | 213 | /* Bitmap indicating which CPU has something pending */ |
163 | unsigned long irq_pending_on_cpu; | 214 | unsigned long *irq_pending_on_cpu; |
164 | #endif | 215 | #endif |
165 | }; | 216 | }; |
166 | 217 | ||
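The irq_sgi_sources layout described above (16 source bytes per vCPU) is wrapped by a small helper in vgic.c, roughly:

	static u8 *vgic_get_sgi_sources(struct vgic_dist *dist,
					int vcpu_id, int sgi)
	{
		return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
	}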
@@ -190,11 +241,11 @@ struct vgic_v3_cpu_if { | |||
190 | struct vgic_cpu { | 241 | struct vgic_cpu { |
191 | #ifdef CONFIG_KVM_ARM_VGIC | 242 | #ifdef CONFIG_KVM_ARM_VGIC |
192 | /* per IRQ to LR mapping */ | 243 | /* per IRQ to LR mapping */ |
193 | u8 vgic_irq_lr_map[VGIC_NR_IRQS]; | 244 | u8 *vgic_irq_lr_map; |
194 | 245 | ||
195 | /* Pending interrupts on this VCPU */ | 246 | /* Pending interrupts on this VCPU */ |
196 | DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); | 247 | DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); |
197 | DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); | 248 | unsigned long *pending_shared; |
198 | 249 | ||
199 | /* Bitmap of used/free list registers */ | 250 | /* Bitmap of used/free list registers */ |
200 | DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); | 251 | DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); |
@@ -225,7 +276,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); | |||
225 | int kvm_vgic_hyp_init(void); | 276 | int kvm_vgic_hyp_init(void); |
226 | int kvm_vgic_init(struct kvm *kvm); | 277 | int kvm_vgic_init(struct kvm *kvm); |
227 | int kvm_vgic_create(struct kvm *kvm); | 278 | int kvm_vgic_create(struct kvm *kvm); |
228 | int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); | 279 | void kvm_vgic_destroy(struct kvm *kvm); |
280 | void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); | ||
229 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); | 281 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); |
230 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); | 282 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); |
231 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | 283 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b34fe3f..28be31f49250 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -136,12 +136,11 @@ static inline bool is_error_page(struct page *page) | |||
136 | #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 | 136 | #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 |
137 | #define KVM_REQ_ENABLE_IBS 23 | 137 | #define KVM_REQ_ENABLE_IBS 23 |
138 | #define KVM_REQ_DISABLE_IBS 24 | 138 | #define KVM_REQ_DISABLE_IBS 24 |
139 | #define KVM_REQ_APIC_PAGE_RELOAD 25 | ||
139 | 140 | ||
140 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 141 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
141 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | 142 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 |
142 | 143 | ||
143 | struct kvm; | ||
144 | struct kvm_vcpu; | ||
145 | extern struct kmem_cache *kvm_vcpu_cache; | 144 | extern struct kmem_cache *kvm_vcpu_cache; |
146 | 145 | ||
147 | extern spinlock_t kvm_lock; | 146 | extern spinlock_t kvm_lock; |
@@ -200,6 +199,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, | |||
200 | int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); | 199 | int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); |
201 | #endif | 200 | #endif |
202 | 201 | ||
202 | /* | ||
203 | * Carry out a gup that requires IO. Allow the mm to relinquish the mmap | ||
204 | * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL | ||
205 | * controls whether we retry the gup one more time to completion in that case. | ||
206 | * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp | ||
207 | * handler. | ||
208 | */ | ||
209 | int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, | ||
210 | unsigned long addr, bool write_fault, | ||
211 | struct page **pagep); | ||
212 | |||
203 | enum { | 213 | enum { |
204 | OUTSIDE_GUEST_MODE, | 214 | OUTSIDE_GUEST_MODE, |
205 | IN_GUEST_MODE, | 215 | IN_GUEST_MODE, |
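kvm_get_user_page_io() pairs with the new FOLL_TRIED flag (see the mm/gup.c hunk below): the first get_user_pages pass may start I/O and drop mmap_sem, and the retry passes FOLL_TRIED so the fault handler knows an I/O pass already ran and waits synchronously. A sketch of the two-pass shape, assuming __get_user_pages()'s last argument is cleared when mmap_sem was released:

	int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
				 unsigned long addr, bool write_fault,
				 struct page **pagep)
	{
		int npages, locked = 1;
		int flags = FOLL_TOUCH | FOLL_HWPOISON |
			    (write_fault ? FOLL_WRITE : 0) |
			    (pagep ? FOLL_GET : 0);

		npages = __get_user_pages(tsk, mm, addr, 1, flags,
					  pagep, NULL, &locked);
		if (!locked) {
			/* mmap_sem was dropped to wait on I/O; retake it
			 * and retry once, synchronously this time. */
			down_read(&mm->mmap_sem);
			npages = __get_user_pages(tsk, mm, addr, 1,
						  flags | FOLL_TRIED,
						  pagep, NULL, NULL);
		}
		return npages;
	}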
@@ -325,8 +335,6 @@ struct kvm_kernel_irq_routing_entry { | |||
325 | struct hlist_node link; | 335 | struct hlist_node link; |
326 | }; | 336 | }; |
327 | 337 | ||
328 | struct kvm_irq_routing_table; | ||
329 | |||
330 | #ifndef KVM_PRIVATE_MEM_SLOTS | 338 | #ifndef KVM_PRIVATE_MEM_SLOTS |
331 | #define KVM_PRIVATE_MEM_SLOTS 0 | 339 | #define KVM_PRIVATE_MEM_SLOTS 0 |
332 | #endif | 340 | #endif |
@@ -528,6 +536,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | |||
528 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); | 536 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); |
529 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); | 537 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); |
530 | unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); | 538 | unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); |
539 | unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, | ||
540 | bool *writable); | ||
531 | void kvm_release_page_clean(struct page *page); | 541 | void kvm_release_page_clean(struct page *page); |
532 | void kvm_release_page_dirty(struct page *page); | 542 | void kvm_release_page_dirty(struct page *page); |
533 | void kvm_set_page_accessed(struct page *page); | 543 | void kvm_set_page_accessed(struct page *page); |
@@ -579,6 +589,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); | |||
579 | void kvm_reload_remote_mmus(struct kvm *kvm); | 589 | void kvm_reload_remote_mmus(struct kvm *kvm); |
580 | void kvm_make_mclock_inprogress_request(struct kvm *kvm); | 590 | void kvm_make_mclock_inprogress_request(struct kvm *kvm); |
581 | void kvm_make_scan_ioapic_request(struct kvm *kvm); | 591 | void kvm_make_scan_ioapic_request(struct kvm *kvm); |
592 | bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); | ||
582 | 593 | ||
583 | long kvm_arch_dev_ioctl(struct file *filp, | 594 | long kvm_arch_dev_ioctl(struct file *filp, |
584 | unsigned int ioctl, unsigned long arg); | 595 | unsigned int ioctl, unsigned long arg); |
@@ -624,6 +635,8 @@ void kvm_arch_exit(void); | |||
624 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); | 635 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); |
625 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); | 636 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); |
626 | 637 | ||
638 | void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); | ||
639 | |||
627 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); | 640 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); |
628 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); | 641 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); |
629 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); | 642 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); |
@@ -632,8 +645,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); | |||
632 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); | 645 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); |
633 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); | 646 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); |
634 | 647 | ||
635 | int kvm_arch_hardware_enable(void *garbage); | 648 | int kvm_arch_hardware_enable(void); |
636 | void kvm_arch_hardware_disable(void *garbage); | 649 | void kvm_arch_hardware_disable(void); |
637 | int kvm_arch_hardware_setup(void); | 650 | int kvm_arch_hardware_setup(void); |
638 | void kvm_arch_hardware_unsetup(void); | 651 | void kvm_arch_hardware_unsetup(void); |
639 | void kvm_arch_check_processor_compat(void *rtn); | 652 | void kvm_arch_check_processor_compat(void *rtn); |
@@ -1034,8 +1047,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) | |||
1034 | 1047 | ||
1035 | extern bool kvm_rebooting; | 1048 | extern bool kvm_rebooting; |
1036 | 1049 | ||
1037 | struct kvm_device_ops; | ||
1038 | |||
1039 | struct kvm_device { | 1050 | struct kvm_device { |
1040 | struct kvm_device_ops *ops; | 1051 | struct kvm_device_ops *ops; |
1041 | struct kvm *kvm; | 1052 | struct kvm *kvm; |
@@ -1068,12 +1079,10 @@ struct kvm_device_ops { | |||
1068 | void kvm_device_get(struct kvm_device *dev); | 1079 | void kvm_device_get(struct kvm_device *dev); |
1069 | void kvm_device_put(struct kvm_device *dev); | 1080 | void kvm_device_put(struct kvm_device *dev); |
1070 | struct kvm_device *kvm_device_from_filp(struct file *filp); | 1081 | struct kvm_device *kvm_device_from_filp(struct file *filp); |
1082 | int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); | ||
1071 | 1083 | ||
1072 | extern struct kvm_device_ops kvm_mpic_ops; | 1084 | extern struct kvm_device_ops kvm_mpic_ops; |
1073 | extern struct kvm_device_ops kvm_xics_ops; | 1085 | extern struct kvm_device_ops kvm_xics_ops; |
1074 | extern struct kvm_device_ops kvm_vfio_ops; | ||
1075 | extern struct kvm_device_ops kvm_arm_vgic_v2_ops; | ||
1076 | extern struct kvm_device_ops kvm_flic_ops; | ||
1077 | 1086 | ||
1078 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | 1087 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT |
1079 | 1088 | ||
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h | |||
@@ -17,6 +17,20 @@ | |||
17 | #ifndef __KVM_TYPES_H__ | 17 | #ifndef __KVM_TYPES_H__ |
18 | #define __KVM_TYPES_H__ | 18 | #define __KVM_TYPES_H__ |
19 | 19 | ||
20 | struct kvm; | ||
21 | struct kvm_async_pf; | ||
22 | struct kvm_device_ops; | ||
23 | struct kvm_interrupt; | ||
24 | struct kvm_irq_routing_table; | ||
25 | struct kvm_memory_slot; | ||
26 | struct kvm_one_reg; | ||
27 | struct kvm_run; | ||
28 | struct kvm_userspace_memory_region; | ||
29 | struct kvm_vcpu; | ||
30 | struct kvm_vcpu_init; | ||
31 | |||
32 | enum kvm_mr_change; | ||
33 | |||
20 | #include <asm/types.h> | 34 | #include <asm/types.h> |
21 | 35 | ||
22 | /* | 36 | /* |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc882ed2..0f4196a0bc20 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, | |||
1985 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ | 1985 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ |
1986 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ | 1986 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ |
1987 | #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ | 1987 | #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ |
1988 | #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ | ||
1988 | 1989 | ||
1989 | typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, | 1990 | typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, |
1990 | void *data); | 1991 | void *data); |
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 27288692241e..88787bb4b3b9 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h | |||
@@ -57,10 +57,13 @@ struct mmu_notifier_ops { | |||
57 | * pte. This way the VM will provide proper aging to the | 57 | * pte. This way the VM will provide proper aging to the |
58 | * accesses to the page through the secondary MMUs and not | 58 | * accesses to the page through the secondary MMUs and not |
59 | * only to the ones through the Linux pte. | 59 | * only to the ones through the Linux pte. |
60 | * Start-end is necessary in case the secondary MMU is mapping the page | ||
61 | * at a smaller granularity than the primary MMU. | ||
60 | */ | 62 | */ |
61 | int (*clear_flush_young)(struct mmu_notifier *mn, | 63 | int (*clear_flush_young)(struct mmu_notifier *mn, |
62 | struct mm_struct *mm, | 64 | struct mm_struct *mm, |
63 | unsigned long address); | 65 | unsigned long start, |
66 | unsigned long end); | ||
64 | 67 | ||
65 | /* | 68 | /* |
66 | * test_young is called to check the young/accessed bitflag in | 69 | * test_young is called to check the young/accessed bitflag in |
@@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, | |||
175 | extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); | 178 | extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); |
176 | extern void __mmu_notifier_release(struct mm_struct *mm); | 179 | extern void __mmu_notifier_release(struct mm_struct *mm); |
177 | extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, | 180 | extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, |
178 | unsigned long address); | 181 | unsigned long start, |
182 | unsigned long end); | ||
179 | extern int __mmu_notifier_test_young(struct mm_struct *mm, | 183 | extern int __mmu_notifier_test_young(struct mm_struct *mm, |
180 | unsigned long address); | 184 | unsigned long address); |
181 | extern void __mmu_notifier_change_pte(struct mm_struct *mm, | 185 | extern void __mmu_notifier_change_pte(struct mm_struct *mm, |
@@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm) | |||
194 | } | 198 | } |
195 | 199 | ||
196 | static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, | 200 | static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, |
197 | unsigned long address) | 201 | unsigned long start, |
202 | unsigned long end) | ||
198 | { | 203 | { |
199 | if (mm_has_notifiers(mm)) | 204 | if (mm_has_notifiers(mm)) |
200 | return __mmu_notifier_clear_flush_young(mm, address); | 205 | return __mmu_notifier_clear_flush_young(mm, start, end); |
201 | return 0; | 206 | return 0; |
202 | } | 207 | } |
203 | 208 | ||
@@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) | |||
255 | unsigned long ___address = __address; \ | 260 | unsigned long ___address = __address; \ |
256 | __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ | 261 | __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ |
257 | __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ | 262 | __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ |
258 | ___address); \ | 263 | ___address, \ |
264 | ___address + \ | ||
265 | PAGE_SIZE); \ | ||
259 | __young; \ | 266 | __young; \ |
260 | }) | 267 | }) |
261 | 268 | ||
@@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) | |||
266 | unsigned long ___address = __address; \ | 273 | unsigned long ___address = __address; \ |
267 | __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ | 274 | __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ |
268 | __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ | 275 | __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ |
269 | ___address); \ | 276 | ___address, \ |
277 | ___address + \ | ||
278 | PMD_SIZE); \ | ||
270 | __young; \ | 279 | __young; \ |
271 | }) | 280 | }) |
272 | 281 | ||
@@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm) | |||
301 | } | 310 | } |
302 | 311 | ||
303 | static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, | 312 | static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, |
304 | unsigned long address) | 313 | unsigned long start, |
314 | unsigned long end) | ||
305 | { | 315 | { |
306 | return 0; | 316 | return 0; |
307 | } | 317 | } |
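Passing a [start, end) range lets a secondary MMU that maps at finer granularity than the primary (a 4K-mapping IOMMU under a THP, for instance) age every small page in the span, as the amd_iommu_v2 hunk above now does. KVM's notifier consumer after this series looks roughly like:

	static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
						      struct mm_struct *mm,
						      unsigned long start,
						      unsigned long end)
	{
		struct kvm *kvm = mmu_notifier_to_kvm(mn);
		int young, idx;

		idx = srcu_read_lock(&kvm->srcu);
		spin_lock(&kvm->mmu_lock);

		young = kvm_age_hva(kvm, start, end);
		if (young)
			kvm_flush_remote_tlbs(kvm);

		spin_unlock(&kvm->mmu_lock);
		srcu_read_unlock(&kvm->srcu, idx);

		return young;
	}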
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 908925ace776..6edf1f2028cd 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h | |||
@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq, | |||
95 | __entry->coalesced ? " (coalesced)" : "") | 95 | __entry->coalesced ? " (coalesced)" : "") |
96 | ); | 96 | ); |
97 | 97 | ||
98 | TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, | ||
99 | TP_PROTO(__u64 e), | ||
100 | TP_ARGS(e), | ||
101 | |||
102 | TP_STRUCT__entry( | ||
103 | __field( __u64, e ) | ||
104 | ), | ||
105 | |||
106 | TP_fast_assign( | ||
107 | __entry->e = e; | ||
108 | ), | ||
109 | |||
110 | TP_printk("dst %x vec=%u (%s|%s|%s%s)", | ||
111 | (u8)(__entry->e >> 56), (u8)__entry->e, | ||
112 | __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), | ||
113 | (__entry->e & (1<<11)) ? "logical" : "physical", | ||
114 | (__entry->e & (1<<15)) ? "level" : "edge", | ||
115 | (__entry->e & (1<<16)) ? "|masked" : "") | ||
116 | ); | ||
117 | |||
98 | TRACE_EVENT(kvm_msi_set_irq, | 118 | TRACE_EVENT(kvm_msi_set_irq, |
99 | TP_PROTO(__u64 address, __u64 data), | 119 | TP_PROTO(__u64 address, __u64 data), |
100 | TP_ARGS(address, data), | 120 | TP_ARGS(address, data), |
@@ -205,24 +225,26 @@ TRACE_EVENT(kvm_fpu, | |||
205 | ); | 225 | ); |
206 | 226 | ||
207 | TRACE_EVENT(kvm_age_page, | 227 | TRACE_EVENT(kvm_age_page, |
208 | TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), | 228 | TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref), |
209 | TP_ARGS(hva, slot, ref), | 229 | TP_ARGS(gfn, level, slot, ref), |
210 | 230 | ||
211 | TP_STRUCT__entry( | 231 | TP_STRUCT__entry( |
212 | __field( u64, hva ) | 232 | __field( u64, hva ) |
213 | __field( u64, gfn ) | 233 | __field( u64, gfn ) |
234 | __field( u8, level ) | ||
214 | __field( u8, referenced ) | 235 | __field( u8, referenced ) |
215 | ), | 236 | ), |
216 | 237 | ||
217 | TP_fast_assign( | 238 | TP_fast_assign( |
218 | __entry->hva = hva; | 239 | __entry->gfn = gfn; |
219 | __entry->gfn = | 240 | __entry->level = level; |
220 | slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); | 241 | __entry->hva = ((gfn - slot->base_gfn) << |
242 | PAGE_SHIFT) + slot->userspace_addr; | ||
221 | __entry->referenced = ref; | 243 | __entry->referenced = ref; |
222 | ), | 244 | ), |
223 | 245 | ||
224 | TP_printk("hva %llx gfn %llx %s", | 246 | TP_printk("hva %llx gfn %llx level %u %s", |
225 | __entry->hva, __entry->gfn, | 247 | __entry->hva, __entry->gfn, __entry->level, |
226 | __entry->referenced ? "YOUNG" : "OLD") | 248 | __entry->referenced ? "YOUNG" : "OLD") |
227 | ); | 249 | ); |
228 | 250 | ||
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cf3a2ff440e4..60768822b140 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -654,9 +654,7 @@ struct kvm_ppc_smmu_info { | |||
654 | #endif | 654 | #endif |
655 | /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ | 655 | /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ |
656 | #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 | 656 | #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 |
657 | #ifdef __KVM_HAVE_USER_NMI | ||
658 | #define KVM_CAP_USER_NMI 22 | 657 | #define KVM_CAP_USER_NMI 22 |
659 | #endif | ||
660 | #ifdef __KVM_HAVE_GUEST_DEBUG | 658 | #ifdef __KVM_HAVE_GUEST_DEBUG |
661 | #define KVM_CAP_SET_GUEST_DEBUG 23 | 659 | #define KVM_CAP_SET_GUEST_DEBUG 23 |
662 | #endif | 660 | #endif |
@@ -738,9 +736,7 @@ struct kvm_ppc_smmu_info { | |||
738 | #define KVM_CAP_PPC_GET_SMMU_INFO 78 | 736 | #define KVM_CAP_PPC_GET_SMMU_INFO 78 |
739 | #define KVM_CAP_S390_COW 79 | 737 | #define KVM_CAP_S390_COW 79 |
740 | #define KVM_CAP_PPC_ALLOC_HTAB 80 | 738 | #define KVM_CAP_PPC_ALLOC_HTAB 80 |
741 | #ifdef __KVM_HAVE_READONLY_MEM | ||
742 | #define KVM_CAP_READONLY_MEM 81 | 739 | #define KVM_CAP_READONLY_MEM 81 |
743 | #endif | ||
744 | #define KVM_CAP_IRQFD_RESAMPLE 82 | 740 | #define KVM_CAP_IRQFD_RESAMPLE 82 |
745 | #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 | 741 | #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 |
746 | #define KVM_CAP_PPC_HTAB_FD 84 | 742 | #define KVM_CAP_PPC_HTAB_FD 84 |
@@ -947,15 +943,25 @@ struct kvm_device_attr { | |||
947 | __u64 addr; /* userspace address of attr data */ | 943 | __u64 addr; /* userspace address of attr data */ |
948 | }; | 944 | }; |
949 | 945 | ||
950 | #define KVM_DEV_TYPE_FSL_MPIC_20 1 | ||
951 | #define KVM_DEV_TYPE_FSL_MPIC_42 2 | ||
952 | #define KVM_DEV_TYPE_XICS 3 | ||
953 | #define KVM_DEV_TYPE_VFIO 4 | ||
954 | #define KVM_DEV_VFIO_GROUP 1 | 946 | #define KVM_DEV_VFIO_GROUP 1 |
955 | #define KVM_DEV_VFIO_GROUP_ADD 1 | 947 | #define KVM_DEV_VFIO_GROUP_ADD 1 |
956 | #define KVM_DEV_VFIO_GROUP_DEL 2 | 948 | #define KVM_DEV_VFIO_GROUP_DEL 2 |
957 | #define KVM_DEV_TYPE_ARM_VGIC_V2 5 | 949 | |
958 | #define KVM_DEV_TYPE_FLIC 6 | 950 | enum kvm_device_type { |
951 | KVM_DEV_TYPE_FSL_MPIC_20 = 1, | ||
952 | #define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20 | ||
953 | KVM_DEV_TYPE_FSL_MPIC_42, | ||
954 | #define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42 | ||
955 | KVM_DEV_TYPE_XICS, | ||
956 | #define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS | ||
957 | KVM_DEV_TYPE_VFIO, | ||
958 | #define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO | ||
959 | KVM_DEV_TYPE_ARM_VGIC_V2, | ||
960 | #define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 | ||
961 | KVM_DEV_TYPE_FLIC, | ||
962 | #define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC | ||
963 | KVM_DEV_TYPE_MAX, | ||
964 | }; | ||
959 | 965 | ||
960 | /* | 966 | /* |
961 | * ioctls for VM fds | 967 | * ioctls for VM fds |
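Turning the device types into an enum gives the compiler and debugger visibility of the full set (plus a KVM_DEV_TYPE_MAX sentinel), while the self-referential #defines keep preprocessor feature tests in existing userspace working. For example:

	#ifdef KVM_DEV_TYPE_ARM_VGIC_V2
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_ARM_VGIC_V2,
		};

		ret = ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
		/* on success, cd.fd holds the new device fd */
	#endif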
@@ -1093,7 +1099,7 @@ struct kvm_s390_ucas_mapping { | |||
1093 | #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) | 1099 | #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) |
1094 | #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) | 1100 | #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) |
1095 | #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) | 1101 | #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) |
1096 | /* Available with KVM_CAP_NMI */ | 1102 | /* Available with KVM_CAP_USER_NMI */ |
1097 | #define KVM_NMI _IO(KVMIO, 0x9a) | 1103 | #define KVM_NMI _IO(KVMIO, 0x9a) |
1098 | /* Available with KVM_CAP_SET_GUEST_DEBUG */ | 1104 | /* Available with KVM_CAP_SET_GUEST_DEBUG */ |
1099 | #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) | 1105 | #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) |
diff --git a/mm/gup.c b/mm/gup.c | |||
@@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, | |||
281 | fault_flags |= FAULT_FLAG_ALLOW_RETRY; | 281 | fault_flags |= FAULT_FLAG_ALLOW_RETRY; |
282 | if (*flags & FOLL_NOWAIT) | 282 | if (*flags & FOLL_NOWAIT) |
283 | fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; | 283 | fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; |
284 | if (*flags & FOLL_TRIED) { | ||
285 | VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY); | ||
286 | fault_flags |= FAULT_FLAG_TRIED; | ||
287 | } | ||
284 | 288 | ||
285 | ret = handle_mm_fault(mm, vma, address, fault_flags); | 289 | ret = handle_mm_fault(mm, vma, address, fault_flags); |
286 | if (ret & VM_FAULT_ERROR) { | 290 | if (ret & VM_FAULT_ERROR) { |
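The new FOLL_TRIED branch lets a second faultin attempt tell the fault handler that a retry already happened, translating the gup flag into FAULT_FLAG_TRIED (the VM_WARN_ON_ONCE guards against combining it with ALLOW_RETRY). A minimal sketch of the two-pass translation; the flag values and the first_pass parameter are stand-ins, not the kernel's definitions:

#include <assert.h>
#include <stdio.h>

/* Stand-in flag values; the real ones live in the mm headers. */
#define FOLL_TRIED             0x1
#define FAULT_FLAG_ALLOW_RETRY 0x2
#define FAULT_FLAG_TRIED       0x4

/* Translate gup flags into fault flags the way the hunk above does. */
static unsigned int to_fault_flags(unsigned int gup_flags, int first_pass)
{
	unsigned int fault_flags = 0;

	if (first_pass)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (gup_flags & FOLL_TRIED) {
		/* Retrying and "already tried" are mutually exclusive. */
		assert(!(fault_flags & FAULT_FLAG_ALLOW_RETRY));
		fault_flags |= FAULT_FLAG_TRIED;
	}
	return fault_flags;
}

int main(void)
{
	/* Pass 1: the handler may drop mmap_sem and ask for a retry. */
	printf("pass 1 flags: %#x\n", to_fault_flags(0, 1));
	/* Pass 2: tell the handler this fault was already tried once. */
	printf("pass 2 flags: %#x\n", to_fault_flags(FOLL_TRIED, 0));
	return 0;
}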
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 950813b1eb36..2c8da9825fe3 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c | |||
@@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm) | |||
107 | * existed or not. | 107 | * existed or not. |
108 | */ | 108 | */ |
109 | int __mmu_notifier_clear_flush_young(struct mm_struct *mm, | 109 | int __mmu_notifier_clear_flush_young(struct mm_struct *mm, |
110 | unsigned long address) | 110 | unsigned long start, |
111 | unsigned long end) | ||
111 | { | 112 | { |
112 | struct mmu_notifier *mn; | 113 | struct mmu_notifier *mn; |
113 | int young = 0, id; | 114 | int young = 0, id; |
@@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, | |||
115 | id = srcu_read_lock(&srcu); | 116 | id = srcu_read_lock(&srcu); |
116 | hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { | 117 | hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
117 | if (mn->ops->clear_flush_young) | 118 | if (mn->ops->clear_flush_young) |
118 | young |= mn->ops->clear_flush_young(mn, mm, address); | 119 | young |= mn->ops->clear_flush_young(mn, mm, start, end); |
119 | } | 120 | } |
120 | srcu_read_unlock(&srcu, id); | 121 | srcu_read_unlock(&srcu, id); |
121 | 122 | ||
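Widening clear_flush_young from a single address to a [start, end) range lets one notifier walk age a whole span; each registered notifier reports whether anything in the range was young, and the results are OR-ed together exactly as in the loop above. A standalone model of that aggregation, with a mock ops table rather than the kernel's mmu_notifier API:

#include <stdio.h>

struct mock_notifier {
	/* Returns nonzero if any page in [start, end) was referenced. */
	int (*clear_flush_young)(unsigned long start, unsigned long end);
};

static int timer_young(unsigned long start, unsigned long end)
{
	return end - start >= 4096;	/* pretend one page was referenced */
}

int main(void)
{
	struct mock_notifier notifiers[] = { { timer_young }, { 0 } };
	unsigned long start = 0x1000, end = 0x3000;
	int young = 0;
	unsigned int i;

	for (i = 0; i < 2; i++)
		if (notifiers[i].clear_flush_young)
			young |= notifiers[i].clear_flush_young(start, end);

	printf("young = %d\n", young);
	return 0;
}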
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, | |||
1355 | continue; /* don't unmap */ | 1355 | continue; /* don't unmap */ |
1356 | } | 1356 | } |
1357 | 1357 | ||
1358 | if (ptep_clear_flush_young_notify(vma, address, pte)) | 1358 | /* |
1359 | * No need for _notify because we're within an | ||
1360 | * mmu_notifier_invalidate_range_{start|end} scope. | ||
1361 | */ | ||
1362 | if (ptep_clear_flush_young(vma, address, pte)) | ||
1359 | continue; | 1363 | continue; |
1360 | 1364 | ||
1361 | /* Nuke the page table entry. */ | 1365 | /* Nuke the page table entry. */ |
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 73eba793b17f..862967852d5a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
@@ -36,21 +36,22 @@ | |||
36 | * How the whole thing works (courtesy of Christoffer Dall): | 36 | * How the whole thing works (courtesy of Christoffer Dall): |
37 | * | 37 | * |
38 | * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if | 38 | * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if |
39 | * something is pending | 39 | * something is pending on the CPU interface. |
40 | * - VGIC pending interrupts are stored on the vgic.irq_state vgic | 40 | * - Interrupts that are pending on the distributor are stored on the |
41 | * bitmap (this bitmap is updated by both user land ioctls and guest | 41 | * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land |
42 | * mmio ops, and other in-kernel peripherals such as the | 42 | * ioctls and guest mmio ops, and other in-kernel peripherals such as the |
43 | * arch. timers) and indicate the 'wire' state. | 43 | * arch. timers). |
44 | * - Every time the bitmap changes, the irq_pending_on_cpu oracle is | 44 | * - Every time the bitmap changes, the irq_pending_on_cpu oracle is |
45 | * recalculated | 45 | * recalculated |
46 | * - To calculate the oracle, we need info for each cpu from | 46 | * - To calculate the oracle, we need info for each cpu from |
47 | * compute_pending_for_cpu, which considers: | 47 | * compute_pending_for_cpu, which considers: |
48 | * - PPI: dist->irq_state & dist->irq_enable | 48 | * - PPI: dist->irq_pending & dist->irq_enable |
49 | * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target | 49 | * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target |
50 | * - irq_spi_target is a 'formatted' version of the GICD_ICFGR | 50 | * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn |
51 | * registers, stored on each vcpu. We only keep one bit of | 51 | * registers, stored on each vcpu. We only keep one bit of |
52 | * information per interrupt, making sure that only one vcpu can | 52 | * information per interrupt, making sure that only one vcpu can |
53 | * accept the interrupt. | 53 | * accept the interrupt. |
54 | * - If any of the above state changes, we must recalculate the oracle. | ||
54 | * - The same is true when injecting an interrupt, except that we only | 55 | * - The same is true when injecting an interrupt, except that we only |
55 | * consider a single interrupt at a time. The irq_spi_cpu array | 56 | * consider a single interrupt at a time. The irq_spi_cpu array |
56 | * contains the target CPU for each SPI. | 57 | * contains the target CPU for each SPI. |
@@ -60,13 +61,18 @@ | |||
60 | * the 'line' again. This is achieved as such: | 61 | * the 'line' again. This is achieved as such: |
61 | * | 62 | * |
62 | * - When a level interrupt is moved onto a vcpu, the corresponding | 63 | * - When a level interrupt is moved onto a vcpu, the corresponding |
63 | * bit in irq_active is set. As long as this bit is set, the line | 64 | * bit in irq_queued is set. As long as this bit is set, the line |
64 | * will be ignored for further interrupts. The interrupt is injected | 65 | * will be ignored for further interrupts. The interrupt is injected |
65 | * into the vcpu with the GICH_LR_EOI bit set (generate a | 66 | * into the vcpu with the GICH_LR_EOI bit set (generate a |
66 | * maintenance interrupt on EOI). | 67 | * maintenance interrupt on EOI). |
67 | * - When the interrupt is EOIed, the maintenance interrupt fires, | 68 | * - When the interrupt is EOIed, the maintenance interrupt fires, |
68 | * and clears the corresponding bit in irq_active. This allow the | 69 | * and clears the corresponding bit in irq_queued. This allows the |
69 | * interrupt line to be sampled again. | 70 | * interrupt line to be sampled again. |
71 | * - Note that level-triggered interrupts can also be set to pending by | ||
72 | * writes to GICD_ISPENDRn; in that case, lowering the external input | ||
73 | * line does not cause the interrupt to become inactive. | ||
74 | * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become | ||
75 | * inactive as long as the external input line is held high. | ||
70 | */ | 76 | */ |
71 | 77 | ||
72 | #define VGIC_ADDR_UNDEF (-1) | 78 | #define VGIC_ADDR_UNDEF (-1) |
@@ -89,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); | |||
89 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); | 95 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); |
90 | static void vgic_update_state(struct kvm *kvm); | 96 | static void vgic_update_state(struct kvm *kvm); |
91 | static void vgic_kick_vcpus(struct kvm *kvm); | 97 | static void vgic_kick_vcpus(struct kvm *kvm); |
98 | static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); | ||
92 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); | 99 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); |
93 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); | 100 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); |
94 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); | 101 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); |
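The pending "oracle" described in the header comment above boils down to two masked scans, one over the private interrupts and one over the shared ones. A compact userspace model of that computation; the sizes and plain bool arrays are illustrative, the kernel goes through the vgic bitmap helpers:

#include <stdbool.h>
#include <stdio.h>

#define NR_PRIVATE 32	/* SGIs + PPIs */
#define NR_SHARED  96	/* SPIs, assuming 128 IRQs total */

static bool cpu_has_pending(const bool *pending, const bool *enabled,
			    const bool *spi_target)
{
	int i;

	for (i = 0; i < NR_PRIVATE; i++)	/* PPI: pending & enable */
		if (pending[i] && enabled[i])
			return true;
	for (i = 0; i < NR_SHARED; i++)		/* SPI: ... & spi_target */
		if (pending[NR_PRIVATE + i] && enabled[NR_PRIVATE + i] &&
		    spi_target[i])
			return true;
	return false;
}

int main(void)
{
	static bool pending[NR_PRIVATE + NR_SHARED];
	static bool enabled[NR_PRIVATE + NR_SHARED];
	static bool spi_target[NR_SHARED];

	pending[34] = enabled[34] = true;	/* SPI 34 pending and enabled */
	spi_target[34 - NR_PRIVATE] = true;	/* ...and routed to this vcpu */

	printf("irq_pending_on_cpu bit: %d\n",
	       cpu_has_pending(pending, enabled, spi_target));
	return 0;
}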
@@ -99,10 +106,8 @@ static const struct vgic_ops *vgic_ops; | |||
99 | static const struct vgic_params *vgic; | 106 | static const struct vgic_params *vgic; |
100 | 107 | ||
101 | /* | 108 | /* |
102 | * struct vgic_bitmap contains unions that provide two views of | 109 | * struct vgic_bitmap contains a bitmap made of unsigned longs, but |
103 | * the same data. In one case it is an array of registers of | 110 | * extracts u32s out of them. |
104 | * u32's, and in the other case it is a bitmap of unsigned | ||
105 | * longs. | ||
106 | * | 111 | * |
107 | * This does not work on 64-bit BE systems, because the bitmap access | 112 | * This does not work on 64-bit BE systems, because the bitmap access |
108 | * will store two consecutive 32-bit words with the higher-addressed | 113 | * will store two consecutive 32-bit words with the higher-addressed |
@@ -118,23 +123,45 @@ static const struct vgic_params *vgic; | |||
118 | #define REG_OFFSET_SWIZZLE 0 | 123 | #define REG_OFFSET_SWIZZLE 0 |
119 | #endif | 124 | #endif |
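REG_OFFSET_SWIZZLE exists because a u32 view into an array of 64-bit big-endian longs finds bit 0 in the higher-addressed word, so the accessor below XORs the register index on such systems. A small host-endianness probe showing which u32 half ends up holding bit 0:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint64_t bitmap = 1;	/* a 64-bit long with bit 0 set */
	uint32_t half[2];

	/* View the 64-bit word as two consecutive u32 "registers". */
	memcpy(half, &bitmap, sizeof(half));

	/* Little endian prints half[0]=1; 64-bit big endian prints
	 * half[1]=1, which is what the index ^ 1 swizzle corrects for. */
	printf("half[0]=%u half[1]=%u\n",
	       (unsigned)half[0], (unsigned)half[1]);
	return 0;
}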
120 | 125 | ||
126 | static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) | ||
127 | { | ||
128 | int nr_longs; | ||
129 | |||
130 | nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); | ||
131 | |||
132 | b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); | ||
133 | if (!b->private) | ||
134 | return -ENOMEM; | ||
135 | |||
136 | b->shared = b->private + nr_cpus; | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | static void vgic_free_bitmap(struct vgic_bitmap *b) | ||
142 | { | ||
143 | kfree(b->private); | ||
144 | b->private = NULL; | ||
145 | b->shared = NULL; | ||
146 | } | ||
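vgic_init_bitmap carves one allocation into nr_cpus per-cpu longs (the 32 private IRQs of a vcpu fit in a single unsigned long) followed by enough longs for the shared interrupts, with shared simply pointing past the private area. The same layout in a malloc-based sketch, using illustrative sizes:

#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)
#define NR_PRIVATE	32

struct bitmap {
	unsigned long *private;	/* one long per cpu */
	unsigned long *shared;	/* tail of the same allocation */
};

static int bitmap_init(struct bitmap *b, int nr_cpus, int nr_irqs)
{
	size_t nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - NR_PRIVATE);

	b->private = calloc(nr_longs, sizeof(unsigned long));
	if (!b->private)
		return -1;
	b->shared = b->private + nr_cpus;
	return 0;
}

int main(void)
{
	struct bitmap b;

	if (bitmap_init(&b, 4, 128))	/* 4 vcpus, 128 IRQs */
		return 1;
	printf("shared starts %td longs in\n", b.shared - b.private);
	free(b.private);
	return 0;
}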
147 | |||
121 | static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, | 148 | static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, |
122 | int cpuid, u32 offset) | 149 | int cpuid, u32 offset) |
123 | { | 150 | { |
124 | offset >>= 2; | 151 | offset >>= 2; |
125 | if (!offset) | 152 | if (!offset) |
126 | return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); | 153 | return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; |
127 | else | 154 | else |
128 | return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); | 155 | return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); |
129 | } | 156 | } |
130 | 157 | ||
131 | static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, | 158 | static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, |
132 | int cpuid, int irq) | 159 | int cpuid, int irq) |
133 | { | 160 | { |
134 | if (irq < VGIC_NR_PRIVATE_IRQS) | 161 | if (irq < VGIC_NR_PRIVATE_IRQS) |
135 | return test_bit(irq, x->percpu[cpuid].reg_ul); | 162 | return test_bit(irq, x->private + cpuid); |
136 | 163 | ||
137 | return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); | 164 | return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); |
138 | } | 165 | } |
139 | 166 | ||
140 | static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | 167 | static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, |
@@ -143,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | |||
143 | unsigned long *reg; | 170 | unsigned long *reg; |
144 | 171 | ||
145 | if (irq < VGIC_NR_PRIVATE_IRQS) { | 172 | if (irq < VGIC_NR_PRIVATE_IRQS) { |
146 | reg = x->percpu[cpuid].reg_ul; | 173 | reg = x->private + cpuid; |
147 | } else { | 174 | } else { |
148 | reg = x->shared.reg_ul; | 175 | reg = x->shared; |
149 | irq -= VGIC_NR_PRIVATE_IRQS; | 176 | irq -= VGIC_NR_PRIVATE_IRQS; |
150 | } | 177 | } |
151 | 178 | ||
@@ -157,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | |||
157 | 184 | ||
158 | static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) | 185 | static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) |
159 | { | 186 | { |
160 | if (unlikely(cpuid >= VGIC_MAX_CPUS)) | 187 | return x->private + cpuid; |
161 | return NULL; | ||
162 | return x->percpu[cpuid].reg_ul; | ||
163 | } | 188 | } |
164 | 189 | ||
165 | static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) | 190 | static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) |
166 | { | 191 | { |
167 | return x->shared.reg_ul; | 192 | return x->shared; |
193 | } | ||
194 | |||
195 | static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) | ||
196 | { | ||
197 | int size; | ||
198 | |||
199 | size = nr_cpus * VGIC_NR_PRIVATE_IRQS; | ||
200 | size += nr_irqs - VGIC_NR_PRIVATE_IRQS; | ||
201 | |||
202 | x->private = kzalloc(size, GFP_KERNEL); | ||
203 | if (!x->private) | ||
204 | return -ENOMEM; | ||
205 | |||
206 | x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static void vgic_free_bytemap(struct vgic_bytemap *b) | ||
211 | { | ||
212 | kfree(b->private); | ||
213 | b->private = NULL; | ||
214 | b->shared = NULL; | ||
168 | } | 215 | } |
169 | 216 | ||
170 | static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) | 217 | static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) |
171 | { | 218 | { |
172 | offset >>= 2; | 219 | u32 *reg; |
173 | BUG_ON(offset > (VGIC_NR_IRQS / 4)); | 220 | |
174 | if (offset < 8) | 221 | if (offset < VGIC_NR_PRIVATE_IRQS) { |
175 | return x->percpu[cpuid] + offset; | 222 | reg = x->private; |
176 | else | 223 | offset += cpuid * VGIC_NR_PRIVATE_IRQS; |
177 | return x->shared + offset - 8; | 224 | } else { |
225 | reg = x->shared; | ||
226 | offset -= VGIC_NR_PRIVATE_IRQS; | ||
227 | } | ||
228 | |||
229 | return reg + (offset / sizeof(u32)); | ||
178 | } | 230 | } |
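In the reworked accessor above, byte offsets below VGIC_NR_PRIVATE_IRQS index the per-cpu area (one byte per IRQ, so the offset doubles as an IRQ number) while larger offsets land in the shared area. A worked check of the index arithmetic, mirroring the function rather than calling kernel code:

#include <stdio.h>

#define VGIC_NR_PRIVATE_IRQS 32

static void locate(int cpuid, unsigned int offset)
{
	unsigned int orig = offset;
	const char *area;

	if (offset < VGIC_NR_PRIVATE_IRQS) {
		area = "private";
		offset += cpuid * VGIC_NR_PRIVATE_IRQS;
	} else {
		area = "shared";
		offset -= VGIC_NR_PRIVATE_IRQS;
	}
	/* Dividing by sizeof(u32) picks the u32 word within the area. */
	printf("cpu%d offset %u -> %s word %u\n", cpuid, orig, area, offset / 4);
}

int main(void)
{
	locate(0, 0);	/* cpu0, byte of IRQ0  -> private word 0  */
	locate(2, 8);	/* cpu2, byte of IRQ8  -> private word 18 */
	locate(0, 40);	/* SPI byte offset 40  -> shared word 2   */
	return 0;
}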
179 | 231 | ||
180 | #define VGIC_CFG_LEVEL 0 | 232 | #define VGIC_CFG_LEVEL 0 |
@@ -196,46 +248,81 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) | |||
196 | return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); | 248 | return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); |
197 | } | 249 | } |
198 | 250 | ||
199 | static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) | 251 | static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) |
252 | { | ||
253 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
254 | |||
255 | return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); | ||
256 | } | ||
257 | |||
258 | static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) | ||
259 | { | ||
260 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
261 | |||
262 | vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); | ||
263 | } | ||
264 | |||
265 | static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) | ||
266 | { | ||
267 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
268 | |||
269 | vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); | ||
270 | } | ||
271 | |||
272 | static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) | ||
200 | { | 273 | { |
201 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 274 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
202 | 275 | ||
203 | return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); | 276 | return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); |
204 | } | 277 | } |
205 | 278 | ||
206 | static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) | 279 | static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) |
207 | { | 280 | { |
208 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 281 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
209 | 282 | ||
210 | vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); | 283 | vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); |
211 | } | 284 | } |
212 | 285 | ||
213 | static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) | 286 | static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) |
214 | { | 287 | { |
215 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 288 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
216 | 289 | ||
217 | vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); | 290 | vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); |
291 | } | ||
292 | |||
293 | static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) | ||
294 | { | ||
295 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
296 | |||
297 | return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); | ||
298 | } | ||
299 | |||
300 | static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) | ||
301 | { | ||
302 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
303 | |||
304 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); | ||
218 | } | 305 | } |
219 | 306 | ||
220 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) | 307 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) |
221 | { | 308 | { |
222 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 309 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
223 | 310 | ||
224 | return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); | 311 | return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); |
225 | } | 312 | } |
226 | 313 | ||
227 | static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) | 314 | static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) |
228 | { | 315 | { |
229 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 316 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
230 | 317 | ||
231 | vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); | 318 | vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); |
232 | } | 319 | } |
233 | 320 | ||
234 | static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) | 321 | static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) |
235 | { | 322 | { |
236 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 323 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
237 | 324 | ||
238 | vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); | 325 | vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); |
239 | } | 326 | } |
240 | 327 | ||
241 | static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) | 328 | static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) |
@@ -256,6 +343,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) | |||
256 | vcpu->arch.vgic_cpu.pending_shared); | 343 | vcpu->arch.vgic_cpu.pending_shared); |
257 | } | 344 | } |
258 | 345 | ||
346 | static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) | ||
347 | { | ||
348 | return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); | ||
349 | } | ||
350 | |||
259 | static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) | 351 | static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) |
260 | { | 352 | { |
261 | return le32_to_cpu(*((u32 *)mmio->data)) & mask; | 353 | return le32_to_cpu(*((u32 *)mmio->data)) & mask; |
@@ -347,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, | |||
347 | 439 | ||
348 | case 4: /* GICD_TYPER */ | 440 | case 4: /* GICD_TYPER */ |
349 | reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; | 441 | reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; |
350 | reg |= (VGIC_NR_IRQS >> 5) - 1; | 442 | reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; |
351 | vgic_reg_access(mmio, ®, word_offset, | 443 | vgic_reg_access(mmio, ®, word_offset, |
352 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | 444 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); |
353 | break; | 445 | break; |
@@ -409,11 +501,33 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, | |||
409 | struct kvm_exit_mmio *mmio, | 501 | struct kvm_exit_mmio *mmio, |
410 | phys_addr_t offset) | 502 | phys_addr_t offset) |
411 | { | 503 | { |
412 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, | 504 | u32 *reg, orig; |
413 | vcpu->vcpu_id, offset); | 505 | u32 level_mask; |
506 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
507 | |||
508 | reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); | ||
509 | level_mask = (~(*reg)); | ||
510 | |||
511 | /* Mark both level and edge triggered irqs as pending */ | ||
512 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); | ||
513 | orig = *reg; | ||
414 | vgic_reg_access(mmio, reg, offset, | 514 | vgic_reg_access(mmio, reg, offset, |
415 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); | 515 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); |
516 | |||
416 | if (mmio->is_write) { | 517 | if (mmio->is_write) { |
518 | /* Set the soft-pending flag only for level-triggered irqs */ | ||
519 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, | ||
520 | vcpu->vcpu_id, offset); | ||
521 | vgic_reg_access(mmio, reg, offset, | ||
522 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); | ||
523 | *reg &= level_mask; | ||
524 | |||
525 | /* Ignore writes to SGIs */ | ||
526 | if (offset < 2) { | ||
527 | *reg &= ~0xffff; | ||
528 | *reg |= orig & 0xffff; | ||
529 | } | ||
530 | |||
417 | vgic_update_state(vcpu->kvm); | 531 | vgic_update_state(vcpu->kvm); |
418 | return true; | 532 | return true; |
419 | } | 533 | } |
@@ -425,11 +539,34 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, | |||
425 | struct kvm_exit_mmio *mmio, | 539 | struct kvm_exit_mmio *mmio, |
426 | phys_addr_t offset) | 540 | phys_addr_t offset) |
427 | { | 541 | { |
428 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, | 542 | u32 *level_active; |
429 | vcpu->vcpu_id, offset); | 543 | u32 *reg, orig; |
544 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
545 | |||
546 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); | ||
547 | orig = *reg; | ||
430 | vgic_reg_access(mmio, reg, offset, | 548 | vgic_reg_access(mmio, reg, offset, |
431 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | 549 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); |
432 | if (mmio->is_write) { | 550 | if (mmio->is_write) { |
551 | /* Re-set level triggered level-active interrupts */ | ||
552 | level_active = vgic_bitmap_get_reg(&dist->irq_level, | ||
553 | vcpu->vcpu_id, offset); | ||
554 | reg = vgic_bitmap_get_reg(&dist->irq_pending, | ||
555 | vcpu->vcpu_id, offset); | ||
556 | *reg |= *level_active; | ||
557 | |||
558 | /* Ignore writes to SGIs */ | ||
559 | if (offset < 2) { | ||
560 | *reg &= ~0xffff; | ||
561 | *reg |= orig & 0xffff; | ||
562 | } | ||
563 | |||
564 | /* Clear soft-pending flags */ | ||
565 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, | ||
566 | vcpu->vcpu_id, offset); | ||
567 | vgic_reg_access(mmio, reg, offset, | ||
568 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | ||
569 | |||
433 | vgic_update_state(vcpu->kvm); | 570 | vgic_update_state(vcpu->kvm); |
434 | return true; | 571 | return true; |
435 | } | 572 | } |
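Between them, the two pending handlers above and the injection path further down maintain three bits per level-triggered interrupt: the cached line level, the soft-pend flag written via GICD_ISPENDRn, and the pending state the guest observes. A self-contained model of those transitions (level-triggered case only; edge interrupts never use soft-pend):

#include <stdbool.h>
#include <stdio.h>

/* Distributor state for one level-triggered interrupt. */
struct irq_state {
	bool level;	/* external input line */
	bool soft_pend;	/* pended by a GICD_ISPENDRn write */
	bool pending;	/* what the guest observes */
};

static void wire_change(struct irq_state *s, bool level)
{
	s->level = level;
	if (level)
		s->pending = true;
	else if (!s->soft_pend)		/* ISPENDR write keeps it pending */
		s->pending = false;
}

static void write_ispendr(struct irq_state *s)
{
	s->soft_pend = true;
	s->pending = true;
}

static void write_icpendr(struct irq_state *s)
{
	s->soft_pend = false;
	s->pending = s->level;		/* line held high -> still pending */
}

int main(void)
{
	struct irq_state s = { false, false, false };

	write_ispendr(&s);
	wire_change(&s, false);		/* lowering the line... */
	printf("ISPENDR then low line: pending=%d\n", s.pending);	/* 1 */

	write_icpendr(&s);
	printf("ICPENDR with low line: pending=%d\n", s.pending);	/* 0 */
	return 0;
}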
@@ -651,9 +788,9 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
651 | * is fine, then we are only setting a few bits that were | 788 | * is fine, then we are only setting a few bits that were |
652 | * already set. | 789 | * already set. |
653 | */ | 790 | */ |
654 | vgic_dist_irq_set(vcpu, lr.irq); | 791 | vgic_dist_irq_set_pending(vcpu, lr.irq); |
655 | if (lr.irq < VGIC_NR_SGIS) | 792 | if (lr.irq < VGIC_NR_SGIS) |
656 | dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; | 793 | *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; |
657 | lr.state &= ~LR_STATE_PENDING; | 794 | lr.state &= ~LR_STATE_PENDING; |
658 | vgic_set_lr(vcpu, i, lr); | 795 | vgic_set_lr(vcpu, i, lr); |
659 | 796 | ||
@@ -662,8 +799,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
662 | * active), then the LR does not hold any useful info and can | 799 | * active), then the LR does not hold any useful info and can |
663 | * be marked as free for other use. | 800 | * be marked as free for other use. |
664 | */ | 801 | */ |
665 | if (!(lr.state & LR_STATE_MASK)) | 802 | if (!(lr.state & LR_STATE_MASK)) { |
666 | vgic_retire_lr(i, lr.irq, vcpu); | 803 | vgic_retire_lr(i, lr.irq, vcpu); |
804 | vgic_irq_clear_queued(vcpu, lr.irq); | ||
805 | } | ||
667 | 806 | ||
668 | /* Finally update the VGIC state. */ | 807 | /* Finally update the VGIC state. */ |
669 | vgic_update_state(vcpu->kvm); | 808 | vgic_update_state(vcpu->kvm); |
@@ -677,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
677 | { | 816 | { |
678 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 817 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
679 | int sgi; | 818 | int sgi; |
680 | int min_sgi = (offset & ~0x3) * 4; | 819 | int min_sgi = (offset & ~0x3); |
681 | int max_sgi = min_sgi + 3; | 820 | int max_sgi = min_sgi + 3; |
682 | int vcpu_id = vcpu->vcpu_id; | 821 | int vcpu_id = vcpu->vcpu_id; |
683 | u32 reg = 0; | 822 | u32 reg = 0; |
@@ -685,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
685 | /* Copy source SGIs from distributor side */ | 824 | /* Copy source SGIs from distributor side */ |
686 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | 825 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { |
687 | int shift = 8 * (sgi - min_sgi); | 826 | int shift = 8 * (sgi - min_sgi); |
688 | reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; | 827 | reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; |
689 | } | 828 | } |
690 | 829 | ||
691 | mmio_data_write(mmio, ~0, reg); | 830 | mmio_data_write(mmio, ~0, reg); |
@@ -698,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
698 | { | 837 | { |
699 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 838 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
700 | int sgi; | 839 | int sgi; |
701 | int min_sgi = (offset & ~0x3) * 4; | 840 | int min_sgi = (offset & ~0x3); |
702 | int max_sgi = min_sgi + 3; | 841 | int max_sgi = min_sgi + 3; |
703 | int vcpu_id = vcpu->vcpu_id; | 842 | int vcpu_id = vcpu->vcpu_id; |
704 | u32 reg; | 843 | u32 reg; |
@@ -709,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
709 | /* Clear pending SGIs on the distributor */ | 848 | /* Clear pending SGIs on the distributor */ |
710 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | 849 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { |
711 | u8 mask = reg >> (8 * (sgi - min_sgi)); | 850 | u8 mask = reg >> (8 * (sgi - min_sgi)); |
851 | u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); | ||
712 | if (set) { | 852 | if (set) { |
713 | if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) | 853 | if ((*src & mask) != mask) |
714 | updated = true; | 854 | updated = true; |
715 | dist->irq_sgi_sources[vcpu_id][sgi] |= mask; | 855 | *src |= mask; |
716 | } else { | 856 | } else { |
717 | if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) | 857 | if (*src & mask) |
718 | updated = true; | 858 | updated = true; |
719 | dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; | 859 | *src &= ~mask; |
720 | } | 860 | } |
721 | } | 861 | } |
722 | 862 | ||
@@ -755,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, | |||
755 | struct mmio_range { | 895 | struct mmio_range { |
756 | phys_addr_t base; | 896 | phys_addr_t base; |
757 | unsigned long len; | 897 | unsigned long len; |
898 | int bits_per_irq; | ||
758 | bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, | 899 | bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, |
759 | phys_addr_t offset); | 900 | phys_addr_t offset); |
760 | }; | 901 | }; |
@@ -763,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = { | |||
763 | { | 904 | { |
764 | .base = GIC_DIST_CTRL, | 905 | .base = GIC_DIST_CTRL, |
765 | .len = 12, | 906 | .len = 12, |
907 | .bits_per_irq = 0, | ||
766 | .handle_mmio = handle_mmio_misc, | 908 | .handle_mmio = handle_mmio_misc, |
767 | }, | 909 | }, |
768 | { | 910 | { |
769 | .base = GIC_DIST_IGROUP, | 911 | .base = GIC_DIST_IGROUP, |
770 | .len = VGIC_NR_IRQS / 8, | 912 | .len = VGIC_MAX_IRQS / 8, |
913 | .bits_per_irq = 1, | ||
771 | .handle_mmio = handle_mmio_raz_wi, | 914 | .handle_mmio = handle_mmio_raz_wi, |
772 | }, | 915 | }, |
773 | { | 916 | { |
774 | .base = GIC_DIST_ENABLE_SET, | 917 | .base = GIC_DIST_ENABLE_SET, |
775 | .len = VGIC_NR_IRQS / 8, | 918 | .len = VGIC_MAX_IRQS / 8, |
919 | .bits_per_irq = 1, | ||
776 | .handle_mmio = handle_mmio_set_enable_reg, | 920 | .handle_mmio = handle_mmio_set_enable_reg, |
777 | }, | 921 | }, |
778 | { | 922 | { |
779 | .base = GIC_DIST_ENABLE_CLEAR, | 923 | .base = GIC_DIST_ENABLE_CLEAR, |
780 | .len = VGIC_NR_IRQS / 8, | 924 | .len = VGIC_MAX_IRQS / 8, |
925 | .bits_per_irq = 1, | ||
781 | .handle_mmio = handle_mmio_clear_enable_reg, | 926 | .handle_mmio = handle_mmio_clear_enable_reg, |
782 | }, | 927 | }, |
783 | { | 928 | { |
784 | .base = GIC_DIST_PENDING_SET, | 929 | .base = GIC_DIST_PENDING_SET, |
785 | .len = VGIC_NR_IRQS / 8, | 930 | .len = VGIC_MAX_IRQS / 8, |
931 | .bits_per_irq = 1, | ||
786 | .handle_mmio = handle_mmio_set_pending_reg, | 932 | .handle_mmio = handle_mmio_set_pending_reg, |
787 | }, | 933 | }, |
788 | { | 934 | { |
789 | .base = GIC_DIST_PENDING_CLEAR, | 935 | .base = GIC_DIST_PENDING_CLEAR, |
790 | .len = VGIC_NR_IRQS / 8, | 936 | .len = VGIC_MAX_IRQS / 8, |
937 | .bits_per_irq = 1, | ||
791 | .handle_mmio = handle_mmio_clear_pending_reg, | 938 | .handle_mmio = handle_mmio_clear_pending_reg, |
792 | }, | 939 | }, |
793 | { | 940 | { |
794 | .base = GIC_DIST_ACTIVE_SET, | 941 | .base = GIC_DIST_ACTIVE_SET, |
795 | .len = VGIC_NR_IRQS / 8, | 942 | .len = VGIC_MAX_IRQS / 8, |
943 | .bits_per_irq = 1, | ||
796 | .handle_mmio = handle_mmio_raz_wi, | 944 | .handle_mmio = handle_mmio_raz_wi, |
797 | }, | 945 | }, |
798 | { | 946 | { |
799 | .base = GIC_DIST_ACTIVE_CLEAR, | 947 | .base = GIC_DIST_ACTIVE_CLEAR, |
800 | .len = VGIC_NR_IRQS / 8, | 948 | .len = VGIC_MAX_IRQS / 8, |
949 | .bits_per_irq = 1, | ||
801 | .handle_mmio = handle_mmio_raz_wi, | 950 | .handle_mmio = handle_mmio_raz_wi, |
802 | }, | 951 | }, |
803 | { | 952 | { |
804 | .base = GIC_DIST_PRI, | 953 | .base = GIC_DIST_PRI, |
805 | .len = VGIC_NR_IRQS, | 954 | .len = VGIC_MAX_IRQS, |
955 | .bits_per_irq = 8, | ||
806 | .handle_mmio = handle_mmio_priority_reg, | 956 | .handle_mmio = handle_mmio_priority_reg, |
807 | }, | 957 | }, |
808 | { | 958 | { |
809 | .base = GIC_DIST_TARGET, | 959 | .base = GIC_DIST_TARGET, |
810 | .len = VGIC_NR_IRQS, | 960 | .len = VGIC_MAX_IRQS, |
961 | .bits_per_irq = 8, | ||
811 | .handle_mmio = handle_mmio_target_reg, | 962 | .handle_mmio = handle_mmio_target_reg, |
812 | }, | 963 | }, |
813 | { | 964 | { |
814 | .base = GIC_DIST_CONFIG, | 965 | .base = GIC_DIST_CONFIG, |
815 | .len = VGIC_NR_IRQS / 4, | 966 | .len = VGIC_MAX_IRQS / 4, |
967 | .bits_per_irq = 2, | ||
816 | .handle_mmio = handle_mmio_cfg_reg, | 968 | .handle_mmio = handle_mmio_cfg_reg, |
817 | }, | 969 | }, |
818 | { | 970 | { |
@@ -850,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges, | |||
850 | return NULL; | 1002 | return NULL; |
851 | } | 1003 | } |
852 | 1004 | ||
1005 | static bool vgic_validate_access(const struct vgic_dist *dist, | ||
1006 | const struct mmio_range *range, | ||
1007 | unsigned long offset) | ||
1008 | { | ||
1009 | int irq; | ||
1010 | |||
1011 | if (!range->bits_per_irq) | ||
1012 | return true; /* Not an irq-based access */ | ||
1013 | |||
1014 | irq = offset * 8 / range->bits_per_irq; | ||
1015 | if (irq >= dist->nr_irqs) | ||
1016 | return false; | ||
1017 | |||
1018 | return true; | ||
1019 | } | ||
1020 | |||
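The check above uses the new bits_per_irq field to bound register accesses by the number of implemented interrupts: offset * 8 / bits_per_irq recovers the first IRQ an access touches, and anything at or past nr_irqs reads as zero and ignores writes. A worked check of that arithmetic for a hypothetical guest with 64 IRQs:

#include <stdio.h>

/* First IRQ touched by an access at 'offset' into a register block
 * that spends 'bits_per_irq' bits on each interrupt. */
static int first_irq(unsigned long offset, int bits_per_irq)
{
	return offset * 8 / bits_per_irq;
}

int main(void)
{
	int nr_irqs = 64;

	/* enable regs: 1 bit/IRQ, so offset 0x8 starts at IRQ 64 */
	printf("enable   @0x08 -> irq %d (valid=%d)\n",
	       first_irq(0x08, 1), first_irq(0x08, 1) < nr_irqs);
	/* priority regs: 8 bits/IRQ, so offset 0x40 also starts at IRQ 64 */
	printf("priority @0x40 -> irq %d (valid=%d)\n",
	       first_irq(0x40, 8), first_irq(0x40, 8) < nr_irqs);
	return 0;
}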
853 | /** | 1021 | /** |
854 | * vgic_handle_mmio - handle an in-kernel MMIO access | 1022 | * vgic_handle_mmio - handle an in-kernel MMIO access |
855 | * @vcpu: pointer to the vcpu performing the access | 1023 | * @vcpu: pointer to the vcpu performing the access |
@@ -889,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
889 | 1057 | ||
890 | spin_lock(&vcpu->kvm->arch.vgic.lock); | 1058 | spin_lock(&vcpu->kvm->arch.vgic.lock); |
891 | offset = mmio->phys_addr - range->base - base; | 1059 | offset = mmio->phys_addr - range->base - base; |
892 | updated_state = range->handle_mmio(vcpu, mmio, offset); | 1060 | if (vgic_validate_access(dist, range, offset)) { |
1061 | updated_state = range->handle_mmio(vcpu, mmio, offset); | ||
1062 | } else { | ||
1063 | vgic_reg_access(mmio, NULL, offset, | ||
1064 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
1065 | updated_state = false; | ||
1066 | } | ||
893 | spin_unlock(&vcpu->kvm->arch.vgic.lock); | 1067 | spin_unlock(&vcpu->kvm->arch.vgic.lock); |
894 | kvm_prepare_mmio(run, mmio); | 1068 | kvm_prepare_mmio(run, mmio); |
895 | kvm_handle_mmio_return(vcpu, run); | 1069 | kvm_handle_mmio_return(vcpu, run); |
@@ -900,6 +1074,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
900 | return true; | 1074 | return true; |
901 | } | 1075 | } |
902 | 1076 | ||
1077 | static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) | ||
1078 | { | ||
1079 | return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; | ||
1080 | } | ||
1081 | |||
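With irq_sgi_sources now a flat nr_cpus * VGIC_NR_SGIS byte array, the helper above computes the slot holding the source-CPU mask of one (vcpu, SGI) pair. The same indexing in a standalone sketch:

#include <stdio.h>
#include <stdlib.h>

#define VGIC_NR_SGIS 16

static unsigned char *sgi_sources(unsigned char *base, int vcpu_id, int sgi)
{
	return base + vcpu_id * VGIC_NR_SGIS + sgi;
}

int main(void)
{
	int nr_cpus = 4;
	unsigned char *map = calloc(nr_cpus, VGIC_NR_SGIS);

	if (!map)
		return 1;
	*sgi_sources(map, 2, 5) |= 1 << 0;	/* vcpu0 sent SGI5 to vcpu2 */
	printf("vcpu2/sgi5 sources: %#x\n", *sgi_sources(map, 2, 5));
	free(map);
	return 0;
}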
903 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | 1082 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) |
904 | { | 1083 | { |
905 | struct kvm *kvm = vcpu->kvm; | 1084 | struct kvm *kvm = vcpu->kvm; |
@@ -932,8 +1111,8 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | |||
932 | kvm_for_each_vcpu(c, vcpu, kvm) { | 1111 | kvm_for_each_vcpu(c, vcpu, kvm) { |
933 | if (target_cpus & 1) { | 1112 | if (target_cpus & 1) { |
934 | /* Flag the SGI as pending */ | 1113 | /* Flag the SGI as pending */ |
935 | vgic_dist_irq_set(vcpu, sgi); | 1114 | vgic_dist_irq_set_pending(vcpu, sgi); |
936 | dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; | 1115 | *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; |
937 | kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); | 1116 | kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); |
938 | } | 1117 | } |
939 | 1118 | ||
@@ -941,32 +1120,38 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | |||
941 | } | 1120 | } |
942 | } | 1121 | } |
943 | 1122 | ||
1123 | static int vgic_nr_shared_irqs(struct vgic_dist *dist) | ||
1124 | { | ||
1125 | return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; | ||
1126 | } | ||
1127 | |||
944 | static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) | 1128 | static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) |
945 | { | 1129 | { |
946 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1130 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
947 | unsigned long *pending, *enabled, *pend_percpu, *pend_shared; | 1131 | unsigned long *pending, *enabled, *pend_percpu, *pend_shared; |
948 | unsigned long pending_private, pending_shared; | 1132 | unsigned long pending_private, pending_shared; |
1133 | int nr_shared = vgic_nr_shared_irqs(dist); | ||
949 | int vcpu_id; | 1134 | int vcpu_id; |
950 | 1135 | ||
951 | vcpu_id = vcpu->vcpu_id; | 1136 | vcpu_id = vcpu->vcpu_id; |
952 | pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; | 1137 | pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; |
953 | pend_shared = vcpu->arch.vgic_cpu.pending_shared; | 1138 | pend_shared = vcpu->arch.vgic_cpu.pending_shared; |
954 | 1139 | ||
955 | pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); | 1140 | pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); |
956 | enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); | 1141 | enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); |
957 | bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); | 1142 | bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); |
958 | 1143 | ||
959 | pending = vgic_bitmap_get_shared_map(&dist->irq_state); | 1144 | pending = vgic_bitmap_get_shared_map(&dist->irq_pending); |
960 | enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); | 1145 | enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); |
961 | bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); | 1146 | bitmap_and(pend_shared, pending, enabled, nr_shared); |
962 | bitmap_and(pend_shared, pend_shared, | 1147 | bitmap_and(pend_shared, pend_shared, |
963 | vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), | 1148 | vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), |
964 | VGIC_NR_SHARED_IRQS); | 1149 | nr_shared); |
965 | 1150 | ||
966 | pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); | 1151 | pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); |
967 | pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); | 1152 | pending_shared = find_first_bit(pend_shared, nr_shared); |
968 | return (pending_private < VGIC_NR_PRIVATE_IRQS || | 1153 | return (pending_private < VGIC_NR_PRIVATE_IRQS || |
969 | pending_shared < VGIC_NR_SHARED_IRQS); | 1154 | pending_shared < vgic_nr_shared_irqs(dist)); |
970 | } | 1155 | } |
971 | 1156 | ||
972 | /* | 1157 | /* |
@@ -980,14 +1165,14 @@ static void vgic_update_state(struct kvm *kvm) | |||
980 | int c; | 1165 | int c; |
981 | 1166 | ||
982 | if (!dist->enabled) { | 1167 | if (!dist->enabled) { |
983 | set_bit(0, &dist->irq_pending_on_cpu); | 1168 | set_bit(0, dist->irq_pending_on_cpu); |
984 | return; | 1169 | return; |
985 | } | 1170 | } |
986 | 1171 | ||
987 | kvm_for_each_vcpu(c, vcpu, kvm) { | 1172 | kvm_for_each_vcpu(c, vcpu, kvm) { |
988 | if (compute_pending_for_cpu(vcpu)) { | 1173 | if (compute_pending_for_cpu(vcpu)) { |
989 | pr_debug("CPU%d has pending interrupts\n", c); | 1174 | pr_debug("CPU%d has pending interrupts\n", c); |
990 | set_bit(c, &dist->irq_pending_on_cpu); | 1175 | set_bit(c, dist->irq_pending_on_cpu); |
991 | } | 1176 | } |
992 | } | 1177 | } |
993 | } | 1178 | } |
@@ -1079,8 +1264,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | |||
1079 | 1264 | ||
1080 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { | 1265 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { |
1081 | vgic_retire_lr(lr, vlr.irq, vcpu); | 1266 | vgic_retire_lr(lr, vlr.irq, vcpu); |
1082 | if (vgic_irq_is_active(vcpu, vlr.irq)) | 1267 | if (vgic_irq_is_queued(vcpu, vlr.irq)) |
1083 | vgic_irq_clear_active(vcpu, vlr.irq); | 1268 | vgic_irq_clear_queued(vcpu, vlr.irq); |
1084 | } | 1269 | } |
1085 | } | 1270 | } |
1086 | } | 1271 | } |
@@ -1092,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | |||
1092 | static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | 1277 | static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) |
1093 | { | 1278 | { |
1094 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 1279 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
1280 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
1095 | struct vgic_lr vlr; | 1281 | struct vgic_lr vlr; |
1096 | int lr; | 1282 | int lr; |
1097 | 1283 | ||
1098 | /* Sanitize the input... */ | 1284 | /* Sanitize the input... */ |
1099 | BUG_ON(sgi_source_id & ~7); | 1285 | BUG_ON(sgi_source_id & ~7); |
1100 | BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); | 1286 | BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); |
1101 | BUG_ON(irq >= VGIC_NR_IRQS); | 1287 | BUG_ON(irq >= dist->nr_irqs); |
1102 | 1288 | ||
1103 | kvm_debug("Queue IRQ%d\n", irq); | 1289 | kvm_debug("Queue IRQ%d\n", irq); |
1104 | 1290 | ||
@@ -1144,14 +1330,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) | |||
1144 | int vcpu_id = vcpu->vcpu_id; | 1330 | int vcpu_id = vcpu->vcpu_id; |
1145 | int c; | 1331 | int c; |
1146 | 1332 | ||
1147 | sources = dist->irq_sgi_sources[vcpu_id][irq]; | 1333 | sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); |
1148 | 1334 | ||
1149 | for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { | 1335 | for_each_set_bit(c, &sources, dist->nr_cpus) { |
1150 | if (vgic_queue_irq(vcpu, c, irq)) | 1336 | if (vgic_queue_irq(vcpu, c, irq)) |
1151 | clear_bit(c, &sources); | 1337 | clear_bit(c, &sources); |
1152 | } | 1338 | } |
1153 | 1339 | ||
1154 | dist->irq_sgi_sources[vcpu_id][irq] = sources; | 1340 | *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; |
1155 | 1341 | ||
1156 | /* | 1342 | /* |
1157 | * If the sources bitmap has been cleared it means that we | 1343 | * If the sources bitmap has been cleared it means that we |
@@ -1160,7 +1346,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) | |||
1160 | * our emulated gic and can get rid of them. | 1346 | * our emulated gic and can get rid of them. |
1161 | */ | 1347 | */ |
1162 | if (!sources) { | 1348 | if (!sources) { |
1163 | vgic_dist_irq_clear(vcpu, irq); | 1349 | vgic_dist_irq_clear_pending(vcpu, irq); |
1164 | vgic_cpu_irq_clear(vcpu, irq); | 1350 | vgic_cpu_irq_clear(vcpu, irq); |
1165 | return true; | 1351 | return true; |
1166 | } | 1352 | } |
@@ -1170,15 +1356,15 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) | |||
1170 | 1356 | ||
1171 | static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) | 1357 | static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) |
1172 | { | 1358 | { |
1173 | if (vgic_irq_is_active(vcpu, irq)) | 1359 | if (!vgic_can_sample_irq(vcpu, irq)) |
1174 | return true; /* level interrupt, already queued */ | 1360 | return true; /* level interrupt, already queued */ |
1175 | 1361 | ||
1176 | if (vgic_queue_irq(vcpu, 0, irq)) { | 1362 | if (vgic_queue_irq(vcpu, 0, irq)) { |
1177 | if (vgic_irq_is_edge(vcpu, irq)) { | 1363 | if (vgic_irq_is_edge(vcpu, irq)) { |
1178 | vgic_dist_irq_clear(vcpu, irq); | 1364 | vgic_dist_irq_clear_pending(vcpu, irq); |
1179 | vgic_cpu_irq_clear(vcpu, irq); | 1365 | vgic_cpu_irq_clear(vcpu, irq); |
1180 | } else { | 1366 | } else { |
1181 | vgic_irq_set_active(vcpu, irq); | 1367 | vgic_irq_set_queued(vcpu, irq); |
1182 | } | 1368 | } |
1183 | 1369 | ||
1184 | return true; | 1370 | return true; |
@@ -1223,7 +1409,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | |||
1223 | } | 1409 | } |
1224 | 1410 | ||
1225 | /* SPIs */ | 1411 | /* SPIs */ |
1226 | for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { | 1412 | for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { |
1227 | if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) | 1413 | if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) |
1228 | overflow = 1; | 1414 | overflow = 1; |
1229 | } | 1415 | } |
@@ -1239,7 +1425,7 @@ epilog: | |||
1239 | * us. Claim we don't have anything pending. We'll | 1425 | * us. Claim we don't have anything pending. We'll |
1240 | * adjust that if needed while exiting. | 1426 | * adjust that if needed while exiting. |
1241 | */ | 1427 | */ |
1242 | clear_bit(vcpu_id, &dist->irq_pending_on_cpu); | 1428 | clear_bit(vcpu_id, dist->irq_pending_on_cpu); |
1243 | } | 1429 | } |
1244 | } | 1430 | } |
1245 | 1431 | ||
@@ -1261,17 +1447,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
1261 | 1447 | ||
1262 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { | 1448 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { |
1263 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1449 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1450 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); | ||
1264 | 1451 | ||
1265 | vgic_irq_clear_active(vcpu, vlr.irq); | 1452 | vgic_irq_clear_queued(vcpu, vlr.irq); |
1266 | WARN_ON(vlr.state & LR_STATE_MASK); | 1453 | WARN_ON(vlr.state & LR_STATE_MASK); |
1267 | vlr.state = 0; | 1454 | vlr.state = 0; |
1268 | vgic_set_lr(vcpu, lr, vlr); | 1455 | vgic_set_lr(vcpu, lr, vlr); |
1269 | 1456 | ||
1457 | /* | ||
1458 | * If the IRQ was EOIed it was also ACKed and we | ||
1459 | * therefore assume we can clear the soft pending | ||
1460 | * state (should it have been set) for this interrupt. | ||
1461 | * | ||
1462 | * Note: if the IRQ soft pending state was set after | ||
1463 | * the IRQ was acked, it actually shouldn't be | ||
1464 | * cleared, but we have no way of knowing that unless | ||
1465 | * we start trapping ACKs when the soft-pending state | ||
1466 | * is set. | ||
1467 | */ | ||
1468 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
1469 | |||
1270 | /* Any additional pending interrupt? */ | 1470 | /* Any additional pending interrupt? */ |
1271 | if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { | 1471 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { |
1272 | vgic_cpu_irq_set(vcpu, vlr.irq); | 1472 | vgic_cpu_irq_set(vcpu, vlr.irq); |
1273 | level_pending = true; | 1473 | level_pending = true; |
1274 | } else { | 1474 | } else { |
1475 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
1275 | vgic_cpu_irq_clear(vcpu, vlr.irq); | 1476 | vgic_cpu_irq_clear(vcpu, vlr.irq); |
1276 | } | 1477 | } |
1277 | 1478 | ||
@@ -1315,14 +1516,14 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | |||
1315 | 1516 | ||
1316 | vlr = vgic_get_lr(vcpu, lr); | 1517 | vlr = vgic_get_lr(vcpu, lr); |
1317 | 1518 | ||
1318 | BUG_ON(vlr.irq >= VGIC_NR_IRQS); | 1519 | BUG_ON(vlr.irq >= dist->nr_irqs); |
1319 | vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; | 1520 | vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; |
1320 | } | 1521 | } |
1321 | 1522 | ||
1322 | /* Check if we still have something up our sleeve... */ | 1523 | /* Check if we still have something up our sleeve... */ |
1323 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); | 1524 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); |
1324 | if (level_pending || pending < vgic->nr_lr) | 1525 | if (level_pending || pending < vgic->nr_lr) |
1325 | set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); | 1526 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
1326 | } | 1527 | } |
1327 | 1528 | ||
1328 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | 1529 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) |
@@ -1356,7 +1557,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) | |||
1356 | if (!irqchip_in_kernel(vcpu->kvm)) | 1557 | if (!irqchip_in_kernel(vcpu->kvm)) |
1357 | return 0; | 1558 | return 0; |
1358 | 1559 | ||
1359 | return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); | 1560 | return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
1360 | } | 1561 | } |
1361 | 1562 | ||
1362 | static void vgic_kick_vcpus(struct kvm *kvm) | 1563 | static void vgic_kick_vcpus(struct kvm *kvm) |
@@ -1376,34 +1577,36 @@ static void vgic_kick_vcpus(struct kvm *kvm) | |||
1376 | 1577 | ||
1377 | static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) | 1578 | static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) |
1378 | { | 1579 | { |
1379 | int is_edge = vgic_irq_is_edge(vcpu, irq); | 1580 | int edge_triggered = vgic_irq_is_edge(vcpu, irq); |
1380 | int state = vgic_dist_irq_is_pending(vcpu, irq); | ||
1381 | 1581 | ||
1382 | /* | 1582 | /* |
1383 | * Only inject an interrupt if: | 1583 | * Only inject an interrupt if: |
1384 | * - edge triggered and we have a rising edge | 1584 | * - edge triggered and we have a rising edge |
1385 | * - level triggered and we change level | 1585 | * - level triggered and we change level |
1386 | */ | 1586 | */ |
1387 | if (is_edge) | 1587 | if (edge_triggered) { |
1588 | int state = vgic_dist_irq_is_pending(vcpu, irq); | ||
1388 | return level > state; | 1589 | return level > state; |
1389 | else | 1590 | } else { |
1591 | int state = vgic_dist_irq_get_level(vcpu, irq); | ||
1390 | return level != state; | 1592 | return level != state; |
1593 | } | ||
1391 | } | 1594 | } |
1392 | 1595 | ||
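The rewritten filter treats the two trigger types differently: an edge interrupt is injected only on a rising edge measured against its pending state, a level interrupt whenever the cached line level actually changes. Its decision table, runnable:

#include <stdbool.h>
#include <stdio.h>

static bool validate(bool edge_triggered, bool state, bool level)
{
	if (edge_triggered)
		return level > state;	/* rising edge vs. pending */
	else
		return level != state;	/* change vs. cached line level */
}

int main(void)
{
	printf("edge,  pending=0, level=1 -> %d\n", validate(true, false, true));	/* 1 */
	printf("edge,  pending=1, level=1 -> %d\n", validate(true, true, true));	/* 0 */
	printf("level, line=1,    level=0 -> %d\n", validate(false, true, false));	/* 1 */
	printf("level, line=0,    level=0 -> %d\n", validate(false, false, false));	/* 0 */
	return 0;
}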
1393 | static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | 1596 | static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, |
1394 | unsigned int irq_num, bool level) | 1597 | unsigned int irq_num, bool level) |
1395 | { | 1598 | { |
1396 | struct vgic_dist *dist = &kvm->arch.vgic; | 1599 | struct vgic_dist *dist = &kvm->arch.vgic; |
1397 | struct kvm_vcpu *vcpu; | 1600 | struct kvm_vcpu *vcpu; |
1398 | int is_edge, is_level; | 1601 | int edge_triggered, level_triggered; |
1399 | int enabled; | 1602 | int enabled; |
1400 | bool ret = true; | 1603 | bool ret = true; |
1401 | 1604 | ||
1402 | spin_lock(&dist->lock); | 1605 | spin_lock(&dist->lock); |
1403 | 1606 | ||
1404 | vcpu = kvm_get_vcpu(kvm, cpuid); | 1607 | vcpu = kvm_get_vcpu(kvm, cpuid); |
1405 | is_edge = vgic_irq_is_edge(vcpu, irq_num); | 1608 | edge_triggered = vgic_irq_is_edge(vcpu, irq_num); |
1406 | is_level = !is_edge; | 1609 | level_triggered = !edge_triggered; |
1407 | 1610 | ||
1408 | if (!vgic_validate_injection(vcpu, irq_num, level)) { | 1611 | if (!vgic_validate_injection(vcpu, irq_num, level)) { |
1409 | ret = false; | 1612 | ret = false; |
@@ -1417,10 +1620,19 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | |||
1417 | 1620 | ||
1418 | kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); | 1621 | kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); |
1419 | 1622 | ||
1420 | if (level) | 1623 | if (level) { |
1421 | vgic_dist_irq_set(vcpu, irq_num); | 1624 | if (level_triggered) |
1422 | else | 1625 | vgic_dist_irq_set_level(vcpu, irq_num); |
1423 | vgic_dist_irq_clear(vcpu, irq_num); | 1626 | vgic_dist_irq_set_pending(vcpu, irq_num); |
1627 | } else { | ||
1628 | if (level_triggered) { | ||
1629 | vgic_dist_irq_clear_level(vcpu, irq_num); | ||
1630 | if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) | ||
1631 | vgic_dist_irq_clear_pending(vcpu, irq_num); | ||
1632 | } else { | ||
1633 | vgic_dist_irq_clear_pending(vcpu, irq_num); | ||
1634 | } | ||
1635 | } | ||
1424 | 1636 | ||
1425 | enabled = vgic_irq_is_enabled(vcpu, irq_num); | 1637 | enabled = vgic_irq_is_enabled(vcpu, irq_num); |
1426 | 1638 | ||
@@ -1429,7 +1641,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | |||
1429 | goto out; | 1641 | goto out; |
1430 | } | 1642 | } |
1431 | 1643 | ||
1432 | if (is_level && vgic_irq_is_active(vcpu, irq_num)) { | 1644 | if (!vgic_can_sample_irq(vcpu, irq_num)) { |
1433 | /* | 1645 | /* |
1434 | * Level interrupt in progress, will be picked up | 1646 | * Level interrupt in progress, will be picked up |
1435 | * when EOId. | 1647 | * when EOId. |
@@ -1440,7 +1652,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | |||
1440 | 1652 | ||
1441 | if (level) { | 1653 | if (level) { |
1442 | vgic_cpu_irq_set(vcpu, irq_num); | 1654 | vgic_cpu_irq_set(vcpu, irq_num); |
1443 | set_bit(cpuid, &dist->irq_pending_on_cpu); | 1655 | set_bit(cpuid, dist->irq_pending_on_cpu); |
1444 | } | 1656 | } |
1445 | 1657 | ||
1446 | out: | 1658 | out: |
@@ -1466,7 +1678,8 @@ out: | |||
1466 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | 1678 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, |
1467 | bool level) | 1679 | bool level) |
1468 | { | 1680 | { |
1469 | if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) | 1681 | if (likely(vgic_initialized(kvm)) && |
1682 | vgic_update_irq_pending(kvm, cpuid, irq_num, level)) | ||
1470 | vgic_kick_vcpus(kvm); | 1683 | vgic_kick_vcpus(kvm); |
1471 | 1684 | ||
1472 | return 0; | 1685 | return 0; |
@@ -1483,6 +1696,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) | |||
1483 | return IRQ_HANDLED; | 1696 | return IRQ_HANDLED; |
1484 | } | 1697 | } |
1485 | 1698 | ||
1699 | void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) | ||
1700 | { | ||
1701 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1702 | |||
1703 | kfree(vgic_cpu->pending_shared); | ||
1704 | kfree(vgic_cpu->vgic_irq_lr_map); | ||
1705 | vgic_cpu->pending_shared = NULL; | ||
1706 | vgic_cpu->vgic_irq_lr_map = NULL; | ||
1707 | } | ||
1708 | |||
1709 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | ||
1710 | { | ||
1711 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1712 | |||
1713 | int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; | ||
1714 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); | ||
1715 | vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); | ||
1716 | |||
1717 | if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { | ||
1718 | kvm_vgic_vcpu_destroy(vcpu); | ||
1719 | return -ENOMEM; | ||
1720 | } | ||
1721 | |||
1722 | return 0; | ||
1723 | } | ||
1724 | |||
1486 | /** | 1725 | /** |
1487 | * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state | 1726 | * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state |
1488 | * @vcpu: pointer to the vcpu struct | 1727 | * @vcpu: pointer to the vcpu struct |
@@ -1490,16 +1729,13 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) | |||
1490 | * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to | 1729 | * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to |
1491 | * this vcpu and enable the VGIC for this VCPU | 1730 | * this vcpu and enable the VGIC for this VCPU |
1492 | */ | 1731 | */ |
1493 | int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) | 1732 | static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) |
1494 | { | 1733 | { |
1495 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 1734 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
1496 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1735 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1497 | int i; | 1736 | int i; |
1498 | 1737 | ||
1499 | if (vcpu->vcpu_id >= VGIC_MAX_CPUS) | 1738 | for (i = 0; i < dist->nr_irqs; i++) { |
1500 | return -EBUSY; | ||
1501 | |||
1502 | for (i = 0; i < VGIC_NR_IRQS; i++) { | ||
1503 | if (i < VGIC_NR_PPIS) | 1739 | if (i < VGIC_NR_PPIS) |
1504 | vgic_bitmap_set_irq_val(&dist->irq_enabled, | 1740 | vgic_bitmap_set_irq_val(&dist->irq_enabled, |
1505 | vcpu->vcpu_id, i, 1); | 1741 | vcpu->vcpu_id, i, 1); |
@@ -1518,84 +1754,112 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) | |||
1518 | vgic_cpu->nr_lr = vgic->nr_lr; | 1754 | vgic_cpu->nr_lr = vgic->nr_lr; |
1519 | 1755 | ||
1520 | vgic_enable(vcpu); | 1756 | vgic_enable(vcpu); |
1521 | |||
1522 | return 0; | ||
1523 | } | 1757 | } |
1524 | 1758 | ||
1525 | static void vgic_init_maintenance_interrupt(void *info) | 1759 | void kvm_vgic_destroy(struct kvm *kvm) |
1526 | { | 1760 | { |
1527 | enable_percpu_irq(vgic->maint_irq, 0); | 1761 | struct vgic_dist *dist = &kvm->arch.vgic; |
1762 | struct kvm_vcpu *vcpu; | ||
1763 | int i; | ||
1764 | |||
1765 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1766 | kvm_vgic_vcpu_destroy(vcpu); | ||
1767 | |||
1768 | vgic_free_bitmap(&dist->irq_enabled); | ||
1769 | vgic_free_bitmap(&dist->irq_level); | ||
1770 | vgic_free_bitmap(&dist->irq_pending); | ||
1771 | vgic_free_bitmap(&dist->irq_soft_pend); | ||
1772 | vgic_free_bitmap(&dist->irq_queued); | ||
1773 | vgic_free_bitmap(&dist->irq_cfg); | ||
1774 | vgic_free_bytemap(&dist->irq_priority); | ||
1775 | if (dist->irq_spi_target) { | ||
1776 | for (i = 0; i < dist->nr_cpus; i++) | ||
1777 | vgic_free_bitmap(&dist->irq_spi_target[i]); | ||
1778 | } | ||
1779 | kfree(dist->irq_sgi_sources); | ||
1780 | kfree(dist->irq_spi_cpu); | ||
1781 | kfree(dist->irq_spi_target); | ||
1782 | kfree(dist->irq_pending_on_cpu); | ||
1783 | dist->irq_sgi_sources = NULL; | ||
1784 | dist->irq_spi_cpu = NULL; | ||
1785 | dist->irq_spi_target = NULL; | ||
1786 | dist->irq_pending_on_cpu = NULL; | ||
1528 | } | 1787 | } |
1529 | 1788 | ||
1530 | static int vgic_cpu_notify(struct notifier_block *self, | 1789 | /* |
1531 | unsigned long action, void *cpu) | 1790 | * Allocate and initialize the various data structures. Must be called |
1791 | * with kvm->lock held! | ||
1792 | */ | ||
1793 | static int vgic_init_maps(struct kvm *kvm) | ||
1532 | { | 1794 | { |
1533 | switch (action) { | 1795 | struct vgic_dist *dist = &kvm->arch.vgic; |
1534 | case CPU_STARTING: | 1796 | struct kvm_vcpu *vcpu; |
1535 | case CPU_STARTING_FROZEN: | 1797 | int nr_cpus, nr_irqs; |
1536 | vgic_init_maintenance_interrupt(NULL); | 1798 | int ret, i; |
1537 | break; | ||
1538 | case CPU_DYING: | ||
1539 | case CPU_DYING_FROZEN: | ||
1540 | disable_percpu_irq(vgic->maint_irq); | ||
1541 | break; | ||
1542 | } | ||
1543 | 1799 | ||
1544 | return NOTIFY_OK; | 1800 | if (dist->nr_cpus) /* Already allocated */ |
1545 | } | 1801 | return 0; |
1546 | 1802 | ||
1547 | static struct notifier_block vgic_cpu_nb = { | 1803 | nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); |
1548 | .notifier_call = vgic_cpu_notify, | 1804 | if (!nr_cpus) /* No vcpus? Can't be good... */ |
1549 | }; | 1805 | return -EINVAL; |
1550 | 1806 | ||
1551 | static const struct of_device_id vgic_ids[] = { | 1807 | /* |
1552 | { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, | 1808 | * If nobody configured the number of interrupts, use the |
1553 | { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, | 1809 | * legacy one. |
1554 | {}, | 1810 | */ |
1555 | }; | 1811 | if (!dist->nr_irqs) |
1812 | dist->nr_irqs = VGIC_NR_IRQS_LEGACY; | ||
1556 | 1813 | ||
1557 | int kvm_vgic_hyp_init(void) | 1814 | nr_irqs = dist->nr_irqs; |
1558 | { | ||
1559 | const struct of_device_id *matched_id; | ||
1560 | int (*vgic_probe)(struct device_node *,const struct vgic_ops **, | ||
1561 | const struct vgic_params **); | ||
1562 | struct device_node *vgic_node; | ||
1563 | int ret; | ||
1564 | 1815 | ||
1565 | vgic_node = of_find_matching_node_and_match(NULL, | 1816 | ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); |
1566 | vgic_ids, &matched_id); | 1817 | ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); |
1567 | if (!vgic_node) { | 1818 | ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); |
1568 | kvm_err("error: no compatible GIC node found\n"); | 1819 | ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); |
1569 | return -ENODEV; | 1820 | ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); |
1570 | } | 1821 | ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); |
1822 | ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); | ||
1571 | 1823 | ||
1572 | vgic_probe = matched_id->data; | ||
1573 | ret = vgic_probe(vgic_node, &vgic_ops, &vgic); | ||
1574 | if (ret) | 1824 | if (ret) |
1575 | return ret; | 1825 | goto out; |
1576 | 1826 | ||
1577 | ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, | 1827 | dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); |
1578 | "vgic", kvm_get_running_vcpus()); | 1828 | dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); |
1579 | if (ret) { | 1829 | dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, |
1580 | kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); | 1830 | GFP_KERNEL); |
1581 | return ret; | 1831 | dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), |
1832 | GFP_KERNEL); | ||
1833 | if (!dist->irq_sgi_sources || | ||
1834 | !dist->irq_spi_cpu || | ||
1835 | !dist->irq_spi_target || | ||
1836 | !dist->irq_pending_on_cpu) { | ||
1837 | ret = -ENOMEM; | ||
1838 | goto out; | ||
1582 | } | 1839 | } |
1583 | 1840 | ||
1584 | ret = __register_cpu_notifier(&vgic_cpu_nb); | 1841 | for (i = 0; i < nr_cpus; i++) |
1585 | if (ret) { | 1842 | ret |= vgic_init_bitmap(&dist->irq_spi_target[i], |
1586 | kvm_err("Cannot register vgic CPU notifier\n"); | 1843 | nr_cpus, nr_irqs); |
1587 | goto out_free_irq; | ||
1588 | } | ||
1589 | 1844 | ||
1590 | /* Callback into arch code for setup */ | 1845 | if (ret) |
1591 | vgic_arch_setup(vgic); | 1846 | goto out; |
1592 | 1847 | ||
1593 | on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); | 1848 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1849 | ret = vgic_vcpu_init_maps(vcpu, nr_irqs); | ||
1850 | if (ret) { | ||
1851 | kvm_err("VGIC: Failed to allocate vcpu memory\n"); | ||
1852 | break; | ||
1853 | } | ||
1854 | } | ||
1594 | 1855 | ||
1595 | return 0; | 1856 | for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) |
1857 | vgic_set_target_reg(kvm, 0, i); | ||
1858 | |||
1859 | out: | ||
1860 | if (ret) | ||
1861 | kvm_vgic_destroy(kvm); | ||
1596 | 1862 | ||
1597 | out_free_irq: | ||
1598 | free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); | ||
1599 | return ret; | 1863 | return ret; |
1600 | } | 1864 | } |
1601 | 1865 | ||
@@ -1610,6 +1874,7 @@ out_free_irq: | |||
1610 | */ | 1874 | */ |
1611 | int kvm_vgic_init(struct kvm *kvm) | 1875 | int kvm_vgic_init(struct kvm *kvm) |
1612 | { | 1876 | { |
1877 | struct kvm_vcpu *vcpu; | ||
1613 | int ret = 0, i; | 1878 | int ret = 0, i; |
1614 | 1879 | ||
1615 | if (!irqchip_in_kernel(kvm)) | 1880 | if (!irqchip_in_kernel(kvm)) |
@@ -1627,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm) | |||
1627 | goto out; | 1892 | goto out; |
1628 | } | 1893 | } |
1629 | 1894 | ||
1895 | ret = vgic_init_maps(kvm); | ||
1896 | if (ret) { | ||
1897 | kvm_err("Unable to allocate maps\n"); | ||
1898 | goto out; | ||
1899 | } | ||
1900 | |||
1630 | ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, | 1901 | ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, |
1631 | vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); | 1902 | vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); |
1632 | if (ret) { | 1903 | if (ret) { |
@@ -1634,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm) | |||
1634 | goto out; | 1905 | goto out; |
1635 | } | 1906 | } |
1636 | 1907 | ||
1637 | for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) | 1908 | kvm_for_each_vcpu(i, vcpu, kvm) |
1638 | vgic_set_target_reg(kvm, 0, i); | 1909 | kvm_vgic_vcpu_init(vcpu); |
1639 | 1910 | ||
1640 | kvm->arch.vgic.ready = true; | 1911 | kvm->arch.vgic.ready = true; |
1641 | out: | 1912 | out: |
1913 | if (ret) | ||
1914 | kvm_vgic_destroy(kvm); | ||
1642 | mutex_unlock(&kvm->lock); | 1915 | mutex_unlock(&kvm->lock); |
1643 | return ret; | 1916 | return ret; |
1644 | } | 1917 | } |
@@ -1690,7 +1963,7 @@ out: | |||
1690 | return ret; | 1963 | return ret; |
1691 | } | 1964 | } |
1692 | 1965 | ||
1693 | static bool vgic_ioaddr_overlap(struct kvm *kvm) | 1966 | static int vgic_ioaddr_overlap(struct kvm *kvm) |
1694 | { | 1967 | { |
1695 | phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; | 1968 | phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; |
1696 | phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; | 1969 | phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; |
@@ -1879,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev, | |||
1879 | 2152 | ||
1880 | mutex_lock(&dev->kvm->lock); | 2153 | mutex_lock(&dev->kvm->lock); |
1881 | 2154 | ||
2155 | ret = vgic_init_maps(dev->kvm); | ||
2156 | if (ret) | ||
2157 | goto out; | ||
2158 | |||
1882 | if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { | 2159 | if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { |
1883 | ret = -EINVAL; | 2160 | ret = -EINVAL; |
1884 | goto out; | 2161 | goto out; |
@@ -1976,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
1976 | 2253 | ||
1977 | return vgic_attr_regs_access(dev, attr, ®, true); | 2254 | return vgic_attr_regs_access(dev, attr, ®, true); |
1978 | } | 2255 | } |
2256 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | ||
2257 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
2258 | u32 val; | ||
2259 | int ret = 0; | ||
2260 | |||
2261 | if (get_user(val, uaddr)) | ||
2262 | return -EFAULT; | ||
2263 | |||
2264 | /* | ||
2265 | * We require: | ||
2266 | * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs | ||
2267 | * - at most 1024 interrupts | ||
2268 | * - a multiple of 32 interrupts | ||
2269 | */ | ||
2270 | if (val < (VGIC_NR_PRIVATE_IRQS + 32) || | ||
2271 | val > VGIC_MAX_IRQS || | ||
2272 | (val & 31)) | ||
2273 | return -EINVAL; | ||
2274 | |||
2275 | mutex_lock(&dev->kvm->lock); | ||
2276 | |||
2277 | if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) | ||
2278 | ret = -EBUSY; | ||
2279 | else | ||
2280 | dev->kvm->arch.vgic.nr_irqs = val; | ||
2281 | |||
2282 | mutex_unlock(&dev->kvm->lock); | ||
2283 | |||
2284 | return ret; | ||
2285 | } | ||
1979 | 2286 | ||
1980 | } | 2287 | } |
1981 | 2288 | ||
@@ -2012,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2012 | r = put_user(reg, uaddr); | 2319 | r = put_user(reg, uaddr); |
2013 | break; | 2320 | break; |
2014 | } | 2321 | } |
2322 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | ||
2323 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
2324 | r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); | ||
2325 | break; | ||
2326 | } | ||
2015 | 2327 | ||
2016 | } | 2328 | } |
2017 | 2329 | ||
@@ -2048,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2048 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | 2360 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: |
2049 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | 2361 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; |
2050 | return vgic_has_attr_regs(vgic_cpu_ranges, offset); | 2362 | return vgic_has_attr_regs(vgic_cpu_ranges, offset); |
2363 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | ||
2364 | return 0; | ||
2051 | } | 2365 | } |
2052 | return -ENXIO; | 2366 | return -ENXIO; |
2053 | } | 2367 | } |
@@ -2062,7 +2376,7 @@ static int vgic_create(struct kvm_device *dev, u32 type) | |||
2062 | return kvm_vgic_create(dev->kvm); | 2376 | return kvm_vgic_create(dev->kvm); |
2063 | } | 2377 | } |
2064 | 2378 | ||
2065 | struct kvm_device_ops kvm_arm_vgic_v2_ops = { | 2379 | static struct kvm_device_ops kvm_arm_vgic_v2_ops = { |
2066 | .name = "kvm-arm-vgic", | 2380 | .name = "kvm-arm-vgic", |
2067 | .create = vgic_create, | 2381 | .create = vgic_create, |
2068 | .destroy = vgic_destroy, | 2382 | .destroy = vgic_destroy, |
@@ -2070,3 +2384,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = { | |||
2070 | .get_attr = vgic_get_attr, | 2384 | .get_attr = vgic_get_attr, |
2071 | .has_attr = vgic_has_attr, | 2385 | .has_attr = vgic_has_attr, |
2072 | }; | 2386 | }; |
2387 | |||
2388 | static void vgic_init_maintenance_interrupt(void *info) | ||
2389 | { | ||
2390 | enable_percpu_irq(vgic->maint_irq, 0); | ||
2391 | } | ||
2392 | |||
2393 | static int vgic_cpu_notify(struct notifier_block *self, | ||
2394 | unsigned long action, void *cpu) | ||
2395 | { | ||
2396 | switch (action) { | ||
2397 | case CPU_STARTING: | ||
2398 | case CPU_STARTING_FROZEN: | ||
2399 | vgic_init_maintenance_interrupt(NULL); | ||
2400 | break; | ||
2401 | case CPU_DYING: | ||
2402 | case CPU_DYING_FROZEN: | ||
2403 | disable_percpu_irq(vgic->maint_irq); | ||
2404 | break; | ||
2405 | } | ||
2406 | |||
2407 | return NOTIFY_OK; | ||
2408 | } | ||
2409 | |||
2410 | static struct notifier_block vgic_cpu_nb = { | ||
2411 | .notifier_call = vgic_cpu_notify, | ||
2412 | }; | ||
2413 | |||
2414 | static const struct of_device_id vgic_ids[] = { | ||
2415 | { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, | ||
2416 | { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, | ||
2417 | {}, | ||
2418 | }; | ||
2419 | |||
2420 | int kvm_vgic_hyp_init(void) | ||
2421 | { | ||
2422 | const struct of_device_id *matched_id; | ||
2423 | int (*vgic_probe)(struct device_node *, const struct vgic_ops **, | ||
2424 | const struct vgic_params **); | ||
2425 | struct device_node *vgic_node; | ||
2426 | int ret; | ||
2427 | |||
2428 | vgic_node = of_find_matching_node_and_match(NULL, | ||
2429 | vgic_ids, &matched_id); | ||
2430 | if (!vgic_node) { | ||
2431 | kvm_err("error: no compatible GIC node found\n"); | ||
2432 | return -ENODEV; | ||
2433 | } | ||
2434 | |||
2435 | vgic_probe = matched_id->data; | ||
2436 | ret = vgic_probe(vgic_node, &vgic_ops, &vgic); | ||
2437 | if (ret) | ||
2438 | return ret; | ||
2439 | |||
2440 | ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, | ||
2441 | "vgic", kvm_get_running_vcpus()); | ||
2442 | if (ret) { | ||
2443 | kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); | ||
2444 | return ret; | ||
2445 | } | ||
2446 | |||
2447 | ret = __register_cpu_notifier(&vgic_cpu_nb); | ||
2448 | if (ret) { | ||
2449 | kvm_err("Cannot register vgic CPU notifier\n"); | ||
2450 | goto out_free_irq; | ||
2451 | } | ||
2452 | |||
2453 | /* Callback into arch code for setup */ | ||
2454 | vgic_arch_setup(vgic); | ||
2455 | |||
2456 | on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); | ||
2457 | |||
2458 | return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, | ||
2459 | KVM_DEV_TYPE_ARM_VGIC_V2); | ||
2460 | |||
2461 | out_free_irq: | ||
2462 | free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); | ||
2463 | return ret; | ||
2464 | } | ||
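The KVM_DEV_ARM_VGIC_GRP_NR_IRQS attribute group added above lets userspace size the distributor before the vgic is initialized. A minimal userspace sketch of the call sequence, assuming an open VM file descriptor vm_fd and uapi headers that export the new group constant; the helper name is invented and error handling is mostly elided:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch only: vm_fd is assumed to be a KVM VM file descriptor. */
    static int set_vgic_nr_irqs(int vm_fd, __u32 nr_irqs)
    {
            struct kvm_create_device cd = { .type = KVM_DEV_TYPE_ARM_VGIC_V2 };
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
                    .addr  = (__u64)(unsigned long)&nr_irqs,
            };

            if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                    return -1;

            /* nr_irqs must be a multiple of 32, between 64 and 1024, and
             * the write must land before the vgic is initialized or nr_irqs
             * was already set, else the ioctl fails with -EBUSY. */
            return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
    }

The matching KVM_DEV_ARM_VGIC_GRP_NR_IRQS case in vgic_get_attr above reads the value back via KVM_GET_DEVICE_ATTR.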
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index d6a3d0993d88..5ff7f7f2689a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c | |||
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work) | |||
80 | 80 | ||
81 | might_sleep(); | 81 | might_sleep(); |
82 | 82 | ||
83 | down_read(&mm->mmap_sem); | 83 | kvm_get_user_page_io(NULL, mm, addr, 1, NULL); |
84 | get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL); | ||
85 | up_read(&mm->mmap_sem); | ||
86 | kvm_async_page_present_sync(vcpu, apf); | 84 | kvm_async_page_present_sync(vcpu, apf); |
87 | 85 | ||
88 | spin_lock(&vcpu->async_pf.lock); | 86 | spin_lock(&vcpu->async_pf.lock); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 3c5981c87c3f..b0fb390943c6 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -36,7 +36,9 @@ | |||
36 | #include <linux/seqlock.h> | 36 | #include <linux/seqlock.h> |
37 | #include <trace/events/kvm.h> | 37 | #include <trace/events/kvm.h> |
38 | 38 | ||
39 | #include "irq.h" | 39 | #ifdef __KVM_HAVE_IOAPIC |
40 | #include "ioapic.h" | ||
41 | #endif | ||
40 | #include "iodev.h" | 42 | #include "iodev.h" |
41 | 43 | ||
42 | #ifdef CONFIG_HAVE_KVM_IRQFD | 44 | #ifdef CONFIG_HAVE_KVM_IRQFD |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index e8ce34c9db32..0ba4057d271b 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) | |||
405 | spin_unlock(&ioapic->lock); | 405 | spin_unlock(&ioapic->lock); |
406 | } | 406 | } |
407 | 407 | ||
408 | static void kvm_ioapic_eoi_inject_work(struct work_struct *work) | ||
409 | { | ||
410 | int i; | ||
411 | struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, | ||
412 | eoi_inject.work); | ||
413 | spin_lock(&ioapic->lock); | ||
414 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | ||
415 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; | ||
416 | |||
417 | if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) | ||
418 | continue; | ||
419 | |||
420 | if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) | ||
421 | ioapic_service(ioapic, i, false); | ||
422 | } | ||
423 | spin_unlock(&ioapic->lock); | ||
424 | } | ||
425 | |||
426 | #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 | ||
427 | |||
408 | static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, | 428 | static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, |
409 | struct kvm_ioapic *ioapic, int vector, int trigger_mode) | 429 | struct kvm_ioapic *ioapic, int vector, int trigger_mode) |
410 | { | 430 | { |
@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, | |||
435 | 455 | ||
436 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | 456 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
437 | ent->fields.remote_irr = 0; | 457 | ent->fields.remote_irr = 0; |
438 | if (ioapic->irr & (1 << i)) | 458 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) { |
439 | ioapic_service(ioapic, i, false); | 459 | ++ioapic->irq_eoi[i]; |
460 | if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { | ||
461 | /* | ||
462 | * Real hardware does not deliver the interrupt | ||
463 | * immediately during eoi broadcast, and this | ||
464 | * lets a buggy guest make slow progress | ||
465 | * even if it does not correctly handle a | ||
466 | * level-triggered interrupt. Emulate this | ||
467 | * behavior if we detect an interrupt storm. | ||
468 | */ | ||
469 | schedule_delayed_work(&ioapic->eoi_inject, HZ / 100); | ||
470 | ioapic->irq_eoi[i] = 0; | ||
471 | trace_kvm_ioapic_delayed_eoi_inj(ent->bits); | ||
472 | } else { | ||
473 | ioapic_service(ioapic, i, false); | ||
474 | } | ||
475 | } else { | ||
476 | ioapic->irq_eoi[i] = 0; | ||
477 | } | ||
440 | } | 478 | } |
441 | } | 479 | } |
442 | 480 | ||
@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
565 | { | 603 | { |
566 | int i; | 604 | int i; |
567 | 605 | ||
606 | cancel_delayed_work_sync(&ioapic->eoi_inject); | ||
568 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | 607 | for (i = 0; i < IOAPIC_NUM_PINS; i++) |
569 | ioapic->redirtbl[i].fields.mask = 1; | 608 | ioapic->redirtbl[i].fields.mask = 1; |
570 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; | 609 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; |
571 | ioapic->ioregsel = 0; | 610 | ioapic->ioregsel = 0; |
572 | ioapic->irr = 0; | 611 | ioapic->irr = 0; |
573 | ioapic->id = 0; | 612 | ioapic->id = 0; |
613 | memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi)); | ||
574 | rtc_irq_eoi_tracking_reset(ioapic); | 614 | rtc_irq_eoi_tracking_reset(ioapic); |
575 | update_handled_vectors(ioapic); | 615 | update_handled_vectors(ioapic); |
576 | } | 616 | } |
@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
589 | if (!ioapic) | 629 | if (!ioapic) |
590 | return -ENOMEM; | 630 | return -ENOMEM; |
591 | spin_lock_init(&ioapic->lock); | 631 | spin_lock_init(&ioapic->lock); |
632 | INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); | ||
592 | kvm->arch.vioapic = ioapic; | 633 | kvm->arch.vioapic = ioapic; |
593 | kvm_ioapic_reset(ioapic); | 634 | kvm_ioapic_reset(ioapic); |
594 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 635 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm) | |||
609 | { | 650 | { |
610 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 651 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
611 | 652 | ||
612 | if (ioapic) { | 653 | if (ioapic) { |
654 | cancel_delayed_work_sync(&ioapic->eoi_inject); | ||
613 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | 655 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); |
614 | kvm->arch.vioapic = NULL; | 656 | kvm->arch.vioapic = NULL; |
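The throttle above only kicks in after IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT (10000) back-to-back EOIs that each immediately re-raise the same unmasked, level-triggered pin; the deferred reinjection then runs HZ / 100 jiffies (10 ms) later. A toy userspace model of just the counting logic, with all names invented here:

    #include <stdio.h>

    #define SUCCESSIVE_MAX 10000    /* mirrors IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT */

    /* Returns 1 when service should be deferred instead of immediate. */
    static int eoi_storm_throttle(unsigned int *counter)
    {
            if (++*counter == SUCCESSIVE_MAX) {
                    *counter = 0;   /* reset and back off for 10 ms */
                    return 1;
            }
            return 0;               /* service the pin right away */
    }

    int main(void)
    {
            unsigned int eoi_count = 0;
            unsigned long serviced = 0, deferred = 0;

            for (int i = 0; i < 25000; i++) {
                    if (eoi_storm_throttle(&eoi_count))
                            deferred++;
                    else
                            serviced++;
            }
            /* prints serviced=24998 deferred=2 */
            printf("serviced=%lu deferred=%lu\n", serviced, deferred);
            return 0;
    }

A well-behaved guest that actually handles the interrupt breaks the streak, since the real code zeroes irq_eoi[i] whenever the pin is no longer pending at EOI time.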
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 90d43e95dcf8..e23b70634f1e 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
@@ -59,6 +59,8 @@ struct kvm_ioapic { | |||
59 | spinlock_t lock; | 59 | spinlock_t lock; |
60 | DECLARE_BITMAP(handled_vectors, 256); | 60 | DECLARE_BITMAP(handled_vectors, 256); |
61 | struct rtc_status rtc_status; | 61 | struct rtc_status rtc_status; |
62 | struct delayed_work eoi_inject; | ||
63 | u32 irq_eoi[IOAPIC_NUM_PINS]; | ||
62 | }; | 64 | }; |
63 | 65 | ||
64 | #ifdef DEBUG | 66 | #ifdef DEBUG |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 95519bc959ed..384eaa7b02fa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -52,11 +52,13 @@ | |||
52 | 52 | ||
53 | #include <asm/processor.h> | 53 | #include <asm/processor.h> |
54 | #include <asm/io.h> | 54 | #include <asm/io.h> |
55 | #include <asm/ioctl.h> | ||
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/pgtable.h> | 57 | #include <asm/pgtable.h> |
57 | 58 | ||
58 | #include "coalesced_mmio.h" | 59 | #include "coalesced_mmio.h" |
59 | #include "async_pf.h" | 60 | #include "async_pf.h" |
61 | #include "vfio.h" | ||
60 | 62 | ||
61 | #define CREATE_TRACE_POINTS | 63 | #define CREATE_TRACE_POINTS |
62 | #include <trace/events/kvm.h> | 64 | #include <trace/events/kvm.h> |
@@ -95,8 +97,6 @@ static int hardware_enable_all(void); | |||
95 | static void hardware_disable_all(void); | 97 | static void hardware_disable_all(void); |
96 | 98 | ||
97 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); | 99 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); |
98 | static void update_memslots(struct kvm_memslots *slots, | ||
99 | struct kvm_memory_slot *new, u64 last_generation); | ||
100 | 100 | ||
101 | static void kvm_release_pfn_dirty(pfn_t pfn); | 101 | static void kvm_release_pfn_dirty(pfn_t pfn); |
102 | static void mark_page_dirty_in_slot(struct kvm *kvm, | 102 | static void mark_page_dirty_in_slot(struct kvm *kvm, |
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu) | |||
129 | struct pid *oldpid = vcpu->pid; | 129 | struct pid *oldpid = vcpu->pid; |
130 | struct pid *newpid = get_task_pid(current, PIDTYPE_PID); | 130 | struct pid *newpid = get_task_pid(current, PIDTYPE_PID); |
131 | rcu_assign_pointer(vcpu->pid, newpid); | 131 | rcu_assign_pointer(vcpu->pid, newpid); |
132 | synchronize_rcu(); | 132 | if (oldpid) |
133 | synchronize_rcu(); | ||
133 | put_pid(oldpid); | 134 | put_pid(oldpid); |
134 | } | 135 | } |
135 | cpu = get_cpu(); | 136 | cpu = get_cpu(); |
@@ -152,7 +153,7 @@ static void ack_flush(void *_completed) | |||
152 | { | 153 | { |
153 | } | 154 | } |
154 | 155 | ||
155 | static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | 156 | bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) |
156 | { | 157 | { |
157 | int i, cpu, me; | 158 | int i, cpu, me; |
158 | cpumask_var_t cpus; | 159 | cpumask_var_t cpus; |
@@ -189,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
189 | long dirty_count = kvm->tlbs_dirty; | 190 | long dirty_count = kvm->tlbs_dirty; |
190 | 191 | ||
191 | smp_mb(); | 192 | smp_mb(); |
192 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) | 193 | if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) |
193 | ++kvm->stat.remote_tlb_flush; | 194 | ++kvm->stat.remote_tlb_flush; |
194 | cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); | 195 | cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); |
195 | } | 196 | } |
@@ -197,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); | |||
197 | 198 | ||
198 | void kvm_reload_remote_mmus(struct kvm *kvm) | 199 | void kvm_reload_remote_mmus(struct kvm *kvm) |
199 | { | 200 | { |
200 | make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); | 201 | kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); |
201 | } | 202 | } |
202 | 203 | ||
203 | void kvm_make_mclock_inprogress_request(struct kvm *kvm) | 204 | void kvm_make_mclock_inprogress_request(struct kvm *kvm) |
204 | { | 205 | { |
205 | make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); | 206 | kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); |
206 | } | 207 | } |
207 | 208 | ||
208 | void kvm_make_scan_ioapic_request(struct kvm *kvm) | 209 | void kvm_make_scan_ioapic_request(struct kvm *kvm) |
209 | { | 210 | { |
210 | make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); | 211 | kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); |
211 | } | 212 | } |
212 | 213 | ||
213 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 214 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
@@ -295,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
295 | kvm_flush_remote_tlbs(kvm); | 296 | kvm_flush_remote_tlbs(kvm); |
296 | 297 | ||
297 | spin_unlock(&kvm->mmu_lock); | 298 | spin_unlock(&kvm->mmu_lock); |
299 | |||
300 | kvm_arch_mmu_notifier_invalidate_page(kvm, address); | ||
301 | |||
298 | srcu_read_unlock(&kvm->srcu, idx); | 302 | srcu_read_unlock(&kvm->srcu, idx); |
299 | } | 303 | } |
300 | 304 | ||
@@ -368,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | |||
368 | 372 | ||
369 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | 373 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, |
370 | struct mm_struct *mm, | 374 | struct mm_struct *mm, |
371 | unsigned long address) | 375 | unsigned long start, |
376 | unsigned long end) | ||
372 | { | 377 | { |
373 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 378 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
374 | int young, idx; | 379 | int young, idx; |
@@ -376,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
376 | idx = srcu_read_lock(&kvm->srcu); | 381 | idx = srcu_read_lock(&kvm->srcu); |
377 | spin_lock(&kvm->mmu_lock); | 382 | spin_lock(&kvm->mmu_lock); |
378 | 383 | ||
379 | young = kvm_age_hva(kvm, address); | 384 | young = kvm_age_hva(kvm, start, end); |
380 | if (young) | 385 | if (young) |
381 | kvm_flush_remote_tlbs(kvm); | 386 | kvm_flush_remote_tlbs(kvm); |
382 | 387 | ||
@@ -476,6 +481,13 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
476 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 481 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
477 | if (!kvm->memslots) | 482 | if (!kvm->memslots) |
478 | goto out_err_no_srcu; | 483 | goto out_err_no_srcu; |
484 | |||
485 | /* | ||
486 | * Init kvm generation close to the maximum to easily test the | ||
487 | * code of handling generation number wrap-around. | ||
488 | */ | ||
489 | kvm->memslots->generation = -150; | ||
490 | |||
479 | kvm_init_memslots_id(kvm); | 491 | kvm_init_memslots_id(kvm); |
480 | if (init_srcu_struct(&kvm->srcu)) | 492 | if (init_srcu_struct(&kvm->srcu)) |
481 | goto out_err_no_srcu; | 493 | goto out_err_no_srcu; |
@@ -687,8 +699,7 @@ static void sort_memslots(struct kvm_memslots *slots) | |||
687 | } | 699 | } |
688 | 700 | ||
689 | static void update_memslots(struct kvm_memslots *slots, | 701 | static void update_memslots(struct kvm_memslots *slots, |
690 | struct kvm_memory_slot *new, | 702 | struct kvm_memory_slot *new) |
691 | u64 last_generation) | ||
692 | { | 703 | { |
693 | if (new) { | 704 | if (new) { |
694 | int id = new->id; | 705 | int id = new->id; |
@@ -699,15 +710,13 @@ static void update_memslots(struct kvm_memslots *slots, | |||
699 | if (new->npages != npages) | 710 | if (new->npages != npages) |
700 | sort_memslots(slots); | 711 | sort_memslots(slots); |
701 | } | 712 | } |
702 | |||
703 | slots->generation = last_generation + 1; | ||
704 | } | 713 | } |
705 | 714 | ||
706 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) | 715 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) |
707 | { | 716 | { |
708 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; | 717 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; |
709 | 718 | ||
710 | #ifdef KVM_CAP_READONLY_MEM | 719 | #ifdef __KVM_HAVE_READONLY_MEM |
711 | valid_flags |= KVM_MEM_READONLY; | 720 | valid_flags |= KVM_MEM_READONLY; |
712 | #endif | 721 | #endif |
713 | 722 | ||
@@ -722,10 +731,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
722 | { | 731 | { |
723 | struct kvm_memslots *old_memslots = kvm->memslots; | 732 | struct kvm_memslots *old_memslots = kvm->memslots; |
724 | 733 | ||
725 | update_memslots(slots, new, kvm->memslots->generation); | 734 | /* |
735 | * Set the low bit in the generation, which disables SPTE caching | ||
736 | * until the end of synchronize_srcu_expedited. | ||
737 | */ | ||
738 | WARN_ON(old_memslots->generation & 1); | ||
739 | slots->generation = old_memslots->generation + 1; | ||
740 | |||
741 | update_memslots(slots, new); | ||
726 | rcu_assign_pointer(kvm->memslots, slots); | 742 | rcu_assign_pointer(kvm->memslots, slots); |
727 | synchronize_srcu_expedited(&kvm->srcu); | 743 | synchronize_srcu_expedited(&kvm->srcu); |
728 | 744 | ||
745 | /* | ||
746 | * Increment the new memslot generation a second time. This prevents | ||
747 | * vm exits that race with memslot updates from caching a memslot | ||
748 | * generation that will (potentially) be valid forever. | ||
749 | */ | ||
750 | slots->generation++; | ||
751 | |||
729 | kvm_arch_memslots_updated(kvm); | 752 | kvm_arch_memslots_updated(kvm); |
730 | 753 | ||
731 | return old_memslots; | 754 | return old_memslots; |
@@ -776,7 +799,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
776 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 799 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
777 | npages = mem->memory_size >> PAGE_SHIFT; | 800 | npages = mem->memory_size >> PAGE_SHIFT; |
778 | 801 | ||
779 | r = -EINVAL; | ||
780 | if (npages > KVM_MEM_MAX_NR_PAGES) | 802 | if (npages > KVM_MEM_MAX_NR_PAGES) |
781 | goto out; | 803 | goto out; |
782 | 804 | ||
@@ -790,7 +812,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
790 | new.npages = npages; | 812 | new.npages = npages; |
791 | new.flags = mem->flags; | 813 | new.flags = mem->flags; |
792 | 814 | ||
793 | r = -EINVAL; | ||
794 | if (npages) { | 815 | if (npages) { |
795 | if (!old.npages) | 816 | if (!old.npages) |
796 | change = KVM_MR_CREATE; | 817 | change = KVM_MR_CREATE; |
@@ -846,7 +867,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
846 | } | 867 | } |
847 | 868 | ||
848 | if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { | 869 | if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { |
849 | r = -ENOMEM; | ||
850 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), | 870 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), |
851 | GFP_KERNEL); | 871 | GFP_KERNEL); |
852 | if (!slots) | 872 | if (!slots) |
@@ -1075,9 +1095,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); | |||
1075 | * If writable is set to false, the hva returned by this function is only | 1095 | * If writable is set to false, the hva returned by this function is only |
1076 | * allowed to be read. | 1096 | * allowed to be read. |
1077 | */ | 1097 | */ |
1078 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) | 1098 | unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, |
1099 | gfn_t gfn, bool *writable) | ||
1079 | { | 1100 | { |
1080 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1081 | unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); | 1101 | unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); |
1082 | 1102 | ||
1083 | if (!kvm_is_error_hva(hva) && writable) | 1103 | if (!kvm_is_error_hva(hva) && writable) |
@@ -1086,6 +1106,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) | |||
1086 | return hva; | 1106 | return hva; |
1087 | } | 1107 | } |
1088 | 1108 | ||
1109 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) | ||
1110 | { | ||
1111 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1112 | |||
1113 | return gfn_to_hva_memslot_prot(slot, gfn, writable); | ||
1114 | } | ||
1115 | |||
1089 | static int kvm_read_hva(void *data, void __user *hva, int len) | 1116 | static int kvm_read_hva(void *data, void __user *hva, int len) |
1090 | { | 1117 | { |
1091 | return __copy_from_user(data, hva, len); | 1118 | return __copy_from_user(data, hva, len); |
@@ -1107,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | |||
1107 | return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); | 1134 | return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); |
1108 | } | 1135 | } |
1109 | 1136 | ||
1137 | int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, | ||
1138 | unsigned long addr, bool write_fault, | ||
1139 | struct page **pagep) | ||
1140 | { | ||
1141 | int npages; | ||
1142 | int locked = 1; | ||
1143 | int flags = FOLL_TOUCH | FOLL_HWPOISON | | ||
1144 | (pagep ? FOLL_GET : 0) | | ||
1145 | (write_fault ? FOLL_WRITE : 0); | ||
1146 | |||
1147 | /* | ||
1148 | * If retrying the fault, we get here *not* having allowed the filemap | ||
1149 | * to wait on the page lock. We should now allow waiting on the IO with | ||
1150 | * the mmap semaphore released. | ||
1151 | */ | ||
1152 | down_read(&mm->mmap_sem); | ||
1153 | npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL, | ||
1154 | &locked); | ||
1155 | if (!locked) { | ||
1156 | VM_BUG_ON(npages); | ||
1157 | |||
1158 | if (!pagep) | ||
1159 | return 0; | ||
1160 | |||
1161 | /* | ||
1162 | * The previous call has now waited on the IO. Now we can | ||
1163 | * retry and complete. Pass TRIED to ensure we do not | ||
1164 | * reschedule async IO (see e.g. filemap_fault). | ||
1165 | */ | ||
1166 | down_read(&mm->mmap_sem); | ||
1167 | npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED, | ||
1168 | pagep, NULL, NULL); | ||
1169 | } | ||
1170 | up_read(&mm->mmap_sem); | ||
1171 | return npages; | ||
1172 | } | ||
1173 | |||
1110 | static inline int check_user_page_hwpoison(unsigned long addr) | 1174 | static inline int check_user_page_hwpoison(unsigned long addr) |
1111 | { | 1175 | { |
1112 | int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; | 1176 | int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; |
@@ -1169,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, | |||
1169 | npages = get_user_page_nowait(current, current->mm, | 1233 | npages = get_user_page_nowait(current, current->mm, |
1170 | addr, write_fault, page); | 1234 | addr, write_fault, page); |
1171 | up_read(¤t->mm->mmap_sem); | 1235 | up_read(¤t->mm->mmap_sem); |
1172 | } else | 1236 | } else { |
1173 | npages = get_user_pages_fast(addr, 1, write_fault, | 1237 | /* |
1174 | page); | 1238 | * By now we have tried gup_fast, and possibly async_pf, and we |
1239 | * are certainly not atomic. Time to retry the gup, allowing | ||
1240 | * mmap semaphore to be relinquished in the case of IO. | ||
1241 | */ | ||
1242 | npages = kvm_get_user_page_io(current, current->mm, addr, | ||
1243 | write_fault, page); | ||
1244 | } | ||
1175 | if (npages != 1) | 1245 | if (npages != 1) |
1176 | return npages; | 1246 | return npages; |
1177 | 1247 | ||
@@ -1768,8 +1838,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | |||
1768 | bool eligible; | 1838 | bool eligible; |
1769 | 1839 | ||
1770 | eligible = !vcpu->spin_loop.in_spin_loop || | 1840 | eligible = !vcpu->spin_loop.in_spin_loop || |
1771 | (vcpu->spin_loop.in_spin_loop && | 1841 | vcpu->spin_loop.dy_eligible; |
1772 | vcpu->spin_loop.dy_eligible); | ||
1773 | 1842 | ||
1774 | if (vcpu->spin_loop.in_spin_loop) | 1843 | if (vcpu->spin_loop.in_spin_loop) |
1775 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); | 1844 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); |
@@ -1975,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
1975 | if (vcpu->kvm->mm != current->mm) | 2044 | if (vcpu->kvm->mm != current->mm) |
1976 | return -EIO; | 2045 | return -EIO; |
1977 | 2046 | ||
2047 | if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) | ||
2048 | return -EINVAL; | ||
2049 | |||
1978 | #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) | 2050 | #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) |
1979 | /* | 2051 | /* |
1980 | * Special cases: vcpu ioctls that are asynchronous to vcpu execution, | 2052 | * Special cases: vcpu ioctls that are asynchronous to vcpu execution, |
@@ -2259,6 +2331,29 @@ struct kvm_device *kvm_device_from_filp(struct file *filp) | |||
2259 | return filp->private_data; | 2331 | return filp->private_data; |
2260 | } | 2332 | } |
2261 | 2333 | ||
2334 | static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { | ||
2335 | #ifdef CONFIG_KVM_MPIC | ||
2336 | [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, | ||
2337 | [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, | ||
2338 | #endif | ||
2339 | |||
2340 | #ifdef CONFIG_KVM_XICS | ||
2341 | [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, | ||
2342 | #endif | ||
2343 | }; | ||
2344 | |||
2345 | int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) | ||
2346 | { | ||
2347 | if (type >= ARRAY_SIZE(kvm_device_ops_table)) | ||
2348 | return -ENOSPC; | ||
2349 | |||
2350 | if (kvm_device_ops_table[type] != NULL) | ||
2351 | return -EEXIST; | ||
2352 | |||
2353 | kvm_device_ops_table[type] = ops; | ||
2354 | return 0; | ||
2355 | } | ||
2356 | |||
2262 | static int kvm_ioctl_create_device(struct kvm *kvm, | 2357 | static int kvm_ioctl_create_device(struct kvm *kvm, |
2263 | struct kvm_create_device *cd) | 2358 | struct kvm_create_device *cd) |
2264 | { | 2359 | { |
@@ -2267,36 +2362,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm, | |||
2267 | bool test = cd->flags & KVM_CREATE_DEVICE_TEST; | 2362 | bool test = cd->flags & KVM_CREATE_DEVICE_TEST; |
2268 | int ret; | 2363 | int ret; |
2269 | 2364 | ||
2270 | switch (cd->type) { | 2365 | if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) |
2271 | #ifdef CONFIG_KVM_MPIC | 2366 | return -ENODEV; |
2272 | case KVM_DEV_TYPE_FSL_MPIC_20: | 2367 | |
2273 | case KVM_DEV_TYPE_FSL_MPIC_42: | 2368 | ops = kvm_device_ops_table[cd->type]; |
2274 | ops = &kvm_mpic_ops; | 2369 | if (ops == NULL) |
2275 | break; | ||
2276 | #endif | ||
2277 | #ifdef CONFIG_KVM_XICS | ||
2278 | case KVM_DEV_TYPE_XICS: | ||
2279 | ops = &kvm_xics_ops; | ||
2280 | break; | ||
2281 | #endif | ||
2282 | #ifdef CONFIG_KVM_VFIO | ||
2283 | case KVM_DEV_TYPE_VFIO: | ||
2284 | ops = &kvm_vfio_ops; | ||
2285 | break; | ||
2286 | #endif | ||
2287 | #ifdef CONFIG_KVM_ARM_VGIC | ||
2288 | case KVM_DEV_TYPE_ARM_VGIC_V2: | ||
2289 | ops = &kvm_arm_vgic_v2_ops; | ||
2290 | break; | ||
2291 | #endif | ||
2292 | #ifdef CONFIG_S390 | ||
2293 | case KVM_DEV_TYPE_FLIC: | ||
2294 | ops = &kvm_flic_ops; | ||
2295 | break; | ||
2296 | #endif | ||
2297 | default: | ||
2298 | return -ENODEV; | 2370 | return -ENODEV; |
2299 | } | ||
2300 | 2371 | ||
2301 | if (test) | 2372 | if (test) |
2302 | return 0; | 2373 | return 0; |
@@ -2611,7 +2682,6 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2611 | 2682 | ||
2612 | switch (ioctl) { | 2683 | switch (ioctl) { |
2613 | case KVM_GET_API_VERSION: | 2684 | case KVM_GET_API_VERSION: |
2614 | r = -EINVAL; | ||
2615 | if (arg) | 2685 | if (arg) |
2616 | goto out; | 2686 | goto out; |
2617 | r = KVM_API_VERSION; | 2687 | r = KVM_API_VERSION; |
@@ -2623,7 +2693,6 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2623 | r = kvm_vm_ioctl_check_extension_generic(NULL, arg); | 2693 | r = kvm_vm_ioctl_check_extension_generic(NULL, arg); |
2624 | break; | 2694 | break; |
2625 | case KVM_GET_VCPU_MMAP_SIZE: | 2695 | case KVM_GET_VCPU_MMAP_SIZE: |
2626 | r = -EINVAL; | ||
2627 | if (arg) | 2696 | if (arg) |
2628 | goto out; | 2697 | goto out; |
2629 | r = PAGE_SIZE; /* struct kvm_run */ | 2698 | r = PAGE_SIZE; /* struct kvm_run */ |
@@ -2668,7 +2737,7 @@ static void hardware_enable_nolock(void *junk) | |||
2668 | 2737 | ||
2669 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 2738 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
2670 | 2739 | ||
2671 | r = kvm_arch_hardware_enable(NULL); | 2740 | r = kvm_arch_hardware_enable(); |
2672 | 2741 | ||
2673 | if (r) { | 2742 | if (r) { |
2674 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | 2743 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); |
@@ -2693,7 +2762,7 @@ static void hardware_disable_nolock(void *junk) | |||
2693 | if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 2762 | if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
2694 | return; | 2763 | return; |
2695 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | 2764 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); |
2696 | kvm_arch_hardware_disable(NULL); | 2765 | kvm_arch_hardware_disable(); |
2697 | } | 2766 | } |
2698 | 2767 | ||
2699 | static void hardware_disable(void) | 2768 | static void hardware_disable(void) |
@@ -3123,6 +3192,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) | |||
3123 | if (vcpu->preempted) | 3192 | if (vcpu->preempted) |
3124 | vcpu->preempted = false; | 3193 | vcpu->preempted = false; |
3125 | 3194 | ||
3195 | kvm_arch_sched_in(vcpu, cpu); | ||
3196 | |||
3126 | kvm_arch_vcpu_load(vcpu, cpu); | 3197 | kvm_arch_vcpu_load(vcpu, cpu); |
3127 | } | 3198 | } |
3128 | 3199 | ||
@@ -3214,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
3214 | goto out_undebugfs; | 3285 | goto out_undebugfs; |
3215 | } | 3286 | } |
3216 | 3287 | ||
3288 | r = kvm_vfio_ops_init(); | ||
3289 | WARN_ON(r); | ||
3290 | |||
3217 | return 0; | 3291 | return 0; |
3218 | 3292 | ||
3219 | out_undebugfs: | 3293 | out_undebugfs: |
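The two generation bumps that now bracket synchronize_srcu_expedited() mean an in-flight memslot update is always observed with an odd generation, so anything (such as an MMIO spte) cached against that value can never match a later lookup. A minimal model of that invariant, assuming nothing beyond standard C; the helper names are invented:

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long long generation;  /* even: stable, odd: update in flight */

    static void begin_memslot_update(void) { generation++; }  /* even -> odd */
    static void end_memslot_update(void)   { generation++; }  /* odd -> even */

    static bool cached_generation_valid(unsigned long long cached)
    {
            /* Odd values never validate, so fills that raced with an
             * update are dropped on the next lookup. */
            return !(cached & 1) && cached == generation;
    }

    int main(void)
    {
            unsigned long long before = generation;  /* cached at generation 0 */

            begin_memslot_update();
            unsigned long long racy = generation;    /* cached mid-update: odd */
            end_memslot_update();

            printf("%d %d %d\n",
                   cached_generation_valid(before),      /* 0: stale */
                   cached_generation_valid(racy),        /* 0: odd, never valid */
                   cached_generation_valid(generation)); /* 1 */
            return 0;
    }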
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index ba1a93f935c7..281e7cf2b8e5 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/uaccess.h> | 19 | #include <linux/uaccess.h> |
20 | #include <linux/vfio.h> | 20 | #include <linux/vfio.h> |
21 | #include "vfio.h" | ||
21 | 22 | ||
22 | struct kvm_vfio_group { | 23 | struct kvm_vfio_group { |
23 | struct list_head node; | 24 | struct list_head node; |
@@ -246,6 +247,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev) | |||
246 | kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ | 247 | kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ |
247 | } | 248 | } |
248 | 249 | ||
250 | static int kvm_vfio_create(struct kvm_device *dev, u32 type); | ||
251 | |||
252 | static struct kvm_device_ops kvm_vfio_ops = { | ||
253 | .name = "kvm-vfio", | ||
254 | .create = kvm_vfio_create, | ||
255 | .destroy = kvm_vfio_destroy, | ||
256 | .set_attr = kvm_vfio_set_attr, | ||
257 | .has_attr = kvm_vfio_has_attr, | ||
258 | }; | ||
259 | |||
249 | static int kvm_vfio_create(struct kvm_device *dev, u32 type) | 260 | static int kvm_vfio_create(struct kvm_device *dev, u32 type) |
250 | { | 261 | { |
251 | struct kvm_device *tmp; | 262 | struct kvm_device *tmp; |
@@ -268,10 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) | |||
268 | return 0; | 279 | return 0; |
269 | } | 280 | } |
270 | 281 | ||
271 | struct kvm_device_ops kvm_vfio_ops = { | 282 | int kvm_vfio_ops_init(void) |
272 | .name = "kvm-vfio", | 283 | { |
273 | .create = kvm_vfio_create, | 284 | return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); |
274 | .destroy = kvm_vfio_destroy, | 285 | } |
275 | .set_attr = kvm_vfio_set_attr, | ||
276 | .has_attr = kvm_vfio_has_attr, | ||
277 | }; | ||
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h new file mode 100644 index 000000000000..92eac75d6b62 --- /dev/null +++ b/virt/kvm/vfio.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef __KVM_VFIO_H | ||
2 | #define __KVM_VFIO_H | ||
3 | |||
4 | #ifdef CONFIG_KVM_VFIO | ||
5 | int kvm_vfio_ops_init(void); | ||
6 | #else | ||
7 | static inline int kvm_vfio_ops_init(void) | ||
8 | { | ||
9 | return 0; | ||
10 | } | ||
11 | #endif | ||
12 | |||
13 | #endif | ||
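With the switch in kvm_ioctl_create_device gone, device types resolve through a flat table that subsystems fill at init time via kvm_register_device_ops(), as the vfio conversion above shows. A toy model of the registration semantics (bounds check, first-come single registration), using invented names and standard C only:

    #include <errno.h>
    #include <stdio.h>

    #define DEV_TYPE_MAX 8          /* stand-in for KVM_DEV_TYPE_MAX */

    struct device_ops { const char *name; };

    static const struct device_ops *ops_table[DEV_TYPE_MAX];

    /* Mirrors kvm_register_device_ops: reject out-of-range types and
     * refuse to overwrite an existing registration. */
    static int register_device_ops(const struct device_ops *ops, unsigned type)
    {
            if (type >= DEV_TYPE_MAX)
                    return -ENOSPC;
            if (ops_table[type] != NULL)
                    return -EEXIST;
            ops_table[type] = ops;
            return 0;
    }

    int main(void)
    {
            static const struct device_ops vfio_ops = { .name = "kvm-vfio" };

            printf("%d\n", register_device_ops(&vfio_ops, 3));   /* 0 */
            printf("%d\n", register_device_ops(&vfio_ops, 3));   /* -EEXIST */
            printf("%d\n", register_device_ops(&vfio_ops, 99));  /* -ENOSPC */
            return 0;
    }

Create-time lookup is then a bounds check plus a NULL test on the table slot, which is exactly what the new kvm_ioctl_create_device body does.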